parrotcode: IO Layer for UTF8 | |
Contents | C |
/*
src/io/io_utf8.c - IO Layer for UTF8
Convert output to utf8. Convert input to Parrot's internal string representation.
*/
#include "parrot/parrot.h" #include "io_private.h" #include "../unicode.h"
/* HEADERIZER HFILE: none */
/* HEADERIZER BEGIN: static */
static size_t PIO_utf8_read( PARROT_INTERP, NOTNULL(ParrotIOLayer *layer), NOTNULL(ParrotIO *io), NOTNULL(STRING **buf) ) __attribute__nonnull__(1) __attribute__nonnull__(2) __attribute__nonnull__(3) __attribute__nonnull__(4);
static size_t PIO_utf8_write( PARROT_INTERP, NOTNULL(ParrotIOLayer *l), NOTNULL(ParrotIO *io), NOTNULL(STRING *s) ) __attribute__nonnull__(1) __attribute__nonnull__(2) __attribute__nonnull__(3) __attribute__nonnull__(4);
/* HEADERIZER END: static */
/*
 * Function table for the utf8 layer.
 *
 * PIO_utf8_write and PIO_utf8_read are the only entries implemented in this
 * file; every other slot is filled with a PIO_null_* or PIO_base_* function,
 * or with NULL for the trailing slots (poll, socket, connect, send, recv,
 * bind, listen, accept) as labelled inline.
 *
 * NOTE(review): the initializer is positional, so the order of entries must
 * match the member order of ParrotIOLayerAPI, which is declared elsewhere --
 * confirm against that declaration when adding or removing a slot.
 */
static const ParrotIOLayerAPI pio_utf8_layer_api = { PIO_null_init, PIO_base_new_layer, PIO_base_delete_layer, PIO_null_push_layer, PIO_null_pop_layer, PIO_null_open, PIO_null_open2, PIO_null_open3, PIO_null_open_async, PIO_null_fdopen, PIO_null_close, PIO_utf8_write, PIO_null_write_async, PIO_utf8_read, PIO_null_read_async, PIO_null_flush, PIO_null_peek, PIO_null_seek, PIO_null_tell, PIO_null_setbuf, PIO_null_setlinebuf, PIO_null_getcount, PIO_null_fill, PIO_null_eof, NULL, /* no poll */ NULL, /* no socket */ NULL, /* no connect */ NULL, /* no send */ NULL, /* no recv */ NULL, /* no bind */ NULL, /* no listen */ NULL /* no accept */ };
/*
 * The utf8 layer object handed out by PIO_utf8_register_layer().
 *
 * It carries the name "utf8" and points at pio_utf8_layer_api; the remaining
 * members are initialized to NULL / 0.
 *
 * NOTE(review): the initializer is positional; the meaning of the NULL and 0
 * members depends on the ParrotIOLayer declaration, which is not in this
 * file -- confirm there before changing them.
 */
ParrotIOLayer pio_utf8_layer = { NULL, "utf8", 0, &pio_utf8_layer_api, NULL, NULL };
ParrotIOLayer * PIO_utf8_register_layer(void) { return &pio_utf8_layer; }
/*
 * PIO_utf8_read
 *
 * Reads from the layer below into *buf, marks the resulting string as
 * unicode/utf8, and walks it with the utf8 string iterator so that the
 * string's character count (strlen) can be set.
 *
 * If the data ends part-way through a multi-byte sequence, the bytes that
 * are still missing are fetched with a second read from the layer below and
 * appended before the iterator steps over that character.
 *
 * Parameters:
 *   interp - the interpreter
 *   layer  - this layer; reads are delegated to layer->down
 *   io     - the IO object being read
 *   buf    - in/out string pointer; on return *buf is the utf8-tagged string
 *
 * Returns the sum of the values returned by the PIO_read_down() calls made
 * here, i.e. the initial read plus any top-up reads.
 */
static size_t
PIO_utf8_read(PARROT_INTERP, NOTNULL(ParrotIOLayer *layer),
        NOTNULL(ParrotIO *io), NOTNULL(STRING **buf))
{
    STRING      *s;
    String_iter  iter;
    size_t       len = PIO_read_down(interp, layer->down, io, buf);

    s           = *buf;
    s->charset  = Parrot_unicode_charset_ptr;
    s->encoding = Parrot_utf8_encoding_ptr;

    /* count chars, verify utf8 */
    Parrot_utf8_encoding_ptr->iter_init(interp, s, &iter);

    while (iter.bytepos < s->bufused) {
        /* Only positions with fewer than 4 bytes left in the buffer are
         * examined for a sequence that was cut off by the read. */
        if (iter.bytepos + 4 > s->bufused) {
            const utf8_t * const u8ptr =
                (utf8_t *)((char *)s->strstart + iter.bytepos);
            const UINTVAL        c     = *u8ptr;

            if (UTF8_IS_START(c)) {
                const UINTVAL seq_len = UTF8SKIP(u8ptr);

                if (iter.bytepos + seq_len > s->bufused) {
                    /* Ask only for the bytes that are really missing.  The
                     * lead byte is not necessarily the only byte of the
                     * sequence already buffered, so "seq_len - 1" can ask
                     * for more than this sequence needs. */
                    const UINTVAL missing =
                        iter.bytepos + seq_len - s->bufused;
                    STRING       *tail    = NULL;

                    tail           = PIO_make_io_string(interp, &tail, missing);
                    tail->bufused  = missing;
                    tail->charset  = Parrot_unicode_charset_ptr;
                    tail->encoding = Parrot_utf8_encoding_ptr;

                    /* Account for what the lower layer reports it delivered
                     * instead of assuming the full request was satisfied.
                     * NOTE(review): assumes PIO_read_down() returns a byte
                     * count here just as for the initial read above; a short
                     * or empty read still leaves a truncated sequence for
                     * the iterator -- confirm the desired EOF behaviour. */
                    len += PIO_read_down(interp, layer->down, io, &tail);

                    s->strlen = iter.charpos;
                    s         = string_append(interp, s, tail);
                    /* NOTE(review): iter was initialized on the original
                     * string; this relies on string_append() extending that
                     * same string -- confirm against its contract. */
                }
            }
        }

        iter.get_and_advance(interp, &iter);
    }

    s->strlen = iter.charpos;

    /* Hand the (possibly appended-to) string back to the caller. */
    *buf = s;

    return len;
}
/*
 * PIO_utf8_write
 *
 * Writes s to the layer below as utf8.  A string whose encoding is already
 * Parrot_utf8_encoding_ptr is passed down unchanged; any other string is
 * first converted with the utf8 encoding's to_encoding() into a freshly
 * created string header, and the converted string is passed down instead.
 *
 * Returns whatever PIO_write_down() returns for the string written.
 */
static size_t
PIO_utf8_write(PARROT_INTERP, NOTNULL(ParrotIOLayer *l),
        NOTNULL(ParrotIO *io), NOTNULL(STRING *s))
{
    STRING *outgoing = s;

    if (s->encoding != Parrot_utf8_encoding_ptr) {
        outgoing = Parrot_utf8_encoding_ptr->to_encoding(interp, s,
                new_string_header(interp, 0));
    }

    return PIO_write_down(interp, l->down, io, outgoing);
}
/*
Related files: src/io/io_passdown.c, src/io/io.c, src/io/io_layers.c, src/io/io_private.h.
History: initially written by Leo.
*/
/* * Local variables: * c-file-style: "parrot" * End: * vim: expandtab shiftwidth=4: */
|