/*
 * src/io/io_utf8.c - IO Layer for UTF8
 *
 * Convert output to utf8. Convert input to Parrot's internal string
 * representation.
 *
 * utf8 layer functions
 */


#include "parrot/parrot.h" #include "io_private.h" #include "../unicode.h"


/* HEADERIZER BEGIN: static */

/* Forward declarations of the two layer entry points defined below;
 * every pointer argument (positions 1-4) is declared non-null. */
static size_t PIO_utf8_read(
    PARROT_INTERP,
    NOTNULL(ParrotIOLayer *layer),
    NOTNULL(ParrotIO *io),
    NOTNULL(STRING **buf) )
        __attribute__nonnull__(1)
        __attribute__nonnull__(2)
        __attribute__nonnull__(3)
        __attribute__nonnull__(4);

static size_t PIO_utf8_write(
    PARROT_INTERP,
    NOTNULL(ParrotIOLayer *l),
    NOTNULL(ParrotIO *io),
    NOTNULL(STRING *s) )
        __attribute__nonnull__(1)
        __attribute__nonnull__(2)
        __attribute__nonnull__(3)
        __attribute__nonnull__(4);

/* HEADERIZER END: static */

static const ParrotIOLayerAPI pio_utf8_layer_api = { PIO_null_init, PIO_base_new_layer, PIO_base_delete_layer, PIO_null_push_layer, PIO_null_pop_layer, PIO_null_open, PIO_null_open2, PIO_null_open3, PIO_null_open_async, PIO_null_fdopen, PIO_null_close, PIO_utf8_write, PIO_null_write_async, PIO_utf8_read, PIO_null_read_async, PIO_null_flush, PIO_null_peek, PIO_null_seek, PIO_null_tell, PIO_null_setbuf, PIO_null_setlinebuf, PIO_null_getcount, PIO_null_fill, PIO_null_eof, NULL, /* no poll */ NULL, /* no socket */ NULL, /* no connect */ NULL, /* no send */ NULL, /* no recv */ NULL, /* no bind */ NULL, /* no listen */ NULL /* no accept */ };

/*
 * The utf8 layer instance returned by PIO_utf8_register_layer().
 * It is labelled "utf8" and dispatches through pio_utf8_layer_api; the
 * remaining fields start out as NULL / 0.
 */
ParrotIOLayer pio_utf8_layer = {
    NULL,
    "utf8",
    0,
    &pio_utf8_layer_api,
    NULL,
    NULL
};

/*
 * PIO_utf8_register_layer
 *
 * Returns the address of the file-scope utf8 layer object
 * (pio_utf8_layer). The result is never NULL.
 */
PARROT_WARN_UNUSED_RESULT
PARROT_CANNOT_RETURN_NULL
ParrotIOLayer *
PIO_utf8_register_layer(void)
{
    ParrotIOLayer * const utf8_layer = &pio_utf8_layer;

    return utf8_layer;
}

/*
 * PIO_utf8_read
 *
 * Read entry point of the utf8 layer.
 *
 * Reads from the layer below into *buf, marks the resulting string as
 * unicode charset / utf8 encoding, and then walks it with a string iterator
 * to count its characters. If the buffer ends in the middle of a multi-byte
 * sequence, the bytes still missing are read from the layer below and
 * appended, so that the final character is complete before it is decoded.
 *
 * interp - the interpreter
 * layer  - this layer; reads are forwarded to layer->down
 * io     - the IO object being read
 * buf    - in/out string; on return its strlen holds the character count
 *
 * Returns the sum of the counts reported by the lower-layer reads.
 */
static size_t
PIO_utf8_read(PARROT_INTERP, NOTNULL(ParrotIOLayer *layer),
        NOTNULL(ParrotIO *io), NOTNULL(STRING **buf))
{
    STRING      *s;
    String_iter  iter;
    size_t       len = PIO_read_down(interp, layer->down, io, buf);

    s           = *buf;
    s->charset  = Parrot_unicode_charset_ptr;
    s->encoding = Parrot_utf8_encoding_ptr;

    /* count chars, verify utf8 */
    Parrot_utf8_encoding_ptr->iter_init(interp, s, &iter);

    while (iter.bytepos < s->bufused) {
        /* Fewer than 4 bytes remain: a sequence starting here may be cut
         * off by the end of the buffer. */
        if (iter.bytepos + 4 > s->bufused) {
            const utf8_t * const u8ptr =
                (const utf8_t *)((const char *)s->strstart + iter.bytepos);
            const UINTVAL c = *u8ptr;

            if (UTF8_IS_START(c)) {
                const UINTVAL seq_len = UTF8SKIP(u8ptr);

                if (iter.bytepos + seq_len > s->bufused) {
                    /* Bytes of this sequence that are not in the buffer
                     * yet; positive because of the check just above. */
                    const UINTVAL missing =
                        iter.bytepos + seq_len - s->bufused;
                    STRING *tail = NULL;
                    size_t  got;

                    /* The request size is carried in tail->bufused. */
                    tail           = PIO_make_io_string(interp, &tail, missing);
                    tail->bufused  = missing;
                    tail->charset  = Parrot_unicode_charset_ptr;
                    tail->encoding = Parrot_utf8_encoding_ptr;

                    /* NOTE(review): a short read (e.g. at end of input) is
                     * not handled; the sequence would stay incomplete. */
                    got = PIO_read_down(interp, layer->down, io, &tail);

                    /* Record the characters counted so far before the
                     * string is extended. */
                    s->strlen = iter.charpos;

                    /* NOTE(review): if string_append can return a STRING
                     * other than the one passed in, *buf and iter would
                     * still refer to the old one -- confirm. */
                    s    = string_append(interp, s, tail);
                    len += got;
                }
            }
        }

        iter.get_and_advance(interp, &iter);
    }

    s->strlen = iter.charpos;
    return len;
}

/*
 * PIO_utf8_write
 *
 * Write entry point of the utf8 layer.
 *
 * A string whose encoding is already utf8 is handed to the layer below
 * unchanged. Any other string is first converted with the utf8 encoding's
 * to_encoding into a freshly allocated string header, and the converted
 * string is written instead; the caller's string is passed only as the
 * conversion source.
 *
 * interp - the interpreter
 * l      - this layer; writes are forwarded to l->down
 * io     - the IO object being written
 * s      - the string to write
 *
 * Returns whatever PIO_write_down returns for the string actually written.
 */
static size_t
PIO_utf8_write(PARROT_INTERP, NOTNULL(ParrotIOLayer *l),
        NOTNULL(ParrotIO *io), NOTNULL(STRING *s))
{
    STRING *dest;

    /* Already utf8: nothing to convert. */
    if (s->encoding == Parrot_utf8_encoding_ptr) {
        return PIO_write_down(interp, l->down, io, s);
    }

    dest = Parrot_utf8_encoding_ptr->to_encoding(interp, s,
            new_string_header(interp, 0));

    return PIO_write_down(interp, l->down, io, dest);
}



/*
 * See also: src/io/io_passdown.c, src/io/io.c, src/io/io_layers.c,
 * src/io/io_private.h.
 *
 * Initially written by Leo.
 */


/*
 * Local variables:
 *   c-file-style: "parrot"
 * End:
 * vim: expandtab shiftwidth=4:
 */
