| parrotcode: UTF-8 encoding | |
| Contents | C | 

src/encodings/utf8.c - UTF-8 encoding

UTF-8 (http://www.utf-8.com/).

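static UINTVAL utf8_characters: returns the number of characters in the byte_len bytes from *ptr.
static UINTVAL utf8_decode: returns the character decoded from the UTF-8 sequence at *ptr.
static void *utf8_encode: produces the UTF-8 encoding of the character c.
static const void *utf8_skip_forward: moves ptr n characters forward.
static const void *utf8_skip_backward: moves ptr n characters back.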
static UINTVAL utf8_decode_and_advanceget_and_advance function.    if (UTF8_IS_START(c)) {
        UINTVAL len = UTF8SKIP(u8ptr);
        c &= UTF8_START_MASK(len);
        i->bytepos += len;
        for (len--; len; len--) {
            u8ptr++;
            if (!UTF8_IS_CONTINUATION(*u8ptr)) {
                real_exception(interp, NULL, MALFORMED_UTF8, "Malformed UTF-8 string\n");
            }
            c = UTF8_ACCUMULATE(c, *u8ptr);
        }
        if (UNICODE_IS_SURROGATE(c)) {
            real_exception(interp, NULL, MALFORMED_UTF8, "Surrogate in UTF-8 string\n");
        }
    }
    else if (!UNICODE_IS_INVARIANT(c)) {
        real_exception(interp, NULL, MALFORMED_UTF8, "Malformed UTF-8 string\n");
    }
    else {
        i->bytepos++;
    }
    i->charpos++;
    return c;
}
static void utf8_encode_and_advance: the set_and_advance function.
static void utf8_set_position: the set_position function.
static STRING *to_encoding
static UINTVAL get_codepoint
static void set_codepoint
static UINTVAL get_byte
static void set_byte
static STRING *get_codepoints
static STRING *get_bytes
static STRING *get_codepoints_inplace
static STRING *get_bytes_inplace
static void set_codepoints
static void set_bytes
static void become_encoding
static UINTVAL codepoints
static UINTVAL bytes
static void iter_init
ENCODING *Parrot_encoding_utf8_init
src/encodings/fixed_8.c, src/string.c, include/parrot/string.h, docs/string.pod.
|  |   |