| parrotcode: UTF-8 encoding | |
| Contents | C |

src/encodings/utf8.c - UTF-8 encoding

UTF-8 (http://www.utf-8.com/).

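utf8_characters: returns the number of characters in byte_len bytes from *ptr.
utf8_decode: decodes the UTF-8 character found at *ptr.
utf8_encode: encodes c as UTF-8.
utf8_skip_forward: moves ptr n characters forward.
utf8_skip_backward: moves ptr n characters back.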
utf8_decode_and_advance: the get_and_advance function.
if (UTF8_IS_START(c)) {
UINTVAL len = UTF8SKIP(u8ptr);
c &= UTF8_START_MASK(len);
i->bytepos += len;
for (len--; len; len--) {
u8ptr++;
if (!UTF8_IS_CONTINUATION(*u8ptr)) {
real_exception(interp, NULL, MALFORMED_UTF8, "Malformed UTF-8 string\n");
}
c = UTF8_ACCUMULATE(c, *u8ptr);
}
if (UNICODE_IS_SURROGATE(c)) {
real_exception(interp, NULL, MALFORMED_UTF8, "Surrogate in UTF-8 string\n");
}
}
else if (!UNICODE_IS_INVARIANT(c)) {
real_exception(interp, NULL, MALFORMED_UTF8, "Malformed UTF-8 string\n");
}
else {
i->bytepos++;
}
i->charpos++;
return c;
}
utf8_encode_and_advance: the set_and_advance function.
utf8_set_position: the set_position function.
to_encoding
get_codepoint
set_codepoint
get_byte
set_byte
get_codepoints
get_bytes
get_codepoints_inplace
get_bytes_inplace
set_codepoints
set_bytes
become_encoding
codepoints
bytes
iter_init
Parrot_encoding_utf8_init
src/encodings/fixed_8.c, src/string.c, include/parrot/string.h, docs/string.pod.
|
|
|