parrotcode: UTF-8 encoding | |
Contents | C |
src/encodings/utf8.c - UTF-8 encoding
UTF-8 (http://www.utf-8.com/).
static UINTVAL utf8_characters
Returns the number of characters in the byte_len bytes from *ptr.

static UINTVAL utf8_decode
Returns the integer for the UTF-8 character found at *ptr.

static void *utf8_encode
Returns the UTF-8 encoding of integer c.

static const void *utf8_skip_forward
Moves ptr n characters forward.

static const void *utf8_skip_backward
Moves ptr n characters back.

static UINTVAL utf8_decode_and_advance
The UTF-8 implementation of the string iterator's get_and_advance function.

if (UTF8_IS_START(c)) {
UINTVAL len = UTF8SKIP(u8ptr);
c &= UTF8_START_MASK(len);
i->bytepos += len;
for (len--; len; len--) {
u8ptr++;
if (!UTF8_IS_CONTINUATION(*u8ptr))
Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8,
"Malformed UTF-8 string\n");
c = UTF8_ACCUMULATE(c, *u8ptr);
}
if (UNICODE_IS_SURROGATE(c))
Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8,
"Surrogate in UTF-8 string\n");
}
else if (!UNICODE_IS_INVARIANT(c)) {
Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_MALFORMED_UTF8,
"Malformed UTF-8 string\n");
}
else {
i->bytepos++;
}
i->charpos++;
return c;
}
static void utf8_encode_and_advance
The UTF-8 implementation of the string iterator's set_and_advance function.

static void utf8_set_position
The UTF-8 implementation of the string iterator's set_position function.

static STRING *to_encoding
Converts the string src to this particular encoding. If dest is provided, it will contain the result. Otherwise this function operates in place.

static UINTVAL get_codepoint
Returns the codepoint in string src at position offset.

static void set_codepoint
Sets, in string src at position offset, the codepoint codepoint.

static UINTVAL get_byte
Returns the byte in string src at position offset.

static void set_byte
Sets, in string src at position offset, the byte byte.

static STRING *get_codepoints
Returns the codepoints in string src at position offset and length count.

static STRING *get_bytes
Returns the bytes in string src at position offset and length count.

static STRING *get_codepoints_inplace
Gets from string src at position offset count codepoints and returns them in return_string.

static STRING *get_bytes_inplace
Gets from string src at position offset count bytes and returns them in return_string.

static void set_codepoints
Replaces the codepoints in string src at position offset for count codepoints with the contents of string new_codepoints.

static void set_bytes
Replaces the bytes in string src at position offset for count bytes with the contents of string new_bytes.

static void become_encoding
static UINTVAL codepoints
Returns the number of codepoints in string src.

static UINTVAL bytes
Returns the number of bytes in string src.

static void iter_init
Initializes for string src the string iterator iter.

ENCODING *Parrot_encoding_utf8_init
See also: src/encodings/fixed_8.c, src/string.c, include/parrot/string.h, docs/string.pod.
|