parrotcode: UTF-8 encoding | |
Contents | C |
src/encodings/utf8.c - UTF-8 encoding
UTF-8 (http://www.utf-8.com/).
static UINTVAL utf8_characters(PARROT_INTERP, NOTNULL(const utf8_t *ptr), UINTVAL byte_len)
Returns the number of characters in the byte_len bytes from *ptr.
static UINTVAL utf8_decode(PARROT_INTERP, NOTNULL(const utf8_t *ptr))
Returns the integer for the UTF-8 character found at *ptr.
static void *utf8_encode(PARROT_INTERP, NOTNULL(void *ptr), UINTVAL c)
Returns the UTF-8 encoding of integer c.
static const void *utf8_skip_forward(NOTNULL(const void *ptr), UINTVAL n)
Moves ptr n characters forward.
static const void *utf8_skip_backward(NOTNULL(const void *ptr), UINTVAL n)
Moves ptr n characters back.
static UINTVAL utf8_decode_and_advance(PARROT_INTERP, NOTNULL(String_iter *i))
The UTF-8 implementation of the string iterator's get_and_advance function.
if (UTF8_IS_START(c)) {
UINTVAL len = UTF8SKIP(u8ptr);
c &= UTF8_START_MASK(len);
i->bytepos += len;
for (len--; len; len--) {
u8ptr++;
if (!UTF8_IS_CONTINUATION(*u8ptr)) {
real_exception(interp, NULL, MALFORMED_UTF8, "Malformed UTF-8 string\n");
}
c = UTF8_ACCUMULATE(c, *u8ptr);
}
if (UNICODE_IS_SURROGATE(c)) {
real_exception(interp, NULL, MALFORMED_UTF8, "Surrogate in UTF-8 string\n");
}
}
else if (!UNICODE_IS_INVARIANT(c)) {
real_exception(interp, NULL, MALFORMED_UTF8, "Malformed UTF-8 string\n");
}
else {
i->bytepos++;
}
i->charpos++;
return c;
}
static void utf8_encode_and_advance(PARROT_INTERP, NOTNULL(String_iter *i), UINTVAL c)
The UTF-8 implementation of the string iterator's set_and_advance function.
static void utf8_set_position(SHIM_INTERP, NOTNULL(String_iter *i), UINTVAL pos)
The UTF-8 implementation of the string iterator's set_position function.
static STRING *to_encoding(PARROT_INTERP, NOTNULL(STRING *src), NULLOK(STRING *dest))
static UINTVAL get_codepoint(PARROT_INTERP, NOTNULL(const STRING *src), UINTVAL offset)
static void set_codepoint(PARROT_INTERP, NOTNULL(STRING *src), UINTVAL offset, UINTVAL codepoint)
static UINTVAL get_byte(SHIM_INTERP, NOTNULL(const STRING *src), UINTVAL offset)
static void set_byte(PARROT_INTERP, NOTNULL(const STRING *src), UINTVAL offset, UINTVAL byte)
PARROT_CANNOT_RETURN_NULL static STRING *get_codepoints(PARROT_INTERP, NOTNULL(STRING *src), UINTVAL offset, UINTVAL count)
PARROT_CANNOT_RETURN_NULL static STRING *get_bytes(PARROT_INTERP, NOTNULL(STRING *src), UINTVAL offset, UINTVAL count)
PARROT_CANNOT_RETURN_NULL static STRING *get_codepoints_inplace(PARROT_INTERP, NOTNULL(STRING *src), UINTVAL offset, UINTVAL count, NOTNULL(STRING *return_string))
static STRING *get_bytes_inplace(PARROT_INTERP, SHIM(STRING *src), UINTVAL offset, UINTVAL count, SHIM(STRING *return_string))
static void set_codepoints(PARROT_INTERP, SHIM(STRING *src), UINTVAL offset, UINTVAL count, SHIM(STRING *new_codepoints))
static void set_bytes(PARROT_INTERP, SHIM(STRING *src), UINTVAL offset, UINTVAL count, SHIM(STRING *new_bytes))
static void become_encoding(PARROT_INTERP, SHIM(STRING *src))
static UINTVAL codepoints(PARROT_INTERP, NOTNULL(STRING *src))
PARROT_PURE_FUNCTION static UINTVAL bytes(SHIM_INTERP, NOTNULL(STRING *src))
static void iter_init(SHIM_INTERP, NOTNULL(const STRING *src), NOTNULL(String_iter *iter))
ENCODING *Parrot_encoding_utf8_init(PARROT_INTERP)
src/encodings/fixed_8.c, src/string.c, include/parrot/string.h, docs/string.pod.
|