parrotcode: Parrot Strings | |
Contents | C |
src/string.c - Parrot Strings
This file implements the non-ICU parts of the Parrot string subsystem.
Note that bufstart and buflen are used by the memory subsystem.
The string functions may only use buflen to determine whether there is some space left beyond bufused.
This is the only valid usage of these two data members, besides setting bufstart/buflen for external strings.
PARROT_API void Parrot_unmake_COW(PARROT_INTERP, NOTNULL(STRING *s))
PARROT_API PARROT_CANNOT_RETURN_NULL PARROT_WARN_UNUSED_RESULT STRING *Parrot_make_COW_reference(PARROT_INTERP, NOTNULL(STRING *s))
PARROT_API PARROT_CANNOT_RETURN_NULL STRING *Parrot_reuse_COW_reference(SHIM_INTERP, NOTNULL(STRING *s), NOTNULL(STRING *d))
PARROT_API PARROT_CANNOT_RETURN_NULL STRING *string_set(PARROT_INTERP, NULLOK(STRING *dest), NOTNULL(STRING *src))
Creation, enlargement, etc.
PARROT_API void string_init(PARROT_INTERP)
PARROT_API void string_deinit(PARROT_INTERP)
PARROT_API PARROT_WARN_UNUSED_RESULT PARROT_PURE_FUNCTION UINTVAL string_capacity(SHIM_INTERP, NOTNULL(const STRING *s))
PARROT_API PARROT_CANNOT_RETURN_NULL STRING *string_make_empty(PARROT_INTERP, parrot_string_representation_t representation, UINTVAL capacity)
PARROT_API PARROT_WARN_UNUSED_RESULT PARROT_CAN_RETURN_NULL const CHARSET *string_rep_compatible(SHIM_INTERP, NOTNULL(const STRING *a), NOTNULL(const STRING *b), ARGOUT(const ENCODING **e))
ascii <op> utf8 => utf8
=> ascii, if STRING *b has ascii chars only.
PARROT_API PARROT_WARN_UNUSED_RESULT PARROT_CANNOT_RETURN_NULL STRING *string_append(PARROT_INTERP, NULLOK(STRING *a), NULLOK(STRING *b))
PARROT_API PARROT_WARN_UNUSED_RESULT PARROT_MALLOC PARROT_CANNOT_RETURN_NULL STRING *string_from_cstring(PARROT_INTERP, NULLOK(const char *const buffer), const UINTVAL len)
PARROT_API PARROT_CANNOT_RETURN_NULL const char *string_primary_encoding_for_representation(PARROT_INTERP, parrot_string_representation_t representation)
PARROT_API PARROT_WARN_UNUSED_RESULT PARROT_CANNOT_RETURN_NULL STRING *const_string(PARROT_INTERP, NOTNULL(const char *buffer))
PARROT_API PARROT_WARN_UNUSED_RESULT PARROT_CANNOT_RETURN_NULL STRING *string_make(PARROT_INTERP, NULLOK(const char *buffer), UINTVAL len, NULLOK(const char *charset_name), UINTVAL flags)
len
bytes of string data read from buffer
.charset_name
specifies the string's representation. The currently recognised values are: 'iso-8859-1'
'ascii'
'binary'
'unicode'
unicode
implies the utf-8
encoding, and the other three assume fixed-8
encoding. If charset
is unspecified, the default charset 'ascii' will be used. flags
is optionally one or more PObj_*
flags OR
-ed together.PARROT_API PARROT_WARN_UNUSED_RESULT PARROT_CANNOT_RETURN_NULL STRING *string_make_direct(PARROT_INTERP, NULLOK(const char *buffer), UINTVAL len, NOTNULL(const ENCODING *encoding), NOTNULL(const CHARSET *charset), UINTVAL flags)
PARROT_API PARROT_CANNOT_RETURN_NULL STRING *string_grow(PARROT_INTERP, NOTNULL(STRING *s), INTVAL addlen)
PARROT_API PARROT_PURE_FUNCTION UINTVAL string_length(SHIM_INTERP, NOTNULL(const STRING *s))
PARROT_API PARROT_WARN_UNUSED_RESULT INTVAL string_index(PARROT_INTERP, NOTNULL(const STRING *s), UINTVAL idx)
PARROT_API PARROT_WARN_UNUSED_RESULT INTVAL string_str_index(PARROT_INTERP, NOTNULL(const STRING *s), NOTNULL(const STRING *s2), INTVAL start)
start
. The return value is a (0 based) offset in characters, not bytes. If the second string is not specified, then -1 is returned.
PARROT_API PARROT_WARN_UNUSED_RESULT INTVAL string_ord(PARROT_INTERP, NOTNULL(const STRING *s), INTVAL idx)
PARROT_API PARROT_CANNOT_RETURN_NULL PARROT_WARN_UNUSED_RESULT STRING *string_chr(PARROT_INTERP, UINTVAL character)
PARROT_API PARROT_CANNOT_RETURN_NULL PARROT_WARN_UNUSED_RESULT STRING *string_copy(PARROT_INTERP, NOTNULL(STRING *s))
PARROT_API PARROT_IGNORABLE_RESULT INTVAL string_compute_strlen(PARROT_INTERP, NOTNULL(STRING *s))
PARROT_API PARROT_WARN_UNUSED_RESULT INTVAL string_max_bytes(SHIM_INTERP, NOTNULL(const STRING *s), INTVAL nchars)
PARROT_API PARROT_CANNOT_RETURN_NULL STRING *string_concat(PARROT_INTERP, NULLOK(STRING *a), NULLOK(STRING *b), UINTVAL Uflags)
NULL
, then a copy of the non-NULL
string is returned. If both strings are NULL
, then a new zero-length string is created and returned.PARROT_API PARROT_CANNOT_RETURN_NULL STRING *string_repeat(PARROT_INTERP, NOTNULL(const STRING *s), UINTVAL num, ARGOUT_NULLOK(STRING **d))
PARROT_API PARROT_CANNOT_RETURN_NULL PARROT_WARN_UNUSED_RESULT STRING *string_substr(PARROT_INTERP, NOTNULL(STRING *src), INTVAL offset, INTVAL length, ARGOUT_NULLOK(STRING **d), int replace_dest)
length
from offset
from the specified Parrot string and stores it in **d
, allocating memory if necessary. The substring is also returned.PARROT_API PARROT_CAN_RETURN_NULL STRING *string_replace(PARROT_INTERP, NOTNULL(STRING *src), INTVAL offset, INTVAL length, NOTNULL(STRING *rep), ARGOUT_NULLOK(STRING **d))
substr EXPR, OFFSET, LENGTH, REPLACEMENT
length
characters from offset
in the first Parrot string with the second Parrot string, returning what was replaced.PARROT_API PARROT_CANNOT_RETURN_NULL STRING *string_chopn(PARROT_INTERP, NOTNULL(STRING *s), INTVAL n)
n
characters of the specified Parrot string. If n
is negative, cuts the string after +n
characters. The returned string is a copy of the one passed in.PARROT_API void string_chopn_inplace(PARROT_INTERP, NOTNULL(STRING *s), INTVAL n)
n
characters of the specified Parrot string. If n
is negative, cuts the string after +n
characters. The string passed in is modified and returned.PARROT_API PARROT_WARN_UNUSED_RESULT INTVAL string_compare(PARROT_INTERP, NULLOK(const STRING *s1), NULLOK(const STRING *s2))
PARROT_API PARROT_WARN_UNUSED_RESULT INTVAL string_equal(PARROT_INTERP, NULLOK(const STRING *s1), NULLOK(const STRING *s2))
static void make_writable(PARROT_INTERP, ARGINOUT(STRING **s), const size_t len, parrot_string_representation_t representation)
len
. The representation
argument is required in case a new Parrot string has to be created.PARROT_API PARROT_CANNOT_RETURN_NULL STRING *string_bitwise_and(PARROT_INTERP, NULLOK(STRING *s1), NULLOK(STRING *s2), ARGOUT_NULLOK(STRING **dest))
AND
on two Parrot strings, performing type and encoding conversions if necessary. If the third string is not NULL
then it is reused, otherwise a new Parrot string is created.PARROT_API PARROT_CANNOT_RETURN_NULL STRING *string_bitwise_or(PARROT_INTERP, NULLOK(STRING *s1), NULLOK(STRING *s2), ARGOUT_NULLOK(STRING **dest))
OR
on two Parrot strings, performing type and encoding conversions if necessary. If the third string is not NULL
then it is reused, otherwise a new Parrot string is created.PARROT_API PARROT_CANNOT_RETURN_NULL STRING *string_bitwise_xor(PARROT_INTERP, NULLOK(STRING *s1), NULLOK(STRING *s2), ARGOUT_NULLOK(STRING **dest))
XOR
on two Parrot strings, performing type and encoding conversions if necessary. If the third string is not NULL
then it is reused, otherwise a new Parrot string is created.PARROT_API PARROT_CANNOT_RETURN_NULL STRING *string_bitwise_not(PARROT_INTERP, NULLOK(STRING *s), ARGOUT_NULLOK(STRING **dest))
NOT
on a Parrot string. If the second string is not NULL
then it is reused, otherwise a new Parrot string is created.PARROT_API PARROT_WARN_UNUSED_RESULT INTVAL string_bool(PARROT_INTERP, NOTNULL(const STRING *s))
0
, ""
or "0"
.PARROT_API PARROT_CANNOT_RETURN_NULL STRING *string_nprintf(PARROT_INTERP, NULLOK(STRING *dest), INTVAL bytelen, NOTNULL(const char *format), ...)
Parrot_snprintf()
except that it writes to and returns a Parrot string.bytelen
does not include space for a (non-existent) trailing '\0'
. dest
may be a NULL
pointer, in which case a new native string will be created. If bytelen
is 0, the behaviour becomes more sprintf
-ish than snprintf
-like. bytelen
is measured in the encoding of *dest
.PARROT_API PARROT_CANNOT_RETURN_NULL STRING *string_printf(PARROT_INTERP, NOTNULL(const char *format), ...)
PARROT_API PARROT_WARN_UNUSED_RESULT INTVAL string_to_int(SHIM_INTERP, NOTNULL(const STRING *s))
sign = '+' | '-'
digit = "Any code point considered a digit by the chartype"
indicator = 'e' | 'E'
digits = digit [digit]...
decimal-part = digits '.' [digits] | ['.'] digits
exponent-part = indicator [sign] digits
numeric-string = [sign] decimal-part [exponent-part]
PARROT_API PARROT_WARN_UNUSED_RESULT FLOATVAL string_to_num(PARROT_INTERP, NOTNULL(const STRING *s))
string_to_int()
except that a floating-point value is returned.PARROT_API PARROT_WARN_UNUSED_RESULT PARROT_CANNOT_RETURN_NULL STRING *string_from_int(PARROT_INTERP, INTVAL i)
PARROT_API PARROT_WARN_UNUSED_RESULT PARROT_CANNOT_RETURN_NULL STRING *string_from_num(PARROT_INTERP, FLOATVAL f)
PARROT_API PARROT_MALLOC PARROT_CANNOT_RETURN_NULL char *string_to_cstring(SHIM_INTERP, NOTNULL(const STRING *s))
string_cstring_free()
to free the string. Failure to do this will result in a memory leak.PARROT_API void string_cstring_free(NULLOK(char *p))
string_to_cstring()
.PARROT_API void string_pin(PARROT_INTERP, NOTNULL(STRING *s))
PARROT_API void string_unpin(PARROT_INTERP, NOTNULL(STRING *s))
string_pin()
so that the string once again uses managed memory.PARROT_API PARROT_WARN_UNUSED_RESULT size_t string_hash(PARROT_INTERP, NULLOK(STRING *s), size_t seed)
s->hashval
.PARROT_API PARROT_CAN_RETURN_NULL STRING *string_escape_string(PARROT_INTERP, NULLOK(const STRING *src))
string_unescape_cstring
can handle are escaped as \x, as well as a double quote character. Other control chars and codepoints < 0x100 are escaped as \xhh, codepoints up to 0xffff, as \uhhhh, and codepoints greater than this as \x{hh...hh}.
PARROT_API PARROT_CAN_RETURN_NULL STRING *string_escape_string_delimited(PARROT_INTERP, NULLOK(const STRING *src), UINTVAL limit)
PARROT_API PARROT_CANNOT_RETURN_NULL STRING *string_unescape_cstring(PARROT_INTERP, NOTNULL(const char *cstring), char delimiter, NULLOK(const char *enc_char))
\xhh 1..2 hex digits
\ooo 1..3 oct digits
\cX control char X
\x{h..h} 1..8 hex digits
\uhhhh 4 hex digits
\Uhhhhhhhh 8 hex digits
\a, \b, \t, \n, \v, \f, \r, \e
PARROT_API PARROT_CANNOT_RETURN_NULL PARROT_MALLOC STRING *string_upcase(PARROT_INTERP, NOTNULL(const STRING *s))
PARROT_API void string_upcase_inplace(PARROT_INTERP, NOTNULL(STRING *s))
PARROT_API PARROT_CANNOT_RETURN_NULL PARROT_MALLOC STRING *string_downcase(PARROT_INTERP, NOTNULL(const STRING *s))
PARROT_API void string_downcase_inplace(PARROT_INTERP, NOTNULL(STRING *s))
PARROT_API PARROT_CANNOT_RETURN_NULL PARROT_MALLOC STRING *string_titlecase(PARROT_INTERP, NOTNULL(const STRING *s))
PARROT_API void string_titlecase_inplace(PARROT_INTERP, NOTNULL(STRING *s))
PARROT_API PARROT_CANNOT_RETURN_NULL PARROT_WARN_UNUSED_RESULT STRING *string_increment(PARROT_INTERP, NOTNULL(const STRING *s))
PARROT_API PARROT_PURE_FUNCTION PARROT_CANNOT_RETURN_NULL const char *Parrot_string_cstring(SHIM_INTERP, NOTNULL(const STRING *str))
PARROT_API PARROT_WARN_UNUSED_RESULT INTVAL Parrot_string_is_cclass(PARROT_INTERP, INTVAL flags, NOTNULL(STRING *s), UINTVAL offset)
s
at given offset is in the given character class flags
. See also include/parrot/cclass.h for possible character classes. Returns 0 otherwise, or if the string is empty or NULL.PARROT_API PARROT_WARN_UNUSED_RESULT INTVAL Parrot_string_find_cclass(PARROT_INTERP, INTVAL flags, NOTNULL(STRING *s), UINTVAL offset, UINTVAL count)
PARROT_API PARROT_WARN_UNUSED_RESULT INTVAL Parrot_string_find_not_cclass(PARROT_INTERP, INTVAL flags, NULLOK(STRING *s), UINTVAL offset, UINTVAL count)
PARROT_API PARROT_WARN_UNUSED_RESULT PARROT_CAN_RETURN_NULL STRING *Parrot_string_trans_charset(PARROT_INTERP, NULLOK(STRING *src), INTVAL charset_nr, NULLOK(STRING *dest))
If dest
== NULL, converts src
to the given charset or encoding inplace, else returns a copy of src
with the charset/encoding in dest.PARROT_API PARROT_WARN_UNUSED_RESULT PARROT_CAN_RETURN_NULL STRING *Parrot_string_trans_encoding(PARROT_INTERP, NULLOK(STRING *src), INTVAL encoding_nr, NULLOK(STRING *dest))
If dest
== NULL, converts src
to the given charset or encoding inplace, else returns a copy of src
with the charset/encoding in dest.PARROT_API PARROT_WARN_UNUSED_RESULT PARROT_CAN_RETURN_NULL STRING *string_compose(PARROT_INTERP, NULLOK(STRING *src))
PARROT_API PARROT_WARN_UNUSED_RESULT PARROT_CANNOT_RETURN_NULL STRING *string_join(PARROT_INTERP, NULLOK(STRING *j), NOTNULL(PMC *ar))
PARROT_API PARROT_WARN_UNUSED_RESULT PARROT_CANNOT_RETURN_NULL PMC *string_split(PARROT_INTERP, NOTNULL(STRING *delim), NOTNULL(STRING *str))
PARROT_API PARROT_WARN_UNUSED_RESULT PARROT_CANNOT_RETURN_NULL STRING *uint_to_str(PARROT_INTERP, NOTNULL(char *tc), UHUGEINTVAL num, char base, int minus)
num
converted to a Parrot STRING
. base
must be defined; a default of 10 is not assumed. The caller has to verify that base >= 2 && base <= 36.
The buffer tc
must be at least sizeof (UHUGEINTVAL)*8 + 1
chars big. If minus
is true then -
is prepended to the string representation.PARROT_API PARROT_WARN_UNUSED_RESULT PARROT_CANNOT_RETURN_NULL STRING *int_to_str(PARROT_INTERP, NOTNULL(char *tc), HUGEINTVAL num, char base)
num
converted to a Parrot STRING
. base
must be defined; a default of 10 is not assumed. If num < 0
then -
is prepended to the string representation.
|