This package provides functions for handling Unicode characters and UTF-8 strings. See also Glib.Convert.
Types |
|---|
type G_Unicode_Type is
(Unicode_Control,
Unicode_Format,
Unicode_Unassigned,
Unicode_Private_Use,
Unicode_Surrogate,
Unicode_Lowercase_Letter,
Unicode_Modifier_Letter,
Unicode_Other_Letter,
Unicode_Titlecase_Letter,
Unicode_Uppercase_Letter,
Unicode_Combining_Mark,
Unicode_Enclosing_Mark,
Unicode_Non_Spacing_Mark,
Unicode_Decimal_Number,
Unicode_Letter_Number,
Unicode_Other_Number,
Unicode_Connect_Punctuation,
Unicode_Dash_Punctuation,
Unicode_Close_Punctuation,
Unicode_Final_Punctuation,
Unicode_Initial_Punctuation,
Unicode_Other_Punctuation,
Unicode_Open_Punctuation,
Unicode_Currency_Symbol,
Unicode_Modifier_Symbol,
Unicode_Math_Symbol,
Unicode_Other_Symbol,
Unicode_Line_Separator,
Unicode_Paragraph_Separator,
Unicode_Space_Separator);
| |
|
The possible character classifications.
See http://www.unicode.org/Public/UNIDATA/UnicodeData.html
|
Subprograms |
|---|
procedure UTF8_Validate (Str : UTF8_String; Valid : out Boolean; Invalid_Pos : out Natural); | ||
|
Validate a UTF8 string. | ||
Character classes | ||
function Is_Space (Char : Gunichar) return Boolean; | ||
|
True if Char is a space character
| ||
function Is_Alnum (Char : Gunichar) return Boolean; | ||
|
True if Char is an alphabetical or numerical character
| ||
function Is_Alpha (Char : Gunichar) return Boolean; | ||
|
True if Char is an alphabetical character
| ||
function Is_Digit (Char : Gunichar) return Boolean; | ||
|
True if Char is a digit
| ||
function Is_Lower (Char : Gunichar) return Boolean; | ||
|
True if Char is a lower-case character
| ||
function Is_Upper (Char : Gunichar) return Boolean; | ||
|
True if Char is an upper-case character
| ||
function Is_Punct (Char : Gunichar) return Boolean; | ||
|
True if Char is a punctuation character
| ||
function Unichar_Type (Char : Gunichar) return G_Unicode_Type; | ||
|
Return the unicode character type of a given character
| ||
Case handling | ||
function To_Lower (Char : Gunichar) return Gunichar; | ||
|
Convert Char to lower case
| ||
function To_Upper (Char : Gunichar) return Gunichar; | ||
|
Convert Char to upper case
| ||
function UTF8_Strdown (Str : ICS.chars_ptr; Len : Integer) return ICS.chars_ptr; | ||
|
| ||
function UTF8_Strdown (Str : UTF8_String) return UTF8_String; | ||
|
Convert Str to lower case
| ||
function UTF8_Strup (Str : ICS.chars_ptr; Len : Integer) return ICS.chars_ptr; | ||
|
| ||
function UTF8_Strup (Str : UTF8_String) return UTF8_String; | ||
|
Convert Str to upper case
| ||
Manipulating strings | ||
function UTF8_Strlen (Str : ICS.chars_ptr; Max : Integer := -1) return Glong; | ||
|
| ||
function UTF8_Strlen (Str : UTF8_String) return Glong; | ||
|
Return the number of characters in Str
| ||
function UTF8_Find_Next_Char (Str : ICS.chars_ptr; Str_End : ICS.chars_ptr := ICS.Null_Ptr) return ICS.chars_ptr; | ||
|
| ||
function UTF8_Find_Next_Char (Str : UTF8_String; Index : Natural) return Natural; | ||
|
| ||
function UTF8_Next_Char (Str : UTF8_String; Index : Natural) return Natural; | ||
|
| ||
function UTF8_Find_Prev_Char (Str_Start : ICS.chars_ptr; Str : ICS.chars_ptr) return ICS.chars_ptr; | ||
|
| ||
function UTF8_Find_Prev_Char (Str : UTF8_String; Index : Natural) return Natural; | ||
|
Find the start of the previous UTF8 character before the Index-th byte. | ||
Conversions | ||
function Unichar_To_UTF8 (C : Gunichar; Buffer : ICS.chars_ptr := ICS.Null_Ptr) return Natural; | ||
|
| ||
procedure Unichar_To_UTF8 (C : Gunichar; Buffer : out UTF8_String; Last : out Natural); | ||
|
Encode C into Buffer. Buffer must have at least 6 bytes free. | ||
function UTF8_Get_Char (Str : UTF8_String) return Gunichar; | ||
|
Converts a sequence of bytes encoded as UTF8 to a unicode character. | ||
function UTF8_Get_Char_Validated (Str : UTF8_String) return Gunichar; | ||
|
Same as above. However, if the sequence is an incomplete start of a | ||