00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #ifndef __G_UNICODE_H__
00024 #define __G_UNICODE_H__
00025
00026 #include <_ansi.h>
00027 #include <glib/gerror.h>
00028 #include <glib/gtypes.h>
00029
00030 G_BEGIN_DECLS
00031
/* Scalar types used by every declaration in this header: gunichar is an
 * alias of guint32 and gunichar2 an alias of guint16.
 * NOTE(review): presumably gunichar holds one UCS-4 code point and gunichar2
 * one UTF-16 code unit (the g_utf8_to_ucs4 / g_utf8_to_utf16 return types
 * below suggest this) -- confirm against the GLib reference.
 */
00032 typedef guint32 gunichar;
00033 typedef guint16 gunichar2;
00034
00035
00036
00037
/* GUnicodeType: classification value returned by g_unichar_type().
 *
 * No enumerator has an explicit initialiser, so the values are consecutive
 * starting at 0 (G_UNICODE_CONTROL == 0 ... G_UNICODE_SPACE_SEPARATOR == 29).
 * Inserting or reordering entries therefore changes the numeric values seen
 * by already-compiled callers; append only.
 * NOTE(review): the names look like the Unicode General_Category values
 * (Cc, Cf, Cn, Co, Cs, Ll, ...) -- confirm against the GLib reference.
 */
00038 typedef enum
00039 {
00040 G_UNICODE_CONTROL,
00041 G_UNICODE_FORMAT,
00042 G_UNICODE_UNASSIGNED,
00043 G_UNICODE_PRIVATE_USE,
00044 G_UNICODE_SURROGATE,
00045 G_UNICODE_LOWERCASE_LETTER,
00046 G_UNICODE_MODIFIER_LETTER,
00047 G_UNICODE_OTHER_LETTER,
00048 G_UNICODE_TITLECASE_LETTER,
00049 G_UNICODE_UPPERCASE_LETTER,
00050 G_UNICODE_COMBINING_MARK,
00051 G_UNICODE_ENCLOSING_MARK,
00052 G_UNICODE_NON_SPACING_MARK,
00053 G_UNICODE_DECIMAL_NUMBER,
00054 G_UNICODE_LETTER_NUMBER,
00055 G_UNICODE_OTHER_NUMBER,
00056 G_UNICODE_CONNECT_PUNCTUATION,
00057 G_UNICODE_DASH_PUNCTUATION,
00058 G_UNICODE_CLOSE_PUNCTUATION,
00059 G_UNICODE_FINAL_PUNCTUATION,
00060 G_UNICODE_INITIAL_PUNCTUATION,
00061 G_UNICODE_OTHER_PUNCTUATION,
00062 G_UNICODE_OPEN_PUNCTUATION,
00063 G_UNICODE_CURRENCY_SYMBOL,
00064 G_UNICODE_MODIFIER_SYMBOL,
00065 G_UNICODE_MATH_SYMBOL,
00066 G_UNICODE_OTHER_SYMBOL,
00067 G_UNICODE_LINE_SEPARATOR,
00068 G_UNICODE_PARAGRAPH_SEPARATOR,
00069 G_UNICODE_SPACE_SEPARATOR
00070 } GUnicodeType;
00071
00072
00073
00074
00075
00076
/* GUnicodeBreakType: classification value returned by g_unichar_break_type().
 *
 * As with GUnicodeType, the enumerators carry no explicit initialisers and
 * are numbered consecutively from 0 (G_UNICODE_BREAK_MANDATORY == 0 ...
 * G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE == 35), so the order is part of the
 * binary interface; append only.
 * NOTE(review): the names look like the line-breaking classes of Unicode
 * UAX #14 (BK, CR, LF, CM, ...) -- confirm against the GLib reference.
 */
00077 typedef enum
00078 {
00079 G_UNICODE_BREAK_MANDATORY,
00080 G_UNICODE_BREAK_CARRIAGE_RETURN,
00081 G_UNICODE_BREAK_LINE_FEED,
00082 G_UNICODE_BREAK_COMBINING_MARK,
00083 G_UNICODE_BREAK_SURROGATE,
00084 G_UNICODE_BREAK_ZERO_WIDTH_SPACE,
00085 G_UNICODE_BREAK_INSEPARABLE,
00086 G_UNICODE_BREAK_NON_BREAKING_GLUE,
00087 G_UNICODE_BREAK_CONTINGENT,
00088 G_UNICODE_BREAK_SPACE,
00089 G_UNICODE_BREAK_AFTER,
00090 G_UNICODE_BREAK_BEFORE,
00091 G_UNICODE_BREAK_BEFORE_AND_AFTER,
00092 G_UNICODE_BREAK_HYPHEN,
00093 G_UNICODE_BREAK_NON_STARTER,
00094 G_UNICODE_BREAK_OPEN_PUNCTUATION,
00095 G_UNICODE_BREAK_CLOSE_PUNCTUATION,
00096 G_UNICODE_BREAK_QUOTATION,
00097 G_UNICODE_BREAK_EXCLAMATION,
00098 G_UNICODE_BREAK_IDEOGRAPHIC,
00099 G_UNICODE_BREAK_NUMERIC,
00100 G_UNICODE_BREAK_INFIX_SEPARATOR,
00101 G_UNICODE_BREAK_SYMBOL,
00102 G_UNICODE_BREAK_ALPHABETIC,
00103 G_UNICODE_BREAK_PREFIX,
00104 G_UNICODE_BREAK_POSTFIX,
00105 G_UNICODE_BREAK_COMPLEX_CONTEXT,
00106 G_UNICODE_BREAK_AMBIGUOUS,
00107 G_UNICODE_BREAK_UNKNOWN,
00108 G_UNICODE_BREAK_NEXT_LINE,
00109 G_UNICODE_BREAK_WORD_JOINER,
00110 G_UNICODE_BREAK_HANGUL_L_JAMO,
00111 G_UNICODE_BREAK_HANGUL_V_JAMO,
00112 G_UNICODE_BREAK_HANGUL_T_JAMO,
00113 G_UNICODE_BREAK_HANGUL_LV_SYLLABLE,
00114 G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE
00115 } GUnicodeBreakType;
00116
00117
00118
00119
00120
00121
00122
/* g_get_charset: hands a character-set name back through the out-parameter
 * `charset` and returns a gboolean.
 * NOTE(review): in upstream GLib the return value is TRUE when the current
 * locale's charset is UTF-8 and the returned string is owned by GLib (hence
 * G_CONST_RETURN) -- confirm that this port behaves the same way.
 */
00123 IMPORT_C gboolean g_get_charset (G_CONST_RETURN char **charset);
00124
00125
00126
/* Character-class predicates.  Each takes a single gunichar by value and
 * returns a gboolean; all are tagged G_GNUC_CONST.
 * NOTE(review): G_GNUC_CONST presumably expands to the compiler's "const
 * function" attribute (result depends only on the argument), and the names
 * parallel the <ctype.h> isxxx() family applied to Unicode code points --
 * confirm the exact class definitions against the GLib reference.
 */
00127 IMPORT_C gboolean g_unichar_isalnum (gunichar c) G_GNUC_CONST;
00128 IMPORT_C gboolean g_unichar_isalpha (gunichar c) G_GNUC_CONST;
00129 IMPORT_C gboolean g_unichar_iscntrl (gunichar c) G_GNUC_CONST;
00130 IMPORT_C gboolean g_unichar_isdigit (gunichar c) G_GNUC_CONST;
00131 IMPORT_C gboolean g_unichar_isgraph (gunichar c) G_GNUC_CONST;
00132 IMPORT_C gboolean g_unichar_islower (gunichar c) G_GNUC_CONST;
00133 IMPORT_C gboolean g_unichar_isprint (gunichar c) G_GNUC_CONST;
00134 IMPORT_C gboolean g_unichar_ispunct (gunichar c) G_GNUC_CONST;
00135 IMPORT_C gboolean g_unichar_isspace (gunichar c) G_GNUC_CONST;
00136 IMPORT_C gboolean g_unichar_isupper (gunichar c) G_GNUC_CONST;
00137 IMPORT_C gboolean g_unichar_isxdigit (gunichar c) G_GNUC_CONST;
00138 IMPORT_C gboolean g_unichar_istitle (gunichar c) G_GNUC_CONST;
00139 IMPORT_C gboolean g_unichar_isdefined (gunichar c) G_GNUC_CONST;
00140 IMPORT_C gboolean g_unichar_iswide (gunichar c) G_GNUC_CONST;
00141
00142
00143
/* Single-character case mapping: each function takes one gunichar and
 * returns one gunichar, and is tagged G_GNUC_CONST.
 * NOTE(review): because the result is a single gunichar, mappings that expand
 * to several characters cannot be expressed here; presumably the input is
 * returned unchanged when no mapping exists -- confirm.
 */
00144 IMPORT_C gunichar g_unichar_toupper (gunichar c) G_GNUC_CONST;
00145 IMPORT_C gunichar g_unichar_tolower (gunichar c) G_GNUC_CONST;
00146 IMPORT_C gunichar g_unichar_totitle (gunichar c) G_GNUC_CONST;
00147
00148
00149
/* Numeric value of a digit character, returned as a signed gint.
 * NOTE(review): upstream GLib returns -1 when `c` is not a decimal
 * (respectively hexadecimal) digit -- confirm for this port before relying
 * on the sentinel.
 */
00150 IMPORT_C gint g_unichar_digit_value (gunichar c) G_GNUC_CONST;
00151
00152 IMPORT_C gint g_unichar_xdigit_value (gunichar c) G_GNUC_CONST;
00153
00154
/* Classification lookups: map one gunichar to a GUnicodeType value and to a
 * GUnicodeBreakType value respectively.  Both are tagged G_GNUC_CONST.
 */
00155 IMPORT_C GUnicodeType g_unichar_type (gunichar c) G_GNUC_CONST;
00156
00157
00158 IMPORT_C GUnicodeBreakType g_unichar_break_type (gunichar c) G_GNUC_CONST;
00159
00160
00161
00162
00163
/* g_unicode_canonical_ordering: takes a non-const gunichar array plus its
 * length and returns nothing, so any result is delivered by modifying
 * `string` in place.
 * NOTE(review): presumably `len` counts gunichar elements, not bytes, and
 * the reordering is the Unicode canonical ordering of combining marks --
 * confirm.
 */
00164 IMPORT_C void g_unicode_canonical_ordering (gunichar *string,
00165 gsize len);
00166
00167
00168
00169
/* g_unicode_canonical_decomposition: returns a gunichar array for `ch` and
 * writes a length through `result_len`.  Tagged G_GNUC_MALLOC.
 * NOTE(review): the G_GNUC_MALLOC tag suggests the array is freshly
 * allocated and owned by the caller (g_free() in upstream GLib) -- confirm.
 */
00170 IMPORT_C gunichar *g_unicode_canonical_decomposition (gunichar ch,
00171 gsize *result_len) G_GNUC_MALLOC;
00172
00173
00174
/* Per-lead-byte skip table used by g_utf8_next_char().
 *
 * g_utf8_skip is declared as a const pointer to const gchar and is indexed by
 * the macro below with an unsigned byte value.  On SYMBIAN builds an accessor
 * function, _g_utf8_skip(), is additionally declared; it returns a pointer to
 * such a table pointer.
 * NOTE(review): the accessor is declared with an empty parameter list `()`,
 * which in C leaves the parameters unspecified; `(void)` would make it a
 * real prototype.
 * NOTE(review): presumably the Symbian build redirects g_utf8_skip to
 * (*_g_utf8_skip()) somewhere outside this header -- confirm.
 */
00175 #ifdef SYMBIAN
00176 IMPORT_C const gchar * const * _g_utf8_skip();
00177 #endif
00178 GLIB_VAR const gchar * const g_utf8_skip;
00179
/* g_utf8_next_char(p): advance `p` past one UTF-8 encoded character.
 *
 * The lead byte at `p` is read as a guchar and used as an index into
 * g_utf8_skip; the result is `p` plus that table entry, cast to char *.
 * Nothing is validated here: the macro only performs the table lookup.
 *
 * - `p` is evaluated twice, so it must not have side effects.
 * - The lead byte is read through a const-qualified pointer so that passing
 *   a const gchar * does not cast away const.
 * - The whole expansion is parenthesised so that a postfix operator written
 *   after the macro (for example g_utf8_next_char (p)[0]) applies to the
 *   resulting pointer rather than binding inside the cast.
 */
#define g_utf8_next_char(p) ((char *)((p) + g_utf8_skip[*(const guchar *)(p)]))
00181
/* Decode the character starting at `p` and return it as a gunichar.
 * The _validated variant additionally takes a byte budget `max_len`
 * (signed gssize).
 * NOTE(review): in upstream GLib g_utf8_get_char() assumes valid input, while
 * the _validated variant returns (gunichar)-1 for invalid data and
 * (gunichar)-2 for a truncated sequence, with a negative max_len meaning
 * "NUL-terminated" -- confirm for this port.
 */
00182 IMPORT_C gunichar g_utf8_get_char (const gchar *p);
00183 IMPORT_C gunichar g_utf8_get_char_validated (const gchar *p,
00184 gssize max_len);
00185
/* Pointer/offset navigation inside a UTF-8 string.
 *
 * All inputs are const gchar *, yet every pointer-returning function yields a
 * non-const gchar *; callers that received the pointer from a const string
 * must not write through the result.
 * NOTE(review): presumably `offset` and the glong result of
 * g_utf8_pointer_to_offset() count characters, not bytes, and the find_*
 * variants return NULL when `end` / `str` is reached -- confirm against the
 * GLib reference.
 */
00186 IMPORT_C gchar* g_utf8_offset_to_pointer (const gchar *str,
00187 glong offset);
00188 IMPORT_C glong g_utf8_pointer_to_offset (const gchar *str,
00189 const gchar *pos);
00190 IMPORT_C gchar* g_utf8_prev_char (const gchar *p);
00191 IMPORT_C gchar* g_utf8_find_next_char (const gchar *p,
00192 const gchar *end);
00193 IMPORT_C gchar* g_utf8_find_prev_char (const gchar *str,
00194 const gchar *p);
00195
/* g_utf8_strlen: returns a glong length for `p`, bounded by `max`.
 * NOTE(review): presumably the result is in characters while `max` is in
 * bytes, with a negative `max` meaning "NUL-terminated" -- confirm.
 */
00196 IMPORT_C glong g_utf8_strlen (const gchar *p,
00197 gssize max);
00198
00199
/* g_utf8_strncpy: copies from `src` into caller-provided `dest` and returns
 * a gchar *.
 * NOTE(review): presumably `n` is a character count, so `dest` must be sized
 * for the worst-case byte length plus a terminator -- confirm.
 */
00200 IMPORT_C gchar* g_utf8_strncpy (gchar *dest,
00201 const gchar *src,
00202 gsize n);
00203
00204
00205
/* Character search and reversal on UTF-8 strings.
 *
 * strchr/strrchr take the haystack `p`, a signed length `len` and the
 * character `c` to look for, and return a non-const gchar * even though the
 * input is const.
 * NOTE(review): presumably `len` is a byte count with a negative value
 * meaning "NUL-terminated", and NULL is returned when `c` is absent --
 * confirm.
 * NOTE(review): g_utf8_strreverse() returns a gchar * but, unlike
 * g_utf8_strup() and friends in this header, is not tagged G_GNUC_MALLOC;
 * upstream GLib documents the result as newly allocated -- confirm
 * ownership before freeing.
 */
00206 IMPORT_C gchar* g_utf8_strchr (const gchar *p,
00207 gssize len,
00208 gunichar c);
00209 IMPORT_C gchar* g_utf8_strrchr (const gchar *p,
00210 gssize len,
00211 gunichar c);
00212 IMPORT_C gchar* g_utf8_strreverse (const gchar *str,
00213 gssize len);
00214
/* Encoding converters between UTF-8 (gchar), UTF-16 (gunichar2) and UCS-4
 * (gunichar).
 *
 * Every function takes the source string and a glong `len`, returns a pointer
 * to the converted data, and is tagged G_GNUC_MALLOC.  All except
 * g_utf8_to_ucs4_fast() also take `items_read` and `error` out-parameters;
 * the _fast variant has only `items_written`, so it has no channel for
 * reporting a conversion failure.
 * NOTE(review): presumably `len` is measured in units of the source encoding
 * with a negative value meaning "NUL-terminated", the out-parameters may be
 * NULL, the result is NULL on error, and the caller owns the returned buffer
 * (g_free() in upstream GLib) -- confirm against the GLib reference.
 */
00215 IMPORT_C gunichar2 *g_utf8_to_utf16 (const gchar *str,
00216 glong len,
00217 glong *items_read,
00218 glong *items_written,
00219 GError **error) G_GNUC_MALLOC;
00220 IMPORT_C gunichar * g_utf8_to_ucs4 (const gchar *str,
00221 glong len,
00222 glong *items_read,
00223 glong *items_written,
00224 GError **error) G_GNUC_MALLOC;
00225 IMPORT_C gunichar * g_utf8_to_ucs4_fast (const gchar *str,
00226 glong len,
00227 glong *items_written) G_GNUC_MALLOC;
00228 IMPORT_C gunichar * g_utf16_to_ucs4 (const gunichar2 *str,
00229 glong len,
00230 glong *items_read,
00231 glong *items_written,
00232 GError **error) G_GNUC_MALLOC;
00233 IMPORT_C gchar* g_utf16_to_utf8 (const gunichar2 *str,
00234 glong len,
00235 glong *items_read,
00236 glong *items_written,
00237 GError **error) G_GNUC_MALLOC;
00238 IMPORT_C gunichar2 *g_ucs4_to_utf16 (const gunichar *str,
00239 glong len,
00240 glong *items_read,
00241 glong *items_written,
00242 GError **error) G_GNUC_MALLOC;
00243 IMPORT_C gchar* g_ucs4_to_utf8 (const gunichar *str,
00244 glong len,
00245 glong *items_read,
00246 glong *items_written,
00247 GError **error) G_GNUC_MALLOC;
00248
00249
00250
00251
00252
/* g_unichar_to_utf8: encodes `c` into the caller-supplied buffer `outbuf`
 * and returns a gint.  No buffer size is passed, so the caller is responsible
 * for providing enough room.
 * NOTE(review): upstream GLib requires at least 6 bytes in `outbuf`, returns
 * the number of bytes written, and accepts a NULL `outbuf` to compute the
 * length only -- confirm for this port.
 */
00253 IMPORT_C gint g_unichar_to_utf8 (gunichar c,
00254 gchar *outbuf);
00255
00256
00257
00258
00259
/* g_utf8_validate: checks `str` (bounded by `max_len`) and returns a
 * gboolean; a position inside `str` is reported through the `end`
 * out-parameter.
 * NOTE(review): presumably `*end` is set to the first invalid byte (or the
 * end of the string on success), `end` may be NULL, and a negative `max_len`
 * means "NUL-terminated" -- confirm.
 */
00260 IMPORT_C gboolean g_utf8_validate (const gchar *str,
00261 gssize max_len,
00262 const gchar **end);
00263
00264
/* g_unichar_validate: returns a gboolean for a single character value.
 * Unlike the g_unichar_is*() predicates in this header, it is not tagged
 * G_GNUC_CONST.
 */
00265 IMPORT_C gboolean g_unichar_validate (gunichar ch);
00266
/* Whole-string case conversion.  Each function takes a const string plus a
 * signed length and returns a gchar * tagged G_GNUC_MALLOC; the input is not
 * modified (it is const).
 * NOTE(review): presumably `len` is a byte count with a negative value
 * meaning "NUL-terminated", and the caller owns the returned string
 * (g_free() in upstream GLib) -- confirm.
 */
00267 IMPORT_C gchar *g_utf8_strup (const gchar *str,
00268 gssize len) G_GNUC_MALLOC;
00269 IMPORT_C gchar *g_utf8_strdown (const gchar *str,
00270 gssize len) G_GNUC_MALLOC;
00271 IMPORT_C gchar *g_utf8_casefold (const gchar *str,
00272 gssize len) G_GNUC_MALLOC;
00273
/* GNormalizeMode: selects the normalization form for g_utf8_normalize().
 *
 * Each descriptive name is immediately followed by an alias assigned the
 * same value, so there are four distinct values:
 *   0  G_NORMALIZE_DEFAULT          == G_NORMALIZE_NFD
 *   1  G_NORMALIZE_DEFAULT_COMPOSE  == G_NORMALIZE_NFC
 *   2  G_NORMALIZE_ALL              == G_NORMALIZE_NFKD
 *   3  G_NORMALIZE_ALL_COMPOSE      == G_NORMALIZE_NFKC
 * Because each alias resets the counter to the value of its partner, the
 * interleaved order is what keeps the numbering at 0..3; do not reorder.
 */
00274 typedef enum {
00275 G_NORMALIZE_DEFAULT,
00276 G_NORMALIZE_NFD = G_NORMALIZE_DEFAULT,
00277 G_NORMALIZE_DEFAULT_COMPOSE,
00278 G_NORMALIZE_NFC = G_NORMALIZE_DEFAULT_COMPOSE,
00279 G_NORMALIZE_ALL,
00280 G_NORMALIZE_NFKD = G_NORMALIZE_ALL,
00281 G_NORMALIZE_ALL_COMPOSE,
00282 G_NORMALIZE_NFKC = G_NORMALIZE_ALL_COMPOSE
00283 } GNormalizeMode;
00284
/* g_utf8_normalize: returns a gchar * derived from `str` according to the
 * requested GNormalizeMode.  Tagged G_GNUC_MALLOC; the input is const.
 * NOTE(review): presumably `len` is a byte count with a negative value
 * meaning "NUL-terminated", and the caller owns the result -- confirm.
 */
00285 IMPORT_C gchar *g_utf8_normalize (const gchar *str,
00286 gssize len,
00287 GNormalizeMode mode) G_GNUC_MALLOC;
00288
/* Collation helpers.
 *
 * g_utf8_collate() compares two strings and returns a gint; the two
 * *_collate_key* functions return a gchar * tagged G_GNUC_MALLOC.
 * NOTE(review): presumably the gint follows the strcmp() sign convention
 * using the current locale's collation rules, and the keys are meant to be
 * compared with strcmp() and freed by the caller -- confirm against the GLib
 * reference.
 */
00289 IMPORT_C gint g_utf8_collate (const gchar *str1,
00290 const gchar *str2);
00291 IMPORT_C gchar *g_utf8_collate_key (const gchar *str,
00292 gssize len) G_GNUC_MALLOC;
00293 IMPORT_C gchar *g_utf8_collate_key_for_filename (const gchar *str,
00294 gssize len) G_GNUC_MALLOC;
00295
/* g_unichar_get_mirror_char: returns a gboolean for `ch` and can report a
 * second character through the `mirrored_ch` out-parameter.
 * NOTE(review): presumably the return value says whether `ch` has a
 * bidi-mirrored counterpart and `*mirrored_ch` receives it -- confirm,
 * including whether `mirrored_ch` may be NULL.
 */
00296 IMPORT_C gboolean g_unichar_get_mirror_char (gunichar ch,
00297 gunichar *mirrored_ch);
00298
00299 G_END_DECLS
00300
00301 #endif