/* * Copyright © 2009 Red Hat, Inc. * Copyright © 2011 Codethink Limited * Copyright © 2010,2011,2012 Google, Inc. * * This is part of HarfBuzz, a text shaping library. * * Permission is hereby granted, without written agreement and without * license or royalty fees, to use, copy, modify, and distribute this * software and its documentation for any purpose, provided that the * above copyright notice and the following two paragraphs appear in * all copies of this software. * * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. * * Red Hat Author(s): Behdad Esfahbod * Codethink Author(s): Ryan Lortie * Google Author(s): Behdad Esfahbod */ #include "hb.hh" #include "hb-unicode.hh" /** * SECTION: hb-unicode * @title: hb-unicode * @short_description: Unicode character property access * @include: hb.h * * Unicode functions are used to access Unicode character properties. * With these functions, client programs can query various properties from * the Unicode Character Database for any code point, such as General * Category (gc), Script (sc), Canonical Combining Class (ccc), etc. * * Client programs can optionally pass in their own Unicode functions * that implement the same queries. The set of functions available is * defined by the virtual methods in #hb_unicode_funcs_t. * * HarfBuzz provides built-in default functions for each method in * #hb_unicode_funcs_t. **/ /* * hb_unicode_funcs_t */ static hb_unicode_combining_class_t hb_unicode_combining_class_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, hb_codepoint_t unicode HB_UNUSED, void *user_data HB_UNUSED) { return HB_UNICODE_COMBINING_CLASS_NOT_REORDERED; } #ifndef HB_DISABLE_DEPRECATED static unsigned int hb_unicode_eastasian_width_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, hb_codepoint_t unicode HB_UNUSED, void *user_data HB_UNUSED) { return 1; } #endif static hb_unicode_general_category_t hb_unicode_general_category_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, hb_codepoint_t unicode HB_UNUSED, void *user_data HB_UNUSED) { return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER; } static hb_codepoint_t hb_unicode_mirroring_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, hb_codepoint_t unicode, void *user_data HB_UNUSED) { return unicode; } static hb_script_t hb_unicode_script_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, hb_codepoint_t unicode HB_UNUSED, void *user_data HB_UNUSED) { return HB_SCRIPT_UNKNOWN; } static hb_bool_t hb_unicode_compose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, hb_codepoint_t a HB_UNUSED, hb_codepoint_t b HB_UNUSED, hb_codepoint_t *ab HB_UNUSED, void *user_data HB_UNUSED) { return false; } static hb_bool_t hb_unicode_decompose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, hb_codepoint_t ab HB_UNUSED, hb_codepoint_t *a HB_UNUSED, hb_codepoint_t *b HB_UNUSED, void *user_data HB_UNUSED) { return false; } #ifndef HB_DISABLE_DEPRECATED static unsigned int hb_unicode_decompose_compatibility_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED, hb_codepoint_t u HB_UNUSED, hb_codepoint_t *decomposed HB_UNUSED, void *user_data HB_UNUSED) { return 0; } #endif #if !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_GLIB) #include "hb-glib.h" #endif #if !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN) #include "hb-icu.h" #endif /** * hb_unicode_funcs_get_default: * * Fetches a pointer to the default Unicode-functions structure that is used * when no functions are explicitly set on #hb_buffer_t. * * Return value: (transfer none): a pointer to the #hb_unicode_funcs_t Unicode-functions structure * * Since: 0.9.2 **/ hb_unicode_funcs_t * hb_unicode_funcs_get_default () { #if !defined(HB_NO_UNICODE_FUNCS) && !defined(HB_NO_UCD) return hb_ucd_get_unicode_funcs (); #elif !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_GLIB) return hb_glib_get_unicode_funcs (); #elif !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN) return hb_icu_get_unicode_funcs (); #else #define HB_UNICODE_FUNCS_NIL 1 return hb_unicode_funcs_get_empty (); #endif } #if !defined(HB_NO_UNICODE_FUNCS) && defined(HB_UNICODE_FUNCS_NIL) #error "Could not find any Unicode functions implementation, you have to provide your own" #error "Consider building hb-ucd.cc. If you absolutely want to build without any, check the code." #endif /** * hb_unicode_funcs_create: (Xconstructor) * @parent: (nullable): Parent Unicode-functions structure * * Creates a new #hb_unicode_funcs_t structure of Unicode functions. * * Return value: (transfer full): The Unicode-functions structure * * Since: 0.9.2 **/ hb_unicode_funcs_t * hb_unicode_funcs_create (hb_unicode_funcs_t *parent) { hb_unicode_funcs_t *ufuncs; if (!(ufuncs = hb_object_create ())) return hb_unicode_funcs_get_empty (); if (!parent) parent = hb_unicode_funcs_get_empty (); hb_unicode_funcs_make_immutable (parent); ufuncs->parent = hb_unicode_funcs_reference (parent); ufuncs->func = parent->func; /* We can safely copy user_data from parent since we hold a reference * onto it and it's immutable. We should not copy the destroy notifiers * though. */ ufuncs->user_data = parent->user_data; return ufuncs; } DEFINE_NULL_INSTANCE (hb_unicode_funcs_t) = { HB_OBJECT_HEADER_STATIC, nullptr, /* parent */ { #define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_nil, HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS #undef HB_UNICODE_FUNC_IMPLEMENT } }; /** * hb_unicode_funcs_get_empty: * * Fetches the singleton empty Unicode-functions structure. * * Return value: (transfer full): The empty Unicode-functions structure * * Since: 0.9.2 **/ hb_unicode_funcs_t * hb_unicode_funcs_get_empty () { return const_cast (&Null (hb_unicode_funcs_t)); } /** * hb_unicode_funcs_reference: (skip) * @ufuncs: The Unicode-functions structure * * Increases the reference count on a Unicode-functions structure. * * Return value: (transfer full): The Unicode-functions structure * * Since: 0.9.2 **/ hb_unicode_funcs_t * hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs) { return hb_object_reference (ufuncs); } /** * hb_unicode_funcs_destroy: (skip) * @ufuncs: The Unicode-functions structure * * Decreases the reference count on a Unicode-functions structure. When * the reference count reaches zero, the Unicode-functions structure is * destroyed, freeing all memory. * * Since: 0.9.2 **/ void hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs) { if (!hb_object_destroy (ufuncs)) return; #define HB_UNICODE_FUNC_IMPLEMENT(name) \ if (ufuncs->destroy.name) ufuncs->destroy.name (ufuncs->user_data.name); HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS #undef HB_UNICODE_FUNC_IMPLEMENT hb_unicode_funcs_destroy (ufuncs->parent); free (ufuncs); } /** * hb_unicode_funcs_set_user_data: (skip) * @ufuncs: The Unicode-functions structure * @key: The user-data key * @data: A pointer to the user data * @destroy: (optional): A callback to call when @data is not needed anymore * @replace: Whether to replace an existing data with the same key * * Attaches a user-data key/data pair to the specified Unicode-functions structure. * * Return value: %true if success, %false otherwise * * Since: 0.9.2 **/ hb_bool_t hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs, hb_user_data_key_t *key, void * data, hb_destroy_func_t destroy, hb_bool_t replace) { return hb_object_set_user_data (ufuncs, key, data, destroy, replace); } /** * hb_unicode_funcs_get_user_data: (skip) * @ufuncs: The Unicode-functions structure * @key: The user-data key to query * * Fetches the user-data associated with the specified key, * attached to the specified Unicode-functions structure. * * Return value: (transfer none): A pointer to the user data * * Since: 0.9.2 **/ void * hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs, hb_user_data_key_t *key) { return hb_object_get_user_data (ufuncs, key); } /** * hb_unicode_funcs_make_immutable: * @ufuncs: The Unicode-functions structure * * Makes the specified Unicode-functions structure * immutable. * * Since: 0.9.2 **/ void hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs) { if (hb_object_is_immutable (ufuncs)) return; hb_object_make_immutable (ufuncs); } /** * hb_unicode_funcs_is_immutable: * @ufuncs: The Unicode-functions structure * * Tests whether the specified Unicode-functions structure * is immutable. * * Return value: %true if @ufuncs is immutable, false otherwise * * Since: 0.9.2 **/ hb_bool_t hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs) { return hb_object_is_immutable (ufuncs); } /** * hb_unicode_funcs_get_parent: * @ufuncs: The Unicode-functions structure * * Fetches the parent of the Unicode-functions structure * @ufuncs. * * Return value: The parent Unicode-functions structure * * Since: 0.9.2 **/ hb_unicode_funcs_t * hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs) { return ufuncs->parent ? ufuncs->parent : hb_unicode_funcs_get_empty (); } #define HB_UNICODE_FUNC_IMPLEMENT(name) \ \ void \ hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t *ufuncs, \ hb_unicode_##name##_func_t func, \ void *user_data, \ hb_destroy_func_t destroy) \ { \ if (hb_object_is_immutable (ufuncs)) \ return; \ \ if (ufuncs->destroy.name) \ ufuncs->destroy.name (ufuncs->user_data.name); \ \ if (func) { \ ufuncs->func.name = func; \ ufuncs->user_data.name = user_data; \ ufuncs->destroy.name = destroy; \ } else { \ ufuncs->func.name = ufuncs->parent->func.name; \ ufuncs->user_data.name = ufuncs->parent->user_data.name; \ ufuncs->destroy.name = nullptr; \ } \ } HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS #undef HB_UNICODE_FUNC_IMPLEMENT #define HB_UNICODE_FUNC_IMPLEMENT(return_type, name) \ \ return_type \ hb_unicode_##name (hb_unicode_funcs_t *ufuncs, \ hb_codepoint_t unicode) \ { \ return ufuncs->name (unicode); \ } HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE #undef HB_UNICODE_FUNC_IMPLEMENT /** * hb_unicode_compose: * @ufuncs: The Unicode-functions structure * @a: The first Unicode code point to compose * @b: The second Unicode code point to compose * @ab: (out): The composition of @a, @b * * Fetches the composition of a sequence of two Unicode * code points. * * Calls the composition function of the specified * Unicode-functions structure @ufuncs. * * Return value: %true if @a and @b composed, false otherwise * * Since: 0.9.2 **/ hb_bool_t hb_unicode_compose (hb_unicode_funcs_t *ufuncs, hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab) { return ufuncs->compose (a, b, ab); } /** * hb_unicode_decompose: * @ufuncs: The Unicode-functions structure * @ab: Unicode code point to decompose * @a: (out): The first code point of the decomposition of @ab * @b: (out): The second code point of the decomposition of @ab * * Fetches the decomposition of a Unicode code point. * * Calls the decomposition function of the specified * Unicode-functions structure @ufuncs. * * Return value: %true if @ab was decomposed, false otherwise * * Since: 0.9.2 **/ hb_bool_t hb_unicode_decompose (hb_unicode_funcs_t *ufuncs, hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b) { return ufuncs->decompose (ab, a, b); } #ifndef HB_DISABLE_DEPRECATED /** * hb_unicode_decompose_compatibility: * @ufuncs: The Unicode-functions structure * @u: Code point to decompose * @decomposed: (out): Compatibility decomposition of @u * * Fetches the compatibility decomposition of a Unicode * code point. Deprecated. * * Return value: * * Since: 0.9.2 * Deprecated: 2.0.0 **/ unsigned int hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs, hb_codepoint_t u, hb_codepoint_t *decomposed) { return ufuncs->decompose_compatibility (u, decomposed); } #endif #ifndef HB_NO_OT_SHAPE /* See hb-unicode.hh for details. */ const uint8_t _hb_modified_combining_class[256] = { 0, /* HB_UNICODE_COMBINING_CLASS_NOT_REORDERED */ 1, /* HB_UNICODE_COMBINING_CLASS_OVERLAY */ 2, 3, 4, 5, 6, 7, /* HB_UNICODE_COMBINING_CLASS_NUKTA */ 8, /* HB_UNICODE_COMBINING_CLASS_KANA_VOICING */ 9, /* HB_UNICODE_COMBINING_CLASS_VIRAMA */ /* Hebrew */ HB_MODIFIED_COMBINING_CLASS_CCC10, HB_MODIFIED_COMBINING_CLASS_CCC11, HB_MODIFIED_COMBINING_CLASS_CCC12, HB_MODIFIED_COMBINING_CLASS_CCC13, HB_MODIFIED_COMBINING_CLASS_CCC14, HB_MODIFIED_COMBINING_CLASS_CCC15, HB_MODIFIED_COMBINING_CLASS_CCC16, HB_MODIFIED_COMBINING_CLASS_CCC17, HB_MODIFIED_COMBINING_CLASS_CCC18, HB_MODIFIED_COMBINING_CLASS_CCC19, HB_MODIFIED_COMBINING_CLASS_CCC20, HB_MODIFIED_COMBINING_CLASS_CCC21, HB_MODIFIED_COMBINING_CLASS_CCC22, HB_MODIFIED_COMBINING_CLASS_CCC23, HB_MODIFIED_COMBINING_CLASS_CCC24, HB_MODIFIED_COMBINING_CLASS_CCC25, HB_MODIFIED_COMBINING_CLASS_CCC26, /* Arabic */ HB_MODIFIED_COMBINING_CLASS_CCC27, HB_MODIFIED_COMBINING_CLASS_CCC28, HB_MODIFIED_COMBINING_CLASS_CCC29, HB_MODIFIED_COMBINING_CLASS_CCC30, HB_MODIFIED_COMBINING_CLASS_CCC31, HB_MODIFIED_COMBINING_CLASS_CCC32, HB_MODIFIED_COMBINING_CLASS_CCC33, HB_MODIFIED_COMBINING_CLASS_CCC34, HB_MODIFIED_COMBINING_CLASS_CCC35, /* Syriac */ HB_MODIFIED_COMBINING_CLASS_CCC36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, /* Telugu */ HB_MODIFIED_COMBINING_CLASS_CCC84, 85, 86, 87, 88, 89, 90, HB_MODIFIED_COMBINING_CLASS_CCC91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, /* Thai */ HB_MODIFIED_COMBINING_CLASS_CCC103, 104, 105, 106, HB_MODIFIED_COMBINING_CLASS_CCC107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, /* Lao */ HB_MODIFIED_COMBINING_CLASS_CCC118, 119, 120, 121, HB_MODIFIED_COMBINING_CLASS_CCC122, 123, 124, 125, 126, 127, 128, /* Tibetan */ HB_MODIFIED_COMBINING_CLASS_CCC129, HB_MODIFIED_COMBINING_CLASS_CCC130, 131, HB_MODIFIED_COMBINING_CLASS_CCC132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT */ 201, 202, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW */ 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE */ 215, 216, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT */ 217, 218, /* HB_UNICODE_COMBINING_CLASS_BELOW_LEFT */ 219, 220, /* HB_UNICODE_COMBINING_CLASS_BELOW */ 221, 222, /* HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT */ 223, 224, /* HB_UNICODE_COMBINING_CLASS_LEFT */ 225, 226, /* HB_UNICODE_COMBINING_CLASS_RIGHT */ 227, 228, /* HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT */ 229, 230, /* HB_UNICODE_COMBINING_CLASS_ABOVE */ 231, 232, /* HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT */ 233, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW */ 234, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE */ 235, 236, 237, 238, 239, 240, /* HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT */ 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, /* HB_UNICODE_COMBINING_CLASS_INVALID */ }; #endif /* * Emoji */ #ifndef HB_NO_EMOJI_SEQUENCES #include "hb-unicode-emoji-table.hh" bool _hb_unicode_is_emoji_Extended_Pictographic (hb_codepoint_t cp) { return _hb_emoji_is_Extended_Pictographic (cp); } #endif