summaryrefslogtreecommitdiff
path: root/thirdparty/harfbuzz/src/hb-ot-cmap-table.hh
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/harfbuzz/src/hb-ot-cmap-table.hh')
-rw-r--r--thirdparty/harfbuzz/src/hb-ot-cmap-table.hh260
1 files changed, 178 insertions, 82 deletions
diff --git a/thirdparty/harfbuzz/src/hb-ot-cmap-table.hh b/thirdparty/harfbuzz/src/hb-ot-cmap-table.hh
index 878e02ff17..c8a2af1e82 100644
--- a/thirdparty/harfbuzz/src/hb-ot-cmap-table.hh
+++ b/thirdparty/harfbuzz/src/hb-ot-cmap-table.hh
@@ -49,6 +49,12 @@ struct CmapSubtableFormat0
*glyph = gid;
return true;
}
+
+ unsigned get_language () const
+ {
+ return language;
+ }
+
void collect_unicodes (hb_set_t *out) const
{
for (unsigned int i = 0; i < 256; i++)
@@ -212,29 +218,24 @@ struct CmapSubtableFormat4
HBINT16 *idDelta,
unsigned segcount)
{
+ hb_hashmap_t<hb_codepoint_t, hb_codepoint_t> cp_to_gid;
+ + it | hb_sink (cp_to_gid);
+
HBUINT16 *idRangeOffset = c->allocate_size<HBUINT16> (HBUINT16::static_size * segcount);
if (unlikely (!c->check_success (idRangeOffset))) return nullptr;
if (unlikely ((char *)idRangeOffset - (char *)idDelta != (int) segcount * (int) HBINT16::static_size)) return nullptr;
- + hb_range (segcount)
- | hb_filter ([&] (const unsigned _) { return idDelta[_] == 0; })
- | hb_apply ([&] (const unsigned i)
- {
- idRangeOffset[i] = 2 * (c->start_embed<HBUINT16> () - idRangeOffset - i);
-
- + it
- | hb_filter ([&] (const hb_item_type<Iterator> _) { return _.first >= startCode[i] && _.first <= endCode[i]; })
- | hb_apply ([&] (const hb_item_type<Iterator> _)
- {
- HBUINT16 glyID;
- glyID = _.second;
- c->copy<HBUINT16> (glyID);
- })
- ;
-
-
- })
- ;
+ for (unsigned i : + hb_range (segcount)
+ | hb_filter ([&] (const unsigned _) { return idDelta[_] == 0; }))
+ {
+ idRangeOffset[i] = 2 * (c->start_embed<HBUINT16> () - idRangeOffset - i);
+ for (hb_codepoint_t cp = startCode[i]; cp <= endCode[i]; cp++)
+ {
+ HBUINT16 gid;
+ gid = cp_to_gid[cp];
+ c->copy<HBUINT16> (gid);
+ }
+ }
return idRangeOffset;
}
@@ -253,7 +254,7 @@ struct CmapSubtableFormat4
if (format4_iter.len () == 0) return;
unsigned table_initpos = c->length ();
- if (unlikely (!c->extend_min (*this))) return;
+ if (unlikely (!c->extend_min (this))) return;
this->format = 4;
//serialize endCode[]
@@ -276,7 +277,17 @@ struct CmapSubtableFormat4
HBUINT16 *idRangeOffset = serialize_rangeoffset_glyid (c, format4_iter, endCode, startCode, idDelta, segcount);
if (unlikely (!c->check_success (idRangeOffset))) return;
- if (unlikely (!c->check_assign(this->length, c->length () - table_initpos))) return;
+ this->length = c->length () - table_initpos;
+ if ((long long) this->length != (long long) c->length () - table_initpos)
+ {
+ // Length overflowed. Discard the current object before setting the error condition, otherwise
+ // discard is a noop which prevents the higher level code from reverting the serializer to the
+ // pre-error state in cmap4 overflow handling code.
+ c->pop_discard ();
+ c->err (HB_SERIALIZE_ERROR_INT_OVERFLOW);
+ return;
+ }
+
this->segCountX2 = segcount * 2;
this->entrySelector = hb_max (1u, hb_bit_storage (segcount)) - 1;
this->searchRange = 2 * (1u << this->entrySelector);
@@ -285,6 +296,11 @@ struct CmapSubtableFormat4
: 0;
}
+ unsigned get_language () const
+ {
+ return language;
+ }
+
struct accelerator_t
{
accelerator_t () {}
@@ -547,6 +563,12 @@ struct CmapSubtableTrimmed
*glyph = gid;
return true;
}
+
+ unsigned get_language () const
+ {
+ return language;
+ }
+
void collect_unicodes (hb_set_t *out) const
{
hb_codepoint_t start = startCharCode;
@@ -582,7 +604,7 @@ struct CmapSubtableTrimmed
UINT length; /* Byte length of this subtable. */
UINT language; /* Ignore. */
UINT startCharCode; /* First character code covered. */
- ArrayOf<HBGlyphID, UINT>
+ ArrayOf<HBGlyphID16, UINT>
glyphIdArray; /* Array of glyph index values for character
* codes in the range. */
public:
@@ -606,6 +628,11 @@ struct CmapSubtableLongSegmented
return true;
}
+ unsigned get_language () const
+ {
+ return language;
+ }
+
void collect_unicodes (hb_set_t *out, unsigned int num_glyphs) const
{
for (unsigned int i = 0; i < this->groups.len; i++)
@@ -670,7 +697,7 @@ struct CmapSubtableLongSegmented
HBUINT16 reserved; /* Reserved; set to 0. */
HBUINT32 length; /* Byte length of this subtable. */
HBUINT32 language; /* Ignore. */
- SortedArrayOf<CmapSubtableLongGroup, HBUINT32>
+ SortedArray32Of<CmapSubtableLongGroup>
groups; /* Groupings. */
public:
DEFINE_SIZE_ARRAY (16, groups);
@@ -691,7 +718,7 @@ struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12>
{
if (it.len () == 0) return;
unsigned table_initpos = c->length ();
- if (unlikely (!c->extend_min (*this))) return;
+ if (unlikely (!c->extend_min (this))) return;
hb_codepoint_t startCharCode = 0xFFFF, endCharCode = 0xFFFF;
hb_codepoint_t glyphID = 0;
@@ -784,7 +811,7 @@ struct UnicodeValueRange
DEFINE_SIZE_STATIC (4);
};
-struct DefaultUVS : SortedArrayOf<UnicodeValueRange, HBUINT32>
+struct DefaultUVS : SortedArray32Of<UnicodeValueRange>
{
void collect_unicodes (hb_set_t *out) const
{
@@ -850,7 +877,9 @@ struct DefaultUVS : SortedArrayOf<UnicodeValueRange, HBUINT32>
}
else
{
- if (unlikely (!c->check_assign (out->len, (c->length () - init_len) / UnicodeValueRange::static_size))) return nullptr;
+ if (unlikely (!c->check_assign (out->len,
+ (c->length () - init_len) / UnicodeValueRange::static_size,
+ HB_SERIALIZE_ERROR_INT_OVERFLOW))) return nullptr;
return out;
}
}
@@ -871,28 +900,26 @@ struct UVSMapping
}
HBUINT24 unicodeValue; /* Base Unicode value of the UVS */
- HBGlyphID glyphID; /* Glyph ID of the UVS */
+ HBGlyphID16 glyphID; /* Glyph ID of the UVS */
public:
DEFINE_SIZE_STATIC (5);
};
-struct NonDefaultUVS : SortedArrayOf<UVSMapping, HBUINT32>
+struct NonDefaultUVS : SortedArray32Of<UVSMapping>
{
void collect_unicodes (hb_set_t *out) const
{
- unsigned int count = len;
- for (unsigned int i = 0; i < count; i++)
- out->add (arrayZ[i].unicodeValue);
+ for (const auto& a : as_array ())
+ out->add (a.unicodeValue);
}
void collect_mapping (hb_set_t *unicodes, /* OUT */
hb_map_t *mapping /* OUT */) const
{
- unsigned count = len;
- for (unsigned i = 0; i < count; i++)
+ for (const auto& a : as_array ())
{
- hb_codepoint_t unicode = arrayZ[i].unicodeValue;
- hb_codepoint_t glyphid = arrayZ[i].glyphID;
+ hb_codepoint_t unicode = a.unicodeValue;
+ hb_codepoint_t glyphid = a.glyphID;
unicodes->add (unicode);
mapping->set (unicode, glyphid);
}
@@ -1041,9 +1068,9 @@ struct VariationSelectorRecord
}
HBUINT24 varSelector; /* Variation selector. */
- LOffsetTo<DefaultUVS>
+ Offset32To<DefaultUVS>
defaultUVS; /* Offset to Default UVS Table. May be 0. */
- LOffsetTo<NonDefaultUVS>
+ Offset32To<NonDefaultUVS>
nonDefaultUVS; /* Offset to Non-Default UVS Table. May be 0. */
public:
DEFINE_SIZE_STATIC (11);
@@ -1058,9 +1085,8 @@ struct CmapSubtableFormat14
void collect_variation_selectors (hb_set_t *out) const
{
- unsigned int count = record.len;
- for (unsigned int i = 0; i < count; i++)
- out->add (record.arrayZ[i].varSelector);
+ for (const auto& a : record.as_array ())
+ out->add (a.varSelector);
}
void collect_variation_unicodes (hb_codepoint_t variation_selector,
hb_set_t *out) const
@@ -1076,7 +1102,7 @@ struct CmapSubtableFormat14
unsigned table_initpos = c->length ();
const char* init_tail = c->tail;
- if (unlikely (!c->extend_min (*this))) return;
+ if (unlikely (!c->extend_min (this))) return;
this->format = 14;
auto src_tbl = reinterpret_cast<const CmapSubtableFormat14*> (base);
@@ -1112,10 +1138,12 @@ struct CmapSubtableFormat14
return;
int tail_len = init_tail - c->tail;
- c->check_assign (this->length, c->length () - table_initpos + tail_len);
+ c->check_assign (this->length, c->length () - table_initpos + tail_len,
+ HB_SERIALIZE_ERROR_INT_OVERFLOW);
c->check_assign (this->record.len,
(c->length () - table_initpos - CmapSubtableFormat14::min_size) /
- VariationSelectorRecord::static_size);
+ VariationSelectorRecord::static_size,
+ HB_SERIALIZE_ERROR_INT_OVERFLOW);
/* Correct the incorrect write order by reversing the order of the variation
records array. */
@@ -1180,7 +1208,7 @@ struct CmapSubtableFormat14
protected:
HBUINT16 format; /* Format number is set to 14. */
HBUINT32 length; /* Byte length of this subtable. */
- SortedArrayOf<VariationSelectorRecord, HBUINT32>
+ SortedArray32Of<VariationSelectorRecord>
record; /* Variation selector records; sorted
* in increasing order of `varSelector'. */
public:
@@ -1235,6 +1263,20 @@ struct CmapSubtable
}
}
+ unsigned get_language () const
+ {
+ switch (u.format) {
+ case 0: return u.format0 .get_language ();
+ case 4: return u.format4 .get_language ();
+ case 6: return u.format6 .get_language ();
+ case 10: return u.format10.get_language ();
+ case 12: return u.format12.get_language ();
+ case 13: return u.format13.get_language ();
+ case 14:
+ default: return 0;
+ }
+ }
+
template<typename Iterator,
hb_requires (hb_is_iterator (Iterator))>
void serialize (hb_serialize_context_t *c,
@@ -1338,7 +1380,7 @@ struct EncodingRecord
HBUINT16 platformID; /* Platform ID. */
HBUINT16 encodingID; /* Platform-specific encoding ID. */
- LOffsetTo<CmapSubtable>
+ Offset32To<CmapSubtable>
subtable; /* Byte offset from beginning of table to the subtable for this encoding. */
public:
DEFINE_SIZE_STATIC (8);
@@ -1350,58 +1392,112 @@ struct cmap
template<typename Iterator, typename EncodingRecIter,
hb_requires (hb_is_iterator (EncodingRecIter))>
- void serialize (hb_serialize_context_t *c,
+ bool serialize (hb_serialize_context_t *c,
Iterator it,
EncodingRecIter encodingrec_iter,
const void *base,
- const hb_subset_plan_t *plan)
+ const hb_subset_plan_t *plan,
+ bool drop_format_4 = false)
{
- if (unlikely (!c->extend_min ((*this)))) return;
+ if (unlikely (!c->extend_min ((*this)))) return false;
this->version = 0;
unsigned format4objidx = 0, format12objidx = 0, format14objidx = 0;
+ auto snap = c->snapshot ();
for (const EncodingRecord& _ : encodingrec_iter)
{
+ if (c->in_error ())
+ return false;
+
unsigned format = (base+_.subtable).u.format;
- if (!plan->glyphs_requested->is_empty ())
+ if (format != 4 && format != 12 && format != 14) continue;
+
+ hb_set_t unicodes_set;
+ (base+_.subtable).collect_unicodes (&unicodes_set);
+
+ if (!drop_format_4 && format == 4)
{
- hb_set_t unicodes_set;
- hb_map_t cp_glyphid_map;
- (base+_.subtable).collect_mapping (&unicodes_set, &cp_glyphid_map);
-
- auto table_iter =
- + hb_zip (unicodes_set.iter(), unicodes_set.iter() | hb_map(cp_glyphid_map))
- | hb_filter (plan->_glyphset, hb_second)
- | hb_filter ([plan] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t>& p)
- {
- return plan->unicodes->has (p.first) ||
- plan->glyphs_requested->has (p.second);
- })
- | hb_map ([plan] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t>& p_org)
- {
- return hb_pair_t<hb_codepoint_t, hb_codepoint_t> (p_org.first, plan->glyph_map->get(p_org.second));
- })
- ;
-
- if (format == 4) c->copy (_, table_iter, 4u, base, plan, &format4objidx);
- else if (format == 12) c->copy (_, table_iter, 12u, base, plan, &format12objidx);
- else if (format == 14) c->copy (_, table_iter, 14u, base, plan, &format14objidx);
+ c->copy (_, + it | hb_filter (unicodes_set, hb_first), 4u, base, plan, &format4objidx);
+ if (c->in_error () && c->only_overflow ())
+ {
+ // cmap4 overflowed, reset and retry serialization without format 4 subtables.
+ c->revert (snap);
+ return serialize (c, it,
+ encodingrec_iter,
+ base,
+ plan,
+ true);
+ }
}
- /* when --gids option is not used, we iterate input unicodes instead of
- * all codepoints in each subtable, which is more efficient */
- else
+
+ else if (format == 12)
{
- hb_set_t unicodes_set;
- (base+_.subtable).collect_unicodes (&unicodes_set);
+ if (_can_drop (_, unicodes_set, base, + it | hb_map (hb_first), encodingrec_iter)) continue;
+ c->copy (_, + it | hb_filter (unicodes_set, hb_first), 12u, base, plan, &format12objidx);
+ }
+ else if (format == 14) c->copy (_, it, 14u, base, plan, &format14objidx);
+ }
+ c->check_assign(this->encodingRecord.len,
+ (c->length () - cmap::min_size)/EncodingRecord::static_size,
+ HB_SERIALIZE_ERROR_INT_OVERFLOW);
- if (format == 4) c->copy (_, + it | hb_filter (unicodes_set, hb_first), 4u, base, plan, &format4objidx);
- else if (format == 12) c->copy (_, + it | hb_filter (unicodes_set, hb_first), 12u, base, plan, &format12objidx);
- else if (format == 14) c->copy (_, it, 14u, base, plan, &format14objidx);
+ // Fail if format 4 was dropped and there is no cmap12.
+ return !drop_format_4 || format12objidx;
+ }
+
+ template<typename Iterator, typename EncodingRecordIterator,
+ hb_requires (hb_is_iterator (Iterator)),
+ hb_requires (hb_is_iterator (EncodingRecordIterator))>
+ bool _can_drop (const EncodingRecord& cmap12,
+ const hb_set_t& cmap12_unicodes,
+ const void* base,
+ Iterator subset_unicodes,
+ EncodingRecordIterator encoding_records)
+ {
+ for (auto cp : + subset_unicodes | hb_filter (cmap12_unicodes))
+ {
+ if (cp >= 0x10000) return false;
+ }
+
+ unsigned target_platform;
+ unsigned target_encoding;
+ unsigned target_language = (base+cmap12.subtable).get_language ();
+
+ if (cmap12.platformID == 0 && cmap12.encodingID == 4)
+ {
+ target_platform = 0;
+ target_encoding = 3;
+ } else if (cmap12.platformID == 3 && cmap12.encodingID == 10) {
+ target_platform = 3;
+ target_encoding = 1;
+ } else {
+ return false;
+ }
+
+ for (const auto& _ : encoding_records)
+ {
+ if (_.platformID != target_platform
+ || _.encodingID != target_encoding
+ || (base+_.subtable).get_language() != target_language)
+ continue;
+
+ hb_set_t sibling_unicodes;
+ (base+_.subtable).collect_unicodes (&sibling_unicodes);
+
+ auto cmap12 = + subset_unicodes | hb_filter (cmap12_unicodes);
+ auto sibling = + subset_unicodes | hb_filter (sibling_unicodes);
+ for (; cmap12 && sibling; cmap12++, sibling++)
+ {
+ unsigned a = *cmap12;
+ unsigned b = *sibling;
+ if (a != b) return false;
}
+
+ return !cmap12 && !sibling;
}
- c->check_assign(this->encodingRecord.len, (c->length () - cmap::min_size)/EncodingRecord::static_size);
+ return false;
}
void closure_glyphs (const hb_set_t *unicodes,
@@ -1468,8 +1564,8 @@ struct cmap
| hb_filter ([&] (const hb_pair_t<hb_codepoint_t, hb_codepoint_t> _)
{ return (_.second != HB_MAP_VALUE_INVALID); })
;
- cmap_prime->serialize (c->serializer, it, encodingrec_iter, this, c->plan);
- return_trace (true);
+
+ return_trace (cmap_prime->serialize (c->serializer, it, encodingrec_iter, this, c->plan));
}
const CmapSubtable *find_best_subtable (bool *symbol = nullptr) const
@@ -1697,7 +1793,7 @@ struct cmap
protected:
HBUINT16 version; /* Table version number (0). */
- SortedArrayOf<EncodingRecord>
+ SortedArray16Of<EncodingRecord>
encodingRecord; /* Encoding tables. */
public:
DEFINE_SIZE_ARRAY (4, encodingRecord);