diff options
Diffstat (limited to 'thirdparty/graphite/src/Code.cpp')
-rw-r--r-- | thirdparty/graphite/src/Code.cpp | 782 |
1 files changed, 782 insertions, 0 deletions
diff --git a/thirdparty/graphite/src/Code.cpp b/thirdparty/graphite/src/Code.cpp new file mode 100644 index 0000000000..ec5ab298ca --- /dev/null +++ b/thirdparty/graphite/src/Code.cpp @@ -0,0 +1,782 @@ +/* GRAPHITE2 LICENSING + + Copyright 2010, SIL International + All rights reserved. + + This library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should also have received a copy of the GNU Lesser General Public + License along with this library in the file named "LICENSE". + If not, write to the Free Software Foundation, 51 Franklin Street, + Suite 500, Boston, MA 02110-1335, USA or visit their web page on the + internet at http://www.fsf.org/licenses/lgpl.html. + +Alternatively, the contents of this file may be used under the terms of the +Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public +License, as published by the Free Software Foundation, either version 2 +of the License or (at your option) any later version. +*/ +// This class represents loaded graphite stack machine code. It performs +// basic sanity checks, on the incoming code to prevent more obvious problems +// from crashing graphite. +// Author: Tim Eves + +#include <cassert> +#include <cstddef> +#include <cstdlib> +#include <cstring> +#include "graphite2/Segment.h" +#include "inc/Code.h" +#include "inc/Face.h" +#include "inc/GlyphFace.h" +#include "inc/GlyphCache.h" +#include "inc/Machine.h" +#include "inc/Rule.h" +#include "inc/Silf.h" + +#include <cstdio> + +#ifdef NDEBUG +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wunused-parameter" +#endif +#endif + + +using namespace graphite2; +using namespace vm; + +namespace { + +inline bool is_return(const instr i) { + const opcode_t * opmap = Machine::getOpcodeTable(); + const instr pop_ret = *opmap[POP_RET].impl, + ret_zero = *opmap[RET_ZERO].impl, + ret_true = *opmap[RET_TRUE].impl; + return i == pop_ret || i == ret_zero || i == ret_true; +} + +struct context +{ + context(uint8 ref=0) : codeRef(ref) {flags.changed=false; flags.referenced=false;} + struct { + uint8 changed:1, + referenced:1; + } flags; + uint8 codeRef; +}; + +} // end namespace + + +class Machine::Code::decoder +{ +public: + struct limits; + static const int NUMCONTEXTS = 256; + + decoder(limits & lims, Code &code, enum passtype pt) throw(); + + bool load(const byte * bc_begin, const byte * bc_end); + void apply_analysis(instr * const code, instr * code_end); + byte max_ref() { return _max_ref; } + int out_index() const { return _out_index; } + +private: + void set_ref(int index) throw(); + void set_noref(int index) throw(); + void set_changed(int index) throw(); + opcode fetch_opcode(const byte * bc); + void analyse_opcode(const opcode, const int8 * const dp) throw(); + bool emit_opcode(opcode opc, const byte * & bc); + bool validate_opcode(const byte opc, const byte * const bc); + bool valid_upto(const uint16 limit, const uint16 x) const throw(); + bool test_context() const throw(); + bool test_ref(int8 index) const throw(); + bool test_attr(attrCode attr) const throw(); + void failure(const status_t s) const throw() { _code.failure(s); } + + Code & _code; + int _out_index; + uint16 _out_length; + instr * _instr; + byte * _data; + limits & _max; + enum passtype _passtype; + int _stack_depth; + bool _in_ctxt_item; + int16 _slotref; + context _contexts[NUMCONTEXTS]; + byte _max_ref; +}; + + +struct Machine::Code::decoder::limits +{ + const byte * bytecode; + const uint8 pre_context; + const uint16 rule_length, + classes, + glyf_attrs, + features; + const byte attrid[gr_slatMax]; +}; + +inline Machine::Code::decoder::decoder(limits & lims, Code &code, enum passtype pt) throw() +: _code(code), + _out_index(code._constraint ? 0 : lims.pre_context), + _out_length(code._constraint ? 1 : lims.rule_length), + _instr(code._code), _data(code._data), _max(lims), _passtype(pt), + _stack_depth(0), + _in_ctxt_item(false), + _slotref(0), + _max_ref(0) +{ } + + + +Machine::Code::Code(bool is_constraint, const byte * bytecode_begin, const byte * const bytecode_end, + uint8 pre_context, uint16 rule_length, const Silf & silf, const Face & face, + enum passtype pt, byte * * const _out) + : _code(0), _data(0), _data_size(0), _instr_count(0), _max_ref(0), _status(loaded), + _constraint(is_constraint), _modify(false), _delete(false), _own(_out==0) +{ +#ifdef GRAPHITE2_TELEMETRY + telemetry::category _code_cat(face.tele.code); +#endif + assert(bytecode_begin != 0); + if (bytecode_begin == bytecode_end) + { + // ::new (this) Code(); + return; + } + assert(bytecode_end > bytecode_begin); + const opcode_t * op_to_fn = Machine::getOpcodeTable(); + + // Allocate code and data target buffers, these sizes are a worst case + // estimate. Once we know their real sizes the we'll shrink them. + if (_out) _code = reinterpret_cast<instr *>(*_out); + else _code = static_cast<instr *>(malloc(estimateCodeDataOut(bytecode_end-bytecode_begin, 1, is_constraint ? 0 : rule_length))); + _data = reinterpret_cast<byte *>(_code + (bytecode_end - bytecode_begin)); + + if (!_code || !_data) { + failure(alloc_failed); + return; + } + + decoder::limits lims = { + bytecode_end, + pre_context, + rule_length, + silf.numClasses(), + face.glyphs().numAttrs(), + face.numFeatures(), + {1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,255, + 1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,0,0, + 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0, silf.numUser()} + }; + + decoder dec(lims, *this, pt); + if(!dec.load(bytecode_begin, bytecode_end)) + return; + + // Is this an empty program? + if (_instr_count == 0) + { + release_buffers(); + ::new (this) Code(); + return; + } + + // When we reach the end check we've terminated it correctly + if (!is_return(_code[_instr_count-1])) { + failure(missing_return); + return; + } + + assert((_constraint && immutable()) || !_constraint); + dec.apply_analysis(_code, _code + _instr_count); + _max_ref = dec.max_ref(); + + // Now we know exactly how much code and data the program really needs + // realloc the buffers to exactly the right size so we don't waste any + // memory. + assert((bytecode_end - bytecode_begin) >= ptrdiff_t(_instr_count)); + assert((bytecode_end - bytecode_begin) >= ptrdiff_t(_data_size)); + memmove(_code + (_instr_count+1), _data, _data_size*sizeof(byte)); + size_t const total_sz = ((_instr_count+1) + (_data_size + sizeof(instr)-1)/sizeof(instr))*sizeof(instr); + if (_out) + *_out += total_sz; + else + { + instr * const old_code = _code; + _code = static_cast<instr *>(realloc(_code, total_sz)); + if (!_code) free(old_code); + } + _data = reinterpret_cast<byte *>(_code + (_instr_count+1)); + + if (!_code) + { + failure(alloc_failed); + return; + } + + // Make this RET_ZERO, we should never reach this but just in case ... + _code[_instr_count] = op_to_fn[RET_ZERO].impl[_constraint]; + +#ifdef GRAPHITE2_TELEMETRY + telemetry::count_bytes(_data_size + (_instr_count+1)*sizeof(instr)); +#endif +} + +Machine::Code::~Code() throw () +{ + if (_own) + release_buffers(); +} + + +bool Machine::Code::decoder::load(const byte * bc, const byte * bc_end) +{ + _max.bytecode = bc_end; + while (bc < bc_end) + { + const opcode opc = fetch_opcode(bc++); + if (opc == vm::MAX_OPCODE) + return false; + + analyse_opcode(opc, reinterpret_cast<const int8 *>(bc)); + + if (!emit_opcode(opc, bc)) + return false; + } + + return bool(_code); +} + +// Validation check and fixups. +// + +opcode Machine::Code::decoder::fetch_opcode(const byte * bc) +{ + const byte opc = *bc++; + + // Do some basic sanity checks based on what we know about the opcode + if (!validate_opcode(opc, bc)) return MAX_OPCODE; + + // And check its arguments as far as possible + switch (opcode(opc)) + { + case NOP : + break; + case PUSH_BYTE : + case PUSH_BYTEU : + case PUSH_SHORT : + case PUSH_SHORTU : + case PUSH_LONG : + ++_stack_depth; + break; + case ADD : + case SUB : + case MUL : + case DIV : + case MIN_ : + case MAX_ : + case AND : + case OR : + case EQUAL : + case NOT_EQ : + case LESS : + case GTR : + case LESS_EQ : + case GTR_EQ : + case BITOR : + case BITAND : + if (--_stack_depth <= 0) + failure(underfull_stack); + break; + case NEG : + case TRUNC8 : + case TRUNC16 : + case NOT : + case BITNOT : + case BITSET : + if (_stack_depth <= 0) + failure(underfull_stack); + break; + case COND : + _stack_depth -= 2; + if (_stack_depth <= 0) + failure(underfull_stack); + break; + case NEXT_N : // runtime checked + break; + case NEXT : + case COPY_NEXT : + ++_out_index; + if (_out_index < -1 || _out_index > _out_length || _slotref > _max.rule_length) + failure(out_of_range_data); + break; + case PUT_GLYPH_8BIT_OBS : + valid_upto(_max.classes, bc[0]); + test_context(); + break; + case PUT_SUBS_8BIT_OBS : + test_ref(int8(bc[0])); + valid_upto(_max.classes, bc[1]); + valid_upto(_max.classes, bc[2]); + test_context(); + break; + case PUT_COPY : + test_ref(int8(bc[0])); + test_context(); + break; + case INSERT : + if (_passtype >= PASS_TYPE_POSITIONING) + failure(invalid_opcode); + ++_out_length; + if (_out_index < 0) ++_out_index; + if (_out_index < -1 || _out_index >= _out_length) + failure(out_of_range_data); + break; + case DELETE : + if (_passtype >= PASS_TYPE_POSITIONING) + failure(invalid_opcode); + if (_out_index < _max.pre_context) + failure(out_of_range_data); + --_out_index; + --_out_length; + if (_out_index < -1 || _out_index > _out_length) + failure(out_of_range_data); + break; + case ASSOC : + if (bc[0] == 0) + failure(out_of_range_data); + for (uint8 num = bc[0]; num; --num) + test_ref(int8(bc[num])); + test_context(); + break; + case CNTXT_ITEM : + valid_upto(_max.rule_length, _max.pre_context + int8(bc[0])); + if (bc + 2 + bc[1] >= _max.bytecode) failure(jump_past_end); + if (_in_ctxt_item) failure(nested_context_item); + break; + case ATTR_SET : + case ATTR_ADD : + case ATTR_SUB : + case ATTR_SET_SLOT : + if (--_stack_depth < 0) + failure(underfull_stack); + valid_upto(gr_slatMax, bc[0]); + if (attrCode(bc[0]) == gr_slatUserDefn) // use IATTR for user attributes + failure(out_of_range_data); + test_attr(attrCode(bc[0])); + test_context(); + break; + case IATTR_SET_SLOT : + if (--_stack_depth < 0) + failure(underfull_stack); + if (valid_upto(gr_slatMax, bc[0])) + valid_upto(_max.attrid[bc[0]], bc[1]); + test_attr(attrCode(bc[0])); + test_context(); + break; + case PUSH_SLOT_ATTR : + ++_stack_depth; + valid_upto(gr_slatMax, bc[0]); + test_ref(int8(bc[1])); + if (attrCode(bc[0]) == gr_slatUserDefn) // use IATTR for user attributes + failure(out_of_range_data); + test_attr(attrCode(bc[0])); + break; + case PUSH_GLYPH_ATTR_OBS : + case PUSH_ATT_TO_GATTR_OBS : + ++_stack_depth; + valid_upto(_max.glyf_attrs, bc[0]); + test_ref(int8(bc[1])); + break; + case PUSH_ATT_TO_GLYPH_METRIC : + case PUSH_GLYPH_METRIC : + ++_stack_depth; + valid_upto(kgmetDescent, bc[0]); + test_ref(int8(bc[1])); + // level: dp[2] no check necessary + break; + case PUSH_FEAT : + ++_stack_depth; + valid_upto(_max.features, bc[0]); + test_ref(int8(bc[1])); + break; + case PUSH_ISLOT_ATTR : + ++_stack_depth; + if (valid_upto(gr_slatMax, bc[0])) + { + test_ref(int8(bc[1])); + valid_upto(_max.attrid[bc[0]], bc[2]); + } + test_attr(attrCode(bc[0])); + break; + case PUSH_IGLYPH_ATTR :// not implemented + ++_stack_depth; + break; + case POP_RET : + if (--_stack_depth < 0) + failure(underfull_stack); + GR_FALLTHROUGH; + // no break + case RET_ZERO : + case RET_TRUE : + break; + case IATTR_SET : + case IATTR_ADD : + case IATTR_SUB : + if (--_stack_depth < 0) + failure(underfull_stack); + if (valid_upto(gr_slatMax, bc[0])) + valid_upto(_max.attrid[bc[0]], bc[1]); + test_attr(attrCode(bc[0])); + test_context(); + break; + case PUSH_PROC_STATE : // dummy: dp[0] no check necessary + case PUSH_VERSION : + ++_stack_depth; + break; + case PUT_SUBS : + test_ref(int8(bc[0])); + valid_upto(_max.classes, uint16(bc[1]<< 8) | bc[2]); + valid_upto(_max.classes, uint16(bc[3]<< 8) | bc[4]); + test_context(); + break; + case PUT_SUBS2 : // not implemented + case PUT_SUBS3 : // not implemented + break; + case PUT_GLYPH : + valid_upto(_max.classes, uint16(bc[0]<< 8) | bc[1]); + test_context(); + break; + case PUSH_GLYPH_ATTR : + case PUSH_ATT_TO_GLYPH_ATTR : + ++_stack_depth; + valid_upto(_max.glyf_attrs, uint16(bc[0]<< 8) | bc[1]); + test_ref(int8(bc[2])); + break; + case SET_FEAT : + valid_upto(_max.features, bc[0]); + test_ref(int8(bc[1])); + break; + default: + failure(invalid_opcode); + break; + } + + return bool(_code) ? opcode(opc) : MAX_OPCODE; +} + + +void Machine::Code::decoder::analyse_opcode(const opcode opc, const int8 * arg) throw() +{ + switch (opc) + { + case DELETE : + _code._delete = true; + break; + case ASSOC : + set_changed(0); +// for (uint8 num = arg[0]; num; --num) +// _analysis.set_noref(num); + break; + case PUT_GLYPH_8BIT_OBS : + case PUT_GLYPH : + _code._modify = true; + set_changed(0); + break; + case ATTR_SET : + case ATTR_ADD : + case ATTR_SUB : + case ATTR_SET_SLOT : + case IATTR_SET_SLOT : + case IATTR_SET : + case IATTR_ADD : + case IATTR_SUB : + set_noref(0); + break; + case NEXT : + case COPY_NEXT : + ++_slotref; + _contexts[_slotref] = context(uint8(_code._instr_count+1)); + // if (_analysis.slotref > _analysis.max_ref) _analysis.max_ref = _analysis.slotref; + break; + case INSERT : + if (_slotref >= 0) --_slotref; + _code._modify = true; + break; + case PUT_SUBS_8BIT_OBS : // slotref on 1st parameter + case PUT_SUBS : + _code._modify = true; + set_changed(0); + GR_FALLTHROUGH; + // no break + case PUT_COPY : + if (arg[0] != 0) { set_changed(0); _code._modify = true; } + set_ref(arg[0]); + break; + case PUSH_GLYPH_ATTR_OBS : + case PUSH_SLOT_ATTR : + case PUSH_GLYPH_METRIC : + case PUSH_ATT_TO_GATTR_OBS : + case PUSH_ATT_TO_GLYPH_METRIC : + case PUSH_ISLOT_ATTR : + case PUSH_FEAT : + case SET_FEAT : + set_ref(arg[1]); + break; + case PUSH_ATT_TO_GLYPH_ATTR : + case PUSH_GLYPH_ATTR : + set_ref(arg[2]); + break; + default: + break; + } +} + + +bool Machine::Code::decoder::emit_opcode(opcode opc, const byte * & bc) +{ + const opcode_t * op_to_fn = Machine::getOpcodeTable(); + const opcode_t & op = op_to_fn[opc]; + if (op.impl[_code._constraint] == 0) + { + failure(unimplemented_opcode_used); + return false; + } + + const size_t param_sz = op.param_sz == VARARGS ? bc[0] + 1 : op.param_sz; + + // Add this instruction + *_instr++ = op.impl[_code._constraint]; + ++_code._instr_count; + + // Grab the parameters + if (param_sz) { + memcpy(_data, bc, param_sz * sizeof(byte)); + bc += param_sz; + _data += param_sz; + _code._data_size += param_sz; + } + + // recursively decode a context item so we can split the skip into + // instruction and data portions. + if (opc == CNTXT_ITEM) + { + assert(_out_index == 0); + _in_ctxt_item = true; + _out_index = _max.pre_context + int8(_data[-2]); + _slotref = int8(_data[-2]); + _out_length = _max.rule_length; + + const size_t ctxt_start = _code._instr_count; + byte & instr_skip = _data[-1]; + byte & data_skip = *_data++; + ++_code._data_size; + const byte *curr_end = _max.bytecode; + + if (load(bc, bc + instr_skip)) + { + bc += instr_skip; + data_skip = instr_skip - byte(_code._instr_count - ctxt_start); + instr_skip = byte(_code._instr_count - ctxt_start); + _max.bytecode = curr_end; + + _out_length = 1; + _out_index = 0; + _slotref = 0; + _in_ctxt_item = false; + } + else + { + _out_index = 0; + _slotref = 0; + return false; + } + } + + return bool(_code); +} + + +void Machine::Code::decoder::apply_analysis(instr * const code, instr * code_end) +{ + // insert TEMP_COPY commands for slots that need them (that change and are referenced later) + int tempcount = 0; + if (_code._constraint) return; + + const instr temp_copy = Machine::getOpcodeTable()[TEMP_COPY].impl[0]; + for (const context * c = _contexts, * const ce = c + _slotref; c < ce; ++c) + { + if (!c->flags.referenced || !c->flags.changed) continue; + + instr * const tip = code + c->codeRef + tempcount; + memmove(tip+1, tip, (code_end - tip) * sizeof(instr)); + *tip = temp_copy; + ++code_end; + ++tempcount; + _code._delete = true; + } + + _code._instr_count = code_end - code; +} + + +inline +bool Machine::Code::decoder::validate_opcode(const byte opc, const byte * const bc) +{ + if (opc >= MAX_OPCODE) + { + failure(invalid_opcode); + return false; + } + const opcode_t & op = Machine::getOpcodeTable()[opc]; + if (op.impl[_code._constraint] == 0) + { + failure(unimplemented_opcode_used); + return false; + } + if (op.param_sz == VARARGS && bc >= _max.bytecode) + { + failure(arguments_exhausted); + return false; + } + const size_t param_sz = op.param_sz == VARARGS ? bc[0] + 1 : op.param_sz; + if (bc - 1 + param_sz >= _max.bytecode) + { + failure(arguments_exhausted); + return false; + } + return true; +} + + +bool Machine::Code::decoder::valid_upto(const uint16 limit, const uint16 x) const throw() +{ + const bool t = (limit != 0) && (x < limit); + if (!t) failure(out_of_range_data); + return t; +} + +inline +bool Machine::Code::decoder::test_ref(int8 index) const throw() +{ + if (_code._constraint && !_in_ctxt_item) + { + if (index > 0 || -index > _max.pre_context) + { + failure(out_of_range_data); + return false; + } + } + else + { + if (_max.rule_length == 0 + || (_slotref + _max.pre_context + index >= _max.rule_length) + || (_slotref + _max.pre_context + index < 0)) + { + failure(out_of_range_data); + return false; + } + } + return true; +} + +bool Machine::Code::decoder::test_context() const throw() +{ + if (_out_index >= _out_length || _out_index < 0 || _slotref >= NUMCONTEXTS - 1) + { + failure(out_of_range_data); + return false; + } + return true; +} + +bool Machine::Code::decoder::test_attr(attrCode) const throw() +{ +#if 0 // This code is coming but causes backward compatibility problems. + if (_passtype < PASS_TYPE_POSITIONING) + { + if (attr != gr_slatBreak && attr != gr_slatDir && attr != gr_slatUserDefn + && attr != gr_slatCompRef) + { + failure(out_of_range_data); + return false; + } + } +#endif + return true; +} + +inline +void Machine::Code::failure(const status_t s) throw() { + release_buffers(); + _status = s; +} + + +inline +void Machine::Code::decoder::set_ref(int index) throw() { + if (index + _slotref < 0 || index + _slotref >= NUMCONTEXTS) return; + _contexts[index + _slotref].flags.referenced = true; + if (index + _slotref > _max_ref) _max_ref = index + _slotref; +} + + +inline +void Machine::Code::decoder::set_noref(int index) throw() { + if (index + _slotref < 0 || index + _slotref >= NUMCONTEXTS) return; + if (index + _slotref > _max_ref) _max_ref = index + _slotref; +} + + +inline +void Machine::Code::decoder::set_changed(int index) throw() { + if (index + _slotref < 0 || index + _slotref >= NUMCONTEXTS) return; + _contexts[index + _slotref].flags.changed= true; + if (index + _slotref > _max_ref) _max_ref = index + _slotref; +} + + +void Machine::Code::release_buffers() throw() +{ + if (_own) + free(_code); + _code = 0; + _data = 0; + _own = false; +} + + +int32 Machine::Code::run(Machine & m, slotref * & map) const +{ +// assert(_own); + assert(*this); // Check we are actually runnable + + if (m.slotMap().size() <= size_t(_max_ref + m.slotMap().context()) + || m.slotMap()[_max_ref + m.slotMap().context()] == 0) + { + m._status = Machine::slot_offset_out_bounds; + return 1; +// return m.run(_code, _data, map); + } + + return m.run(_code, _data, map); +} |