// SPDX-License-Identifier: MIT OR MPL-2.0 OR LGPL-2.1-or-later OR GPL-2.0-or-later
// Copyright 2010, SIL International, All rights reserved.

// This class represents loaded graphite stack machine code.  It performs
// basic sanity checks on the incoming code to prevent more obvious problems
// from crashing graphite.
// Author: Tim Eves

#include <cassert>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include "graphite2/Segment.h"
#include "inc/Code.h"
#include "inc/Face.h"
#include "inc/GlyphFace.h"
#include "inc/GlyphCache.h"
#include "inc/Machine.h"
#include "inc/Rule.h"
#include "inc/Silf.h"

#include <new>      // for the placement new used below

#ifdef NDEBUG
#ifdef __GNUC__
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
#endif

using namespace graphite2;
using namespace vm;

namespace {

inline bool is_return(const instr i)
{
    const opcode_t * opmap = Machine::getOpcodeTable();
    const instr pop_ret  = *opmap[POP_RET].impl,
                ret_zero = *opmap[RET_ZERO].impl,
                ret_true = *opmap[RET_TRUE].impl;
    return i == pop_ret || i == ret_zero || i == ret_true;
}

struct context
{
    context(uint8 ref=0) : codeRef(ref) {flags.changed=false; flags.referenced=false;}
    struct {
        uint8   changed:1,
                referenced:1;
    } flags;
    uint8       codeRef;
};

} // end namespace


class Machine::Code::decoder
{
public:
    struct limits;
    static const int NUMCONTEXTS = 256;

    decoder(limits & lims, Code &code, enum passtype pt) throw();

    bool    load(const byte * bc_begin, const byte * bc_end);
    void    apply_analysis(instr * const code, instr * code_end);
    byte    max_ref() { return _max_ref; }
    int     out_index() const { return _out_index; }

private:
    void    set_ref(int index) throw();
    void    set_noref(int index) throw();
    void    set_changed(int index) throw();
    opcode  fetch_opcode(const byte * bc);
    void    analyse_opcode(const opcode, const int8 * const dp) throw();
    bool    emit_opcode(opcode opc, const byte * & bc);
    bool    validate_opcode(const byte opc, const byte * const bc);
    bool    valid_upto(const uint16 limit, const uint16 x) const throw();
    bool    test_context() const throw();
    bool    test_ref(int8 index) const throw();
    bool    test_attr(attrCode attr) const throw();
    void    failure(const status_t s) const throw() { _code.failure(s); }

    Code          & _code;
    int             _out_index;
    uint16          _out_length;
    instr         * _instr;
    byte          * _data;
    limits        & _max;
    enum passtype   _passtype;
    int             _stack_depth;
    bool            _in_ctxt_item;
    int16           _slotref;
    context         _contexts[NUMCONTEXTS];
    byte            _max_ref;
};


struct Machine::Code::decoder::limits
{
    const byte    * bytecode;
    const uint8     pre_context;
    const uint16    rule_length,
                    classes,
                    glyf_attrs,
                    features;
    const byte      attrid[gr_slatMax];
};
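// Decoding simulates the program's effect on the output slot stream.
// Constraint code starts writing at output index 0 with a nominal output
// length of 1; rule code starts at the rule's pre-context with the full
// rule length available.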
inline
Machine::Code::decoder::decoder(limits & lims, Code &code, enum passtype pt) throw()
: _code(code),
  _out_index(code._constraint ? 0 : lims.pre_context),
  _out_length(code._constraint ? 1 : lims.rule_length),
  _instr(code._code), _data(code._data), _max(lims), _passtype(pt),
  _stack_depth(0),
  _in_ctxt_item(false),
  _slotref(0),
  _max_ref(0)
{ }


Machine::Code::Code(bool is_constraint, const byte * bytecode_begin, const byte * const bytecode_end,
                    uint8 pre_context, uint16 rule_length, const Silf & silf, const Face & face,
                    enum passtype pt, byte * * const _out)
:   _code(0), _data(0), _data_size(0), _instr_count(0), _max_ref(0), _status(loaded),
    _constraint(is_constraint), _modify(false), _delete(false), _own(_out==0)
{
#ifdef GRAPHITE2_TELEMETRY
    telemetry::category _code_cat(face.tele.code);
#endif
    assert(bytecode_begin != 0);
    if (bytecode_begin == bytecode_end)
    {
        // ::new (this) Code();
        return;
    }
    assert(bytecode_end > bytecode_begin);
    const opcode_t * op_to_fn = Machine::getOpcodeTable();

    // Allocate code and data target buffers, these sizes are a worst case
    // estimate.  Once we know their real sizes we'll shrink them.
    if (_out)   _code = reinterpret_cast<instr *>(*_out);
    else        _code = static_cast<instr *>(malloc(estimateCodeDataOut(bytecode_end - bytecode_begin,
                                                    1, is_constraint ? 0 : rule_length)));
    _data = reinterpret_cast<byte *>(_code + (bytecode_end - bytecode_begin));

    if (!_code || !_data)
    {
        failure(alloc_failed);
        return;
    }

    decoder::limits lims = {
        bytecode_end,
        pre_context,
        rule_length,
        silf.numClasses(),
        face.glyphs().numAttrs(),
        face.numFeatures(),
        {1,1,1,1,1,1,1,1,
         1,1,1,1,1,1,1,255,
         1,1,1,1,1,1,1,1,
         1,1,1,1,1,1,0,0,
         0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0, silf.numUser()}
    };

    decoder dec(lims, *this, pt);
    if (!dec.load(bytecode_begin, bytecode_end))
        return;

    // Is this an empty program?
    if (_instr_count == 0)
    {
        release_buffers();
        ::new (this) Code();
        return;
    }

    // When we reach the end, check we've terminated it correctly.
    if (!is_return(_code[_instr_count-1]))
    {
        failure(missing_return);
        return;
    }

    assert((_constraint && immutable()) || !_constraint);
    dec.apply_analysis(_code, _code + _instr_count);
    _max_ref = dec.max_ref();

    // Now we know exactly how much code and data the program really needs,
    // realloc the buffers to exactly the right size so we don't waste any
    // memory.
    assert((bytecode_end - bytecode_begin) >= ptrdiff_t(_instr_count));
    assert((bytecode_end - bytecode_begin) >= ptrdiff_t(_data_size));
    memmove(_code + (_instr_count+1), _data, _data_size*sizeof(byte));
    size_t const total_sz = ((_instr_count+1) + (_data_size + sizeof(instr)-1)/sizeof(instr))*sizeof(instr);
    if (_out)
        *_out += total_sz;
    else
    {
        instr * const old_code = _code;
        _code = static_cast<instr *>(realloc(_code, total_sz));
        if (!_code)  free(old_code);
    }
    _data = reinterpret_cast<byte *>(_code + (_instr_count+1));

    if (!_code)
    {
        failure(alloc_failed);
        return;
    }

    // Make this RET_ZERO, we should never reach this but just in case ...
    _code[_instr_count] = op_to_fn[RET_ZERO].impl[_constraint];

#ifdef GRAPHITE2_TELEMETRY
    telemetry::count_bytes(_data_size + (_instr_count+1)*sizeof(instr));
#endif
}

Machine::Code::~Code() throw ()
{
    if (_own)
        release_buffers();
}


bool Machine::Code::decoder::load(const byte * bc, const byte * bc_end)
{
    _max.bytecode = bc_end;
    while (bc < bc_end)
    {
        const opcode opc = fetch_opcode(bc++);
        if (opc == vm::MAX_OPCODE)
            return false;

        analyse_opcode(opc, reinterpret_cast<const int8 *>(bc));

        if (!emit_opcode(opc, bc))
            return false;
    }

    return bool(_code);
}

// Validation check and fixups.
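// fetch_opcode() peeks at the next opcode and range-checks its immediate
// arguments against the limits gathered from the Silf table and the face.
// Any problem is recorded through failure() and MAX_OPCODE is returned so
// that load() stops decoding.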
//
opcode Machine::Code::decoder::fetch_opcode(const byte * bc)
{
    const byte opc = *bc++;

    // Do some basic sanity checks based on what we know about the opcode
    if (!validate_opcode(opc, bc))  return MAX_OPCODE;

    // And check its arguments as far as possible
    switch (opcode(opc))
    {
        case NOP :
            break;
        case PUSH_BYTE :
        case PUSH_BYTEU :
        case PUSH_SHORT :
        case PUSH_SHORTU :
        case PUSH_LONG :
            ++_stack_depth;
            break;
        case ADD :
        case SUB :
        case MUL :
        case DIV :
        case MIN_ :
        case MAX_ :
        case AND :
        case OR :
        case EQUAL :
        case NOT_EQ :
        case LESS :
        case GTR :
        case LESS_EQ :
        case GTR_EQ :
        case BITOR :
        case BITAND :
            if (--_stack_depth <= 0)
                failure(underfull_stack);
            break;
        case NEG :
        case TRUNC8 :
        case TRUNC16 :
        case NOT :
        case BITNOT :
        case BITSET :
            if (_stack_depth <= 0)
                failure(underfull_stack);
            break;
        case COND :
            _stack_depth -= 2;
            if (_stack_depth <= 0)
                failure(underfull_stack);
            break;
        case NEXT_N :           // runtime checked
            break;
        case NEXT :
        case COPY_NEXT :
            ++_out_index;
            if (_out_index < -1 || _out_index > _out_length || _slotref > _max.rule_length)
                failure(out_of_range_data);
            break;
        case PUT_GLYPH_8BIT_OBS :
            valid_upto(_max.classes, bc[0]);
            test_context();
            break;
        case PUT_SUBS_8BIT_OBS :
            test_ref(int8(bc[0]));
            valid_upto(_max.classes, bc[1]);
            valid_upto(_max.classes, bc[2]);
            test_context();
            break;
        case PUT_COPY :
            test_ref(int8(bc[0]));
            test_context();
            break;
        case INSERT :
            if (_passtype >= PASS_TYPE_POSITIONING)
                failure(invalid_opcode);
            ++_out_length;
            if (_out_index < 0) ++_out_index;
            if (_out_index < -1 || _out_index >= _out_length)
                failure(out_of_range_data);
            break;
        case DELETE :
            if (_passtype >= PASS_TYPE_POSITIONING)
                failure(invalid_opcode);
            if (_out_index < _max.pre_context)
                failure(out_of_range_data);
            --_out_index;
            --_out_length;
            if (_out_index < -1 || _out_index > _out_length)
                failure(out_of_range_data);
            break;
        case ASSOC :
            if (bc[0] == 0)
                failure(out_of_range_data);
            for (uint8 num = bc[0]; num; --num)
                test_ref(int8(bc[num]));
            test_context();
            break;
        case CNTXT_ITEM :
            valid_upto(_max.rule_length, _max.pre_context + int8(bc[0]));
            if (bc + 2 + bc[1] >= _max.bytecode)    failure(jump_past_end);
            if (_in_ctxt_item)                      failure(nested_context_item);
            break;
        case ATTR_SET :
        case ATTR_ADD :
        case ATTR_SUB :
        case ATTR_SET_SLOT :
            if (--_stack_depth < 0)
                failure(underfull_stack);
            valid_upto(gr_slatMax, bc[0]);
            if (attrCode(bc[0]) == gr_slatUserDefn)     // use IATTR for user attributes
                failure(out_of_range_data);
            test_attr(attrCode(bc[0]));
            test_context();
            break;
        case IATTR_SET_SLOT :
            if (--_stack_depth < 0)
                failure(underfull_stack);
            if (valid_upto(gr_slatMax, bc[0]))
                valid_upto(_max.attrid[bc[0]], bc[1]);
            test_attr(attrCode(bc[0]));
            test_context();
            break;
        case PUSH_SLOT_ATTR :
            ++_stack_depth;
            valid_upto(gr_slatMax, bc[0]);
            test_ref(int8(bc[1]));
            if (attrCode(bc[0]) == gr_slatUserDefn)     // use IATTR for user attributes
                failure(out_of_range_data);
            test_attr(attrCode(bc[0]));
            break;
        case PUSH_GLYPH_ATTR_OBS :
        case PUSH_ATT_TO_GATTR_OBS :
            ++_stack_depth;
            valid_upto(_max.glyf_attrs, bc[0]);
            test_ref(int8(bc[1]));
            break;
        case PUSH_ATT_TO_GLYPH_METRIC :
        case PUSH_GLYPH_METRIC :
            ++_stack_depth;
            valid_upto(kgmetDescent, bc[0]);
            test_ref(int8(bc[1]));
            // level: dp[2] no check necessary
            break;
        case PUSH_FEAT :
            ++_stack_depth;
            valid_upto(_max.features, bc[0]);
            test_ref(int8(bc[1]));
            break;
        case PUSH_ISLOT_ATTR :
            ++_stack_depth;
            if (valid_upto(gr_slatMax, bc[0]))
            {
                test_ref(int8(bc[1]));
                valid_upto(_max.attrid[bc[0]], bc[2]);
            }
            test_attr(attrCode(bc[0]));
            break;
        case PUSH_IGLYPH_ATTR : // not implemented
            ++_stack_depth;
            break;
        case POP_RET :
            if (--_stack_depth < 0)
                failure(underfull_stack);
            GR_FALLTHROUGH;
            // no break
        case RET_ZERO :
        case RET_TRUE :
            break;
        case IATTR_SET :
        case IATTR_ADD :
        case IATTR_SUB :
            if (--_stack_depth < 0)
                failure(underfull_stack);
            if (valid_upto(gr_slatMax, bc[0]))
                valid_upto(_max.attrid[bc[0]], bc[1]);
            test_attr(attrCode(bc[0]));
            test_context();
            break;
        case PUSH_PROC_STATE :  // dummy: dp[0] no check necessary
        case PUSH_VERSION :
            ++_stack_depth;
            break;
        case PUT_SUBS :
            test_ref(int8(bc[0]));
            valid_upto(_max.classes, uint16(bc[1]<< 8) | bc[2]);
            valid_upto(_max.classes, uint16(bc[3]<< 8) | bc[4]);
            test_context();
            break;
        case PUT_SUBS2 :        // not implemented
        case PUT_SUBS3 :        // not implemented
            break;
        case PUT_GLYPH :
            valid_upto(_max.classes, uint16(bc[0]<< 8) | bc[1]);
            test_context();
            break;
        case PUSH_GLYPH_ATTR :
        case PUSH_ATT_TO_GLYPH_ATTR :
            ++_stack_depth;
            valid_upto(_max.glyf_attrs, uint16(bc[0]<< 8) | bc[1]);
            test_ref(int8(bc[2]));
            break;
        case SET_FEAT :
            valid_upto(_max.features, bc[0]);
            test_ref(int8(bc[1]));
            break;
        default:
            failure(invalid_opcode);
            break;
    }

    return bool(_code) ? opcode(opc) : MAX_OPCODE;
}

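// analyse_opcode() runs alongside fetch_opcode(): it tracks which output
// slots are changed and which are referenced, and flags whether the program
// deletes or modifies slots.  apply_analysis() later uses the per-slot flags
// to insert TEMP_COPY instructions where they are needed.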
void Machine::Code::decoder::analyse_opcode(const opcode opc, const int8 * arg) throw()
{
    switch (opc)
    {
        case DELETE :
            _code._delete = true;
            break;
        case ASSOC :
            set_changed(0);
            // for (uint8 num = arg[0]; num; --num)
            //     _analysis.set_noref(num);
            break;
        case PUT_GLYPH_8BIT_OBS :
        case PUT_GLYPH :
            _code._modify = true;
            set_changed(0);
            break;
        case ATTR_SET :
        case ATTR_ADD :
        case ATTR_SUB :
        case ATTR_SET_SLOT :
        case IATTR_SET_SLOT :
        case IATTR_SET :
        case IATTR_ADD :
        case IATTR_SUB :
            set_noref(0);
            break;
        case NEXT :
        case COPY_NEXT :
            ++_slotref;
            _contexts[_slotref] = context(uint8(_code._instr_count+1));
            // if (_analysis.slotref > _analysis.max_ref) _analysis.max_ref = _analysis.slotref;
            break;
        case INSERT :
            if (_slotref >= 0) --_slotref;
            _code._modify = true;
            break;
        case PUT_SUBS_8BIT_OBS :    // slotref on 1st parameter
        case PUT_SUBS :
            _code._modify = true;
            set_changed(0);
            GR_FALLTHROUGH;
            // no break
        case PUT_COPY :
            if (arg[0] != 0) { set_changed(0); _code._modify = true; }
            set_ref(arg[0]);
            break;
        case PUSH_GLYPH_ATTR_OBS :
        case PUSH_SLOT_ATTR :
        case PUSH_GLYPH_METRIC :
        case PUSH_ATT_TO_GATTR_OBS :
        case PUSH_ATT_TO_GLYPH_METRIC :
        case PUSH_ISLOT_ATTR :
        case PUSH_FEAT :
        case SET_FEAT :
            set_ref(arg[1]);
            break;
        case PUSH_ATT_TO_GLYPH_ATTR :
        case PUSH_GLYPH_ATTR :
            set_ref(arg[2]);
            break;
        default:
            break;
    }
}


bool Machine::Code::decoder::emit_opcode(opcode opc, const byte * & bc)
{
    const opcode_t * op_to_fn = Machine::getOpcodeTable();
    const opcode_t & op       = op_to_fn[opc];
    if (op.impl[_code._constraint] == 0)
    {
        failure(unimplemented_opcode_used);
        return false;
    }

    const size_t param_sz = op.param_sz == VARARGS ? bc[0] + 1 : op.param_sz;

    // Add this instruction
    *_instr++ = op.impl[_code._constraint];
    ++_code._instr_count;

    // Grab the parameters
    if (param_sz)
    {
        memcpy(_data, bc, param_sz * sizeof(byte));
        bc               += param_sz;
        _data            += param_sz;
        _code._data_size += param_sz;
    }

    // recursively decode a context item so we can split the skip into
    // instruction and data portions.
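    // The byte code stores a single combined skip for the context item; once
    // the nested load() below has run, that skip is rewritten in place as an
    // instruction count (instr_skip) plus a data-only skip (data_skip), so
    // the two streams can be stepped over independently.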
    if (opc == CNTXT_ITEM)
    {
        assert(_out_index == 0);
        _in_ctxt_item = true;
        _out_index = _max.pre_context + int8(_data[-2]);
        _slotref = int8(_data[-2]);
        _out_length = _max.rule_length;

        const size_t ctxt_start = _code._instr_count;
        byte & instr_skip = _data[-1];
        byte & data_skip  = *_data++;
        ++_code._data_size;
        const byte * curr_end = _max.bytecode;

        if (load(bc, bc + instr_skip))
        {
            bc += instr_skip;
            data_skip  = instr_skip - byte(_code._instr_count - ctxt_start);
            instr_skip = byte(_code._instr_count - ctxt_start);
            _max.bytecode = curr_end;

            _out_length = 1;
            _out_index = 0;
            _slotref = 0;
            _in_ctxt_item = false;
        }
        else
        {
            _out_index = 0;
            _slotref = 0;
            return false;
        }
    }

    return bool(_code);
}


void Machine::Code::decoder::apply_analysis(instr * const code, instr * code_end)
{
    // insert TEMP_COPY commands for slots that need them (that change and are referenced later)
    int tempcount = 0;

    if (_code._constraint) return;

    const instr temp_copy = Machine::getOpcodeTable()[TEMP_COPY].impl[0];
    for (const context * c = _contexts, * const ce = c + _slotref; c < ce; ++c)
    {
        if (!c->flags.referenced || !c->flags.changed) continue;

        instr * const tip = code + c->codeRef + tempcount;
        memmove(tip+1, tip, (code_end - tip) * sizeof(instr));
        *tip = temp_copy;
        ++code_end;
        ++tempcount;
        _code._delete = true;
    }

    _code._instr_count = code_end - code;
}


inline
bool Machine::Code::decoder::validate_opcode(const byte opc, const byte * const bc)
{
    if (opc >= MAX_OPCODE)
    {
        failure(invalid_opcode);
        return false;
    }
    const opcode_t & op = Machine::getOpcodeTable()[opc];
    if (op.impl[_code._constraint] == 0)
    {
        failure(unimplemented_opcode_used);
        return false;
    }
    if (op.param_sz == VARARGS && bc >= _max.bytecode)
    {
        failure(arguments_exhausted);
        return false;
    }
    const size_t param_sz = op.param_sz == VARARGS ? bc[0] + 1 : op.param_sz;
    if (bc - 1 + param_sz >= _max.bytecode)
    {
        failure(arguments_exhausted);
        return false;
    }
    return true;
}


bool Machine::Code::decoder::valid_upto(const uint16 limit, const uint16 x) const throw()
{
    const bool t = (limit != 0) && (x < limit);
    if (!t) failure(out_of_range_data);
    return t;
}

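// test_ref() checks that a slot reference stays inside the rule being
// matched; for constraint code outside a context item, only the current
// slot and the pre-context slots may be referenced.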
inline
bool Machine::Code::decoder::test_ref(int8 index) const throw()
{
    if (_code._constraint && !_in_ctxt_item)
    {
        if (index > 0 || -index > _max.pre_context)
        {
            failure(out_of_range_data);
            return false;
        }
    }
    else
    {
        if (_max.rule_length == 0
            || (_slotref + _max.pre_context + index >= _max.rule_length)
            || (_slotref + _max.pre_context + index < 0))
        {
            failure(out_of_range_data);
            return false;
        }
    }
    return true;
}

bool Machine::Code::decoder::test_context() const throw()
{
    if (_out_index >= _out_length || _out_index < 0 || _slotref >= NUMCONTEXTS - 1)
    {
        failure(out_of_range_data);
        return false;
    }
    return true;
}

bool Machine::Code::decoder::test_attr(attrCode) const throw()
{
#if 0   // This code is coming but causes backward compatibility problems.
    if (_passtype < PASS_TYPE_POSITIONING)
    {
        if (attr != gr_slatBreak && attr != gr_slatDir && attr != gr_slatUserDefn
                                 && attr != gr_slatCompRef)
        {
            failure(out_of_range_data);
            return false;
        }
    }
#endif
    return true;
}

inline
void Machine::Code::failure(const status_t s) throw()
{
    release_buffers();
    _status = s;
}


inline
void Machine::Code::decoder::set_ref(int index) throw()
{
    if (index + _slotref < 0 || index + _slotref >= NUMCONTEXTS) return;
    _contexts[index + _slotref].flags.referenced = true;
    if (index + _slotref > _max_ref) _max_ref = index + _slotref;
}


inline
void Machine::Code::decoder::set_noref(int index) throw()
{
    if (index + _slotref < 0 || index + _slotref >= NUMCONTEXTS) return;
    if (index + _slotref > _max_ref) _max_ref = index + _slotref;
}


inline
void Machine::Code::decoder::set_changed(int index) throw()
{
    if (index + _slotref < 0 || index + _slotref >= NUMCONTEXTS) return;
    _contexts[index + _slotref].flags.changed = true;
    if (index + _slotref > _max_ref) _max_ref = index + _slotref;
}


void Machine::Code::release_buffers() throw()
{
    if (_own)
        free(_code);
    _code = 0;
    _data = 0;
    _own  = false;
}


int32 Machine::Code::run(Machine & m, slotref * & map) const
{
//    assert(_own);
    assert(*this);          // Check we are actually runnable

    if (m.slotMap().size() <= size_t(_max_ref + m.slotMap().context())
        || m.slotMap()[_max_ref + m.slotMap().context()] == 0)
    {
        m._status = Machine::slot_offset_out_bounds;
        return 1;
        // return m.run(_code, _data, map);
    }

    return m.run(_code, _data, map);
}