diff options
Diffstat (limited to 'modules/gdscript/gd_tokenizer.cpp')
-rw-r--r-- | modules/gdscript/gd_tokenizer.cpp | 1344 |
1 files changed, 1344 insertions, 0 deletions
diff --git a/modules/gdscript/gd_tokenizer.cpp b/modules/gdscript/gd_tokenizer.cpp new file mode 100644 index 0000000000..ff9be7926b --- /dev/null +++ b/modules/gdscript/gd_tokenizer.cpp @@ -0,0 +1,1344 @@ +/*************************************************************************/ +/* gd_tokenizer.cpp */ +/*************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* http://www.godotengine.org */ +/*************************************************************************/ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/*************************************************************************/ +#include "gd_tokenizer.h" +#include "print_string.h" +#include "gd_functions.h" +#include "io/marshalls.h" +#include "map.h" + +const char* GDTokenizer::token_names[TK_MAX]={ +"Empty", +"Identifier", +"Constant", +"Self", +"Built-In Type", +"Built-In Func", +"In", +"'=='", +"'!='", +"'<'", +"'<='", +"'>'", +"'>='", +"'and'", +"'or'", +"'not'", +"'+'", +"'-'", +"'*'", +"'/'", +"'%'", +"'<<'", +"'>>'", +"'='", +"'+='", +"'-='", +"'*='", +"'/='", +"'%='", +"'<<='", +"'>>='", +"'&='", +"'|='", +"'^='", +"'&'", +"'|'", +"'^'", +"'~'", +//"Plus Plus", +//"Minus Minus", +"if", +"elif", +"else", +"for", +"do", +"while", +"switch", +"case", +"break", +"continue", +"pass", +"return", +"func", +"class", +"extends", +"tool", +"static", +"export", +"const", +"var", +"preload", +"assert", +"'['", +"']'", +"'{'", +"'}'", +"'('", +"')'", +"','", +"';'", +"'.'", +"'?'", +"':'", +"'\\n'", +"Error", +"EOF"}; + +const char *GDTokenizer::get_token_name(Token p_token) { + + ERR_FAIL_INDEX_V(p_token,TK_MAX,"<error>"); + return token_names[p_token]; +} + +static bool _is_text_char(CharType c) { + + return (c>='a' && c<='z') || (c>='A' && c<='Z') || (c>='0' && c<='9') || c=='_'; +} + +static bool _is_number(CharType c) { + + return (c>='0' && c<='9'); +} + +static bool _is_hex(CharType c) { + + return (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F'); +} + +void GDTokenizerText::_make_token(Token p_type) { + + TokenData &tk=tk_rb[tk_rb_pos]; + + tk.type=p_type; + tk.line=line; + tk.col=column; + + tk_rb_pos=(tk_rb_pos+1)%TK_RB_SIZE; +} +void GDTokenizerText::_make_identifier(const StringName& p_identifier) { + + TokenData &tk=tk_rb[tk_rb_pos]; + + tk.type=TK_IDENTIFIER; + tk.identifier=p_identifier; + tk.line=line; + tk.col=column; + + tk_rb_pos=(tk_rb_pos+1)%TK_RB_SIZE; + +} + +void GDTokenizerText::_make_built_in_func(GDFunctions::Function p_func) { + + TokenData &tk=tk_rb[tk_rb_pos]; + + tk.type=TK_BUILT_IN_FUNC; + tk.func=p_func; + tk.line=line; + tk.col=column; + + tk_rb_pos=(tk_rb_pos+1)%TK_RB_SIZE; + +} +void GDTokenizerText::_make_constant(const Variant& p_constant) { + + TokenData &tk=tk_rb[tk_rb_pos]; + + tk.type=TK_CONSTANT; + tk.constant=p_constant; + tk.line=line; + tk.col=column; + + tk_rb_pos=(tk_rb_pos+1)%TK_RB_SIZE; + +} + +void GDTokenizerText::_make_type(const Variant::Type& p_type) { + + + TokenData &tk=tk_rb[tk_rb_pos]; + + tk.type=TK_BUILT_IN_TYPE; + tk.vtype=p_type; + tk.line=line; + tk.col=column; + + tk_rb_pos=(tk_rb_pos+1)%TK_RB_SIZE; + +} + + +void GDTokenizerText::_make_error(const String& p_error) { + + error_flag=true; + last_error=p_error; + + TokenData &tk=tk_rb[tk_rb_pos]; + tk.type=TK_ERROR; + tk.constant=p_error; + tk.line=line; + tk.col=column; + tk_rb_pos=(tk_rb_pos+1)%TK_RB_SIZE; + +} + + +void GDTokenizerText::_make_newline(int p_spaces) { + + TokenData &tk=tk_rb[tk_rb_pos]; + tk.type=TK_NEWLINE; + tk.constant=p_spaces; + tk.line=line; + tk.col=column; + tk_rb_pos=(tk_rb_pos+1)%TK_RB_SIZE; +} + +void GDTokenizerText::_advance() { + + if (error_flag) { + //parser broke + _make_error(last_error); + return; + } + + if (code_pos>=len) { + _make_token(TK_EOF); + return; + } +#define GETCHAR(m_ofs) ((m_ofs+code_pos)>=len?0:_code[m_ofs+code_pos]) +#define INCPOS(m_amount) { code_pos+=m_amount; column+=m_amount; } + while (true) { + + + bool is_node_path=false; + + switch(GETCHAR(0)) { + case 0: + _make_token(TK_EOF); + break; + case '\t': + case '\r': + case ' ': + INCPOS(1); + continue; + case '\n': { + line++; + INCPOS(1); + column=0; + int i=0; + while(GETCHAR(i)==' ' || GETCHAR(i)=='\t') { + i++; + } + + _make_newline(i); + return; + } +#if 1 //py style tokenizer + case '#': { // line comment skip + + while(GETCHAR(0)!='\n') { + code_pos++; + if (GETCHAR(0)==0) { //end of file + _make_error("Unterminated Comment"); + return; + } + } + INCPOS(1); + column=0; + line++; + int i=0; + while(GETCHAR(i)==' ' || GETCHAR(i)=='\t') { + i++; + } + _make_newline(i); + return; + + } break; +#endif + case '/': { + + switch(GETCHAR(1)) { +#if 0 // c style tokenizer + case '*': { // block comment + int pos = code_pos+2; + int new_line=line; + int new_col=column+2; + + while(true) { + if (_code[pos]=='0') { + _make_error("Unterminated Comment"); + code_pos=pos; + return; + } + if (_code[pos]=='*' && _code[pos+1]=='/') { + new_col+=2; + pos+=2; //compensate + break; + } else if (_code[pos]=='\n') { + new_line++; + new_col=0; + } else { + new_col++; + } + pos++; + } + + column=new_col; + line=new_line; + code_pos=pos; + continue; + + } break; + case '/': { // line comment skip + + while(GETCHAR(0)!='\n') { + code_pos++; + if (GETCHAR(0)==0) { //end of file + _make_error("Unterminated Comment"); + return; + } + } + INCPOS(1); + column=0; + line++; + continue; + + } break; +#endif + case '=': { // diveq + + _make_token(TK_OP_ASSIGN_DIV); + INCPOS(1); + + } break; + default: + _make_token(TK_OP_DIV); + + } + } break; + case '=': { + if (GETCHAR(1)=='=') { + _make_token(TK_OP_EQUAL); + INCPOS(1); + + } else + _make_token(TK_OP_ASSIGN); + + } break; + case '<': { + if (GETCHAR(1)=='=') { + + _make_token(TK_OP_LESS_EQUAL); + INCPOS(1); + } else if (GETCHAR(1)=='<') { + if (GETCHAR(2)=='=') { + _make_token(TK_OP_ASSIGN_SHIFT_LEFT); + INCPOS(1); + } else { + _make_token(TK_OP_SHIFT_LEFT); + } + INCPOS(1); + } else + _make_token(TK_OP_LESS); + + } break; + case '>': { + if (GETCHAR(1)=='=') { + _make_token(TK_OP_GREATER_EQUAL); + INCPOS(1); + } else if (GETCHAR(1)=='>') { + if (GETCHAR(2)=='=') { + _make_token(TK_OP_ASSIGN_SHIFT_RIGHT); + INCPOS(1); + + } else { + _make_token(TK_OP_SHIFT_RIGHT); + } + INCPOS(1); + } else { + _make_token(TK_OP_GREATER); + } + + } break; + case '!': { + if (GETCHAR(1)=='=') { + _make_token(TK_OP_NOT_EQUAL); + INCPOS(1); + } else { + _make_token(TK_OP_NOT); + } + + } break; + //case '"' //string - no strings in shader + //case '\'' //string - no strings in shader + case '{': + _make_token(TK_CURLY_BRACKET_OPEN); + break; + case '}': + _make_token(TK_CURLY_BRACKET_CLOSE); + break; + case '[': + _make_token(TK_BRACKET_OPEN); + break; + case ']': + _make_token(TK_BRACKET_CLOSE); + break; + case '(': + _make_token(TK_PARENTHESIS_OPEN); + break; + case ')': + _make_token(TK_PARENTHESIS_CLOSE); + break; + case ',': + _make_token(TK_COMMA); + break; + case ';': + _make_token(TK_SEMICOLON); + break; + case '?': + _make_token(TK_QUESTION_MARK); + break; + case ':': + _make_token(TK_COLON); //for methods maybe but now useless. + break; + case '^': { + if (GETCHAR(1)=='=') { + _make_token(TK_OP_ASSIGN_BIT_XOR); + INCPOS(1); + } else { + _make_token(TK_OP_BIT_XOR); + } + + } break; + case '~': + _make_token(TK_OP_BIT_INVERT); + break; + case '&': { + if (GETCHAR(1)=='&') { + + _make_token(TK_OP_AND); + INCPOS(1); + } else if (GETCHAR(1)=='=') { + _make_token(TK_OP_ASSIGN_BIT_AND); + INCPOS(1); + } else { + _make_token(TK_OP_BIT_AND); + } + } break; + case '|': { + if (GETCHAR(1)=='|') { + + _make_token(TK_OP_OR); + INCPOS(1); + } else if (GETCHAR(1)=='=') { + _make_token(TK_OP_ASSIGN_BIT_OR); + INCPOS(1); + } else { + _make_token(TK_OP_BIT_OR); + } + } break; + case '*': { + + if (GETCHAR(1)=='=') { + _make_token(TK_OP_ASSIGN_MUL); + INCPOS(1); + } else { + _make_token(TK_OP_MUL); + } + } break; + case '+': { + + if (GETCHAR(1)=='=') { + _make_token(TK_OP_ASSIGN_ADD); + INCPOS(1); + //} else if (GETCHAR(1)=='+') { + // _make_token(TK_OP_PLUS_PLUS); + // INCPOS(1); + } else { + _make_token(TK_OP_ADD); + } + + } break; + case '-': { + + if (GETCHAR(1)=='=') { + _make_token(TK_OP_ASSIGN_SUB); + INCPOS(1); + //} else if (GETCHAR(1)=='-') { + // _make_token(TK_OP_MINUS_MINUS); + // INCPOS(1); + } else { + _make_token(TK_OP_SUB); + } + } break; + case '%': { + + if (GETCHAR(1)=='=') { + _make_token(TK_OP_ASSIGN_MOD); + INCPOS(1); + } else { + _make_token(TK_OP_MOD); + } + } break; + case '@': + if (CharType(GETCHAR(1))!='"') { + _make_error("Unexpected '@'"); + return; + } + INCPOS(1); + is_node_path=true; + case '"': { + + int i=1; + String str; + while(true) { + if (CharType(GETCHAR(i)==0)) { + + _make_error("Unterminated String"); + return; + } else if (CharType(GETCHAR(i)=='"')) { + break; + } else if (CharType(GETCHAR(i)=='\\')) { + //escaped characters... + i++; + CharType next = GETCHAR(i); + if (next==0) { + _make_error("Unterminated String"); + return; + } + CharType res=0; + + switch(next) { + + case 'a': res=7; break; + case 'b': res=8; break; + case 't': res=9; break; + case 'n': res=10; break; + case 'v': res=11; break; + case 'f': res=12; break; + case 'r': res=13; break; + case '\'': res='\''; break; + case '\"': res='\"'; break; + case '\\': res='\\'; break; + case 'x': { + //hexnumbarh - oct is deprecated + + int read=0; + for(int j=0;j<4;j++) { + CharType c = GETCHAR(i+j); + if (c==0) { + _make_error("Unterminated String"); + return; + } + if (!_is_hex(c)) { + if (j==0 || !(j&1)) { + _make_error("Malformed hex constant in string"); + return; + } else + break; + } + CharType v; + if (c>='0' && c<='9') { + v=c-'0'; + } else if (c>='a' && c<='f') { + v=c-'a'; + v+=10; + } else if (c>='A' && c<='F') { + v=c-'A'; + v+=10; + } else { + ERR_PRINT("BUG"); + v=0; + } + + res<<=4; + res|=v; + + read++; + } + i+=read-1; + + + } break; + default: { + + _make_error("Invalid escape sequence"); + return; + } break; + } + + str+=res; + + } else { + str+=CharType(GETCHAR(i)); + } + i++; + } + INCPOS(i); + + if (is_node_path) { + _make_constant(NodePath(str)); + } else { + _make_constant(str); + } + + } break; + default: { + + if (_is_number(GETCHAR(0)) || (GETCHAR(0)=='.' && _is_number(GETCHAR(1)))) { + // parse number + bool period_found=false; + bool exponent_found=false; + bool hexa_found=false; + bool sign_found=false; + + String str; + int i=0; + + while(true) { + if (GETCHAR(i)=='.') { + if (period_found || exponent_found) { + _make_error("Invalid numeric constant at '.'"); + return; + } + period_found=true; + } else if (GETCHAR(i)=='x') { + if (hexa_found || str.length()!=1 || !( (i==1 && str[0]=='0') || (i==2 && str[1]=='0' && str[0]=='-') ) ) { + _make_error("Invalid numeric constant at 'x'"); + return; + } + hexa_found=true; + } else if (!hexa_found && GETCHAR(i)=='e') { + if (hexa_found || exponent_found) { + _make_error("Invalid numeric constant at 'e'"); + return; + } + exponent_found=true; + } else if (_is_number(GETCHAR(i))) { + //all ok + } else if (hexa_found && _is_hex(GETCHAR(i))) { + + } else if ((GETCHAR(i)=='-' || GETCHAR(i)=='+') && exponent_found) { + if (sign_found) { + _make_error("Invalid numeric constant at '-'"); + return; + } + sign_found=true; + } else + break; + + str+=CharType(GETCHAR(i)); + i++; + } + + if (!( _is_number(str[str.length()-1]) || (hexa_found && _is_hex(str[str.length()-1])))) { + _make_error("Invalid numeric constant: "+str); + return; + } + + INCPOS(str.length()); + if (hexa_found) { + int val = str.hex_to_int(); + _make_constant(val); + } else if (period_found) { + real_t val = str.to_double(); + //print_line("*%*%*%*% to convert: "+str+" result: "+rtos(val)); + _make_constant(val); + } else { + int val = str.to_int(); + _make_constant(val); + + } + + return; + } + + if (GETCHAR(0)=='.') { + //parse period + _make_token(TK_PERIOD); + break; + } + + if (_is_text_char(GETCHAR(0))) { + // parse identifier + String str; + str+=CharType(GETCHAR(0)); + + int i=1; + while(_is_text_char(GETCHAR(i))) { + str+=CharType(GETCHAR(i)); + i++; + } + + bool identifier=false; + + if (str=="null") { + _make_constant(Variant()); + + } else if (str=="true") { + _make_constant(true); + + } else if (str=="false") { + _make_constant(false); + } else { + + bool found=false; + + struct _bit { Variant::Type type; const char *text;}; + //built in types + + static const _bit type_list[]={ + //types + {Variant::BOOL,"bool"}, + {Variant::INT,"int"}, + {Variant::REAL,"float"}, + {Variant::STRING,"String"}, + {Variant::VECTOR2,"vec2"}, + {Variant::VECTOR2,"Vector2"}, + {Variant::RECT2,"Rect2"}, + {Variant::MATRIX32,"Matrix32"}, + {Variant::MATRIX32,"mat32"}, + {Variant::VECTOR3,"vec3"}, + {Variant::VECTOR3,"Vector3"}, + {Variant::_AABB,"AABB"}, + {Variant::_AABB,"Rect3"}, + {Variant::PLANE,"Plane"}, + {Variant::QUAT,"Quat"}, + {Variant::MATRIX3,"mat3"}, + {Variant::MATRIX3,"Matrix3"}, + {Variant::TRANSFORM,"trn"}, + {Variant::TRANSFORM,"Transform"}, + {Variant::COLOR,"Color"}, + {Variant::IMAGE,"Image"}, + {Variant::_RID,"RID"}, + {Variant::OBJECT,"Object"}, + {Variant::INPUT_EVENT,"InputEvent"}, + {Variant::DICTIONARY,"dict"}, + {Variant::DICTIONARY,"Dictionary"}, + {Variant::ARRAY,"Array"}, + {Variant::RAW_ARRAY,"RawArray"}, + {Variant::INT_ARRAY,"IntArray"}, + {Variant::REAL_ARRAY,"FloatArray"}, + {Variant::STRING_ARRAY,"StringArray"}, + {Variant::VECTOR2_ARRAY,"Vector2Array"}, + {Variant::VECTOR3_ARRAY,"Vector3Array"}, + {Variant::COLOR_ARRAY,"ColorArray"}, + {Variant::VARIANT_MAX,NULL}, + }; + + { + + + int idx=0; + + while(type_list[idx].text) { + + if (str==type_list[idx].text) { + _make_type(type_list[idx].type); + found=true; + break; + } + idx++; + } + } + + if (!found) { + + //built in func? + + for(int i=0;i<GDFunctions::FUNC_MAX;i++) { + + if (str==GDFunctions::get_func_name(GDFunctions::Function(i))) { + + _make_built_in_func(GDFunctions::Function(i)); + found=true; + break; + } + } + + //keywor + } + + if (!found) { + + + struct _kws { Token token; const char *text;}; + + static const _kws keyword_list[]={ + //ops + {TK_OP_IN,"in"}, + {TK_OP_NOT,"not"}, + {TK_OP_OR,"or"}, + {TK_OP_AND,"and"}, + //func + {TK_PR_FUNCTION,"func"}, + {TK_PR_FUNCTION,"function"}, + {TK_PR_CLASS,"class"}, + {TK_PR_EXTENDS,"extends"}, + {TK_PR_TOOL,"tool"}, + {TK_PR_STATIC,"static"}, + {TK_PR_EXPORT,"export"}, + {TK_PR_VAR,"var"}, + {TK_PR_PRELOAD,"preload"}, + {TK_PR_ASSERT,"assert"}, + {TK_PR_CONST,"const"}, + //controlflow + {TK_CF_IF,"if"}, + {TK_CF_ELIF,"elif"}, + {TK_CF_ELSE,"else"}, + {TK_CF_FOR,"for"}, + {TK_CF_WHILE,"while"}, + {TK_CF_DO,"do"}, + {TK_CF_SWITCH,"switch"}, + {TK_CF_BREAK,"break"}, + {TK_CF_CONTINUE,"continue"}, + {TK_CF_RETURN,"return"}, + {TK_CF_PASS,"pass"}, + {TK_SELF,"self"}, + {TK_ERROR,NULL} + }; + + int idx=0; + found=false; + + while(keyword_list[idx].text) { + + if (str==keyword_list[idx].text) { + _make_token(keyword_list[idx].token); + found=true; + break; + } + idx++; + } + } + + if (!found) + identifier=true; + } + + + if (identifier) { + _make_identifier(str); + } + INCPOS(str.length()); + return; + } + + _make_error("Unknown character"); + return; + + } break; + } + + INCPOS(1); + break; + } + +} + +void GDTokenizerText::set_code(const String& p_code) { + + code=p_code; + len = p_code.length(); + if (len) { + _code=&code[0]; + } else { + _code=NULL; + } + code_pos=0; + line=1; //it is stand-ar-ized that lines begin in 1 in code.. + column=0; + tk_rb_pos=0; + error_flag=false; + last_error=""; + for(int i=0;i<MAX_LOOKAHEAD+1;i++) + _advance(); +} + +GDTokenizerText::Token GDTokenizerText::get_token(int p_offset) const { + ERR_FAIL_COND_V( p_offset <= -MAX_LOOKAHEAD, TK_ERROR); + ERR_FAIL_COND_V( p_offset >= MAX_LOOKAHEAD, TK_ERROR); + + int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD -1)%TK_RB_SIZE; + return tk_rb[ofs].type; +} + +int GDTokenizerText::get_token_line(int p_offset) const { + ERR_FAIL_COND_V( p_offset <= -MAX_LOOKAHEAD, -1); + ERR_FAIL_COND_V( p_offset >= MAX_LOOKAHEAD, -1); + + int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD -1)%TK_RB_SIZE; + return tk_rb[ofs].line; +} + +int GDTokenizerText::get_token_column(int p_offset) const { + ERR_FAIL_COND_V( p_offset <= -MAX_LOOKAHEAD, -1); + ERR_FAIL_COND_V( p_offset >= MAX_LOOKAHEAD, -1); + + int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD -1)%TK_RB_SIZE; + return tk_rb[ofs].col; +} + +const Variant& GDTokenizerText::get_token_constant(int p_offset) const { + ERR_FAIL_COND_V( p_offset <= -MAX_LOOKAHEAD, tk_rb[0].constant); + ERR_FAIL_COND_V( p_offset >= MAX_LOOKAHEAD, tk_rb[0].constant); + + int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD -1)%TK_RB_SIZE; + ERR_FAIL_COND_V(tk_rb[ofs].type!=TK_CONSTANT,tk_rb[0].constant); + return tk_rb[ofs].constant; +} +StringName GDTokenizerText::get_token_identifier(int p_offset) const { + + ERR_FAIL_COND_V( p_offset <= -MAX_LOOKAHEAD, StringName()); + ERR_FAIL_COND_V( p_offset >= MAX_LOOKAHEAD, StringName()); + + int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD -1)%TK_RB_SIZE; + ERR_FAIL_COND_V(tk_rb[ofs].type!=TK_IDENTIFIER,StringName()); + return tk_rb[ofs].identifier; + +} + +GDFunctions::Function GDTokenizerText::get_token_built_in_func(int p_offset) const { + + ERR_FAIL_COND_V( p_offset <= -MAX_LOOKAHEAD, GDFunctions::FUNC_MAX); + ERR_FAIL_COND_V( p_offset >= MAX_LOOKAHEAD, GDFunctions::FUNC_MAX); + + int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD -1)%TK_RB_SIZE; + ERR_FAIL_COND_V(tk_rb[ofs].type!=TK_BUILT_IN_FUNC,GDFunctions::FUNC_MAX); + return tk_rb[ofs].func; + +} + +Variant::Type GDTokenizerText::get_token_type(int p_offset) const { + + ERR_FAIL_COND_V( p_offset <= -MAX_LOOKAHEAD, Variant::NIL); + ERR_FAIL_COND_V( p_offset >= MAX_LOOKAHEAD, Variant::NIL); + + int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD -1)%TK_RB_SIZE; + ERR_FAIL_COND_V(tk_rb[ofs].type!=TK_BUILT_IN_TYPE,Variant::NIL); + return tk_rb[ofs].vtype; + +} + + +int GDTokenizerText::get_token_line_indent(int p_offset) const { + + ERR_FAIL_COND_V( p_offset <= -MAX_LOOKAHEAD, 0); + ERR_FAIL_COND_V( p_offset >= MAX_LOOKAHEAD, 0); + + int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD -1)%TK_RB_SIZE; + ERR_FAIL_COND_V(tk_rb[ofs].type!=TK_NEWLINE,0); + return tk_rb[ofs].constant; + +} + +String GDTokenizerText::get_token_error(int p_offset) const { + + ERR_FAIL_COND_V( p_offset <= -MAX_LOOKAHEAD, String()); + ERR_FAIL_COND_V( p_offset >= MAX_LOOKAHEAD, String()); + + int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD -1)%TK_RB_SIZE; + ERR_FAIL_COND_V(tk_rb[ofs].type!=TK_ERROR,String()); + return tk_rb[ofs].constant; +} + +void GDTokenizerText::advance(int p_amount) { + + ERR_FAIL_COND( p_amount <=0 ); + for(int i=0;i<p_amount;i++) + _advance(); +} + + + + + + + + + + + +////////////////////////////////////////////////////////////////////////////////////////////////////// + +#define BYTECODE_VERSION 1 + +Error GDTokenizerBuffer::set_code_buffer(const Vector<uint8_t> & p_buffer) { + + + const uint8_t *buf=p_buffer.ptr(); + int total_len=p_buffer.size(); + ERR_FAIL_COND_V( p_buffer.size()<24 || p_buffer[0]!='G' || p_buffer[1]!='D' || p_buffer[2]!='S' || p_buffer[3]!='C',ERR_INVALID_DATA); + + int version = decode_uint32(&buf[4]); + if (version>1) { + ERR_EXPLAIN("Bytecode is too New!"); + ERR_FAIL_COND_V(version>BYTECODE_VERSION,ERR_INVALID_DATA); + } + int identifier_count = decode_uint32(&buf[8]); + int constant_count = decode_uint32(&buf[12]); + int line_count = decode_uint32(&buf[16]); + int token_count = decode_uint32(&buf[20]); + + const uint8_t *b=buf; + + b=&buf[24]; + total_len-=24; + + identifiers.resize(identifier_count); + for(int i=0;i<identifier_count;i++) { + + int len = decode_uint32(b); + ERR_FAIL_COND_V(len>total_len,ERR_INVALID_DATA); + b+=4; + Vector<uint8_t> cs; + cs.resize(len); + for(int j=0;j<len;j++) { + cs[j]=b[j]^0xb6; + } + + cs[cs.size()-1]=0; + String s; + s.parse_utf8((const char*)cs.ptr()); + b+=len; + total_len-=len+4; + identifiers[i]=s; + } + + constants.resize(constant_count); + for(int i=0;i<constant_count;i++) { + + Variant v; + int len; + Error err = decode_variant(v,b,total_len,&len); + if (err) + return err; + b+=len; + total_len-=len; + constants[i]=v; + + } + + ERR_FAIL_COND_V(line_count*8>total_len,ERR_INVALID_DATA); + + for(int i=0;i<line_count;i++) { + + uint32_t token=decode_uint32(b); + b+=4; + uint32_t linecol=decode_uint32(b); + b+=4; + + lines.insert(token,linecol); + total_len-=8; + } + + tokens.resize(token_count); + + for(int i=0;i<token_count;i++) { + + ERR_FAIL_COND_V( total_len < 1, ERR_INVALID_DATA); + + if ((*b)&TOKEN_BYTE_MASK) { //little endian always + ERR_FAIL_COND_V( total_len < 4, ERR_INVALID_DATA); + + tokens[i]=decode_uint32(b)&~TOKEN_BYTE_MASK; + b+=4; + } else { + tokens[i]=*b; + b+=1; + total_len--; + } + } + + token=0; + + return OK; + +} + + +Vector<uint8_t> GDTokenizerBuffer::parse_code_string(const String& p_code) { + + Vector<uint8_t> buf; + + + Map<StringName,int> identifier_map; + HashMap<Variant,int,VariantHasher> constant_map; + Map<uint32_t,int> line_map; + Vector<uint32_t> token_array; + + GDTokenizerText tt; + tt.set_code(p_code); + int line=-1; + int col=0; + + while(true) { + + if (tt.get_token_line()!=line) { + + line=tt.get_token_line(); + line_map[line]=token_array.size(); + } + + uint32_t token=tt.get_token(); + switch(tt.get_token()) { + + case TK_IDENTIFIER: { + StringName id = tt.get_token_identifier(); + if (!identifier_map.has(id)) { + int idx = identifier_map.size(); + identifier_map[id]=idx; + } + token|=identifier_map[id]<<TOKEN_BITS; + } break; + case TK_CONSTANT: { + + Variant c = tt.get_token_constant(); + if (!constant_map.has(c)) { + int idx = constant_map.size(); + constant_map[c]=idx; + } + token|=constant_map[c]<<TOKEN_BITS; + } break; + case TK_BUILT_IN_TYPE: { + + token|=tt.get_token_type()<<TOKEN_BITS; + } break; + case TK_BUILT_IN_FUNC: { + + token|=tt.get_token_built_in_func()<<TOKEN_BITS; + + } break; + case TK_NEWLINE: { + + token|=tt.get_token_line_indent()<<TOKEN_BITS; + } break; + case TK_ERROR: { + + ERR_FAIL_V(Vector<uint8_t>()); + } break; + default: {} + + }; + + token_array.push_back(token); + + if (tt.get_token()==TK_EOF) + break; + tt.advance(); + + } + + //reverse maps + + Map<int,StringName> rev_identifier_map; + for(Map<StringName,int>::Element *E=identifier_map.front();E;E=E->next()) { + rev_identifier_map[E->get()]=E->key(); + } + + Map<int,Variant> rev_constant_map; + const Variant *K =NULL; + while((K=constant_map.next(K))) { + rev_constant_map[constant_map[*K]]=*K; + } + + Map<int,uint32_t> rev_line_map; + for(Map<uint32_t,int>::Element *E=line_map.front();E;E=E->next()) { + rev_line_map[E->get()]=E->key(); + } + + //save header + buf.resize(24); + buf[0]='G'; + buf[1]='D'; + buf[2]='S'; + buf[3]='C'; + encode_uint32(BYTECODE_VERSION,&buf[4]); + encode_uint32(identifier_map.size(),&buf[8]); + encode_uint32(constant_map.size(),&buf[12]); + encode_uint32(line_map.size(),&buf[16]); + encode_uint32(token_array.size(),&buf[20]); + + //save identifiers + + for(Map<int,StringName>::Element *E=rev_identifier_map.front();E;E=E->next()) { + + CharString cs = String(E->get()).utf8(); + int len = cs.length()+1; + int extra = 4-(len%4); + if (extra==4) + extra=0; + + uint8_t ibuf[4]; + encode_uint32(len+extra,ibuf); + for(int i=0;i<4;i++) { + buf.push_back(ibuf[i]); + } + for(int i=0;i<len;i++) { + buf.push_back(cs[i]^0xb6); + } + for(int i=0;i<extra;i++) { + buf.push_back(0^0xb6); + } + } + + for(Map<int,Variant>::Element *E=rev_constant_map.front();E;E=E->next()) { + + int len; + Error err = encode_variant(E->get(),NULL,len); + ERR_FAIL_COND_V(err!=OK,Vector<uint8_t>()); + int pos=buf.size(); + buf.resize(pos+len); + encode_variant(E->get(),&buf[pos],len); + } + + for(Map<int,uint32_t>::Element *E=rev_line_map.front();E;E=E->next()) { + + uint8_t ibuf[8]; + encode_uint32(E->key(),&ibuf[0]); + encode_uint32(E->get(),&ibuf[4]); + for(int i=0;i<8;i++) + buf.push_back(ibuf[i]); + } + + for(int i=0;i<token_array.size();i++) { + + uint32_t token = token_array[i]; + + if (token&~TOKEN_MASK) { + uint8_t buf4[4]; + encode_uint32(token_array[i]|TOKEN_BYTE_MASK,&buf4[0]); + for(int j=0;j<4;j++) { + buf.push_back(buf4[j]); + } + } else { + buf.push_back(token); + } + } + + return buf; + +} + +GDTokenizerBuffer::Token GDTokenizerBuffer::get_token(int p_offset) const { + + int offset = token+p_offset; + + if (offset<0 || offset>=tokens.size()) + return TK_EOF; + + return GDTokenizerBuffer::Token(tokens[offset]&TOKEN_MASK); +} + + +StringName GDTokenizerBuffer::get_token_identifier(int p_offset) const{ + + int offset = token+p_offset; + + ERR_FAIL_INDEX_V(offset,tokens.size(),StringName()); + uint32_t identifier = tokens[offset]>>TOKEN_BITS; + ERR_FAIL_INDEX_V(identifier,identifiers.size(),StringName()); + + return identifiers[identifier]; +} + +GDFunctions::Function GDTokenizerBuffer::get_token_built_in_func(int p_offset) const{ + + int offset = token+p_offset; + ERR_FAIL_INDEX_V(offset,tokens.size(),GDFunctions::FUNC_MAX); + return GDFunctions::Function(tokens[offset]>>TOKEN_BITS); +} + +Variant::Type GDTokenizerBuffer::get_token_type(int p_offset) const{ + + int offset = token+p_offset; + ERR_FAIL_INDEX_V(offset,tokens.size(),Variant::NIL); + + return Variant::Type(tokens[offset]>>TOKEN_BITS); +} + +int GDTokenizerBuffer::get_token_line(int p_offset) const{ + + int offset = token+p_offset; + int pos = lines.find_nearest(offset); + + if (pos<0) + return -1; + if (pos>=lines.size()) + pos=lines.size()-1; + + uint32_t l = lines.getv(pos); + return l&TOKEN_LINE_MASK; + +} +int GDTokenizerBuffer::get_token_column(int p_offset) const{ + + int offset = token+p_offset; + int pos = lines.find_nearest(offset); + if (pos<0) + return -1; + if (pos>=lines.size()) + pos=lines.size()-1; + + uint32_t l = lines.getv(pos); + return l>>TOKEN_LINE_BITS; + +} +int GDTokenizerBuffer::get_token_line_indent(int p_offset) const{ + + int offset = token+p_offset; + ERR_FAIL_INDEX_V(offset,tokens.size(),0); + return tokens[offset]>>TOKEN_BITS; +} +const Variant& GDTokenizerBuffer::get_token_constant(int p_offset) const{ + + + int offset = token+p_offset; + ERR_FAIL_INDEX_V(offset,tokens.size(),nil); + uint32_t constant = tokens[offset]>>TOKEN_BITS; + ERR_FAIL_INDEX_V(constant,constants.size(),nil); + return constants[constant]; + +} +String GDTokenizerBuffer::get_token_error(int p_offset) const{ + + ERR_FAIL_V(String()); +} + +void GDTokenizerBuffer::advance(int p_amount){ + + ERR_FAIL_INDEX(p_amount+token,tokens.size()); + token+=p_amount; +} +GDTokenizerBuffer::GDTokenizerBuffer(){ + + token=0; + +} + |