From afbde3314aee106c835249b2f56c14d68f782899 Mon Sep 17 00:00:00 2001 From: Bojidar Marinov Date: Sun, 13 Oct 2019 22:48:18 +0300 Subject: Allow mixed tabs and spaces when indentation does not depend on tab size (hopefully) Closes #30937, fixes #32612 --- modules/gdscript/gdscript_parser.cpp | 76 ++++++++++++++++++++++----------- modules/gdscript/gdscript_parser.h | 22 +++++++++- modules/gdscript/gdscript_tokenizer.cpp | 71 ++++++++++++------------------ modules/gdscript/gdscript_tokenizer.h | 10 ++--- 4 files changed, 102 insertions(+), 77 deletions(-) diff --git a/modules/gdscript/gdscript_parser.cpp b/modules/gdscript/gdscript_parser.cpp index 21434cd150..857bcbb706 100644 --- a/modules/gdscript/gdscript_parser.cpp +++ b/modules/gdscript/gdscript_parser.cpp @@ -89,8 +89,8 @@ bool GDScriptParser::_enter_indent_block(BlockNode *p_block) { if (tokenizer->get_token() != GDScriptTokenizer::TK_NEWLINE) { // be more python-like - int current = tab_level.back()->get(); - tab_level.push_back(current); + IndentLevel current_level = indent_level.back()->get(); + indent_level.push_back(current_level); return true; //_set_error("newline expected after ':'."); //return false; @@ -105,12 +105,19 @@ bool GDScriptParser::_enter_indent_block(BlockNode *p_block) { } else if (tokenizer->get_token(1) != GDScriptTokenizer::TK_NEWLINE) { int indent = tokenizer->get_token_line_indent(); - int current = tab_level.back()->get(); - if (indent <= current) { + int tabs = tokenizer->get_token_line_tab_indent(); + IndentLevel current_level = indent_level.back()->get(); + IndentLevel new_indent(indent, tabs); + if (new_indent.is_mixed(current_level)) { + _set_error("Mixed tabs and spaces in indentation."); return false; } - tab_level.push_back(indent); + if (indent <= current_level.indent) { + return false; + } + + indent_level.push_back(new_indent); tokenizer->advance(); return true; @@ -2213,7 +2220,7 @@ GDScriptParser::PatternNode *GDScriptParser::_parse_pattern(bool p_static) { } void GDScriptParser::_parse_pattern_block(BlockNode *p_block, Vector &p_branches, bool p_static) { - int indent_level = tab_level.back()->get(); + IndentLevel current_level = indent_level.back()->get(); p_block->has_return = true; @@ -2228,7 +2235,7 @@ void GDScriptParser::_parse_pattern_block(BlockNode *p_block, Vector tab_level.back()->get()) { + if (current_level.indent > indent_level.back()->get().indent) { break; // go back a level } @@ -2685,7 +2692,7 @@ void GDScriptParser::_transform_match_statment(MatchNode *p_match_statement) { void GDScriptParser::_parse_block(BlockNode *p_block, bool p_static) { - int indent_level = tab_level.back()->get(); + IndentLevel current_level = indent_level.back()->get(); #ifdef DEBUG_ENABLED @@ -2698,9 +2705,13 @@ void GDScriptParser::_parse_block(BlockNode *p_block, bool p_static) { bool is_first_line = true; while (true) { - if (!is_first_line && tab_level.back()->prev() && tab_level.back()->prev()->get() == indent_level) { + if (!is_first_line && indent_level.back()->prev() && indent_level.back()->prev()->get().indent == current_level.indent) { + if (indent_level.back()->prev()->get().is_mixed(current_level)) { + _set_error("Mixed tabs and spaces in indentation."); + return; + } // pythonic single-line expression, don't parse future lines - tab_level.pop_back(); + indent_level.pop_back(); p_block->end_line = tokenizer->get_token_line(); return; } @@ -2710,7 +2721,7 @@ void GDScriptParser::_parse_block(BlockNode *p_block, bool p_static) { if (error_set) return; - if (indent_level > tab_level.back()->get()) { + if (current_level.indent > indent_level.back()->get().indent) { p_block->end_line = tokenizer->get_token_line(); return; //go back a level } @@ -2914,14 +2925,14 @@ void GDScriptParser::_parse_block(BlockNode *p_block, bool p_static) { while (tokenizer->get_token() == GDScriptTokenizer::TK_NEWLINE && _parse_newline()) ; - if (tab_level.back()->get() < indent_level) { //not at current indent level + if (indent_level.back()->get().indent < current_level.indent) { //not at current indent level p_block->end_line = tokenizer->get_token_line(); return; } if (tokenizer->get_token() == GDScriptTokenizer::TK_CF_ELIF) { - if (tab_level.back()->get() > indent_level) { + if (indent_level.back()->get().indent > current_level.indent) { _set_error("Invalid indentation."); return; @@ -2969,7 +2980,7 @@ void GDScriptParser::_parse_block(BlockNode *p_block, bool p_static) { } else if (tokenizer->get_token() == GDScriptTokenizer::TK_CF_ELSE) { - if (tab_level.back()->get() > indent_level) { + if (indent_level.back()->get().indent > current_level.indent) { _set_error("Invalid indentation."); return; } @@ -3341,32 +3352,45 @@ bool GDScriptParser::_parse_newline() { if (tokenizer->get_token(1) != GDScriptTokenizer::TK_EOF && tokenizer->get_token(1) != GDScriptTokenizer::TK_NEWLINE) { + IndentLevel current_level = indent_level.back()->get(); int indent = tokenizer->get_token_line_indent(); - int current_indent = tab_level.back()->get(); + int tabs = tokenizer->get_token_line_tab_indent(); + IndentLevel new_level(indent, tabs); + + if (new_level.is_mixed(current_level)) { + _set_error("Mixed tabs and spaces in indentation."); + return false; + } - if (indent > current_indent) { + if (indent > current_level.indent) { _set_error("Unexpected indentation."); return false; } - if (indent < current_indent) { + if (indent < current_level.indent) { - while (indent < current_indent) { + while (indent < current_level.indent) { //exit block - if (tab_level.size() == 1) { + if (indent_level.size() == 1) { _set_error("Invalid indentation. Bug?"); return false; } - tab_level.pop_back(); + indent_level.pop_back(); - if (tab_level.back()->get() < indent) { + if (indent_level.back()->get().indent < indent) { _set_error("Unindent does not match any outer indentation level."); return false; } - current_indent = tab_level.back()->get(); + + if (indent_level.back()->get().is_mixed(current_level)) { + _set_error("Mixed tabs and spaces in indentation."); + return false; + } + + current_level = indent_level.back()->get(); } tokenizer->advance(); @@ -3464,7 +3488,7 @@ void GDScriptParser::_parse_extends(ClassNode *p_class) { void GDScriptParser::_parse_class(ClassNode *p_class) { - int indent_level = tab_level.back()->get(); + IndentLevel current_level = indent_level.back()->get(); while (true) { @@ -3472,7 +3496,7 @@ void GDScriptParser::_parse_class(ClassNode *p_class) { if (error_set) return; - if (indent_level > tab_level.back()->get()) { + if (current_level.indent > indent_level.back()->get().indent) { p_class->end_line = tokenizer->get_token_line(); return; //go back a level } @@ -8546,8 +8570,8 @@ void GDScriptParser::clear() { validating = false; for_completion = false; error_set = false; - tab_level.clear(); - tab_level.push_back(0); + indent_level.clear(); + indent_level.push_back(IndentLevel(0, 0)); error_line = 0; error_column = 0; pending_newline = -1; diff --git a/modules/gdscript/gdscript_parser.h b/modules/gdscript/gdscript_parser.h index 04ce9cf4c6..93557d745d 100644 --- a/modules/gdscript/gdscript_parser.h +++ b/modules/gdscript/gdscript_parser.h @@ -552,7 +552,27 @@ private: int pending_newline; - List tab_level; + struct IndentLevel { + int indent; + int tabs; + + bool is_mixed(IndentLevel other) { + return ( + (indent == other.indent && tabs != other.tabs) || + (indent > other.indent && tabs < other.tabs) || + (indent < other.indent && tabs > other.tabs)); + } + + IndentLevel() : + indent(0), + tabs(0) {} + + IndentLevel(int p_indent, int p_tabs) : + indent(p_indent), + tabs(p_tabs) {} + }; + + List indent_level; String base_path; String self_path; diff --git a/modules/gdscript/gdscript_tokenizer.cpp b/modules/gdscript/gdscript_tokenizer.cpp index 8b20b0ff48..c2e2c4c3c9 100644 --- a/modules/gdscript/gdscript_tokenizer.cpp +++ b/modules/gdscript/gdscript_tokenizer.cpp @@ -450,11 +450,11 @@ void GDScriptTokenizerText::_make_error(const String &p_error) { tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE; } -void GDScriptTokenizerText::_make_newline(int p_spaces) { +void GDScriptTokenizerText::_make_newline(int p_indentation, int p_tabs) { TokenData &tk = tk_rb[tk_rb_pos]; tk.type = TK_NEWLINE; - tk.constant = p_spaces; + tk.constant = Vector2(p_indentation, p_tabs); tk.line = line; tk.col = column; tk_rb_pos = (tk_rb_pos + 1) % TK_RB_SIZE; @@ -511,33 +511,6 @@ void GDScriptTokenizerText::_advance() { case ' ': INCPOS(1); continue; - case '\n': { - line++; - INCPOS(1); - column = 1; - int i = 0; - while (true) { - if (GETCHAR(i) == ' ') { - if (file_indent_type == INDENT_NONE) file_indent_type = INDENT_SPACES; - if (file_indent_type != INDENT_SPACES) { - _make_error("Spaces used for indentation in tab-indented file!"); - return; - } - } else if (GETCHAR(i) == '\t') { - if (file_indent_type == INDENT_NONE) file_indent_type = INDENT_TABS; - if (file_indent_type != INDENT_TABS) { - _make_error("Tabs used for indentation in space-indented file!"); - return; - } - } else { - break; // not indentation anymore - } - i++; - } - - _make_newline(i); - return; - } case '#': { // line comment skip #ifdef DEBUG_ENABLED String comment; @@ -565,33 +538,34 @@ void GDScriptTokenizerText::_advance() { ignore_warnings = true; } #endif // DEBUG_ENABLED + FALLTHROUGH; + } + case '\n': { + line++; INCPOS(1); + bool used_spaces = false; + int tabs = 0; column = 1; - line++; int i = 0; while (true) { if (GETCHAR(i) == ' ') { - if (file_indent_type == INDENT_NONE) file_indent_type = INDENT_SPACES; - if (file_indent_type != INDENT_SPACES) { - _make_error("Spaces used for indentation in tab-indented file!"); - return; - } + i++; + used_spaces = true; } else if (GETCHAR(i) == '\t') { - if (file_indent_type == INDENT_NONE) file_indent_type = INDENT_TABS; - if (file_indent_type != INDENT_TABS) { - _make_error("Tabs used for indentation in space-indented file!"); + if (used_spaces) { + _make_error("Spaces used before tabs on a line"); return; } + i++; + tabs++; } else { break; // not indentation anymore } - i++; } - _make_newline(i); + _make_newline(i, tabs); return; - - } break; + } case '/': { switch (GETCHAR(1)) { @@ -1112,7 +1086,6 @@ void GDScriptTokenizerText::set_code(const String &p_code) { ignore_warnings = false; #endif // DEBUG_ENABLED last_error = ""; - file_indent_type = INDENT_NONE; for (int i = 0; i < MAX_LOOKAHEAD + 1; i++) _advance(); } @@ -1187,7 +1160,17 @@ int GDScriptTokenizerText::get_token_line_indent(int p_offset) const { int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE; ERR_FAIL_COND_V(tk_rb[ofs].type != TK_NEWLINE, 0); - return tk_rb[ofs].constant; + return tk_rb[ofs].constant.operator Vector2().x; +} + +int GDScriptTokenizerText::get_token_line_tab_indent(int p_offset) const { + + ERR_FAIL_COND_V(p_offset <= -MAX_LOOKAHEAD, 0); + ERR_FAIL_COND_V(p_offset >= MAX_LOOKAHEAD, 0); + + int ofs = (TK_RB_SIZE + tk_rb_pos + p_offset - MAX_LOOKAHEAD - 1) % TK_RB_SIZE; + ERR_FAIL_COND_V(tk_rb[ofs].type != TK_NEWLINE, 0); + return tk_rb[ofs].constant.operator Vector2().y; } String GDScriptTokenizerText::get_token_error(int p_offset) const { diff --git a/modules/gdscript/gdscript_tokenizer.h b/modules/gdscript/gdscript_tokenizer.h index 89d586b912..58749012b7 100644 --- a/modules/gdscript/gdscript_tokenizer.h +++ b/modules/gdscript/gdscript_tokenizer.h @@ -168,6 +168,7 @@ public: virtual int get_token_line(int p_offset = 0) const = 0; virtual int get_token_column(int p_offset = 0) const = 0; virtual int get_token_line_indent(int p_offset = 0) const = 0; + virtual int get_token_line_tab_indent(int p_offset = 0) const = 0; virtual String get_token_error(int p_offset = 0) const = 0; virtual void advance(int p_amount = 1) = 0; #ifdef DEBUG_ENABLED @@ -205,7 +206,7 @@ class GDScriptTokenizerText : public GDScriptTokenizer { }; void _make_token(Token p_type); - void _make_newline(int p_spaces = 0); + void _make_newline(int p_indentation = 0, int p_tabs = 0); void _make_identifier(const StringName &p_identifier); void _make_built_in_func(GDScriptFunctions::Function p_func); void _make_constant(const Variant &p_constant); @@ -222,11 +223,6 @@ class GDScriptTokenizerText : public GDScriptTokenizer { int tk_rb_pos; String last_error; bool error_flag; - enum { - INDENT_NONE, - INDENT_SPACES, - INDENT_TABS, - } file_indent_type; #ifdef DEBUG_ENABLED Vector > warning_skips; @@ -245,6 +241,7 @@ public: virtual int get_token_line(int p_offset = 0) const; virtual int get_token_column(int p_offset = 0) const; virtual int get_token_line_indent(int p_offset = 0) const; + virtual int get_token_line_tab_indent(int p_offset = 0) const; virtual const Variant &get_token_constant(int p_offset = 0) const; virtual String get_token_error(int p_offset = 0) const; virtual void advance(int p_amount = 1); @@ -283,6 +280,7 @@ public: virtual int get_token_line(int p_offset = 0) const; virtual int get_token_column(int p_offset = 0) const; virtual int get_token_line_indent(int p_offset = 0) const; + virtual int get_token_line_tab_indent(int p_offset = 0) const { return 0; } virtual const Variant &get_token_constant(int p_offset = 0) const; virtual String get_token_error(int p_offset = 0) const; virtual void advance(int p_amount = 1); -- cgit v1.2.3