From f8a7c462736bf886adc8bc3bbf424d534391d3dc Mon Sep 17 00:00:00 2001
From: Karroffel
Date: Fri, 30 Sep 2016 21:40:31 +0200
Subject: pattern matching: implemented parser

---
 modules/gdscript/gd_parser.cpp    | 214 ++++++++++++++++++++++++++++++++++++++
 modules/gdscript/gd_parser.h      |  36 ++++++-
 modules/gdscript/gd_tokenizer.cpp |   2 +
 modules/gdscript/gd_tokenizer.h   |   1 +
 4 files changed, 252 insertions(+), 1 deletion(-)
(limited to 'modules')

diff --git a/modules/gdscript/gd_parser.cpp b/modules/gdscript/gd_parser.cpp
index 8f4f5ef4ca..99e5944f80 100644
--- a/modules/gdscript/gd_parser.cpp
+++ b/modules/gdscript/gd_parser.cpp
@@ -1567,6 +1567,193 @@ bool GDParser::_recover_from_completion() {
 	return true;
 }
 
+GDParser::PatternNode *GDParser::_parse_pattern(bool p_static)
+{
+
+	PatternNode *pattern = memnew(PatternNode);
+
+	GDTokenizer::Token token = tokenizer->get_token();
+	if (error_set)
+		return NULL;
+
+	switch (token) {
+		// all the constants like strings and numbers
+		case GDTokenizer::TK_CONSTANT: {
+			Node *value = _parse_and_reduce_expression(pattern, p_static);
+			if (value->type != GDParser::Node::TYPE_CONSTANT) {
+				_set_error("Not a constant expression");
+				return NULL;
+			}
+			pattern->pt_type = GDParser::PatternNode::PT_CONSTANT;
+			pattern->constant = static_cast<ConstantNode*>(value);
+		} break;
+
+		case GDTokenizer::TK_BRACKET_OPEN: {
+			tokenizer->advance();
+			pattern->pt_type = GDParser::PatternNode::PT_ARRAY;
+			while (true) {
+
+				if (tokenizer->get_token() == GDTokenizer::TK_BRACKET_CLOSE) {
+					tokenizer->advance();
+					break;
+				}
+
+				if (tokenizer->get_token() == GDTokenizer::TK_PERIOD && tokenizer->get_token(1) == GDTokenizer::TK_PERIOD) {
+					// match everything
+					tokenizer->advance(2);
+					pattern->pt_type = GDParser::PatternNode::PT_IGNORE_REST;
+					if (tokenizer->get_token() == GDTokenizer::TK_COMMA && tokenizer->get_token(1) == GDTokenizer::TK_BRACKET_CLOSE) {
+						tokenizer->advance(2);
+						break;
+					} else if (tokenizer->get_token() == GDTokenizer::TK_BRACKET_CLOSE) {
+						tokenizer->advance(1);
+						break;
+					} else {
+						_set_error("'..' pattern only allowed at the end of an array pattern");
+						return NULL;
+					}
+				}
+
+				PatternNode *sub_pattern = _parse_pattern(p_static);
+				if (!sub_pattern) {
+					return NULL;
+				}
+
+				pattern->array.push_back(sub_pattern);
+
+				if (tokenizer->get_token() == GDTokenizer::TK_COMMA) {
+					tokenizer->advance();
+					continue;
+				} else if (tokenizer->get_token() == GDTokenizer::TK_BRACKET_CLOSE) {
+					tokenizer->advance();
+					break;
+				} else {
+					_set_error("Not a valid pattern");
+					return NULL;
+				}
+			}
+		} break;
+
+		case GDTokenizer::TK_IDENTIFIER: {
+			pattern->pt_type = GDParser::PatternNode::PT_BIND;
+			pattern->bind = tokenizer->get_token_identifier();
+			tokenizer->advance();
+		} break;
+
+		case GDTokenizer::TK_CURLY_BRACKET_OPEN: {
+			tokenizer->advance();
+			pattern->pt_type = GDParser::PatternNode::PT_DICITIONARY;
+			while (true) {
+
+				if (tokenizer->get_token() == GDTokenizer::TK_CURLY_BRACKET_CLOSE) {
+					tokenizer->advance();
+					break;
+				}
+
+				if (tokenizer->get_token() == GDTokenizer::TK_PERIOD && tokenizer->get_token(1) == GDTokenizer::TK_PERIOD) {
+					// match everything
+					tokenizer->advance(2);
+					pattern->pt_type = GDParser::PatternNode::PT_IGNORE_REST;
+					if (tokenizer->get_token() == GDTokenizer::TK_COMMA && tokenizer->get_token(1) == GDTokenizer::TK_CURLY_BRACKET_CLOSE) {
+						tokenizer->advance(2);
+						break;
+					} else if (tokenizer->get_token() == GDTokenizer::TK_CURLY_BRACKET_CLOSE) {
+						tokenizer->advance(1);
+						break;
+					} else {
+						_set_error("'..' pattern only allowed at the end of a dictionary pattern");
+						return NULL;
+					}
+				}
+
+				Node *key = _parse_and_reduce_expression(pattern, p_static);
+				if (!key) {
+					_set_error("Not a valid key in pattern");
+					return NULL;
+				}
+
+				if (key->type != GDParser::Node::TYPE_CONSTANT) {
+					_set_error("Not a constant expression as key");
+					return NULL;
+				}
+
+				if (tokenizer->get_token() == GDTokenizer::TK_COLON) {
+					tokenizer->advance();
+
+					PatternNode *value = _parse_pattern(p_static);
+					if (!value) {
+						_set_error("Expected pattern in dictionary value");
+						return NULL;
+					}
+
+					pattern->dictionary.insert(static_cast<ConstantNode*>(key), value);
+				} else {
+					pattern->dictionary.insert(static_cast<ConstantNode*>(key), NULL);
+				}
+
+
+				if (tokenizer->get_token() == GDTokenizer::TK_COMMA) {
+					tokenizer->advance();
+					continue;
+				} else if (tokenizer->get_token() == GDTokenizer::TK_CURLY_BRACKET_CLOSE) {
+					tokenizer->advance();
+					break;
+				} else {
+					_set_error("Not a valid pattern");
+					return NULL;
+				}
+			}
+		} break;
+
+		default: {
+			_set_error("Not a valid pattern");
+			return NULL;
+		}
+	}
+
+	return pattern;
+}
+
+void GDParser::_parse_pattern_block(Vector<PatternBranchNode*> &p_block, bool p_static)
+{
+	int indent_level = tab_level.back()->get();
+
+	while (true) {
+
+		while (tokenizer->get_token() == GDTokenizer::TK_NEWLINE && _parse_newline());
+
+		// GDTokenizer::Token token = tokenizer->get_token();
+		if (error_set)
+			return;
+
+		if (indent_level > tab_level.back()->get()) {
+			return; // go back a level
+		}
+
+		if (pending_newline!=-1) {
+			pending_newline=-1;
+		}
+
+		PatternBranchNode *branch = memnew(PatternBranchNode);
+
+		branch->pattern = _parse_pattern(p_static);
+		if (!branch->pattern) {
+			return;
+		}
+
+		if(!_enter_indent_block()) {
+			_set_error("Expected block in pattern branch");
+			return;
+		}
+
+		branch->body = memnew(BlockNode);
+
+		_parse_block(branch->body, p_static);
+
+		p_block.push_back(branch);
+	}
+}
+
 void GDParser::_parse_block(BlockNode *p_block,bool p_static) {
 
 	int indent_level = tab_level.back()->get();
@@ -1969,6 +2156,33 @@ void GDParser::_parse_block(BlockNode *p_block,bool p_static) {
 
 				}
 
+			} break;
+			case GDTokenizer::TK_CF_MATCH: {
+
+				tokenizer->advance();
+
+				ControlFlowNode *match_node = memnew(ControlFlowNode);
+				match_node->cf_type = ControlFlowNode::CF_MATCH;
+
+				Node *val_to_match = _parse_and_reduce_expression(p_block, p_static);
+
+				if (!val_to_match) {
+					if (_recover_from_completion()) {
+						break;
+					}
+					return;
+				}
+
+				match_node->arguments.push_back(val_to_match);
+
+				if (!_enter_indent_block()) {
+					_set_error("Expected indented pattern matching block after 'match'");
+					return;
+				}
+
+				_parse_pattern_block(match_node->branches, p_static);
+
+				p_block->statements.push_back(match_node);
 			} break;
 			case GDTokenizer::TK_PR_ASSERT: {
 
diff --git a/modules/gdscript/gd_parser.h b/modules/gdscript/gd_parser.h
index 75653e0916..f36cb8a732 100644
--- a/modules/gdscript/gd_parser.h
+++ b/modules/gdscript/gd_parser.h
@@ -257,6 +257,31 @@ public:
 		Vector<Node*> arguments;
 		OperatorNode() { type=TYPE_OPERATOR; }
 	};
+
+
+	struct PatternNode : public Node {
+
+		enum PatternType {
+			PT_CONSTANT,
+			PT_BIND,
+			PT_DICITIONARY,
+			PT_ARRAY,
+			PT_IGNORE_REST
+		};
+
+		PatternType pt_type;
+
+		ConstantNode *constant;
+		StringName bind;
+		Map<ConstantNode*, PatternNode*> dictionary;
+		Vector<PatternNode*> array;
+
+	};
+
+	struct PatternBranchNode : public Node {
+		PatternNode *pattern;
+		BlockNode *body;
+	};
 
 	struct ControlFlowNode : public Node {
 		enum CFType {
@@ -266,13 +291,15 @@ public:
 			CF_SWITCH,
 			CF_BREAK,
 			CF_CONTINUE,
-			CF_RETURN
+			CF_RETURN,
+			CF_MATCH
 		};
 
 		CFType cf_type;
 		Vector<Node*> arguments;
 		BlockNode *body;
 		BlockNode *body_else;
+		Vector<PatternBranchNode*> branches;
 
 		ControlFlowNode *_else; //used for if
 		ControlFlowNode() { type=TYPE_CONTROL_FLOW; cf_type=CF_IF; body=NULL; body_else=NULL;}
@@ -450,6 +477,13 @@ private:
 	Node* _reduce_expression(Node *p_node,bool p_to_const=false);
 	Node* _parse_and_reduce_expression(Node *p_parent,bool p_static,bool p_reduce_const=false,bool p_allow_assign=false);
+
+	// TODO
+	void _parse_pattern_block(Vector<PatternBranchNode*> &p_block, bool p_static);
+
+	PatternNode *_parse_pattern(bool p_static);
+
+
 
 	void _parse_block(BlockNode *p_block,bool p_static);
 	void _parse_extends(ClassNode *p_class);
 	void _parse_class(ClassNode *p_class);
diff --git a/modules/gdscript/gd_tokenizer.cpp b/modules/gdscript/gd_tokenizer.cpp
index 2041ec12ad..13ec059f84 100644
--- a/modules/gdscript/gd_tokenizer.cpp
+++ b/modules/gdscript/gd_tokenizer.cpp
@@ -85,6 +85,7 @@ const char* GDTokenizer::token_names[TK_MAX]={
 "continue",
 "pass",
 "return",
+"match",
 "func",
 "class",
 "extends",
@@ -888,6 +889,7 @@ void GDTokenizerText::_advance() {
 						{TK_CF_BREAK,"break"},
 						{TK_CF_CONTINUE,"continue"},
 						{TK_CF_RETURN,"return"},
+						{TK_CF_MATCH, "match"},
 						{TK_CF_PASS,"pass"},
 						{TK_SELF,"self"},
 						{TK_CONST_PI,"PI"},
diff --git a/modules/gdscript/gd_tokenizer.h b/modules/gdscript/gd_tokenizer.h
index b91229ab1e..f67e962ca3 100644
--- a/modules/gdscript/gd_tokenizer.h
+++ b/modules/gdscript/gd_tokenizer.h
@@ -92,6 +92,7 @@ public:
 		TK_CF_CONTINUE,
 		TK_CF_PASS,
 		TK_CF_RETURN,
+		TK_CF_MATCH,
 		TK_PR_FUNCTION,
 		TK_PR_CLASS,
 		TK_PR_EXTENDS,
-- 
cgit v1.2.3
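
For readers coming to this patch cold, the following is a sketch of the GDScript surface syntax the new _parse_pattern()/_parse_pattern_block() functions are written to accept, inferred only from the cases handled above (constant patterns, identifier binds, array patterns with a trailing '..', and dictionary patterns with constant keys and optional value patterns). The get_state() call is a hypothetical stand-in for any expression, and match execution semantics are not part of this commit, so the snippet is illustrative rather than authoritative:

    # Hypothetical GDScript, assuming the syntax implied by _parse_pattern()
    match get_state():
        42:                        # PT_CONSTANT: matches the constant 42
            print("the answer")
        [1, 2, ..]:                # PT_ARRAY: first two elements, '..' ignores the rest
            print("starts with 1, 2")
        {"name": "godot", "id"}:   # PT_DICITIONARY: constant keys, value pattern optional
            print("dictionary case")
        anything:                  # PT_BIND: binds the matched value to 'anything'
            print(anything)

Each branch becomes a PatternBranchNode: the pattern before the colon, plus the indented block that _parse_block() parses as its body.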