summaryrefslogtreecommitdiff
path: root/thirdparty/embree/common/lexers
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/embree/common/lexers')
-rw-r--r--thirdparty/embree/common/lexers/parsestream.h101
-rw-r--r--thirdparty/embree/common/lexers/stream.h215
-rw-r--r--thirdparty/embree/common/lexers/streamfilters.h39
-rw-r--r--thirdparty/embree/common/lexers/stringstream.cpp51
-rw-r--r--thirdparty/embree/common/lexers/stringstream.h29
-rw-r--r--thirdparty/embree/common/lexers/tokenstream.cpp181
-rw-r--r--thirdparty/embree/common/lexers/tokenstream.h164
7 files changed, 780 insertions, 0 deletions
diff --git a/thirdparty/embree/common/lexers/parsestream.h b/thirdparty/embree/common/lexers/parsestream.h
new file mode 100644
index 0000000000..f65a52cb47
--- /dev/null
+++ b/thirdparty/embree/common/lexers/parsestream.h
@@ -0,0 +1,101 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "stringstream.h"
+#include "../sys/filename.h"
+#include "../math/vec2.h"
+#include "../math/vec3.h"
+#include "../math/col3.h"
+#include "../math/color.h"
+
+namespace embree
+{
+ /*! helper class for simple command line parsing: reads string tokens from a wrapped stream and converts them to typed values */
+ class ParseStream : public Stream<std::string>
+ {
+ public:
+ ParseStream (const Ref<Stream<std::string> >& cin) : cin(cin) {} // wraps an existing stream of string tokens
+
+ ParseStream (const Ref<Stream<int> >& cin, const std::string& seps = "\n\t\r ",
+ const std::string& endl = "", bool multiLine = false)
+ : cin(new StringStream(cin,seps,endl,multiLine)) {} // tokenizes a character stream through a StringStream
+
+ public:
+ ParseLocation location() { return cin->loc(); } // location reported by the wrapped stream for its next token
+ std::string next() { return cin->get(); } // pulls the next token from the wrapped stream
+
+ void force(const std::string& next) { // consumes one token and raises a runtime error unless it equals `next`
+ std::string token = getString();
+ if (token != next)
+ THROW_RUNTIME_ERROR("token \""+next+"\" expected but token \""+token+"\" found");
+ }
+
+ std::string getString() { // consumes one token and returns it unchanged
+ return get();
+ }
+
+ FileName getFileName() { // consumes one token and wraps it in a FileName
+ return FileName(get());
+ }
+
+ int getInt () { // consumes one token; atoi yields 0 when the token is not a number (no error is reported)
+ return atoi(get().c_str());
+ }
+
+ Vec2i getVec2i() { // consumes two tokens, in x, y order
+ int x = atoi(get().c_str());
+ int y = atoi(get().c_str());
+ return Vec2i(x,y);
+ }
+
+ Vec3ia getVec3ia() { // consumes three tokens, in x, y, z order
+ int x = atoi(get().c_str());
+ int y = atoi(get().c_str());
+ int z = atoi(get().c_str());
+ return Vec3ia(x,y,z);
+ }
+
+ float getFloat() { // consumes one token; atof yields 0.0 when the token is not a number (no error is reported)
+ return (float)atof(get().c_str());
+ }
+
+ Vec2f getVec2f() { // consumes two tokens, in x, y order
+ float x = (float)atof(get().c_str());
+ float y = (float)atof(get().c_str());
+ return Vec2f(x,y);
+ }
+
+ Vec3f getVec3f() { // consumes three tokens, in x, y, z order
+ float x = (float)atof(get().c_str());
+ float y = (float)atof(get().c_str());
+ float z = (float)atof(get().c_str());
+ return Vec3f(x,y,z);
+ }
+
+ Vec3fa getVec3fa() { // consumes three tokens, in x, y, z order
+ float x = (float)atof(get().c_str());
+ float y = (float)atof(get().c_str());
+ float z = (float)atof(get().c_str());
+ return Vec3fa(x,y,z);
+ }
+
+ Col3f getCol3f() { // consumes three tokens and passes them to Col3f in reading order
+ float x = (float)atof(get().c_str());
+ float y = (float)atof(get().c_str());
+ float z = (float)atof(get().c_str());
+ return Col3f(x,y,z);
+ }
+
+ Color getColor() { // consumes three tokens, in r, g, b order
+ float r = (float)atof(get().c_str());
+ float g = (float)atof(get().c_str());
+ float b = (float)atof(get().c_str());
+ return Color(r,g,b);
+ }
+
+ private:
+ Ref<Stream<std::string> > cin; // wrapped token stream all getters read from
+ };
+}
diff --git a/thirdparty/embree/common/lexers/stream.h b/thirdparty/embree/common/lexers/stream.h
new file mode 100644
index 0000000000..a40c15f8eb
--- /dev/null
+++ b/thirdparty/embree/common/lexers/stream.h
@@ -0,0 +1,215 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "../sys/platform.h"
+#include "../sys/ref.h"
+#include "../sys/filename.h"
+#include "../sys/string.h"
+
+#include <vector>
+#include <iostream>
+#include <cstdio>
+#include <string.h>
+
+namespace embree
+{
+ /*! stores the location (file name, line, column) of a stream element in the source */
+ class ParseLocation
+ {
+ public:
+ ParseLocation () : lineNumber(-1), colNumber(-1) {} // unknown location: negative line/column are left out by str()
+ ParseLocation (std::shared_ptr<std::string> fileName, ssize_t lineNumber, ssize_t colNumber, ssize_t /*charNumber*/) // the character offset argument is accepted but not stored
+ : fileName(fileName), lineNumber(lineNumber), colNumber(colNumber) {}
+
+ std::string str() const // formats the location as "<file> line <n> character <m>"
+ {
+ std::string str = "unknown"; // used when no file name is attached
+ if (fileName) str = *fileName;
+ if (lineNumber >= 0) str += " line " + toString(lineNumber);
+ if (lineNumber >= 0 && colNumber >= 0) str += " character " + toString(colNumber); // the column is only printed together with a line
+ return str;
+ }
+
+ private:
+ std::shared_ptr<std::string> fileName; /// name of the file (or stream) the token is from
+ ssize_t lineNumber; /// the line number the token is from
+ ssize_t colNumber; /// the character number in the current line
+ };
+
+ /*! a stream class templated over the stream elements; keeps a ring buffer of recent elements to support peek() and unget() */
+ template<typename T> class Stream : public RefCount
+ {
+ enum { BUF_SIZE = 1024 }; // capacity of the ring buffer (consumed plus pending elements)
+
+ private:
+ virtual T next() = 0; // produces the next element of the underlying source; implemented by subclasses
+ virtual ParseLocation location() = 0; // queried right before next() and stored as the location of that element
+ __forceinline std::pair<T,ParseLocation> nextHelper() { // reads one element together with its location
+ ParseLocation l = location();
+ T v = next();
+ return std::pair<T,ParseLocation>(v,l);
+ }
+ __forceinline void push_back(const std::pair<T,ParseLocation>& v) { // appends a pending element to the ring buffer
+ if (past+future == BUF_SIZE) pop_front(); // buffer full: discard the oldest consumed element to make room
+ size_t end = (start+past+future++)%BUF_SIZE;
+ buffer[end] = v;
+ }
+ __forceinline void pop_front() { // forgets the oldest consumed element; it can no longer be reached by unget()
+ if (past == 0) THROW_RUNTIME_ERROR("stream buffer empty");
+ start = (start+1)%BUF_SIZE; past--;
+ }
+ public:
+ Stream () : start(0), past(0), future(0), buffer(BUF_SIZE) {}
+ virtual ~Stream() {}
+
+ public:
+
+ const ParseLocation& loc() { // location of the element the next get() will return; reads ahead if nothing is pending
+ if (future == 0) push_back(nextHelper());
+ return buffer[(start+past)%BUF_SIZE].second;
+ }
+ T get() { // returns the current element and advances past it
+ if (future == 0) push_back(nextHelper());
+ T t = buffer[(start+past)%BUF_SIZE].first;
+ past++; future--;
+ return t;
+ }
+ const T& peek() { // returns the current element without consuming it
+ if (future == 0) push_back(nextHelper());
+ return buffer[(start+past)%BUF_SIZE].first;
+ }
+ const T& unget(size_t n = 1) { // steps back over the last n consumed elements and returns the new current element
+ if (past < n) THROW_RUNTIME_ERROR ("cannot unget that many items"); // only elements still held in the buffer can be restored
+ past -= n; future += n;
+ return peek();
+ }
+ void drop() { // consumes the current element without returning it
+ if (future == 0) push_back(nextHelper());
+ past++; future--;
+ }
+ private:
+ size_t start,past,future; // start: ring index of the oldest retained element; past: consumed elements kept for unget(); future: buffered elements not yet consumed
+ std::vector<std::pair<T,ParseLocation> > buffer; // ring buffer of BUF_SIZE (element, location) pairs
+ };
+
+ /*! wraps an iostream stream as a character stream with line/column tracking */
+ class StdStream : public Stream<int>
+ {
+ public:
+ StdStream (std::istream& cin, const std::string& name = "std::stream") // only a reference to the istream is kept; `name` is used in reported locations
+ : cin(cin), lineNumber(1), colNumber(0), charNumber(0), name(std::shared_ptr<std::string>(new std::string(name))) {}
+ ~StdStream() {}
+ ParseLocation location() { // position of the character the next call to next() will read
+ return ParseLocation(name,lineNumber,colNumber,charNumber);
+ }
+ int next() { // reads one character (or the istream's end-of-file value) and updates the position counters
+ int c = cin.get();
+ if (c == '\n') { lineNumber++; colNumber = 0; } else if (c != '\r') colNumber++; // '\r' does not advance the column
+ charNumber++; // counts every call, including calls that return end-of-file
+ return c;
+ }
+ private:
+ std::istream& cin;
+ ssize_t lineNumber; /// the line number the token is from
+ ssize_t colNumber; /// the character number in the current line
+ ssize_t charNumber; /// the character in the file
+ std::shared_ptr<std::string> name; /// name of buffer
+ };
+
+ /*! creates a character stream from a file, with line/column tracking */
+ class FileStream : public Stream<int>
+ {
+ public:
+
+ FileStream (FILE* file, const std::string& name = "file") // adopts an already opened handle; the destructor closes it
+ : file(file), lineNumber(1), colNumber(0), charNumber(0), name(std::shared_ptr<std::string>(new std::string(name))) {}
+
+ FileStream (const FileName& fileName) // opens the file for reading; raises a runtime error if it cannot be opened
+ : lineNumber(1), colNumber(0), charNumber(0), name(std::shared_ptr<std::string>(new std::string(fileName.str())))
+ {
+ file = fopen(fileName.c_str(),"r");
+ if (file == nullptr) THROW_RUNTIME_ERROR("cannot open file " + fileName.str());
+ }
+ ~FileStream() { if (file) fclose(file); }
+
+ public:
+ ParseLocation location() { // position of the character the next call to next() will read
+ return ParseLocation(name,lineNumber,colNumber,charNumber);
+ }
+
+ int next() { // reads one character (or EOF) with fgetc and updates the position counters
+ int c = fgetc(file);
+ if (c == '\n') { lineNumber++; colNumber = 0; } else if (c != '\r') colNumber++; // '\r' does not advance the column
+ charNumber++; // counts every call, including calls that return EOF
+ return c;
+ }
+
+ private:
+ FILE* file;
+ ssize_t lineNumber; /// the line number the token is from
+ ssize_t colNumber; /// the character number in the current line
+ ssize_t charNumber; /// the character in the file
+ std::shared_ptr<std::string> name; /// name of buffer
+ };
+
+ /*! creates a character stream from a NUL-terminated string, with line/column tracking */
+ class StrStream : public Stream<int>
+ {
+ public:
+
+ StrStream (const char* str) // only the pointer is stored; the characters are not copied
+ : str(str), lineNumber(1), colNumber(0), charNumber(0) {}
+
+ public:
+ ParseLocation location() { // position of the character the next call to next() will read; no file name is attached
+ return ParseLocation(std::shared_ptr<std::string>(),lineNumber,colNumber,charNumber);
+ }
+
+ int next() { // returns the next character as a non-negative int, or EOF at the terminating NUL
+ int c = static_cast<unsigned char>(str[charNumber]); // widen as unsigned (like fgetc) so bytes >= 0x80 stay positive and 0xFF is not mistaken for EOF
+ if (c == 0) return EOF; // end of string: report EOF without advancing, so repeated calls keep returning EOF
+ if (c == '\n') { lineNumber++; colNumber = 0; } else if (c != '\r') colNumber++; // '\r' does not advance the column
+ charNumber++;
+ return c;
+ }
+
+ private:
+ const char* str;
+ ssize_t lineNumber; /// the line number the token is from
+ ssize_t colNumber; /// the character number in the current line
+ ssize_t charNumber; /// the character in the file
+ };
+
+ /*! creates a character stream from a command line: the arguments after argv[0], each followed by a single space */
+ class CommandLineStream : public Stream<int>
+ {
+ public:
+ CommandLineStream (int argc, char** argv, const std::string& name = "command line")
+ : i(0), j(0), charNumber(0), name(std::shared_ptr<std::string>(new std::string(name)))
+ {
+ if (argc > 0) { // start the character count after the program name (at most 1024 characters are counted)
+ for (size_t n=0; argv[0][n] && n<1024; n++) charNumber++; // local index renamed so it no longer shadows member `i`
+ charNumber++; // one more for the separator following the program name
+ }
+ for (ssize_t k=1; k<argc; k++) args.push_back(argv[k]); // argv[0] itself is not part of the stream
+ }
+ ~CommandLineStream() {}
+ public:
+ ParseLocation location() { // line number is always 0; the column is the running character count
+ return ParseLocation(name,0,charNumber,charNumber);
+ }
+ int next() { // returns the next character as a non-negative int, ' ' after each argument, EOF after the last one
+ if (i == args.size()) return EOF;
+ if (j == args[i].size()) { i++; j=0; charNumber++; return ' '; } // end of the current argument: emit a separating space
+ charNumber++;
+ return static_cast<unsigned char>(args[i][j++]); // widen as unsigned (like fgetc) so bytes >= 0x80 stay positive and 0xFF is not mistaken for EOF
+ }
+ private:
+ size_t i,j; // i: index of the current argument in args; j: index of the next character inside it
+ std::vector<std::string> args;
+ ssize_t charNumber; /// the character in the file
+ std::shared_ptr<std::string> name; /// name of buffer
+ };
+}
diff --git a/thirdparty/embree/common/lexers/streamfilters.h b/thirdparty/embree/common/lexers/streamfilters.h
new file mode 100644
index 0000000000..3592b77b03
--- /dev/null
+++ b/thirdparty/embree/common/lexers/streamfilters.h
@@ -0,0 +1,39 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "stream.h"
+
+namespace embree
+{
+ /* removes all line comments (from the comment marker up to, but not including, the end of the line) from a stream */
+ class LineCommentFilter : public Stream<int>
+ {
+ public:
+ LineCommentFilter (const FileName& fileName, const std::string& lineComment) // reads from a newly opened FileStream
+ : cin(new FileStream(fileName)), lineComment(lineComment) {}
+ LineCommentFilter (Ref<Stream<int> > cin, const std::string& lineComment) // filters an existing character stream
+ : cin(cin), lineComment(lineComment) {}
+
+ ParseLocation location() { return cin->loc(); } // location reported by the wrapped stream for its next character
+
+ int next() // returns the next character of the input that is not part of a line comment
+ {
+ /* look if the line comment starts here */
+ for (size_t j=0; j<lineComment.size(); j++) { // NOTE(review): with an empty lineComment this loop is a no-op and every line is skipped as a comment
+ if (cin->peek() != lineComment[j]) { cin->unget(j); goto not_found; } // mismatch: put back the j characters matched so far
+ cin->get();
+ }
+ /* eat all characters until the end of the line (or file) */
+ while (cin->peek() != '\n' && cin->peek() != EOF) cin->get();
+
+ not_found:
+ return cin->get(); // after a comment this is the '\n' (or EOF) that terminated it
+ }
+
+ private:
+ Ref<Stream<int> > cin; // wrapped character stream
+ std::string lineComment; // character sequence that starts a line comment
+ };
+}
diff --git a/thirdparty/embree/common/lexers/stringstream.cpp b/thirdparty/embree/common/lexers/stringstream.cpp
new file mode 100644
index 0000000000..a037869506
--- /dev/null
+++ b/thirdparty/embree/common/lexers/stringstream.cpp
@@ -0,0 +1,51 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "stringstream.h"
+
+namespace embree
+{
+ static const std::string stringChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 _.,+-=:/*\\"; // characters accepted inside a token
+
+ /* creates map for fast categorization of characters: map[c] is true iff character c occurs in chrs */
+ static void createCharMap(bool map[256], const std::string& chrs) {
+ for (size_t i=0; i<256; i++) map[i] = false;
+ for (size_t i=0; i<chrs.size(); i++) map[uint8_t(chrs[i])] = true; // index by the unsigned byte value so the index is never negative
+ }
+
+ /* simple tokenizer: stores the source stream and options and builds the separator / valid-character lookup tables */
+ StringStream::StringStream(const Ref<Stream<int> >& cin, const std::string& seps, const std::string& endl, bool multiLine)
+ : cin(cin), endl(endl), multiLine(multiLine)
+ {
+ createCharMap(isSepMap,seps); // characters of `seps` separate tokens
+ createCharMap(isValidCharMap,stringChars); // characters of the file-level stringChars set may appear in tokens
+ }
+
+ std::string StringStream::next() // returns the next token; yields an empty string once the input is exhausted
+ {
+ /* skip separators */
+ while (cin->peek() != EOF) {
+ if (endl != "" && cin->peek() == '\n') { cin->drop(); return endl; } // with an end-of-line token configured, a newline is reported as that token
+ if (multiLine && cin->peek() == '\\') { // possible line continuation
+ cin->drop();
+ if (cin->peek() == '\n') { cin->drop(); continue; } // backslash + newline: skip both and keep scanning
+ cin->unget(); // a lone backslash is put back and handled like any other character
+ }
+ if (!isSeparator(cin->peek())) break; // first character of the token found
+ cin->drop();
+ }
+
+ /* parse everything until the next separator */
+ std::vector<char> str; str.reserve(64);
+ while (cin->peek() != EOF && !isSeparator(cin->peek())) {
+ int c = cin->get();
+ // -- GODOT start --
+ // if (!isValidChar(c)) throw std::runtime_error("invalid character "+std::string(1,c)+" in input");
+ if (!isValidChar(c)) abort(); // Godot patch: terminate the process instead of throwing the exception commented out above
+ // -- GODOT end --
+ str.push_back((char)c);
+ }
+ str.push_back(0); // NUL-terminate so the buffer can be read as a C string
+ return std::string(str.data());
+ }
+}
diff --git a/thirdparty/embree/common/lexers/stringstream.h b/thirdparty/embree/common/lexers/stringstream.h
new file mode 100644
index 0000000000..6d9c27e3cd
--- /dev/null
+++ b/thirdparty/embree/common/lexers/stringstream.h
@@ -0,0 +1,29 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "stream.h"
+
+namespace embree
+{
+ /*! simple tokenizer that produces a string stream: splits a character stream into separator-delimited tokens */
+ class StringStream : public Stream<std::string>
+ {
+ public:
+ StringStream(const Ref<Stream<int> >& cin, const std::string& seps = "\n\t\r ",
+ const std::string& endl = "", bool multiLine = false);
+ public:
+ ParseLocation location() { return cin->loc(); } // location reported by the character stream for its next character
+ std::string next(); // extracts the next token
+ private:
+ __forceinline bool isSeparator(unsigned int c) const { return c<256 && isSepMap[c]; } // values outside 0..255 are never separators
+ __forceinline bool isValidChar(unsigned int c) const { return c<256 && isValidCharMap[c]; } // values outside 0..255 are never valid
+ private:
+ Ref<Stream<int> > cin; /*! source character stream */
+ bool isSepMap[256]; /*! map for fast classification of separators */
+ bool isValidCharMap[256]; /*! map for valid characters */
+ std::string endl; /*! the token of the end of line */
+ bool multiLine; /*! whether to parse lines wrapped with \ */
+ };
+}
diff --git a/thirdparty/embree/common/lexers/tokenstream.cpp b/thirdparty/embree/common/lexers/tokenstream.cpp
new file mode 100644
index 0000000000..6ed6f2045a
--- /dev/null
+++ b/thirdparty/embree/common/lexers/tokenstream.cpp
@@ -0,0 +1,181 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "tokenstream.h"
+#include "../math/math.h"
+
+namespace embree
+{
+ /* shorthands for common sets of characters (definitions of the static members declared in TokenStream) */
+ const std::string TokenStream::alpha = "abcdefghijklmnopqrstuvwxyz"; // lower-case letters
+ const std::string TokenStream::ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; // upper-case letters
+ const std::string TokenStream::numbers = "0123456789"; // decimal digits
+ const std::string TokenStream::separators = "\n\t\r "; // whitespace separators
+ const std::string TokenStream::stringChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 _.,+-=:/*\\"; // characters accepted inside quoted strings
+
+ /* creates map for fast categorization of characters: map[c] is true iff character c occurs in chrs */
+ static void createCharMap(bool map[256], const std::string& chrs) {
+ for (size_t i=0; i<256; i++) map[i] = false;
+ for (size_t i=0; i<chrs.size(); i++) map[uint8_t(chrs[i])] = true; // index by the unsigned byte value so the index is never negative
+ }
+
+ /* build full tokenizer that takes list of valid characters and keywords; precomputes the character lookup tables */
+ TokenStream::TokenStream(const Ref<Stream<int> >& cin, //< stream to read from
+ const std::string& alpha, //< valid characters for identifiers
+ const std::string& seps, //< characters that act as separators
+ const std::vector<std::string>& symbols) //< symbols
+ : cin(cin), symbols(symbols)
+ {
+ createCharMap(isAlphaMap,alpha);
+ createCharMap(isSepMap,seps);
+ createCharMap(isStringCharMap,stringChars); // string characters always come from the static TokenStream::stringChars set
+ }
+
+ bool TokenStream::decDigits(std::string& str_o) // reads an optionally signed run of decimal digits and appends it to str_o; returns false and restores the stream if no digit follows
+ {
+ bool ok = false;
+ std::string str;
+ if (cin->peek() == '+' || cin->peek() == '-') str += (char)cin->get(); // optional sign
+ while (isDigit(cin->peek())) { ok = true; str += (char)cin->get(); }
+ if (ok) str_o += str;
+ else cin->unget(str.size()); // no digits: put back the sign, if one was consumed
+ return ok;
+ }
+
+ bool TokenStream::decDigits1(std::string& str_o) // like decDigits but without a sign: appends a run of decimal digits to str_o; returns false if there is none
+ {
+ bool ok = false;
+ std::string str;
+ while (isDigit(cin->peek())) { ok = true; str += (char)cin->get(); }
+ if (ok) str_o += str; else cin->unget(str.size()); // on failure str is empty, so nothing is put back
+ return ok;
+ }
+
+ bool TokenStream::trySymbol(const std::string& symbol) // consumes `symbol` if the input starts with it; otherwise leaves the stream unchanged and returns false
+ {
+ size_t pos = 0; // number of symbol characters matched and consumed so far
+ while (pos < symbol.size()) {
+ if (symbol[pos] != cin->peek()) { cin->unget(pos); return false; } // mismatch: put back what was consumed
+ cin->drop(); pos++;
+ }
+ return true;
+ }
+
+ bool TokenStream::trySymbols(Token& token, const ParseLocation& loc) // tries the registered symbols in list order; the first one that matches becomes a TY_SYMBOL token
+ {
+ for (size_t i=0; i<symbols.size(); i++) {
+ if (!trySymbol(symbols[i])) continue;
+ token = Token(symbols[i],Token::TY_SYMBOL,loc);
+ return true;
+ }
+ return false; // no symbol matched; the stream is unchanged
+ }
+
+ bool TokenStream::tryFloat(Token& token, const ParseLocation& loc) // parses "nan", "+inf", "-inf" or a decimal float; on failure the stream is restored and false is returned
+ {
+ bool ok = false;
+ std::string str; // characters consumed so far for the numeric literal
+ if (trySymbol("nan")) {
+ token = Token(float(nan),loc); // pass loc so special values report their source position like every other token
+ return true;
+ }
+ if (trySymbol("+inf")) {
+ token = Token(float(pos_inf),loc);
+ return true;
+ }
+ if (trySymbol("-inf")) {
+ token = Token(float(neg_inf),loc);
+ return true;
+ }
+
+ if (decDigits(str)) // literal starts with an (optionally signed) integer part
+ {
+ if (cin->peek() == '.') {
+ str += (char)cin->get();
+ decDigits(str); // optional fraction digits; NOTE(review): decDigits also accepts a sign here, decDigits1 may have been intended - confirm
+ if (cin->peek() == 'e' || cin->peek() == 'E') {
+ str += (char)cin->get();
+ if (decDigits(str)) ok = true; // 1.[2]E2
+ }
+ else ok = true; // 1.[2]
+ }
+ else if (cin->peek() == 'e' || cin->peek() == 'E') {
+ str += (char)cin->get();
+ if (decDigits(str)) ok = true; // 1E2
+ }
+ }
+ else // no integer part: the literal must start with '.'
+ {
+ if (cin->peek() == '.') {
+ str += (char)cin->get();
+ if (decDigits(str)) {
+ if (cin->peek() == 'e' || cin->peek() == 'E') {
+ str += (char)cin->get();
+ if (decDigits(str)) ok = true; // .3E2
+ }
+ else ok = true; // .3
+ }
+ }
+ }
+ if (ok) {
+ token = Token((float)atof(str.c_str()),loc);
+ }
+ else cin->unget(str.size()); // not a float (e.g. a plain integer): put back everything consumed so other parsers can try
+ return ok;
+ }
+
+ bool TokenStream::tryInt(Token& token, const ParseLocation& loc) { // parses an optionally signed decimal integer into a TY_INT token
+ std::string str;
+ if (decDigits(str)) {
+ token = Token(atoi(str.c_str()),loc);
+ return true;
+ }
+ return false; // decDigits already restored the stream
+ }
+
+ bool TokenStream::tryString(Token& token, const ParseLocation& loc) // parses a double-quoted string into a TY_STRING token; the quotes are not part of the token text
+ {
+ std::string str;
+ if (cin->peek() != '\"') return false; // not a string: nothing consumed
+ cin->drop(); // opening quote
+ while (cin->peek() != '\"') {
+ const int c = cin->get();
+ if (!isStringChar(c)) THROW_RUNTIME_ERROR("invalid string character "+std::string(1,c)+" at "+loc.str()); // an unterminated string also ends here, since EOF is not a valid string character
+ str += (char)c;
+ }
+ cin->drop(); // closing quote
+ token = Token(str,Token::TY_STRING,loc);
+ return true;
+ }
+
+ bool TokenStream::tryIdentifier(Token& token, const ParseLocation& loc) // parses an identifier: one alpha character followed by any number of alpha characters or digits
+ {
+ std::string str;
+ if (!isAlpha(cin->peek())) return false; // identifiers cannot start with a digit; nothing consumed
+ str += (char)cin->get();
+ while (isAlphaNum(cin->peek())) str += (char)cin->get();
+ token = Token(str,Token::TY_IDENTIFIER,loc);
+ return true;
+ }
+
+ void TokenStream::skipSeparators() // advances the character stream to the first non-separator character (or EOF)
+ {
+ /* skip separators */
+ while (cin->peek() != EOF && isSeparator(cin->peek()))
+ cin->drop();
+ }
+
+ Token TokenStream::next() // produces the next token; the parsers are tried in a fixed order and the first success wins
+ {
+ Token token;
+ skipSeparators();
+ ParseLocation loc = cin->loc(); // location of the first character of the token
+ if (trySymbols (token,loc)) return token; /**< try to parse a symbol */
+ if (tryFloat (token,loc)) return token; /**< try to parse float */
+ if (tryInt (token,loc)) return token; /**< try to parse integer */
+ if (tryString (token,loc)) return token; /**< try to parse string */
+ if (tryIdentifier(token,loc)) return token; /**< try to parse identifier */
+ if (cin->peek() == EOF ) return Token(loc); /**< return EOF token */
+ return Token((char)cin->get(),loc); /**< return invalid character token */
+ }
+}
diff --git a/thirdparty/embree/common/lexers/tokenstream.h b/thirdparty/embree/common/lexers/tokenstream.h
new file mode 100644
index 0000000000..6e49dd0b39
--- /dev/null
+++ b/thirdparty/embree/common/lexers/tokenstream.h
@@ -0,0 +1,164 @@
+// Copyright 2009-2021 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include "stream.h"
+#include <string>
+#include <vector>
+
+namespace embree
+{
+ /*! token class: a tagged value (EOF, char, int, float, identifier, string or symbol) plus the location it was read from */
+ class Token
+ {
+ public:
+
+ enum Type { TY_EOF, TY_CHAR, TY_INT, TY_FLOAT, TY_IDENTIFIER, TY_STRING, TY_SYMBOL };
+
+ Token ( const ParseLocation& loc = ParseLocation()) : ty(TY_EOF ), loc(loc) {} // end-of-file token
+ Token (char c, const ParseLocation& loc = ParseLocation()) : ty(TY_CHAR ), c(c), loc(loc) {} // single character token
+ Token (int i, const ParseLocation& loc = ParseLocation()) : ty(TY_INT ), i(i), loc(loc) {} // integer token
+ Token (float f,const ParseLocation& loc = ParseLocation()) : ty(TY_FLOAT), f(f), loc(loc) {} // float token
+ Token (std::string str, Type ty, const ParseLocation& loc = ParseLocation()) : ty(ty), str(str), loc(loc) {} // text token of the given type
+
+ static Token Eof() { return Token(); } // convenience factories; the tokens they create carry a default (unknown) location
+ static Token Sym(std::string str) { return Token(str,TY_SYMBOL); }
+ static Token Str(std::string str) { return Token(str,TY_STRING); }
+ static Token Id (std::string str) { return Token(str,TY_IDENTIFIER); }
+
+ char Char() const { // typed accessors: return the payload or raise a runtime error naming the token location
+ if (ty == TY_CHAR) return c;
+ THROW_RUNTIME_ERROR(loc.str()+": character expected");
+ }
+
+ int Int() const {
+ if (ty == TY_INT) return i;
+ THROW_RUNTIME_ERROR(loc.str()+": integer expected");
+ }
+
+ float Float(bool cast = true) const { // with cast == true an integer token is accepted and converted to float
+ if (ty == TY_FLOAT) return f;
+ if (ty == TY_INT && cast) return (float)i;
+ THROW_RUNTIME_ERROR(loc.str()+": float expected");
+ }
+
+ std::string Identifier() const {
+ if (ty == TY_IDENTIFIER) return str;
+ THROW_RUNTIME_ERROR(loc.str()+": identifier expected");
+ }
+
+ std::string String() const {
+ if (ty == TY_STRING) return str;
+ THROW_RUNTIME_ERROR(loc.str()+": string expected");
+ }
+
+ std::string Symbol() const {
+ if (ty == TY_SYMBOL) return str;
+ THROW_RUNTIME_ERROR(loc.str()+": symbol expected");
+ }
+
+ const ParseLocation& Location() const { return loc; }
+
+ friend bool operator==(const Token& a, const Token& b) // equal when type and payload match; the location is not compared
+ {
+ if (a.ty != b.ty) return false;
+ if (a.ty == TY_CHAR) return a.c == b.c;
+ if (a.ty == TY_INT) return a.i == b.i;
+ if (a.ty == TY_FLOAT) return a.f == b.f;
+ if (a.ty == TY_IDENTIFIER) return a.str == b.str;
+ if (a.ty == TY_STRING) return a.str == b.str;
+ if (a.ty == TY_SYMBOL) return a.str == b.str;
+ return true; // both are EOF tokens
+ }
+
+ friend bool operator!=(const Token& a, const Token& b) {
+ return !(a == b);
+ }
+
+ friend bool operator <( const Token& a, const Token& b ) { // orders by type first, then by payload; the location is not compared
+ if (a.ty != b.ty) return (int)a.ty < (int)b.ty;
+ if (a.ty == TY_CHAR) return a.c < b.c;
+ if (a.ty == TY_INT) return a.i < b.i;
+ if (a.ty == TY_FLOAT) return a.f < b.f;
+ if (a.ty == TY_IDENTIFIER) return a.str < b.str;
+ if (a.ty == TY_STRING) return a.str < b.str;
+ if (a.ty == TY_SYMBOL) return a.str < b.str;
+ return false; // two EOF tokens are equivalent
+ }
+
+ friend std::ostream& operator<<(std::ostream& cout, const Token& t) // prints the token as "<Kind>(<payload>)", or "eof"
+ {
+ if (t.ty == TY_EOF) return cout << "eof";
+ if (t.ty == TY_CHAR) return cout << "Char(" << t.c << ")";
+ if (t.ty == TY_INT) return cout << "Int(" << t.i << ")";
+ if (t.ty == TY_FLOAT) return cout << "Float(" << t.f << ")";
+ if (t.ty == TY_IDENTIFIER) return cout << "Id(" << t.str << ")";
+ if (t.ty == TY_STRING) return cout << "String(" << t.str << ")";
+ if (t.ty == TY_SYMBOL) return cout << "Symbol(" << t.str << ")";
+ return cout << "unknown";
+ }
+
+ private:
+ Type ty; //< the type of the token
+ union { // only the member selected by ty is meaningful
+ char c; //< data for char tokens
+ int i; //< data for int tokens
+ float f; //< data for float tokens
+ };
+ std::string str; //< data for string, identifier and symbol tokens
+ ParseLocation loc; //< the location the token is from
+ };
+
+ /*! build full tokenizer that takes list of valid characters and keywords; turns a character stream into a stream of Tokens */
+ class TokenStream : public Stream<Token>
+ {
+ public:
+
+ /*! shorthands for common sets of characters */
+ static const std::string alpha;
+ static const std::string ALPHA;
+ static const std::string numbers;
+ static const std::string separators;
+ static const std::string stringChars;
+
+ public:
+ TokenStream(const Ref<Stream<int> >& cin,
+ const std::string& alpha, //< valid characters for identifiers
+ const std::string& seps, //< characters that act as separators
+ const std::vector<std::string>& symbols = std::vector<std::string>()); //< symbols
+ public:
+ ParseLocation location() { return cin->loc(); } // location reported by the character stream for its next character
+ Token next(); // parses and returns the next token
+ bool trySymbol(const std::string& symbol); // consumes `symbol` if the input starts with it
+
+ private:
+ void skipSeparators(); // the try* helpers below return false when the input does not match
+ bool decDigits(std::string& str); // optionally signed digit run
+ bool decDigits1(std::string& str); // unsigned digit run
+ bool trySymbols(Token& token, const ParseLocation& loc);
+ bool tryFloat(Token& token, const ParseLocation& loc);
+ bool tryInt(Token& token, const ParseLocation& loc);
+ bool tryString(Token& token, const ParseLocation& loc);
+ bool tryIdentifier(Token& token, const ParseLocation& loc);
+
+ Ref<Stream<int> > cin; // source character stream
+ bool isSepMap[256]; // lookup table: separator characters
+ bool isAlphaMap[256]; // lookup table: identifier characters
+ bool isStringCharMap[256]; // lookup table: characters allowed inside strings
+ std::vector<std::string> symbols; // symbols recognized by trySymbols, tried in this order
+
+ /*! checks if a character is a separator */
+ __forceinline bool isSeparator(unsigned int c) const { return c<256 && isSepMap[c]; }
+
+ /*! checks if a character is a number */
+ __forceinline bool isDigit(unsigned int c) const { return c >= '0' && c <= '9'; }
+
+ /*! checks if a character is valid inside a string */
+ __forceinline bool isStringChar(unsigned int c) const { return c<256 && isStringCharMap[c]; }
+
+ /*! checks if a character is legal for an identifier */
+ __forceinline bool isAlpha(unsigned int c) const { return c<256 && isAlphaMap[c]; }
+ __forceinline bool isAlphaNum(unsigned int c) const { return isAlpha(c) || isDigit(c); } // identifier characters plus decimal digits
+ };
+}