1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
|
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "tokenstream.h"
#include "../math/math.h"
namespace embree
{
/* shorthands for common sets of characters */
const std::string TokenStream::alpha = "abcdefghijklmnopqrstuvwxyz";
const std::string TokenStream::ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
const std::string TokenStream::numbers = "0123456789";
const std::string TokenStream::separators = "\n\t\r ";
const std::string TokenStream::stringChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 _.,+-=:/*\\";
/* creates map for fast categorization of characters */
static void createCharMap(bool map[256], const std::string& chrs) {
for (size_t i=0; i<256; i++) map[i] = false;
for (size_t i=0; i<chrs.size(); i++) map[uint8_t(chrs[i])] = true;
}
/* build full tokenizer that takes list of valid characters and keywords */
TokenStream::TokenStream(const Ref<Stream<int> >& cin, //< stream to read from
const std::string& alpha, //< valid characters for identifiers
const std::string& seps, //< characters that act as separators
const std::vector<std::string>& symbols) //< symbols
: cin(cin), symbols(symbols)
{
createCharMap(isAlphaMap,alpha);
createCharMap(isSepMap,seps);
createCharMap(isStringCharMap,stringChars);
}
bool TokenStream::decDigits(std::string& str_o)
{
bool ok = false;
std::string str;
if (cin->peek() == '+' || cin->peek() == '-') str += (char)cin->get();
while (isDigit(cin->peek())) { ok = true; str += (char)cin->get(); }
if (ok) str_o += str;
else cin->unget(str.size());
return ok;
}
bool TokenStream::decDigits1(std::string& str_o)
{
bool ok = false;
std::string str;
while (isDigit(cin->peek())) { ok = true; str += (char)cin->get(); }
if (ok) str_o += str; else cin->unget(str.size());
return ok;
}
bool TokenStream::trySymbol(const std::string& symbol)
{
size_t pos = 0;
while (pos < symbol.size()) {
if (symbol[pos] != cin->peek()) { cin->unget(pos); return false; }
cin->drop(); pos++;
}
return true;
}
bool TokenStream::trySymbols(Token& token, const ParseLocation& loc)
{
for (size_t i=0; i<symbols.size(); i++) {
if (!trySymbol(symbols[i])) continue;
token = Token(symbols[i],Token::TY_SYMBOL,loc);
return true;
}
return false;
}
bool TokenStream::tryFloat(Token& token, const ParseLocation& loc)
{
bool ok = false;
std::string str;
if (trySymbol("nan")) {
token = Token(float(nan));
return true;
}
if (trySymbol("+inf")) {
token = Token(float(pos_inf));
return true;
}
if (trySymbol("-inf")) {
token = Token(float(neg_inf));
return true;
}
if (decDigits(str))
{
if (cin->peek() == '.') {
str += (char)cin->get();
decDigits(str);
if (cin->peek() == 'e' || cin->peek() == 'E') {
str += (char)cin->get();
if (decDigits(str)) ok = true; // 1.[2]E2
}
else ok = true; // 1.[2]
}
else if (cin->peek() == 'e' || cin->peek() == 'E') {
str += (char)cin->get();
if (decDigits(str)) ok = true; // 1E2
}
}
else
{
if (cin->peek() == '.') {
str += (char)cin->get();
if (decDigits(str)) {
if (cin->peek() == 'e' || cin->peek() == 'E') {
str += (char)cin->get();
if (decDigits(str)) ok = true; // .3E2
}
else ok = true; // .3
}
}
}
if (ok) {
token = Token((float)atof(str.c_str()),loc);
}
else cin->unget(str.size());
return ok;
}
bool TokenStream::tryInt(Token& token, const ParseLocation& loc) {
std::string str;
if (decDigits(str)) {
token = Token(atoi(str.c_str()),loc);
return true;
}
return false;
}
bool TokenStream::tryString(Token& token, const ParseLocation& loc)
{
std::string str;
if (cin->peek() != '\"') return false;
cin->drop();
while (cin->peek() != '\"') {
const int c = cin->get();
if (!isStringChar(c)) THROW_RUNTIME_ERROR("invalid string character "+std::string(1,c)+" at "+loc.str());
str += (char)c;
}
cin->drop();
token = Token(str,Token::TY_STRING,loc);
return true;
}
bool TokenStream::tryIdentifier(Token& token, const ParseLocation& loc)
{
std::string str;
if (!isAlpha(cin->peek())) return false;
str += (char)cin->get();
while (isAlphaNum(cin->peek())) str += (char)cin->get();
token = Token(str,Token::TY_IDENTIFIER,loc);
return true;
}
void TokenStream::skipSeparators()
{
/* skip separators */
while (cin->peek() != EOF && isSeparator(cin->peek()))
cin->drop();
}
Token TokenStream::next()
{
Token token;
skipSeparators();
ParseLocation loc = cin->loc();
if (trySymbols (token,loc)) return token; /**< try to parse a symbol */
if (tryFloat (token,loc)) return token; /**< try to parse float */
if (tryInt (token,loc)) return token; /**< try to parse integer */
if (tryString (token,loc)) return token; /**< try to parse string */
if (tryIdentifier(token,loc)) return token; /**< try to parse identifier */
if (cin->peek() == EOF ) return Token(loc); /**< return EOF token */
return Token((char)cin->get(),loc); /**< return invalid character token */
}
}
|