this repo has no description
at fixPythonPipStalling 295 lines 9.7 kB view raw
1#include "RezLexer.h" 2#include "RezLexerWaveToken.h" 3#include "RezParser.generated.hh" 4#include <unordered_map> 5 6#include <boost/regex.hpp> 7 8using namespace boost::wave; 9 10static int readInt(const char *str, const char *end = NULL, int baseOverride = 0) 11{ 12 int x = 0; 13 14 int base = 10; 15 16 if(baseOverride) 17 base = baseOverride; 18 else if(*str == '0') 19 { 20 base = 8; 21 ++str; 22 if(*str == 'x' || *str == 'X') 23 { 24 base = 16; 25 ++str; 26 } 27 if(*str == 'b' || *str == 'B') 28 { 29 base = 2; 30 ++str; 31 } 32 } 33 else if(*str == 'b' || *str == 'B') 34 { 35 base = 2; 36 ++str; 37 } 38 39 while(str != end && *str) 40 { 41 x *= base; 42 if(*str >= 'a' && *str <= 'z') 43 x += *str - 'a' + 10; 44 else if(*str >= 'A' && *str <= 'Z') 45 x += *str - 'A' + 10; 46 else if(*str >= '0' && *str <= '9') 47 x += *str - '0'; 48 str++; 49 } 50 51 return x; 52} 53 54static int readCharLit(const char *str) 55{ 56 const char *p = str + 1; 57 const char *e = str + strlen(str) - 1; 58 59 if(e - p != 4) 60 std::cout << "warning: CHAR LITERAL " << str << "\n"; 61 62 int x = 0; 63 while(p != e) 64 { 65 x <<= 8; 66 x |= (*p) & 0xFF; 67 ++p; 68 } 69 return x; 70} 71 72static std::string readStringLit(const char *str) 73{ 74 const char *p = str + 1; 75 const char *e = str + strlen(str) - 1; 76 77 std::ostringstream out; 78 79 while(p != e) 80 { 81 if(*p == '\\') 82 { 83 ++p; 84 if(p != e) 85 { 86 switch(*p) 87 { 88 case 'n': 89 out << '\n'; ++p; 90 break; 91 case 'r': 92 out << '\r'; ++p; 93 break; 94 case 't': 95 out << '\t'; ++p; 96 break; 97 case '0': 98 case '1': 99 case '2': 100 case '3': 101 if(p + 3 > e) 102 continue; 103 if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) 104 { 105 if(p + 4 > e) 106 continue; 107 out << (char)readInt(p+2, p+4, 16); 108 p += 4; 109 } 110 else 111 { 112 out << (char)readInt(p, p+3, 8); 113 p += 3; 114 } 115 break; 116 case '$': 117 { 118 if(p + 3 > e) 119 continue; 120 out << (char)readInt(p+1, p+3, 16); 121 p += 3; 122 } 123 break; 124 } 125 } 126 } 127 else 128 { 129 out << *p++; 130 } 131 } 132 133 return out.str(); 134} 135 136RezSymbol RezLexer::nextToken() 137{ 138 for(auto tok = nextWave(); tok != T_EOI && tok != T_EOF; tok = nextWave()) 139 { 140 if(IS_CATEGORY(tok, WhiteSpaceTokenType)) 141 continue; 142 else if(IS_CATEGORY(tok, EOLTokenType)) 143 continue; 144 else if(tok == T_PP_LINE) 145 { 146 while(tok != T_EOI && tok != T_EOF && !IS_CATEGORY(tok, EOLTokenType)) 147 tok = nextWave(); 148 continue; 149 } 150 else 151 { 152 //std::cout << "{" << std::hex << (token_id)tok << std::dec << "|" << tok.get_value() << "}\n"; 153 154 auto pos = tok.get_position(); 155 curFile = pos.get_file().c_str(); 156 auto yypos = yy::position(&curFile, pos.get_line(), pos.get_column()); 157 yy::location loc(yypos); 158 lastLocation = loc; 159 160 if(tok == (UnknownTokenType | '"')) 161 { 162 return RezParser::make_STRINGLIT("Hello, world.", loc); 163 } 164 else if(IS_CATEGORY(tok, IdentifierTokenType) || IS_CATEGORY(tok, KeywordTokenType) || IS_CATEGORY(tok, BoolLiteralTokenType)) 165 { 166 typedef decltype(&RezParser::make_TYPE) memfun; 167#define KEYWORD(upper, lower) \ 168{ lower, &RezParser::make_ ## upper } 169 170 static std::unordered_map<std::string, memfun> keywords = { 171 KEYWORD(TYPE, "type"), 172 KEYWORD(RESOURCE, "resource"), 173 KEYWORD(DATA, "data"), 174 KEYWORD(READ, "read"), 175 KEYWORD(INCLUDE, "include"), 176 KEYWORD(CHANGE, "change"), 177 KEYWORD(DELETE, "delete"), 178 179 KEYWORD(ARRAY,"array"), 180 KEYWORD(SWITCH, "switch"), 181 KEYWORD(CASE, "case"), 182 KEYWORD(AS, "as"), 183 KEYWORD(FILL,"fill"), 184 KEYWORD(ALIGN, "align"), 185 KEYWORD(HEX,"hex"), 186 KEYWORD(KEY, "key"), 187 KEYWORD(WIDE,"wide"), 188 KEYWORD(UNSIGNED, "unsigned"), 189 KEYWORD(BINARY, "binary"), 190 KEYWORD(LITERAL, "literal"), 191 KEYWORD(BOOLEAN, "boolean"), 192 KEYWORD(BIT, "bit"), 193 KEYWORD(NIBBLE, "nibble"), 194 KEYWORD(BYTE, "byte"), 195 KEYWORD(CHAR, "char"), 196 KEYWORD(WORD, "word"), 197 KEYWORD(INTEGER, "integer"), 198 KEYWORD(LONG, "long"), 199 KEYWORD(LONGINT, "longint"), 200 KEYWORD(PSTRING, "pstring"), 201 KEYWORD(PSTRING, "wstring"), 202 KEYWORD(STRING, "string"), 203 KEYWORD(POINT, "point"), 204 KEYWORD(RECT, "rect"), 205 KEYWORD(BITSTRING, "bitstring"), 206 207 KEYWORD(INTEGER, "int"), 208 KEYWORD(DOLLAR, "$"), 209 210 KEYWORD(FUN_COUNTOF, "$$countof"), 211 KEYWORD(FUN_ARRAYINDEX, "$$arrayindex"), 212 KEYWORD(FUN_READ, "$$read"), 213 KEYWORD(FUN_BITFIELD, "$$bitfield"), 214 KEYWORD(FUN_WORD, "$$word"), 215 KEYWORD(FUN_BYTE, "$$byte"), 216 KEYWORD(FUN_LONG, "$$long"), 217 }; 218 219 std::string s = tok.get_value().c_str(); 220 if(s.size() >= 2 && s[0] == '$' && std::all_of(s.begin()+1, s.end(), [](char c) { return isxdigit(c); })) 221 return RezParser::make_INTLIT(readInt(s.c_str()+1, nullptr, 16), loc); 222 223 std::string lower = s; 224 std::transform(lower.begin(), lower.end(), lower.begin(), ::tolower); 225 auto p = keywords.find(lower); 226 if(p == keywords.end()) 227 { 228 //std::cout << "id: " << s << std::endl; 229 return RezParser::make_IDENTIFIER(lower, loc); 230 } 231 else 232 { 233 //std::cout << "key: " << s << std::endl; 234 return (*p->second)(loc); 235 } 236 } 237 else if(tok == T_INTLIT) 238 { 239 if(tok.get_value() == "0") 240 { 241 auto tok2 = peekWave(); 242 while(tok2 != T_EOI && tok2 != T_EOF && IS_CATEGORY(tok2, WhiteSpaceTokenType)) 243 nextWave(), tok2 = peekWave(); 244 245 //std::cout << "!" << std::hex << (token_id)tok2 << std::dec << "|" << tok2.get_value() << "!\n"; 246 static boost::regex binlit("[bB][01]+"); 247 if(tok2 == T_IDENTIFIER && boost::regex_match(tok2.get_value().c_str(), binlit)) 248 tok = nextWave(); 249 } 250 return RezParser::make_INTLIT(readInt(tok.get_value().c_str()), loc); 251 } 252 else 253 { 254#define NOVAL_TOK(name) \ 255case T_ ## name: /*std::cout << #name << std::endl;*/ return RezParser::make_ ## name(loc) 256 switch(token_id(tok)) 257 { 258 case T_INTLIT: return RezParser::make_INTLIT(readInt(tok.get_value().c_str()), loc); 259 260 case T_CHARLIT: return RezParser::make_CHARLIT(readCharLit(tok.get_value().c_str()), loc); 261 case T_STRINGLIT: return RezParser::make_STRINGLIT(readStringLit(tok.get_value().c_str()), loc); 262 263 NOVAL_TOK(LEFTBRACE); 264 NOVAL_TOK(RIGHTBRACE); 265 NOVAL_TOK(LEFTBRACKET); 266 NOVAL_TOK(RIGHTBRACKET); 267 NOVAL_TOK(LEFTPAREN); 268 NOVAL_TOK(RIGHTPAREN); 269 NOVAL_TOK(SEMICOLON); 270 NOVAL_TOK(COMMA); 271 NOVAL_TOK(PLUS); 272 NOVAL_TOK(MINUS); 273 NOVAL_TOK(DIVIDE); 274 NOVAL_TOK(STAR); 275 NOVAL_TOK(ASSIGN); 276 NOVAL_TOK(COLON); 277 NOVAL_TOK(SHIFTLEFT); 278 NOVAL_TOK(SHIFTRIGHT); 279 NOVAL_TOK(EQUAL); 280 NOVAL_TOK(NOTEQUAL); 281 NOVAL_TOK(AND); 282 NOVAL_TOK(OR); 283 NOVAL_TOK(XOR); 284 NOVAL_TOK(COMPL); 285 286 default: 287 288 return RezParser::make_BADTOKEN(tok.get_value().c_str(), loc); 289 } 290 291 } 292 } 293 } 294 return RezParser::symbol_type(RezParser::token_type(0), yy::location()); 295}