189 lines
4.0 KiB
C++
189 lines
4.0 KiB
C++
#include "Lexer.h"
|
|
|
|
Lexer::Lexer()
{
    // Seed the lexer with its default vocabulary.  Element counts are
    // derived with sizeof so an array can grow or shrink without having
    // to remember to touch the matching assign() call (the old code
    // hard-coded 6 / 9 / 19 and would silently truncate or overrun).
    string keys[] = {"function", "class", "for", "while", "if", "else"};
    keywords.assign(keys, keys + sizeof(keys) / sizeof(keys[0]));

    char punct[] = {'.', ',', ';', '{', '}', '[', ']', '(', ')'};
    punctuation.assign(punct, punct + sizeof(punct) / sizeof(punct[0]));

    string oper[] = {"<", ">", "+", "-", "/", "*", "%", "&", "|", "=", ":",
                     "==", "+=", "-=", "<=", ">=", "!=", "&&", "||"};
    operators.assign(oper, oper + sizeof(oper) / sizeof(oper[0]));
}
|
|
|
|
Lexer::~Lexer()
{
    // dtor: nothing to release — the keyword/punctuation/operator vectors
    // clean themselves up.
}
|
|
|
|
void Lexer::add_keyword(string word)
{
    // Register an additional keyword; duplicates are silently ignored.
    if(is_keyword(word)) return;
    keywords.push_back(word);
}
|
|
void Lexer::add_punctuation_char(char c)
|
|
{
|
|
if(!is_punctuation(c))
|
|
{
|
|
punctuation.push_back(c);
|
|
}
|
|
}
|
|
|
|
void Lexer::add_operator(string oper)
{
    // Register an additional operator spelling; duplicates are ignored.
    if(is_operator(oper)) return;
    operators.push_back(oper);
}
|
|
|
|
|
|
vector<Token> Lexer::parse_line(string line)
{
    // Tokenize one source line: repeatedly strip a single token off the
    // front of the remaining text until it is exhausted.  Tokens with an
    // empty value (pure whitespace leftovers) are discarded.
    vector<Token> tokens;
    while(!line.empty())
    {
        pair<string, Token> parsed = parse_token(line);
        line = parsed.first;
        if(parsed.second.get_value() != "")
        {
            tokens.push_back(parsed.second);
        }
    }
    return tokens;
}
|
|
|
|
pair<string, Token> Lexer::parse_token(string line)
{
    // Extract the next token from the front of `line`.
    // Returns {remaining text, token}; the token's value is "" when the
    // line held only whitespace.
    //
    // Fixes over the previous version:
    //  * two-char operator lookahead now also consults is_operator(), so
    //    the registered digraphs "&&" and "||" are no longer split into
    //    two one-character tokens (the old code only paired on '=');
    //  * any character inside a double-quoted literal is kept, so
    //    punctuation such as ',' no longer terminates the literal early;
    //  * <cctype> calls take the char cast to unsigned char (passing a
    //    negative char is undefined behavior).
    string token_value = "";
    unsigned int i;
    bool in_char_literal = false;   // true while between double quotes
    for(i = 0; i < line.size(); i++)
    {
        // Skip leading whitespace before the token starts.
        if(token_value == "" && isspace(static_cast<unsigned char>(line[i]))) continue;

        if(isalnum(static_cast<unsigned char>(line[i])) || line[i] == '\"' || line[i] == '_')
        {
            token_value += line[i];
            // A double quote toggles literal mode (open/close).
            if(line[i] == '\"')
            {
                in_char_literal = !in_char_literal;
            }
        }
        else if(in_char_literal)
        {
            // Inside a string literal every character — whitespace and
            // punctuation alike — belongs to the token.
            token_value += line[i];
        }
        else if(ispunct(static_cast<unsigned char>(line[i])))
        {
            if(token_value == "")
            {
                // Punctuation/operator token: take one char, then greedily
                // pair it with the next char when that forms a known
                // two-char operator ('=' pairing kept for compatibility,
                // e.g. "==", "+="; is_operator() adds "&&" and "||").
                token_value = line[i];
                i++;
                if(i < line.size() &&
                   (line[i] == '=' || is_operator(token_value + line[i])))
                {
                    token_value += line[i];
                    i++;
                }
            }
            // Punctuation also terminates a preceding symbol/number token
            // without being consumed.
            break;
        }
        else break;   // whitespace after a token ends it
    }

    Token token = Token(guess_type(token_value), token_value);
    string truncated_line = line.substr(i);

    return pair<string, Token>(truncated_line, token);
}
|
|
|
|
bool Lexer::is_keyword(string value)
{
    // Linear scan over the registered keyword list.
    for(unsigned int idx = 0; idx < keywords.size(); ++idx)
    {
        if(keywords[idx] == value) return true;
    }
    return false;
}
|
|
|
|
bool Lexer::is_punctuation(char c)
|
|
{
|
|
|
|
for(int i=0; i< punctuation.size(); i++)
|
|
{
|
|
if(c == punctuation[i]) return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool Lexer::is_operator(string value)
{
    // Linear scan over the registered operator spellings.
    for(unsigned int idx = 0; idx < operators.size(); ++idx)
    {
        if(operators[idx] == value) return true;
    }
    return false;
}
|
|
|
|
type_of_token Lexer::guess_type(string value)
{
    /* Classify a raw token string into one of the type_of_token values:
       t_invalid_token, t_symbol, t_integer, t_literal, t_punctuation,
       t_keyword, t_operator.
       (NOTE(review): the enum sketch that used to live here omitted
       t_operator even though this function returns it — the real enum in
       the header presumably includes it; confirm.)

       Fix over the previous version: a lone double quote (value == "\"")
       used to be classified as t_literal because value[0] and
       value[value.size()-1] are the SAME character; a literal now
       requires length >= 2. */

    if(value == "") return t_invalid_token;

    // Digit-led token: an integer only when every character is a digit.
    if(isdigit(static_cast<unsigned char>(value[0])))
    {
        for(unsigned int i = 1; i < value.size(); i++)
        {
            if(!isdigit(static_cast<unsigned char>(value[i]))) return t_invalid_token;
        }
        return t_integer;
    }

    // Letter-led token: a keyword if registered, otherwise a symbol.
    if(isalpha(static_cast<unsigned char>(value[0])))
    {
        if(is_keyword(value)) return t_keyword;
        else return t_symbol;
    }

    // Quote-led token: a literal only when properly closed.
    if(value[0] == '\"')
    {
        if(value.size() >= 2 && value[value.size() - 1] == '\"') return t_literal;
        else return t_invalid_token;
    }

    // Single character: punctuation first, then one-char operators.
    if(value.size() == 1)
    {
        if(is_punctuation(value[0])) return t_punctuation;
        if(is_operator(value)) return t_operator;
    }

    // Two characters: only the registered two-char operators qualify.
    if(value.size() == 2 && is_operator(value)) return t_operator;

    // Anything else is not a recognizable token.
    return t_invalid_token;
}
|
|
|
|
|
|
|