// sencha-lang/Sencha-lang/Lexer.cpp
//
// 195 lines
// 3.9 KiB
// C++

#include "Lexer.h"
// Builds a lexer preloaded with the default keyword, punctuation and
// operator tables. Element counts are derived from the arrays themselves
// so the tables and their lengths cannot drift apart.
Lexer::Lexer()
{
    string default_keywords[] = {"function", "class", "for", "while", "if", "else"};
    keywords.assign(default_keywords,
                    default_keywords + sizeof(default_keywords) / sizeof(default_keywords[0]));

    char default_punctuation[] = {'.', ',', ';', '{', '}', '[', ']', '(', ')'};
    punctuation.assign(default_punctuation,
                       default_punctuation + sizeof(default_punctuation) / sizeof(default_punctuation[0]));

    string default_operators[] = {"<", ">", "+", "-", "/", "*", "%", "&", "|", "=", ":",
                                  "==", "+=", "-=", "<=", ">=", "!=", "&&", "||"};
    operators.assign(default_operators,
                     default_operators + sizeof(default_operators) / sizeof(default_operators[0]));
}
// The destructor body is intentionally empty; no explicit cleanup is
// performed here.
Lexer::~Lexer()
{
}
// Registers `word` as a keyword. Words that are already registered are
// ignored, so the keyword table never holds duplicates.
void Lexer::add_keyword(string word)
{
    if(is_keyword(word)) return;

    keywords.push_back(word);
}
// Registers `c` as a punctuation character. A character that is already
// registered is ignored, so the punctuation table never holds duplicates.
void Lexer::add_punctuation_char(char c)
{
    if(is_punctuation(c)) return;

    punctuation.push_back(c);
}
// Registers `oper` as an operator. An operator that is already registered
// is ignored, so the operator table never holds duplicates.
void Lexer::add_operator(string oper)
{
    if(is_operator(oper)) return;

    operators.push_back(oper);
}
// Splits `line` into tokens by calling parse_token() repeatedly on the
// not-yet-consumed remainder of the line.
//
// Tokens with an empty value (produced when only whitespace was consumed)
// are dropped. Returns the tokens in the order they appear in `line`.
//
// Termination: the loop relies on every parse_token() call consuming at
// least one character. If a call makes no progress, the first character of
// the remainder is recorded as a one-character t_invalid_token and removed,
// so a single unlexable character can never cause an endless loop.
vector<Token> Lexer::parse_line(string line)
{
    vector<Token> tokens;
    while(!line.empty())
    {
        pair<string, Token> result_of_parsing = parse_token(line);

        if(result_of_parsing.first.size() >= line.size())
        {
            // Nothing was consumed: surface the offending character as an
            // invalid token and step over it instead of retrying forever.
            tokens.push_back(Token(t_invalid_token, string(1, line[0])));
            line.erase(0, 1);
            continue;
        }

        Token token = result_of_parsing.second;
        if(token.get_value() != "")
        {
            tokens.push_back(token);
        }
        line = result_of_parsing.first;
    }
    return tokens;
}
// Extracts the first token of `line`.
//
// Returns a pair: `first` is the part of `line` that follows the consumed
// characters, `second` is the token (its type comes from guess_type()).
// If `line` is empty or holds only whitespace, the token value is empty
// and the returned remainder is empty.
//
// Leading whitespace is skipped, then exactly one of these is consumed:
//   * a string literal: from an opening '"' through the closing '"', or to
//     the end of the line when the closing quote is missing;
//   * a word: a maximal run of alphanumeric characters and '_';
//   * any other character as a one-character token, extended to two
//     characters when the pair is a registered operator ("==", "&&", ...).
//
// Whenever `line` contains a non-whitespace character, at least one
// character is consumed, so callers looping on the remainder always make
// progress. Characters are cast to unsigned char before being handed to
// the <cctype> classifiers, which have undefined behaviour for negative
// values.
pair<string, Token> Lexer::parse_token(string line)
{
    string token_value = "";
    string::size_type i = 0;

    // Skip whitespace in front of the token.
    while(i < line.size() && isspace(static_cast<unsigned char>(line[i])))
    {
        i++;
    }

    if(i < line.size())
    {
        const unsigned char first = static_cast<unsigned char>(line[i]);

        if(first == '\"')
        {
            // String literal: keep both quotes in the token value. The token
            // ends at the closing quote; whatever follows is a new token.
            token_value += line[i++];
            while(i < line.size())
            {
                const char c = line[i++];
                token_value += c;
                if(c == '\"') break;
            }
        }
        else if(isalnum(first) || first == '_')
        {
            // Word (identifier, keyword or number): stop at the first
            // character that cannot be part of it, including a quote.
            while(i < line.size() &&
                  (isalnum(static_cast<unsigned char>(line[i])) || line[i] == '_'))
            {
                token_value += line[i++];
            }
        }
        else
        {
            // Punctuation, operator or unrecognised character. Always
            // consume it so the caller makes progress; guess_type() decides
            // whether it is valid.
            token_value = line[i++];
            if(ispunct(first) && i < line.size())
            {
                // Greedy match: prefer a registered two-character operator
                // over two separate one-character tokens.
                const string candidate = token_value + line[i];
                if(is_operator(candidate))
                {
                    token_value = candidate;
                    i++;
                }
            }
        }
    }

    Token token = Token(guess_type(token_value), token_value);
    string truncated_line = line.substr(i);
    return pair<string, Token>(truncated_line, token);
}
// Reports whether `value` is exactly equal to one of the registered keywords.
bool Lexer::is_keyword(string value)
{
    bool found = false;
    size_t index = 0;
    while(!found && index < keywords.size())
    {
        found = (keywords[index] == value);
        ++index;
    }
    return found;
}
// Reports whether `c` is one of the registered punctuation characters.
bool Lexer::is_punctuation(char c)
{
    bool found = false;
    size_t index = 0;
    while(!found && index < punctuation.size())
    {
        found = (punctuation[index] == c);
        ++index;
    }
    return found;
}
// Reports whether `value` is exactly equal to one of the registered operators.
bool Lexer::is_operator(string value)
{
    bool found = false;
    size_t index = 0;
    while(!found && index < operators.size())
    {
        found = (operators[index] == value);
        ++index;
    }
    return found;
}
// Classifies the text of a token.
//
// Returns one of the type_of_token values used below:
//   t_integer       - a digit followed only by digits
//   t_keyword       - starts with a letter or '_' and is a registered keyword
//   t_symbol        - starts with a letter or '_' and is not a keyword
//   t_literal       - at least two characters, starting and ending with '"'
//   t_punctuation   - a single registered punctuation character
//   t_operator      - a registered operator of one or two characters
//   t_invalid_token - anything else, including the empty string
//
// A leading '_' is treated like a letter because parse_token() accepts '_'
// as a word character. A lone '"' is rejected: a literal needs both an
// opening and a closing quote. Characters are cast to unsigned char before
// being handed to the <cctype> classifiers, which have undefined behaviour
// for negative values.
type_of_token Lexer::guess_type(string value)
{
    if(value.empty()) return t_invalid_token;

    const unsigned char first = static_cast<unsigned char>(value[0]);

    if(isdigit(first))
    {
        // A number must consist of digits only.
        for(string::size_type i = 1; i < value.size(); i++)
        {
            if(!isdigit(static_cast<unsigned char>(value[i]))) return t_invalid_token;
        }
        return t_integer;
    }

    if(isalpha(first) || first == '_')
    {
        return is_keyword(value) ? t_keyword : t_symbol;
    }

    if(first == '\"')
    {
        // Require size >= 2 so the opening quote is not also counted as the
        // closing one.
        if(value.size() >= 2 && value[value.size() - 1] == '\"') return t_literal;
        return t_invalid_token;
    }

    if(value.size() == 1 && is_punctuation(value[0])) return t_punctuation;

    // Only one- and two-character operators are recognised here.
    if(value.size() <= 2 && is_operator(value)) return t_operator;

    return t_invalid_token;
}