2012-11-02 23:07:00 +00:00
|
|
|
#include "Lexer.h"
|
|
|
|
|
|
|
|
/// Construct a lexer pre-loaded with the language's default vocabulary:
/// keywords, single-character punctuation, and one/two-character operators.
/// Brace-initialization (C++11, already in use in this file) replaces the
/// old assign(ptr, ptr+N) calls whose hard-coded counts could silently
/// fall out of sync with the literal arrays.
Lexer::Lexer()
{
    // Reserved words recognized as t_keyword.
    keywords = {"function", "class", "for", "while", "if", "else", "true", "false"};

    // Single characters recognized as t_punctuation.
    punctuation = {'.', ',', ';', '{', '}', '[', ']', '(', ')'};

    // One- and two-character operator spellings recognized as t_operator.
    operators = {"<", ">", "+", "-", "/", "*", "%", "&", "|", "=", ":",
                 "==", "+=", "-=", "<=", ">=", "!=", "&&", "||"};
}
|
|
|
|
|
|
|
|
/// Destructor. Nothing to release explicitly: every member owns and
/// cleans up its own storage.
Lexer::~Lexer()
{
}
|
2012-12-16 09:36:19 +00:00
|
|
|
|
|
|
|
std::string Lexer::unescape_string(string text)
|
|
|
|
{
|
|
|
|
std::string result = text;
|
|
|
|
unsigned int offset = 0;
|
|
|
|
for(unsigned int i = 0; i < text.size(); i++)
|
|
|
|
{
|
|
|
|
char replacement = 0;
|
|
|
|
if(text[i] == '\\')
|
|
|
|
{
|
|
|
|
switch (text[i+1])
|
|
|
|
{
|
|
|
|
case 'n':
|
|
|
|
replacement = '\n';
|
|
|
|
break;
|
|
|
|
case 'r':
|
|
|
|
replacement = '\r';
|
|
|
|
break;
|
|
|
|
case 't':
|
|
|
|
replacement = '\t';
|
|
|
|
break;
|
|
|
|
case '[':
|
|
|
|
replacement = 27;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
if(replacement != 0)
|
|
|
|
{
|
|
|
|
char replacement_chars[2];
|
|
|
|
replacement_chars[0] = replacement;
|
|
|
|
replacement_chars[1] = 0;
|
|
|
|
|
|
|
|
result.replace(i + offset, 2, replacement_chars);
|
|
|
|
offset--;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
2012-11-02 23:07:00 +00:00
|
|
|
|
|
|
|
/// Register an additional keyword. Duplicates are silently ignored so
/// the list never contains the same word twice.
void Lexer::add_keyword(string word)
{
    if (is_keyword(word)) return;   // already registered
    keywords.push_back(word);
}
|
|
|
|
void Lexer::add_punctuation_char(char c)
|
|
|
|
{
|
|
|
|
if(!is_punctuation(c))
|
|
|
|
{
|
|
|
|
punctuation.push_back(c);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Register an additional operator spelling. Duplicates are silently
/// ignored.
void Lexer::add_operator(string oper)
{
    if (is_operator(oper)) return;   // already registered
    operators.push_back(oper);
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Tokenize a whole line by repeatedly consuming one token from the
/// front of the remaining text until nothing is left. Tokens with an
/// empty value (e.g. produced from trailing whitespace) are dropped.
vector<Token> Lexer::parse_line(string line)
{
    vector<Token> tokens;

    while (!line.empty())
    {
        pair<string, Token> parsed = parse_token(line);
        line = parsed.first;            // what remains after the token
        Token token = parsed.second;

        if (token.get_value() != "")    // skip empty/whitespace-only results
        {
            tokens.push_back(token);
        }
    }

    return tokens;
}
|
|
|
|
|
|
|
|
/// Consume a single token from the front of `line`.
///
/// Returns a pair of (remaining text after the token, the Token built
/// from the consumed characters). The token's type is decided afterwards
/// by guess_type(); string literals additionally have their escape
/// sequences expanded via unescape_string().
///
/// Scanning order per character: skip leading whitespace, then try in
/// turn a number, a string literal, an identifier character, and finally
/// punctuation/operator characters. The phases deliberately fall through
/// into one another (e.g. "123abc" keeps accumulating as one token), so
/// the statement order here is load-bearing.
pair<string, Token> Lexer::parse_token(string line)
{
    string token_value = "";
    unsigned int i;
    for(i=0; i< line.size(); i++)
    {
        // Skip whitespace only before the token starts; whitespace after
        // the first accumulated character falls through to the checks
        // below and (via the final `else break`) terminates the token.
        if(token_value == "" && isspace(line[i])) continue;
        // Number: a digit starts a run of digits and dots (e.g. "3.14").
        // Validation (single dot, digits only) happens later in guess_type().
        if(isdigit(line[i]))
        {
            token_value += line[i++];
            for(; i < line.size(); i++)
            {
                if(isdigit(line[i]) || line[i] == '.')
                {
                    token_value += line[i];
                }
                else
                {
                    break;
                }
            }
        }
        // NOTE(review): after the loops above, i may equal line.size();
        // line[i] then reads the one-past-the-end position — OK in C++11
        // (operator[] at size() yields '\0') but worth confirming the
        // build never targets C++03. Same applies to the checks below.
        // String literal: copy everything through the closing quote,
        // quotes included. An unterminated literal consumes to end of line.
        if(line[i] == '\"')
        {
            token_value += line[i++];
            for(; i < line.size() ; i++)
            {
                token_value += line[i];
                if (line[i] == '\"')
                {
                    i++;
                    break;
                }
            }
        }
        // Identifier/keyword character: letters, digits and '_' accumulate.
        if(isalnum(line[i]) || line[i]== '_')
        {
            token_value += line[i];
        }
        // Punctuation/operator: only starts a token of its own. If we get
        // here with a non-empty token_value, the punctuation is left for
        // the next parse_token() call (note the unconditional break).
        else if(ispunct(line[i]))
        {
            if(token_value=="")
            {
                token_value=line[i];
                i++;
                // Greedily absorb a following '=' to form two-character
                // operators such as "==", "+=", "<=", "!=".
                if(i<line.size())
                {
                    if(line[i] == '=')
                    {
                        token_value+=line[i];
                        i++;
                    }
                }
            }
            break;
        }
        // Anything else (e.g. whitespace after the token started, or the
        // '\0' read past the end) ends the token.
        else break;
    }
    // Classify the accumulated text, expanding escapes in string literals.
    auto type = guess_type(token_value);
    if(type == t_literal) token_value = unescape_string(token_value);
    Token token = Token(type, token_value);
    // Everything from position i onward is handed back to the caller.
    string truncated_line = line.substr(i);
    return pair<string, Token>(truncated_line, token);
}
|
|
|
|
|
|
|
|
/// True if `value` exactly matches one of the registered keywords.
bool Lexer::is_keyword(string value)
{
    for(const string& keyword : keywords)
    {
        if(keyword == value) return true;
    }
    return false;
}
|
|
|
|
|
|
|
|
bool Lexer::is_punctuation(char c)
|
|
|
|
{
|
|
|
|
|
2012-12-08 19:59:05 +00:00
|
|
|
for(unsigned int i=0; i< punctuation.size(); i++)
|
2012-11-02 23:07:00 +00:00
|
|
|
{
|
|
|
|
if(c == punctuation[i]) return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// True if `value` exactly matches one of the registered operators.
bool Lexer::is_operator(string value)
{
    for(const string& oper : operators)
    {
        if(oper == value) return true;
    }
    return false;
}
|
|
|
|
|
2012-11-04 13:26:36 +00:00
|
|
|
/// Classify a token's raw text.
///
/// Decision order:
///  - empty string                  -> t_invalid_token
///  - starts with a digit           -> t_integer / t_float (one '.' max),
///                                     anything malformed -> t_invalid_token
///  - starts with a letter          -> t_keyword if registered, else t_symbol
///  - starts with '"'               -> t_literal if it also ends with '"',
///                                     else t_invalid_token
///  - one char                      -> t_punctuation or t_operator if registered
///  - two chars                     -> t_operator if registered
///  - everything else               -> t_invalid_token
type_of_token Lexer::guess_type(string value)
{
    if(value == "") return t_invalid_token;

    // Numeric token: digits with at most one decimal point.
    if(isdigit(value[0]))
    {
        bool seen_dot = false;
        bool digits_only = true;

        for(unsigned int pos = 0; pos < value.size(); pos++)
        {
            char c = value[pos];
            if(c == '.')
            {
                if(seen_dot) return t_invalid_token;  // second '.' is malformed
                seen_dot = true;
            }
            else if(!isdigit(c))
            {
                digits_only = false;                  // e.g. "12ab"
            }
        }

        if(!digits_only) return t_invalid_token;
        return seen_dot ? t_float : t_integer;
    }

    // Word token: keyword if registered, plain symbol otherwise.
    if(isalpha(value[0]))
    {
        return is_keyword(value) ? t_keyword : t_symbol;
    }

    // String literal: must carry both opening and closing quotes.
    if(value[0]=='\"')
    {
        return (value[value.size()-1] == '\"') ? t_literal : t_invalid_token;
    }

    // Single character: punctuation takes precedence over operators.
    if(value.size() == 1 )
    {
        if(is_punctuation(value[0])) return t_punctuation;
        if(is_operator(value)) return t_operator;
    }

    // Two characters can only be a registered operator (e.g. "==", "+=").
    if(value.size() == 2 && is_operator(value)) return t_operator;

    return t_invalid_token;
}
|
|
|
|
|
|
|
|
|
|
|
|
|