authorJustyna Att Ilczuk <justyna.ilczuk@gmail.com>2012-10-28 13:11:20 +0100
committerJustyna Att Ilczuk <justyna.ilczuk@gmail.com>2012-10-28 13:11:20 +0100
commite15c8be93ac3a5913122d0d045dd0bdc6120032e (patch)
treea40566d2a3cddd97cc1c0ed0ba2c454ac8773318
parenta38163f4080d0d5d20404df8e0434bbfa2707872 (diff)
downloadsencha-lang-e15c8be93ac3a5913122d0d045dd0bdc6120032e.tar.gz
sencha-lang-e15c8be93ac3a5913122d0d045dd0bdc6120032e.tar.bz2
sencha-lang-e15c8be93ac3a5913122d0d045dd0bdc6120032e.tar.xz
sencha-lang-e15c8be93ac3a5913122d0d045dd0bdc6120032e.zip
I've got a kind of lexer and parser
-rw-r--r--   sencha/Lexer.cpp             172
-rw-r--r--   sencha/Lexer.h                37
-rw-r--r--   sencha/Parser.cpp            295
-rw-r--r--   sencha/Parser.h               44
-rw-r--r--   sencha/Token.cpp               2
-rw-r--r--   sencha/Token.h                25
-rwxr-xr-x   sencha/bin/Debug/sencha      bin  0 -> 301486 bytes
-rw-r--r--   sencha/include/token.h        14
-rw-r--r--   sencha/main.cpp               48
-rw-r--r--   sencha/obj/Debug/Lexer.o     bin  0 -> 367920 bytes
-rw-r--r--   sencha/obj/Debug/Parser.o    bin  0 -> 221664 bytes
-rw-r--r--   sencha/obj/Debug/Token.o     bin  0 -> 11520 bytes
-rw-r--r--   sencha/obj/Debug/main.o      bin  0 -> 172656 bytes
-rw-r--r--   sencha/sencha.cbp             46
-rw-r--r--   sencha/sencha.depend          34
-rw-r--r--   sencha/sencha.layout          16
-rw-r--r--   sencha/src/token.cpp          11
17 files changed, 744 insertions, 0 deletions
diff --git a/sencha/Lexer.cpp b/sencha/Lexer.cpp
new file mode 100644
index 0000000..820c556
--- /dev/null
+++ b/sencha/Lexer.cpp
@@ -0,0 +1,172 @@
+#include "Lexer.h"
+
+Lexer::Lexer()
+{
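+ // Seed the default keyword, punctuation and operator tables used for token classification.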
+ string keys[] = {"function", "class", "penis"};
+ keywords.assign(keys, keys+3);
+
+ char punct[] = {'.', ',', ';', '{', '}', '[', ']', '(', ')'};
+ punctuation.assign(punct, punct+9);
+
+ string oper[] = {"<", ">", "+", "-", "/", "*", "%", "&", "|", "=", ":", "==", "+=", "-=", "<=", ">=", "!", "&&", "||"};
+ operators.assign(oper, oper +19);
+}
+
+Lexer::~Lexer()
+{
+ //dtor
+}
+
+void Lexer::add_keyword(string word)
+{
+ if(!is_keyword(word))
+ {
+ keywords.push_back(word);
+ }
+}
+void Lexer::add_punctuation_char(char c)
+{
+ if(!is_punctuation(c))
+ {
+ punctuation.push_back(c);
+ }
+}
+
+void Lexer::add_operator(string oper)
+{
+ if(!is_operator(oper))
+ {
+ operators.push_back(oper);
+ }
+}
+
+
+vector<Token> Lexer::parse_line(string line)
+{
+ vector<Token> tokens;
+ while(line != "")
+ {
+ pair<string, Token> result_of_parsing = parse_token(line);
+ line = result_of_parsing.first;
+ Token token = result_of_parsing.second;
+ if(token.get_value() != "")
+ {
+ tokens.push_back(token);
+ }
+ }
+ return tokens;
+
+}
+
+pair<string, Token> Lexer::parse_token(string line)
+{
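+ // Scan from the left: skip leading whitespace, accumulate identifier/number/string
+ // characters, or take a single punctuation/operator character (with a one-character
+ // look-ahead for '='), then return the unparsed rest of the line together with the token.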
+ string token_value = "";
+ unsigned int i;
+ for(i=0; i< line.size(); i++)
+ {
+ if(token_value == "" && isspace(line[i])) continue;
+
+ if(isalnum(line[i]) || line[i] == '\"' || line[i]== '_')
+ {
+ token_value += line[i];
+ }
+ else if(ispunct(line[i]))
+ {
+ if(token_value=="")
+ {
+ token_value=line[i];
+ i++;
+ if(i<line.size())
+ {
+ if(line[i] == '=')
+ token_value+=line[i];
+ }
+ }
+ break;
+ }
+ else break;
+ }
+
+ Token token = Token(guess_type(token_value), token_value);
+ string truncated_line = line.substr(i);
+
+ return pair<string, Token>(truncated_line, token);
+}
+
+bool Lexer::is_keyword(string value)
+{
+ for(int i=0; i< keywords.size(); i++)
+ {
+ if(value == keywords[i]) return true;
+ }
+ return false;
+}
+
+bool Lexer::is_punctuation(char c)
+{
+
+ for(int i=0; i< punctuation.size(); i++)
+ {
+ if(c == punctuation[i]) return true;
+ }
+ return false;
+}
+
+bool Lexer::is_operator(string value)
+{
+ for(int i=0; i< operators.size(); i++)
+ {
+ if(value == operators[i]) return true;
+ }
+ return false;
+}
+
+type_of_token Lexer::guess_type(string value) //TODO: add t_float support
+{
+ /* Possible results (see Token.h):
+ t_invalid_token, t_symbol, t_integer, t_literal,
+ t_punctuation, t_keyword, t_operator (t_float is not lexed yet)
+ */
+
+ if(value == "") return t_invalid_token;
+ if(isdigit(value[0]))
+ {
+ bool is_integer = true;
+ for(int i=1; i<value.size(); i++)
+ {
+ if(!isdigit(value[i])) is_integer = false;
+ }
+
+ if(is_integer) return t_integer;
+ else return t_invalid_token;
+ }
+ if(isalpha(value[0]))
+ {
+ if(is_keyword(value)) return t_keyword;
+ else return t_symbol;
+
+ }
+
+ if(value[0]=='\"')
+ {
+ if(value[value.size()-1] == '\"') return t_literal;
+ else return t_invalid_token;
+ }
+
+ if(value.size() == 1 )
+ {
+ if(is_punctuation(value[0])) return t_punctuation;
+ else
+ {
+ if(is_operator(value)) return t_operator;
+ }
+ }
+ if(value.size() == 2 && is_operator(value)) return t_operator;
+
+ //If any...
+ return t_invalid_token;
+}
+
+
+
diff --git a/sencha/Lexer.h b/sencha/Lexer.h
new file mode 100644
index 0000000..92b6c09
--- /dev/null
+++ b/sencha/Lexer.h
@@ -0,0 +1,37 @@
+#ifndef LEXER_H
+#define LEXER_H
+#include <vector>
+#include <string>
+#include <utility>
+#include <cctype>
+#include <iostream>
+#include "Token.h"
+
+using namespace std;
+class Lexer
+{
+ public:
+ Lexer();
+ virtual ~Lexer();
+
+ bool is_keyword(string value);
+ bool is_punctuation(char c);
+ bool is_operator(string value );
+
+ vector<string> keywords;
+ vector<char> punctuation;
+ vector<string> operators;
+
+ void add_keyword(string word);
+ void add_punctuation_char(char c);
+ void add_operator(string oper);
+
+ vector<Token> parse_line(string line);
+ pair<string, Token> parse_token(string line);
+ type_of_token guess_type(string value);
+
+ protected:
+ private:
+};
+
+#endif // LEXER_H
diff --git a/sencha/Parser.cpp b/sencha/Parser.cpp
new file mode 100644
index 0000000..9b816c1
--- /dev/null
+++ b/sencha/Parser.cpp
@@ -0,0 +1,295 @@
+#include "Parser.h"
+#include "iostream"
+
+Parser::Parser(vector<Token> tokens)
+{
+ //token_stream.push_back(Token(t_symbol, "dupa"));
+ error_message = "***ERRORS DURING PARSING***\n";
+ report_message = "***PARSER REPORT***\n";
+ token_stream = tokens;
+ position_in_stream = 0;
+ read_next();
+}
+
+Parser::~Parser()
+{
+ //dtor
+}
+
+void Parser::report(string s)
+{
+ report_message += s + "\n";
+}
+
+void Parser::error(string s)
+{
+ error_message += s + "\n";
+}
+
+bool Parser::read_next()
+{
+ if(position_in_stream < token_stream.size())
+ {
+ current_token = token_stream[position_in_stream];
+ tok_value = current_token.get_value();
+ position_in_stream++;
+ cout << "***Reading token: ..." << endl;
+ cout << "Token value: " << tok_value << "***" << endl << endl;
+ return true;
+ }
+ else
+ {
+ current_token = Token(t_invalid_token, "");
+ tok_value = current_token.get_value();
+ return false;
+ }
+}
+
+void Parser::interpret()
+{
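+ // Top-level loop: a leading type name ("def", "string", "num") starts a variable
+ // or function definition, anything else is handled as a regular statement.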
+ while(tok_value!= "")
+ {
+ if(is_type())
+ {
+ report("Identifier: " + tok_value + "\n");
+ read_next();
+
+ if(accept("="))
+ {
+ expr();
+ report(" := ");
+ }
+ if(accept(";"))
+ {
+ report("Variable definition\n");
+ continue;
+ }
+
+ expect("(");
+ int argc = 0;
+ while(true)
+ {
+ argc++;
+ is_type();
+ report("function argument: " + tok_value + "\n");
+ read_next();
+ if(peek(")"))
+ {
+ break;
+ }
+ expect(",");
+ }
+ expect(")");
+
+ if(!accept(";"))
+ {
+ report("function body:\n");
+ statement();
+ }
+ }
+
+ else {
+ report("Regular statement:\n");
+ statement();
+ }
+ }
+}
+bool Parser::peek(string s)
+{
+ return tok_value == s;
+}
+
+bool Parser::accept(string s)
+{
+ if(peek(s))
+ {
+ read_next();
+ return true;
+ }
+ else return false;
+}
+
+bool Parser::expect(string s)
+{
+ if(!accept(s))
+ {
+ string error_message = "Error: expected ";
+ error_message += s;
+ error_message += " but received: " + tok_value + "\n";
+
+ error(error_message);
+ return false;
+ }
+ else return true;
+}
+
+bool Parser::is_type()
+{
+ if(current_token.get_type() == t_symbol || current_token.get_type() == t_keyword)
+ {
+ if(tok_value == "def" || tok_value == "string" || tok_value == "num")
+ {
+ read_next();
+ return true;
+ }
+ else return false;
+
+ }
+ else return false;
+}
+
+void Parser::statement()
+{
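+ // A statement is either a { } block, a local variable definition, an if/while
+ // construct (still TODO), a return, or a bare expression terminated by ';'.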
+ if(accept("{"))
+ {
+ while(!accept("}"))
+ {
+ statement();
+ }
+ }
+ else if(is_type())
+ {
+ report("Local variable: " + tok_value + "\n");
+ read_next();
+ if(accept("="))
+ {
+ expr();
+ report(" := ");
+ }
+ expect(";");
+ }
+ else if(accept("if"))
+ {
+ //TODO: parse the condition and the body of the if statement
+ }
+ else if(accept("while"))
+ {
+ //TODO: same for while loops
+ }
+ else if(accept("return"))
+ {
+ if(!peek(";"))
+ {
+ expr();
+ }
+ expect(";");
+ report("RETURN\n");
+
+ }
+ else
+ {
+ expr();
+ expect(";");
+ }
+}
+
+void Parser::prim_expr()
+{
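+ // A primary expression is a number, a variable, a string literal,
+ // or a parenthesized sub-expression.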
+ if(current_token.get_type() == t_integer)
+ {
+ report("Number: " + tok_value + "\n");
+ }
+ else if(current_token.get_type() == t_symbol)
+ {
+ report("Variable: " + tok_value + "\n");
+ }
+ else if(current_token.get_type() == t_literal)
+ {
+ report("Character literal: " + tok_value + "\n");
+ }
+ else if(accept("("))
+ {
+ expr();
+ expect(")");
+ return; // accept/expect already consumed the tokens of this branch
+ }
+ else
+ {
+ error("ERROR: unexpected primary expression: " + tok_value + "\n");
+ }
+ read_next();
+}
+
+void Parser::postfix_expr()
+{
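+ // After the primary expression, handle array indexing with [...] or a call argument list in (...).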
+ prim_expr();
+ if(accept("["))
+ {
+ expr();
+ expect("]");
+ report(" [] ");
+
+ }
+ else if(accept("("))
+ {
+ if(!accept(")"))
+ {
+ expr();
+ report("function argument\n");
+ while(accept(","))
+ {
+ expr();
+ report("function argument\n");
+ }
+ expect(")");
+ }
+ report("FUNC_CALL\n");
+ }
+}
+
+void Parser::add_expr()
+{
+ postfix_expr();
+ while(peek("+") || peek("-"))
+ {
+ if(accept("+"))
+ {
+ postfix_expr();
+ report(" + ");
+ } else if(accept("-"))
+ {
+ postfix_expr();
+ report(" - ");
+ }
+ }
+}
+
+void Parser::rel_expr()
+{
+ add_expr();
+ while(peek("<"))
+ {
+ accept("<");
+ add_expr();
+ report(" < ");
+ }
+}
+
+void Parser::eq_expr()
+{
+ rel_expr();
+ while(peek("==") || peek("!="))
+ {
+ if(accept("=="))
+ {
+ rel_expr();
+ report("==");
+ }
+ else if(accept("!="))
+ {
+ rel_expr();
+ report("!=");
+ }
+ }
+}
+
+void Parser::expr()
+{
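+ // Expression entry point; precedence from loosest to tightest:
+ // assignment, equality, relational, additive, postfix, primary.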
+ eq_expr();
+ if(accept("="))
+ {
+ expr();
+ report(" := ");
+ }
+}
diff --git a/sencha/Parser.h b/sencha/Parser.h
new file mode 100644
index 0000000..7a35955
--- /dev/null
+++ b/sencha/Parser.h
@@ -0,0 +1,44 @@
+#ifndef PARSER_H
+#define PARSER_H
+#include <string>
+#include <vector>
+#include "Token.h"
+
+using namespace std;
+class Parser
+{
+ public:
+ Parser(vector<Token> tokens);
+ virtual ~Parser();
+
+ Token current_token;
+ string tok_value;
+ vector<Token> token_stream;
+ int position_in_stream;
+
+ string report_message;
+ string error_message;
+
+ bool read_next();
+ bool peek(string s);
+ bool accept(string s);
+ bool expect(string s);
+
+ void error(string s);
+ void report(string s);
+ void interpret();
+ void statement();
+
+ void add_expr();
+ void prim_expr();
+ void postfix_expr();
+ void rel_expr();
+ void eq_expr();
+ void expr();
+
+ bool is_type();
+ protected:
+ private:
+};
+
+#endif // PARSER_H
diff --git a/sencha/Token.cpp b/sencha/Token.cpp
new file mode 100644
index 0000000..873f4e7
--- /dev/null
+++ b/sencha/Token.cpp
@@ -0,0 +1,2 @@
+#include "Token.h"
+
diff --git a/sencha/Token.h b/sencha/Token.h
new file mode 100644
index 0000000..7c3552f
--- /dev/null
+++ b/sencha/Token.h
@@ -0,0 +1,25 @@
+#ifndef TOKEN_H
+#define TOKEN_H
+#include <string>
+
+
+using namespace std;
+
+typedef enum { t_invalid_token=0, t_symbol, t_integer, t_literal,
+ t_punctuation, t_keyword, t_operator, t_float } type_of_token; //t_float not implemented
+ //TODO implement t_float in lexer
+
+
+class Token
+{
+ public:
+ type_of_token type;
+ string value;
+ Token() : type(t_invalid_token), value("") {};
+ Token(type_of_token type, string value) : type(type), value(value) { };
+ type_of_token get_type() { return type; };
+ string get_value() { return value; };
+
+};
+
+#endif // TOKEN_H
diff --git a/sencha/bin/Debug/sencha b/sencha/bin/Debug/sencha
new file mode 100755
index 0000000..80e6bde
--- /dev/null
+++ b/sencha/bin/Debug/sencha
Binary files differ
diff --git a/sencha/include/token.h b/sencha/include/token.h
new file mode 100644
index 0000000..a3bf6ed
--- /dev/null
+++ b/sencha/include/token.h
@@ -0,0 +1,14 @@
+#ifndef TOKEN_H
+#define TOKEN_H
+
+
+class Token
+{
+ public:
+ Token();
+ virtual ~Token();
+ protected:
+ private:
+};
+
+#endif // TOKEN_H
diff --git a/sencha/main.cpp b/sencha/main.cpp
new file mode 100644
index 0000000..a003b9d
--- /dev/null
+++ b/sencha/main.cpp
@@ -0,0 +1,48 @@
+#include <iostream>
+#include <string>
+#include "Token.h"
+#include "Lexer.h"
+#include "Parser.h"
+
+using namespace std;
+
+void test_lexer()
+{
+ string test_line = "def i; bulb; i + 3; string banan; banan = \"banan\"; string kaboom(num how_many_times) { def z; }";
+ string test_line2 = "i like \"lol\" ,function: bananananananas, lol, ==555% % % += 1sas /> \n";
+ Lexer lexer;
+ vector<Token> tokens = lexer.parse_line(test_line);
+
+
+ for(int i=0; i< tokens.size(); i++)
+ {
+ cout << tokens[i].get_value() << " type: " << tokens[i].get_type() << endl;
+ }
+
+
+ tokens = lexer.parse_line(test_line2);
+
+ for(int i=0; i< tokens.size(); i++)
+ {
+ cout << tokens[i].get_value() << " type: " << tokens[i].get_type() << endl;
+ }
+}
+
+void test_parser()
+{
+ string text_line = "def i; bulb; i + 3; string banan = \"kartofel\"; banan = \"banan\"; string kaboom(num how_many_times) { def z; }";
+ Lexer lexer;
+ vector<Token> tokens = lexer.parse_line(text_line);
+ Parser parser = Parser(tokens);
+ parser.interpret();
+ cout << parser.report_message;
+ cout << parser.error_message;
+
+}
+int main()
+{
+ cout << "Hello world!" << endl;
+ test_parser();
+ //test_lexer();
+ return 0;
+}
diff --git a/sencha/obj/Debug/Lexer.o b/sencha/obj/Debug/Lexer.o
new file mode 100644
index 0000000..3b22649
--- /dev/null
+++ b/sencha/obj/Debug/Lexer.o
Binary files differ
diff --git a/sencha/obj/Debug/Parser.o b/sencha/obj/Debug/Parser.o
new file mode 100644
index 0000000..e90665e
--- /dev/null
+++ b/sencha/obj/Debug/Parser.o
Binary files differ
diff --git a/sencha/obj/Debug/Token.o b/sencha/obj/Debug/Token.o
new file mode 100644
index 0000000..34612c7
--- /dev/null
+++ b/sencha/obj/Debug/Token.o
Binary files differ
diff --git a/sencha/obj/Debug/main.o b/sencha/obj/Debug/main.o
new file mode 100644
index 0000000..1d3aa1e
--- /dev/null
+++ b/sencha/obj/Debug/main.o
Binary files differ
diff --git a/sencha/sencha.cbp b/sencha/sencha.cbp
new file mode 100644
index 0000000..fc2c00e
--- /dev/null
+++ b/sencha/sencha.cbp
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
+<CodeBlocks_project_file>
+ <FileVersion major="1" minor="6" />
+ <Project>
+ <Option title="sencha" />
+ <Option pch_mode="2" />
+ <Option compiler="gcc" />
+ <Build>
+ <Target title="Debug">
+ <Option output="bin/Debug/sencha" prefix_auto="1" extension_auto="1" />
+ <Option object_output="obj/Debug/" />
+ <Option type="1" />
+ <Option compiler="gcc" />
+ <Compiler>
+ <Add option="-g" />
+ </Compiler>
+ </Target>
+ <Target title="Release">
+ <Option output="bin/Release/sencha" prefix_auto="1" extension_auto="1" />
+ <Option object_output="obj/Release/" />
+ <Option type="1" />
+ <Option compiler="gcc" />
+ <Compiler>
+ <Add option="-O2" />
+ </Compiler>
+ <Linker>
+ <Add option="-s" />
+ </Linker>
+ </Target>
+ </Build>
+ <Compiler>
+ <Add option="-Wall" />
+ <Add option="-fexceptions" />
+ </Compiler>
+ <Unit filename="Lexer.cpp" />
+ <Unit filename="Lexer.h" />
+ <Unit filename="Token.cpp" />
+ <Unit filename="Token.h" />
+ <Unit filename="main.cpp" />
+ <Extensions>
+ <envvars />
+ <code_completion />
+ <debugger />
+ </Extensions>
+ </Project>
+</CodeBlocks_project_file>
diff --git a/sencha/sencha.depend b/sencha/sencha.depend
new file mode 100644
index 0000000..1a22907
--- /dev/null
+++ b/sencha/sencha.depend
@@ -0,0 +1,34 @@
+# depslib dependency file v1.0
+1350761891 source:/home/attero/development/sencha-lang/sencha/Token.cpp
+ "Token.h"
+
+1351424620 /home/attero/development/sencha-lang/sencha/Token.h
+ <string>
+
+1351425805 source:/home/attero/development/sencha-lang/sencha/Lexer.cpp
+ "Lexer.h"
+
+1351413156 /home/attero/development/sencha-lang/sencha/Lexer.h
+ <vector>
+ <string>
+ <utility>
+ <cctype>
+ <iostream>
+ "Token.h"
+
+1351425833 source:/home/attero/development/sencha-lang/sencha/main.cpp
+ <iostream>
+ <string>
+ "Token.h"
+ "Lexer.h"
+ "Parser.h"
+
+1351426069 source:/home/attero/development/sencha-lang/sencha/Parser.cpp
+ "Parser.h"
+ "iostream"
+
+1351424740 /home/attero/development/sencha-lang/sencha/Parser.h
+ <string>
+ <vector>
+ "Token.h"
+
diff --git a/sencha/sencha.layout b/sencha/sencha.layout
new file mode 100644
index 0000000..9cb5f08
--- /dev/null
+++ b/sencha/sencha.layout
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
+<CodeBlocks_layout_file>
+ <ActiveTarget name="Debug" />
+ <File name="Lexer.cpp" open="1" top="0" tabpos="5">
+ <Cursor position="1105" topLine="0" />
+ </File>
+ <File name="Token.cpp" open="1" top="0" tabpos="3">
+ <Cursor position="22" topLine="0" />
+ </File>
+ <File name="Token.h" open="1" top="0" tabpos="2">
+ <Cursor position="327" topLine="0" />
+ </File>
+ <File name="main.cpp" open="1" top="1" tabpos="1">
+ <Cursor position="269" topLine="1" />
+ </File>
+</CodeBlocks_layout_file>
diff --git a/sencha/src/token.cpp b/sencha/src/token.cpp
new file mode 100644
index 0000000..84c5d4b
--- /dev/null
+++ b/sencha/src/token.cpp
@@ -0,0 +1,11 @@
+#include "../../Headers/include/token.h"
+
+Token::Token()
+{
+ //ctor
+}
+
+Token::~Token()
+{
+ //dtor
+}