diff options
author | Justyna Att Ilczuk <justyna.ilczuk@gmail.com> | 2012-10-28 13:11:20 +0100 |
---|---|---|
committer | Justyna Att Ilczuk <justyna.ilczuk@gmail.com> | 2012-10-28 13:11:20 +0100 |
commit | e15c8be93ac3a5913122d0d045dd0bdc6120032e (patch) | |
tree | a40566d2a3cddd97cc1c0ed0ba2c454ac8773318 | |
parent | a38163f4080d0d5d20404df8e0434bbfa2707872 (diff) | |
download | sencha-lang-e15c8be93ac3a5913122d0d045dd0bdc6120032e.tar.gz sencha-lang-e15c8be93ac3a5913122d0d045dd0bdc6120032e.tar.bz2 sencha-lang-e15c8be93ac3a5913122d0d045dd0bdc6120032e.tar.xz sencha-lang-e15c8be93ac3a5913122d0d045dd0bdc6120032e.zip |
I've got a kind of lexer and parser
# Please enter the commit message for your changes. Lines starting
-rw-r--r-- | sencha/Lexer.cpp | 172 | ||||
-rw-r--r-- | sencha/Lexer.h | 37 | ||||
-rw-r--r-- | sencha/Parser.cpp | 295 | ||||
-rw-r--r-- | sencha/Parser.h | 44 | ||||
-rw-r--r-- | sencha/Token.cpp | 2 | ||||
-rw-r--r-- | sencha/Token.h | 25 | ||||
-rwxr-xr-x | sencha/bin/Debug/sencha | bin | 0 -> 301486 bytes | |||
-rw-r--r-- | sencha/include/token.h | 14 | ||||
-rw-r--r-- | sencha/main.cpp | 48 | ||||
-rw-r--r-- | sencha/obj/Debug/Lexer.o | bin | 0 -> 367920 bytes | |||
-rw-r--r-- | sencha/obj/Debug/Parser.o | bin | 0 -> 221664 bytes | |||
-rw-r--r-- | sencha/obj/Debug/Token.o | bin | 0 -> 11520 bytes | |||
-rw-r--r-- | sencha/obj/Debug/main.o | bin | 0 -> 172656 bytes | |||
-rw-r--r-- | sencha/sencha.cbp | 46 | ||||
-rw-r--r-- | sencha/sencha.depend | 34 | ||||
-rw-r--r-- | sencha/sencha.layout | 16 | ||||
-rw-r--r-- | sencha/src/token.cpp | 11 |
17 files changed, 744 insertions, 0 deletions
diff --git a/sencha/Lexer.cpp b/sencha/Lexer.cpp new file mode 100644 index 0000000..820c556 --- /dev/null +++ b/sencha/Lexer.cpp @@ -0,0 +1,172 @@ +#include "Lexer.h"
+
+Lexer::Lexer()
+{
+ string keys[] = {"function", "class", "penis"}; + keywords.assign(keys, keys+3); + + char punct[] = {'.', ',', ';', '{', '}', '[', ']', '(', ')'}; + punctuation.assign(punct, punct+9); + + string oper[] = {"<", ">", "+", "-", "/", "*", "%", "&", "|", "=", ":", "==", "+=", "-=", "<=", ">=", "!", "&&", "||"}; + operators.assign(oper, oper +19);
+}
+
+Lexer::~Lexer()
+{
+ //dtor
+}
+ +void Lexer::add_keyword(string word) +{ + if(!is_keyword(word)) + { + keywords.push_back(word); + } +} +void Lexer::add_punctuation_char(char c) +{ + if(!is_punctuation(c)) + { + punctuation.push_back(c); + } +} + +void Lexer::add_operator(string oper) +{ + if(!is_operator(oper)) + { + operators.push_back(oper); + } +} + + +vector<Token> Lexer::parse_line(string line) +{ + vector<Token> tokens; + while(line != "") + { + pair<string, Token> result_of_parsing = parse_token(line); + line = result_of_parsing.first; + Token token = result_of_parsing.second; + if(token.get_value() != "") + { + tokens.push_back(token); + } + } + return tokens; + +} + +pair<string, Token> Lexer::parse_token(string line) +{ + string token_value = ""; + unsigned int i; + for(i=0; i< line.size(); i++) + { + if(token_value == "" && isspace(line[i])) continue; + + if(isalnum(line[i]) || line[i] == '\"' || line[i]== '_') + { + token_value += line[i]; + } + else if(ispunct(line[i])) + { + if(token_value=="") + { + token_value=line[i]; + i++; + if(i<line.size()) + { + if(line[i] == '=') + token_value+=line[i]; + } + } + break; + } + else break; + } + + Token token = Token(guess_type(token_value), token_value); + string truncated_line = line.substr(i); + + return pair<string, Token>(truncated_line, token); +} + +bool Lexer::is_keyword(string value) +{ + for(int i=0; i< keywords.size(); i++) + { + if(value == keywords[i]) return true; + } + return false; +} + +bool Lexer::is_punctuation(char c) +{ + + for(int i=0; i< punctuation.size(); i++) + { + if(c == punctuation[i]) return true; + } + return false; +} + +bool Lexer::is_operator(string value) +{ + for(int i=0; i< operators.size(); i++) + { + if(value == operators[i]) return true; + } + return false; +} + +type_of_token Lexer::guess_type(string value) //TODO, hey wouldn't it be nice, to implement it? 
+{ + /* I can have one of these types: + typedef enum { t_invalid_token=0, t_symbol, t_integer, t_literal, + t_punctuation, t_keyword } type_of_token; + + */ + + if(value == "") return t_invalid_token; + if(isdigit(value[0])) + { + bool is_integer = true; + for(int i=1; i<value.size(); i++) + { + if(!isdigit(value[i])) is_integer = false; + } + + if(is_integer) return t_integer; + else return t_invalid_token; + } + if(isalpha(value[0])) + { + if(is_keyword(value)) return t_keyword; + else return t_symbol; + + } + + if(value[0]=='\"') + { + if(value[value.size()-1] == '\"') return t_literal; + else return t_invalid_token; + } + + if(value.size() == 1 ) + { + if(is_punctuation(value[0])) return t_punctuation; + else + { + if(is_operator(value)) return t_operator; + } + } + if(value.size() == 2 && is_operator(value)) return t_operator; + + //If any... + return t_invalid_token; +} + + + diff --git a/sencha/Lexer.h b/sencha/Lexer.h new file mode 100644 index 0000000..92b6c09 --- /dev/null +++ b/sencha/Lexer.h @@ -0,0 +1,37 @@ +#ifndef LEXER_H
+#define LEXER_H
+#include <vector>
+#include <string> +#include <utility> +#include <cctype> +#include <iostream> +#include "Token.h" + +using namespace std;
+class Lexer
+{
+ public:
+ Lexer();
+ virtual ~Lexer(); + + bool is_keyword(string value); + bool is_punctuation(char c); + bool is_operator(string value ); + + vector<string> keywords; + vector<char> punctuation; + vector<string> operators; + + void add_keyword(string word); + void add_punctuation_char(char c); + void add_operator(string oper); + + vector<Token> parse_line(string line); + pair<string, Token> parse_token(string line); + type_of_token guess_type(string value); +
+ protected:
+ private:
+};
+
+#endif // LEXER_H
diff --git a/sencha/Parser.cpp b/sencha/Parser.cpp new file mode 100644 index 0000000..9b816c1 --- /dev/null +++ b/sencha/Parser.cpp @@ -0,0 +1,295 @@ +#include "Parser.h"
+#include "iostream" +
+Parser::Parser(vector<Token> tokens)
+{ + //token_stream.push_back(Token(t_symbol, "dupa")); + error_message = "***ERRORS DURING PARSING***\n"; + report_message = "***PARSER REPORT***\n"; + token_stream = tokens; + position_in_stream = 0; + read_next();
+}
+
+Parser::~Parser()
+{
+ //dtor
+}
+ +void Parser::report(string s) +{ + report_message += s + "\n"; +} + +void Parser::error(string s) +{ + error_message += s + "\n"; +} + +bool Parser::read_next() +{ + if(position_in_stream < token_stream.size()) + { + current_token = token_stream[position_in_stream]; + tok_value = current_token.get_value(); + position_in_stream++; + cout << "***Reading token: ..." << endl; + cout << "Token value: " << tok_value << "***" << endl << endl; + return true; + } + else + { + current_token = Token(t_invalid_token, ""); + tok_value = current_token.get_value(); + return false; + } +} + +void Parser::interpret() +{ + while(tok_value!= "") + { + if(is_type()) + { + report("Identifier: " + tok_value + "\n"); + read_next(); + + if(accept("=")) + { + expr(); + report(" := "); + } + if(accept(";")) + { + report("Variable definition\n"); + continue; + } + + expect("("); + int argc = 0; + while(true) + { + argc++; + is_type(); + report("function argument: " + tok_value + "\n"); + read_next(); + if(peek(")")) + { + break; + } + expect(","); + } + expect(")"); + + if(!accept(";")) + { + report("function body:\n"); + statement(); + } + } + + else { + report("Regular statement:\n"); + statement(); + } + } +} +bool Parser::peek(string s) +{ + return tok_value == s; +} + +bool Parser::accept(string s) +{ + if(peek(s)) + { + read_next(); + return true; + } + else return false; +} + +bool Parser::expect(string s) +{ + if(!accept(s)) + { + string error_message = "Error: expected "; + error_message += s; + error_message += " but received: " + tok_value + "\n"; + + error(error_message); + return false; + } + else return true; +} + +bool Parser::is_type() +{ + if(current_token.get_type() == t_symbol || current_token.get_type() == t_keyword) + { + if(tok_value == "def" || tok_value == "string" || tok_value == "num") + { + read_next(); + return true; + } + else return false; + + } + else return false; +} + +void Parser::statement() +{ + if(accept("{")) + { + while(!accept("}")) + { + 
statement(); + } + } + else if(is_type()) + { + report("Local variable: " + tok_value + "\n"); + read_next(); + if(accept("=")) + { + expr(); + report(" := "); + } + expect(";"); + } + else if(accept("if")) + { + //stuff + //TODO implement that + } + else if(accept("while")) + { + //similar stuff + } + else if(tok_value == "return") + { + if(!peek(";")) + { + expr(); + } + expect(";"); + report("RETURN\n"); + + } + else + { + expr(); + expect(";"); + } +} + +void Parser::prim_expr() +{ + if(current_token.get_type() == t_integer) + { + report("Number: " + tok_value + "\n"); + } + else if(current_token.get_type() == t_symbol) + { + report("Variable: " + tok_value + "\n"); + } + else if(current_token.get_type() == t_literal) + { + report("Character literal: " + tok_value + "\n"); + } + else if(accept("(")) + { + expr(); + expect(")"); + } + else + { + error("ERROR: unexpected primary expression" + tok_value + "\n"); + + } + read_next(); +} + +void Parser::postfix_expr() +{ + prim_expr(); + if(accept("[")) + { + expr(); + expect("]"); + report(" [] "); + + } + else if(accept("(")) + { + if(!accept(")")) + { + expr(); + report("function argument\n"); + while(accept(",")) + { + expr(); + report("function argument\n"); + } + expect(")"); + } + report("FUNC_CALL\n"); + } +} + +void Parser::add_expr() +{ + postfix_expr(); + while(peek("+") || peek("-")) + { + if(accept("+")) + { + postfix_expr(); + report(" + "); + } else if(accept("-")) + { + postfix_expr(); + report(" - "); + } + } +} + +void Parser::rel_expr() +{ + add_expr(); + while(peek("<")) + { + accept("<"); + add_expr(); + report(" < "); + } +} + +void Parser::eq_expr() +{ + rel_expr(); + while(peek("==") || peek("!=")) + { + if(accept("==")) + { + rel_expr(); + report("=="); + } + else if(accept("!=")) + { + rel_expr(); + report("!="); + } + } +} + +void Parser::expr() +{ + eq_expr(); + if(accept("=")) + { + expr(); + report(" := "); + } +} diff --git a/sencha/Parser.h b/sencha/Parser.h new file mode 100644 index 
0000000..7a35955 --- /dev/null +++ b/sencha/Parser.h @@ -0,0 +1,44 @@ +#ifndef PARSER_H
+#define PARSER_H
+#include <string> +#include <vector> +#include "Token.h"
+ +using namespace std;
+class Parser
+{
+ public:
+ Parser(vector<Token> tokens);
+ virtual ~Parser(); + + Token current_token; + string tok_value; + vector<Token> token_stream; + int position_in_stream; + + string report_message; + string error_message; + + bool read_next(); + bool peek(string s); + bool accept(string s); + bool expect(string s); + + void error(string s); + void report(string s); + void interpret(); + void statement(); + + void add_expr(); + void prim_expr(); + void postfix_expr(); + void rel_expr(); + void eq_expr(); + void expr(); + + bool is_type();
+ protected:
+ private:
+};
+
+#endif // PARSER_H
diff --git a/sencha/Token.cpp b/sencha/Token.cpp new file mode 100644 index 0000000..873f4e7 --- /dev/null +++ b/sencha/Token.cpp @@ -0,0 +1,2 @@ +#include "Token.h"
+
#ifndef TOKEN_H
#define TOKEN_H
#include <string>

using namespace std;

// Lexical categories a Token can carry.  t_float is reserved but not yet
// produced by the lexer (TODO: implement float literals in Lexer).
typedef enum { t_invalid_token=0, t_symbol, t_integer, t_literal,
               t_punctuation, t_keyword, t_operator, t_float } type_of_token;

// A single lexical token: its category plus the exact source text.
class Token
{
    public:
        type_of_token type;   // category assigned by Lexer::guess_type()
        string value;         // raw text of the token

        // Default-constructed tokens are explicitly invalid; the parser
        // uses such a token (empty value) as its end-of-stream sentinel.
        Token() : type(t_invalid_token), value("") {}
        // Fix: take the value by const reference instead of by value to
        // avoid an extra copy on every token construction.
        Token(type_of_token type, const string & value) : type(type), value(value) {}

        // Fix: accessors are now const so tokens can be inspected through
        // const references.
        type_of_token get_type() const { return type; }
        string get_value() const { return value; }
};

#endif // TOKEN_H
#ifndef TOKEN_H
#define TOKEN_H

// Code::Blocks-generated stub declaration (sencha/include/token.h).
// NOTE(review): this guard collides with sencha/Token.h's TOKEN_H, so a
// translation unit can only ever see one of the two headers — confirm
// whether this stub is still needed or can be deleted.
class Token
{
    public:
        Token();
        virtual ~Token();
    protected:
    private:
};

#endif // TOKEN_H
diff --git a/sencha/main.cpp b/sencha/main.cpp new file mode 100644 index 0000000..a003b9d --- /dev/null +++ b/sencha/main.cpp @@ -0,0 +1,48 @@ +#include <iostream> +#include <string>
+#include "Token.h" +#include "Lexer.h" +#include "Parser.h" +
+using namespace std;
+ +void test_lexer() +{ + string test_line = "def i; bulb; i + 3; string banan; banan = \"banan\"; string kaboom(num how_many_times) { def z; }"; + string test_line2 = "i like \"lol\" ,function: bananananananas, lol, ==555% % % += 1sas /> \n"; + Lexer lexer; + vector<Token> tokens = lexer.parse_line(test_line); + + + for(int i=0; i< tokens.size(); i++) + { + cout << tokens[i].get_value() << " type: " << tokens[i].get_type() << endl; + } + + + tokens = lexer.parse_line(test_line2); + + for(int i=0; i< tokens.size(); i++) + { + cout << tokens[i].get_value() << " type: " << tokens[i].get_type() << endl; + } +} + +void test_parser() +{ + string text_line = "def i; bulb; i + 3; string banan = \"kartofel\"; banan = \"banan\"; string kaboom(num how_many_times) { def z; }"; + Lexer lexer; + vector<Token> tokens = lexer.parse_line(text_line); + Parser parser = Parser(tokens); + parser.interpret(); + cout << parser.report_message; + cout << parser.error_message; + +}
+int main()
+{
+ cout << "Hello world!" << endl; + test_parser(); + //test_lexer();
+ return 0;
+}
diff --git a/sencha/obj/Debug/Lexer.o b/sencha/obj/Debug/Lexer.o Binary files differnew file mode 100644 index 0000000..3b22649 --- /dev/null +++ b/sencha/obj/Debug/Lexer.o diff --git a/sencha/obj/Debug/Parser.o b/sencha/obj/Debug/Parser.o Binary files differnew file mode 100644 index 0000000..e90665e --- /dev/null +++ b/sencha/obj/Debug/Parser.o diff --git a/sencha/obj/Debug/Token.o b/sencha/obj/Debug/Token.o Binary files differnew file mode 100644 index 0000000..34612c7 --- /dev/null +++ b/sencha/obj/Debug/Token.o diff --git a/sencha/obj/Debug/main.o b/sencha/obj/Debug/main.o Binary files differnew file mode 100644 index 0000000..1d3aa1e --- /dev/null +++ b/sencha/obj/Debug/main.o diff --git a/sencha/sencha.cbp b/sencha/sencha.cbp new file mode 100644 index 0000000..fc2c00e --- /dev/null +++ b/sencha/sencha.cbp @@ -0,0 +1,46 @@ +<?xml version="1.0" encoding="UTF-8" standalone="yes" ?> +<CodeBlocks_project_file> + <FileVersion major="1" minor="6" /> + <Project> + <Option title="sencha" /> + <Option pch_mode="2" /> + <Option compiler="gcc" /> + <Build> + <Target title="Debug"> + <Option output="bin/Debug/sencha" prefix_auto="1" extension_auto="1" /> + <Option object_output="obj/Debug/" /> + <Option type="1" /> + <Option compiler="gcc" /> + <Compiler> + <Add option="-g" /> + </Compiler> + </Target> + <Target title="Release"> + <Option output="bin/Release/sencha" prefix_auto="1" extension_auto="1" /> + <Option object_output="obj/Release/" /> + <Option type="1" /> + <Option compiler="gcc" /> + <Compiler> + <Add option="-O2" /> + </Compiler> + <Linker> + <Add option="-s" /> + </Linker> + </Target> + </Build> + <Compiler> + <Add option="-Wall" /> + <Add option="-fexceptions" /> + </Compiler> + <Unit filename="Lexer.cpp" /> + <Unit filename="Lexer.h" /> + <Unit filename="Token.cpp" /> + <Unit filename="Token.h" /> + <Unit filename="main.cpp" /> + <Extensions> + <envvars /> + <code_completion /> + <debugger /> + </Extensions> + </Project> +</CodeBlocks_project_file> diff 
--git a/sencha/sencha.depend b/sencha/sencha.depend new file mode 100644 index 0000000..1a22907 --- /dev/null +++ b/sencha/sencha.depend @@ -0,0 +1,34 @@ +# depslib dependency file v1.0 +1350761891 source:/home/attero/development/sencha-lang/sencha/Token.cpp + "Token.h" + +1351424620 /home/attero/development/sencha-lang/sencha/Token.h + <string> + +1351425805 source:/home/attero/development/sencha-lang/sencha/Lexer.cpp + "Lexer.h" + +1351413156 /home/attero/development/sencha-lang/sencha/Lexer.h + <vector> + <string> + <utility> + <cctype> + <iostream> + "Token.h" + +1351425833 source:/home/attero/development/sencha-lang/sencha/main.cpp + <iostream> + <string> + "Token.h" + "Lexer.h" + "Parser.h" + +1351426069 source:/home/attero/development/sencha-lang/sencha/Parser.cpp + "Parser.h" + "iostream" + +1351424740 /home/attero/development/sencha-lang/sencha/Parser.h + <string> + <vector> + "Token.h" + diff --git a/sencha/sencha.layout b/sencha/sencha.layout new file mode 100644 index 0000000..9cb5f08 --- /dev/null +++ b/sencha/sencha.layout @@ -0,0 +1,16 @@ +<?xml version="1.0" encoding="UTF-8" standalone="yes" ?> +<CodeBlocks_layout_file> + <ActiveTarget name="Debug" /> + <File name="Lexer.cpp" open="1" top="0" tabpos="5"> + <Cursor position="1105" topLine="0" /> + </File> + <File name="Token.cpp" open="1" top="0" tabpos="3"> + <Cursor position="22" topLine="0" /> + </File> + <File name="Token.h" open="1" top="0" tabpos="2"> + <Cursor position="327" topLine="0" /> + </File> + <File name="main.cpp" open="1" top="1" tabpos="1"> + <Cursor position="269" topLine="1" /> + </File> +</CodeBlocks_layout_file> diff --git a/sencha/src/token.cpp b/sencha/src/token.cpp new file mode 100644 index 0000000..84c5d4b --- /dev/null +++ b/sencha/src/token.cpp @@ -0,0 +1,11 @@ +#include "../../Headers/include/token.h"
+
+token::Token()
+{
+ //ctor
+}
+
+token::~Token()
+{
+ //dtor
+}
|