Basicprogramming(.org) > About dialect development
BASIC Dialect from scratch
n00b:
I finally got the second video up. Here is the link: https://youtu.be/hSLDiCWPvh0
And here is the final code from this video:
--- Code: ---#include <iostream>
#include <string>
#include <sstream>
#include <string.h>
using namespace std;
// Reserved words of the BASIC dialect. Nothing in this listing reads the
// table yet; it is only sized below.
string keywords[]=
{
    "IF",    "THEN", "ELSE",  "FOR",   "NEXT", "STEP",
    "TO",    "DO",   "LOOP",  "WHILE", "UNTIL", "WEND",
    "GOTO",  "DEF",  "PRINT", "INPUT", "REM",  "END"
};
int keyword_count = sizeof(keywords) / sizeof(keywords[0]);

// Single characters that terminate an identifier or number in the tokenizer.
string spChars[]=
{
    "^", "&", "*", "(", ")", "-", "+", "=",
    "[", "]", "\"", "<", ">", ",", "/", " "
};
int spChar_count = sizeof(spChars) / sizeof(spChars[0]);

// Tokens of the line currently being processed (filled by tokenizer()).
string tokens[100];
int token_count = 0;

// Textual VM instructions emitted by parse_expression().
string vm_asm[2000];
int vm_asm_count = 0;

// Raw segments: parse_expression() copies string literals into data_segment;
// code_segment is not used by this listing.
unsigned char data_segment[5000];
unsigned char code_segment[5000];
int data_offset = 0;
int code_offset = 0;

// Next free number ("m") and string ("s") VM variable indices.
int m_count = 0;
int s_count = 0;

// Not used by this listing.
int m_max = 0;
int s_max = 0;
// Returns true when `c` is exactly one of the entries in spChars[].
bool isSpecialCharacter(string c)
{
    int idx = 0;
    while(idx < spChar_count)
    {
        if(spChars[idx] == c)
            return true;
        ++idx;
    }
    return false;
}
// Returns true only when `c` is a one-character string holding a decimal
// digit ("0" through "9"); any other string, including "", yields false.
bool isNumber(string c)
{
    if(c.length() != 1)
        return false;
    const char ch = c[0];
    return ch >= '0' && ch <= '9';
}
bool tokenizer(string src_line)
{
token_count = 0;
src_line += " ";
string src_token = "";
for(int i = 0; i < src_line.length(); i++)
{
if(!isSpecialCharacter(src_line.substr(i,1)))
{
if(isNumber(src_line.substr(i,1)))
{
src_token = "<num>";
int d = 0;
for(; i < src_line.length(); i++)
{
if(isNumber(src_line.substr(i,1)))
{
src_token += src_line.substr(i,1);
}
else if(src_line.substr(i,1).compare(".")==0)
{
if(d==0)
{
src_token += ".";
d++;
}
else
{
cout << "Can only have one decimal in a number" << endl;
return false;
}
}
else if(isSpecialCharacter(src_line.substr(i,1)))
{
break;
}
else
{
return false;
}
}
tokens[token_count] = src_token;
token_count++;
src_token = "";
i--;
}
else
{
src_token = "<id>";
for(; i < src_line.length(); i++)
{
if(isSpecialCharacter(src_line.substr(i,1)))
break;
else
src_token += src_line.substr(i,1);
}
tokens[token_count] = src_token;
token_count++;
src_token = "";
i--;
}
}
else if(src_line.substr(i,1).compare("\"")==0)
{
//cout << "DEBUG: " << i << endl;
bool str_close = false;
src_token = "<string>";
for(i=i+1; i < src_line.length(); i++)
{
if(src_line.substr(i,1).compare("\"")==0)
{
//cout << "someting" << endl;
str_close = true;
break;
}
else
src_token += src_line.substr(i,1);
//cout << "str: " << src_line.substr(i,1) << endl;
}
if(str_close == false)
{
cout << "Did not close string" << endl;
return false;
}
tokens[token_count] = src_token;
token_count++;
src_token = "";
}
else if(src_line.substr(i,1).compare("+")==0)
{
tokens[token_count] = "<add>";
token_count++;
}
else if(src_line.substr(i,1).compare("-")==0)
{
tokens[token_count] = "<sub>";
token_count++;
}
else if(src_line.substr(i,1).compare("*")==0)
{
tokens[token_count] = "<mul>";
token_count++;
}
else if(src_line.substr(i,1).compare("/")==0)
{
tokens[token_count] = "<div>";
token_count++;
}
else if(src_line.substr(i,1).compare("^")==0)
{
tokens[token_count] = "<pow>";
token_count++;
}
else if(src_line.substr(i,1).compare("(")==0)
{
tokens[token_count] = "<par_open>";
token_count++;
}
else if(src_line.substr(i,1).compare(")")==0)
{
tokens[token_count] = "<par_close>";
token_count++;
}
else if(src_line.substr(i,1).compare("[")==0)
{
tokens[token_count] = "<square_open>";
token_count++;
}
else if(src_line.substr(i,1).compare("]")==0)
{
tokens[token_count] = "<square_close>";
token_count++;
}
else if(src_line.substr(i,1).compare("=")==0)
{
tokens[token_count] = "<equal>";
token_count++;
}
else if(src_line.substr(i,1).compare("&")==0)
{
tokens[token_count] = "<amp>";
token_count++;
}
else if(src_line.substr(i,1).compare(",")==0)
{
tokens[token_count] = "<sep>";
token_count++;
}
else if(src_line.substr(i,1).compare("<")==0)
{
tokens[token_count] = "<less>";
if(src_line.substr(i).length() > 1)
{
if(src_line.substr(i,2).compare("<=")==0)
{
tokens[token_count] = "<less_equal>";
i++;
}
else if(src_line.substr(i,2).compare("<>")==0)
{
tokens[token_count] = "<not_equal>";
i++;
}
}
token_count++;
}
else if(src_line.substr(i,1).compare(">")==0)
{
tokens[token_count] = "<greater>";
if(src_line.substr(i).length() > 1)
{
if(src_line.substr(i,2).compare("<=")==0)
{
tokens[token_count] = "<greater_equal>";
i++;
}
}
token_count++;
}
}
return true;
}
// Formats `a` with the default stream formatting for double and returns the
// resulting text (for example 3 -> "3", 2.5 -> "2.5").
string NumToString(double a)
{
    stringstream formatter;
    formatter << a;
    return formatter.str();
}
bool parse_expression()
{
for(int i = 0; i < token_count; i++)
{
if(tokens[i].substr(0,5).compare("<num>")==0)
{
vm_asm[vm_asm_count] = "mov m" + NumToString(m_count) + " " + tokens[i].substr(5);
vm_asm_count++;
tokens[i] = "<vm_var>m" + NumToString(m_count);
m_count++;
}
else if(tokens[i].substr(0,8).compare("<string>")==0)
{
memcpy(&data_segment[data_offset], tokens[i].substr(8).c_str()+'\0', tokens[i].substr(8).length()+1);
vm_asm[vm_asm_count] = "mov$ s" + NumToString(s_count) + " @" + NumToString(data_offset);
vm_asm_count++;
data_offset += tokens[i].substr(8).length() + 1;
tokens[i] = "<vm_var>s" + NumToString(s_count);
s_count++;
}
}
return true;
}
// Interactive test loop: reads a line, tokenizes and parses it, then prints
// either the token list or (once the user has typed "vm_asm") the generated
// VM assembly and the data segment. Typing "exit" ends the loop after that
// line has been processed; end of input ends it immediately.
int main()
{
    string src_line = "";
    string test_option = "";
    while(src_line.compare("exit")!=0)
    {
        cout << "->";
        // Stop on EOF or a stream error; otherwise src_line would never
        // change and the loop would print prompts forever.
        if(!getline(cin, src_line))
            break;
        // Only parse when the line tokenized cleanly, so the parser never
        // runs on a partially filled token list.
        if(tokenizer(src_line))
            parse_expression();
        if(src_line.compare("vm_asm")==0)
            test_option = src_line;
        if(test_option.compare("vm_asm")==0)
        {
            cout << "[vm_asm_start]" << endl;
            for(int i = 0; i < vm_asm_count; i++)
                cout << vm_asm[i] << endl;
            cout << "[vm_asm_end]" << endl << endl;
            cout << "DATA:";
            for (int i = 0; i < data_offset; i ++)
                cout << data_segment[i];
            cout << endl;
        }
        else
        {
            for(int i = 0; i < token_count; i++)
                cout << tokens[i] << endl;
            cout << endl;
        }
    }
    return 0;
}
--- End code ---
Aurel:
OK, I tried to compile your C++ code with newbieIDE, which includes the MinGW C++ compiler,
and I get an error...
So must I use Code::Blocks?
n00b:
I can't tell what the error is from that screenshot. That looks more like a run time error from the image but I need to see the entire error message to be sure.
n00b:
I just posted video #3. In this video I finally start going over getting the parser to parse math expressions as well as strings. Here is the link: https://youtu.be/NtpFWnmq3Do
And here is the final code from this video:
--- Code: ---#include <iostream>
#include <string>
#include <sstream>
#include <string.h>
using namespace std;
// Reserved words of the BASIC dialect. Nothing in this listing reads the
// table yet; it is only sized below.
string keywords[]=
{
    "IF",    "THEN", "ELSE",  "FOR",   "NEXT", "STEP",
    "TO",    "DO",   "LOOP",  "WHILE", "UNTIL", "WEND",
    "GOTO",  "DEF",  "PRINT", "INPUT", "REM",  "END"
};
int keyword_count = sizeof(keywords) / sizeof(keywords[0]);

// Single characters that terminate an identifier or number in the tokenizer.
string spChars[]=
{
    "^", "&", "*", "(", ")", "-", "+", "=",
    "[", "]", "\"", "<", ">", ",", "/", " "
};
int spChar_count = sizeof(spChars) / sizeof(spChars[0]);

// Tokens of the line currently being processed (filled by tokenizer()).
string tokens[100];
int token_count = 0;

// Textual VM instructions emitted by parse_expression().
string vm_asm[2000];
int vm_asm_count = 0;

// Raw segments: parse_expression() copies string literals into data_segment;
// code_segment is not used by this listing.
unsigned char data_segment[5000];
unsigned char code_segment[5000];
int data_offset = 0;
int code_offset = 0;

// Next free number ("m") and string ("s") VM variable indices.
int m_count = 0;
int s_count = 0;

// Not used by this listing.
int m_max = 0;
int s_max = 0;
// Returns true when `c` is exactly one of the entries in spChars[].
bool isSpecialCharacter(string c)
{
    int idx = 0;
    while(idx < spChar_count)
    {
        if(spChars[idx] == c)
            return true;
        ++idx;
    }
    return false;
}
// Returns true only when `c` is a one-character string holding a decimal
// digit ("0" through "9"); any other string, including "", yields false.
bool isNumber(string c)
{
    if(c.length() != 1)
        return false;
    const char ch = c[0];
    return ch >= '0' && ch <= '9';
}
bool tokenizer(string src_line)
{
token_count = 0;
src_line += " ";
string src_token = "";
for(int i = 0; i < src_line.length(); i++)
{
if(!isSpecialCharacter(src_line.substr(i,1)))
{
if(isNumber(src_line.substr(i,1)))
{
src_token = "<num>";
int d = 0;
for(; i < src_line.length(); i++)
{
if(isNumber(src_line.substr(i,1)))
{
src_token += src_line.substr(i,1);
}
else if(src_line.substr(i,1).compare(".")==0)
{
if(d==0)
{
src_token += ".";
d++;
}
else
{
cout << "Can only have one decimal in a number" << endl;
return false;
}
}
else if(isSpecialCharacter(src_line.substr(i,1)))
{
break;
}
else
{
return false;
}
}
tokens[token_count] = src_token;
token_count++;
src_token = "";
i--;
}
else
{
src_token = "<id>";
for(; i < src_line.length(); i++)
{
if(isSpecialCharacter(src_line.substr(i,1)))
break;
else
src_token += src_line.substr(i,1);
}
tokens[token_count] = src_token;
token_count++;
src_token = "";
i--;
}
}
else if(src_line.substr(i,1).compare("\"")==0)
{
//cout << "DEBUG: " << i << endl;
bool str_close = false;
src_token = "<string>";
for(i=i+1; i < src_line.length(); i++)
{
if(src_line.substr(i,1).compare("\"")==0)
{
//cout << "someting" << endl;
str_close = true;
break;
}
else
src_token += src_line.substr(i,1);
//cout << "str: " << src_line.substr(i,1) << endl;
}
if(str_close == false)
{
cout << "Did not close string" << endl;
return false;
}
tokens[token_count] = src_token;
token_count++;
src_token = "";
}
else if(src_line.substr(i,1).compare("+")==0)
{
tokens[token_count] = "<add>";
token_count++;
}
else if(src_line.substr(i,1).compare("-")==0)
{
tokens[token_count] = "<sub>";
token_count++;
}
else if(src_line.substr(i,1).compare("*")==0)
{
tokens[token_count] = "<mul>";
token_count++;
}
else if(src_line.substr(i,1).compare("/")==0)
{
tokens[token_count] = "<div>";
token_count++;
}
else if(src_line.substr(i,1).compare("^")==0)
{
tokens[token_count] = "<pow>";
token_count++;
}
else if(src_line.substr(i,1).compare("(")==0)
{
tokens[token_count] = "<par_open>";
token_count++;
}
else if(src_line.substr(i,1).compare(")")==0)
{
tokens[token_count] = "<par_close>";
token_count++;
}
else if(src_line.substr(i,1).compare("[")==0)
{
tokens[token_count] = "<square_open>";
token_count++;
}
else if(src_line.substr(i,1).compare("]")==0)
{
tokens[token_count] = "<square_close>";
token_count++;
}
else if(src_line.substr(i,1).compare("=")==0)
{
tokens[token_count] = "<equal>";
token_count++;
}
else if(src_line.substr(i,1).compare("&")==0)
{
tokens[token_count] = "<amp>";
token_count++;
}
else if(src_line.substr(i,1).compare(",")==0)
{
tokens[token_count] = "<sep>";
token_count++;
}
else if(src_line.substr(i,1).compare("<")==0)
{
tokens[token_count] = "<less>";
if(src_line.substr(i).length() > 1)
{
if(src_line.substr(i,2).compare("<=")==0)
{
tokens[token_count] = "<less_equal>";
i++;
}
else if(src_line.substr(i,2).compare("<>")==0)
{
tokens[token_count] = "<not_equal>";
i++;
}
}
token_count++;
}
else if(src_line.substr(i,1).compare(">")==0)
{
tokens[token_count] = "<greater>";
if(src_line.substr(i).length() > 1)
{
if(src_line.substr(i,2).compare("<=")==0)
{
tokens[token_count] = "<greater_equal>";
i++;
}
}
token_count++;
}
}
return true;
}
// Formats `a` with the default stream formatting for double and returns the
// resulting text (for example 3 -> "3", 2.5 -> "2.5").
string NumToString(double a)
{
    stringstream formatter;
    formatter << a;
    return formatter.str();
}
bool parse_expression()
{
for(int i = 0; i < token_count; i++)
{
if(tokens[i].substr(0,5).compare("<num>")==0)
{
vm_asm[vm_asm_count] = "mov m" + NumToString(m_count) + " " + tokens[i].substr(5);
vm_asm_count++;
tokens[i] = "<vm_var>m" + NumToString(m_count);
m_count++;
}
else if(tokens[i].substr(0,8).compare("<string>")==0)
{
memcpy(&data_segment[data_offset], tokens[i].substr(8).c_str()+'\0', tokens[i].substr(8).length()+1);
vm_asm[vm_asm_count] = "mov$ s" + NumToString(s_count) + " @" + NumToString(data_offset);
vm_asm_count++;
data_offset += tokens[i].substr(8).length() + 1;
tokens[i] = "<vm_var>s" + NumToString(s_count);
s_count++;
}
}
bool parse_loop = true;
int block_start = 0;
int block_end = 0;
int par = 0;
string arg1 = "";
string arg2 = "";
int op_step = 0;
int op_start = 0;
int op_end = 0;
while(parse_loop)
{
block_start = 0;
block_end = 0;
par = 0;
for(int i = 0; i < token_count; i++)
{
if(tokens[i].compare("<par_open>")==0)
{
block_start = i;
par = 1;
tokens[i] = "";
}
else if(tokens[i].compare("<par_close>")==0)
{
if(par == 0)
{
cout << "Cannot close a parenthesis without opening it" << endl;
return false;
}
par = 2;
block_end = i;
tokens[i] = "";
}
}
cout << "ldfjl" << endl;
if(par == 0)
{
block_start = 0;
block_end = token_count;
parse_loop = false;
}
else if(par == 1)
{
cout << "Must close parenthesis" << endl;
return false;
}
//Exponents
op_step = 0;
arg1 = "";
arg2 = "";
op_start = 0;
op_end = 0;
for(int i = block_start; i < block_end; i++)
{
if(tokens[i].substr(0,8).compare("<vm_var>")==0)
{
if(op_step == 2 && tokens[i].substr(8,1).compare("m")!=0)
{
cout << "Expected number in number expression" << endl;
return false;
}
if(tokens[i].substr(8,1).compare("m")==0)
{
switch(op_step)
{
case 0:
op_start = i;
op_step = 1;
arg1 = tokens[i].substr(8);
break;
case 2:
op_end = i;
op_step = 3;
arg2 = tokens[i].substr(8);
break;
}
}
}
else if(tokens[i].compare("<pow>")==0)
{
if(op_step != 1)
{
cout << "Missing first number in number expression" << endl;
return false;
}
op_step = 2;
}
else if(tokens[i].compare("")!=0)
{
op_step = 0;
}
if(op_step == 3)
{
vm_asm[vm_asm_count] = "pow " + arg1 + " " + arg2;
vm_asm_count++;
for(int i = op_start; i <= op_end; i++)
{
tokens[i] = "";
}
tokens[op_start] = "<vm_var>" + arg1;
op_step = 0;
i = block_start-1;
}
}
//Multiplication and Division
op_step = 0;
arg1 = "";
arg2 = "";
op_start = 0;
op_end = 0;
for(int i = block_start; i < block_end; i++)
{
if(tokens[i].substr(0,8).compare("<vm_var>")==0)
{
if(op_step == 2 && tokens[i].substr(8,1).compare("m")!=0)
{
cout << "Expected number in number expression" << endl;
return false;
}
if(tokens[i].substr(8,1).compare("m")==0)
{
switch(op_step)
{
case 0:
op_start = i;
op_step = 1;
arg1 = tokens[i].substr(8);
break;
case 2:
op_end = i;
op_step = 3;
arg2 = tokens[i].substr(8);
break;
}
}
}
else if(tokens[i].compare("<mul>")==0)
{
if(op_step != 1)
{
cout << "Missing first number in number expression" << endl;
return false;
}
op_step = 2;
vm_asm[vm_asm_count] = "mul ";
}
else if(tokens[i].compare("<div>")==0)
{
if(op_step != 1)
{
cout << "Missing first number in number expression" << endl;
return false;
}
op_step = 2;
vm_asm[vm_asm_count] = "div ";
}
else if(tokens[i].compare("")!=0)
{
op_step = 0;
}
if(op_step == 3)
{
vm_asm[vm_asm_count] += arg1 + " " + arg2;
vm_asm_count++;
for(int i = op_start; i <= op_end; i++)
{
tokens[i] = "";
}
tokens[op_start] = "<vm_var>" + arg1;
op_step = 0;
i = block_start-1;
}
}
//Addition and Subtraction
op_step = 0;
arg1 = "";
arg2 = "";
op_start = 0;
op_end = 0;
for(int i = block_start; i < block_end; i++)
{
if(tokens[i].substr(0,8).compare("<vm_var>")==0)
{
if(op_step == 2 && tokens[i].substr(8,1).compare("m")!=0)
{
cout << "Expected number in number expression" << endl;
return false;
}
if(tokens[i].substr(8,1).compare("m")==0)
{
switch(op_step)
{
case 0:
op_start = i;
op_step = 1;
arg1 = tokens[i].substr(8);
break;
case 2:
op_end = i;
op_step = 3;
arg2 = tokens[i].substr(8);
break;
}
}
}
else if(tokens[i].compare("<add>")==0)
{
if(op_step != 1)
{
cout << "Missing first number in number expression" << endl;
return false;
}
op_step = 2;
vm_asm[vm_asm_count] = "add ";
}
else if(tokens[i].compare("<sub>")==0)
{
if(op_step != 1)
{
cout << "Missing first number in number expression" << endl;
return false;
}
op_step = 2;
vm_asm[vm_asm_count] = "sub ";
}
else if(tokens[i].compare("")!=0)
{
op_step = 0;
}
if(op_step == 3)
{
vm_asm[vm_asm_count] += arg1 + " " + arg2;
vm_asm_count++;
for(int i = op_start; i <= op_end; i++)
{
tokens[i] = "";
}
tokens[op_start] = "<vm_var>" + arg1;
op_step = 0;
i = block_start-1;
}
}
//Adding Strings
op_step = 0;
arg1 = "";
arg2 = "";
op_start = 0;
op_end = 0;
for(int i = block_start; i < block_end; i++)
{
if(tokens[i].substr(0,8).compare("<vm_var>")==0)
{
if(op_step == 2 && tokens[i].substr(8,1).compare("s")!=0)
{
cout << "Expected string in string expression" << endl;
return false;
}
if(tokens[i].substr(8,1).compare("s")==0)
{
switch(op_step)
{
case 0:
op_start = i;
op_step = 1;
arg1 = tokens[i].substr(8);
break;
case 2:
op_end = i;
op_step = 3;
arg2 = tokens[i].substr(8);
break;
}
}
}
else if(tokens[i].compare("<amp>")==0)
{
if(op_step != 1)
{
cout << "Missing first string in string expression" << endl;
return false;
}
op_step = 2;
}
else if(tokens[i].compare("")!=0)
{
op_step = 0;
}
if(op_step == 3)
{
vm_asm[vm_asm_count] = "add$ " + arg1 + " " + arg2;
vm_asm_count++;
for(int i = op_start; i <= op_end; i++)
{
tokens[i] = "";
}
tokens[op_start] = "<vm_var>" + arg1;
op_step = 0;
i = block_start-1;
}
}
}
return true;
}
// Interactive test loop: reads a line, tokenizes and parses it, then prints
// either the token list or (once the user has typed "vm_asm") the generated
// VM assembly and the data segment. Typing "exit" ends the loop after that
// line has been processed; end of input ends it immediately.
int main()
{
    string src_line = "";
    string test_option = "";
    while(src_line.compare("exit")!=0)
    {
        cout << "->";
        // Stop on EOF or a stream error; otherwise src_line would never
        // change and the loop would print prompts forever.
        if(!getline(cin, src_line))
            break;
        // Only parse when the line tokenized cleanly, so the parser never
        // runs on a partially filled token list.
        if(tokenizer(src_line))
            parse_expression();
        if(src_line.compare("vm_asm")==0)
            test_option = src_line;
        if(test_option.compare("vm_asm")==0)
        {
            cout << "[vm_asm_start]" << endl;
            for(int i = 0; i < vm_asm_count; i++)
                cout << vm_asm[i] << endl;
            cout << "[vm_asm_end]" << endl << endl;
            cout << "DATA:";
            for (int i = 0; i < data_offset; i ++)
                cout << data_segment[i];
            cout << endl;
        }
        else
        {
            for(int i = 0; i < token_count; i++)
                cout << tokens[i] << endl;
            cout << endl;
        }
    }
    return 0;
}
--- End code ---
n00b:
I just uploaded video #4. In this video I just add more stuff to the parser like AND, OR, XOR, NOT, SHL, SHR, and all the comparison operators. Here is the link: https://youtu.be/pQs6t9RcI-Q
I could not post the code directly here because of the 20,000 character limit so for all future videos I will attach the source file.
Navigation
[0] Message Index
[*] Previous page
Go to full version