-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlexer.cpp
executable file
·200 lines (174 loc) · 4.98 KB
/
lexer.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
/*
* lexer.cpp
* Benjamin Ferid Issa
* February 8th 2017
*
* Based on files provided by Dr. Frank Jones, Computer Science Department, Brigham Young University
*/
#include <iostream>
#include "lexer.h"
#include "ctype.h"
#include "singles.h"
#include "facts.h"
#include "strings.h"
#include "rules.h"
#include "schemes.h"
#include "queries.h"
#include "id.h"
#include "comment.h"
//Default constructor: puts the lexer into a known-good idle state.
lexer::lexer()
{
    //no machine is running until analyze() installs one
    m_currentState = nullptr;
    //at_eof is tested in analyze()/findopenended() before any machine can
    //set it; initialize it here so it is never read indeterminate
    //(NOTE(review): harmless even if lexer.h already has an in-class init)
    at_eof = false;
    //analyze() re-seeds this to 1, but keep the object consistent regardless
    LineCount = 1;
}
//Performs lexical analysis on a vector of characters and returns the token
//stream as a heap-allocated vector of heap-allocated tokens (caller owns
//both). The stream always ends with a MY_EOF token.
std::vector<token*>* lexer::analyze(std::vector<char>* Input)
{
int MaxCharacters = Input->size();
int CurrentIndex = 0;//where are we in the input string
this->LineCount = 1;
std::vector<token*>* tokens = new std::vector<token*>();
//read through the input string, invoking each state machine
//at the same point in the input string (via its start start)
//until an accepting machine is found...
//if nothing accepts then we return an undefined token and advance
//a single character...
char InputChar;
token* newToken = nullptr;
while(CurrentIndex < MaxCharacters)
{
InputChar = (*Input)[CurrentIndex];
//std::cout << "\"" << InputChar << "\"\n";
//first check for and consume whitespace...
while(isspace(InputChar))
{
//newline is the only whitespace that affects token line numbers
if(InputChar == '\n')
{
this->LineCount++;
}
CurrentIndex++;//for now...
if (CurrentIndex < MaxCharacters){
InputChar = (*Input)[CurrentIndex];
}
else{
//input ended inside trailing whitespace: emit EOF and finish early
newToken = new token(MY_EOF, LineCount);
tokens->push_back(newToken);
return tokens;
}
}
//run the token recognizers in precedence enforcing order
newToken = new token(UNDEFINED, LineCount);
//check for single character tokens...
//NOTE(review): this machine (and those built inside the find* helpers)
//is never deleted — the state objects leak; confirm whether the state
//classes free themselves in setState()/destructors
m_currentState = new singles(this, newToken);
runMachine(Input, CurrentIndex);
//Check for keywords ("Facts," "Rules," "Queries," and "Schemes")
findkeywords(Input, CurrentIndex, newToken);
//Check for the open-ended possible token types (IDs, Strings, and Comments)
findopenended(Input, CurrentIndex, newToken);
if(newToken->Type() == UNDEFINED && !this->at_eof)
{
//OOPS! NOTHING MATCHES!!!
//keep the single offending character so the token has size 1 and the
//index still advances below
newToken->addCharacter(InputChar);
}
//NOTE(review): if at_eof was set while the token is still UNDEFINED the
//token may have size 0, which would stall this loop at the same index —
//verify the strings/comment machines always stamp a type at end-of-input
tokens->push_back(newToken);
CurrentIndex+=newToken->stringSize();//skip past the accepted lexeme
}
newToken = new token(MY_EOF, LineCount);
tokens->push_back(newToken);
return tokens;
}
void lexer::findkeywords(std::vector<char>* Input, int Index, token*& newToken)
{
if(newToken->Type() == UNDEFINED)
{
//do the next thing...
m_currentState = new facts(this, newToken);
runMachine(Input, Index);
}
if(newToken->Type() == UNDEFINED)
{
//do the next thing...
m_currentState = new rules(this, newToken);
runMachine(Input, Index);
}
if(newToken->Type() == UNDEFINED)
{
//do the next thing...
m_currentState = new schemes(this, newToken);
runMachine(Input, Index);
}
if(newToken->Type() == UNDEFINED)
{
//do the next thing...
m_currentState = new queries(this, newToken);
runMachine(Input, Index);
}
}
void lexer::findopenended(std::vector<char>* Input, int Index, token*& newToken)
{
if(newToken->Type() == UNDEFINED && !this->at_eof)
{
//do the next thing...
m_currentState = new strings(this, newToken);
runMachine(Input, Index);
}
if(newToken->Type() == UNDEFINED && !this->at_eof){
m_currentState = new id(this, newToken);
runMachine(Input, Index);
}
if(newToken->Type() == UNDEFINED && !this->at_eof){
m_currentState = new comment(this, newToken);
runMachine(Input, Index);
}
}
//Feeds characters starting at Index into the current state machine until the
//machine rejects a character or the input buffer is exhausted. Sets at_eof
//when the machine was still consuming at end-of-buffer (e.g. an unterminated
//string or comment).
void lexer::runMachine(std::vector<char>* Input, int Index)
{
    //cast once so the loop compares int against int (the original compared a
    //signed Index against the unsigned size())
    const int InputSize = static_cast<int>(Input->size());
    char InputChar = (*Input)[Index];
    while(m_currentState->input(InputChar))//let the state machine do the work...
    {
        Index++;
        //BUG FIX: was 'Index > Input->size()', which let Index == size()
        //fall through and read (*Input)[size()] — one element past the end
        if (Index >= InputSize)
        {
            this->at_eof = true;
            break;
        }
        InputChar = (*Input)[Index];
    }
}
//Prints a description of every token, followed by the total token count,
//to stdout.
void lexer::printAnalysis(std::vector<token*>* Tokens)
{
    for(token* Current : *Tokens)
    {
        Current->printDescription();
    }
    std::cout << "Total Tokens = " << Tokens->size() << std::endl;
}
//Utility: reads an entire text file into a heap-allocated character vector
//(caller owns it). On open failure, prints a message and returns the empty
//vector rather than failing hard.
std::vector<char>* lexer::fileToVectorOfChars(std::string FileName)
{
    std::vector<char>* Characters = new std::vector<char>();
    std::ifstream InputFile(FileName.c_str());
    if(InputFile.fail())
    {
        std::cout << "Error: Unable to Open File.\n";
        return Characters;
    }
    //read-check-push-read, same sequence as a priming-read while loop;
    //the good() test rejects the char returned by the get() that hit EOF
    for(char Next = InputFile.get(); InputFile.good(); Next = InputFile.get())
    {
        Characters->push_back(Next);
    }
    return Characters;
}
void lexer::setState(state* State)
{
m_currentState = State;
}
//Advances the lexer's current line counter by one.
void lexer::increaseLineCount()
{
    ++LineCount;
}