-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtokenizer.py
More file actions
148 lines (124 loc) · 4.47 KB
/
tokenizer.py
File metadata and controls
148 lines (124 loc) · 4.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import errors
from stdfunc import *
def create_tokens(code):
    """Split source text into a flat list of ``(type, value)`` token tuples.

    Token types produced: ``INT``, ``FLOAT``, ``STRING``, ``KEYWORD``
    (``print``, ``return``, ``read``), ``FUNCTION``, ``IFSTATE``, ``BOOL``,
    ``IDENTIFIER``, ``ASSIGN``, ``OPERATOR`` and ``SYMBOL``. Every value is a
    string. ``#`` comments, spaces and newlines produce no token, and any
    character that no rule recognises is skipped silently.

    Args:
        code: The complete source text to tokenize.

    Returns:
        A list of ``(token_type, value)`` tuples in source order.

    Raises:
        SystemExit: With exit code 1, after printing the message built by the
            ``errors`` module, for a malformed number, an unterminated string
            or an identifier that contains an upper-case character.
    """
    # SETTINGS
    dec_identifier = "."  # character that separates the integer and fraction parts
    # SETTINGS

    # Reserved spellings and the token type each one maps to. A spelling only
    # matches as a whole word (see is_word_char below).
    reserved_words = (
        ("print", "KEYWORD"),
        ("return", "KEYWORD"),
        ("function", "FUNCTION"),
        ("read", "KEYWORD"),
        ("if", "IFSTATE"),
        ("True", "BOOL"),
        ("False", "BOOL"),
    )

    code_lines = code.split("\n")
    tokens = []
    code_length = len(code)
    i = 0
    line = 0  # zero-based; error messages receive line + 1

    def is_word_char(ch):
        # Same character set the identifier scanner accepts, so that a name
        # such as "print_total" is not cut after the reserved prefix "print".
        return std_isalphanumeric(ch) or ch == "_"

    while i < code_length:
        ch = code[i]

        # Comment: skip to the end of the line. The newline itself is left
        # for the newline rule so the line counter stays correct.
        if ch == "#":
            while i < code_length and code[i] != "\n":
                i += 1
            continue

        if ch == " ":
            i += 1
            continue

        # Number literal: digits, optionally one separator, then more digits.
        if std_isnumeric(ch) or ch == dec_identifier:
            start_i = i
            num_type = "INT"
            while i < code_length and std_isnumeric(code[i]):
                i += 1
            # Bounds checks matter here: the literal may end the input.
            if i < code_length and code[i] == dec_identifier:
                i += 1
                num_type = "FLOAT"
                while i < code_length and std_isnumeric(code[i]):
                    i += 1
            literal = code[start_i:i]
            followed_by_junk = i < code_length and (
                code[i] == dec_identifier or std_isalpha(code[i])
            )
            # A bare separator has no digits at all and cannot be converted.
            if followed_by_junk or literal == dec_identifier:
                print(errors.invalid_number(line + 1, code_lines, literal))
                raise SystemExit(1)
            if num_type == "INT":
                number = str(int(literal))
            else:
                # Convert through Python's "." notation, then back to the
                # configured separator.
                number = str(float(literal.replace(dec_identifier, "."))).replace(
                    ".", dec_identifier
                )
            tokens.append((num_type, number))
            continue

        # String literal delimited by a matching pair of " or '.
        if ch == '"' or ch == "'":
            quote = ch
            start_i = i + 1
            i = start_i
            while i < code_length and code[i] != quote:
                i += 1
            if i == code_length:
                print(errors.invalid_string(line + 1, code_lines, code[start_i:i]))
                raise SystemExit(1)
            text = code[start_i:i]
            tokens.append(("STRING", text))
            # Keep the line counter in step when the literal spans lines.
            line += text.count("\n")
            i += 1  # step over the closing quote
            continue

        # Reserved words, matched only when not followed by a word character.
        matched_reserved = False
        for word, token_type in reserved_words:
            end = i + len(word)
            if code.startswith(word, i) and (
                end == code_length or not is_word_char(code[end])
            ):
                tokens.append((token_type, word))
                i = end
                matched_reserved = True
                break
        if matched_reserved:
            continue

        # Identifier: starts with a letter, continues with word characters.
        if std_isalpha(ch):
            start_i = i
            while i < code_length and is_word_char(code[i]):
                i += 1
            name = code[start_i:i]
            if any(char.isupper() for char in name):
                print(errors.invalid_variable_name(line + 1, code_lines, name))
                raise SystemExit(1)
            tokens.append(("IDENTIFIER", name))
            continue

        if ch == "=":
            tokens.append(("ASSIGN", ch))
            i += 1
            continue

        if ch in "+-*/":
            tokens.append(("OPERATOR", ch))
            i += 1
            continue

        if std_issymb(ch):
            tokens.append(("SYMBOL", ch))
            i += 1
            continue

        if ch == "\n":
            i += 1
            line += 1
            continue

        # Anything unrecognised is skipped without producing a token.
        i += 1

    return tokens
# for token in tokenizer():
# print(token)