parser - tokeniser_test.go
1 package parser
2
3 import (
4 "iter"
5 "strings"
6 "testing"
7 )
8
9 func tokenisers(str string) iter.Seq2[string, Tokeniser] {
10 return func(yield func(string, Tokeniser) bool) {
11 _ = yield("string", NewStringTokeniser(str)) &&
12 yield("bytes", NewByteTokeniser([]byte(str))) &&
13 yield("reader", NewReaderTokeniser(strings.NewReader(str))) &&
14 yield("sub (string)", Tokeniser{tokeniser: NewStringTokeniser(str).sub()}) &&
15 yield("sub (bytes)", Tokeniser{tokeniser: NewByteTokeniser([]byte(str)).sub()}) &&
16 yield("sub (reader)", Tokeniser{tokeniser: NewReaderTokeniser(strings.NewReader(str)).sub()})
17 }
18 }
19
20 func TestTokeniserNext(t *testing.T) {
21 for n, p := range tokenisers("ABCDEFGH") {
22 if c := p.Peek(); c != 'A' {
23 t.Errorf("test 1 (%s): expecting %q, got %q", n, 'A', c)
24 } else if c = p.Peek(); c != 'A' {
25 t.Errorf("test 2 (%s): expecting %q, got %q", n, 'A', c)
26 } else if c = p.Next(); c != 'A' {
27 t.Errorf("test 3 (%s): expecting %q, got %q", n, 'A', c)
28 } else if c = p.Next(); c != 'B' {
29 t.Errorf("test 4 (%s): expecting %q, got %q", n, 'B', c)
30 } else if c = p.Peek(); c != 'C' {
31 t.Errorf("test 5 (%s): expecting %q, got %q", n, 'C', c)
32 }
33 }
34 }
35
36 func TestTokeniserAccept(t *testing.T) {
37 for n, p := range tokenisers("ABC£") {
38 if _, s := p.Accept("ABCD"), p.Get(); s != "A" {
39 t.Errorf("test 1 (%s): expecting \"A\", got %q", n, s)
40 } else if _, s = p.Accept("ABCD"), p.Get(); s != "B" {
41 t.Errorf("test 2 (%s): expecting \"B\", got %q", n, s)
42 } else if _, s = p.Accept("ABCD"), p.Get(); s != "C" {
43 t.Errorf("test 3 (%s): expecting \"C\", got %q", n, s)
44 } else if _, s = p.Accept("ABCD"), p.Get(); s != "" {
45 t.Errorf("test 4 (%s): expecting \"\", got %q", n, s)
46 } else if _, s = p.Accept("£"), p.Get(); s != "£" {
47 t.Errorf("test 5 (%s): expecting \"£\", got %q", n, s)
48 }
49 }
50 }
51
52 func TestTokeniserAcceptRun(t *testing.T) {
53 for n, p := range tokenisers("123ABC££$$%%^^\n") {
54 if _, s := p.AcceptRun("0123456789"), p.Get(); s != "123" {
55 t.Errorf("test 1 (%s): expecting \"123\", got %q", n, s)
56 } else if _, s = p.AcceptRun("ABC"), p.Get(); s != "ABC" {
57 t.Errorf("test 2 (%s): expecting \"ABC\", got %q", n, s)
58 } else if _, s = p.AcceptRun("£$%^"), p.Get(); s != "££$$%%^^" {
59 t.Errorf("test 3 (%s): expecting \"££$$%%^^\", got %q", n, s)
60 } else if _, s = p.AcceptRun("\n"), p.Get(); s != "\n" {
61 t.Errorf("test 4 (%s): expecting \"\\n\", got %q", n, s)
62 }
63 }
64 }
65
66 func TestTokeniserExcept(t *testing.T) {
67 for n, p := range tokenisers("123") {
68 if _, s := p.Except("1"), p.Get(); s != "" {
69 t.Errorf("test 1 (%s): expecting \"\", got %q", n, s)
70 } else if _, s = p.Except("2"), p.Get(); s != "1" {
71 t.Errorf("test 2 (%s): expecting \"1\", got %q", n, s)
72 } else if _, s = p.Except("2"), p.Get(); s != "" {
73 t.Errorf("test 3 (%s): expecting \"\", got %q", n, s)
74 } else if _, s = p.Except("!"), p.Get(); s != "2" {
75 t.Errorf("test 4 (%s): expecting \"2\", got %q", n, s)
76 } else if _, s = p.Except("!"), p.Get(); s != "3" {
77 t.Errorf("test 5 (%s): expecting \"3\", got %q", n, s)
78 } else if _, s = p.Except("!"), p.Get(); s != "" {
79 t.Errorf("test 6 (%s): expecting \"\", got %q", n, s)
80 }
81 }
82 }
83
84 func TestTokeniserExceptRun(t *testing.T) {
85 for n, p := range tokenisers("12345ABC\n67890DEF\nOH MY!") {
86 p.ExceptRun("\n")
87 if s := p.Get(); s != "12345ABC" {
88 t.Errorf("test 1 (%s): expecting \"12345ABC\", got %q", n, s)
89
90 continue
91 }
92
93 p.Except("")
94 p.Get()
95 p.ExceptRun("\n")
96
97 if s := p.Get(); s != "67890DEF" {
98 t.Errorf("test 2 (%s): expecting \"67890DEF\", got %q", n, s)
99
100 continue
101 }
102
103 p.Except("")
104 p.Get()
105 p.ExceptRun("")
106
107 if s := p.Get(); s != "OH MY!" {
108 t.Errorf("test 3 (%s): expecting \"OH MY!\", got %q", n, s)
109
110 continue
111 }
112 }
113 }
114
115 func TestTokeniserReset(t *testing.T) {
116 for n, p := range tokenisers("ABCDEFGHIJKLMNOPQRSTUVWXYZ") {
117 p.ExceptRun("E")
118 p.Reset()
119
120 if got := p.Get(); got != "" {
121 t.Errorf("test 1 (%s): expecting to get %q, got %q", n, "", got)
122 } else if _, got = p.ExceptRun("E"), p.Get(); got != "ABCD" {
123 t.Errorf("test 2 (%s): expecting to get %q, got %q", n, "ABCD", got)
124 }
125 }
126 }
127
128 func TestTokeniserState(t *testing.T) {
129 for n, p := range tokenisers("12345ABC\n67890DEF\nOH MY!") {
130 state := p.State()
131
132 a := p.Next()
133 b := p.Next()
134 c := p.Next()
135 d := p.Next()
136 l := p.Len()
137
138 state.Reset()
139
140 if p.Next() != a || p.Next() != b || p.Next() != c || p.Next() != d || p.Len() != l {
141 t.Errorf("test 1 (%s): failed to reset state correctly", n)
142
143 continue
144 }
145
146 state = p.State()
147
148 a = p.Next()
149 b = p.Next()
150 c = p.Next()
151 d = p.Next()
152 l = p.Len()
153
154 state.Reset()
155
156 if p.Next() != a || p.Next() != b || p.Next() != c || p.Next() != d || p.Len() != l {
157 t.Errorf("test 2 (%s): failed to reset state correctly", n)
158
159 continue
160 }
161 }
162 }
163
164 func TestTokeniserAcceptString(t *testing.T) {
165 for m, p := range tokenisers("ABCDEFGHIJKLMNOPQRSTUVWXYZ") {
166 for n, test := range [...]struct {
167 Str string
168 Read int
169 CaseInsensitive bool
170 }{
171 {
172 Str: "Z",
173 },
174 {
175 Str: "A",
176 Read: 1,
177 },
178 {
179 Str: "BCD",
180 Read: 3,
181 },
182 {
183 Str: "EFGZ",
184 Read: 3,
185 },
186 {
187 Str: "hij",
188 Read: 0,
189 },
190 {
191 Str: "hij",
192 Read: 3,
193 CaseInsensitive: true,
194 },
195 } {
196 if read := p.AcceptString(test.Str, test.CaseInsensitive); read != test.Read {
197 t.Errorf("test %d (%s): expecting to parse %d chars, parsed %d", n+1, m, test.Read, read)
198 }
199 }
200 }
201 }
202
203 func TestTokeniserAcceptWord(t *testing.T) {
204 for m, p := range tokenisers("ABCDEFGHIJKLMNOPQRSTUVWXYZ") {
205 for n, test := range [...]struct {
206 Words []string
207 Read string
208 CaseInsensitive bool
209 }{
210 {},
211 {
212 Words: []string{"Z"},
213 },
214 {
215 Words: []string{"Z", "Y"},
216 },
217 {
218 Words: []string{"A"},
219 Read: "A",
220 },
221 {
222 Words: []string{"BD"},
223 },
224 {
225 Words: []string{"BD", "BE"},
226 },
227 {
228 Words: []string{"BCD", "BCE"},
229 Read: "BCD",
230 },
231 {
232 Words: []string{"EFH", "EFG"},
233 Read: "EFG",
234 },
235 {
236 Words: []string{"HIJ", "HIJK"},
237 Read: "HIJK",
238 },
239 {
240 Words: []string{"LMNOP", "LMOPQ", "LmNoPqR"},
241 Read: "LMNOPQR",
242 CaseInsensitive: true,
243 },
244 {
245 Words: []string{"ZYX", "ST", "STZ"},
246 Read: "ST",
247 },
248 } {
249 if read := p.AcceptWord(test.Words, test.CaseInsensitive); read != test.Read {
250 t.Errorf("test %d (%s): expecting to parse %q, parsed %q", n+1, m, test.Read, read)
251 }
252 }
253 }
254 }
255
256 func TestTokeniserSub(t *testing.T) {
257 for n, p := range tokenisers("ABCDEFGHIJKLMNOPQRSTUVWXYZ") {
258 if _, ok := p.tokeniser.(*sub); ok {
259 break
260 } else if c := p.Next(); c != 'A' {
261 t.Errorf("test 1 (%s): expecting to read %q, got %q", n, 'A', c)
262
263 continue
264 }
265
266 q := p.SubTokeniser()
267
268 if c := q.Next(); c != 'B' {
269 t.Errorf("test 2 (%s): expecting to read %q, got %q", n, 'B', c)
270
271 continue
272 }
273
274 if c := q.ExceptRun("H"); c != 'H' {
275 t.Errorf("test 3 (%s): expecting to read %q, got %q", n, 'H', c)
276
277 continue
278 } else if got := q.Get(); got != "BCDEFG" {
279 t.Errorf("test 4 (%s): expecting to read %q, got %q", n, "BCDEFG", got)
280
281 continue
282 }
283
284 q.Next()
285
286 if got := q.Get(); got != "H" {
287 t.Errorf("test 5 (%s): expecting to read %q, got %q", n, "H", got)
288
289 continue
290 } else if got := p.Get(); got != "ABCDEFGH" {
291 t.Errorf("test 6 (%s): expecting to read %q, got %q", n, "ABCDEFGH", got)
292
293 continue
294 }
295
296 q.Next()
297
298 if got := q.Get(); got != "" {
299 t.Errorf("test 7 (%s): expecting to read %q, got %q", n, "", got)
300
301 continue
302 }
303
304 p.Next()
305
306 q = p.SubTokeniser()
307
308 q.Next()
309
310 r := q.SubTokeniser()
311
312 r.Next()
313
314 if got := r.Get(); got != "L" {
315 t.Errorf("test 8 (%s): expecting to read %q, got %q", n, "L", got)
316 } else if got := q.Get(); got != "KL" {
317 t.Errorf("test 9 (%s): expecting to read %q, got %q", n, "KL", got)
318 } else if got := p.Get(); got != "IJKL" {
319 t.Errorf("test 10 (%s): expecting to read %q, got %q", n, "HIJKL", got)
320 }
321 }
322 }
323