parser - tokeniser_test.go
package parser

import (
	"iter"
	"strings"
	"testing"
)

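// tokenisers yields the same input string wrapped by each Tokeniser
// constructor, plus sub-tokeniser variants, so every test runs against all
// backing implementations.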
func tokenisers(str string) iter.Seq2[string, Tokeniser] {
	return func(yield func(string, Tokeniser) bool) {
		_ = yield("string", NewStringTokeniser(str)) &&
			yield("bytes", NewByteTokeniser([]byte(str))) &&
			yield("reader", NewReaderTokeniser(strings.NewReader(str))) &&
			yield("rune reader", NewRuneReaderTokeniser(strings.NewReader(str))) &&
			yield("sub (string)", Tokeniser{tokeniser: NewStringTokeniser(str).sub()}) &&
			yield("sub (bytes)", Tokeniser{tokeniser: NewByteTokeniser([]byte(str)).sub()}) &&
			yield("sub (reader)", Tokeniser{tokeniser: NewReaderTokeniser(strings.NewReader(str)).sub()}) &&
			yield("sub (rune reader)", Tokeniser{tokeniser: NewRuneReaderTokeniser(strings.NewReader(str)).sub()})
	}
}

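// TestTokeniserNext checks that Peek returns the next rune without consuming
// it and that Next advances one rune at a time.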
func TestTokeniserNext(t *testing.T) {
	for n, p := range tokenisers("ABCDEFGH") {
		if c := p.Peek(); c != 'A' {
			t.Errorf("test 1 (%s): expecting %q, got %q", n, 'A', c)
		} else if c = p.Peek(); c != 'A' {
			t.Errorf("test 2 (%s): expecting %q, got %q", n, 'A', c)
		} else if c = p.Next(); c != 'A' {
			t.Errorf("test 3 (%s): expecting %q, got %q", n, 'A', c)
		} else if c = p.Next(); c != 'B' {
			t.Errorf("test 4 (%s): expecting %q, got %q", n, 'B', c)
		} else if c = p.Peek(); c != 'C' {
			t.Errorf("test 5 (%s): expecting %q, got %q", n, 'C', c)
		}
	}
}

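// TestTokeniserLen checks that Len reports the number of bytes read since the
// last Get, counting multi-byte runes correctly and not growing past EOF.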
func TestTokeniserLen(t *testing.T) {
	for n, p := range tokenisers("A…") {
		p.Peek()

		if l := p.Len(); l != 0 {
			t.Errorf("test 1 (%s): expecting to have read 0 bytes, read %d", n, l)
		}

		p.Next() // 'A' is a single byte.

		if l := p.Len(); l != 1 {
			t.Errorf("test 2 (%s): expecting to have read 1 byte, read %d", n, l)
		}

		p.Next() // '…' is three bytes in UTF-8, bringing the total to four.

		if l := p.Len(); l != 4 {
			t.Errorf("test 3 (%s): expecting to have read 4 bytes, read %d", n, l)
		}

		p.Next() // Reading past EOF should not change Len.

		if l := p.Len(); l != 4 {
			t.Errorf("test 4 (%s): expecting to have read 4 bytes, read %d", n, l)
		}

		p.Next()

		if l := p.Len(); l != 4 {
			t.Errorf("test 5 (%s): expecting to have read 4 bytes, read %d", n, l)
		}
	}
}

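// TestTokeniserAccept checks that Accept consumes a single rune only when it
// is in the given set.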
func TestTokeniserAccept(t *testing.T) {
	for n, p := range tokenisers("ABC£") {
		if _, s := p.Accept("ABCD"), p.Get(); s != "A" {
			t.Errorf("test 1 (%s): expecting \"A\", got %q", n, s)
		} else if _, s = p.Accept("ABCD"), p.Get(); s != "B" {
			t.Errorf("test 2 (%s): expecting \"B\", got %q", n, s)
		} else if _, s = p.Accept("ABCD"), p.Get(); s != "C" {
			t.Errorf("test 3 (%s): expecting \"C\", got %q", n, s)
		} else if _, s = p.Accept("ABCD"), p.Get(); s != "" {
			t.Errorf("test 4 (%s): expecting \"\", got %q", n, s)
		} else if _, s = p.Accept("£"), p.Get(); s != "£" {
			t.Errorf("test 5 (%s): expecting \"£\", got %q", n, s)
		}
	}
}

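// TestTokeniserAcceptRune checks that AcceptRune consumes a rune only when it
// matches exactly.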
func TestTokeniserAcceptRune(t *testing.T) {
	for n, p := range tokenisers("ABC£") {
		if _, s := p.AcceptRune('A'), p.Get(); s != "A" {
			t.Errorf("test 1 (%s): expecting \"A\", got %q", n, s)
		} else if _, s = p.AcceptRune('B'), p.Get(); s != "B" {
			t.Errorf("test 2 (%s): expecting \"B\", got %q", n, s)
		} else if _, s = p.AcceptRune('C'), p.Get(); s != "C" {
			t.Errorf("test 3 (%s): expecting \"C\", got %q", n, s)
		} else if _, s = p.AcceptRune('D'), p.Get(); s != "" {
			t.Errorf("test 4 (%s): expecting \"\", got %q", n, s)
		} else if _, s = p.AcceptRune('£'), p.Get(); s != "£" {
			t.Errorf("test 5 (%s): expecting \"£\", got %q", n, s)
		}
	}
}

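// TestTokeniserAcceptRun checks that AcceptRun consumes a run of runes from
// the given set, stopping at the first rune outside it.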
func TestTokeniserAcceptRun(t *testing.T) {
	for n, p := range tokenisers("123ABC££$$%%^^\n") {
		if _, s := p.AcceptRun("0123456789"), p.Get(); s != "123" {
			t.Errorf("test 1 (%s): expecting \"123\", got %q", n, s)
		} else if _, s = p.AcceptRun("ABC"), p.Get(); s != "ABC" {
			t.Errorf("test 2 (%s): expecting \"ABC\", got %q", n, s)
		} else if _, s = p.AcceptRun("£$%^"), p.Get(); s != "££$$%%^^" {
			t.Errorf("test 3 (%s): expecting \"££$$%%^^\", got %q", n, s)
		} else if _, s = p.AcceptRun("\n"), p.Get(); s != "\n" {
			t.Errorf("test 4 (%s): expecting \"\\n\", got %q", n, s)
		}
	}
}

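// TestTokeniserExcept checks that Except consumes a single rune only when it
// is not in the given set.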
func TestTokeniserExcept(t *testing.T) {
	for n, p := range tokenisers("123") {
		if _, s := p.Except("1"), p.Get(); s != "" {
			t.Errorf("test 1 (%s): expecting \"\", got %q", n, s)
		} else if _, s = p.Except("2"), p.Get(); s != "1" {
			t.Errorf("test 2 (%s): expecting \"1\", got %q", n, s)
		} else if _, s = p.Except("2"), p.Get(); s != "" {
			t.Errorf("test 3 (%s): expecting \"\", got %q", n, s)
		} else if _, s = p.Except("!"), p.Get(); s != "2" {
			t.Errorf("test 4 (%s): expecting \"2\", got %q", n, s)
		} else if _, s = p.Except("!"), p.Get(); s != "3" {
			t.Errorf("test 5 (%s): expecting \"3\", got %q", n, s)
		} else if _, s = p.Except("!"), p.Get(); s != "" {
			t.Errorf("test 6 (%s): expecting \"\", got %q", n, s)
		}
	}
}

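// TestTokeniserExceptRun checks that ExceptRun consumes runes up to, but not
// including, any rune in the given set, and that an empty set reads to EOF.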
func TestTokeniserExceptRun(t *testing.T) {
	for n, p := range tokenisers("12345ABC\n67890DEF\nOH MY!") {
		p.ExceptRun("\n")
		if s := p.Get(); s != "12345ABC" {
			t.Errorf("test 1 (%s): expecting \"12345ABC\", got %q", n, s)

			continue
		}

		p.Except("")
		p.Get()
		p.ExceptRun("\n")

		if s := p.Get(); s != "67890DEF" {
			t.Errorf("test 2 (%s): expecting \"67890DEF\", got %q", n, s)

			continue
		}

		p.Except("")
		p.Get()
		p.ExceptRun("")

		if s := p.Get(); s != "OH MY!" {
			t.Errorf("test 3 (%s): expecting \"OH MY!\", got %q", n, s)

			continue
		}
	}
}

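// TestTokeniserReset checks that Reset discards the pending token and rewinds
// to the position of the last Get.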
func TestTokeniserReset(t *testing.T) {
	for n, p := range tokenisers("ABCDEFGHIJKLMNOPQRSTUVWXYZ") {
		p.ExceptRun("E")
		p.Reset()

		if got := p.Get(); got != "" {
			t.Errorf("test 1 (%s): expecting to get %q, got %q", n, "", got)
		} else if _, got = p.ExceptRun("E"), p.Get(); got != "ABCD" {
			t.Errorf("test 2 (%s): expecting to get %q, got %q", n, "ABCD", got)
		}
	}
}

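// TestTokeniserState checks that a captured State rewinds the tokeniser to the
// point at which it was taken.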
func TestTokeniserState(t *testing.T) {
	for n, p := range tokenisers("12345ABC\n67890DEF\nOH MY!") {
		state := p.State()

		a := p.Next()
		b := p.Next()
		c := p.Next()
		d := p.Next()
		l := p.Len()

		state.Reset()

		if p.Next() != a || p.Next() != b || p.Next() != c || p.Next() != d || p.Len() != l {
			t.Errorf("test 1 (%s): failed to reset state correctly", n)

			continue
		}

		state = p.State()

		a = p.Next()
		b = p.Next()
		c = p.Next()
		d = p.Next()
		l = p.Len()

		state.Reset()

		if p.Next() != a || p.Next() != b || p.Next() != c || p.Next() != d || p.Len() != l {
			t.Errorf("test 2 (%s): failed to reset state correctly", n)

			continue
		}
	}
}

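// TestTokeniserAcceptString checks how many characters AcceptString consumes
// for non-matching, partial, full, and case-insensitive matches.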
func TestTokeniserAcceptString(t *testing.T) {
	for m, p := range tokenisers("ABCDEFGHIJKLMNOPQRSTUVWXYZ") {
		for n, test := range [...]struct {
			Str             string
			Read            int
			CaseInsensitive bool
		}{
			{
				Str: "Z",
			},
			{
				Str:  "A",
				Read: 1,
			},
			{
				Str:  "BCD",
				Read: 3,
			},
			{
				Str:  "EFGZ",
				Read: 3,
			},
			{
				Str:  "hij",
				Read: 0,
			},
			{
				Str:             "hij",
				Read:            3,
				CaseInsensitive: true,
			},
		} {
			if read := p.AcceptString(test.Str, test.CaseInsensitive); read != test.Read {
				t.Errorf("test %d (%s): expecting to parse %d chars, parsed %d", n+1, m, test.Read, read)
			}
		}
	}
}

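// TestTokeniserAcceptWord checks that AcceptWord consumes the longest matching
// word from the given list, optionally case-insensitively.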
func TestTokeniserAcceptWord(t *testing.T) {
	for m, p := range tokenisers("ABCDEFGHIJKLMNOPQRSTUVWXYZ") {
		for n, test := range [...]struct {
			Words           []string
			Read            string
			CaseInsensitive bool
		}{
			{},
			{
				Words: []string{"Z"},
			},
			{
				Words: []string{"Z", "Y"},
			},
			{
				Words: []string{"A"},
				Read:  "A",
			},
			{
				Words: []string{"BD"},
			},
			{
				Words: []string{"BD", "BE"},
			},
			{
				Words: []string{"BCD", "BCE"},
				Read:  "BCD",
			},
			{
				Words: []string{"EFH", "EFG"},
				Read:  "EFG",
			},
			{
				Words: []string{"HIJ", "HIJK"},
				Read:  "HIJK",
			},
			{
				Words:           []string{"LMNOP", "LMOPQ", "LmNoPqR"},
				Read:            "LMNOPQR",
				CaseInsensitive: true,
			},
			{
				Words: []string{"ZYX", "ST", "STZ"},
				Read:  "ST",
			},
		} {
			if read := p.AcceptWord(test.Words, test.CaseInsensitive); read != test.Read {
				t.Errorf("test %d (%s): expecting to parse %q, parsed %q", n+1, m, test.Read, read)
			}
		}
	}
}

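// TestTokeniserSub checks that a sub-tokeniser reads through its parent, that
// the parent's Get includes everything its sub-tokenisers read, and that
// sub-tokenisers can be nested.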
func TestTokeniserSub(t *testing.T) {
	for n, p := range tokenisers("ABCDEFGHIJKLMNOPQRSTUVWXYZ") {
		if _, ok := p.tokeniser.(*sub); ok {
			// The sub-backed variants are exercised via SubTokeniser below.
			break
		} else if c := p.Next(); c != 'A' {
			t.Errorf("test 1 (%s): expecting to read %q, got %q", n, 'A', c)

			continue
		}

		q := p.SubTokeniser()

		if c := q.Next(); c != 'B' {
			t.Errorf("test 2 (%s): expecting to read %q, got %q", n, 'B', c)

			continue
		}

		if c := q.ExceptRun("H"); c != 'H' {
			t.Errorf("test 3 (%s): expecting to read %q, got %q", n, 'H', c)

			continue
		} else if got := q.Get(); got != "BCDEFG" {
			t.Errorf("test 4 (%s): expecting to read %q, got %q", n, "BCDEFG", got)

			continue
		}

		q.Next()

		if got := q.Get(); got != "H" {
			t.Errorf("test 5 (%s): expecting to read %q, got %q", n, "H", got)

			continue
		} else if got := p.Get(); got != "ABCDEFGH" {
			t.Errorf("test 6 (%s): expecting to read %q, got %q", n, "ABCDEFGH", got)

			continue
		}

		q.Next()

		if got := q.Get(); got != "" {
			t.Errorf("test 7 (%s): expecting to read %q, got %q", n, "", got)

			continue
		}

		p.Next()

		q = p.SubTokeniser()

		q.Next()

		r := q.SubTokeniser()

		r.Next()

		if got := r.Get(); got != "L" {
			t.Errorf("test 8 (%s): expecting to read %q, got %q", n, "L", got)
		} else if got := q.Get(); got != "KL" {
			t.Errorf("test 9 (%s): expecting to read %q, got %q", n, "KL", got)
		} else if got := p.Get(); got != "IJKL" {
			t.Errorf("test 10 (%s): expecting to read %q, got %q", n, "IJKL", got)
		}
	}
}
