r - tokeniser_test.go
package r

import (
	"testing"

	"vimagination.zapto.org/parser"
)

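// TestTokeniser runs the tokeniser over a table of R source fragments,
// checking that each produces the expected sequence of token types and
// data, and stopping a case at the first mismatch.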
func TestTokeniser(t *testing.T) {
	for n, test := range [...]struct {
		Input  string
		Output []parser.Token
	}{
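		// Whitespace and line terminators; inside ( ), [ ], and [[ ]]
		// groupings, line terminators tokenise as
		// TokenWhitespaceLineTerminator, while { } leaves them as
		// TokenLineTerminator.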
		{ // 1
			" \t\n\v\f\u00a0\ufeff",
			[]parser.Token{
				{Type: TokenWhitespace, Data: " \t"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenWhitespace, Data: "\v\f\u00a0\ufeff"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 2
			"\n\r \u2028\u2029",
			[]parser.Token{
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenLineTerminator, Data: "\r"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenLineTerminator, Data: "\u2028"},
				{Type: TokenLineTerminator, Data: "\u2029"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 3
			"(\n\r \u2028\u2029)",
			[]parser.Token{
				{Type: TokenGrouping, Data: "("},
				{Type: TokenWhitespaceLineTerminator, Data: "\n"},
				{Type: TokenWhitespaceLineTerminator, Data: "\r"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWhitespaceLineTerminator, Data: "\u2028"},
				{Type: TokenWhitespaceLineTerminator, Data: "\u2029"},
				{Type: TokenGrouping, Data: ")"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 4
			"[\n\r \u2028\u2029]",
			[]parser.Token{
				{Type: TokenGrouping, Data: "["},
				{Type: TokenWhitespaceLineTerminator, Data: "\n"},
				{Type: TokenWhitespaceLineTerminator, Data: "\r"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWhitespaceLineTerminator, Data: "\u2028"},
				{Type: TokenWhitespaceLineTerminator, Data: "\u2029"},
				{Type: TokenGrouping, Data: "]"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 5
			"[[\n\r \u2028\u2029]]",
			[]parser.Token{
				{Type: TokenGrouping, Data: "[["},
				{Type: TokenWhitespaceLineTerminator, Data: "\n"},
				{Type: TokenWhitespaceLineTerminator, Data: "\r"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWhitespaceLineTerminator, Data: "\u2028"},
				{Type: TokenWhitespaceLineTerminator, Data: "\u2029"},
				{Type: TokenGrouping, Data: "]]"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 6
			"{\n\r \u2028\u2029}",
			[]parser.Token{
				{Type: TokenGrouping, Data: "{"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenLineTerminator, Data: "\r"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenLineTerminator, Data: "\u2028"},
				{Type: TokenLineTerminator, Data: "\u2029"},
				{Type: TokenGrouping, Data: "}"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
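		// Expression terminators and comments.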
		{ // 7
			";,",
			[]parser.Token{
				{Type: TokenExpressionTerminator, Data: ";"},
				{Type: TokenExpressionTerminator, Data: ","},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 8
			"#A comment\n # Another comment",
			[]parser.Token{
				{Type: TokenComment, Data: "#A comment"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenComment, Data: "# Another comment"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
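		// String literals: single and double quotes, simple escapes, and
		// octal, hex, and Unicode escape forms, both valid and invalid.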
		{ // 9
			"\"abc\"'def'\"a\\n\\t\\\"\"",
			[]parser.Token{
				{Type: TokenStringLiteral, Data: "\"abc\""},
				{Type: TokenStringLiteral, Data: "'def'"},
				{Type: TokenStringLiteral, Data: "\"a\\n\\t\\\"\""},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 10
			"\"'\\\"\\n\\r\\t\\b\\a\\f\\v\\\"\"",
			[]parser.Token{
				{Type: TokenStringLiteral, Data: "\"'\\\"\\n\\r\\t\\b\\a\\f\\v\\\"\""},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 11
			"\"\\132\\142=\\064\\062\"",
			[]parser.Token{
				{Type: TokenStringLiteral, Data: "\"\\132\\142=\\064\\062\""},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 12
			"\"\\0a\"",
			[]parser.Token{
				{Type: parser.TokenError, Data: "invalid string"},
			},
		},
		{ // 13
			"\"\\x7A\\x42=\\x34\\x32\"",
			[]parser.Token{
				{Type: TokenStringLiteral, Data: "\"\\x7A\\x42=\\x34\\x32\""},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 14
			"\"\\xz\"",
			[]parser.Token{
				{Type: parser.TokenError, Data: "invalid string"},
			},
		},
		{ // 15
			"\"\\m\"",
			[]parser.Token{
				{Type: parser.TokenError, Data: "invalid string"},
			},
		},
		{ // 16
			"\"abc",
			[]parser.Token{
				{Type: parser.TokenError, Data: "invalid string"},
			},
		},
		{ // 17
			"\"\\u2190 \\u{800}\"",
			[]parser.Token{
				{Type: TokenStringLiteral, Data: "\"\\u2190 \\u{800}\""},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 18
			"\"\\u{x}\"",
			[]parser.Token{
				{Type: parser.TokenError, Data: "invalid string"},
			},
		},
		{ // 19
			"\"\\u{f\"",
			[]parser.Token{
				{Type: parser.TokenError, Data: "invalid string"},
			},
		},
		{ // 20
			"\"\\u{fffff}\"",
			[]parser.Token{
				{Type: parser.TokenError, Data: "invalid string"},
			},
		},
		{ // 21
			"\"\\ufff\"",
			[]parser.Token{
				{Type: parser.TokenError, Data: "invalid string"},
			},
		},
		{ // 22
			"\"\\U01000000 \\U{2190}\"",
			[]parser.Token{
				{Type: TokenStringLiteral, Data: "\"\\U01000000 \\U{2190}\""},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 23
			"\"\\U{x}\"",
			[]parser.Token{
				{Type: parser.TokenError, Data: "invalid string"},
			},
		},
		{ // 24
			"\"\\U{f\"",
			[]parser.Token{
				{Type: parser.TokenError, Data: "invalid string"},
			},
		},
		{ // 25
			"\"\\U{fffffffff}\"",
			[]parser.Token{
				{Type: parser.TokenError, Data: "invalid string"},
			},
		},
		{ // 26
			"\"\\Ufffffff\"",
			[]parser.Token{
				{Type: parser.TokenError, Data: "invalid string"},
			},
		},
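		// Numeric literals, including hex and exponent forms, plus
		// integer (L suffix) and complex (i suffix) variants.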
		{ // 27
			"0 1 23 0x1a2f30 .123 0.456 123.456 9.8e+7 7E-6 0x123.FEDpFF 0xFFP+FF",
			[]parser.Token{
				{Type: TokenNumericLiteral, Data: "0"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenNumericLiteral, Data: "1"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenNumericLiteral, Data: "23"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenNumericLiteral, Data: "0x1a2f30"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenNumericLiteral, Data: ".123"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenNumericLiteral, Data: "0.456"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenNumericLiteral, Data: "123.456"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenNumericLiteral, Data: "9.8e+7"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenNumericLiteral, Data: "7E-6"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenNumericLiteral, Data: "0x123.FEDpFF"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenNumericLiteral, Data: "0xFFP+FF"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 28
			"Inf NaN",
			[]parser.Token{
				{Type: TokenNumericLiteral, Data: "Inf"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenNumericLiteral, Data: "NaN"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 29
			"0xz",
			[]parser.Token{
				{Type: parser.TokenError, Data: "invalid number"},
			},
		},
		{ // 30
			"0x1.",
			[]parser.Token{
				{Type: parser.TokenError, Data: "invalid number"},
			},
		},
		{ // 31
			"0x1pz",
			[]parser.Token{
				{Type: parser.TokenError, Data: "invalid number"},
			},
		},
		{ // 32
			"1ea",
			[]parser.Token{
				{Type: parser.TokenError, Data: "invalid number"},
			},
		},
		{ // 33
			"1L 99.88L 1e1L 1.2E-23L 0x123L 0xFEEDp101L",
			[]parser.Token{
				{Type: TokenIntegerLiteral, Data: "1L"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenIntegerLiteral, Data: "99.88L"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenIntegerLiteral, Data: "1e1L"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenIntegerLiteral, Data: "1.2E-23L"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenIntegerLiteral, Data: "0x123L"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenIntegerLiteral, Data: "0xFEEDp101L"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 34
			"1i .2i 9.8e1i 0x123i 0x456.ffi 0xapbi",
			[]parser.Token{
				{Type: TokenComplexLiteral, Data: "1i"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenComplexLiteral, Data: ".2i"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenComplexLiteral, Data: "9.8e1i"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenComplexLiteral, Data: "0x123i"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenComplexLiteral, Data: "0x456.ffi"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenComplexLiteral, Data: "0xapbi"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
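		// Boolean, NULL, and NA literals.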
		{ // 35
			"TRUE FALSE",
			[]parser.Token{
				{Type: TokenBooleanLiteral, Data: "TRUE"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenBooleanLiteral, Data: "FALSE"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 36
			"NULL",
			[]parser.Token{
				{Type: TokenNull, Data: "NULL"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 37
			"NA NA_character_ NA_integer_ NA_real_ NA_complex_",
			[]parser.Token{
				{Type: TokenNA, Data: "NA"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenNA, Data: "NA_character_"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenNA, Data: "NA_integer_"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenNA, Data: "NA_real_"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenNA, Data: "NA_complex_"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
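		// Identifiers, keywords, and ellipsis forms.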
		{ // 38
			"a bc def a1 b_c abc.def",
			[]parser.Token{
				{Type: TokenIdentifier, Data: "a"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenIdentifier, Data: "bc"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenIdentifier, Data: "def"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenIdentifier, Data: "a1"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenIdentifier, Data: "b_c"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenIdentifier, Data: "abc.def"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 39
			".a",
			[]parser.Token{
				{Type: TokenIdentifier, Data: ".a"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 40
			"if else repeat while function for in next break",
			[]parser.Token{
				{Type: TokenKeyword, Data: "if"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "else"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "repeat"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "while"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "function"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "for"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "in"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "next"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "break"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 41
			"... ..1 ..2 ..99",
			[]parser.Token{
				{Type: TokenEllipsis, Data: "..."},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenEllipsis, Data: "..1"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenEllipsis, Data: "..2"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenEllipsis, Data: "..99"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
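		// Operators, including user-defined %...% special operators; note
		// that "??" tokenises as two consecutive "?" operators.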
		{ // 42
			"+ - * / ^ > >= < <= == != ! & && | ~ -> <- ->> <<- $ : :: || |> @ = ? ??",
			[]parser.Token{
				{Type: TokenOperator, Data: "+"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "-"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "*"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "/"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "^"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: ">"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: ">="},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "<"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "<="},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "=="},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "!="},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "!"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "&"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "&&"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "|"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "~"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "->"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "<-"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "->>"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "<<-"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "$"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: ":"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "::"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "||"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "|>"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "@"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "="},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "?"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOperator, Data: "?"},
				{Type: TokenOperator, Data: "?"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 43
			"%% %/% %*% %in% %o% %x% %||%",
			[]parser.Token{
				{Type: TokenSpecialOperator, Data: "%%"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenSpecialOperator, Data: "%/%"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenSpecialOperator, Data: "%*%"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenSpecialOperator, Data: "%in%"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenSpecialOperator, Data: "%o%"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenSpecialOperator, Data: "%x%"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenSpecialOperator, Data: "%||%"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
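		// Error cases: invalid operators and characters, and unterminated
		// or mismatched groupings.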
		{ // 44
			"<<",
			[]parser.Token{
				{Type: parser.TokenError, Data: "invalid operator"},
			},
		},
		{ // 45
			"%\n%",
			[]parser.Token{
				{Type: parser.TokenError, Data: "invalid operator"},
			},
		},
		{ // 46
			"£",
			[]parser.Token{
				{Type: parser.TokenError, Data: "invalid character"},
			},
		},
		{ // 47
			"([{[[]]}])]",
			[]parser.Token{
				{Type: TokenGrouping, Data: "("},
				{Type: TokenGrouping, Data: "["},
				{Type: TokenGrouping, Data: "{"},
				{Type: TokenGrouping, Data: "[["},
				{Type: TokenGrouping, Data: "]]"},
				{Type: TokenGrouping, Data: "}"},
				{Type: TokenGrouping, Data: "]"},
				{Type: TokenGrouping, Data: ")"},
				{Type: parser.TokenError, Data: "invalid character"},
			},
		},
		{ // 48
			"(",
			[]parser.Token{
				{Type: TokenGrouping, Data: "("},
				{Type: parser.TokenError, Data: "unexpected EOF"},
			},
		},
		{ // 49
			"[",
			[]parser.Token{
				{Type: TokenGrouping, Data: "["},
				{Type: parser.TokenError, Data: "unexpected EOF"},
			},
		},
		{ // 50
			"{",
			[]parser.Token{
				{Type: TokenGrouping, Data: "{"},
				{Type: parser.TokenError, Data: "unexpected EOF"},
			},
		},
		{ // 51
			"{]",
			[]parser.Token{
				{Type: TokenGrouping, Data: "{"},
				{Type: parser.TokenError, Data: "invalid character"},
			},
		},
		{ // 52
			"[)",
			[]parser.Token{
				{Type: TokenGrouping, Data: "["},
				{Type: parser.TokenError, Data: "invalid character"},
			},
		},
		{ // 53
			"[[]",
			[]parser.Token{
				{Type: TokenGrouping, Data: "[["},
				{Type: parser.TokenError, Data: "invalid character"},
			},
		},
		{ // 54
			"[[}",
			[]parser.Token{
				{Type: TokenGrouping, Data: "[["},
				{Type: parser.TokenError, Data: "invalid character"},
			},
		},
	} {
		p := parser.NewStringTokeniser(test.Input)

		SetTokeniser(&p)

		// Compare the token stream against the expected output, stopping
		// this test case at the first mismatch in type or data.
		for m, tkn := range test.Output {
			tk, _ := p.GetToken()
			if tk.Type != tkn.Type {
				if tk.Type == parser.TokenError {
					t.Errorf("test %d.%d: unexpected error: %s", n+1, m+1, tk.Data)
				} else {
					t.Errorf("test %d.%d: incorrect type, expecting %d, got %d", n+1, m+1, tkn.Type, tk.Type)
				}

				break
			} else if tk.Data != tkn.Data {
				t.Errorf("test %d.%d: incorrect data, expecting %q, got %q", n+1, m+1, tkn.Data, tk.Data)

				break
			}
		}
	}
}