// css - tokeniser_test.go
package css

import (
	"testing"

	"vimagination.zapto.org/parser"
)
9 func TestTokeniser(t *testing.T) {
10 for n, test := range [...]struct {
11 Input string
12 Output []parser.Token
13 }{
14 { // 1
15 " \t\n\r\r\n\f",
16 []parser.Token{
17 {Type: TokenWhitespace, Data: " \t\n\n\n\n"},
18 {Type: parser.TokenDone},
19 },
20 },
21 { // 2
22 "/* A Comment *//* Another Comment */",
23 []parser.Token{
24 {Type: TokenComment, Data: "/* A Comment */"},
25 {Type: TokenComment, Data: "/* Another Comment */"},
26 {Type: parser.TokenDone},
27 },
28 },
29 { // 3
30 "/* A Comment",
31 []parser.Token{
32 {Type: parser.TokenError, Data: "unexpected EOF"},
33 },
34 },
35 { // 4
36 " /* A Comment */\n \t",
37 []parser.Token{
38 {Type: TokenWhitespace, Data: " "},
39 {Type: TokenComment, Data: "/* A Comment */"},
40 {Type: TokenWhitespace, Data: "\n \t"},
41 {Type: parser.TokenDone},
42 },
43 },
44 { // 5
45 "\"a string\"",
46 []parser.Token{
47 {Type: TokenString, Data: "\"a string\""},
48 {Type: parser.TokenDone},
49 },
50 },
51 { // 6
52 " \"a string with an escape \\20\" ",
53 []parser.Token{
54 {Type: TokenWhitespace, Data: " "},
55 {Type: TokenString, Data: "\"a string with an escape \\20\""},
56 {Type: TokenWhitespace, Data: " "},
57 {Type: parser.TokenDone},
58 },
59 },
60 { // 7
61 "'escaped newline \\\n'",
62 []parser.Token{
63 {Type: TokenString, Data: "'escaped newline \\\n'"},
64 {Type: parser.TokenDone},
65 },
66 },
67 { // 8
68 "'escape followed by newline \\A\n'",
69 []parser.Token{
70 {Type: TokenString, Data: "'escape followed by newline \\A\n'"},
71 {Type: parser.TokenDone},
72 },
73 },
74 { // 9
75 "'escape followed by newline \\AaFf01\n'",
76 []parser.Token{
77 {Type: TokenString, Data: "'escape followed by newline \\AaFf01\n'"},
78 {Type: parser.TokenDone},
79 },
80 },
81 { // 10
82 "'escaped newline \\\n'",
83 []parser.Token{
84 {Type: TokenString, Data: "'escaped newline \\\n'"},
85 {Type: parser.TokenDone},
86 },
87 },
88 { // 11
89 "'escaped EOF \\",
90 []parser.Token{
91 {Type: TokenBadString, Data: "'escaped EOF \\"},
92 {Type: parser.TokenDone},
93 },
94 },
95 { // 12
96 "'escape followed by newline \\AaFf012\n'",
97 []parser.Token{
98 {Type: TokenBadString, Data: "'escape followed by newline \\AaFf012\n"},
99 {Type: TokenBadString, Data: "'"},
100 {Type: parser.TokenDone},
101 },
102 },
103 { // 13
104 "'bad string\n ",
105 []parser.Token{
106 {Type: TokenBadString, Data: "'bad string\n"},
107 {Type: TokenWhitespace, Data: " "},
108 {Type: parser.TokenDone},
109 },
110 },
111 { // 14
112 "'\"'\"'\"",
113 []parser.Token{
114 {Type: TokenString, Data: "'\"'"},
115 {Type: TokenString, Data: "\"'\""},
116 {Type: parser.TokenDone},
117 },
118 },
119 { // 15
120 "{}[",
121 []parser.Token{
122 {Type: TokenOpenBrace, Data: "{"},
123 {Type: TokenCloseBrace, Data: "}"},
124 {Type: TokenOpenBracket, Data: "["},
125 {Type: parser.TokenError, Data: "unexpected EOF"},
126 },
127 },
128 { // 16
129 ")[(]",
130 []parser.Token{
131 {Type: TokenDelim, Data: ")"},
132 {Type: TokenOpenBracket, Data: "["},
133 {Type: TokenOpenParen, Data: "("},
134 {Type: TokenDelim, Data: "]"},
135 {Type: parser.TokenError, Data: "unexpected EOF"},
136 },
137 },
138 { // 17
139 "[abc]",
140 []parser.Token{
141 {Type: TokenOpenBracket, Data: "["},
142 {Type: TokenIdent, Data: "abc"},
143 {Type: TokenCloseBracket, Data: "]"},
144 {Type: parser.TokenDone},
145 },
146 },
147 { // 18
148 ",:;",
149 []parser.Token{
150 {Type: TokenComma, Data: ","},
151 {Type: TokenColon, Data: ":"},
152 {Type: TokenSemiColon, Data: ";"},
153 {Type: parser.TokenDone},
154 },
155 },
156 { // 19
157 "1 2 12 +3.14 -1 10e+2 1.2E-9 .5 .+- 123.a 123e-a 456E+a 789ea",
158 []parser.Token{
159 {Type: TokenNumber, Data: "1"},
160 {Type: TokenWhitespace, Data: " "},
161 {Type: TokenNumber, Data: "2"},
162 {Type: TokenWhitespace, Data: " "},
163 {Type: TokenNumber, Data: "12"},
164 {Type: TokenWhitespace, Data: " "},
165 {Type: TokenNumber, Data: "+3.14"},
166 {Type: TokenWhitespace, Data: " "},
167 {Type: TokenNumber, Data: "-1"},
168 {Type: TokenWhitespace, Data: " "},
169 {Type: TokenNumber, Data: "10e+2"},
170 {Type: TokenWhitespace, Data: " "},
171 {Type: TokenNumber, Data: "1.2E-9"},
172 {Type: TokenWhitespace, Data: " "},
173 {Type: TokenNumber, Data: ".5"},
174 {Type: TokenWhitespace, Data: " "},
175 {Type: TokenDelim, Data: "."},
176 {Type: TokenDelim, Data: "+"},
177 {Type: TokenDelim, Data: "-"},
178 {Type: TokenWhitespace, Data: " "},
179 {Type: TokenNumber, Data: "123"},
180 {Type: TokenDelim, Data: "."},
181 {Type: TokenIdent, Data: "a"},
182 {Type: TokenWhitespace, Data: " "},
183 {Type: TokenDimension, Data: "123e-a"},
184 {Type: TokenWhitespace, Data: " "},
185 {Type: TokenDimension, Data: "456E"},
186 {Type: TokenDelim, Data: "+"},
187 {Type: TokenIdent, Data: "a"},
188 {Type: TokenWhitespace, Data: " "},
189 {Type: TokenDimension, Data: "789ea"},
190 {Type: parser.TokenDone},
191 },
192 },
193 { // 20
194 "1% 2% 12% +3.14% 10e+2% .5%",
195 []parser.Token{
196 {Type: TokenPercentage, Data: "1%"},
197 {Type: TokenWhitespace, Data: " "},
198 {Type: TokenPercentage, Data: "2%"},
199 {Type: TokenWhitespace, Data: " "},
200 {Type: TokenPercentage, Data: "12%"},
201 {Type: TokenWhitespace, Data: " "},
202 {Type: TokenPercentage, Data: "+3.14%"},
203 {Type: TokenWhitespace, Data: " "},
204 {Type: TokenPercentage, Data: "10e+2%"},
205 {Type: TokenWhitespace, Data: " "},
206 {Type: TokenPercentage, Data: ".5%"},
207 {Type: parser.TokenDone},
208 },
209 },
210 { // 21
211 "1a 2abc123 12-A_b\\n +3.14--123 10e+2-\\n\\n .5\\n 10px 10 px",
212 []parser.Token{
213 {Type: TokenDimension, Data: "1a"},
214 {Type: TokenWhitespace, Data: " "},
215 {Type: TokenDimension, Data: "2abc123"},
216 {Type: TokenWhitespace, Data: " "},
217 {Type: TokenDimension, Data: "12-A_b\\n"},
218 {Type: TokenWhitespace, Data: " "},
219 {Type: TokenDimension, Data: "+3.14--123"},
220 {Type: TokenWhitespace, Data: " "},
221 {Type: TokenDimension, Data: "10e+2-\\n\\n"},
222 {Type: TokenWhitespace, Data: " "},
223 {Type: TokenDimension, Data: ".5\\n"},
224 {Type: TokenWhitespace, Data: " "},
225 {Type: TokenDimension, Data: "10px"},
226 {Type: TokenWhitespace, Data: " "},
227 {Type: TokenNumber, Data: "10"},
228 {Type: TokenWhitespace, Data: " "},
229 {Type: TokenIdent, Data: "px"},
230 {Type: parser.TokenDone},
231 },
232 },
233 { // 22
234 "a abc123 -A_b\\n --123 -\\n\\n \\n abc\\\n abc£def",
235 []parser.Token{
236 {Type: TokenIdent, Data: "a"},
237 {Type: TokenWhitespace, Data: " "},
238 {Type: TokenIdent, Data: "abc123"},
239 {Type: TokenWhitespace, Data: " "},
240 {Type: TokenIdent, Data: "-A_b\\n"},
241 {Type: TokenWhitespace, Data: " "},
242 {Type: TokenIdent, Data: "--123"},
243 {Type: TokenWhitespace, Data: " "},
244 {Type: TokenIdent, Data: "-\\n\\n"},
245 {Type: TokenWhitespace, Data: " "},
246 {Type: TokenIdent, Data: "\\n"},
247 {Type: TokenWhitespace, Data: " "},
248 {Type: TokenIdent, Data: "abc"},
249 {Type: TokenDelim, Data: "\\"},
250 {Type: TokenWhitespace, Data: "\n "},
251 {Type: TokenIdent, Data: "abc£def"},
252 {Type: parser.TokenDone},
253 },
254 },
255 { // 23
256 "@a @abc123 @-A_b\\n @--123 @-\\n\\n @\\n",
257 []parser.Token{
258 {Type: TokenAtKeyword, Data: "@a"},
259 {Type: TokenWhitespace, Data: " "},
260 {Type: TokenAtKeyword, Data: "@abc123"},
261 {Type: TokenWhitespace, Data: " "},
262 {Type: TokenAtKeyword, Data: "@-A_b\\n"},
263 {Type: TokenWhitespace, Data: " "},
264 {Type: TokenAtKeyword, Data: "@--123"},
265 {Type: TokenWhitespace, Data: " "},
266 {Type: TokenAtKeyword, Data: "@-\\n\\n"},
267 {Type: TokenWhitespace, Data: " "},
268 {Type: TokenAtKeyword, Data: "@\\n"},
269 {Type: parser.TokenDone},
270 },
271 },
272 { // 24
273 "<!-- --><!----><abc>",
274 []parser.Token{
275 {Type: TokenCDO, Data: "<!--"},
276 {Type: TokenWhitespace, Data: " "},
277 {Type: TokenCDC, Data: "-->"},
278 {Type: TokenCDO, Data: "<!--"},
279 {Type: TokenCDC, Data: "-->"},
280 {Type: TokenDelim, Data: "<"},
281 {Type: TokenIdent, Data: "abc"},
282 {Type: TokenDelim, Data: ">"},
283 {Type: parser.TokenDone},
284 },
285 },
286 { // 25
287 "#a #abc123 #-A_b\\n #--123 #-\\n\\n #\\n",
288 []parser.Token{
289 {Type: TokenHash, Data: "#a"},
290 {Type: TokenWhitespace, Data: " "},
291 {Type: TokenHash, Data: "#abc123"},
292 {Type: TokenWhitespace, Data: " "},
293 {Type: TokenHash, Data: "#-A_b\\n"},
294 {Type: TokenWhitespace, Data: " "},
295 {Type: TokenHash, Data: "#--123"},
296 {Type: TokenWhitespace, Data: " "},
297 {Type: TokenHash, Data: "#-\\n\\n"},
298 {Type: TokenWhitespace, Data: " "},
299 {Type: TokenHash, Data: "#\\n"},
300 {Type: parser.TokenDone},
301 },
302 },
303 { // 26
304 "a()abc123() -A_b\\n() @--123()",
305 []parser.Token{
306 {Type: TokenFunction, Data: "a("},
307 {Type: TokenCloseParen, Data: ")"},
308 {Type: TokenFunction, Data: "abc123("},
309 {Type: TokenCloseParen, Data: ")"},
310 {Type: TokenWhitespace, Data: " "},
311 {Type: TokenFunction, Data: "-A_b\\n("},
312 {Type: TokenCloseParen, Data: ")"},
313 {Type: TokenWhitespace, Data: " "},
314 {Type: TokenAtKeyword, Data: "@--123"},
315 {Type: TokenOpenParen, Data: "("},
316 {Type: TokenCloseParen, Data: ")"},
317 {Type: parser.TokenDone},
318 },
319 },
320 { // 27
321 "url(abc) uRl( abc ) URL() UrL(!#$%&) url(abc\") url('abc') url(\"\") url(a b) url(a\\\nb) url(abc\\)",
322 []parser.Token{
323 {Type: TokenURL, Data: "url(abc)"},
324 {Type: TokenWhitespace, Data: " "},
325 {Type: TokenURL, Data: "uRl( abc )"},
326 {Type: TokenWhitespace, Data: " "},
327 {Type: TokenURL, Data: "URL()"},
328 {Type: TokenWhitespace, Data: " "},
329 {Type: TokenURL, Data: "UrL(!#$%&)"},
330 {Type: TokenWhitespace, Data: " "},
331 {Type: TokenBadURL, Data: "url(abc\")"},
332 {Type: TokenWhitespace, Data: " "},
333 {Type: TokenFunction, Data: "url("},
334 {Type: TokenString, Data: "'abc'"},
335 {Type: TokenCloseParen, Data: ")"},
336 {Type: TokenWhitespace, Data: " "},
337 {Type: TokenFunction, Data: "url("},
338 {Type: TokenString, Data: "\"\""},
339 {Type: TokenCloseParen, Data: ")"},
340 {Type: TokenWhitespace, Data: " "},
341 {Type: TokenBadURL, Data: "url(a b)"},
342 {Type: TokenWhitespace, Data: " "},
343 {Type: TokenBadURL, Data: "url(a\\\nb)"},
344 {Type: TokenWhitespace, Data: " "},
345 {Type: TokenBadURL, Data: "url(abc\\)"},
346 {Type: parser.TokenDone},
347 },
348 },
349 { // 28
350 "@ # . @#.|!$&",
351 []parser.Token{
352 {Type: TokenDelim, Data: "@"},
353 {Type: TokenWhitespace, Data: " "},
354 {Type: TokenDelim, Data: "#"},
355 {Type: TokenWhitespace, Data: " "},
356 {Type: TokenDelim, Data: "."},
357 {Type: TokenWhitespace, Data: " "},
358 {Type: TokenDelim, Data: "@"},
359 {Type: TokenDelim, Data: "#"},
360 {Type: TokenDelim, Data: "."},
361 {Type: TokenDelim, Data: "|"},
362 {Type: TokenDelim, Data: "!"},
363 {Type: TokenDelim, Data: "$"},
364 {Type: TokenDelim, Data: "&"},
365 {Type: parser.TokenDone},
366 },
367 },
368 } {
369 p := CreateTokeniser(parser.NewStringTokeniser(test.Input), true)
370
371 for m, tkn := range test.Output {
372 if tk, _ := p.GetToken(); tk.Type != tkn.Type {
373 if tk.Type == parser.TokenError {
374 t.Errorf("test %d.%d: unexpected error: %s", n+1, m+1, tk.Data)
375 } else {
376 t.Errorf("test %d.%d: Incorrect type, expecting %d, got %d", n+1, m+1, tkn.Type, tk.Type)
377 }
378
379 break
380 } else if tk.Data != tkn.Data {
381 t.Errorf("test %d.%d: Incorrect data, expecting %q, got %q", n+1, m+1, tkn.Data, tk.Data)
382
383 break
384 }
385 }
386 }
387 }
388
389 func TestTokeniserNoPreprocess(t *testing.T) {
390 for n, test := range [...]struct {
391 Input string
392 Output []parser.Token
393 }{
394 { // 1
395 " \t\n\r\r\n\f",
396 []parser.Token{
397 {Type: TokenWhitespace, Data: " \t\n\r\r\n\f"},
398 {Type: parser.TokenDone},
399 },
400 },
401 { // 2
402 "'escaped newline \\\n' 'escaped carriage return \\\r' 'escaped form feed \\\f' 'escaped crlf \\\r\n'",
403 []parser.Token{
404 {Type: TokenString, Data: "'escaped newline \\\n'"},
405 {Type: TokenWhitespace, Data: " "},
406 {Type: TokenString, Data: "'escaped carriage return \\\r'"},
407 {Type: TokenWhitespace, Data: " "},
408 {Type: TokenString, Data: "'escaped form feed \\\f'"},
409 {Type: TokenWhitespace, Data: " "},
410 {Type: TokenString, Data: "'escaped crlf \\\r\n'"},
411 {Type: parser.TokenDone},
412 },
413 },
414 { // 3
415 "'escape followed by newline \\A\n' 'escape followed by carriage return \\A\r' 'escape followed by form feed \\A\f' 'escape followed by crlf \\A\r\n'",
416 []parser.Token{
417 {Type: TokenString, Data: "'escape followed by newline \\A\n'"},
418 {Type: TokenWhitespace, Data: " "},
419 {Type: TokenString, Data: "'escape followed by carriage return \\A\r'"},
420 {Type: TokenWhitespace, Data: " "},
421 {Type: TokenString, Data: "'escape followed by form feed \\A\f'"},
422 {Type: TokenWhitespace, Data: " "},
423 {Type: TokenString, Data: "'escape followed by crlf \\A\r\n'"},
424 {Type: parser.TokenDone},
425 },
426 },
427 { // 4
428 "'escape followed by newline \\AaFf01\n' 'escape followed by carriage return \\AaFf01\r' 'escape followed by form feed \\AaFf01\f' 'escape followed by crlf \\AaFf01\r\n'",
429 []parser.Token{
430 {Type: TokenString, Data: "'escape followed by newline \\AaFf01\n'"},
431 {Type: TokenWhitespace, Data: " "},
432 {Type: TokenString, Data: "'escape followed by carriage return \\AaFf01\r'"},
433 {Type: TokenWhitespace, Data: " "},
434 {Type: TokenString, Data: "'escape followed by form feed \\AaFf01\f'"},
435 {Type: TokenWhitespace, Data: " "},
436 {Type: TokenString, Data: "'escape followed by crlf \\AaFf01\r\n'"},
437 {Type: parser.TokenDone},
438 },
439 },
440 { // 5
441 "'escaped newline \\\n' 'escaped carriage return \\\r' 'escaped form feed \\\f' 'escaped crlf \\\r\n'",
442 []parser.Token{
443 {Type: TokenString, Data: "'escaped newline \\\n'"},
444 {Type: TokenWhitespace, Data: " "},
445 {Type: TokenString, Data: "'escaped carriage return \\\r'"},
446 {Type: TokenWhitespace, Data: " "},
447 {Type: TokenString, Data: "'escaped form feed \\\f'"},
448 {Type: TokenWhitespace, Data: " "},
449 {Type: TokenString, Data: "'escaped crlf \\\r\n'"},
450 {Type: parser.TokenDone},
451 },
452 },
453 { // 6
454 "'escape followed by newline \\AaFf012\n 'escape followed by carriage return \\AaFf012\r 'escape followed by form feed \\AaFf012\f 'escape followed by crlf \\AaFf012\r\n",
455 []parser.Token{
456 {Type: TokenBadString, Data: "'escape followed by newline \\AaFf012\n"},
457 {Type: TokenWhitespace, Data: " "},
458 {Type: TokenBadString, Data: "'escape followed by carriage return \\AaFf012\r"},
459 {Type: TokenWhitespace, Data: " "},
460 {Type: TokenBadString, Data: "'escape followed by form feed \\AaFf012\f"},
461 {Type: TokenWhitespace, Data: " "},
462 {Type: TokenBadString, Data: "'escape followed by crlf \\AaFf012\r\n"},
463 {Type: parser.TokenDone},
464 },
465 },
466 { // 7
467 "'bad string\n 'bad string\r 'bad string\f 'bad string\r\n",
468 []parser.Token{
469 {Type: TokenBadString, Data: "'bad string\n"},
470 {Type: TokenWhitespace, Data: " "},
471 {Type: TokenBadString, Data: "'bad string\r"},
472 {Type: TokenWhitespace, Data: " "},
473 {Type: TokenBadString, Data: "'bad string\f"},
474 {Type: TokenWhitespace, Data: " "},
475 {Type: TokenBadString, Data: "'bad string\r\n"},
476 {Type: parser.TokenDone},
477 },
478 },
479 { // 8
480 "abc\\\n abc\\\r abc\\\f abc\\\r\n",
481 []parser.Token{
482 {Type: TokenIdent, Data: "abc"},
483 {Type: TokenDelim, Data: "\\"},
484 {Type: TokenWhitespace, Data: "\n "},
485 {Type: TokenIdent, Data: "abc"},
486 {Type: TokenDelim, Data: "\\"},
487 {Type: TokenWhitespace, Data: "\r "},
488 {Type: TokenIdent, Data: "abc"},
489 {Type: TokenDelim, Data: "\\"},
490 {Type: TokenWhitespace, Data: "\f "},
491 {Type: TokenIdent, Data: "abc"},
492 {Type: TokenDelim, Data: "\\"},
493 {Type: TokenWhitespace, Data: "\r\n"},
494 {Type: parser.TokenDone},
495 },
496 },
497 { // 9
498 "url(a\\\nb) url(a\\\rb) url(a\\\fb) url(a\\\r\\\nb)",
499 []parser.Token{
500 {Type: TokenBadURL, Data: "url(a\\\nb)"},
501 {Type: TokenWhitespace, Data: " "},
502 {Type: TokenBadURL, Data: "url(a\\\rb)"},
503 {Type: TokenWhitespace, Data: " "},
504 {Type: TokenBadURL, Data: "url(a\\\fb)"},
505 {Type: TokenWhitespace, Data: " "},
506 {Type: TokenBadURL, Data: "url(a\\\r\\\nb)"},
507 {Type: parser.TokenDone},
508 },
509 },
510 } {
511 p := createTokeniser(parser.NewStringTokeniser(test.Input), false)
512
513 for m, tkn := range test.Output {
514 if tk, _ := p.GetToken(); tk.Type != tkn.Type {
515 if tk.Type == parser.TokenError {
516 t.Errorf("test %d.%d: unexpected error: %s", n+1, m+1, tk.Data)
517 } else {
518 t.Errorf("test %d.%d: Incorrect type, expecting %d, got %d", n+1, m+1, tkn.Type, tk.Type)
519 }
520
521 break
522 } else if tk.Data != tkn.Data {
523 t.Errorf("test %d.%d: Incorrect data, expecting %q, got %q", n+1, m+1, tkn.Data, tk.Data)
524
525 break
526 }
527 }
528 }
529 }
530