css - tokeniser.go
1 package css
2
3 import (
4 "io"
5
6 "vimagination.zapto.org/parser"
7 )
8
// Character classes used throughout the tokeniser. Each string is treated as
// a set of runes by the parser.Tokeniser Accept/Except helpers.
const (
	digit        = "0123456789"
	upperLetters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	lowerLetters = "abcdefghijklmnopqrstuvwxyz"
	letters      = upperLetters + lowerLetters
	identStart   = letters + "_"               // ASCII runes that may begin an identifier (non-ASCII handled by acceptNonAscii)
	identCont    = letters + "_" + digit + "-" // ASCII runes that may continue an identifier
	hexDigits    = digit + "abcdefABCDEF"
	newline      = "\n\r\f"
	whitespace   = " \t" + newline
	// noURL lists every rune that terminates or invalidates an unquoted
	// url() token: whitespace, quotes, parentheses, backslash, NUL, and the
	// non-printable control characters.
	noURL = whitespace + "\"'()\\\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f"
)
21
// Token types produced by the tokeniser, broadly mirroring the token names
// of the CSS Syntax Module.
const (
	TokenWhitespace parser.TokenType = iota // run of spaces, tabs and newlines
	TokenComment                            // /* ... */
	TokenIdent                              // plain identifier
	TokenString                             // quoted string, including the quotes
	TokenHash                               // #name
	TokenNumber                             // numeric value with no unit
	TokenComma                              // ,
	TokenCDC                                // -->
	TokenCDO                                // <!--
	TokenColon                              // :
	TokenSemiColon                          // ;
	TokenAtKeyword                          // @name
	TokenOpenParen                          // (
	TokenCloseParen                         // )
	TokenOpenBracket                        // [
	TokenCloseBracket                       // ]
	TokenOpenBrace                          // {
	TokenCloseBrace                         // }
	TokenFunction                           // name( — matching ')' tracked on the depth stack
	TokenBadString                          // string terminated by newline, EOF or bad escape
	TokenURL                                // unquoted url(...)
	TokenBadURL                             // malformed url(...), consumed through to ')' or EOF
	TokenPercentage                         // number followed by %
	TokenDimension                          // number followed by an identifier unit
	TokenDelim                              // any single rune not forming another token
)
49
// preprocessor wraps a parser.Tokeniser so it can act as an io.RuneReader
// whose ReadRune applies CSS input-stream preprocessing (newline
// normalisation) before the runes reach the tokeniser proper.
type preprocessor struct {
	parser.Tokeniser
}
53
54 func (p *preprocessor) ReadRune() (rune, int, error) {
55 r := p.Next()
56 if r == -1 {
57 return 0, 0, io.EOF
58 }
59
60 switch r {
61 case '\r':
62 p.Accept("\n")
63
64 r = '\n'
65 case '\f':
66 r = '\n'
67 }
68
69 return r, 0, nil
70 }
71
72 func CreateTokeniser(t parser.Tokeniser, preprocess bool) *parser.Tokeniser {
73 if preprocess {
74 t = parser.NewRuneReaderTokeniser(&preprocessor{t})
75 }
76
77 t.TokeniserState(new(tokeniser).start)
78
79 return &t
80 }
81
// tokeniser holds the state shared between the tokenising functions: a stack
// of the closing runes expected for the currently-open parens, brackets and
// braces.
type tokeniser struct {
	depth []rune // expected closers, innermost last
}
85
86 func (t *tokeniser) isState(r rune) bool {
87 if len(t.depth) == 0 {
88 return false
89 }
90
91 return t.depth[len(t.depth)-1] == r
92 }
93
// pushState records the closing rune expected for a just-consumed opener
// (')', ']' or '}'), so closers can be matched and EOF validated.
func (t *tokeniser) pushState(r rune) {
	t.depth = append(t.depth, r)
}
97
// popState discards the innermost expected closer. Callers must check the
// stack via isState first; popping an empty stack would panic.
func (t *tokeniser) popState() {
	t.depth = t.depth[:len(t.depth)-1]
}
101
// start is the entry tokeniser state: it inspects the next rune and either
// emits a single-rune token directly or hands off to a more specific state
// (comment, string, number, ident, url, etc.). Any rune not claimed by a
// branch below falls through to a TokenDelim. Note the Accept calls consume
// input as a side effect, so branch order is significant.
func (t *tokeniser) start(tk *parser.Tokeniser) (parser.Token, parser.TokenFunc) {
	if tk.Peek() == -1 {
		// EOF is only valid once every open (, [ and { has been closed.
		if len(t.depth) == 0 {
			return tk.Done()
		}

		return tk.ReturnError(io.ErrUnexpectedEOF)
	} else if tk.Accept("/") {
		// "/*" starts a comment; a lone '/' falls through to TokenDelim.
		if tk.Accept("*") {
			return t.parseComment(tk)
		}
	} else if tk.Accept(whitespace) {
		tk.AcceptRun(whitespace)

		return tk.Return(TokenWhitespace, t.start)
	} else if tk.Accept(`"`) {
		return t.string(tk)
	} else if tk.Accept("'") {
		return t.string(tk)
	} else if tk.Accept("#") {
		// '#' only forms a hash token when followed by a word character;
		// otherwise it becomes a delim.
		if acceptWordChar(tk) {
			return t.hash(tk)
		}
	} else if tk.Accept("(") {
		t.pushState(')')

		return tk.Return(TokenOpenParen, t.start)
	} else if tk.Accept(")") {
		// An unbalanced closer falls through and is emitted as a delim.
		if t.isState(')') {
			t.popState()

			return tk.Return(TokenCloseParen, t.start)
		}
	} else if tk.Accept(",") {
		return tk.Return(TokenComma, t.start)
	} else if tk.Accept(".") {
		// ".5"-style numbers; a bare '.' is a delim.
		if tk.Accept(digit) {
			return t.number(tk)
		}
	} else if tk.Accept(":") {
		return tk.Return(TokenColon, t.start)
	} else if tk.Accept(";") {
		return tk.Return(TokenSemiColon, t.start)
	} else if tk.Accept("<") {
		s := tk.State()

		// "<!--" is the CDO token; anything else rewinds to the '<' delim.
		if tk.AcceptString("!--", false) == 3 {
			return tk.Return(TokenCDO, t.start)
		}

		s.Reset()
	} else if tk.Accept("@") {
		// ident handles both at-keywords and the fallback '@' delim.
		return t.ident(tk)
	} else if tk.Accept("[") {
		t.pushState(']')

		return tk.Return(TokenOpenBracket, t.start)
	} else if tk.Accept("]") {
		if t.isState(']') {
			t.popState()

			return tk.Return(TokenCloseBracket, t.start)
		}
	} else if tk.Accept("\\") {
		// An escape can begin an identifier; ident rewinds and re-parses.
		return t.ident(tk)
	} else if tk.Accept("{") {
		t.pushState('}')

		return tk.Return(TokenOpenBrace, t.start)
	} else if tk.Accept("}") {
		if t.isState('}') {
			t.popState()

			return tk.Return(TokenCloseBrace, t.start)
		}
	} else if tk.Accept(digit) {
		return t.number(tk)
	} else if tk.Accept("+") {
		state := tk.State()

		// '+' only starts a number when followed by a digit or ".digit";
		// otherwise it rewinds to a delim.
		if tk.Accept(digit) || tk.Accept(".") && tk.Accept(digit) {
			return t.number(tk)
		}

		state.Reset()
	} else if tk.Accept("-") {
		state := tk.State()

		// '-' may start the CDC token ("-->"), an identifier ("--x" or
		// "-x"), or a signed number; otherwise it rewinds to a delim.
		if tk.Accept("-") {
			if tk.Accept(">") {
				return tk.Return(TokenCDC, t.start)
			} else {
				return t.ident(tk)
			}
		} else if tk.Accept(identStart) || tk.Accept("\\") {
			return t.ident(tk)
		} else if tk.Accept(digit) || tk.Accept(".") && tk.Accept(digit) {
			return t.number(tk)
		}

		state.Reset()
	} else if tk.Accept(identStart) {
		return t.ident(tk)
	} else {
		// NOTE(review): any rune not matched above — including non-ASCII
		// runes, which acceptIdent would treat as ident-start when reached
		// through other branches — is consumed here and emitted as a delim.
		// Confirm that top-level non-ASCII identifiers are not expected.
		tk.Next()
	}

	// Every branch that did not produce a more specific token ends up here.
	return tk.Return(TokenDelim, t.start)
}
211
212 func (t *tokeniser) parseComment(tk *parser.Tokeniser) (parser.Token, parser.TokenFunc) {
213 for {
214 if tk.ExceptRun("*") == -1 {
215 return tk.ReturnError(io.ErrUnexpectedEOF)
216 }
217
218 tk.Accept("*")
219
220 if tk.Accept("/") {
221 return tk.Return(TokenComment, t.start)
222 }
223 }
224 }
225
// string consumes a single- or double-quoted string, assuming the opening
// quote was already consumed by start. It rewinds first so the token
// includes the quote, then reads until the matching quote (TokenString) or a
// newline/EOF/bad escape (TokenBadString), handling backslash escapes and
// line continuations along the way.
func (t *tokeniser) string(tk *parser.Tokeniser) (parser.Token, parser.TokenFunc) {
	tk.Reset()

	var chars string

	// Re-read the opening quote to learn which quote terminates the string;
	// ExceptRun then stops at that quote, a backslash, or any newline.
	switch tk.Next() {
	case '"':
		chars = "\"\\" + newline
	case '\'':
		chars = "'\\" + newline
	}

	for {
		switch tk.ExceptRun(chars) {
		case '\n', '\r', '\f':
			// An unescaped newline ends the string as a bad-string token.
			// NOTE(review): the newline is consumed into the bad-string
			// token here, whereas css-syntax-3 reconsumes it for the next
			// token — confirm this deviation is intended.
			acceptNewline(tk)

			fallthrough
		case -1:
			return tk.Return(TokenBadString, t.start)
		case '"', '\'':
			// Only the quote that opened the string can appear here, since
			// chars contains just that one.
			tk.Next()

			return tk.Return(TokenString, t.start)
		case '\\':
			tk.Next()

			// Backslash-newline is a line continuation; any other valid
			// escape is consumed. Anything else makes the string bad.
			if !acceptNewline(tk) && !acceptEscape(tk) {
				return tk.Return(TokenBadString, t.start)
			}
		}
	}
}
259
260 func acceptNewline(tk *parser.Tokeniser) bool {
261 if tk.Accept("\r") {
262 tk.Accept("\n")
263
264 return true
265 }
266
267 return tk.Accept(newline)
268 }
269
// acceptEscape consumes the body of an escape sequence, the leading
// backslash having already been consumed by the caller. It returns true for
// a valid escape: 1-6 hex digits (optionally terminated by one whitespace
// character) or any single non-newline, non-hex character. EOF or a newline
// after the backslash is invalid; note that the newline is still consumed on
// failure, so callers that need to rewind must save state beforehand.
func acceptEscape(tk *parser.Tokeniser) bool {
	if tk.Peek() == -1 {
		return false
	}

	if acceptNewline(tk) {
		return false
	} else if tk.Except(hexDigits) {
		// A single non-hex character escapes to itself.
		return true
	}

	// Except left the hex digit unconsumed, so consume up to six of them.
	for range 6 {
		tk.Accept(hexDigits)
	}

	// One optional whitespace character (a CRLF pair counts as one) may
	// terminate the hex escape.
	if !acceptNewline(tk) {
		tk.Accept(whitespace)
	}

	return true
}
291
292 func (t *tokeniser) number(tk *parser.Tokeniser) (parser.Token, parser.TokenFunc) {
293 tk.Reset()
294 tk.Accept("+-")
295 tk.AcceptRun(digit)
296
297 state := tk.State()
298
299 if tk.Accept(".") {
300 if tk.Accept(digit) {
301 tk.AcceptRun(digit)
302 } else {
303 state.Reset()
304 }
305 }
306
307 state = tk.State()
308
309 if tk.Accept("eE") {
310 tk.Accept("+-")
311
312 if tk.Accept(digit) {
313 tk.AcceptRun(digit)
314 } else {
315 state.Reset()
316 }
317 }
318
319 state = tk.State()
320
321 if tk.Accept("%") {
322 return tk.Return(TokenPercentage, t.start)
323 } else if acceptIdent(tk) {
324 return tk.Return(TokenDimension, t.start)
325 }
326
327 state.Reset()
328
329 return tk.Return(TokenNumber, t.start)
330 }
331
// ident rewinds to the token start and consumes an identifier-like token: an
// unquoted url(...) (handed off to url), an at-keyword, a function, or a
// plain ident. If no valid identifier follows, the single leading rune is
// emitted as a TokenDelim instead.
func (t *tokeniser) ident(tk *parser.Tokeniser) (parser.Token, parser.TokenFunc) {
	tk.Reset()

	id := TokenIdent
	state := tk.State()

	// A case-insensitive "url(" with an unquoted argument becomes a URL
	// token; a quoted argument rewinds and tokenises as a normal function.
	// NOTE(review): an escaped spelling such as \75rl( is not detected here
	// and will tokenise as a function instead — confirm this is acceptable.
	if tk.AcceptString("url(", true) == 4 {
		tk.AcceptRun(whitespace)

		if c := tk.Peek(); c != '"' && c != '\'' {
			return t.url(tk)
		}
	}

	state.Reset()

	// A leading '@' turns the identifier into an at-keyword.
	if tk.Accept("@") {
		id = TokenAtKeyword
	}

	if !acceptIdent(tk) {
		// Not an identifier after all: rewind fully, then emit exactly one
		// rune as a delim.
		state.Reset()
		tk.Next()

		return tk.Return(TokenDelim, t.start)
	}

	// A plain ident immediately followed by '(' is a function; its matching
	// ')' is tracked on the depth stack. At-keywords never become functions.
	if id == TokenIdent && tk.Accept("(") {
		id = TokenFunction
		t.pushState(')')
	}

	return tk.Return(id, t.start)
}
366
// acceptIdent consumes an identifier and reports whether one was present: a
// leading "--", or an ident-start character (ASCII letter/underscore,
// non-ASCII rune, or valid escape), followed by any number of word
// characters. On failure it may leave a partial prefix consumed (e.g. a lone
// '-' or '\'); callers rewind via a saved state.
func acceptIdent(tk *parser.Tokeniser) bool {
	if tk.AcceptString("--", false) != 2 {
		if tk.Accept("\\") {
			if !acceptEscape(tk) {
				return false
			}
		} else if !tk.Accept(identStart) {
			if !acceptNonAscii(tk) {
				return false
			}
		}
	}

	// Consume the remainder of the identifier.
	for acceptWordChar(tk) {
	}

	return true
}
385
386 func acceptNonAscii(tk *parser.Tokeniser) bool {
387 if c := tk.Peek(); c < 0x80 {
388 return false
389 }
390
391 tk.Next()
392
393 return true
394 }
395
396 func acceptWordChar(tk *parser.Tokeniser) bool {
397 if tk.Accept(identCont) || acceptNonAscii(tk) {
398 return true
399 }
400
401 state := tk.State()
402
403 if tk.Accept("\\") && acceptEscape(tk) {
404 return true
405 }
406
407 state.Reset()
408
409 return false
410 }
411
// hash consumes the remainder of a hash token; start has already consumed
// the '#' and the first word character.
func (t *tokeniser) hash(tk *parser.Tokeniser) (parser.Token, parser.TokenFunc) {
	for acceptWordChar(tk) {
	}

	return tk.Return(TokenHash, t.start)
}
418
// url consumes the body of an unquoted url(...) token, with "url(" and any
// leading whitespace already consumed by ident. It returns TokenURL for a
// well-formed URL and TokenBadURL on a disallowed character, invalid escape,
// or interior whitespace; in the bad case it still consumes input up to the
// closing ')' (or EOF) so tokenising can resume cleanly afterwards.
func (t *tokeniser) url(tk *parser.Tokeniser) (parser.Token, parser.TokenFunc) {
	id := TokenURL

Loop:
	for {
		switch tk.ExceptRun(noURL) {
		case -1:
			// NOTE(review): css-syntax-3 emits a <url-token> (with a parse
			// error) on EOF; this returns TokenBadURL — confirm intended.
			return tk.Return(TokenBadURL, t.start)
		case ' ', '\t', '\n', '\r', '\f':
			// Whitespace is only valid immediately before the closing ')';
			// anything else after it invalidates the URL.
			tk.AcceptRun(whitespace)

			if tk.Accept(")") {
				break Loop
			}

			id = TokenBadURL
		case ')':
			tk.Next()

			break Loop
		case '\\':
			tk.Next()

			if !acceptEscape(tk) {
				id = TokenBadURL
			}
		default:
			// Quotes, parentheses, NUL and non-printable characters
			// invalidate the URL but are consumed so the token still ends
			// at ')' or EOF.
			tk.Next()

			id = TokenBadURL
		}
	}

	return tk.Return(id, t.start)
}
454