// Package parser provides a state-machine tokeniser for turning an input
// stream into a sequence of Tokens. (File: tokeniser.go)
1 package parser
2
3 import (
4 "errors"
5 "io"
6 "strings"
7 )
8
// TokenType represents the type of token being read.
//
// Negative values are reserved for this package; user-defined token types
// should therefore be non-negative.
type TokenType int

// Reserved token types. The `-1 - iota` expression counts downwards, so
// TokenDone is -1 and TokenError is -2.
const (
	// TokenDone (-1) signals that tokenising finished cleanly.
	TokenDone TokenType = -1 - iota
	// TokenError (-2) signals a failure; the error itself is in Tokeniser.Err.
	TokenError
)
19
// Token represents data parsed from the stream.
type Token struct {
	Type TokenType // classification of this token; negative values are reserved
	Data string    // string payload (e.g. the error text for TokenError tokens)
}
25
// TokenFunc is the type that the worker funcs implement in order to be used by
// the tokeniser.
//
// A TokenFunc returns the token it produced plus the next state func to run;
// returning itself keeps the machine in the same state.
type TokenFunc func(*Tokeniser) (Token, TokenFunc)
29
// tokeniser is the low-level reader interface that the state machine drives.
// NOTE(review): semantics below are inferred from the callers in this file —
// confirm against the concrete implementations.
type tokeniser interface {
	backup()     // un-reads the most recently read rune (used by Peek/Accept)
	get() string // returns everything read so far and resets for the next round
	length() int // number of bytes read since the last get
	next() rune  // reads the next rune; callers treat -1 as end of input
}
36
// Tokeniser is a state machine to generate tokens from an input.
type Tokeniser struct {
	tokeniser           // embedded low-level reader supplying next/backup/get/length
	Err   error         // last error recorded; io.EOF marks normal end of input
	state TokenFunc     // current state func; nil means no state has been set
}
43
44 // GetToken runs the state machine and retrieves a single token and possible an
45 // error
46 func (t *Tokeniser) GetToken() (Token, error) {
47 tk := t.get()
48 if tk.Type == TokenError {
49 return tk, t.Err
50 }
51 return tk, nil
52 }
53
// GetError returns any error that has been generated by the Tokeniser.
// A nil return means no error has occurred yet.
func (t *Tokeniser) GetError() error {
	return t.Err
}
58
// TokeniserState allows the internal state of the Tokeniser to be set.
// The given TokenFunc will be run on the next call to GetToken.
func (t *Tokeniser) TokeniserState(tf TokenFunc) {
	t.state = tf
}
63
64 func (t *Tokeniser) get() Token {
65 if t.Err == io.EOF {
66 return Token{
67 Type: TokenDone,
68 Data: "",
69 }
70 }
71 if t.state == nil {
72 t.Err = ErrNoState
73 t.state = (*Tokeniser).Error
74 }
75 var tk Token
76 tk, t.state = t.state(t)
77 if tk.Type == TokenError && t.Err == io.EOF {
78 t.Err = io.ErrUnexpectedEOF
79 }
80 return tk
81 }
82
83 // Accept returns true if the next character to be read is contained within the
84 // given string.
85 //
86 // Upon true, it advances the read position, otherwise the position remains the
87 // same.
88 func (t *Tokeniser) Accept(chars string) bool {
89 if !strings.ContainsRune(chars, t.next()) {
90 t.backup()
91 return false
92 }
93 return true
94 }
95
96 // Peek returns the next rune without advancing the read position.
97 func (t *Tokeniser) Peek() rune {
98 r := t.next()
99 t.backup()
100 return r
101 }
102
// Get returns a string of everything that has been read so far and resets
// the string for the next round of parsing.
//
// The explicit t.tokeniser qualifier routes the call to the embedded reader's
// get, not to the Tokeniser's own get method.
func (t *Tokeniser) Get() string {
	return t.tokeniser.get()
}
108
// Len returns the number of bytes that has been read since the last Get.
func (t *Tokeniser) Len() int {
	return t.length()
}
113
114 // AcceptRun reads from the string as long as the read character is in the
115 // given string.
116 //
117 // Returns the rune that stopped the run.
118 func (t *Tokeniser) AcceptRun(chars string) rune {
119 for {
120 if c := t.next(); !strings.ContainsRune(chars, c) {
121 t.backup()
122 return c
123 }
124 }
125 }
126
127 // Except returns true if the next character to be read is not contained within
128 // the given string.
129 // Upon true, it advances the read position, otherwise the position remains the
130 // same.
131 func (t *Tokeniser) Except(chars string) bool {
132 if r := t.next(); r == -1 || strings.ContainsRune(chars, r) {
133 t.backup()
134 return false
135 }
136 return true
137 }
138
139 // ExceptRun reads from the string as long as the read character is not in the
140 // given string.
141 //
142 // Returns the rune that stopped the run.
143 func (t *Tokeniser) ExceptRun(chars string) rune {
144 for {
145 if r := t.next(); r == -1 || strings.ContainsRune(chars, r) {
146 t.backup()
147 return r
148 }
149 }
150 }
151
152 // Done is a TokenFunc that is used to indicate that there are no more tokens to
153 // parse.
154 func (t *Tokeniser) Done() (Token, TokenFunc) {
155 t.Err = io.EOF
156 return Token{
157 Type: TokenDone,
158 Data: "",
159 }, (*Tokeniser).Done
160 }
161
162 // Error represents an error state for the parser.
163 //
164 // The error value should be set in Tokeniser.Err and then this func should be
165 // called.
166 func (t *Tokeniser) Error() (Token, TokenFunc) {
167 if t.Err == nil {
168 t.Err = ErrUnknownError
169 }
170 return Token{
171 Type: TokenError,
172 Data: t.Err.Error(),
173 }, (*Tokeniser).Error
174 }
175
// Errors returned by this package.
var (
	// ErrNoState is recorded when the state machine is run with no state
	// func set.
	ErrNoState = errors.New("no state")
	// ErrUnknownError is used by Error when no other error was recorded.
	ErrUnknownError = errors.New("unknown error")
)
181