bash - tokeniser_test.go
package bash

import (
	"testing"

	"vimagination.zapto.org/parser"
)

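// TestTokeniser runs each test input through the bash tokeniser and checks
// that the emitted token stream matches the expected sequence exactly.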
func TestTokeniser(t *testing.T) {
	for n, test := range [...]struct {
		Input  string
		Output []parser.Token
	}{
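		// Tests 1-5: empty input, whitespace (including escaped newlines), and line terminators.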
		{ // 1
			"",
			[]parser.Token{
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 2
			" ",
			[]parser.Token{
				{Type: TokenWhitespace, Data: " "},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 3
			" \t\\\n",
			[]parser.Token{
				{Type: TokenWhitespace, Data: " \t\\\n"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 4
			"\\\n \t",
			[]parser.Token{
				{Type: TokenWhitespace, Data: "\\\n \t"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 5
			" \n\n \n",
			[]parser.Token{
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenLineTerminator, Data: "\n\n"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
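		// Test 6: comments run from '#' to the end of the line.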
		{ // 6
			"#A comment\n# B comment",
			[]parser.Token{
				{Type: TokenComment, Data: "#A comment"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenComment, Data: "# B comment"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
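		// Test 7: $-prefixed identifiers; positional parameters are single digits, so $12 tokenises as $1 followed by the word 2.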
		{ // 7
			"$ident $name a\\nbc=a $0 $12 a$b a${b}c",
			[]parser.Token{
				{Type: TokenIdentifier, Data: "$ident"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenIdentifier, Data: "$name"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "a\\nbc=a"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenIdentifier, Data: "$0"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenIdentifier, Data: "$1"},
				{Type: TokenWord, Data: "2"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "a"},
				{Type: TokenIdentifier, Data: "$b"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "a"},
				{Type: TokenPunctuator, Data: "${"},
				{Type: TokenWord, Data: "b"},
				{Type: TokenPunctuator, Data: "}"},
				{Type: TokenWord, Data: "c"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
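		// Test 8: assignment identifiers, including array subscripts and the += operator.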
		{ // 8
			"abc=a def[0]=b ghi[$i]=c jkl+=d",
			[]parser.Token{
				{Type: TokenIdentifierAssign, Data: "abc"},
				{Type: TokenPunctuator, Data: "="},
				{Type: TokenWord, Data: "a"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenIdentifierAssign, Data: "def"},
				{Type: TokenPunctuator, Data: "["},
				{Type: TokenWord, Data: "0"},
				{Type: TokenPunctuator, Data: "]"},
				{Type: TokenPunctuator, Data: "="},
				{Type: TokenWord, Data: "b"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenIdentifierAssign, Data: "ghi"},
				{Type: TokenPunctuator, Data: "["},
				{Type: TokenIdentifier, Data: "$i"},
				{Type: TokenPunctuator, Data: "]"},
				{Type: TokenPunctuator, Data: "="},
				{Type: TokenWord, Data: "c"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenIdentifierAssign, Data: "jkl"},
				{Type: TokenPunctuator, Data: "+="},
				{Type: TokenWord, Data: "d"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
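		// Test 9: reserved words are tokenised as keywords.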
		{ // 9
			"if then else elif fi case esac while for in do done time until coproc select function",
			[]parser.Token{
				{Type: TokenKeyword, Data: "if"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "then"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "else"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "elif"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "fi"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "case"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "esac"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "while"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "for"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "in"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "do"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "done"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "time"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "until"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "coproc"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "select"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenKeyword, Data: "function"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
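		// Test 10: ${...} parameter expansion, and a bare trailing $ remaining part of a word.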
		{ // 10
			"ident ${name} ab\\nc=a ${6} a$ ",
			[]parser.Token{
				{Type: TokenWord, Data: "ident"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "${"},
				{Type: TokenWord, Data: "name"},
				{Type: TokenPunctuator, Data: "}"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "ab\\nc=a"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "${"},
				{Type: TokenWord, Data: "6"},
				{Type: TokenPunctuator, Data: "}"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "a$"},
				{Type: TokenWhitespace, Data: " "},
				{Type: parser.TokenDone, Data: ""},
			},
		},
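		// Test 11: number literals in arithmetic expansion, including hex, leading-zero octal, and base#digits forms.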
		{ // 11
			"$(( 0 1 29 0xff 0xDeAdBeEf 0755 2#5 ))",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "$(("},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenNumberLiteral, Data: "0"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenNumberLiteral, Data: "1"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenNumberLiteral, Data: "29"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenNumberLiteral, Data: "0xff"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenNumberLiteral, Data: "0xDeAdBeEf"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenNumberLiteral, Data: "0755"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenNumberLiteral, Data: "2#5"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "))"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
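		// Test 12: quoted strings, including embedded command substitution, parameter expansion, literal newlines, and $"..." / $'...' quoting.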
		{ // 12
			"\"abc\" \"de\\nf\" \"stuff`command`more stuff\" \"text $ident $another end\" \"text $(command) end - text ${ident} end\" \"with\nnewline\" 'with\nnewline' $\"a string\" $'a \\'string'",
			[]parser.Token{
				{Type: TokenString, Data: "\"abc\""},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenString, Data: "\"de\\nf\""},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenStringStart, Data: "\"stuff"},
				{Type: TokenOpenBacktick, Data: "`"},
				{Type: TokenWord, Data: "command"},
				{Type: TokenCloseBacktick, Data: "`"},
				{Type: TokenStringEnd, Data: "more stuff\""},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenStringStart, Data: "\"text "},
				{Type: TokenIdentifier, Data: "$ident"},
				{Type: TokenStringMid, Data: " "},
				{Type: TokenIdentifier, Data: "$another"},
				{Type: TokenStringEnd, Data: " end\""},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenStringStart, Data: "\"text "},
				{Type: TokenPunctuator, Data: "$("},
				{Type: TokenWord, Data: "command"},
				{Type: TokenPunctuator, Data: ")"},
				{Type: TokenStringMid, Data: " end - text "},
				{Type: TokenPunctuator, Data: "${"},
				{Type: TokenWord, Data: "ident"},
				{Type: TokenPunctuator, Data: "}"},
				{Type: TokenStringEnd, Data: " end\""},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenString, Data: "\"with\nnewline\""},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenString, Data: "'with\nnewline'"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenString, Data: "$\"a string\""},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenString, Data: "$'a \\'string'"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
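		// Test 13: redirection, pipe, and grouping punctuators.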
		{ // 13
			"< <<< <<- <& <> > >> >& &>> >| | |& || & && () {} = `` $() $(())",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "<"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "<<<"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "<<-"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "<&"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "<>"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: ">"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: ">>"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: ">&"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "&>>"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: ">|"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "|"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "|&"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "||"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "&"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "&&"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "("},
				{Type: TokenPunctuator, Data: ")"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "{"},
				{Type: TokenPunctuator, Data: "}"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "="},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOpenBacktick, Data: "`"},
				{Type: TokenCloseBacktick, Data: "`"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "$("},
				{Type: TokenPunctuator, Data: ")"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "$(("},
				{Type: TokenPunctuator, Data: "))"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
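		// Test 14: operators valid inside arithmetic expansion.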
		{ // 14
			"$(( + += - -= & &= | |= < <= > >= = == ! != * *= ** / /= % %= ^ ^= ~ ? : , (1) ))",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "$(("},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "+"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "+="},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "-"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "-="},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "&"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "&="},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "|"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "|="},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "<"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "<="},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: ">"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: ">="},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "="},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "=="},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "!"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "!="},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "*"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "*="},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "**"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "/"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "/="},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "%"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "%="},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "^"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "^="},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "~"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "?"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: ":"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: ","},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "("},
				{Type: TokenNumberLiteral, Data: "1"},
				{Type: TokenPunctuator, Data: ")"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "))"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
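		// Tests 15-21: error cases; tokenising stops with a TokenError on unbalanced constructs and invalid characters.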
		{ // 15
			"$(( ( ))",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "$(("},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "("},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: ")"},
				{Type: parser.TokenError, Data: "invalid character"},
			},
		},
		{ // 16
			"$(( ? ))",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "$(("},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "?"},
				{Type: TokenWhitespace, Data: " "},
				{Type: parser.TokenError, Data: "invalid character"},
			},
		},
		{ // 17
			"{ ",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "{"},
				{Type: TokenWhitespace, Data: " "},
				{Type: parser.TokenError, Data: "unexpected EOF"},
			},
		},
		{ // 18
			"{ )",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "{"},
				{Type: TokenWhitespace, Data: " "},
				{Type: parser.TokenError, Data: "invalid character"},
			},
		},
		{ // 19
			"(",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "("},
				{Type: parser.TokenError, Data: "unexpected EOF"},
			},
		},
		{ // 20
			"$(",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "$("},
				{Type: parser.TokenError, Data: "unexpected EOF"},
			},
		},
		{ // 21
			"$(}",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "$("},
				{Type: parser.TokenError, Data: "invalid character"},
			},
		},
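		// Tests 22-35: heredocs, covering quoted and escaped delimiters, queued heredocs, embedded expansions, and nesting within command substitution.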
		{ // 22
			"<<abc\n123\n456\nabc",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "<<"},
				{Type: TokenWord, Data: "abc"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenHeredoc, Data: "123\n456\n"},
				{Type: TokenHeredocEnd, Data: "abc"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 23
			"<<a'b 'c\n123\n456\nab c\n",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "<<"},
				{Type: TokenWord, Data: "a'b 'c"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenHeredoc, Data: "123\n456\n"},
				{Type: TokenHeredocEnd, Data: "ab c"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 24
			"<<def\n123\n456\ndef\nabc",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "<<"},
				{Type: TokenWord, Data: "def"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenHeredoc, Data: "123\n456\n"},
				{Type: TokenHeredocEnd, Data: "def"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenWord, Data: "abc"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 25
			"<<def cat\n123\n456\ndef\nabc",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "<<"},
				{Type: TokenWord, Data: "def"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "cat"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenHeredoc, Data: "123\n456\n"},
				{Type: TokenHeredocEnd, Data: "def"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenWord, Data: "abc"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 26
			"<<abc cat;<<def cat\n123\nabc\n456\ndef",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "<<"},
				{Type: TokenWord, Data: "abc"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "cat"},
				{Type: TokenPunctuator, Data: ";"},
				{Type: TokenPunctuator, Data: "<<"},
				{Type: TokenWord, Data: "def"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "cat"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenHeredoc, Data: "123\n"},
				{Type: TokenHeredocEnd, Data: "abc"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenHeredoc, Data: "456\n"},
				{Type: TokenHeredocEnd, Data: "def"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 27
			"<<abc cat;echo $(<<def cat\n456\ndef\n)\n123\nabc",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "<<"},
				{Type: TokenWord, Data: "abc"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "cat"},
				{Type: TokenPunctuator, Data: ";"},
				{Type: TokenWord, Data: "echo"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "$("},
				{Type: TokenPunctuator, Data: "<<"},
				{Type: TokenWord, Data: "def"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "cat"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenHeredoc, Data: "456\n"},
				{Type: TokenHeredocEnd, Data: "def"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenPunctuator, Data: ")"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenHeredoc, Data: "123\n"},
				{Type: TokenHeredocEnd, Data: "abc"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 28
			"<<abc\na$abc\nabc",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "<<"},
				{Type: TokenWord, Data: "abc"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenHeredoc, Data: "a"},
				{Type: TokenIdentifier, Data: "$abc"},
				{Type: TokenHeredoc, Data: "\n"},
				{Type: TokenHeredocEnd, Data: "abc"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 29
			"<<abc\na${abc} $99\nabc",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "<<"},
				{Type: TokenWord, Data: "abc"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenHeredoc, Data: "a"},
				{Type: TokenPunctuator, Data: "${"},
				{Type: TokenWord, Data: "abc"},
				{Type: TokenPunctuator, Data: "}"},
				{Type: TokenHeredoc, Data: " "},
				{Type: TokenIdentifier, Data: "$9"},
				{Type: TokenHeredoc, Data: "9\n"},
				{Type: TokenHeredocEnd, Data: "abc"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 30
			"<<abc\na$(\necho abc;\n) 1\nabc",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "<<"},
				{Type: TokenWord, Data: "abc"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenHeredoc, Data: "a"},
				{Type: TokenPunctuator, Data: "$("},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenWord, Data: "echo"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "abc"},
				{Type: TokenPunctuator, Data: ";"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenPunctuator, Data: ")"},
				{Type: TokenHeredoc, Data: " 1\n"},
				{Type: TokenHeredocEnd, Data: "abc"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 31
			"<<abc\na$(<<def) 1\nabc",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "<<"},
				{Type: TokenWord, Data: "abc"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenHeredoc, Data: "a"},
				{Type: TokenPunctuator, Data: "$("},
				{Type: TokenPunctuator, Data: "<<"},
				{Type: TokenWord, Data: "def"},
				{Type: parser.TokenError, Data: "invalid character"},
			},
		},
		{ // 32
			"<<abc\na$(<<def cat) 1\nabc",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "<<"},
				{Type: TokenWord, Data: "abc"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenHeredoc, Data: "a"},
				{Type: TokenPunctuator, Data: "$("},
				{Type: TokenPunctuator, Data: "<<"},
				{Type: TokenWord, Data: "def"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "cat"},
				{Type: parser.TokenError, Data: "invalid character"},
			},
		},
		{ // 33
			"<<abc;$(<<def cat)\nabc\ndef\nabc",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "<<"},
				{Type: TokenWord, Data: "abc"},
				{Type: TokenPunctuator, Data: ";"},
				{Type: TokenPunctuator, Data: "$("},
				{Type: TokenPunctuator, Data: "<<"},
				{Type: TokenWord, Data: "def"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "cat"},
				{Type: parser.TokenError, Data: "invalid character"},
			},
		},
		{ // 34
			"<<abc;<<def;$(<<ghi;<<jkl\nghi\njkl\n)\nabc\ndef",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "<<"},
				{Type: TokenWord, Data: "abc"},
				{Type: TokenPunctuator, Data: ";"},
				{Type: TokenPunctuator, Data: "<<"},
				{Type: TokenWord, Data: "def"},
				{Type: TokenPunctuator, Data: ";"},
				{Type: TokenPunctuator, Data: "$("},
				{Type: TokenPunctuator, Data: "<<"},
				{Type: TokenWord, Data: "ghi"},
				{Type: TokenPunctuator, Data: ";"},
				{Type: TokenPunctuator, Data: "<<"},
				{Type: TokenWord, Data: "jkl"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenHeredoc, Data: ""},
				{Type: TokenHeredocEnd, Data: "ghi"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenHeredoc, Data: ""},
				{Type: TokenHeredocEnd, Data: "jkl"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenPunctuator, Data: ")"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenHeredoc, Data: ""},
				{Type: TokenHeredocEnd, Data: "abc"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenHeredoc, Data: ""},
				{Type: TokenHeredocEnd, Data: "def"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 35
			"<<a\\\nbc\nabc\ndef\na\nbc",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "<<"},
				{Type: TokenWord, Data: "a\\\nbc"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenHeredoc, Data: "abc\ndef\n"},
				{Type: TokenHeredocEnd, Data: "a\nbc"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
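		// Tests 36-37: a numeric file descriptor before a redirection, and a flag following the time keyword.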
		{ // 36
			"2>1 word",
			[]parser.Token{
				{Type: TokenNumberLiteral, Data: "2"},
				{Type: TokenPunctuator, Data: ">"},
				{Type: TokenWord, Data: "1"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "word"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 37
			"time -p cmd",
			[]parser.Token{
				{Type: TokenKeyword, Data: "time"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "-p"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "cmd"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
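		// Tests 38-40: brace expansion and sequence expressions, plus words that merely contain braces.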
		{ // 38
			"{a..b..2} {a,b,d} a{b,c,d}e a{1..4} {2..10..-1} {-1..-100..5} {a..z..-1}",
			[]parser.Token{
				{Type: TokenBraceExpansion, Data: "{a..b..2}"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenBraceExpansion, Data: "{a,b,d}"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "a"},
				{Type: TokenBraceExpansion, Data: "{b,c,d}"},
				{Type: TokenWord, Data: "e"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "a"},
				{Type: TokenBraceExpansion, Data: "{1..4}"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenBraceExpansion, Data: "{2..10..-1}"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenBraceExpansion, Data: "{-1..-100..5}"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenBraceExpansion, Data: "{a..z..-1}"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 39
			"a={123",
			[]parser.Token{
				{Type: TokenIdentifierAssign, Data: "a"},
				{Type: TokenPunctuator, Data: "="},
				{Type: TokenWord, Data: "{123"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 40
			"word{ word{a} word{\nword{",
			[]parser.Token{
				{Type: TokenWord, Data: "word{"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "word{a}"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "word{"},
				{Type: TokenLineTerminator, Data: "\n"},
				{Type: TokenWord, Data: "word{"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
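		// Tests 41-42: brace-group and subshell command lists.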
		{ // 41
			"{ echo 123; echo 456; }",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "{"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "echo"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "123"},
				{Type: TokenPunctuator, Data: ";"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "echo"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "456"},
				{Type: TokenPunctuator, Data: ";"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenPunctuator, Data: "}"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 42
			"(echo 123; echo 456)",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "("},
				{Type: TokenWord, Data: "echo"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "123"},
				{Type: TokenPunctuator, Data: ";"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "echo"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "456"},
				{Type: TokenPunctuator, Data: ")"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
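		// Tests 43-46: nested backticks escaped to the correct depth, and depth mismatch errors.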
		{ // 43
			"`a` `echo \\`abc\\`` echo \"a`echo \"1\\`echo u\\\\\\`echo 123\\\\\\`v\\`3\"`c\"",
			[]parser.Token{
				{Type: TokenOpenBacktick, Data: "`"},
				{Type: TokenWord, Data: "a"},
				{Type: TokenCloseBacktick, Data: "`"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOpenBacktick, Data: "`"},
				{Type: TokenWord, Data: "echo"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenOpenBacktick, Data: "\\`"},
				{Type: TokenWord, Data: "abc"},
				{Type: TokenCloseBacktick, Data: "\\`"},
				{Type: TokenCloseBacktick, Data: "`"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "echo"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenStringStart, Data: "\"a"},
				{Type: TokenOpenBacktick, Data: "`"},
				{Type: TokenWord, Data: "echo"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenStringStart, Data: "\"1"},
				{Type: TokenOpenBacktick, Data: "\\`"},
				{Type: TokenWord, Data: "echo"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "u"},
				{Type: TokenOpenBacktick, Data: "\\\\\\`"},
				{Type: TokenWord, Data: "echo"},
				{Type: TokenWhitespace, Data: " "},
				{Type: TokenWord, Data: "123"},
				{Type: TokenCloseBacktick, Data: "\\\\\\`"},
				{Type: TokenWord, Data: "v"},
				{Type: TokenCloseBacktick, Data: "\\`"},
				{Type: TokenStringEnd, Data: "3\""},
				{Type: TokenCloseBacktick, Data: "`"},
				{Type: TokenStringEnd, Data: "c\""},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 44
			"`\\``",
			[]parser.Token{
				{Type: TokenOpenBacktick, Data: "`"},
				{Type: TokenOpenBacktick, Data: "\\`"},
				{Type: parser.TokenError, Data: "incorrect backtick depth"},
			},
		},
		{ // 45
			"`\\`\\\\\\``",
			[]parser.Token{
				{Type: TokenOpenBacktick, Data: "`"},
				{Type: TokenOpenBacktick, Data: "\\`"},
				{Type: TokenOpenBacktick, Data: "\\\\\\`"},
				{Type: parser.TokenError, Data: "incorrect backtick depth"},
			},
		},
		{ // 46
			"`\\`\\\\\\`\\`",
			[]parser.Token{
				{Type: TokenOpenBacktick, Data: "`"},
				{Type: TokenOpenBacktick, Data: "\\`"},
				{Type: TokenOpenBacktick, Data: "\\\\\\`"},
				{Type: parser.TokenError, Data: "incorrect backtick depth"},
			},
		},
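		// Tests 47-48: {varname} redirection words, and the fd-closing 1- form after <&.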
		{ // 47
			"{abc}>2",
			[]parser.Token{
				{Type: TokenBraceWord, Data: "{abc}"},
				{Type: TokenPunctuator, Data: ">"},
				{Type: TokenWord, Data: "2"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
		{ // 48
			"<&1-",
			[]parser.Token{
				{Type: TokenPunctuator, Data: "<&"},
				{Type: TokenWord, Data: "1-"},
				{Type: parser.TokenDone, Data: ""},
			},
		},
	} {
		p := parser.NewStringTokeniser(test.Input)

		SetTokeniser(&p)

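		// Compare each token produced against the expected stream, stopping at the first mismatch.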
		for m, tkn := range test.Output {
			if tk, _ := p.GetToken(); tk.Type != tkn.Type {
				if tk.Type == parser.TokenError {
					t.Errorf("test %d.%d: unexpected error: %s", n+1, m+1, tk.Data)
				} else {
					t.Errorf("test %d.%d: incorrect type, expecting %d, got %d", n+1, m+1, tkn.Type, tk.Type)
				}

				break
			} else if tk.Data != tkn.Data {
				t.Errorf("test %d.%d: incorrect data, expecting %q, got %q", n+1, m+1, tkn.Data, tk.Data)

				break
			}
		}
	}
}