1 | #!/usr/bin/env python
|
2 | from __future__ import print_function
|
3 | """
|
4 | lex_test.py: Tests for lex.py
|
5 | """
|
6 |
|
7 | import unittest
|
8 |
|
9 | from core.lexer import CompileAll, LineLexer
|
10 | from core import test_lib
|
11 |
|
12 | from osh import parse_lib
|
13 | from osh.meta import ast, Id, Kind, LookupKind, types
|
14 | from osh.lex import LEXER_DEF
|
15 |
|
16 | lex_mode_e = types.lex_mode_e
|
17 |
|
18 |
|
def _InitLexer(s):
  """Return a lexer that reads from the source string `s`.

  A new arena labelled '<lex_test.py>' is created on every call.
  parse_lib.InitLexer returns a pair; only its second element (the lexer) is
  used by these tests, so the first one is dropped.
  """
  test_arena = test_lib.MakeArena('<lex_test.py>')
  lexer_pair = parse_lib.InitLexer(s, test_arena)
  _unused, lexer = lexer_pair
  return lexer
|
23 |
|
24 |
|
class AsdlTest(unittest.TestCase):
  """Smoke test: lex_mode_e.DQ can be looked up and printed."""

  def testLexMode(self):
    # No assertion here; the test only fails if the attribute lookup or the
    # print itself raises.
    dq_mode = lex_mode_e.DQ
    print(dq_mode)
|
29 |
|
30 |
|
# Two-line input used by LexerTest.testRead; each line ends with a newline.
CMD = (
    'ls /\n'
    'ls /home/\n'
)
|
35 |
|
class LexerTest(unittest.TestCase):
  """Token-level tests for the lexer returned by _InitLexer().

  Each test feeds a short source string to the lexer and checks the tokens
  that successive Read() / LookAhead() calls return in a given lex mode.
  """

  def assertTokensEqual(self, left, right):
    """Fail unless test_lib.TokensEqual(left, right) is true.

    Args:
      left: the expected token.
      right: the token actually returned by the lexer.
    """
    self.assertTrue(
        test_lib.TokensEqual(left, right),
        'Expected %r, got %r' % (left, right))

  def _assertReads(self, lexer, lex_mode, expected):
    """Read one token per entry of `expected` and compare them in order.

    Args:
      lexer: object exposing Read(lex_mode).
      lex_mode: the mode passed to every Read() call.
      expected: sequence of (token id, token string) pairs.
    """
    for tok_id, tok_val in expected:
      # Read first, then build the expected token, mirroring the order of the
      # hand-written Read()/assert pairs this helper replaces.
      actual = lexer.Read(lex_mode)
      self.assertTokensEqual(ast.token(tok_id, tok_val), actual)

  def testRead(self):
    lexer = _InitLexer(CMD)
    self._assertReads(lexer, lex_mode_e.OUTER, [
        (Id.Lit_Chars, 'ls'),
        (Id.WS_Space, ' '),
        (Id.Lit_Chars, '/'),
        (Id.Op_Newline, '\n'),
        # Line two
        (Id.Lit_Chars, 'ls'),
        (Id.WS_Space, ' '),
        (Id.Lit_Chars, '/home/'),
        (Id.Op_Newline, '\n'),
        (Id.Eof_Real, ''),
        # Another EOF gives EOF
        (Id.Eof_Real, ''),
    ])

  def testRead_VS_ARG_UNQ(self):
    lexer = _InitLexer("'hi'")
    t = lexer.Read(lex_mode_e.VS_ARG_UNQ)
    print(t)
    # Same expectation as LineLexerTest.testRead_VS_ARG_UNQ for this input:
    # the opening single quote is the first token.
    self.assertEqual(Id.Left_SingleQuote, t.id)

  def testExtGlob(self):
    lexer = _InitLexer('@(foo|bar)')

    # The opening '@(' is read in OUTER mode; the rest in EXTGLOB mode.
    self._assertReads(lexer, lex_mode_e.OUTER, [(Id.ExtGlob_At, '@(')])
    self._assertReads(lexer, lex_mode_e.EXTGLOB, [
        (Id.Lit_Chars, 'foo'),
        (Id.Op_Pipe, '|'),
        (Id.Lit_Chars, 'bar'),
        (Id.Op_RParen, ')'),
    ])

    # Individual cases: each source string is expected to lex as one token
    # whose value is the whole string.
    individual_cases = [
        ('@(', Id.ExtGlob_At),
        ('*(', Id.ExtGlob_Star),
        ('?(', Id.ExtGlob_QMark),
        ('$', Id.Lit_Other),
    ]
    for src, tok_id in individual_cases:
      self._assertReads(
          _InitLexer(src), lex_mode_e.EXTGLOB, [(tok_id, src)])

  def testBashRegexState(self):
    lexer = _InitLexer('(foo|bar)')
    # In BASH_REGEX mode '(' and '|' are expected as Lit_Chars tokens.
    self._assertReads(lexer, lex_mode_e.BASH_REGEX, [
        (Id.Lit_Chars, '('),
        (Id.Lit_Chars, 'foo'),
        (Id.Lit_Chars, '|'),
    ])

  def testDBracketState(self):
    lexer = _InitLexer('-z foo')
    t = lexer.Read(lex_mode_e.DBRACKET)
    self.assertTokensEqual(ast.token(Id.BoolUnary_z, '-z'), t)
    self.assertEqual(Kind.BoolUnary, LookupKind(t.id))

  def testDollarSqState(self):
    lexer = _InitLexer(r'foo bar\n \x00 \000 \u0065')

    # Only the first two tokens are checked; each is printed before the
    # comparison.
    t = lexer.Read(lex_mode_e.DOLLAR_SQ)
    print(t)
    self.assertTokensEqual(ast.token(Id.Char_Literals, 'foo bar'), t)

    t = lexer.Read(lex_mode_e.DOLLAR_SQ)
    print(t)
    self.assertTokensEqual(ast.token(Id.Char_OneChar, r'\n'), t)

  def testLookAhead(self):
    # I think this is the usage pattern we care about.  Peek and Next() past
    # the function; then Peek() the next token.  Then Lookahead in that state.
    lexer = _InitLexer('func()')
    self._assertReads(lexer, lex_mode_e.OUTER, [
        (Id.Lit_Chars, 'func'),
        (Id.Op_LParen, '('),
    ])
    self.assertTokensEqual(
        ast.token(Id.Op_RParen, ')'), lexer.LookAhead(lex_mode_e.OUTER))

    # With a space after the name: after reading the space, LookAhead is
    # expected to report the '('.
    lexer = _InitLexer('func ()')
    self._assertReads(lexer, lex_mode_e.OUTER, [
        (Id.Lit_Chars, 'func'),
        (Id.WS_Space, ' '),
    ])
    self.assertTokensEqual(
        ast.token(Id.Op_LParen, '('), lexer.LookAhead(lex_mode_e.OUTER))
|
177 |
|
178 |
|
class LineLexerTest(unittest.TestCase):
  """Tests that drive LineLexer directly on a single line of input."""

  def setUp(self):
    # A new arena is created before each test method.
    self.arena = test_lib.MakeArena('<lex_test.py>')

  def _MakeLineLexer(self, line):
    """Return a LineLexer over `line` that uses this test's arena."""
    return LineLexer(parse_lib._MakeMatcher(), line, self.arena)

  def assertTokensEqual(self, left, right):
    """Fail unless test_lib.TokensEqual(left, right) is true.

    Args:
      left: the expected token.
      right: the token actually returned by the line lexer.
    """
    # Report both tokens on failure, as LexerTest.assertTokensEqual does.
    self.assertTrue(
        test_lib.TokensEqual(left, right),
        'Expected %r, got %r' % (left, right))

  def testReadOuter(self):
    line_lexer = self._MakeLineLexer('\n')
    self.assertTokensEqual(
        ast.token(Id.Op_Newline, '\n'), line_lexer.Read(lex_mode_e.OUTER))

  def testRead_VS_ARG_UNQ(self):
    line_lexer = self._MakeLineLexer("'hi'")
    t = line_lexer.Read(lex_mode_e.VS_ARG_UNQ)
    self.assertEqual(Id.Left_SingleQuote, t.id)

  def testLookAhead(self):
    # Lines always end with '\n'
    line_lexer = self._MakeLineLexer('')
    self.assertTokensEqual(
        ast.token(Id.Unknown_Tok, ''),
        line_lexer.LookAhead(lex_mode_e.OUTER))

    # Nothing follows 'foo', so LookAhead is expected to give Unknown_Tok.
    line_lexer = self._MakeLineLexer('foo')
    self.assertTokensEqual(
        ast.token(Id.Lit_Chars, 'foo'), line_lexer.Read(lex_mode_e.OUTER))
    self.assertTokensEqual(
        ast.token(Id.Unknown_Tok, ''),
        line_lexer.LookAhead(lex_mode_e.OUTER))

    # The expected LookAhead result is 'bar', not the space before it.
    line_lexer = self._MakeLineLexer('foo bar')
    self.assertTokensEqual(
        ast.token(Id.Lit_Chars, 'foo'), line_lexer.Read(lex_mode_e.OUTER))
    self.assertTokensEqual(
        ast.token(Id.Lit_Chars, 'bar'),
        line_lexer.LookAhead(lex_mode_e.OUTER))

    # No lookahead; using the cursor!
    line_lexer = self._MakeLineLexer('func(')
    self.assertTokensEqual(
        ast.token(Id.Lit_Chars, 'func'), line_lexer.Read(lex_mode_e.OUTER))
    self.assertTokensEqual(
        ast.token(Id.Op_LParen, '('),
        line_lexer.LookAhead(lex_mode_e.OUTER))

    # Same expectation with a space between the name and the paren.
    line_lexer = self._MakeLineLexer('func (')
    self.assertTokensEqual(
        ast.token(Id.Lit_Chars, 'func'), line_lexer.Read(lex_mode_e.OUTER))
    self.assertTokensEqual(
        ast.token(Id.Op_LParen, '('),
        line_lexer.LookAhead(lex_mode_e.OUTER))
|
227 |
|
228 |
|
# Results of CompileAll() for the OUTER and DQ entries of LEXER_DEF.
# RegexTest indexes these and unpacks each entry as a two-element pair.
OUTER_RE, DOUBLE_QUOTED_RE = (
    CompileAll(LEXER_DEF[lex_mode_e.OUTER]),
    CompileAll(LEXER_DEF[lex_mode_e.DQ]),
)
|
231 |
|
232 |
|
class RegexTest(unittest.TestCase):
  """Prints the result of matching entry 3 of each compiled table on NUL.

  There are no assertions; the tests fail only if indexing, unpacking or
  matching raises.
  """

  def _PrintNulMatch(self, compiled):
    # Entry 3 is unpacked as a (pattern, other) pair; only the pattern is used.
    # NOTE(review): presumably index 3 is the NUL-byte rule -- confirm against
    # LEXER_DEF.
    pattern, _unused = compiled[3]
    print(pattern.match('\0'))

  def testOuter(self):
    self._PrintNulMatch(OUTER_RE)

  def testDoubleQuoted(self):
    self._PrintNulMatch(DOUBLE_QUOTED_RE)
|
244 |
|
245 |
|
# Run every TestCase in this module when the file is executed as a script.
if '__main__' == __name__:
  unittest.main()
|