# OILS / opy / _regtest / src / osh / lex_test.py
# View on Github | oilshell.org
# 247 lines, 157 significant
1#!/usr/bin/env python
2from __future__ import print_function
3"""
4lex_test.py: Tests for lex.py
5"""
6
7import unittest
8
9from core.lexer import CompileAll, LineLexer
10from core import test_lib
11
12from osh import parse_lib
13from osh.meta import ast, Id, Kind, LookupKind, types
14from osh.lex import LEXER_DEF
15
16lex_mode_e = types.lex_mode_e
17
18
def _InitLexer(s):
  """Construct a Lexer reading from the string s, backed by a fresh arena."""
  return parse_lib.InitLexer(s, test_lib.MakeArena('<lex_test.py>'))[1]
23
24
class AsdlTest(unittest.TestCase):
  """Smoke test for the generated ASDL enum."""

  def testLexMode(self):
    # Confirm the enum member exists and is printable.
    mode = lex_mode_e.DQ
    print(mode)
29
30
# Two-line shell snippet fed to LexerTest.testRead; each line ends in a
# newline, which the lexer reports as an Id.Op_Newline token.
CMD = """\
ls /
ls /home/
"""
35
class LexerTest(unittest.TestCase):
  """Exercises the full Lexer across several lex modes."""

  def assertTokensEqual(self, left, right):
    """Assert structural token equality via test_lib.TokensEqual."""
    self.assertTrue(
        test_lib.TokensEqual(left, right),
        'Expected %r, got %r' % (left, right))

  def _assertReadSequence(self, lexer, mode, expected):
    """Read one token per (id, value) pair in mode and compare each."""
    for tok_id, tok_val in expected:
      t = lexer.Read(mode)
      self.assertTokensEqual(ast.token(tok_id, tok_val), t)

  def testRead(self):
    lexer = _InitLexer(CMD)
    self._assertReadSequence(lexer, lex_mode_e.OUTER, [
        # Line one
        (Id.Lit_Chars, 'ls'),
        (Id.WS_Space, ' '),
        (Id.Lit_Chars, '/'),
        (Id.Op_Newline, '\n'),
        # Line two
        (Id.Lit_Chars, 'ls'),
        (Id.WS_Space, ' '),
        (Id.Lit_Chars, '/home/'),
        (Id.Op_Newline, '\n'),
        (Id.Eof_Real, ''),
        # Reading past the end keeps yielding EOF
        (Id.Eof_Real, ''),
    ])

  def testRead_VS_ARG_UNQ(self):
    lexer = _InitLexer("'hi'")
    t = lexer.Read(lex_mode_e.VS_ARG_UNQ)
    print(t)

  def testExtGlob(self):
    lexer = _InitLexer('@(foo|bar)')

    t = lexer.Read(lex_mode_e.OUTER)
    self.assertTokensEqual(ast.token(Id.ExtGlob_At, '@('), t)

    self._assertReadSequence(lexer, lex_mode_e.EXTGLOB, [
        (Id.Lit_Chars, 'foo'),
        (Id.Op_Pipe, '|'),
        (Id.Lit_Chars, 'bar'),
        (Id.Op_RParen, ')'),
    ])

    # Individual cases: each extglob opener, plus a non-glob char.
    for code, tok_id, tok_val in [
        ('@(', Id.ExtGlob_At, '@('),
        ('*(', Id.ExtGlob_Star, '*('),
        ('?(', Id.ExtGlob_QMark, '?('),
        ('$', Id.Lit_Other, '$'),
    ]:
      lexer = _InitLexer(code)
      t = lexer.Read(lex_mode_e.EXTGLOB)
      self.assertTokensEqual(ast.token(tok_id, tok_val), t)

  def testBashRegexState(self):
    lexer = _InitLexer('(foo|bar)')
    # In regex context, parens and pipes come back as literal chars.
    self._assertReadSequence(lexer, lex_mode_e.BASH_REGEX, [
        (Id.Lit_Chars, '('),
        (Id.Lit_Chars, 'foo'),
        (Id.Lit_Chars, '|'),
    ])

  def testDBracketState(self):
    lexer = _InitLexer('-z foo')
    t = lexer.Read(lex_mode_e.DBRACKET)
    self.assertTokensEqual(ast.token(Id.BoolUnary_z, '-z'), t)
    self.assertEqual(Kind.BoolUnary, LookupKind(t.id))

  def testDollarSqState(self):
    lexer = _InitLexer(r'foo bar\n \x00 \000 \u0065')

    t = lexer.Read(lex_mode_e.DOLLAR_SQ)
    print(t)
    self.assertTokensEqual(ast.token(Id.Char_Literals, 'foo bar'), t)

    t = lexer.Read(lex_mode_e.DOLLAR_SQ)
    print(t)
    self.assertTokensEqual(ast.token(Id.Char_OneChar, r'\n'), t)

  def testLookAhead(self):
    # Usage pattern we care about: read past the function name, then
    # LookAhead for the paren in the same state.
    lexer = _InitLexer('func()')
    self._assertReadSequence(lexer, lex_mode_e.OUTER, [
        (Id.Lit_Chars, 'func'),
        (Id.Op_LParen, '('),
    ])
    self.assertTokensEqual(
        ast.token(Id.Op_RParen, ')'), lexer.LookAhead(lex_mode_e.OUTER))

    # Same shape with a space before the paren.
    lexer = _InitLexer('func ()')
    self._assertReadSequence(lexer, lex_mode_e.OUTER, [
        (Id.Lit_Chars, 'func'),
        (Id.WS_Space, ' '),
    ])
    self.assertTokensEqual(
        ast.token(Id.Op_LParen, '('), lexer.LookAhead(lex_mode_e.OUTER))
177
178
class LineLexerTest(unittest.TestCase):
  """Exercises LineLexer, which scans a single line of input."""

  def setUp(self):
    self.arena = test_lib.MakeArena('<lex_test.py>')

  def assertTokensEqual(self, left, right):
    self.assertTrue(test_lib.TokensEqual(left, right))

  def _MakeLexer(self, line):
    """Build a LineLexer over one line, sharing the test arena."""
    return LineLexer(parse_lib._MakeMatcher(), line, self.arena)

  def testReadOuter(self):
    l = self._MakeLexer('\n')
    self.assertTokensEqual(
        ast.token(Id.Op_Newline, '\n'), l.Read(lex_mode_e.OUTER))

  def testRead_VS_ARG_UNQ(self):
    t = self._MakeLexer("'hi'").Read(lex_mode_e.VS_ARG_UNQ)
    self.assertEqual(Id.Left_SingleQuote, t.id)

  def testLookAhead(self):
    # Lines always end with '\n'
    l = self._MakeLexer('')
    self.assertTokensEqual(
        ast.token(Id.Unknown_Tok, ''), l.LookAhead(lex_mode_e.OUTER))

    l = self._MakeLexer('foo')
    self.assertTokensEqual(
        ast.token(Id.Lit_Chars, 'foo'), l.Read(lex_mode_e.OUTER))
    self.assertTokensEqual(
        ast.token(Id.Unknown_Tok, ''), l.LookAhead(lex_mode_e.OUTER))

    l = self._MakeLexer('foo bar')
    self.assertTokensEqual(
        ast.token(Id.Lit_Chars, 'foo'), l.Read(lex_mode_e.OUTER))
    self.assertTokensEqual(
        ast.token(Id.Lit_Chars, 'bar'), l.LookAhead(lex_mode_e.OUTER))

    # No lookahead; using the cursor!  Same result with or without a space.
    for line in ('func(', 'func ('):
      l = self._MakeLexer(line)
      self.assertTokensEqual(
          ast.token(Id.Lit_Chars, 'func'), l.Read(lex_mode_e.OUTER))
      self.assertTokensEqual(
          ast.token(Id.Op_LParen, '('), l.LookAhead(lex_mode_e.OUTER))
227
228
# Compiled pattern lists for two lexer modes, probed directly by RegexTest.
OUTER_RE = CompileAll(LEXER_DEF[lex_mode_e.OUTER])
DOUBLE_QUOTED_RE = CompileAll(LEXER_DEF[lex_mode_e.DQ])
231
232
class RegexTest(unittest.TestCase):
  """Probes the compiled lexer regexes directly."""

  def _PrintNulMatch(self, compiled):
    # Entry 3 appears to be the NUL-byte pattern -- TODO confirm against
    # the LEXER_DEF ordering in osh/lex.py.
    pat, _ = compiled[3]
    print(pat.match('\0'))

  def testOuter(self):
    self._PrintNulMatch(OUTER_RE)

  def testDoubleQuoted(self):
    self._PrintNulMatch(DOUBLE_QUOTED_RE)
244
245
# Run all test cases when executed directly.
if __name__ == '__main__':
  unittest.main()