| 1 | #!/usr/bin/env python
 | 
| 2 | from __future__ import print_function
 | 
| 3 | """
 | 
| 4 | lex_test.py: Tests for lex.py
 | 
| 5 | """
 | 
| 6 | 
 | 
| 7 | import unittest
 | 
| 8 | 
 | 
| 9 | from core.lexer import CompileAll, LineLexer
 | 
| 10 | from core import test_lib
 | 
| 11 | 
 | 
| 12 | from osh import parse_lib
 | 
| 13 | from osh.meta import ast, Id, Kind, LookupKind, types
 | 
| 14 | from osh.lex import LEXER_DEF
 | 
| 15 | 
 | 
| 16 | lex_mode_e = types.lex_mode_e
 | 
| 17 | 
 | 
| 18 | 
 | 
def _InitLexer(s):
  """Return a Lexer that reads tokens from the string s.

  Uses a fresh arena so each test is isolated.
  """
  _, lexer = parse_lib.InitLexer(s, test_lib.MakeArena('<lex_test.py>'))
  return lexer
 | 
| 23 | 
 | 
| 24 | 
 | 
class AsdlTest(unittest.TestCase):
  """Smoke test for the generated lex_mode_e enum."""

  def testLexMode(self):
    # Just verify the enum member exists and can be printed.
    print(lex_mode_e.DQ)
 | 
| 29 | 
 | 
| 30 | 
 | 
# Two-line shell snippet used by LexerTest.testRead.  The trailing backslash
# keeps the literal from beginning with a newline.
CMD = """\
ls /
ls /home/
"""
 | 
| 35 | 
 | 
class LexerTest(unittest.TestCase):
  """Tests for the main Lexer driver across several lex modes."""

  def assertTokensEqual(self, left, right):
    msg = 'Expected %r, got %r' % (left, right)
    self.assertTrue(test_lib.TokensEqual(left, right), msg)

  def testRead(self):
    lx = _InitLexer(CMD)

    # Two 'ls' commands; reading past the end keeps returning Eof_Real.
    expected = [
        (Id.Lit_Chars, 'ls'),
        (Id.WS_Space, ' '),
        (Id.Lit_Chars, '/'),
        (Id.Op_Newline, '\n'),
        # Line two
        (Id.Lit_Chars, 'ls'),
        (Id.WS_Space, ' '),
        (Id.Lit_Chars, '/home/'),
        (Id.Op_Newline, '\n'),
        (Id.Eof_Real, ''),
        (Id.Eof_Real, ''),  # another read after EOF gives EOF again
    ]
    for tok_id, tok_val in expected:
      tok = lx.Read(lex_mode_e.OUTER)
      self.assertTokensEqual(ast.token(tok_id, tok_val), tok)

  def testRead_VS_ARG_UNQ(self):
    # Exercise VS_ARG_UNQ mode on a single-quoted string.
    lx = _InitLexer("'hi'")
    tok = lx.Read(lex_mode_e.VS_ARG_UNQ)
    print(tok)

  def testExtGlob(self):
    lx = _InitLexer('@(foo|bar)')

    # '@(' is recognized in OUTER mode; the rest is read in EXTGLOB mode.
    self.assertTokensEqual(
        ast.token(Id.ExtGlob_At, '@('), lx.Read(lex_mode_e.OUTER))
    for tok_id, tok_val in [
        (Id.Lit_Chars, 'foo'),
        (Id.Op_Pipe, '|'),
        (Id.Lit_Chars, 'bar'),
        (Id.Op_RParen, ')'),
    ]:
      self.assertTokensEqual(
          ast.token(tok_id, tok_val), lx.Read(lex_mode_e.EXTGLOB))

    # Individual cases: each ext-glob prefix read in EXTGLOB mode.
    for src, tok_id, tok_val in [
        ('@(', Id.ExtGlob_At, '@('),
        ('*(', Id.ExtGlob_Star, '*('),
        ('?(', Id.ExtGlob_QMark, '?('),
        ('$', Id.Lit_Other, '$'),
    ]:
      lx = _InitLexer(src)
      self.assertTokensEqual(
          ast.token(tok_id, tok_val), lx.Read(lex_mode_e.EXTGLOB))

  def testBashRegexState(self):
    # In BASH_REGEX mode, '(' and '|' come back as literal characters.
    lx = _InitLexer('(foo|bar)')
    for tok_val in ['(', 'foo', '|']:
      self.assertTokensEqual(
          ast.token(Id.Lit_Chars, tok_val), lx.Read(lex_mode_e.BASH_REGEX))

  def testDBracketState(self):
    lx = _InitLexer('-z foo')
    tok = lx.Read(lex_mode_e.DBRACKET)
    self.assertTokensEqual(ast.token(Id.BoolUnary_z, '-z'), tok)
    self.assertEqual(Kind.BoolUnary, LookupKind(tok.id))

  def testDollarSqState(self):
    lx = _InitLexer(r'foo bar\n \x00 \000 \u0065')

    # A run of plain characters is one Char_Literals token.
    tok = lx.Read(lex_mode_e.DOLLAR_SQ)
    print(tok)
    self.assertTokensEqual(ast.token(Id.Char_Literals, 'foo bar'), tok)

    # Next comes the two-character escape \n.
    tok = lx.Read(lex_mode_e.DOLLAR_SQ)
    print(tok)
    self.assertTokensEqual(ast.token(Id.Char_OneChar, r'\n'), tok)

  def testLookAhead(self):
    # The usage pattern we care about: read past the function name, then
    # Read the next token and LookAhead in that state.
    lx = _InitLexer('func()')

    self.assertTokensEqual(
        ast.token(Id.Lit_Chars, 'func'), lx.Read(lex_mode_e.OUTER))
    self.assertTokensEqual(
        ast.token(Id.Op_LParen, '('), lx.Read(lex_mode_e.OUTER))
    self.assertTokensEqual(
        ast.token(Id.Op_RParen, ')'), lx.LookAhead(lex_mode_e.OUTER))

    # Same, but with a space between the name and '('.
    lx = _InitLexer('func ()')

    self.assertTokensEqual(
        ast.token(Id.Lit_Chars, 'func'), lx.Read(lex_mode_e.OUTER))
    self.assertTokensEqual(
        ast.token(Id.WS_Space, ' '), lx.Read(lex_mode_e.OUTER))
    self.assertTokensEqual(
        ast.token(Id.Op_LParen, '('), lx.LookAhead(lex_mode_e.OUTER))
 | 
| 178 | 
 | 
class LineLexerTest(unittest.TestCase):
  """Tests for LineLexer, which tokenizes a single physical line."""

  def setUp(self):
    self.arena = test_lib.MakeArena('<lex_test.py>')

  def assertTokensEqual(self, left, right):
    self.assertTrue(test_lib.TokensEqual(left, right))

  def _MakeLexer(self, line):
    # Convenience constructor shared by the tests below.
    return LineLexer(parse_lib._MakeMatcher(), line, self.arena)

  def testReadOuter(self):
    lx = self._MakeLexer('\n')
    self.assertTokensEqual(
        ast.token(Id.Op_Newline, '\n'), lx.Read(lex_mode_e.OUTER))

  def testRead_VS_ARG_UNQ(self):
    lx = self._MakeLexer("'hi'")
    tok = lx.Read(lex_mode_e.VS_ARG_UNQ)
    self.assertEqual(Id.Left_SingleQuote, tok.id)

  def testLookAhead(self):
    # An empty line: nothing to look ahead at.
    l = self._MakeLexer('')
    self.assertTokensEqual(
        ast.token(Id.Unknown_Tok, ''), l.LookAhead(lex_mode_e.OUTER))

    # 'foo' with nothing after it.
    l = self._MakeLexer('foo')
    self.assertTokensEqual(
        ast.token(Id.Lit_Chars, 'foo'), l.Read(lex_mode_e.OUTER))
    self.assertTokensEqual(
        ast.token(Id.Unknown_Tok, ''), l.LookAhead(lex_mode_e.OUTER))

    # LookAhead sees 'bar' past the intervening spaces.
    l = self._MakeLexer('foo  bar')
    self.assertTokensEqual(
        ast.token(Id.Lit_Chars, 'foo'), l.Read(lex_mode_e.OUTER))
    self.assertTokensEqual(
        ast.token(Id.Lit_Chars, 'bar'), l.LookAhead(lex_mode_e.OUTER))

    # No lookahead needed; using the cursor!
    l = self._MakeLexer('func(')
    self.assertTokensEqual(
        ast.token(Id.Lit_Chars, 'func'), l.Read(lex_mode_e.OUTER))
    self.assertTokensEqual(
        ast.token(Id.Op_LParen, '('), l.LookAhead(lex_mode_e.OUTER))

    l = self._MakeLexer('func  (')
    self.assertTokensEqual(
        ast.token(Id.Lit_Chars, 'func'), l.Read(lex_mode_e.OUTER))
    self.assertTokensEqual(
        ast.token(Id.Op_LParen, '('), l.LookAhead(lex_mode_e.OUTER))
 | 
| 228 | 
 | 
# Compiled lexer specs for two modes, used by RegexTest below.  Each appears
# to be an indexable sequence of (compiled pattern, _) pairs -- see the
# unpacking in RegexTest; confirm against core.lexer.CompileAll.
OUTER_RE = CompileAll(LEXER_DEF[lex_mode_e.OUTER])
DOUBLE_QUOTED_RE = CompileAll(LEXER_DEF[lex_mode_e.DQ])
 | 
| 231 | 
 | 
| 232 | 
 | 
class RegexTest(unittest.TestCase):
  """Spot checks on the compiled regexes for individual lexer modes."""

  def _PrintNulMatch(self, pairs):
    # Index 3 is presumably the pattern that should handle a NUL byte --
    # TODO confirm against the LEXER_DEF ordering.
    nul_pat, _ = pairs[3]
    print(nul_pat.match('\0'))

  def testOuter(self):
    self._PrintNulMatch(OUTER_RE)

  def testDoubleQuoted(self):
    self._PrintNulMatch(DOUBLE_QUOTED_RE)
 | 
| 244 | 
 | 
| 245 | 
 | 
# Run all tests when this file is executed directly.
if __name__ == '__main__':
  unittest.main()
 |