| 1 | #!/usr/bin/env python2
 | 
| 2 | """
 | 
| 3 | lexer_test.py: Tests for lexer.py
 | 
| 4 | """
 | 
| 5 | 
 | 
| 6 | import unittest
 | 
| 7 | 
 | 
| 8 | from _devbuild.gen.id_kind_asdl import Id, Id_str
 | 
| 9 | from _devbuild.gen.types_asdl import lex_mode_e
 | 
| 10 | from core import test_lib
 | 
| 11 | from mycpp.mylib import log
 | 
| 12 | from frontend.lexer import DummyToken as Tok
 | 
| 13 | from frontend.lexer_def import LEXER_DEF
 | 
| 14 | from frontend import lexer
 | 
| 15 | from frontend import reader
 | 
| 16 | 
 | 
| 17 | 
 | 
def _PrintfOuterTokens(fmt):
    """Lex `fmt` in PrintfOuter mode and print every token that is read.

    Reading stops after the first token whose id is Id.Eof_Real or
    Id.Eol_Tok; that final token is printed as well.
    """
    log('PrintfOuter lexing %r', fmt)

    ctx = test_lib.InitParseContext()
    src_arena = test_lib.MakeArena('<lexer_test.py>')
    printf_lexer = ctx.MakeLexer(reader.StringLineReader(fmt, src_arena))

    stop_ids = (Id.Eof_Real, Id.Eol_Tok)
    finished = False
    while not finished:
        tok = printf_lexer.Read(lex_mode_e.PrintfOuter)
        print(tok)
        finished = tok.id in stop_ids

    # Emit an empty log line after the token dump.
    log('')
 | 
| 33 | 
 | 
| 34 | 
 | 
def _PrintToken(t):
    """Print the token's Id name (padded to 20 columns) and repr of its tval."""
    id_name = Id_str(t.id)
    print('%20s %r' % (id_name, t.tval))
 | 
| 38 | 
 | 
| 39 | 
 | 
def _PrintAllTokens(lx, lex_mode):
    """Read tokens from `lx` in `lex_mode`, printing each with _PrintToken.

    Stops after printing the first token whose id is Id.Eof_Real or
    Id.Eol_Tok.
    """
    tok = lx.Read(lex_mode)
    _PrintToken(tok)
    while tok.id not in (Id.Eof_Real, Id.Eol_Tok):
        tok = lx.Read(lex_mode)
        _PrintToken(tok)
 | 
| 46 | 
 | 
| 47 | 
 | 
class TokenTest(unittest.TestCase):
    """Tests that exercise token creation and the lexer's read, lookahead,
    line-advance and unread operations."""

    def testToken(self):
        """Build a few dummy tokens and print them."""
        # For '{' and ';' the Id and the text are redundant; the original
        # author judged that acceptable.
        samples = [
            (Id.Lit_Chars, 'abc'),
            (Id.Lit_LBrace, '{'),
            (Id.Op_Semi, ';'),
        ]
        for tok_id, text in samples:
            print(Tok(tok_id, text))

    def testPrintStats(self):
        """Print the size of each LEXER_DEF entry, largest first, and totals."""

        def _EntrySize(item):
            return len(item[1])

        by_size = sorted(LEXER_DEF.items(), key=_EntrySize, reverse=True)

        sizes = []
        for state, re_list in by_size:
            n = len(re_list)
            print(n, state)
            sizes.append(n)

        print("Number of lex states: %d" % len(LEXER_DEF))
        print("Number of token dispatches: %d" % sum(sizes))

    def testMoveToNextLine(self):
        """Test that it doesn't mess up invariants."""
        arena = test_lib.MakeArena('<lexer_test.py>')
        code_str = 'cd {\n}'
        mode = lex_mode_e.ShCommand

        print('=== Printing all tokens')
        _, dump_lx = test_lib.InitLexer(code_str, arena)
        _PrintAllTokens(dump_lx, mode)

        print()
        print('=== MoveToNextLine() and LookAheadOne()')
        _, lx = test_lib.InitLexer(code_str, arena)

        def _ReadAndExpect(expected_id):
            # Read one token, print it, and check its id.
            tok = lx.Read(mode)
            _PrintToken(tok)
            self.assertEqual(expected_id, tok.id)

        for expected_id in (Id.Lit_Chars, Id.WS_Space, Id.Lit_LBrace):
            _ReadAndExpect(expected_id)

        # At this point the test expects MoveToNextLine() to raise
        # AssertionError; anything else is a test failure.
        raised = False
        try:
            lx.MoveToNextLine()
        except AssertionError:
            raised = True
        if not raised:
            self.fail('Should have asserted')

        _ReadAndExpect(Id.Op_Newline)

        self.assertEqual(Id.Unknown_Tok, lx.LookAheadOne(mode))

        # Method being tested
        lx.MoveToNextLine()

        # Looking ahead twice in a row must give the same answer both times.
        for label in ('Lookahead', 'Lookahead 2'):
            print(label)
            self.assertEqual(Id.Lit_RBrace, lx.LookAheadOne(mode))

        _ReadAndExpect(Id.Lit_RBrace)
        _ReadAndExpect(Id.Eof_Real)

    def testMaybeUnreadOne(self):
        """After MaybeUnreadOne() and PushHint(), the next Read() is expected
        to return Id.Right_CasePat."""
        arena = test_lib.MakeArena('<lexer_test.py>')
        _, lx = test_lib.InitLexer('()', arena)
        mode = lex_mode_e.ShCommand

        for expected_id in (Id.Op_LParen, Id.Op_RParen):
            tok = lx.Read(mode)
            print(tok)
            self.assertEqual(expected_id, tok.id)

        # Go back one token, then push a hint pairing Op_RParen with
        # Right_CasePat.
        lx.MaybeUnreadOne()
        lx.PushHint(Id.Op_RParen, Id.Right_CasePat)

        # The token is read again, this time with a different Id.
        tok = lx.Read(mode)
        print(tok)
        self.assertEqual(Id.Right_CasePat, tok.id)

    def testPrintf(self):
        """Demonstrate an input handling quirk (output is printed, not
        asserted)."""
        inputs = (
            '',  # Get Id.Eof_Real because len('') == 0
            '\0',  # Get Id.Eol_Tok because len('\0') == 1
            'x\0',  # Get x, then Id.Eof_Real because there are no more lines
        )
        for fmt in inputs:
            _PrintfOuterTokens(fmt)
 | 
| 170 | 
 | 
| 171 | 
 | 
class TokenFunctionsTest(unittest.TestCase):
    """Tests for the token helper functions exposed by the lexer module."""

    def testContainsEquals(self):
        """Check TokenContains, TokenEquals, TokenStartsWith and
        TokenEndsWith against the first token of 'echo "hi $name"'."""
        arena = test_lib.MakeArena('<lexer_test.py>')
        _, lx = test_lib.InitLexer('echo "hi $name"', arena)

        tok = lx.Read(lex_mode_e.ShCommand)
        print(tok)

        # (function under test, [(expected result, string argument), ...])
        checks = [
            (lexer.TokenContains, [
                (True, 'echo'),
                (True, 'ech'),
                (True, 'cho'),
                (True, 'c'),
                (True, ''),
            ]),
            (lexer.TokenEquals, [
                (True, 'echo'),
                (False, 'ech'),
            ]),
            (lexer.TokenStartsWith, [
                (True, ''),
                (True, 'e'),
                (True, 'ech'),
                (False, 'cho'),
            ]),
            (lexer.TokenEndsWith, [
                (True, ''),
                (False, 'ech'),
                (True, 'cho'),
                (True, 'o'),
            ]),
        ]
        for func, cases in checks:
            for expected, s in cases:
                self.assertEqual(expected, func(tok, s))

    def testIsPlusEquals(self):
        """IsPlusEquals should be True for the first token of 'foo+=b"' and
        False for the first token of 'foo=b"'."""
        arena = test_lib.MakeArena('<lexer_test.py>')

        cases = [
            (True, 'foo+=b"'),
            (False, 'foo=b"'),
        ]
        for expected, code_str in cases:
            _, lx = test_lib.InitLexer(code_str, arena)

            tok = lx.Read(lex_mode_e.ShCommand)
            print(tok)
            self.assertEqual(expected, lexer.IsPlusEquals(tok))
 | 
| 213 | 
 | 
| 214 | 
 | 
if __name__ == '__main__':
    # Executed as a script: hand control to the unittest runner.
    unittest.main()
 |