| 1 | #!/usr/bin/env python2
 | 
| 2 | # Copyright 2019 Wilke Schwiedop. All rights reserved.
 | 
| 3 | # Copyright 2019 Andy Chu. All rights reserved.
 | 
| 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 | 
| 5 | # you may not use this file except in compliance with the License.
 | 
| 6 | # You may obtain a copy of the License at
 | 
| 7 | #
 | 
| 8 | #   http://www.apache.org/licenses/LICENSE-2.0
 | 
| 9 | """
 | 
| 10 | tokenizer.py: Tokenizer for find.
 | 
| 11 | """
 | 
| 12 | 
 | 
# (argv spelling, token name) for every operator, test and action that gets
# its own token.  Order is significant: token numbers are handed out by
# position in this list (see opmap / tok_name below).
_ops = [
	# operators and punctuation
	('!', 'BANG'),
	('(', 'LPAR'),
	(')', 'RPAR'),
	('-o', 'OR'),
	('-a', 'AND'),
	(',', 'COMMA'),
	(';', 'SEMI'),
	('+', 'PLUS'),

	# constants
	('-true', 'TRUE'),
	('-false', 'FALSE'),

	# name / path matching
	('-name', 'NAME'),
	('-iname', 'INAME'),

	('-lname', 'LNAME'),
	('-ilname', 'ILNAME'),

	('-path', 'PATH'),
	('-ipath', 'IPATH'),

	('-regex', 'REGEX'),
	('-iregex', 'IREGEX'),

	# access checks
	('-executable', 'EXECUTABLE'),
	('-readable', 'READABLE'),
	('-writable', 'WRITABLE'),

	('-empty', 'EMPTY'),

	('-size', 'SIZE'),
	('-type', 'TYPE'),
	('-xtype', 'XTYPE'),
	('-perm', 'PERM'),

	# ownership
	('-group', 'GROUP'),
	('-user', 'USER'),
	('-gid', 'GID'),
	('-uid', 'UID'),
	('-nogroup', 'NOGROUP'),
	('-nouser', 'NOUSER'),

	# timestamps
	('-amin', 'AMIN'),
	('-anewer', 'ANEWER'),
	('-atime', 'ATIME'),
	('-cmin', 'CMIN'),
	('-cnewer', 'CNEWER'),
	('-ctime', 'CTIME'),
	('-mmin', 'MMIN'),
	# note -newer not -mnewer
	('-newer', 'MNEWER'),
	('-mtime', 'MTIME'),
	# NOTE(review): spellings are matched by exact dict lookup, so only the
	# literal string '-newerXY' maps to NEWERXY; forms such as '-newermt'
	# come out as STRING.  Confirm whether that is intended.
	('-newerXY', 'NEWERXY'),

	# actions
	('-delete', 'DELETE'),
	('-prune', 'PRUNE'),
	('-quit', 'QUIT'),

	('-print', 'PRINT'),
	('-print0', 'PRINT0'),
	('-printf', 'PRINTF'),
	('-ls', 'LS'),
	('-fprint', 'FPRINT'),
	('-fprint0', 'FPRINT0'),
	('-fprintf', 'FPRINTF'),
	('-fls', 'FLS'),

	('-exec', 'EXEC'),
	('-execdir', 'EXECDIR'),
	('-ok', 'OK'),
	('-okdir', 'OKDIR'),
]

# Number given to the first entry of _ops; later entries count up from it.
# start=100 is pgen voodoo, don't touch
_FIRST_OP_NUM = 100

# argv spelling -> token number, e.g. '!' -> 100
opmap = {
	op: num
	for num, (op, _) in enumerate(_ops, start=_FIRST_OP_NUM)
}

# token number -> token name, e.g. 100 -> 'BANG'
tok_name = {
	num: label
	for num, (_, label) in enumerate(_ops, start=_FIRST_OP_NUM)
}

# Tokens that have no spelling in _ops.
tok_name.update({
	0: 'ENDMARKER',
	1: 'STRING',
	256: 'NT_OFFSET',
})

import sys
this_module = sys.modules[__name__]


def _export_token_constants(module, names):
	"""Bind every token name in `names` to its number as an attribute of `module`."""
	for num, label in names.items():
		setattr(module, label, num)


# Publish BANG, LPAR, ..., ENDMARKER, STRING and NT_OFFSET as module-level
# ints.  This runs inside a function so that the loop variables do not stay
# behind as module attributes, where a getattr()-based lookup by name could
# mistake a leftover int for a token.
_export_token_constants(this_module, tok_name)
 | 
| 99 | 
 | 
class TokenDef(object):
	"""Lookups from token names and operator spellings to token numbers."""

	def GetTerminalNum(self, label):
		"""Return the token number stored in the module attribute `label`.

		e.g. 'STRING' -> 1.  An assertion carrying `label` fails when the
		attribute is missing, is not an int, or is not a key of tok_name.
		"""
		num = getattr(this_module, label, None)
		assert isinstance(num, int) and num in tok_name, label
		return num

	def GetOpNum(self, value):
		"""Return the token number for the spelling `value`, e.g. '(' -> LPAR.

		Raises KeyError when `value` is not a key of opmap.
		"""
		op_num = opmap[value]
		return op_num

	def GetKeywordNum(self, value):
		"""Always return None, whatever `value` is."""
		return None
 | 
| 114 | 
 | 
| 115 | 
 | 
def tokenize(argv):
	"""Lazily turn a sequence of argument strings into token 5-tuples.

	Each element of `argv` yields (type, value, start, end, line_text), where
	type is the opmap number for an exact spelling match and STRING otherwise.
	A final (ENDMARKER, '', ...) tuple follows the last argument.  The
	position and line fields are placeholders: (1, 0) and ''.
	"""
	dummy_pos = (1, 0)  # same placeholder used for both start and end
	no_line = ''
	for arg in argv:
		yield (opmap.get(arg, STRING), arg, dummy_pos, dummy_pos, no_line)
	yield (ENDMARKER, '', dummy_pos, dummy_pos, no_line)
 | 
| 125 | 
 | 
def is_terminal(type):
	"""Return True when the int `type` lies below NT_OFFSET."""
	return NT_OFFSET > type
 | 
| 129 | 
 | 
def is_nonterminal(type):
	"""Return True when the int `type` is NT_OFFSET or greater."""
	return NT_OFFSET <= type
 | 
| 133 | 
 | 
def is_eof(type):
	"""Return True when the int `type` equals ENDMARKER."""
	return ENDMARKER == type
 |