| 1 | #!/usr/bin/env python2
 | 
| 2 | """
 | 
| 3 | func_eggex.py
 | 
| 4 | """
 | 
| 5 | from __future__ import print_function
 | 
| 6 | 
 | 
| 7 | from _devbuild.gen.syntax_asdl import loc_t, Token
 | 
| 8 | from _devbuild.gen.value_asdl import (value, value_e, value_t, eggex_ops,
 | 
| 9 |                                       eggex_ops_e, eggex_ops_t, regex_match_e,
 | 
| 10 |                                       RegexMatch)
 | 
| 11 | from core import error
 | 
| 12 | from core import num
 | 
| 13 | from core import state
 | 
| 14 | from core import vm
 | 
| 15 | from frontend import typed_args
 | 
| 16 | from mycpp import mops
 | 
| 17 | from mycpp.mylib import log, tagswitch
 | 
| 18 | 
 | 
| 19 | from typing import Optional, cast, TYPE_CHECKING
 | 
| 20 | if TYPE_CHECKING:
 | 
| 21 |     from ysh.expr_eval import ExprEvaluator
 | 
| 22 | 
 | 
# Reference `log` so the import is used even when no debug logging is
# present (presumably to keep linters quiet -- confirm).
_ = log

# Selectors stored in _MatchCallable.to_return.  They choose which part of a
# capture group a callable returns.
G = 0  # _group()  -> the matched substring
S = 1  # _start()  -> start index of the group
E = 2  # _end()    -> end index of the group
 | 
| 28 | 
 | 
| 29 | 
 | 
class _MatchCallable(vm._Callable):
    """Base class for callables that read one capture group of a RegexMatch.

    Attributes set in __init__:
      to_return: one of G, S, E; selects the substring, the start index or
        the end index of the group.
      expr_ev: evaluator used to call a capture group's conversion function.
    """

    def __init__(self, to_return, expr_ev):
        # type: (int, Optional[ExprEvaluator]) -> None
        self.to_return = to_return
        self.expr_ev = expr_ev

    def _ReturnValue(self, match, group_index, blame_loc):
        # type: (RegexMatch, int, loc_t) -> value_t
        """Return the start, end or text of one capture group.

        Args:
          match: the match; match.indices holds a (start, end) pair per
            group, with group 0 first.
          group_index: 0 for the whole match, 1..N for a capture group.
          blame_loc: location attached to any error raised here.

        Returns:
          For S / E, the start / end index converted with num.ToBig().
          Otherwise value.Null if the group's start index is -1, else the
          matched substring, passed through the group's conversion function
          when one is registered for it.

        Raises:
          error.Expr: if group_index is negative, or is not less than the
            number of groups.
        """
        num_groups = len(match.indices) / 2  # including group 0

        # A negative index would pass the upper-bound check below and then
        # index match.indices (and convert_funcs) from the end, so reject it
        # explicitly.
        if group_index < 0:
            raise error.Expr(
                'Expected capture group >= 0, got %d' % group_index,
                blame_loc)

        if group_index >= num_groups:
            assert num_groups != 0
            raise error.Expr(
                'Expected capture group less than %d, got %d' %
                (num_groups, group_index), blame_loc)

        start = match.indices[2 * group_index]
        if self.to_return == S:
            return num.ToBig(start)

        end = match.indices[2 * group_index + 1]
        if self.to_return == E:
            return num.ToBig(end)

        # to_return is G from here on.  A start of -1 means there is no
        # substring to return for this group.
        if start == -1:
            return value.Null

        val = value.Str(match.s[start:end])  # type: value_t

        convert_func = None  # type: Optional[value_t]
        convert_tok = None  # type: Optional[Token]
        with tagswitch(match.ops) as case:
            if case(eggex_ops_e.Yes):
                ops = cast(eggex_ops.Yes, match.ops)

                # group 0 doesn't have a name or type attached to it, so the
                # per-group lists are offset by one.
                if len(ops.convert_funcs) and group_index != 0:
                    convert_func = ops.convert_funcs[group_index - 1]
                    convert_tok = ops.convert_toks[group_index - 1]

        if convert_func is not None:
            assert convert_tok is not None
            # Blame the group() call?  It would be nicer to blame the
            # Token re.Capture.func_name, but we lost that in
            # _EvalEggex()
            val = self.expr_ev.CallConvertFunc(convert_func, val,
                                               convert_tok, blame_loc)

        return val

    def _Call(self, match, group_arg, blame_loc):
        # type: (RegexMatch, value_t, loc_t) -> value_t
        """Resolve group_arg (Int index or Str name) and return its value."""
        group_index = _GetGroupIndex(group_arg, match.ops, blame_loc)
        return self._ReturnValue(match, group_index, blame_loc)
 | 
| 84 | 
 | 
| 85 | 
 | 
def _GetGroupIndex(group, ops, blame_loc):
    # type: (value_t, eggex_ops_t, loc_t) -> int
    """Resolve a group argument to a numeric capture group index.

    Args:
      group: value.Int (a group number) or value.Str (a group name).
      ops: eggex_ops of the match; only eggex_ops.Yes carries capture_names.
      blame_loc: location attached to any error raised here.

    Returns:
      The group index.  Named groups map to their 1-based position in
      ops.capture_names.

    Raises:
      error.Expr: for a negative Int, for a name when ops is eggex_ops.No,
        or for a name that is not in ops.capture_names.
      error.TypeErr: if group is neither Int nor Str.
    """
    UP_group = group
    group_index = -1  # sentinel: not resolved yet

    with tagswitch(group) as case:
        if case(value_e.Int):
            group = cast(value.Int, UP_group)
            group_index_big = group.i
            group_index = mops.BigTruncate(group_index_big)

            # Negative numbers are not valid group indices, and -1 in
            # particular collides with the sentinel checked by the assert at
            # the end of this function.  Report a normal error instead.
            if group_index < 0:
                raise error.Expr(
                    'Expected capture group >= 0, got %d' % group_index,
                    blame_loc)

        elif case(value_e.Str):
            group = cast(value.Str, UP_group)

            UP_ops = ops
            with tagswitch(ops) as case2:
                if case2(eggex_ops_e.No):
                    raise error.Expr(
                        "ERE captures don't have names (%r)" % group.s,
                        blame_loc)
                elif case2(eggex_ops_e.Yes):
                    ops = cast(eggex_ops.Yes, UP_ops)
                    for i, name in enumerate(ops.capture_names):
                        if name == group.s:
                            group_index = i + 1  # 1-based
                            break
                    if group_index == -1:
                        raise error.Expr('No such group %r' % group.s,
                                         blame_loc)

        else:
            # TODO: add method name to this error
            raise error.TypeErr(group, 'expected Int or Str', blame_loc)

    assert group_index != -1, 'Should have been initialized'
    return group_index
 | 
| 122 | 
 | 
| 123 | 
 | 
class MatchFunc(_MatchCallable):
    """
    Functions that read the match returned by mem.GetRegexMatch():

        _group(i)
        _start(i)
        _end(i)

    The argument selects the group:

        _group(0)              the whole match
        _group(1) .. _group(N) a submatch
        _group('month')        a group looked up by name

    _start() and _end() take the same arguments.
    """

    def __init__(self, to_return, expr_ev, mem):
        # type: (int, Optional[ExprEvaluator], state.Mem) -> None
        _MatchCallable.__init__(self, to_return, expr_ev)
        self.mem = mem

    def Call(self, rd):
        # type: (typed_args.Reader) -> value_t

        which_group = rd.PosValue()
        rd.Done()

        last_match = self.mem.GetRegexMatch()
        UP_last_match = last_match
        with tagswitch(last_match) as case:
            if case(regex_match_e.Yes):
                last_match = cast(RegexMatch, UP_last_match)

                return self._Call(last_match, which_group,
                                  rd.LeftParenToken())

            elif case(regex_match_e.No):
                # Without a match, even _group(0) is an error
                raise error.Expr('No regex capture groups',
                                 rd.LeftParenToken())

        # Neither variant matched
        raise AssertionError()
 | 
| 162 | 
 | 
| 163 | 
 | 
class MatchMethod(_MatchCallable):
    """
    Methods called on a match value m:

        m => group(i)
        m => start(i)
        m => end(i)
    """

    def __init__(self, to_return, expr_ev):
        # type: (int, Optional[ExprEvaluator]) -> None
        _MatchCallable.__init__(self, to_return, expr_ev)

    def Call(self, rd):
        # type: (typed_args.Reader) -> value_t

        # First positional arg is the match itself (noted as guaranteed in
        # the original code); the second selects the group.
        m = rd.PosMatch()
        which_group = rd.PosValue()
        rd.Done()

        call_loc = rd.LeftParenToken()
        return self._Call(m, which_group, call_loc)
 | 
| 184 | 
 | 
| 185 | 
 | 
| 186 | # vim: sw=4
 |