| 1 | #!/usr/bin/env python2
|
| 2 | """
|
| 3 | func_eggex.py
|
| 4 | """
|
| 5 | from __future__ import print_function
|
| 6 |
|
| 7 | from _devbuild.gen.syntax_asdl import loc_t, Token
|
| 8 | from _devbuild.gen.value_asdl import (value, value_e, value_t, eggex_ops,
|
| 9 | eggex_ops_e, eggex_ops_t, regex_match_e,
|
| 10 | RegexMatch)
|
| 11 | from core import error
|
| 12 | from core import num
|
| 13 | from core import state
|
| 14 | from core import vm
|
| 15 | from frontend import typed_args
|
| 16 | from mycpp import mops
|
| 17 | from mycpp.mylib import log, tagswitch
|
| 18 |
|
| 19 | from typing import Optional, cast, TYPE_CHECKING
|
| 20 | if TYPE_CHECKING:
|
| 21 | from ysh.expr_eval import ExprEvaluator
|
| 22 |
|
| 23 | _ = log
|
| 24 |
|
# Values for _MatchCallable.to_return: which part of a capture group the
# callable hands back.
G = 0  # _group(): the captured substring (after any conversion func)
S = 1  # _start(): the start offset of the capture
E = 2  # _end(): the end offset of the capture
|
| 28 |
|
| 29 |
|
class _MatchCallable(vm._Callable):
    """Shared implementation for callables that read a regex capture group.

    Subclasses obtain a RegexMatch plus a group argument (Int or Str) and
    delegate to _Call().  Depending on `to_return`, the result is the captured
    substring (G), its start offset (S), or its end offset (E).
    """

    def __init__(self, to_return, expr_ev):
        # type: (int, Optional[ExprEvaluator]) -> None
        """
        Args:
          to_return: one of the module constants G, S, E
          expr_ev: evaluator used to invoke capture conversion funcs; only
                   dereferenced when a group has a conversion func attached
        """
        self.to_return = to_return
        self.expr_ev = expr_ev

    def _ReturnValue(self, match, group_index, blame_loc):
        # type: (RegexMatch, int, loc_t) -> value_t
        """Return the substring, start, or end of capture `group_index`.

        Args:
          match: the match to read; match.indices holds (start, end) pairs
          group_index: numeric capture index, where 0 is the whole match
          blame_loc: location attached to any error raised here

        Returns:
          An integer value (via num.ToBig) for S and E.  For G, value.Null
          when the group's start offset is -1, else a value.Str of the capture,
          passed through the group's conversion func if it has one.

        Raises:
          error.Expr: if group_index is negative or not less than the number
                      of groups.
        """
        num_groups = len(match.indices) / 2  # including group 0

        # Negative indices must be rejected explicitly.  Otherwise Python's
        # negative list indexing would silently read the offsets of a
        # different group out of match.indices.
        if group_index < 0:
            raise error.Expr(
                'Expected non-negative capture group, got %d' % group_index,
                blame_loc)

        if group_index >= num_groups:
            assert num_groups != 0
            raise error.Expr(
                'Expected capture group less than %d, got %d' %
                (num_groups, group_index), blame_loc)

        start = match.indices[2 * group_index]
        if self.to_return == S:
            return num.ToBig(start)

        end = match.indices[2 * group_index + 1]
        if self.to_return == E:
            return num.ToBig(end)

        # Remaining case: return the captured text itself.
        # NOTE(review): -1 presumably marks a group that did not participate
        # in the match -- confirm against the code that fills match.indices.
        if start == -1:
            return value.Null

        val = value.Str(match.s[start:end])  # type: value_t

        convert_func = None  # type: Optional[value_t]
        convert_tok = None  # type: Optional[Token]
        with tagswitch(match.ops) as case:
            if case(eggex_ops_e.Yes):
                ops = cast(eggex_ops.Yes, match.ops)

                # group 0 doesn't have a name or type attached to it, so the
                # per-group lists are indexed by group_index - 1
                if len(ops.convert_funcs) and group_index != 0:
                    convert_func = ops.convert_funcs[group_index - 1]
                    convert_tok = ops.convert_toks[group_index - 1]

        if convert_func is not None:
            assert convert_tok is not None
            # Blame the group() call?  It would be nicer to blame the
            # Token re.Capture.func_name, but we lost that in
            # _EvalEggex()
            val = self.expr_ev.CallConvertFunc(convert_func, val,
                                               convert_tok, blame_loc)

        return val

    def _Call(self, match, group_arg, blame_loc):
        # type: (RegexMatch, value_t, loc_t) -> value_t
        """Resolve group_arg (Int or Str) to an index, then read that group."""
        group_index = _GetGroupIndex(group_arg, match.ops, blame_loc)
        return self._ReturnValue(match, group_index, blame_loc)
|
| 84 |
|
| 85 |
|
def _GetGroupIndex(group, ops, blame_loc):
    # type: (value_t, eggex_ops_t, loc_t) -> int
    """Turn a user-supplied group argument into a numeric capture index.

    Args:
      group: value.Int (a numeric index) or value.Str (a capture name)
      ops: eggex metadata of the match; capture names exist only for
           eggex_ops.Yes
      blame_loc: location attached to any error raised here

    Returns:
      A non-negative index.  Named groups map to their 1-based position in
      ops.capture_names.

    Raises:
      error.Expr: for a negative Int, a name used when ops is eggex_ops.No,
                  or a name not found in ops.capture_names.
      error.TypeErr: if group is neither Int nor Str.
    """
    UP_group = group
    group_index = -1  # sentinel: not resolved yet

    with tagswitch(group) as case:
        if case(value_e.Int):
            group = cast(value.Int, UP_group)
            group_index_big = group.i
            group_index = mops.BigTruncate(group_index_big)

            # Reject negative input here.  Otherwise a user-supplied -1 is
            # indistinguishable from the sentinel and would trip the
            # assertion below instead of producing a proper error.
            if group_index < 0:
                raise error.Expr(
                    'Expected non-negative capture group, got %d' %
                    group_index, blame_loc)

        elif case(value_e.Str):
            group = cast(value.Str, UP_group)

            UP_ops = ops
            with tagswitch(ops) as case2:
                if case2(eggex_ops_e.No):
                    raise error.Expr(
                        "ERE captures don't have names (%r)" % group.s,
                        blame_loc)
                elif case2(eggex_ops_e.Yes):
                    ops = cast(eggex_ops.Yes, UP_ops)
                    for i, name in enumerate(ops.capture_names):
                        if name == group.s:
                            group_index = i + 1  # 1-based
                            break
                    if group_index == -1:
                        raise error.Expr('No such group %r' % group.s,
                                         blame_loc)

        else:
            # TODO: add method name to this error
            raise error.TypeErr(group, 'expected Int or Str', blame_loc)

    assert group_index != -1, 'Should have been initialized'
    return group_index
|
| 122 |
|
| 123 |
|
class MatchFunc(_MatchCallable):
    """Free functions that query the regex match held by Mem.

        _group(i)    _start(i)    _end(i)

    The argument picks the capture, and works the same for all three:

        0            the whole match
        1 .. N       a numbered submatch
        'month'      a submatch looked up by name
    """

    def __init__(self, to_return, expr_ev, mem):
        # type: (int, Optional[ExprEvaluator], state.Mem) -> None
        _MatchCallable.__init__(self, to_return, expr_ev)
        # Source of the match, read via GetRegexMatch() on every call
        self.mem = mem

    def Call(self, rd):
        # type: (typed_args.Reader) -> value_t
        """Read one positional group argument and look it up in Mem's match.

        Raises error.Expr when Mem holds no match (regex_match_e.No).
        """
        which_group = rd.PosValue()
        rd.Done()

        current = self.mem.GetRegexMatch()
        UP_current = current
        with tagswitch(current) as case:
            if case(regex_match_e.Yes):
                current = cast(RegexMatch, UP_current)
                return self._Call(current, which_group, rd.LeftParenToken())

            elif case(regex_match_e.No):
                # Without a match, even _group(0) is illegal
                raise error.Expr('No regex capture groups',
                                 rd.LeftParenToken())

        # Neither variant matched; reaching this point is a logic error
        raise AssertionError()
|
| 162 |
|
| 163 |
|
class MatchMethod(_MatchCallable):
    """Methods invoked on a Match object.

        m => group(i)
        m => start(i)
        m => end(i)
    """

    def __init__(self, to_return, expr_ev):
        # type: (int, Optional[ExprEvaluator]) -> None
        _MatchCallable.__init__(self, to_return, expr_ev)

    def Call(self, rd):
        # type: (typed_args.Reader) -> value_t
        """Read the receiver Match and one group argument, then look it up."""
        # The first positional argument is guaranteed to be a Match
        receiver = rd.PosMatch()
        which_group = rd.PosValue()
        rd.Done()

        # Errors are blamed on the call's left paren
        blame_tok = rd.LeftParenToken()
        return self._Call(receiver, which_group, blame_tok)
|
| 184 |
|
| 185 |
|
| 186 | # vim: sw=4
|