| 1 | """
 | 
| 2 | format_strings.py
 | 
| 3 | 
 | 
| 4 | Parse a printf format string so we can compile it to function calls.
 | 
| 5 | """
 | 
| 6 | from __future__ import print_function
 | 
| 7 | 
 | 
| 8 | import re
 | 
| 9 | 
 | 
| 10 | 
 | 
def DecodeMyPyString(s):
    # type: (str) -> str
    """Expand the backslash escape sequences contained in `s`.

    Workaround for the way MyPy escapes strings.  Per the original note,
    this is used in this module and in cppgen_pass.py.

    Args:
      s: text in which escapes such as a backslash followed by 't' are still
         spelled out as two separate characters.

    Returns:
      The text with each escape sequence replaced by the character it names.
    """
    # 'unicode_escape' is a bytes -> str codec, so go through bytes first:
    #
    #   >>> b'\\t'.decode('unicode_escape')
    #   '\t'
    #
    # NOTE: the codec reads every non-escape byte as Latin-1, so a non-ASCII
    # character in `s` comes back as one code point per UTF-8 byte.
    encoded = bytes(s, encoding='utf-8')
    return encoded.decode('unicode_escape')
 | 
| 25 | 
 | 
| 26 | 
 | 
class LiteralPart:
    """A run of text from a format string that is emitted verbatim.

    Attributes:
      s: the literal text.
      strlen: len(s), computed once at construction time.
    """

    def __init__(self, s):
        # The two assignments are independent; the length is cached so that
        # users of this part do not have to recompute it.
        self.strlen = len(s)
        self.s = s

    def __repr__(self):
        # Debug form, e.g. (Literal 'abc')
        return '(Literal %r)' % self.s
 | 
| 35 | 
 | 
| 36 | 
 | 
class SubstPart:
    """A substitution directive from a format string.

    Attributes:
      width: the width that accompanied the directive, stored exactly as
        given (Parse() passes the captured digit string, possibly empty).
      char_code: the character that selects the conversion.
      arg_num: position of the argument this directive consumes; must be
        an integer for __repr__ to format it.
    """

    def __init__(self, width, char_code, arg_num):
        self.width, self.char_code, self.arg_num = width, char_code, arg_num

    def __repr__(self):
        # Debug form, e.g. (Subst '5' d 0)
        fields = (self.width, self.char_code, self.arg_num)
        return '(Subst %r %s %d)' % fields
 | 
| 46 | 
 | 
| 47 | 
 | 
# One match of PAT is a (possibly empty) run of non-'%' text, optionally
# followed by a single %-directive.
#
#   group 1: the literal text before the directive
#   group 2: the digits after '%' ('' when there are none)
#   group 3: the character code ('.' does not match a newline)
#
# Groups 2 and 3 are None when the match contains no directive.
PAT = re.compile(
    r'''
([^%]*)
(?:
  %([0-9]*)(.)   # optional number, and then character code
)?
''',
    flags=re.VERBOSE)
 | 
| 55 | 
 | 
| 56 | 
 | 
def Parse(fmt):
    """Split a printf-style format string into its parts.

    Args:
      fmt: the format string, e.g. 'x = %5d%%'.

    Returns:
      A list, in source order, of LiteralPart and SubstPart objects.
      Each substitution gets the next argument index, starting at 0.  '%%'
      becomes LiteralPart('%') and does not consume an argument.

    NOTE(review): a '%' that PAT cannot pair with a character code (at the
    very end of `fmt`, or directly before a newline) yields no part at all.
    Confirm whether that is intended.
    """
    result = []
    next_arg = 0  # index of the argument the next substitution will consume

    for match in PAT.finditer(fmt):
        literal, width, code = match.groups()

        if literal:
            result.append(LiteralPart(literal))

        if not code:
            # This match carried no %-directive (code is None).
            continue

        if code == '%':
            # Escaped percent sign: plain text, any width is ignored.
            result.append(LiteralPart('%'))
            continue

        result.append(SubstPart(width, code, next_arg))
        next_arg += 1

    return result
 | 
| 79 | 
 | 
| 80 | 
 | 
| 81 | # Note: This would be a lot easier in Oil!
 | 
| 82 | # TODO: Should there be a char type?
 | 
| 83 | """
 | 
| 84 | enum format_part {
 | 
| 85 |   case Literal(s BigStr)
 | 
| 86 |   case Subst(char_code BigStr, arg_num Int)
 | 
| 87 | }
 | 
| 88 | 
 | 
| 89 | let PAT = ///
 | 
| 90 |   < ~['%']* : lit >    # anything except %
 | 
| 91 |   < '%' dot : subst >  # % and then any char
 | 
| 92 | ///
 | 
| 93 | 
 | 
| 94 | func Parse(fmt BigStr) {
 | 
| 95 |   var arg_num = 0
 | 
| 96 |   let parts = []
 | 
| 97 | 
 | 
| 98 |   for (m in find(fmt, PAT)) {
 | 
| 99 |     if (m.lit) {
 | 
| 100 |       do parts.append(format_part.Literal(m.lit))
 | 
| 101 |     }
 | 
| 102 |     if (m.subst) {
 | 
| 103 |       if (char_code == '%') {
 | 
| 104 |         part = format_part.Literal('%')
 | 
| 105 |       } else {
 | 
| 106 |         part = format_part.Subst(char_code, arg_num)
 | 
| 107 |       }
 | 
| 108 |       do parts.append(part)
 | 
| 109 |       set arg_num += 1
 | 
| 110 |     }
 | 
| 111 |   }
 | 
| 112 |   return parts
 | 
| 113 | }
 | 
| 114 | """
 |