OILS / opy / _regtest / src / asdl / format.py View on Github | oilshell.org

570 lines, 325 significant
1"""
2format.py -- Pretty print an ASDL data structure.
3
4Like encode.py, but uses text instead of binary.
5
6TODO:
7
8- auto-abbreviation of single field things (minus location)
9
10- option to omit spaces for SQ, SQ, W? It's all one thing.
11
12Places where we try a single line:
13 - arrays
14 - objects with name fields
15 - abbreviated, unnamed fields
16"""
17
18import re
19
20from asdl import asdl_ as asdl
21from core import util
22
23import os
24if not os.getenv('_OVM_DEPS'):
25 import cgi
26
27
def DetectConsoleOutput(f):
  """Pick a ColorOutput implementation suited to the stream `f`.

  Args:
    f: file-like object; its isatty() method decides the output flavor.

  Returns:
    AnsiOutput(f) when f.isatty() is true, otherwise TextOutput(f).
  """
  output_class = AnsiOutput if f.isatty() else TextOutput
  return output_class(f)
34
35
class ColorOutput(object):
  """Abstract base class for plain text, ANSI color, and HTML color.

  Wraps a file-like object `f` and keeps a running count of the visible
  characters written through it, so callers can do line-width calculations
  that ignore color control sequences.

  Subclasses must implement NewTempBuffer(), PushColor() and PopColor().
  """

  def __init__(self, f):
    self.num_chars = 0  # visible characters written so far
    self.f = f

  def NewTempBuffer(self):
    """Return a temporary buffer for the line wrapping calculation."""
    raise NotImplementedError

  def FileHeader(self):
    """Hook for printing a full file; the default writes nothing."""

  def FileFooter(self):
    """Hook for printing a full file; the default writes nothing."""

  def PushColor(self, str_type):
    """Start coloring subsequent output according to `str_type`."""
    raise NotImplementedError

  def PopColor(self):
    """End the coloring started by the matching PushColor()."""
    raise NotImplementedError

  def write(self, s):
    """Write the visible text `s` and add its length to the count."""
    self.f.write(s)
    visible = len(s)  # Only count visible characters!
    self.num_chars += visible

  def WriteRaw(self, raw):
    """Write a (string, num_chars) pair as produced by GetRaw().

    The string is written as-is (no escaping), and only the supplied
    num_chars is added to the count, so control codes embedded in the string
    do not inflate the visible length.
    """
    text, visible = raw
    self.f.write(text)
    self.num_chars += visible

  def NumChars(self):
    """Return the number of visible characters written so far."""
    return self.num_chars

  def GetRaw(self):
    """Return (buffer contents, visible char count), suitable for WriteRaw().

    Only usable when the wrapped file has getvalue(), e.g. an io.StringIO().
    """
    contents = self.f.getvalue()
    return contents, self.num_chars
80
81
class TextOutput(ColorOutput):
  """Plain text output: obeys the color interface, but emits no color."""

  def __init__(self, f):
    super(TextOutput, self).__init__(f)

  def NewTempBuffer(self):
    """Return a fresh TextOutput backed by an in-memory util.Buffer."""
    scratch = util.Buffer()
    return TextOutput(scratch)

  def PushColor(self, str_type):
    """Ignore the color request; plain text has no colors."""

  def PopColor(self):
    """Ignore the color request; plain text has no colors."""
96
97
class HtmlOutput(ColorOutput):
  """ColorOutput that expresses color as HTML <span> elements.

  Text passed to write() is HTML-escaped.  Colors become
  <span class="..."> ... </span> pairs whose classes are styled by the
  <style> block that FileHeader() emits.

  Design note: HTML can have wider columns, and maybe not even a fixed-width
  font, so indentation should possibly be logical rather than column based.
  """

  def __init__(self, f):
    super(HtmlOutput, self).__init__(f)

  def NewTempBuffer(self):
    """Return a fresh HtmlOutput backed by an in-memory util.Buffer."""
    scratch = util.Buffer()
    return HtmlOutput(scratch)

  def FileHeader(self):
    """Write the opening HTML boilerplate, ending inside a <pre> element.

    Written straight to the underlying file, so the visible character count
    is not affected.
    """
    # TODO: Use a different CSS file to make the colors match. I like string
    # literals as yellow, etc.
    #<link rel="stylesheet" type="text/css" href="/css/code.css" />
    self.f.write("""
<html>
 <head>
 <title>oil AST</title>
 <style>
 .n { color: brown }
 .s { font-weight: bold }
 .o { color: darkgreen }
 </style>
 </head>
 <body>
 <pre>
""")

  def FileFooter(self):
    """Write the closing tags that match FileHeader()."""
    self.f.write("""
 </pre>
 </body>
</html>
 """)

  def PushColor(self, str_type):
    """Open a <span> whose CSS class corresponds to the token type.

    Raises:
      AssertionError: if `str_type` is not one of the known token types.
    """
    # To save bandwidth, use single character CSS names.
    css_classes = (
        (_NODE_TYPE, 'n'),
        (_STRING_LITERAL, 's'),
        (_OTHER_LITERAL, 'o'),
        (_OTHER_TYPE, 'o'),
    )
    for token_type, css_class in css_classes:
      if str_type == token_type:
        break
    else:
      raise AssertionError(str_type)
    self.f.write('<span class="%s">' % css_class)

  def PopColor(self):
    """Close the <span> opened by PushColor()."""
    self.f.write('</span>')

  def write(self, s):
    """Write `s` HTML-escaped, counting its unescaped length as visible.

    Escaping happens only here.  The inherited WriteRaw() writes its string
    without escaping, so text captured from a temp buffer via GetRaw() is
    not escaped a second time.
    """
    escaped = cgi.escape(s)
    self.f.write(escaped)
    self.num_chars += len(s)  # Only count visible characters!
157
158
# Color token types, passed to ColorOutput.PushColor():
#   _NODE_TYPE       the type name of a compound node
#   _STRING_LITERAL  string values
#   _OTHER_LITERAL   int and bool values
#   _OTHER_TYPE      repr() of any other kind of value
_NODE_TYPE, _STRING_LITERAL, _OTHER_LITERAL, _OTHER_TYPE = 1, 2, 3, 4


# ANSI color constants (also in sh_spec.py)

# Attributes
_RESET = '\033[0;0m'
_BOLD = '\033[1m'

# Foreground colors, in ascending order of their numeric code
_RED = '\033[31m'
_GREEN = '\033[32m'
_YELLOW = '\033[33m'
_BLUE = '\033[34m'
_CYAN = '\033[36m'
176
177
class AnsiOutput(ColorOutput):
  """ColorOutput for the console, using ANSI escape sequences."""

  def __init__(self, f):
    super(AnsiOutput, self).__init__(f)

  def NewTempBuffer(self):
    """Return a fresh AnsiOutput backed by an in-memory util.Buffer."""
    scratch = util.Buffer()
    return AnsiOutput(scratch)

  def PushColor(self, str_type):
    """Emit the escape sequence for the given token type.

    The sequence goes straight to the underlying file, so it does not count
    toward the visible character total.

    Raises:
      AssertionError: if `str_type` is not one of the known token types.
    """
    if str_type == _NODE_TYPE:
      escape_code = _YELLOW  # was once _GREEN
    elif str_type == _STRING_LITERAL:
      escape_code = _BOLD
    elif str_type == _OTHER_LITERAL or str_type == _OTHER_TYPE:
      # Other types share the color of other literals for now.
      escape_code = _GREEN
    else:
      raise AssertionError(str_type)
    self.f.write(escape_code)

  def PopColor(self):
    """Reset all attributes, ending whatever PushColor() started."""
    self.f.write(_RESET)
202
203
204#
205# Nodes
206#
207
208
class _Obj(object):
  """Pretty-printing node for a compound value (a record with fields).

  Created with defaults by MakeTree(); an abbreviation hook may then mutate
  the instance to change how it is rendered.
  """

  def __init__(self, node_type):
    # Type name shown after the left delimiter.
    self.node_type = node_type
    # Named fields: list of (name, value) 2-tuples.  A value is an _Obj, a
    # _ColoredString, a plain str, or a list of those.
    self.fields = []

    # --- Custom hooks can change everything below ---

    # When true, the printers render `unnamed_fields` instead of `fields`.
    self.abbrev = False
    # The printers in this file consult this only on the abbreviated path;
    # the full form always shows the node type.
    self.show_node_type = True
    # Opening and closing delimiters.
    self.left = '('
    self.right = ')'
    # Positional values (_Obj or _ColoredString) for the abbreviated form.
    # Open question from the original author: CompoundWord just has
    # word_part though.
    self.unnamed_fields = []

  def __repr__(self):
    shown = (self.node_type, self.fields)
    return '<_Obj %s %s>' % shown
226
227
class _ColoredString(object):
  """Pretty-printing leaf node: a string plus the token type that colors it.

  Attributes:
    s: the text (must be a str).
    str_type: one of the color token type constants, later handed to
      ColorOutput.PushColor().
  """

  def __init__(self, s, str_type):
    assert isinstance(s, str), s
    self.str_type = str_type
    self.s = s

  def __repr__(self):
    shown = (self.s, self.str_type)
    return '<_ColoredString %s %s>' % shown
237
238
def MakeFieldSubtree(obj, field_name, desc, abbrev_hook, omit_empty=True):
  """Convert one field of an ASDL object into a pretty-printing subtree.

  Args:
    obj: the object that owns the field.
    field_name: name of the attribute to read from `obj`.
    desc: ASDL type descriptor of the field; selects the conversion.
    abbrev_hook: passed through to MakeTree() for nested values.
    omit_empty: whether an empty array field yields None.  Also passed
      through to MakeTree() for nested values, so the setting applies to the
      whole subtree rather than only to this level.

  Returns:
    A _ColoredString (int, bool, str fields), a plain str (the name of a
    simple sum value), a list of subtrees (array fields), whatever
    MakeTree() returns (all other fields), or None when the field should be
    left out: an unset optional field, or an empty array with omit_empty.

  Raises:
    AssertionError: if `obj` has no attribute `field_name`.
  """
  try:
    field_val = getattr(obj, field_name)
  except AttributeError:
    # This happens when required fields are not initialized, e.g. FuncCall()
    # without setting name.
    raise AssertionError(
        '%s is missing field %r' % (obj.__class__, field_name))

  if isinstance(desc, asdl.IntType):
    return _ColoredString(str(field_val), _OTHER_LITERAL)

  if isinstance(desc, asdl.BoolType):
    return _ColoredString('T' if field_val else 'F', _OTHER_LITERAL)

  if isinstance(desc, asdl.Sum) and asdl.is_simple(desc):
    return field_val.name

  if isinstance(desc, asdl.StrType):
    return _ColoredString(field_val, _STRING_LITERAL)

  if isinstance(desc, asdl.ArrayType):
    # Children are converted before the emptiness check, as before, so a
    # non-iterable value still fails loudly here.
    children = [
        MakeTree(child_obj, abbrev_hook, omit_empty=omit_empty)
        for child_obj in field_val
    ]
    if omit_empty and not field_val:
      return None
    return children

  if isinstance(desc, asdl.MaybeType) and field_val is None:
    return None  # optional field that is not set

  # A set optional field, or any other compound field.
  return MakeTree(field_val, abbrev_hook, omit_empty=omit_empty)
280
281
def MakeTree(obj, abbrev_hook=None, omit_empty=True):
  """The first step of printing: create a homogeneous tree.

  Args:
    obj: py_meta.Obj, or a plain value found inside one (e.g. an element of
      an array of strings).
    abbrev_hook: optional callable invoked as abbrev_hook(obj, out_node) on
      every compound object after its fields are converted; it may mutate
      out_node to abbreviate it.
    omit_empty: Whether to omit empty lists

  Returns:
    - the value's name (a str) for a py_meta.SimpleObj
    - an _Obj node for a py_meta.CompoundObj
    - a _ColoredString for a str, or for any other value (using its repr())
  """
  from asdl import py_meta

  if isinstance(obj, py_meta.SimpleObj):  # Primitive
    return obj.name

  if not isinstance(obj, py_meta.CompoundObj):
    if isinstance(obj, str):  # Could be an array of strings
      return _ColoredString(obj, _STRING_LITERAL)
    # Id uses this now.  TODO: Should we have plugins?  Might need it for
    # color.
    return _ColoredString(repr(obj), _OTHER_TYPE)

  # These lines can be possibly COMBINED all into one.  () can replace
  # indentation?
  out_node = _Obj(obj.__class__.__name__)

  for field_name, desc in obj.ASDL_TYPE.GetFields():
    subtree = MakeFieldSubtree(obj, field_name, desc, abbrev_hook,
                               omit_empty=omit_empty)
    if subtree is None:
      continue  # field omitted
    out_node.fields.append((field_name, subtree))

  # Call user-defined hook to abbreviate compound objects.
  if abbrev_hook:
    abbrev_hook(obj, out_node)

  return out_node
321
322
# Strings made only of word characters, '-' and '_', plus the path name
# characters '.' and '/', are printed without quotes.
_PLAIN_RE = re.compile(r'^[a-zA-Z0-9\-_./]+$')

# NOTE: Turning JSON back on can be a cheap hack for detecting invalid unicode.
# But we want to write our own AST walker for that.

def _PrettyString(s):
  """Return `s` unchanged if it is "plain", otherwise its repr().

  Plain means non-empty and matching _PLAIN_RE, with no newline anywhere.
  """
  # '$' also matches just before a trailing newline, so a newline has to be
  # ruled out explicitly.  (json.dumps(s) was the earlier choice of quoting.)
  if '\n' not in s and _PLAIN_RE.match(s):
    return s
  return repr(s)
338
339
INDENT = 2  # columns added per nesting level


def _PrintWrappedArray(array, prefix_len, f, indent, max_col):
  """Print an array of objects with line wrapping.

  Items are separated by a single space.  An item that fits in the space
  left on the current line is appended to it; one that does not is printed
  by PrintTree() on a new line, indented one level deeper than `indent`.

  Args:
    array: list of homogeneous tree nodes.
    prefix_len: number of columns already used on the current line.
    f: ColorOutput instance.
    indent: indent level of the enclosing object.
    max_col: column limit passed on to the printers.

  Returns:
    Whether they all fit on a single line, so you can print the closing
    brace properly.
  """
  # NOTE(review): the separator spaces are not added to `used`, and `used`
  # restarts at 0 after a wrapped item, so the width is an approximation.
  every_item_fit = True
  used = prefix_len

  for position, item in enumerate(array):
    if position > 0:
      f.write(' ')

    scratch = f.NewTempBuffer()
    fits = _TrySingleLine(item, scratch, max_col - used)
    if not fits:  # WRAP THE LINE
      f.write('\n')
      # TODO: Add max_col here, taking into account the field name
      PrintTree(item, f, indent=indent + INDENT, max_col=max_col)
      used = 0  # allow more
      every_item_fit = False
      continue

    f.WriteRaw(scratch.GetRaw())
    used += scratch.NumChars()

  return every_item_fit
368
369
def _PrintWholeArray(array, prefix_len, f, indent, max_col):
  """Print every item and the closing ']' on the current line, or nothing.

  Unlike _PrintWrappedArray(), there is no wrapping here: either ALL
  children fit on a single line, or the caller puts each one onto a separate
  line.  This avoids output like the following, where the first child is out
  of line with the rest:

    children: [(C ...)
      (C ...)
    ]

  The abbreviated objects have a small header like C or DQ, so it doesn't
  matter as much for them.

  Args:
    array: list of homogeneous tree nodes.
    prefix_len: number of columns already used on the current line.
    f: ColorOutput instance.
    indent: not used by this function.
    max_col: column limit for the line.

  Returns:
    True if the items and ']' were written to `f`; False if some item did not
    fit, in which case nothing was written.
  """
  rendered = []
  used = prefix_len
  for item in array:
    scratch = f.NewTempBuffer()
    if not _TrySingleLine(item, scratch, max_col - used):
      return False  # nothing has been written to f yet
    rendered.append(scratch.GetRaw())
    used += scratch.NumChars()

  for position, piece in enumerate(rendered):
    if position > 0:
      f.write(' ')
    f.WriteRaw(piece)
  f.write(']')
  return True
399
400
def _PrintTreeObj(node, f, indent, max_col):
  """Print a CompoundObj in abbreviated or normal form.

  Abbreviated form: the left delimiter, optionally the node type, then the
  unnamed fields with line wrapping, then the right delimiter.

  Normal form, like (SimpleCommand ...): the node type on the first line,
  then one "name: value" entry per field, indented one level, with the right
  delimiter on its own line.

  Args:
    node: _Obj to print.
    f: ColorOutput instance.
    indent: number of columns to indent this object.
    max_col: column limit; also passed to every nested PrintTree() call so a
      non-default limit applies to the whole subtree.
  """
  ind = ' ' * indent

  if node.abbrev:  # abbreviated
    prefix = ind + node.left
    f.write(prefix)
    # Count only what is actually written before the first unnamed field.
    prefix_len = len(prefix)
    if node.show_node_type:
      f.PushColor(_NODE_TYPE)
      f.write(node.node_type)
      f.PopColor()
      f.write(' ')
      prefix_len += len(node.node_type) + 1

    all_fit = _PrintWrappedArray(
        node.unnamed_fields, prefix_len, f, indent, max_col)

    if not all_fit:
      # Something wrapped, so the closing delimiter gets its own line.
      f.write('\n')
      f.write(ind)
    f.write(node.right)
    return

  # full form like (SimpleCommand ...)
  f.write(ind + node.left)

  f.PushColor(_NODE_TYPE)
  f.write(node.node_type)
  f.PopColor()

  f.write('\n')

  ind1 = ' ' * (indent + INDENT)  # indentation of the field names
  child_indent = indent + INDENT + INDENT  # values that get their own line

  for name, val in node.fields:
    if isinstance(val, list):  # list field
      name_str = '%s%s: [' % (ind1, name)
      f.write(name_str)

      if not _PrintWholeArray(val, len(name_str), f, indent, max_col):
        # The list does not fit on one line: one child per line.
        f.write('\n')
        for child in val:
          PrintTree(child, f, indent=child_indent, max_col=max_col)
          f.write('\n')
        f.write('%s]' % ind1)

    else:  # primitive field
      name_str = '%s%s: ' % (ind1, name)
      f.write(name_str)

      # Try to print it on the same line as the field name; otherwise print
      # it on a separate line.
      single_f = f.NewTempBuffer()
      if _TrySingleLine(val, single_f, max_col - len(name_str)):
        f.WriteRaw(single_f.GetRaw())
      else:
        f.write('\n')
        PrintTree(val, f, indent=child_indent, max_col=max_col)

    f.write('\n')  # separate fields

  f.write(ind + node.right)
466
467
def PrintTree(node, f, indent=0, max_col=100):
  """Second step of printing: turn homogeneous tree into a colored string.

  The node is first rendered into a temporary buffer as a single indented
  line.  If that fits, it is copied to `f`.  Otherwise the node is printed in
  its multi-line form.

  Args:
    node: homogeneous tree node
    f: ColorOutput instance.
    indent: number of columns to indent the node.
    max_col: don't print past this column number on ANY line

  Raises:
    AssertionError: if the node does not fit on one line and is not a str,
      _ColoredString or _Obj.
  """
  ind = ' ' * indent

  # Try printing on a single line
  # NOTE(review): the indent is written into the buffer AND subtracted from
  # the limit, so it appears to be counted twice.  Left as-is because changing
  # it would alter where existing output wraps.
  single_f = f.NewTempBuffer()
  single_f.write(ind)
  if _TrySingleLine(node, single_f, max_col - indent):
    f.WriteRaw(single_f.GetRaw())
    return

  if isinstance(node, str):
    f.write(ind + _PrettyString(node))

  elif isinstance(node, _ColoredString):
    # Indent like the plain str branch and the single-line path do; an
    # over-long colored string must not start at column 0.
    f.write(ind)
    f.PushColor(node.str_type)
    f.write(_PrettyString(node.s))
    f.PopColor()

  elif isinstance(node, _Obj):
    _PrintTreeObj(node, f, indent, max_col)

  else:
    raise AssertionError(node)
498
499
def _TrySingleLineObj(node, f, max_chars):
  """Print an object on a single line.

  Abbreviated objects are written as: left delimiter, optional node type,
  space-separated unnamed fields, right delimiter.  Other objects are written
  as: left delimiter, node type, a " name:" label followed by the value for
  each field, right delimiter.

  Args:
    node: _Obj to print.
    f: ColorOutput instance.
    max_chars: limit handed to _TrySingleLine() for every child.

  Returns:
    False as soon as a child reports that it does not fit (the contents of
    `f` are then unusable), True otherwise.
  """
  abbreviated = node.abbrev

  f.write(node.left)

  # The node type is optional only in the abbreviated form.
  if not abbreviated or node.show_node_type:
    f.PushColor(_NODE_TYPE)
    f.write(node.node_type)
    f.PopColor()
    if abbreviated:
      f.write(' ')

  if abbreviated:
    for position, val in enumerate(node.unnamed_fields):
      if position > 0:
        f.write(' ')
      if not _TrySingleLine(val, f, max_chars):
        return False
  else:
    for name, val in node.fields:
      f.write(' %s:' % name)
      if not _TrySingleLine(val, f, max_chars):
        return False

  f.write(node.right)
  return True
527
528
def _TrySingleLine(node, f, max_chars):
  """Try printing on a single line.

  Args:
    node: homogeneous tree node: a str, _ColoredString, _Obj, or a list of
      those
    f: ColorOutput instance
    max_chars: maximum number of characters to print on THIS line, compared
      against the total number of visible characters written to `f`

  Returns:
    ok: whether it fit on the line of the given size.
      If False, you can't use the value of f.

  Raises:
    AssertionError: if the node is of an unexpected type.
  """
  if isinstance(node, str):
    f.write(_PrettyString(node))

  elif isinstance(node, _ColoredString):
    f.PushColor(node.str_type)
    f.write(_PrettyString(node.s))
    f.PopColor()

  elif isinstance(node, list):  # Can we fit the WHOLE list on the line?
    f.write('[')
    for i, item in enumerate(node):
      if i != 0:
        f.write(' ')
      if not _TrySingleLine(item, f, max_chars):
        return False
    f.write(']')

  elif isinstance(node, _Obj):
    if not _TrySingleLineObj(node, f, max_chars):
      return False
    # Fall through to the length check below.  The closing delimiter, and an
    # object without any fields, would otherwise never be measured.

  else:
    raise AssertionError("Unexpected node: %r (%r)" % (node, node.__class__))

  # Take into account the last char.
  return f.NumChars() <= max_chars