OILS / data_lang / pretty.py View on Github | oilshell.org

695 lines, 335 significant
1#!/usr/bin/env python2
2"""
3Pretty print Oils values (and later other data/languages as well).
4
5Pretty printing means intelligently choosing whitespace including indentation
6and newline placement, to attempt to display data nicely while staying within a
7maximum line width.
8"""
9
10# ~~~ Architecture ~~~
11#
12# Based on a version of the algorithm from Wadler's "A Prettier Printer".
13#
14# Pretty printing proceeds in two phases:
15#
16# 1. Convert the thing you want to print into a `doc`.
17# 2. Print the `doc` using a standard algorithm.
18#
19# This separation keeps the details of the data you want to print separate from
20# the printing algorithm.
21
22# ~~~ Pretty Printing Overview ~~~
23#
24# If you're just using this file, you don't need to know how pretty printing
25# works. Just call `PrettyPrinter().PrintValue()`. However if you want to change
26# or extend how values are printed, you'll need to know, so here's an overview.
27#
28# You may want to first read Walder's "A Prettier Printer", which this is based
29# off of:
30# https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
31#
32# Some additional reading, though only tangentially related:
33#
34# - https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
35# - https://lindig.github.io/papers/strictly-pretty-2000.pdf
36# - https://justinpombrio.net/2024/02/23/a-twist-on-Wadlers-printer.html
37# - https://lobste.rs/s/1r0aak/twist_on_wadler_s_printer
38# - https://lobste.rs/s/aevptj/why_is_prettier_rock_solid
39#
40# ~ Constructors ~
41#
42# There are just a few constructors for `doc`, from which everything else is
43# built from.
44#
45# Text(string) prints like:
46# |string
47#
48# Break(string) prints like:
49# |string
50# or like a newline:
51# |
52# |
53# (It does the latter if printed in "flat" mode, and the former otherwise. See
54# Group for details.)
55#
56# Concat(a, b) prints like:
57# |AAAAA
58# |AAABBB
59# |BBBBB
60#
61# Indent(3, a) prints like:
62# |AAAAA
63# | AAAAA
64# | AAAAA
65#
66# Group(a) makes a decision. It either:
67# - Prints `a` "flat", meaning that (i) every Break inside of it is printed as a
68# string instead of as a newline, and (ii) every Group nested inside of it is
69# printed flat.
70# - Prints `a` normally, meaning that (i) the Breaks inside of it are printed as
71# newlines, and (ii) the Groups inside of it make their own decision about
72# whether to be flat.
73# It makes this decision greedily. If the current line would not overflow if the
74# group printed flat, then it will print flat. This takes into account not only
75# the group itself, but the content before and after it on the same line.
76#
77# IfFlat(a, b) prints a if in flat mode or b otherwise.
78#
79# ~ Measures ~
80#
81# The algorithm used here is close to the one originally described by Wadler,
82# but it precomputes a "measure" for each node in the `doc`. This "measure"
83# allows each Groups to decide whether to print flat or not without needing to
84# look ahead per Wadler's algorithm. A measure has two pieces of information:
85#
86# - Measure.flat is the width of the doc if it's printed flat.
87# - Measure.nonflat is the width of the doc until the _earliest possible_
88# newline, or -1 if it doesn't contain a Break.
89#
90# Measures are used in two steps. First, they're computed bottom-up on the
91# `doc`, measuring the size of each node. Later, _PrintDoc() stores a measure in
92# each DocFragment. These Measures measure something different: the width from
93# the doc _to the end of the entire doc tree_. This second set of Measures (the
94# ones in the DocFragments) are computed top-down, and they're used to decide
95# for each Group whether to use flat mode or not, without needing to scan ahead.
96
97from __future__ import print_function
98
99from _devbuild.gen.pretty_asdl import doc, doc_e, DocFragment, Measure, MeasuredDoc
100from _devbuild.gen.value_asdl import value, value_e, value_t, value_str
101from data_lang.j8 import ValueIdString, HeapValueId
102from core import ansi
103from frontend import match
104from mycpp import mops
105from mycpp.mylib import log, tagswitch, BufWriter, iteritems
106from typing import cast, List, Dict
107import fastfunc
108import libc
109
110_ = log
111
112################
113# Measurements #
114################
115
116
117def TryUnicodeWidth(s):
118 # type: (str) -> int
119 try:
120 width = libc.wcswidth(s)
121 except UnicodeError:
122 # e.g. en_US.UTF-8 locale missing, just return the number of bytes
123 width = len(s)
124
125 if width == -1: # non-printable wide char
126 return len(s)
127
128 return width
129
130
131def _EmptyMeasure():
132 # type: () -> Measure
133 """The measure of an empty doc."""
134 return Measure(0, -1)
135
136
137def _FlattenMeasure(measure):
138 # type: (Measure) -> Measure
139 """The measure if its document is rendered flat."""
140 return Measure(measure.flat, -1)
141
142
143def _ConcatMeasure(m1, m2):
144 # type: (Measure, Measure) -> Measure
145 """Compute the measure of concatenated docs.
146
147 If m1 and m2 are the measures of doc1 and doc2,
148 then _ConcatMeasure(m1, m2) is the measure of doc.Concat([doc1, doc2]).
149 This concatenation is associative but not commutative."""
150 if m1.nonflat != -1:
151 return Measure(m1.flat + m2.flat, m1.nonflat)
152 elif m2.nonflat != -1:
153 return Measure(m1.flat + m2.flat, m1.flat + m2.nonflat)
154 else:
155 return Measure(m1.flat + m2.flat, -1)
156
157
158def _SuffixLen(measure):
159 # type: (Measure) -> int
160 """The width until the earliest possible newline, or end of document."""
161 if measure.nonflat != -1:
162 return measure.nonflat
163 else:
164 return measure.flat
165
166
167####################
168# Doc Construction #
169####################
170
171
172def _Text(string):
173 # type: (str) -> MeasuredDoc
174 """Print `string` (which must not contain a newline)."""
175 return MeasuredDoc(doc.Text(string), Measure(TryUnicodeWidth(string), -1))
176
177
178def _Break(string):
179 # type: (str) -> MeasuredDoc
180 """If in `flat` mode, print `string`, otherwise print `\n`."""
181 return MeasuredDoc(doc.Break(string), Measure(TryUnicodeWidth(string), 0))
182
183
184def _Indent(indent, mdoc):
185 # type: (int, MeasuredDoc) -> MeasuredDoc
186 """Add `indent` spaces after every newline in `mdoc`."""
187 return MeasuredDoc(doc.Indent(indent, mdoc), mdoc.measure)
188
189
190def _Concat(mdocs):
191 # type: (List[MeasuredDoc]) -> MeasuredDoc
192 """Print the mdocs in order (with no space in between)."""
193 measure = _EmptyMeasure()
194 for mdoc in mdocs:
195 measure = _ConcatMeasure(measure, mdoc.measure)
196 return MeasuredDoc(doc.Concat(mdocs), measure)
197
198
199def _Group(mdoc):
200 # type: (MeasuredDoc) -> MeasuredDoc
201 """Print `mdoc`. Use flat mode if `mdoc` will fit on the current line."""
202 return MeasuredDoc(doc.Group(mdoc), mdoc.measure)
203
204
205def _IfFlat(flat_mdoc, nonflat_mdoc):
206 # type: (MeasuredDoc, MeasuredDoc) -> MeasuredDoc
207 """If in flat mode, print `flat_mdoc` otherwise print `nonflat_mdoc`."""
208 return MeasuredDoc(
209 doc.IfFlat(flat_mdoc, nonflat_mdoc),
210 Measure(flat_mdoc.measure.flat, nonflat_mdoc.measure.nonflat))
211
212
213###################
214# Pretty Printing #
215###################
216
217_DEFAULT_MAX_WIDTH = 80
218_DEFAULT_INDENTATION = 4
219_DEFAULT_USE_STYLES = True
220_DEFAULT_SHOW_TYPE_PREFIX = True
221
222# Tuned for 'data_lang/pretty-benchmark.sh float-demo'
223# TODO: might want options for float width
224_DEFAULT_MAX_TABULAR_WIDTH = 22
225
226
227class PrettyPrinter(object):
228 """Pretty print an Oils value."""
229
230 def __init__(self):
231 # type: () -> None
232 """Construct a PrettyPrinter with default configuration options.
233
234 Use the Set*() methods for configuration before printing."""
235 self.max_width = _DEFAULT_MAX_WIDTH
236 self.indent = _DEFAULT_INDENTATION
237 self.use_styles = _DEFAULT_USE_STYLES
238 self.show_type_prefix = _DEFAULT_SHOW_TYPE_PREFIX
239 self.max_tabular_width = _DEFAULT_MAX_TABULAR_WIDTH
240
241 def SetMaxWidth(self, max_width):
242 # type: (int) -> None
243 """Set the maximum line width.
244
245 Pretty printing will attempt to (but does not guarantee to) fit the doc
246 within this width.
247 """
248 self.max_width = max_width
249
250 def SetIndent(self, indent):
251 # type: (int) -> None
252 """Set the number of spaces per indentation level."""
253 self.indent = indent
254
255 def SetUseStyles(self, use_styles):
256 # type: (bool) -> None
257 """If true, print with ansi colors and styles. Otherwise print with plain text."""
258 self.use_styles = use_styles
259
260 def SetShowTypePrefix(self, show_type_prefix):
261 # type: (bool) -> None
262 """Set whether or not to print a type before the top-level value.
263
264 E.g. `(Bool) true`"""
265 self.show_type_prefix = show_type_prefix
266
267 def SetMaxTabularWidth(self, max_tabular_width):
268 # type: (int) -> None
269 """Set the maximum width that list elements can be, for them to be
270 vertically aligned."""
271 self.max_tabular_width = max_tabular_width
272
273 def PrintValue(self, val, buf):
274 # type: (value_t, BufWriter) -> None
275 """Pretty print an Oils value to a BufWriter."""
276 constructor = _DocConstructor(self.indent, self.use_styles,
277 self.show_type_prefix,
278 self.max_tabular_width)
279 document = constructor.Value(val)
280 self._PrintDoc(document, buf)
281
282 def _Fits(self, prefix_len, group, suffix_measure):
283 # type: (int, doc.Group, Measure) -> bool
284 """Will `group` fit flat on the current line?"""
285 measure = _ConcatMeasure(_FlattenMeasure(group.mdoc.measure),
286 suffix_measure)
287 return prefix_len + _SuffixLen(measure) <= self.max_width
288
289 def _PrintDoc(self, document, buf):
290 # type: (MeasuredDoc, BufWriter) -> None
291 """Pretty print a `pretty.doc` to a BufWriter."""
292
293 # The width of the text we've printed so far on the current line
294 prefix_len = 0
295 # A _stack_ of document fragments to print. Each fragment contains:
296 # - A MeasuredDoc (doc node and its measure, saying how "big" it is)
297 # - The indentation level to print this doc node at.
298 # - Is this doc node being printed in flat mode?
299 # - The measure _from just after the doc node, to the end of the entire document_.
300 # (Call this the suffix_measure)
301 fragments = [DocFragment(_Group(document), 0, False, _EmptyMeasure())]
302
303 while len(fragments) > 0:
304 frag = fragments.pop()
305 with tagswitch(frag.mdoc.doc) as case:
306
307 if case(doc_e.Text):
308 text = cast(doc.Text, frag.mdoc.doc)
309 buf.write(text.string)
310 prefix_len += frag.mdoc.measure.flat
311
312 elif case(doc_e.Break):
313 if frag.is_flat:
314 break_str = cast(doc.Break, frag.mdoc.doc).string
315 buf.write(break_str)
316 prefix_len += frag.mdoc.measure.flat
317 else:
318 buf.write('\n')
319 buf.write_spaces(frag.indent)
320 prefix_len = frag.indent
321
322 elif case(doc_e.Indent):
323 indented = cast(doc.Indent, frag.mdoc.doc)
324 fragments.append(
325 DocFragment(indented.mdoc,
326 frag.indent + indented.indent,
327 frag.is_flat, frag.measure))
328
329 elif case(doc_e.Concat):
330 # If we encounter Concat([A, B, C]) with a suffix measure M,
331 # we need to push A,B,C onto the stack in reverse order:
332 # - C, with suffix_measure = B.measure + A.measure + M
333 # - B, with suffix_measure = A.measure + M
334 # - A, with suffix_measure = M
335 concat = cast(doc.Concat, frag.mdoc.doc)
336 measure = frag.measure
337 for mdoc in reversed(concat.mdocs):
338 fragments.append(
339 DocFragment(mdoc, frag.indent, frag.is_flat,
340 measure))
341 measure = _ConcatMeasure(mdoc.measure, measure)
342
343 elif case(doc_e.Group):
344 # If the group would fit on the current line when printed
345 # flat, do so. Otherwise, print it non-flat.
346 group = cast(doc.Group, frag.mdoc.doc)
347 flat = self._Fits(prefix_len, group, frag.measure)
348 fragments.append(
349 DocFragment(group.mdoc, frag.indent, flat,
350 frag.measure))
351
352 elif case(doc_e.IfFlat):
353 if_flat = cast(doc.IfFlat, frag.mdoc.doc)
354 if frag.is_flat:
355 subdoc = if_flat.flat_mdoc
356 else:
357 subdoc = if_flat.nonflat_mdoc
358 fragments.append(
359 DocFragment(subdoc, frag.indent, frag.is_flat,
360 frag.measure))
361
362
363################
364# Value -> Doc #
365################
366
367
368class _DocConstructor:
369 """Converts Oil values into `doc`s, which can then be pretty printed."""
370
371 def __init__(self, indent, use_styles, show_type_prefix,
372 max_tabular_width):
373 # type: (int, bool, bool, int) -> None
374 self.indent = indent
375 self.use_styles = use_styles
376 self.show_type_prefix = show_type_prefix
377 self.max_tabular_width = max_tabular_width
378 self.visiting = {} # type: Dict[int, bool]
379
380 # These can be configurable later
381 self.number_style = ansi.YELLOW
382 self.null_style = ansi.BOLD + ansi.RED
383 self.bool_style = ansi.BOLD + ansi.BLUE
384 self.string_style = ansi.GREEN
385 self.cycle_style = ansi.BOLD + ansi.MAGENTA
386 self.type_style = ansi.CYAN
387
388 def Value(self, val):
389 # type: (value_t) -> MeasuredDoc
390 """Convert an Oils value into a `doc`, which can then be pretty printed."""
391 self.visiting.clear()
392 if self.show_type_prefix:
393 ysh_type = value_str(val.tag(), dot=False)
394 return _Group(
395 _Concat([
396 _Text("(" + ysh_type + ")"),
397 _Break(" "),
398 self._Value(val)
399 ]))
400 else:
401 return self._Value(val)
402
403 def _Styled(self, style, mdoc):
404 # type: (str, MeasuredDoc) -> MeasuredDoc
405 """Apply the ANSI style string to the given node, if use_styles is set."""
406 if self.use_styles:
407 return _Concat([
408 MeasuredDoc(doc.Text(style), _EmptyMeasure()), mdoc,
409 MeasuredDoc(doc.Text(ansi.RESET), _EmptyMeasure())
410 ])
411 else:
412 return mdoc
413
414 def _Surrounded(self, open, mdoc, close):
415 # type: (str, MeasuredDoc, str) -> MeasuredDoc
416 """Print one of two options (using '[', ']' for open, close):
417
418 ```
419 [mdoc]
420 ------
421 [
422 mdoc
423 ]
424 ```
425 """
426 return _Group(
427 _Concat([
428 _Text(open),
429 _Indent(self.indent, _Concat([_Break(""), mdoc])),
430 _Break(""),
431 _Text(close)
432 ]))
433
434 def _SurroundedAndPrefixed(self, open, prefix, sep, mdoc, close):
435 # type: (str, MeasuredDoc, str, MeasuredDoc, str) -> MeasuredDoc
436 """Print one of two options
437 (using '[', 'prefix', ':', 'mdoc', ']' for open, prefix, sep, mdoc, close):
438
439 ```
440 [prefix:mdoc]
441 ------
442 [prefix
443 mdoc
444 ]
445 ```
446 """
447 return _Group(
448 _Concat([
449 _Text(open), prefix,
450 _Indent(self.indent, _Concat([_Break(sep), mdoc])),
451 _Break(""),
452 _Text(close)
453 ]))
454
455 def _Join(self, items, sep, space):
456 # type: (List[MeasuredDoc], str, str) -> MeasuredDoc
457 """Join `items`, using either 'sep+space' or 'sep+newline' between them.
458
459 E.g., if sep and space are ',' and '_', print one of these two cases:
460 ```
461 first,_second,_third
462 ------
463 first,
464 second,
465 third
466 ```
467 """
468 seq = [] # type: List[MeasuredDoc]
469 for i, item in enumerate(items):
470 if i != 0:
471 seq.append(_Text(sep))
472 seq.append(_Break(space))
473 seq.append(item)
474 return _Concat(seq)
475
476 def _Tabular(self, items, sep):
477 # type: (List[MeasuredDoc], str) -> MeasuredDoc
478 """Join `items` together, using one of three styles:
479
480 (showing spaces as underscores for clarity)
481 ```
482 first,_second,_third,_fourth,_fifth,_sixth,_seventh,_eighth
483 ------
484 first,___second,__third,
485 fourth,__fifth,___sixth,
486 seventh,_eighth
487 ------
488 first,
489 second,
490 third,
491 fourth,
492 fifth,
493 sixth,
494 seventh,
495 eighth
496 ```
497
498 The first "single line" style is used if the items fit on one line. The
499 second "tabular' style is used if the flat width of all items is no
500 greater than `self.max_tabular_width`. The third "multi line" style is
501 used otherwise.
502 """
503
504 # Why not "just" use tabular alignment so long as two items fit on every
505 # line? Because it isn't possible to check for that in the pretty
506 # printing language. There are two sorts of conditionals we can do:
507 #
508 # A. Inside the pretty printing language, which supports exactly one
509 # conditional: "does it fit on one line?".
510 # B. Outside the pretty printing language we can run arbitrary Python
511 # code, but we don't know how much space is available on the line
512 # because it depends on the context in which we're printed, which may
513 # vary.
514 #
515 # We're picking between the three styles, by using (A) to check if the
516 # first style fits on one line, then using (B) with "are all the items
517 # smaller than `self.max_tabular_width`?" to pick between style 2 and
518 # style 3.
519
520 if len(items) == 0:
521 return _Text("")
522
523 max_flat_len = 0
524 seq = [] # type: List[MeasuredDoc]
525 for i, item in enumerate(items):
526 if i != 0:
527 seq.append(_Text(sep))
528 seq.append(_Break(" "))
529 seq.append(item)
530 max_flat_len = max(max_flat_len, item.measure.flat)
531 non_tabular = _Concat(seq)
532
533 sep_width = TryUnicodeWidth(sep)
534 if max_flat_len + sep_width + 1 <= self.max_tabular_width:
535 tabular_seq = [] # type: List[MeasuredDoc]
536 for i, item in enumerate(items):
537 tabular_seq.append(item)
538 if i != len(items) - 1:
539 padding = max_flat_len - item.measure.flat + 1
540 tabular_seq.append(_Text(sep))
541 tabular_seq.append(_Group(_Break(" " * padding)))
542 tabular = _Concat(tabular_seq)
543 return _Group(_IfFlat(non_tabular, tabular))
544 else:
545 return non_tabular
546
547 def _DictKey(self, s):
548 # type: (str) -> MeasuredDoc
549 if match.IsValidVarName(s):
550 return _Text(s)
551 else:
552 return _Text(fastfunc.J8EncodeString(s, True)) # lossy_json=True
553
554 def _StringLiteral(self, s):
555 # type: (str) -> MeasuredDoc
556 return self._Styled(self.string_style,
557 _Text(fastfunc.J8EncodeString(
558 s, True))) # lossy_json=True
559
560 def _BashStringLiteral(self, s):
561 # type: (str) -> MeasuredDoc
562 return self._Styled(self.string_style,
563 _Text(fastfunc.ShellEncodeString(s, 0)))
564
565 def _YshList(self, vlist):
566 # type: (value.List) -> MeasuredDoc
567 """Print a string literal."""
568 if len(vlist.items) == 0:
569 return _Text("[]")
570 mdocs = [self._Value(item) for item in vlist.items]
571 return self._Surrounded("[", self._Tabular(mdocs, ","), "]")
572
573 def _YshDict(self, vdict):
574 # type: (value.Dict) -> MeasuredDoc
575 if len(vdict.d) == 0:
576 return _Text("{}")
577 mdocs = [] # type: List[MeasuredDoc]
578 for k, v in iteritems(vdict.d):
579 mdocs.append(
580 _Concat([self._DictKey(k),
581 _Text(": "),
582 self._Value(v)]))
583 return self._Surrounded("{", self._Join(mdocs, ",", " "), "}")
584
585 def _BashArray(self, varray):
586 # type: (value.BashArray) -> MeasuredDoc
587 type_name = self._Styled(self.type_style, _Text("BashArray"))
588 if len(varray.strs) == 0:
589 return _Concat([_Text("("), type_name, _Text(")")])
590 mdocs = [] # type: List[MeasuredDoc]
591 for s in varray.strs:
592 if s is None:
593 mdocs.append(_Text("null"))
594 else:
595 mdocs.append(self._BashStringLiteral(s))
596 return self._SurroundedAndPrefixed("(", type_name, " ",
597 self._Tabular(mdocs, ""), ")")
598
599 def _BashAssoc(self, vassoc):
600 # type: (value.BashAssoc) -> MeasuredDoc
601 type_name = self._Styled(self.type_style, _Text("BashAssoc"))
602 if len(vassoc.d) == 0:
603 return _Concat([_Text("("), type_name, _Text(")")])
604 mdocs = [] # type: List[MeasuredDoc]
605 for k2, v2 in iteritems(vassoc.d):
606 mdocs.append(
607 _Concat([
608 _Text("["),
609 self._BashStringLiteral(k2),
610 _Text("]="),
611 self._BashStringLiteral(v2)
612 ]))
613 return self._SurroundedAndPrefixed("(", type_name, " ",
614 self._Join(mdocs, "", " "), ")")
615
616 def _Value(self, val):
617 # type: (value_t) -> MeasuredDoc
618
619 with tagswitch(val) as case:
620 if case(value_e.Null):
621 return self._Styled(self.null_style, _Text("null"))
622
623 elif case(value_e.Bool):
624 b = cast(value.Bool, val).b
625 return self._Styled(self.bool_style,
626 _Text("true" if b else "false"))
627
628 elif case(value_e.Int):
629 i = cast(value.Int, val).i
630 return self._Styled(self.number_style, _Text(mops.ToStr(i)))
631
632 elif case(value_e.Float):
633 f = cast(value.Float, val).f
634 return self._Styled(self.number_style, _Text(str(f)))
635
636 elif case(value_e.Str):
637 s = cast(value.Str, val).s
638 return self._StringLiteral(s)
639
640 elif case(value_e.Range):
641 r = cast(value.Range, val)
642 return self._Styled(
643 self.number_style,
644 _Concat([
645 _Text(str(r.lower)),
646 _Text(" .. "),
647 _Text(str(r.upper))
648 ]))
649
650 elif case(value_e.List):
651 vlist = cast(value.List, val)
652 heap_id = HeapValueId(vlist)
653 if self.visiting.get(heap_id, False):
654 return _Concat([
655 _Text("["),
656 self._Styled(self.cycle_style, _Text("...")),
657 _Text("]")
658 ])
659 else:
660 self.visiting[heap_id] = True
661 result = self._YshList(vlist)
662 self.visiting[heap_id] = False
663 return result
664
665 elif case(value_e.Dict):
666 vdict = cast(value.Dict, val)
667 heap_id = HeapValueId(vdict)
668 if self.visiting.get(heap_id, False):
669 return _Concat([
670 _Text("{"),
671 self._Styled(self.cycle_style, _Text("...")),
672 _Text("}")
673 ])
674 else:
675 self.visiting[heap_id] = True
676 result = self._YshDict(vdict)
677 self.visiting[heap_id] = False
678 return result
679
680 elif case(value_e.BashArray):
681 varray = cast(value.BashArray, val)
682 return self._BashArray(varray)
683
684 elif case(value_e.BashAssoc):
685 vassoc = cast(value.BashAssoc, val)
686 return self._BashAssoc(vassoc)
687
688 else:
689 ysh_type = value_str(val.tag(), dot=False)
690 id_str = ValueIdString(val)
691 return self._Styled(self.type_style,
692 _Text("<" + ysh_type + id_str + ">"))
693
694
695# vim: sw=4