OILS / data_lang / pretty.py View on Github | oilshell.org

692 lines, 335 significant
1#!/usr/bin/env python2
2"""
3Pretty print Oils values (and later other data/languages as well).
4
5Pretty printing means intelligently choosing whitespace including indentation
6and newline placement, to attempt to display data nicely while staying within a
7maximum line width.
8"""
9
10# ~~~ Architecture ~~~
11#
12# Based on a version of the algorithm from Wadler's "A Prettier Printer".
13#
14# Pretty printing proceeds in two phases:
15#
16# 1. Convert the thing you want to print into a `doc`.
17# 2. Print the `doc` using a standard algorithm.
18#
19# This separation keeps the details of the data you want to print separate from
20# the printing algorithm.
21
22# ~~~ Pretty Printing Overview ~~~
23#
24# If you're just using this file, you don't need to know how pretty printing
25# works. Just call `PrettyPrinter().PrintValue()`. However if you want to change
26# or extend how values are printed, you'll need to know, so here's an overview.
27#
28# You may want to first read Walder's "A Prettier Printer", which this is based
29# off of:
30# https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
31#
32# Some additional reading, though only tangentially related:
33#
34# - https://homepages.inf.ed.ac.uk/wadler/papers/prettier/prettier.pdf
35# - https://lindig.github.io/papers/strictly-pretty-2000.pdf
36# - https://justinpombrio.net/2024/02/23/a-twist-on-Wadlers-printer.html
37# - https://lobste.rs/s/1r0aak/twist_on_wadler_s_printer
38# - https://lobste.rs/s/aevptj/why_is_prettier_rock_solid
39#
40# ~ Constructors ~
41#
42# There are just a few constructors for `doc`, from which everything else is
43# built from.
44#
45# Text(string) prints like:
46# |string
47#
48# Break(string) prints like:
49# |string
50# or like a newline:
51# |
52# |
53# (It does the latter if printed in "flat" mode, and the former otherwise. See
54# Group for details.)
55#
56# Concat(a, b) prints like:
57# |AAAAA
58# |AAABBB
59# |BBBBB
60#
61# Indent(3, a) prints like:
62# |AAAAA
63# | AAAAA
64# | AAAAA
65#
66# Group(a) makes a decision. It either:
67# - Prints `a` "flat", meaning that (i) every Break inside of it is printed as a
68# string instead of as a newline, and (ii) every Group nested inside of it is
69# printed flat.
70# - Prints `a` normally, meaning that (i) the Breaks inside of it are printed as
71# newlines, and (ii) the Groups inside of it make their own decision about
72# whether to be flat.
73# It makes this decision greedily. If the current line would not overflow if the
74# group printed flat, then it will print flat. This takes into account not only
75# the group itself, but the content before and after it on the same line.
76#
77# IfFlat(a, b) prints a if in flat mode or b otherwise.
78#
79# ~ Measures ~
80#
81# The algorithm used here is close to the one originally described by Wadler,
82# but it precomputes a "measure" for each node in the `doc`. This "measure"
83# allows each Groups to decide whether to print flat or not without needing to
84# look ahead per Wadler's algorithm. A measure has two pieces of information:
85#
86# - Measure.flat is the width of the doc if it's printed flat.
87# - Measure.nonflat is the width of the doc until the _earliest possible_
88# newline, or -1 if it doesn't contain a Break.
89#
90# Measures are used in two steps. First, they're computed bottom-up on the
91# `doc`, measuring the size of each node. Later, _PrintDoc() stores a measure in
92# each DocFragment. These Measures measure something different: the width from
93# the doc _to the end of the entire doc tree_. This second set of Measures (the
94# ones in the DocFragments) are computed top-down, and they're used to decide
95# for each Group whether to use flat mode or not, without needing to scan ahead.
96
97from __future__ import print_function
98
99from _devbuild.gen.pretty_asdl import doc, doc_e, DocFragment, Measure, MeasuredDoc
100from _devbuild.gen.value_asdl import value, value_e, value_t, value_str
101from data_lang.j8 import ValueIdString, HeapValueId
102from core import ansi
103from frontend import match
104from mycpp import mops
105from mycpp.mylib import log, tagswitch, BufWriter, iteritems
106from typing import cast, List, Dict
107import fastfunc
108import libc
109
110_ = log
111
112################
113# Measurements #
114################
115
116
117def TryUnicodeWidth(s):
118 # type: (str) -> int
119 try:
120 width = libc.wcswidth(s)
121 except UnicodeError:
122 # e.g. en_US.UTF-8 locale missing, just return the number of bytes
123 width = len(s)
124
125 if width == -1: # non-printable wide char
126 return len(s)
127
128 return width
129
130
131def _EmptyMeasure():
132 # type: () -> Measure
133 """The measure of an empty doc."""
134 return Measure(0, -1)
135
136
137def _FlattenMeasure(measure):
138 # type: (Measure) -> Measure
139 """The measure if its document is rendered flat."""
140 return Measure(measure.flat, -1)
141
142
143def _ConcatMeasure(m1, m2):
144 # type: (Measure, Measure) -> Measure
145 """Compute the measure of concatenated docs.
146
147 If m1 and m2 are the measures of doc1 and doc2,
148 then _ConcatMeasure(m1, m2) is the measure of doc.Concat([doc1, doc2]).
149 This concatenation is associative but not commutative."""
150 if m1.nonflat != -1:
151 return Measure(m1.flat + m2.flat, m1.nonflat)
152 elif m2.nonflat != -1:
153 return Measure(m1.flat + m2.flat, m1.flat + m2.nonflat)
154 else:
155 return Measure(m1.flat + m2.flat, -1)
156
157
158def _SuffixLen(measure):
159 # type: (Measure) -> int
160 """The width until the earliest possible newline, or end of document."""
161 if measure.nonflat != -1:
162 return measure.nonflat
163 else:
164 return measure.flat
165
166
167####################
168# Doc Construction #
169####################
170
171
172def _Text(string):
173 # type: (str) -> MeasuredDoc
174 """Print `string` (which must not contain a newline)."""
175 return MeasuredDoc(doc.Text(string), Measure(TryUnicodeWidth(string), -1))
176
177
178def _Break(string):
179 # type: (str) -> MeasuredDoc
180 """If in `flat` mode, print `string`, otherwise print `\n`."""
181 return MeasuredDoc(doc.Break(string), Measure(TryUnicodeWidth(string), 0))
182
183
184def _Indent(indent, mdoc):
185 # type: (int, MeasuredDoc) -> MeasuredDoc
186 """Add `indent` spaces after every newline in `mdoc`."""
187 return MeasuredDoc(doc.Indent(indent, mdoc), mdoc.measure)
188
189
190def _Concat(mdocs):
191 # type: (List[MeasuredDoc]) -> MeasuredDoc
192 """Print the mdocs in order (with no space in between)."""
193 measure = _EmptyMeasure()
194 for mdoc in mdocs:
195 measure = _ConcatMeasure(measure, mdoc.measure)
196 return MeasuredDoc(doc.Concat(mdocs), measure)
197
198
199def _Group(mdoc):
200 # type: (MeasuredDoc) -> MeasuredDoc
201 """Print `mdoc`. Use flat mode if `mdoc` will fit on the current line."""
202 return MeasuredDoc(doc.Group(mdoc), mdoc.measure)
203
204
205def _IfFlat(flat_mdoc, nonflat_mdoc):
206 # type: (MeasuredDoc, MeasuredDoc) -> MeasuredDoc
207 """If in flat mode, print `flat_mdoc` otherwise print `nonflat_mdoc`."""
208 return MeasuredDoc(
209 doc.IfFlat(flat_mdoc, nonflat_mdoc),
210 Measure(flat_mdoc.measure.flat, nonflat_mdoc.measure.nonflat))
211
212
213###################
214# Pretty Printing #
215###################
216
217_DEFAULT_MAX_WIDTH = 80
218_DEFAULT_INDENTATION = 4
219_DEFAULT_USE_STYLES = True
220_DEFAULT_SHOW_TYPE_PREFIX = True
221_DEFAULT_MAX_TABULAR_WIDTH = 16 # Tuned for float-demo in data_lang/pretty-benchmark.sh
222
223
224class PrettyPrinter(object):
225 """Pretty print an Oils value."""
226
227 def __init__(self):
228 # type: () -> None
229 """Construct a PrettyPrinter with default configuration options.
230
231 Use the Set*() methods for configuration before printing."""
232 self.max_width = _DEFAULT_MAX_WIDTH
233 self.indent = _DEFAULT_INDENTATION
234 self.use_styles = _DEFAULT_USE_STYLES
235 self.show_type_prefix = _DEFAULT_SHOW_TYPE_PREFIX
236 self.max_tabular_width = _DEFAULT_MAX_TABULAR_WIDTH
237
238 def SetMaxWidth(self, max_width):
239 # type: (int) -> None
240 """Set the maximum line width.
241
242 Pretty printing will attempt to (but does not guarantee to) fit the doc
243 within this width.
244 """
245 self.max_width = max_width
246
247 def SetIndent(self, indent):
248 # type: (int) -> None
249 """Set the number of spaces per indentation level."""
250 self.indent = indent
251
252 def SetUseStyles(self, use_styles):
253 # type: (bool) -> None
254 """If true, print with ansi colors and styles. Otherwise print with plain text."""
255 self.use_styles = use_styles
256
257 def SetShowTypePrefix(self, show_type_prefix):
258 # type: (bool) -> None
259 """Set whether or not to print a type before the top-level value.
260
261 E.g. `(Bool) true`"""
262 self.show_type_prefix = show_type_prefix
263
264 def SetMaxTabularWidth(self, max_tabular_width):
265 # type: (int) -> None
266 """Set the maximum width that list elements can be, for them to be
267 vertically aligned."""
268 self.max_tabular_width = max_tabular_width
269
270 def PrintValue(self, val, buf):
271 # type: (value_t, BufWriter) -> None
272 """Pretty print an Oils value to a BufWriter."""
273 constructor = _DocConstructor(self.indent, self.use_styles,
274 self.show_type_prefix,
275 self.max_tabular_width)
276 document = constructor.Value(val)
277 self._PrintDoc(document, buf)
278
279 def _Fits(self, prefix_len, group, suffix_measure):
280 # type: (int, doc.Group, Measure) -> bool
281 """Will `group` fit flat on the current line?"""
282 measure = _ConcatMeasure(_FlattenMeasure(group.mdoc.measure),
283 suffix_measure)
284 return prefix_len + _SuffixLen(measure) <= self.max_width
285
286 def _PrintDoc(self, document, buf):
287 # type: (MeasuredDoc, BufWriter) -> None
288 """Pretty print a `pretty.doc` to a BufWriter."""
289
290 # The width of the text we've printed so far on the current line
291 prefix_len = 0
292 # A _stack_ of document fragments to print. Each fragment contains:
293 # - A MeasuredDoc (doc node and its measure, saying how "big" it is)
294 # - The indentation level to print this doc node at.
295 # - Is this doc node being printed in flat mode?
296 # - The measure _from just after the doc node, to the end of the entire document_.
297 # (Call this the suffix_measure)
298 fragments = [DocFragment(_Group(document), 0, False, _EmptyMeasure())]
299
300 while len(fragments) > 0:
301 frag = fragments.pop()
302 with tagswitch(frag.mdoc.doc) as case:
303
304 if case(doc_e.Text):
305 text = cast(doc.Text, frag.mdoc.doc)
306 buf.write(text.string)
307 prefix_len += frag.mdoc.measure.flat
308
309 elif case(doc_e.Break):
310 if frag.is_flat:
311 break_str = cast(doc.Break, frag.mdoc.doc).string
312 buf.write(break_str)
313 prefix_len += frag.mdoc.measure.flat
314 else:
315 buf.write('\n')
316 buf.write_spaces(frag.indent)
317 prefix_len = frag.indent
318
319 elif case(doc_e.Indent):
320 indented = cast(doc.Indent, frag.mdoc.doc)
321 fragments.append(
322 DocFragment(indented.mdoc,
323 frag.indent + indented.indent,
324 frag.is_flat, frag.measure))
325
326 elif case(doc_e.Concat):
327 # If we encounter Concat([A, B, C]) with a suffix measure M,
328 # we need to push A,B,C onto the stack in reverse order:
329 # - C, with suffix_measure = B.measure + A.measure + M
330 # - B, with suffix_measure = A.measure + M
331 # - A, with suffix_measure = M
332 concat = cast(doc.Concat, frag.mdoc.doc)
333 measure = frag.measure
334 for mdoc in reversed(concat.mdocs):
335 fragments.append(
336 DocFragment(mdoc, frag.indent, frag.is_flat,
337 measure))
338 measure = _ConcatMeasure(mdoc.measure, measure)
339
340 elif case(doc_e.Group):
341 # If the group would fit on the current line when printed
342 # flat, do so. Otherwise, print it non-flat.
343 group = cast(doc.Group, frag.mdoc.doc)
344 flat = self._Fits(prefix_len, group, frag.measure)
345 fragments.append(
346 DocFragment(group.mdoc, frag.indent, flat,
347 frag.measure))
348
349 elif case(doc_e.IfFlat):
350 if_flat = cast(doc.IfFlat, frag.mdoc.doc)
351 if frag.is_flat:
352 subdoc = if_flat.flat_mdoc
353 else:
354 subdoc = if_flat.nonflat_mdoc
355 fragments.append(
356 DocFragment(subdoc, frag.indent, frag.is_flat,
357 frag.measure))
358
359
360################
361# Value -> Doc #
362################
363
364
365class _DocConstructor:
366 """Converts Oil values into `doc`s, which can then be pretty printed."""
367
368 def __init__(self, indent, use_styles, show_type_prefix,
369 max_tabular_width):
370 # type: (int, bool, bool, int) -> None
371 self.indent = indent
372 self.use_styles = use_styles
373 self.show_type_prefix = show_type_prefix
374 self.max_tabular_width = max_tabular_width
375 self.visiting = {} # type: Dict[int, bool]
376
377 # These can be configurable later
378 self.number_style = ansi.YELLOW
379 self.null_style = ansi.BOLD + ansi.RED
380 self.bool_style = ansi.BOLD + ansi.BLUE
381 self.string_style = ansi.GREEN
382 self.cycle_style = ansi.BOLD + ansi.MAGENTA
383 self.type_style = ansi.CYAN
384
385 def Value(self, val):
386 # type: (value_t) -> MeasuredDoc
387 """Convert an Oils value into a `doc`, which can then be pretty printed."""
388 self.visiting.clear()
389 if self.show_type_prefix:
390 ysh_type = value_str(val.tag(), dot=False)
391 return _Group(
392 _Concat([
393 _Text("(" + ysh_type + ")"),
394 _Break(" "),
395 self._Value(val)
396 ]))
397 else:
398 return self._Value(val)
399
400 def _Styled(self, style, mdoc):
401 # type: (str, MeasuredDoc) -> MeasuredDoc
402 """Apply the ANSI style string to the given node, if use_styles is set."""
403 if self.use_styles:
404 return _Concat([
405 MeasuredDoc(doc.Text(style), _EmptyMeasure()), mdoc,
406 MeasuredDoc(doc.Text(ansi.RESET), _EmptyMeasure())
407 ])
408 else:
409 return mdoc
410
411 def _Surrounded(self, open, mdoc, close):
412 # type: (str, MeasuredDoc, str) -> MeasuredDoc
413 """Print one of two options (using '[', ']' for open, close):
414
415 ```
416 [mdoc]
417 ------
418 [
419 mdoc
420 ]
421 ```
422 """
423 return _Group(
424 _Concat([
425 _Text(open),
426 _Indent(self.indent, _Concat([_Break(""), mdoc])),
427 _Break(""),
428 _Text(close)
429 ]))
430
431 def _SurroundedAndPrefixed(self, open, prefix, sep, mdoc, close):
432 # type: (str, MeasuredDoc, str, MeasuredDoc, str) -> MeasuredDoc
433 """Print one of two options
434 (using '[', 'prefix', ':', 'mdoc', ']' for open, prefix, sep, mdoc, close):
435
436 ```
437 [prefix:mdoc]
438 ------
439 [prefix
440 mdoc
441 ]
442 ```
443 """
444 return _Group(
445 _Concat([
446 _Text(open), prefix,
447 _Indent(self.indent, _Concat([_Break(sep), mdoc])),
448 _Break(""),
449 _Text(close)
450 ]))
451
452 def _Join(self, items, sep, space):
453 # type: (List[MeasuredDoc], str, str) -> MeasuredDoc
454 """Join `items`, using either 'sep+space' or 'sep+newline' between them.
455
456 E.g., if sep and space are ',' and '_', print one of these two cases:
457 ```
458 first,_second,_third
459 ------
460 first,
461 second,
462 third
463 ```
464 """
465 seq = [] # type: List[MeasuredDoc]
466 for i, item in enumerate(items):
467 if i != 0:
468 seq.append(_Text(sep))
469 seq.append(_Break(space))
470 seq.append(item)
471 return _Concat(seq)
472
473 def _Tabular(self, items, sep):
474 # type: (List[MeasuredDoc], str) -> MeasuredDoc
475 """Join `items` together, using one of three styles:
476
477 (showing spaces as underscores for clarity)
478 ```
479 first,_second,_third,_fourth,_fifth,_sixth,_seventh,_eighth
480 ------
481 first,___second,__third,
482 fourth,__fifth,___sixth,
483 seventh,_eighth
484 ------
485 first,
486 second,
487 third,
488 fourth,
489 fifth,
490 sixth,
491 seventh,
492 eighth
493 ```
494
495 The first "single line" style is used if the items fit on one line. The
496 second "tabular' style is used if the flat width of all items is no
497 greater than `self.max_tabular_width`. The third "multi line" style is
498 used otherwise.
499 """
500
501 # Why not "just" use tabular alignment so long as two items fit on every
502 # line? Because it isn't possible to check for that in the pretty
503 # printing language. There are two sorts of conditionals we can do:
504 #
505 # A. Inside the pretty printing language, which supports exactly one
506 # conditional: "does it fit on one line?".
507 # B. Outside the pretty printing language we can run arbitrary Python
508 # code, but we don't know how much space is available on the line
509 # because it depends on the context in which we're printed, which may
510 # vary.
511 #
512 # We're picking between the three styles, by using (A) to check if the
513 # first style fits on one line, then using (B) with "are all the items
514 # smaller than `self.max_tabular_width`?" to pick between style 2 and
515 # style 3.
516
517 if len(items) == 0:
518 return _Text("")
519
520 max_flat_len = 0
521 seq = [] # type: List[MeasuredDoc]
522 for i, item in enumerate(items):
523 if i != 0:
524 seq.append(_Text(sep))
525 seq.append(_Break(" "))
526 seq.append(item)
527 max_flat_len = max(max_flat_len, item.measure.flat)
528 non_tabular = _Concat(seq)
529
530 sep_width = TryUnicodeWidth(sep)
531 if max_flat_len + sep_width + 1 <= self.max_tabular_width:
532 tabular_seq = [] # type: List[MeasuredDoc]
533 for i, item in enumerate(items):
534 tabular_seq.append(item)
535 if i != len(items) - 1:
536 padding = max_flat_len - item.measure.flat + 1
537 tabular_seq.append(_Text(sep))
538 tabular_seq.append(_Group(_Break(" " * padding)))
539 tabular = _Concat(tabular_seq)
540 return _Group(_IfFlat(non_tabular, tabular))
541 else:
542 return non_tabular
543
544 def _DictKey(self, s):
545 # type: (str) -> MeasuredDoc
546 if match.IsValidVarName(s):
547 return _Text(s)
548 else:
549 return _Text(fastfunc.J8EncodeString(s, True)) # lossy_json=True
550
551 def _StringLiteral(self, s):
552 # type: (str) -> MeasuredDoc
553 return self._Styled(self.string_style,
554 _Text(fastfunc.J8EncodeString(
555 s, True))) # lossy_json=True
556
557 def _BashStringLiteral(self, s):
558 # type: (str) -> MeasuredDoc
559 return self._Styled(self.string_style,
560 _Text(fastfunc.ShellEncodeString(s, 0)))
561
562 def _YshList(self, vlist):
563 # type: (value.List) -> MeasuredDoc
564 """Print a string literal."""
565 if len(vlist.items) == 0:
566 return _Text("[]")
567 mdocs = [self._Value(item) for item in vlist.items]
568 return self._Surrounded("[", self._Tabular(mdocs, ","), "]")
569
570 def _YshDict(self, vdict):
571 # type: (value.Dict) -> MeasuredDoc
572 if len(vdict.d) == 0:
573 return _Text("{}")
574 mdocs = [] # type: List[MeasuredDoc]
575 for k, v in iteritems(vdict.d):
576 mdocs.append(
577 _Concat([self._DictKey(k),
578 _Text(": "),
579 self._Value(v)]))
580 return self._Surrounded("{", self._Join(mdocs, ",", " "), "}")
581
582 def _BashArray(self, varray):
583 # type: (value.BashArray) -> MeasuredDoc
584 type_name = self._Styled(self.type_style, _Text("BashArray"))
585 if len(varray.strs) == 0:
586 return _Concat([_Text("("), type_name, _Text(")")])
587 mdocs = [] # type: List[MeasuredDoc]
588 for s in varray.strs:
589 if s is None:
590 mdocs.append(_Text("null"))
591 else:
592 mdocs.append(self._BashStringLiteral(s))
593 return self._SurroundedAndPrefixed("(", type_name, " ",
594 self._Tabular(mdocs, ""), ")")
595
596 def _BashAssoc(self, vassoc):
597 # type: (value.BashAssoc) -> MeasuredDoc
598 type_name = self._Styled(self.type_style, _Text("BashAssoc"))
599 if len(vassoc.d) == 0:
600 return _Concat([_Text("("), type_name, _Text(")")])
601 mdocs = [] # type: List[MeasuredDoc]
602 for k2, v2 in iteritems(vassoc.d):
603 mdocs.append(
604 _Concat([
605 _Text("["),
606 self._BashStringLiteral(k2),
607 _Text("]="),
608 self._BashStringLiteral(v2)
609 ]))
610 return self._SurroundedAndPrefixed("(", type_name, " ",
611 self._Join(mdocs, "", " "), ")")
612
613 def _Value(self, val):
614 # type: (value_t) -> MeasuredDoc
615
616 with tagswitch(val) as case:
617 if case(value_e.Null):
618 return self._Styled(self.null_style, _Text("null"))
619
620 elif case(value_e.Bool):
621 b = cast(value.Bool, val).b
622 return self._Styled(self.bool_style,
623 _Text("true" if b else "false"))
624
625 elif case(value_e.Int):
626 i = cast(value.Int, val).i
627 return self._Styled(self.number_style, _Text(mops.ToStr(i)))
628
629 elif case(value_e.Float):
630 f = cast(value.Float, val).f
631 return self._Styled(self.number_style, _Text(str(f)))
632
633 elif case(value_e.Str):
634 s = cast(value.Str, val).s
635 return self._StringLiteral(s)
636
637 elif case(value_e.Range):
638 r = cast(value.Range, val)
639 return self._Styled(
640 self.number_style,
641 _Concat([
642 _Text(str(r.lower)),
643 _Text(" .. "),
644 _Text(str(r.upper))
645 ]))
646
647 elif case(value_e.List):
648 vlist = cast(value.List, val)
649 heap_id = HeapValueId(vlist)
650 if self.visiting.get(heap_id, False):
651 return _Concat([
652 _Text("["),
653 self._Styled(self.cycle_style, _Text("...")),
654 _Text("]")
655 ])
656 else:
657 self.visiting[heap_id] = True
658 result = self._YshList(vlist)
659 self.visiting[heap_id] = False
660 return result
661
662 elif case(value_e.Dict):
663 vdict = cast(value.Dict, val)
664 heap_id = HeapValueId(vdict)
665 if self.visiting.get(heap_id, False):
666 return _Concat([
667 _Text("{"),
668 self._Styled(self.cycle_style, _Text("...")),
669 _Text("}")
670 ])
671 else:
672 self.visiting[heap_id] = True
673 result = self._YshDict(vdict)
674 self.visiting[heap_id] = False
675 return result
676
677 elif case(value_e.BashArray):
678 varray = cast(value.BashArray, val)
679 return self._BashArray(varray)
680
681 elif case(value_e.BashAssoc):
682 vassoc = cast(value.BashAssoc, val)
683 return self._BashAssoc(vassoc)
684
685 else:
686 ysh_type = value_str(val.tag(), dot=False)
687 id_str = ValueIdString(val)
688 return self._Styled(self.type_style,
689 _Text("<" + ysh_type + id_str + ">"))
690
691
692# vim: sw=4