OILS / doctools / oils_doc.py View on Github | oilshell.org

671 lines, 380 significant
1#!/usr/bin/env python2
2"""oils_doc.py: HTML processing for Oil documentation.
3
4Plugins:
5 ExpandLinks expands $xref, etc.
6 PygmentsPlugin -- for ```python, ```sh, ```c, etc.
7 HelpTopicsPlugin -- for help-index.html
8
9 ShPromptPlugin -- understands $ echo hi, but doesn't run anything
10 ShSession -- runs shell snippets and caches the output
11"""
12from __future__ import print_function
13
14import cgi
15import cStringIO
16import re
17import sys
18
19from doctools.util import log
20from lazylex import html
21
22
def RemoveComments(s):
    """Return the HTML string `s` with <!-- comments --> removed.

    Comments whose text contains 'REPLACE' are kept, because
    doc/release-index.md has markers like <!-- REPLACE_WITH_DATE -->.

    Args:
      s: an HTML string.

    Returns:
      The resulting HTML string.
    """
    f = cStringIO.StringIO()
    out = html.Output(s, f)

    # Start offset of the current token.  The token iterator only yields the
    # END offset of each token, so the start has to be tracked by hand.
    pos = 0

    for tok_id, end_pos in html.ValidTokens(s):
        if tok_id == html.Comment:
            value = s[pos:end_pos]
            # doc/release-index.md has <!-- REPLACE_WITH_DATE --> etc.
            if 'REPLACE' not in value:
                # Emit everything before the comment, then jump past it.
                out.PrintUntil(pos)
                out.SkipTo(end_pos)
        pos = end_pos

    out.PrintTheRest()
    return f.getvalue()
43
44
45class _Abbrev(object):
46
47 def __init__(self, fmt):
48 self.fmt = fmt
49
50 def __call__(self, value):
51 return self.fmt % {'value': value}
52
53
# Maps the abbreviation name written in a link shortcut ($name:arg) to the
# _Abbrev that builds the URL.  Built from (name, URL template) pairs.
_ABBREVIATIONS = dict((name, _Abbrev(fmt)) for name, fmt in [
    ('xref', '/cross-ref.html?tag=%(value)s#%(value)s'),

    # alias for osh-help, for backward compatibility
    # to link to the same version

    # TODO: Remove all of these broken links!
    ('help', 'osh-help.html?topic=%(value)s#%(value)s'),
    ('osh-help', 'osh-help.html?topic=%(value)s#%(value)s'),
    ('oil-help', 'oil-help.html?topic=%(value)s#%(value)s'),

    # New style: one for every chapter?
    # Problem: can't use relative links here, because some are from doc/ref,
    # and some are from doc
    ('chap-type-method', 'chap-type-method.html?topic=%(value)s#%(value)s'),
    ('chap-plugin', 'chap-plugin.html?topic=%(value)s#%(value)s'),
    ('chap-builtin-cmd', 'chap-builtin-cmd.html?topic=%(value)s#%(value)s'),

    # for blog
    ('osh-help-latest',
     '//oilshell.org/release/latest/doc/osh-help.html?topic=%(value)s#%(value)s'
     ),
    ('oil-help-latest',
     '//oilshell.org/release/latest/doc/oil-help.html?topic=%(value)s#%(value)s'
     ),

    # For the blog
    ('oils-doc', '//www.oilshell.org/release/latest/doc/%(value)s'),
    ('blog-tag', '/blog/tags.html?tag=%(value)s#%(value)s'),
    ('oils-commit', 'https://github.com/oilshell/oil/commit/%(value)s'),
    ('oils-src', 'https://github.com/oilshell/oil/blob/master/%(value)s'),
    ('blog-code-src',
     'https://github.com/oilshell/blog-code/blob/master/%(value)s'),
    ('issue', 'https://github.com/oilshell/oil/issues/%(value)s'),
    ('wiki', 'https://github.com/oilshell/oil/wiki/%(value)s'),
])

# Backward compatibility: the old oil-* names share the oils-* expanders.
_ABBREVIATIONS.update({
    'oil-src': _ABBREVIATIONS['oils-src'],
    'oil-commit': _ABBREVIATIONS['oils-commit'],
    'oil-doc': _ABBREVIATIONS['oils-doc'],
})
110
# Matches a link shortcut such as $xref:foo: a literal '$', an abbreviation
# name made of lowercase letters and dashes (group 1), then optionally ':'
# and a run of non-whitespace characters (group 2, None when absent).
# re.VERBOSE makes the spaces inside the pattern insignificant.
_SHORTCUT_RE = re.compile(r'\$ ([a-z\-]+) (?: : (\S+))?', re.VERBOSE)
113
114
def ExpandLinks(s):
    """Expand $xref:bash and so forth.

    For every <a> start tag whose href matches _SHORTCUT_RE, the href value is
    replaced with the URL built by the matching entry of _ABBREVIATIONS.  When
    the shortcut has no ':arg' part, e.g. [Wiki Page]($wiki), the text between
    <a ...> and </a> is used as the argument.

    Args:
      s: an HTML string.

    Returns:
      The HTML string with the shortcut hrefs expanded.

    Raises:
      RuntimeError: if a shortcut names an abbreviation that is not in
        _ABBREVIATIONS.
    """
    f = cStringIO.StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Start offset of the current token; the iterator only yields end offsets.
    pos = 0

    it = html.ValidTokens(s)
    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'a':
                open_tag_right = end_pos

                href_start, href_end = tag_lexer.GetSpanForAttrValue('href')
                if href_start == -1:
                    # <a> without href: nothing to expand.  Still advance
                    # `pos`, otherwise the next token would be lexed from
                    # the start of this tag.
                    pos = end_pos
                    continue

                # TODO: Need to unescape like GetAttr()
                href = s[href_start:href_end]

                new = None
                m = _SHORTCUT_RE.match(href)
                if m:
                    abbrev_name, arg = m.groups()
                    if not arg:
                        close_tag_left, close_tag_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'a')
                        arg = s[open_tag_right:close_tag_left]
                        # The iterator is now past </a>, so that is where
                        # the next token starts.
                        end_pos = close_tag_right

                    # Hack to so we can write [Wiki Page]($wiki) and have the
                    # link look like /Wiki-Page/
                    if abbrev_name == 'wiki':
                        arg = arg.replace(' ', '-')

                    func = _ABBREVIATIONS.get(abbrev_name)
                    if func is None:
                        raise RuntimeError('Invalid abbreviation %r' %
                                           abbrev_name)
                    new = func(arg)

                if new is not None:
                    # Replace only the attribute value; everything around it
                    # is copied through unchanged.
                    out.PrintUntil(href_start)
                    f.write(cgi.escape(new))
                    out.SkipTo(href_end)

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
174
175
176class _Plugin(object):
177
178 def __init__(self, s, start_pos, end_pos):
179 self.s = s
180 self.start_pos = start_pos
181 self.end_pos = end_pos
182
183 def PrintHighlighted(self, out):
184 raise NotImplementedError()
185
186
# Matches one line: any characters other than a newline (group 1), followed
# by an optional newline.
# Optional newline at end
_LINE_RE = re.compile(r'(.*) \n?', re.VERBOSE)

# A shell prompt line.  Groups:
#   1: the prompt -- optional non-whitespace text ending in '$'
#   2: the command text after the prompt
#   3: an HTML-escaped <TAB> marker, if present
#   4: a trailing comment introduced by two spaces and '#', if present
_PROMPT_LINE_RE = re.compile(
    r'''
(\S* \$)[ ]       # flush-left non-whitespace, then dollar and space is a prompt
(.*?)             # arbitrary text
(?:               # don't highlight tab completion
  (&lt;TAB&gt;)   # it's HTML escaped!!!
  .*?
)?
(?:
  [ ][ ]([#] .*)  # optionally: two spaces then a comment
)?
$
''', re.VERBOSE)

# A line that is not a prompt but ends with a comment; group 1 is the comment.
_EOL_COMMENT_RE = re.compile(
    r'''
.*?               # arbitrary text
[ ][ ]([#] .*)    # two spaces then a comment
$
''', re.VERBOSE)

# A line that starts with '#'.  Callers use match(), which anchors the
# pattern at the start position they pass in.
_COMMENT_LINE_RE = re.compile(r'#.*')
212
213
def Lines(s, start_pos, end_pos):
    """Yield the end offset of each line within s[start_pos:end_pos].

    A line's end offset includes its trailing newline when there is one, so
    each yielded value is also the start offset of the following line.

    Raises:
      RuntimeError: if _LINE_RE fails to match.
    """
    cursor = start_pos
    while True:
        if cursor >= end_pos:
            return

        match = _LINE_RE.match(s, cursor, end_pos)
        if match is None:
            raise RuntimeError("Should have matched a line")

        cursor = match.end(0)
        yield cursor
225
226
class ShPromptPlugin(_Plugin):
    """Highlight shell prompts."""

    def _WrapGroup(self, out, m, group, open_tag):
        """Emit regex group `group` of match `m` wrapped in open_tag ... </span>."""
        out.PrintUntil(m.start(group))
        out.Print(open_tag)
        out.PrintUntil(m.end(group))
        out.Print('</span>')

    def _HighlightLine(self, out, line_start, line_end):
        """Add spans to one line; the first matching kind of line wins."""
        text = self.s

        # 1. The whole line is a comment.
        comment_m = _COMMENT_LINE_RE.match(text, line_start, line_end)
        if comment_m:
            self._WrapGroup(out, comment_m, 0, '<span class="sh-comment">')
            return

        # 2. A prompt, its command, and optional <TAB> / trailing comment.
        prompt_m = _PROMPT_LINE_RE.match(text, line_start, line_end)
        if prompt_m:
            #log('MATCH %r', prompt_m.groups())
            self._WrapGroup(out, prompt_m, 1, '<span class="sh-prompt">')
            self._WrapGroup(out, prompt_m, 2, '<span class="sh-command">')

            if prompt_m.group(3):
                self._WrapGroup(out, prompt_m, 3,
                                '<span class="sh-tab-complete">')

            if prompt_m.group(4):
                self._WrapGroup(out, prompt_m, 4,
                                '<span class="sh-comment">')
            return

        # 3. Any other line that ends with a comment.
        eol_m = _EOL_COMMENT_RE.match(text, line_start, line_end)
        if eol_m:
            self._WrapGroup(out, eol_m, 1, '<span class="sh-comment">')

    def PrintHighlighted(self, out):
        """Write s[start_pos:end_pos] to `out`, adding <span> tags per line."""
        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            self._HighlightLine(out, line_start, line_end)

            # Whatever was not wrapped in a span is copied through as is.
            out.PrintUntil(line_end)

            line_start = line_end
277
278
class HelpTopicsPlugin(_Plugin):
    """Highlight blocks of doc/ref/toc-*.md."""

    def __init__(self, s, start_pos, end_pos, chapter, linkify_stop_col):
        _Plugin.__init__(self, s, start_pos, end_pos)
        self.linkify_stop_col = linkify_stop_col
        self.chapter = chapter

    def PrintHighlighted(self, out):
        """Render each line with help_gen.TopicHtmlRenderer.

        Lines for which the renderer returns None are left for the caller's
        output to copy through unchanged.

        Returns:
          The debug list that was handed to the renderer.
        """
        from doctools import help_gen

        debug_info = []
        renderer = help_gen.TopicHtmlRenderer(self.chapter, debug_info,
                                              self.linkify_stop_col)

        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            # NOTE: IndexLineToHtml accepts an HTML ESCAPED line.  It's valid
            # to just add tags and leave everything alone.
            rendered = renderer.Render(self.s[line_start:line_end])

            if rendered is not None:
                # Swap the original line for its rendered version.
                out.PrintUntil(line_start)
                out.Print(rendered)
                out.SkipTo(line_end)

            line_start = line_end

        return debug_info
310
311
class PygmentsPlugin(_Plugin):
    """Highlight s[start_pos:end_pos] with Pygments, using the lexer for `lang`."""

    def __init__(self, s, start_pos, end_pos, lang):
        _Plugin.__init__(self, s, start_pos, end_pos)
        self.lang = lang

    def PrintHighlighted(self, out):
        """Print the highlighted code block to `out`.

        If Pygments can't be imported, a warning is logged and the block is
        printed unhighlighted.
        """
        try:
            from pygments import lexers
            from pygments import formatters
            from pygments import highlight
        except ImportError:
            log("Warning: Couldn't import pygments, so skipping syntax highlighting"
                )
            # HighlightCode() drops the original <pre><code> wrapper around
            # this plugin's output, so printing nothing would delete the code
            # block from the document.  The span is already HTML-escaped, so
            # it can be emitted as is inside a plain wrapper.
            out.Print('<pre><code>')
            out.Print(self.s[self.start_pos:self.end_pos])
            out.Print('</code></pre>')
            return

        # unescape before passing to pygments, which will escape
        code = html.ToText(self.s, self.start_pos, self.end_pos)

        lexer = lexers.get_lexer_by_name(self.lang)
        formatter = formatters.HtmlFormatter()

        highlighted = highlight(code, lexer, formatter)
        out.Print(highlighted)
336
337
def SimpleHighlightCode(s):
    """Simple highlighting for test/shell-vs-shell.sh.

    Runs ShPromptPlugin over the contents of every <pre> ... </pre> element.
    The <pre> and </pre> tags themselves are copied through unchanged.

    Args:
      s: an HTML string.

    Returns:
      The highlighted HTML string.
    """

    f = cStringIO.StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Start offset of the current token; the iterator only yields end offsets.
    pos = 0

    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pre_end_pos = end_pos

                slash_pre_left, slash_pre_right = \
                    html.ReadUntilEndTag(it, tag_lexer, 'pre')

                # Print everything up to and including <pre>
                out.PrintUntil(pre_end_pos)

                # Using ShPromptPlugin because it does the comment highlighting
                # we want!  Only the text BETWEEN the tags is highlighted, so
                # that neither <pre> nor </pre> can end up inside a <span>.
                plugin = ShPromptPlugin(s, pre_end_pos, slash_pre_left)
                plugin.PrintHighlighted(out)

                out.SkipTo(slash_pre_left)

                # The iterator is now past </pre>, so that is where the next
                # token starts.
                end_pos = slash_pre_right

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
379
380
# Parses a <code> CSS class of the form language-chapter-links-CHAPTER or
# language-chapter-links-CHAPTER_N.  Group 1 is the chapter name; group 2 is
# the digits N (None when absent), which HighlightCode() converts to
# linkify_stop_col.
CSS_CLASS_RE = re.compile(
    r'''
    language-chapter-links-
    ([a-z0-9-]+)     # chapter name
    (?:_(\d+))?      # optional linkify_stop_col
    ''', re.VERBOSE)
387
388
def HighlightCode(s, default_highlighter, debug_out=None):
    """Highlight the <pre><code> blocks of an HTML document.

    Algorithm:
    1. Collect what's inside <pre><code> ...
    2. Then read lines with ShPromptPlugin.
    3. If the line looks like a shell prompt and command, highlight them with
       <span>

    The treatment of a block depends on the class attribute of its <code> tag:

    - no class: default_highlighter is used; the block is left alone when
      default_highlighter is None
    - language-none, language-ysh: left alone
    - language-sh-prompt, language-oil-sh: ShPromptPlugin
    - language-chapter-links-CHAPTER[_N]: HelpTopicsPlugin
    - any other class starting with 'language': PygmentsPlugin, whose output
      replaces the whole <pre><code> ... </code></pre> element
    - a class that doesn't start with 'language': left alone

    Args:
      s: an HTML string.
      default_highlighter: None, or one of 'sh-prompt', 'oils-sh', 'oil-sh'.
      debug_out: optional list.  For each chapter-links block, a dict with
        keys 'to_chap' and 'lines' is appended to it.

    Returns:
      The highlighted HTML string.

    Raises:
      RuntimeError: if default_highlighter is some other value and a <code>
        tag without a class is found.
      AssertionError: if a block handled by Pygments has a </code> that is
        not immediately followed by </pre>.
    """
    if debug_out is None:
        debug_out = []

    f = cStringIO.StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Start offset of the current token; the iterator only yields end offsets.
    pos = 0

    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pre_start_pos = pos
                pos = end_pos

                # Only <pre> IMMEDIATELY followed by <code> is handled.
                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if tok_id == html.StartTag and tag_lexer.TagName() == 'code':

                    css_class = tag_lexer.GetAttr('class')
                    code_start_pos = end_pos

                    if css_class is None:
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')
                        # The iterator is now past </code>, so that is where
                        # the next token starts.
                        end_pos = slash_code_right

                        if default_highlighter is not None:
                            # TODO: Refactor this to remove duplication with
                            # language-{sh-prompt,oil-sh} below

                            # oil-sh for compatibility
                            if default_highlighter in ('sh-prompt', 'oils-sh',
                                                       'oil-sh'):
                                out.PrintUntil(code_start_pos)

                                # Using ShPromptPlugin because it does the
                                # comment highlighting we want!
                                plugin = ShPromptPlugin(
                                    s, code_start_pos, slash_code_left)
                                plugin.PrintHighlighted(out)

                                out.SkipTo(slash_code_left)
                            else:
                                raise RuntimeError(
                                    'Unknown default highlighter %r' %
                                    default_highlighter)

                    elif css_class.startswith('language'):
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')
                        # Same as above: resynchronize with the iterator.
                        end_pos = slash_code_right

                        if css_class == 'language-none':
                            # Allow ```none
                            pass

                        elif css_class in ('language-sh-prompt',
                                           'language-oil-sh'):
                            # Here's we're KEEPING the original <pre><code>
                            # Print everything up to and including
                            # <pre><code language="...">
                            out.PrintUntil(code_start_pos)

                            plugin = ShPromptPlugin(s, code_start_pos,
                                                    slash_code_left)
                            plugin.PrintHighlighted(out)

                            out.SkipTo(slash_code_left)

                        elif css_class == 'language-ysh':
                            # TODO: Write an Oil syntax highlighter.
                            pass

                        elif css_class.startswith('language-chapter-links-'):
                            m = CSS_CLASS_RE.match(css_class)
                            assert m is not None, css_class

                            #log('%s GROUPS %s', css_class, m.groups())
                            chapter, num_str = m.groups()
                            if num_str is not None:
                                linkify_stop_col = int(num_str)
                            else:
                                linkify_stop_col = -1

                            out.PrintUntil(code_start_pos)

                            plugin = HelpTopicsPlugin(s, code_start_pos,
                                                      slash_code_left, chapter,
                                                      linkify_stop_col)

                            block_debug_info = plugin.PrintHighlighted(out)

                            # e.g. these are links to cmd-lang within a block
                            # in toc-ysh
                            chap_block = {
                                'to_chap': chapter,
                                'lines': block_debug_info
                            }
                            debug_out.append(chap_block)

                            out.SkipTo(slash_code_left)

                        else:  # language-*: Use Pygments

                            # We REMOVE the original <pre><code> because
                            # Pygments gives you a <pre> already

                            # We just read closing </code>, and the next one
                            # should be </pre>.
                            try:
                                tok_id, end_pos = next(it)
                            except StopIteration:
                                break
                            tag_lexer.Reset(slash_code_right, end_pos)
                            assert tok_id == html.EndTag, tok_id
                            assert tag_lexer.TagName(
                            ) == 'pre', tag_lexer.TagName()
                            slash_pre_right = end_pos

                            out.PrintUntil(pre_start_pos)

                            lang = css_class[len('language-'):]
                            plugin = PygmentsPlugin(s, code_start_pos,
                                                    slash_code_left, lang)
                            plugin.PrintHighlighted(out)

                            out.SkipTo(slash_pre_right)
                            f.write('<!-- done pygments -->\n')

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
540
541
def ExtractCode(s, f):
    """Print code blocks to a plain text file.

    So we can at least validate the syntax.

    Similar to the algorithm code above:

    1. Collect what's inside <pre><code> ...
    2. Decode &amp; -> &, etc. and print it

    Only <code> tags WITHOUT a class attribute are extracted.  Each block is
    preceded by a '# block N' line, with N counting from 0.

    Args:
      s: an HTML string.
      f: file-like object that the text is written to.
    """
    out = html.Output(s, f)
    tag_lexer = html.TagLexer(s)

    block_num = 0
    # Start offset of the current token; the iterator only yields end offsets.
    pos = 0
    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pos = end_pos

                # Only <pre> IMMEDIATELY followed by <code> is handled.
                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if tok_id == html.StartTag and tag_lexer.TagName() == 'code':

                    css_class = tag_lexer.GetAttr('class')
                    # Skip code blocks that look like ```foo
                    # Usually we use 'oil-sh' as the default_highlighter, and
                    # all those code blocks should be extracted.  TODO: maybe
                    # this should be oil-language?
                    if css_class is None:
                        code_start_pos = end_pos

                        out.SkipTo(code_start_pos)
                        out.Print('# block %d' % block_num)
                        out.Print('\n')

                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')

                        text = html.ToText(s, code_start_pos, slash_code_left)
                        out.SkipTo(slash_code_left)

                        out.Print(text)
                        out.Print('\n')

                        block_num += 1

                        # The iterator is now past </code>, so that is where
                        # the next token starts.
                        end_pos = slash_code_right

        pos = end_pos

    # NOTE: out.PrintTheRest() is not called here; only the extracted blocks
    # are written to f.
605
606
class ShellSession(object):
    """Placeholder for a plugin that runs shell snippets (not implemented yet).

    TODO: Pass this to HighlightCode as a plugin

    The idea is that a snippet like

        $ x=one
        $ echo $x
        $ echo two

    becomes

        $ x=one
        $ echo $x
        one
        $ echo two
        two

    and then you will have

        blog/2019/12/_shell_session/
          $hash1-stdout.txt
          $hash2-stdout.txt

    It hashes the command with md5 and then brings it back.  If the file
    already exists then it doesn't run it again.  You can delete the file to
    redo it.

    TODO: write a loop that reads one line at a time, writes it, then reads
    output from bash.  Use the Lines iterator to get lines.  For extra
    credit, you can solve the PS2 problem?  That's easily done with Oil's
    parser.
    """

    def __init__(self, shell_exe, cache_dir):
        """
        Args:
          shell_exe: sh, bash, osh, or oil.  Use the one in the $PATH by
            default.
          cache_dir: ~/git/oilshell/oilshell.org/blog/2019/12/session/
        """
        self.cache_dir = cache_dir
        self.shell_exe = shell_exe

    def PrintHighlighted(self, s, start_pos, end_pos, out):
        """Currently a no-op.

        Args:
          s: an HTML string.
        """
        return None
654
655
def main(argv):
    """Command line entry point.

    Args:
      argv: the full argument list; argv[1] is the action.  The only action
        is 'highlight', which reads HTML from stdin and prints the result of
        SimpleHighlightCode() to stdout.

    Raises:
      RuntimeError: if the action is missing or unknown.
    """
    if len(argv) < 2:
        # A bare IndexError traceback doesn't tell the user what went wrong.
        raise RuntimeError('Expected an action, e.g. highlight')
    action = argv[1]

    if action == 'highlight':
        # for test/shell-vs-shell.sh

        # Not named 'html', because that would shadow the imported module.
        contents = sys.stdin.read()
        out = SimpleHighlightCode(contents)
        print(out)

    else:
        raise RuntimeError('Invalid action %r' % action)


if __name__ == '__main__':
    main(sys.argv)