| 1 | #!/usr/bin/env python2
 | 
| 2 | """oils_doc.py: HTML processing for Oil documentation.
 | 
| 3 | 
 | 
| 4 | Plugins:
 | 
| 5 |   ExpandLinks expands $xref, etc.
 | 
| 6 |   PygmentsPlugin -- for ```python, ```sh, ```c, etc.
 | 
| 7 |   HelpTopicsPlugin -- for help-index.html
 | 
| 8 | 
 | 
| 9 |   ShPromptPlugin -- understands $ echo hi, but doesn't run anything
 | 
| 10 |   ShellSession -- runs shell snippets and caches the output (not implemented yet)
 | 
| 11 | """
 | 
| 12 | from __future__ import print_function
 | 
| 13 | 
 | 
| 14 | import cgi
 | 
| 15 | import cStringIO
 | 
| 16 | import re
 | 
| 17 | import sys
 | 
| 18 | 
 | 
| 19 | from doctools.util import log
 | 
| 20 | from lazylex import html
 | 
| 21 | 
 | 
| 22 | 
 | 
def RemoveComments(s):
    """Return the HTML string s with <!-- comments --> removed.

    Comments whose text contains 'REPLACE' are kept, because
    doc/release-index.md has markers like <!-- REPLACE_WITH_DATE -->.

    Args:
      s: an HTML string.

    Returns:
      The HTML with all other comments stripped.
    """
    f = cStringIO.StringIO()
    out = html.Output(s, f)

    # Start of the current token, i.e. the end of the previous one.
    pos = 0

    for tok_id, end_pos in html.ValidTokens(s):
        # doc/release-index.md has <!-- REPLACE_WITH_DATE --> etc.
        if tok_id == html.Comment and 'REPLACE' not in s[pos:end_pos]:
            out.PrintUntil(pos)  # everything before the comment
            out.SkipTo(end_pos)  # drop the comment itself
        pos = end_pos

    out.PrintTheRest()
    return f.getvalue()
 | 
| 43 | 
 | 
| 44 | 
 | 
| 45 | class _Abbrev(object):
 | 
| 46 | 
 | 
| 47 |     def __init__(self, fmt):
 | 
| 48 |         self.fmt = fmt
 | 
| 49 | 
 | 
| 50 |     def __call__(self, value):
 | 
| 51 |         return self.fmt % {'value': value}
 | 
| 52 | 
 | 
| 53 | 
 | 
# (abbreviation name, URL template) pairs.  The name is what follows the $ in
# an href like $xref:foo, and %(value)s in the template is replaced by the
# link argument.
_ABBREVIATION_FORMATS = [
    ('xref', '/cross-ref.html?tag=%(value)s#%(value)s'),

    # 'help' is an alias for osh-help, for backward compatibility
    # to link to the same version

    # TODO: Remove all of these broken links!
    ('help', 'osh-help.html?topic=%(value)s#%(value)s'),
    ('osh-help', 'osh-help.html?topic=%(value)s#%(value)s'),
    ('oil-help', 'oil-help.html?topic=%(value)s#%(value)s'),

    # New style: one for every chapter?
    # Problem: can't use relative links here, because some are from doc/ref,
    # and some are from doc
    ('chap-type-method', 'chap-type-method.html?topic=%(value)s#%(value)s'),
    ('chap-plugin', 'chap-plugin.html?topic=%(value)s#%(value)s'),
    ('chap-builtin-cmd', 'chap-builtin-cmd.html?topic=%(value)s#%(value)s'),

    # for blog
    ('osh-help-latest',
     '//oilshell.org/release/latest/doc/osh-help.html?topic=%(value)s#%(value)s'
    ),
    ('oil-help-latest',
     '//oilshell.org/release/latest/doc/oil-help.html?topic=%(value)s#%(value)s'
    ),

    # For the blog
    ('oils-doc', '//www.oilshell.org/release/latest/doc/%(value)s'),
    ('blog-tag', '/blog/tags.html?tag=%(value)s#%(value)s'),
    ('oils-commit', 'https://github.com/oilshell/oil/commit/%(value)s'),
    ('oils-src', 'https://github.com/oilshell/oil/blob/master/%(value)s'),
    ('blog-code-src',
     'https://github.com/oilshell/blog-code/blob/master/%(value)s'),
    ('issue', 'https://github.com/oilshell/oil/issues/%(value)s'),
    ('wiki', 'https://github.com/oilshell/oil/wiki/%(value)s'),
]

# Abbreviation name -> callable that turns the link argument into a URL.
_ABBREVIATIONS = dict(
    (name, _Abbrev(fmt)) for name, fmt in _ABBREVIATION_FORMATS)

# Backward compatibility: the old oil-* names share the oils-* expanders.
_ABBREVIATIONS.update({
    'oil-src': _ABBREVIATIONS['oils-src'],
    'oil-commit': _ABBREVIATIONS['oils-commit'],
    'oil-doc': _ABBREVIATIONS['oils-doc'],
})
 | 
| 110 | 
 | 
| 111 | # $xref:foo
 | 
| 112 | _SHORTCUT_RE = re.compile(r'\$ ([a-z\-]+) (?: : (\S+))?', re.VERBOSE)
 | 
| 113 | 
 | 
| 114 | 
 | 
def ExpandLinks(s):
    """Expand abbreviated hrefs like <a href="$xref:bash"> into real URLs.

    For every <a> start tag whose href matches _SHORTCUT_RE, the href value is
    replaced by the URL produced by the matching _ABBREVIATIONS entry.  When
    the href has no ':arg' part (e.g. href="$xref"), the text between <a> and
    </a> is used as the argument.

    Args:
      s: an HTML string.

    Returns:
      The HTML string with the abbreviated hrefs expanded.

    Raises:
      RuntimeError: if the abbreviation name is not in _ABBREVIATIONS.
    """
    f = cStringIO.StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Start of the current token.  It must be advanced on EVERY path through
    # the loop body, because tag_lexer.Reset(pos, end_pos) relies on it.
    pos = 0

    it = html.ValidTokens(s)
    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:
            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'a':
                open_tag_right = end_pos

                href_start, href_end = tag_lexer.GetSpanForAttrValue('href')

                # -1 means there is no href (e.g. <a name="...">).  Such tags
                # are left alone, but we still fall through to the bottom of
                # the loop so that pos is advanced past them.
                if href_start != -1:
                    # TODO: Need to unescape like GetAttr()
                    href = s[href_start:href_end]

                    m = _SHORTCUT_RE.match(href)
                    if m:
                        abbrev_name, arg = m.groups()
                        if not arg:
                            # No explicit argument: use the link text.  This
                            # consumes tokens up to and including </a>.
                            close_tag_left, close_tag_right = \
                                html.ReadUntilEndTag(it, tag_lexer, 'a')
                            arg = s[open_tag_right:close_tag_left]

                            # The next token starts after </a>, not after the
                            # opening tag.
                            end_pos = close_tag_right

                        # Hack to so we can write [Wiki Page]($wiki) and have
                        # the link look like /Wiki-Page/
                        if abbrev_name == 'wiki':
                            arg = arg.replace(' ', '-')

                        func = _ABBREVIATIONS.get(abbrev_name)
                        if not func:
                            raise RuntimeError('Invalid abbreviation %r' %
                                               abbrev_name)
                        new = func(arg)

                        # Replace only the attribute value; the rest of the
                        # tag is copied through unchanged.
                        out.PrintUntil(href_start)
                        f.write(cgi.escape(new))
                        out.SkipTo(href_end)

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
 | 
| 174 | 
 | 
| 175 | 
 | 
| 176 | class _Plugin(object):
 | 
| 177 | 
 | 
| 178 |     def __init__(self, s, start_pos, end_pos):
 | 
| 179 |         self.s = s
 | 
| 180 |         self.start_pos = start_pos
 | 
| 181 |         self.end_pos = end_pos
 | 
| 182 | 
 | 
| 183 |     def PrintHighlighted(self, out):
 | 
| 184 |         raise NotImplementedError()
 | 
| 185 | 
 | 
| 186 | 
 | 
| 187 | # Optional newline at end
 | 
| 188 | _LINE_RE = re.compile(r'(.*) \n?', re.VERBOSE)
 | 
| 189 | 
 | 
| 190 | _PROMPT_LINE_RE = re.compile(
 | 
| 191 |     r'''
 | 
| 192 | (\S* \$)[ ]       # flush-left non-whitespace, then dollar and space is a prompt
 | 
| 193 | (.*?)             # arbitrary text
 | 
| 194 | (?:               # don't highlight tab completion
 | 
| 195 |   (<TAB>)   # it's HTML escaped!!!
 | 
| 196 |   .*?
 | 
| 197 | )?
 | 
| 198 | (?:
 | 
| 199 |   [ ][ ]([#] .*)  # optionally: two spaces then a comment
 | 
| 200 | )?
 | 
| 201 | $
 | 
| 202 | ''', re.VERBOSE)
 | 
| 203 | 
 | 
| 204 | _EOL_COMMENT_RE = re.compile(
 | 
| 205 |     r'''
 | 
| 206 | .*?             # arbitrary text
 | 
| 207 | [ ][ ]([#] .*)  # two spaces then a comment
 | 
| 208 | $
 | 
| 209 | ''', re.VERBOSE)
 | 
| 210 | 
 | 
| 211 | _COMMENT_LINE_RE = re.compile(r'#.*')
 | 
| 212 | 
 | 
| 213 | 
 | 
def Lines(s, start_pos, end_pos):
    """Yield the end position of each line in s[start_pos:end_pos].

    A line's end position is just past its newline, or end_pos for a final
    line that has no newline.
    """
    cursor = start_pos
    while cursor < end_pos:
        line_match = _LINE_RE.match(s, cursor, end_pos)
        if line_match is None:
            raise RuntimeError("Should have matched a line")

        cursor = line_match.end(0)
        yield cursor
 | 
| 225 | 
 | 
| 226 | 
 | 
class ShPromptPlugin(_Plugin):
    """Wrap shell prompts, commands and comments in <span> tags."""

    def _WrapGroup(self, out, m, group, open_tag):
        """Print up to group's start, then the group wrapped in a span."""
        out.PrintUntil(m.start(group))
        out.Print(open_tag)
        out.PrintUntil(m.end(group))
        out.Print('</span>')

    def _HighlightLine(self, out, line_start, line_end):
        """Emit the spans for one line; the first matching rule wins."""
        # 1. The whole line is a comment.
        comment = _COMMENT_LINE_RE.match(self.s, line_start, line_end)
        if comment:
            self._WrapGroup(out, comment, 0, '<span class="sh-comment">')
            return

        # 2. Prompt, command, and optionally a tab-completion marker and a
        # trailing comment.
        prompt = _PROMPT_LINE_RE.match(self.s, line_start, line_end)
        if prompt:
            self._WrapGroup(out, prompt, 1, '<span class="sh-prompt">')
            self._WrapGroup(out, prompt, 2, '<span class="sh-command">')
            if prompt.group(3):
                self._WrapGroup(out, prompt, 3,
                                '<span class="sh-tab-complete">')
            if prompt.group(4):
                self._WrapGroup(out, prompt, 4, '<span class="sh-comment">')
            return

        # 3. Any other line that ends with a comment.
        trailing = _EOL_COMMENT_RE.match(self.s, line_start, line_end)
        if trailing:
            self._WrapGroup(out, trailing, 1, '<span class="sh-comment">')

    def PrintHighlighted(self, out):
        """Highlight self.s[start_pos:end_pos] line by line, writing to out."""
        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            self._HighlightLine(out, line_start, line_end)

            # Whatever is left of the line is copied through unchanged.
            out.PrintUntil(line_end)
            line_start = line_end
 | 
| 277 | 
 | 
| 278 | 
 | 
class HelpTopicsPlugin(_Plugin):
    """Render the lines of a doc/ref/toc-*.md block as help topic HTML."""

    def __init__(self, s, start_pos, end_pos, chapter, linkify_stop_col):
        super(HelpTopicsPlugin, self).__init__(s, start_pos, end_pos)
        # Both values are handed to help_gen.TopicHtmlRenderer as-is.
        self.chapter = chapter
        self.linkify_stop_col = linkify_stop_col

    def PrintHighlighted(self, out):
        """Replace each line the renderer can handle with its rendering.

        Lines for which Render() returns None are left untouched.

        Returns:
          The debug_out list that was passed to the TopicHtmlRenderer.
        """
        from doctools import help_gen

        debug_out = []
        renderer = help_gen.TopicHtmlRenderer(self.chapter, debug_out,
                                              self.linkify_stop_col)

        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            # NOTE: IndexLineToHtml accepts an HTML ESCAPED line.  It's valid
            # to just add tags and leave everything alone.
            rendered = renderer.Render(self.s[line_start:line_end])

            if rendered is not None:
                out.PrintUntil(line_start)
                out.Print(rendered)
                out.SkipTo(line_end)

            line_start = line_end

        return debug_out
 | 
| 310 | 
 | 
| 311 | 
 | 
class PygmentsPlugin(_Plugin):
    """Highlight s[start_pos:end_pos] with the Pygments lexer named lang."""

    def __init__(self, s, start_pos, end_pos, lang):
        _Plugin.__init__(self, s, start_pos, end_pos)
        # Lexer name passed to pygments.lexers.get_lexer_by_name()
        self.lang = lang

    def PrintHighlighted(self, out):
        """Print the highlighted code block to out.

        Pygments emits its own <pre>, so the caller is expected to drop the
        original <pre><code> wrapper.  If Pygments can't be imported, a
        warning is logged and the code is printed unhighlighted inside a
        fresh <pre><code>, so that the block doesn't vanish from the document.
        """
        try:
            from pygments import lexers
            from pygments import formatters
            from pygments import highlight
        except ImportError:
            log("Warning: Couldn't import pygments, so skipping syntax highlighting"
                )
            # The span is already HTML escaped, so it can be copied verbatim.
            out.Print('<pre><code>')
            out.Print(self.s[self.start_pos:self.end_pos])
            out.Print('</code></pre>\n')
            return

        # unescape before passing to pygments, which will escape
        code = html.ToText(self.s, self.start_pos, self.end_pos)

        lexer = lexers.get_lexer_by_name(self.lang)
        formatter = formatters.HtmlFormatter()

        highlighted = highlight(code, lexer, formatter)
        out.Print(highlighted)
 | 
| 336 | 
 | 
| 337 | 
 | 
def SimpleHighlightCode(s):
    """Simple highlighting for test/shell-vs-shell.sh.

    The text inside every <pre>...</pre> is run through ShPromptPlugin, which
    highlights prompts, commands and comments.  The <pre> tags themselves and
    everything outside them are copied through unchanged.

    Args:
      s: an HTML string.

    Returns:
      The highlighted HTML string.
    """
    f = cStringIO.StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    pos = 0

    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                # Position just after the opening <pre> tag
                pre_end_pos = end_pos

                # Start and end of the closing </pre> tag
                slash_pre_left, slash_pre_right = \
                    html.ReadUntilEndTag(it, tag_lexer, 'pre')

                # Copy everything up to and including <pre>
                out.PrintUntil(pre_end_pos)

                # Using ShPromptPlugin because it does the comment highlighting
                # we want!  Only the text BETWEEN the tags is highlighted, so
                # a first line can't be hidden behind '<pre>' and '</pre>'
                # can't end up inside a span.
                plugin = ShPromptPlugin(s, pre_end_pos, slash_pre_left)
                plugin.PrintHighlighted(out)

                out.SkipTo(slash_pre_left)

                # The next token starts after </pre>
                end_pos = slash_pre_right

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
 | 
| 379 | 
 | 
| 380 | 
 | 
# Parses a CSS class such as language-chapter-links-NAME or
# language-chapter-links-NAME_22.
#   group 1: the chapter name
#   group 2: the digits after the underscore, or None if absent
CSS_CLASS_RE = re.compile(
    r'''
   language-chapter-links-
   ([a-z0-9-]+)        # chapter name
   (?:_(\d+))?      # optional linkify_stop_col
   ''',
    flags=re.VERBOSE)
 | 
| 387 | 
 | 
| 388 | 
 | 
# Names that select ShPromptPlugin, either as the default highlighter or as
# the suffix of a language-* CSS class.  oil-sh is kept for compatibility.
_SH_PROMPT_HIGHLIGHTERS = ('sh-prompt', 'oils-sh', 'oil-sh')

_SH_PROMPT_CSS_CLASSES = tuple('language-' + name
                               for name in _SH_PROMPT_HIGHLIGHTERS)


def _HighlightShPromptBlock(s, out, code_start_pos, slash_code_left):
    """Highlight s[code_start_pos:slash_code_left] with ShPromptPlugin.

    The original <pre><code> is KEPT: everything up to and including the
    opening <code> tag is printed first, and the output position is left at
    the closing </code> tag.
    """
    out.PrintUntil(code_start_pos)

    # Using ShPromptPlugin because it does the comment highlighting we want!
    plugin = ShPromptPlugin(s, code_start_pos, slash_code_left)
    plugin.PrintHighlighted(out)

    out.SkipTo(slash_code_left)


def HighlightCode(s, default_highlighter, debug_out=None):
    """Highlight the <pre><code> blocks in an HTML string.

    Algorithm:
    1. Find each <pre> that is immediately followed by <code>.
    2. Pick a highlighter from the class attribute of <code>:
       - no class: default_highlighter, if it's not None
       - language-none, language-ysh: left alone
       - language-sh-prompt, language-oils-sh, language-oil-sh:
         ShPromptPlugin
       - language-chapter-links-*: HelpTopicsPlugin
       - any other language-*: PygmentsPlugin, which replaces the whole
         <pre><code> block
       A class that doesn't start with 'language' is left alone.
    3. Everything else is copied through unchanged.

    Args:
      s: an HTML string.
      default_highlighter: None, or one of 'sh-prompt', 'oils-sh', 'oil-sh'.
      debug_out: optional list.  For every language-chapter-links-* block, a
        dict with keys 'to_chap' and 'lines' is appended to it.

    Returns:
      The highlighted HTML string.

    Raises:
      RuntimeError: if default_highlighter is needed but isn't a known name.
    """
    if debug_out is None:
        debug_out = []

    f = cStringIO.StringIO()
    out = html.Output(s, f)

    tag_lexer = html.TagLexer(s)

    # Start of the current token
    pos = 0

    it = html.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == html.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.TagName() == 'pre':
                pre_start_pos = pos
                pos = end_pos

                # Only <pre> IMMEDIATELY followed by <code> is a code block
                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if tok_id == html.StartTag and tag_lexer.TagName() == 'code':

                    css_class = tag_lexer.GetAttr('class')
                    code_start_pos = end_pos

                    if css_class is None:
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')
                        # The next token starts after </code>
                        end_pos = slash_code_right

                        if default_highlighter is not None:
                            if default_highlighter in _SH_PROMPT_HIGHLIGHTERS:
                                _HighlightShPromptBlock(s, out, code_start_pos,
                                                        slash_code_left)
                            else:
                                raise RuntimeError(
                                    'Unknown default highlighter %r' %
                                    default_highlighter)

                    elif css_class.startswith('language'):
                        slash_code_left, slash_code_right = \
                            html.ReadUntilEndTag(it, tag_lexer, 'code')
                        # The next token starts after </code>
                        end_pos = slash_code_right

                        if css_class == 'language-none':
                            # Allow ```none
                            pass

                        elif css_class in _SH_PROMPT_CSS_CLASSES:
                            # Same names as default_highlighter accepts, so
                            # ```oils-sh doesn't fall through to Pygments.
                            _HighlightShPromptBlock(s, out, code_start_pos,
                                                    slash_code_left)

                        elif css_class == 'language-ysh':
                            # TODO: Write an Oil syntax highlighter.
                            pass

                        elif css_class.startswith('language-chapter-links-'):
                            m = CSS_CLASS_RE.match(css_class)
                            assert m is not None, css_class

                            #log('%s GROUPS %s', css_class, m.groups())
                            chapter, num_str = m.groups()
                            if num_str is not None:
                                linkify_stop_col = int(num_str)
                            else:
                                linkify_stop_col = -1

                            # Keep the original <pre><code>
                            out.PrintUntil(code_start_pos)

                            plugin = HelpTopicsPlugin(s, code_start_pos,
                                                      slash_code_left, chapter,
                                                      linkify_stop_col)

                            block_debug_info = plugin.PrintHighlighted(out)

                            # e.g. these are links to cmd-lang within a block
                            # in toc-ysh
                            chap_block = {
                                'to_chap': chapter,
                                'lines': block_debug_info
                            }
                            debug_out.append(chap_block)

                            out.SkipTo(slash_code_left)

                        else:  # language-*: Use Pygments

                            # We REMOVE the original <pre><code> because
                            # Pygments gives you a <pre> already

                            # We just read closing </code>, and the next one
                            # should be </pre>.
                            try:
                                tok_id, end_pos = next(it)
                            except StopIteration:
                                break
                            tag_lexer.Reset(slash_code_right, end_pos)
                            assert tok_id == html.EndTag, tok_id
                            assert tag_lexer.TagName(
                            ) == 'pre', tag_lexer.TagName()
                            slash_pre_right = end_pos

                            out.PrintUntil(pre_start_pos)

                            lang = css_class[len('language-'):]
                            plugin = PygmentsPlugin(s, code_start_pos,
                                                    slash_code_left, lang)
                            plugin.PrintHighlighted(out)

                            out.SkipTo(slash_pre_right)
                            f.write('<!-- done pygments -->\n')

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
 | 
| 540 | 
 | 
| 541 | 
 | 
def ExtractCode(s, f):
    """Print code blocks to a plain text file.

    So we can at least validate the syntax.

    Similar to the highlighting algorithm above:

    1. Collect what's inside each <pre><code> that has no class attribute.
    2. Convert it to text with html.ToText() (e.g. &amp; -> &) and print it
       to f, preceded by a '# block N' line.

    Nothing outside those code blocks is printed.

    Args:
      s: an HTML string.
      f: the file-like object that html.Output writes to.
    """
    out = html.Output(s, f)
    tag_lexer = html.TagLexer(s)

    num_blocks = 0
    tok_start = 0
    tokens = html.ValidTokens(s)

    while True:
        try:
            tok_id, tok_end = next(tokens)
        except StopIteration:
            break

        is_pre = False
        if tok_id == html.StartTag:
            tag_lexer.Reset(tok_start, tok_end)
            is_pre = tag_lexer.TagName() == 'pre'

        if is_pre:
            # Look at the token right after <pre>
            tok_start = tok_end
            try:
                tok_id, tok_end = next(tokens)
            except StopIteration:
                break

            tag_lexer.Reset(tok_start, tok_end)

            # Skip code blocks that look like ```foo, i.e. have a class.
            # Usually we use 'oil-sh' as the default_highlighter, and all
            # those code blocks should be extracted.  TODO: maybe this should
            # be oil-language?
            if (tok_id == html.StartTag and tag_lexer.TagName() == 'code' and
                    tag_lexer.GetAttr('class') is None):
                body_start = tok_end

                out.SkipTo(body_start)
                out.Print('# block %d' % num_blocks)
                out.Print('\n')

                body_end, _ = html.ReadUntilEndTag(tokens, tag_lexer, 'code')

                text = html.ToText(s, body_start, body_end)
                out.SkipTo(body_end)

                out.Print(text)
                out.Print('\n')

                num_blocks += 1

        tok_start = tok_end

    # Deliberately no out.PrintTheRest(): only the code blocks are output.
 | 
| 605 | 
 | 
| 606 | 
 | 
class ShellSession(object):
    """Placeholder for a plugin that runs shell snippets.

    TODO: Pass this to HighlightCode as a plugin

    The idea is that a block like

    $ x=one
    $ echo $x
    $ echo two

    becomes

    $ x=one
    $ echo $x
    one
    $ echo two
    two

    with the output cached in files like

    blog/2019/12/_shell_session/
      $hash1-stdout.txt
      $hash2-stdout.txt

    The command is hashed with md5 to find its file.  If the file already
    exists, the command isn't run again; delete the file to redo it.

    TODO: write a loop that reads one line at a time, writes it, then reads
    output from bash.
    Use the Lines iterator to get lines.
    For extra credit, you can solve the PS2 problem?  That's easily done with
    Oil's parser.
    """

    def __init__(self, shell_exe, cache_dir):
        """
        Args:
          shell_exe: sh, bash, osh, or oil.  Use the one in the $PATH by default.
          cache_dir: ~/git/oilshell/oilshell.org/blog/2019/12/session/
        """
        self.shell_exe, self.cache_dir = shell_exe, cache_dir

    def PrintHighlighted(self, s, start_pos, end_pos, out):
        """Not implemented yet: does nothing and returns None.

        Args:
          s: an HTML string.
        """
        return None
 | 
| 654 | 
 | 
| 655 | 
 | 
def main(argv):
    """Command line entry point.

    Args:
      argv: the argument list, with the action name in argv[1].  The only
        action is 'highlight', which reads HTML from stdin and prints the
        result of SimpleHighlightCode() to stdout.

    Raises:
      RuntimeError: if the action is missing or unknown.
    """
    try:
        action = argv[1]
    except IndexError:
        raise RuntimeError("Expected an action, e.g. 'highlight'")

    if action == 'highlight':
        # for test/shell-vs-shell.sh

        # Not named 'html', so the imported html module isn't shadowed.
        contents = sys.stdin.read()
        print(SimpleHighlightCode(contents))

    else:
        raise RuntimeError('Invalid action %r' % action)


if __name__ == '__main__':
    main(sys.argv)
 |