| 1 | #!/usr/bin/env python2
 | 
| 2 | from __future__ import print_function
 | 
| 3 | """help_gen.py
 | 
| 4 | 
 | 
| 5 | Ideas for HTML -> ANSI converter:
 | 
| 6 | 
 | 
| 7 | - `ls`  ->  <code>ls</code>  ->  is reverse video?
 | 
| 8 | - [link]()  ->  <a href="">  ->  underlined, and then add a number to the bottom?
 | 
| 9 |   - could also be bright blue
 | 
| 10 | - <pre> is also indented 4 spaces, like the markdown
 | 
| 11 | - red X <span class="X">X</span>
 | 
| 12 | 
 | 
| 13 | - comments in code examples could be green?
 | 
| 14 | 
 | 
| 15 | What about:
 | 
| 16 | 
 | 
| 17 | - headings h1, h2, h3, h4
 | 
| 18 |   - Right now cards use reverse video.  Centering didn't look great.
 | 
| 19 | 
 | 
| 20 | - <ul> - you could use a Unicode bullet here
 | 
| 21 | - <ol>
 | 
| 22 | 
 | 
| 23 | Word wrapping?  troff/groff doesn't do it, but they do this weird right-justify
 | 
| 24 | thing.
 | 
| 25 | """
 | 
| 26 | 
 | 
| 27 | import cStringIO
 | 
| 28 | import HTMLParser
 | 
| 29 | import os
 | 
| 30 | import pprint
 | 
| 31 | import re
 | 
| 32 | import sys
 | 
| 33 | 
 | 
| 34 | from doctools import html_lib
 | 
| 35 | from doctools.util import log
 | 
| 36 | from lazylex import html
 | 
| 37 | 
 | 
| 38 | #from typing import List, Tuple
 | 
| 39 | 
 | 
# A section header is a bracketed name such as [Front End] or [I/O].  The name
# may contain letters, digits, spaces, '/' and ':'.  Sections are turned into
# anchors.
SECTION_RE = re.compile(
    r'''
  \s*
  \[
  ([a-zA-Z0-9 /:]+)  # colon for ysh:upgrade
  \]
''', re.VERBOSE)

# Complex heuristic to highlight topics.
#
# Groups:
#   1: optional 'X ' deprecation marker
#   2: the topic name (at least 2 characters)
#   3: optional trailer after the name
#   4: the whitespace that terminates the topic
TOPIC_RE = re.compile(
    r'''
  (X[ ])?             # optional deprecation symbol X, then a single space
  @?                  # optional @array, e.g. @BASH_SOURCE

  ([a-zA-Z_][a-zA-Z0-9/:_-]+)
                      # topic names: osh-usage, _status, ysh:all, BASH_REMATCH
                      #              List/append, cmd/append

  ( [ ] [^a-zA-Z0-9 ] \S*
                      # trailer like >> or (make)
    |
    \(\)              # optional () for func()
  )?

  (                   # order of these 2 clauses matters
    [ ]*\n            # spaces/newline
    |
    [ ]+              # 1 or more spaces
  )
''', re.VERBOSE)
 | 
| 75 | 
 | 
| 76 | 
 | 
| 77 | def _StringToHref(s):
 | 
| 78 |     # lower case to match what doctools/cmark.py does
 | 
| 79 |     return s.lower().replace(' ', '-')
 | 
| 80 | 
 | 
| 81 | 
 | 
# Opening tag used to color the 'X ' deprecation marker dark red.  Code that
# prints this is responsible for printing the matching '</span>'.
X_LEFT_SPAN = '<span style="color: darkred">'
 | 
| 83 | 
 | 
| 84 | 
 | 
class TopicHtmlRenderer(object):
    """Adds links and highlighting to the lines of a chapter's topic index.

    Every line that is actually rendered also gets a dict appended to
    debug_out, describing the section and topics found on it.
    """

    # One or more spaces: required after a [Section], or as the remainder of
    # the leading indent.  Compiled once here rather than on every Render().
    _SPACE_RE = re.compile(r'[ ]+')

    def __init__(self, chapter, debug_out, linkify_stop_col):
        """
        Args:
          chapter: chapter name; links point at 'chap-<chapter>.html'
          debug_out: list that Render() appends one dict per rendered line to
          linkify_stop_col: 1-based column.  A topic whose name ends past this
            column stops linkification of the line.  -1 means no limit.
        """
        self.chapter = chapter
        self.debug_out = debug_out
        self.linkify_stop_col = linkify_stop_col

        self.html_page = 'chap-%s.html' % chapter

    def _PrintTopic(self, m, out, line_info):
        """Emit one TOPIC_RE match, with the X in red and the name linked.

        Args:
          m: match object; group 1 is the optional 'X ', group 2 the name
          out: output object positioned before the match
          line_info: dict whose 'topics' list receives (name, implemented)
        """
        # The X
        topic_impl = True
        if m.group(1):
            out.PrintUntil(m.start(1))
            out.Print(X_LEFT_SPAN)
            out.PrintUntil(m.end(1))
            out.Print('</span>')
            topic_impl = False

        # The topic name to link
        topic = m.group(2)
        line_info['topics'].append((topic, topic_impl))

        out.PrintUntil(m.start(2))
        out.Print('<a href="%s#%s">' % (self.html_page, topic))
        out.PrintUntil(m.end(2))
        out.Print('</a>')

    def Render(self, line):
        """Convert a line of text to HTML.

        Topics are highlighted and X made red.

        Only lines that start with 'X ' or with two spaces are processed; any
        other line is returned unchanged and nothing is added to debug_out.

        Args:
          line: RAW SPAN of HTML that is already escaped.

        Returns:
          The HTML with some tags inserted.
        """
        is_deprecated = line.startswith('X ')
        if not is_deprecated and not line.startswith('  '):
            # Not a topic line; don't bother allocating an output buffer.
            return line

        f = cStringIO.StringIO()
        out = html.Output(line, f)

        section_impl = True
        if is_deprecated:
            out.Print(X_LEFT_SPAN)
            out.PrintUntil(2)
            out.Print('</span>')
            section_impl = False

        pos = 2  # position within line, past the 'X ' or the 2 space indent

        # Highlight [Section] at the start of a line.
        m = SECTION_RE.match(line, pos)
        if m:
            section_name = m.group(1)
            href = html_lib.PrettyHref(section_name, preserve_anchor_case=True)

            out.PrintUntil(m.start(1))
            out.Print('<a href="%s#%s" class="level2">' %
                      (self.html_page, href))
            out.PrintUntil(m.end(1))  # anchor
            out.Print('</a>')

            pos = m.end(0)  # ADVANCE
        else:
            section_name = None

        line_info = {
            'section': section_name,
            'impl': section_impl,
            'topics': []
        }
        self.debug_out.append(line_info)

        # Whitespace after section, or leading whitespace
        m = self._SPACE_RE.match(line, pos)
        assert m, 'Expected whitespace %r' % line

        pos = m.end()

        # Keep matching topics until it doesn't match.
        while True:
            m = TOPIC_RE.match(line, pos)
            if not m:
                break

            pos = m.end()

            # The 1-based column number of the end of this topic
            col = m.end(2) + 1
            if self.linkify_stop_col != -1 and col > self.linkify_stop_col:
                break

            self._PrintTopic(m, out, line_info)

        # Whatever did not match is copied through untouched.
        out.PrintTheRest()
        return f.getvalue()
 | 
| 196 | 
 | 
| 197 | 
 | 
class Splitter(HTMLParser.HTMLParser):
    """Split an HTML stream starting at each of the heading tags.

    For *-help.html.

    Each group is a tuple (tag, attrs, heading_parts, body_parts).  A group is
    appended to `out` when the next heading starts, and the last one when
    end() is called.

    TODO: Rewrite this with lazylex!

    Algorithm:
    - ExtractBody() first, then match balanced tags
    - SPLIT by h2, h3, h4
    - Match <pre><code> blocks and re-indent
    - Later:
      - links <a href="">
      - `` is turned into inline <code></code>
      - ** ** for bold
      - * * for emphasis
      - <p> needs word wrapping!  Oops.
        - actually cmark seems to preserve this?  OK maybe not.
        - we just need space between <p>
    """

    def __init__(self, heading_tags, out):
        HTMLParser.HTMLParser.__init__(self)
        self.out = out
        self.heading_tags = heading_tags

        # Tag nesting depth; must be back at 0 when end() is called.
        self.indent = 0

        # True between a heading's start tag and its end tag.
        self.in_heading = False

        # (tag, attrs, heading_parts, body_parts), or None before the first
        # heading is seen.
        self.cur_group = None

    def log(self, msg, *args):
        """Debug tracing, indented by nesting depth.  Disabled by default."""
        if 0:
            log(self.indent * ' ' + msg, *args)

    def _FlushGroup(self):
        """Hand the group being built, if there is one, to the output list."""
        if self.cur_group:
            self.out.append(self.cur_group)

    def _Collect(self, chunk):
        """Add text to the current heading or body.

        Text seen before the first heading is dropped.
        """
        if self.in_heading:
            self.cur_group[2].append(chunk)
        elif self.cur_group:
            self.cur_group[3].append(chunk)

    def handle_starttag(self, tag, attrs):
        if tag in self.heading_tags:
            # A heading finishes the previous group and opens a new one.
            self.in_heading = True
            self._FlushGroup()
            self.cur_group = (tag, attrs, [], [])

        self.log('[%d] <> %s %s', self.indent, tag, attrs)
        self.indent += 1

    def handle_endtag(self, tag):
        if tag in self.heading_tags:
            self.in_heading = False

        self.log('[%d] </> %s', self.indent, tag)
        self.indent -= 1

    def handle_entityref(self, name):
        """
        From Python docs:
        This method is called to process a named character reference of the form
        &name; (e.g. >), where name is a general entity reference (e.g. 'gt').

        The name is looked up in html.CHAR_ENTITY and the result is collected
        like ordinary text.
        """
        self._Collect(html.CHAR_ENTITY[name])

    def handle_data(self, data):
        self.log('data %r', data)
        self._Collect(data)

    def end(self):
        """Flush the last group, then check that every tag was closed.

        Raises:
          RuntimeError: if start and end tags did not balance.
        """
        self._FlushGroup()

        # Maybe detect nesting?
        if self.indent == 0:
            return
        raise RuntimeError(
            'Unbalanced HTML tags: indent=%d, cur_group=%s' %
            (self.indent, self.cur_group))
 | 
| 282 | 
 | 
| 283 | 
 | 
def ExtractBody(s):
    """Return the HTML found between <body> and </body>.

    The splitter needs balanced tags, and what's in <head> isn't
    balanced.

    If there is no <body> start tag, nothing is written and the result is
    empty.
    """
    buf = cStringIO.StringIO()
    out = html.Output(s, buf)
    tag_lexer = html.TagLexer(s)

    tok_start = 0
    tokens = html.ValidTokens(s)
    for tok_id, tok_end in tokens:
        is_body = False
        if tok_id == html.StartTag:
            tag_lexer.Reset(tok_start, tok_end)
            is_body = tag_lexer.TagName() == 'body'

        if is_body:
            # Drop everything up to and including <body> ...
            out.SkipTo(tok_end)
            # ... then copy up to, but not including, </body>.  The helper
            # pulls tokens from the same iterator we are looping over.
            body_end_left, _ = html.ReadUntilEndTag(tokens, tag_lexer, 'body')
            out.PrintUntil(body_end_left)
            break

        tok_start = tok_end

    return buf.getvalue()
 | 
| 316 | 
 | 
| 317 | 
 | 
def SplitIntoCards(heading_tags, contents):
    """Yield one (tag, attrs, heading, text) card per heading in the page.

    Args:
      heading_tags: tag names that start a new card, e.g. ['h2', 'h3']
      contents: a whole HTML page; only the part inside <body> is used

    Yields:
      tag, attrs of the heading, the stripped heading text, and the body text
      with leading/trailing newlines reduced to exactly one trailing newline.
    """
    body = ExtractBody(contents)

    collected = []
    splitter = Splitter(heading_tags, collected)
    splitter.feed(body)
    splitter.end()

    for tag, attrs, heading_parts, body_parts in collected:
        heading = ''.join(heading_parts).strip()

        # Only newlines are stripped, so leading spaces on the first line
        # survive.
        body_text = ''.join(body_parts).strip('\n')

        yield tag, attrs, heading, body_text + '\n'
 | 
| 338 | 
 | 
| 339 | 
 | 
def HelpTopics(s):
    """Given an HTML page like index-{osh,ysh}.html,

    Yield groups (section_id, section_name, block of text)

    For each <h2 id="..."> the section name is the heading's content up to the
    first '(' if there is one.  The block of text comes from the next <code>
    element, whose class must start with 'language-chapter-links-'.
    """
    tag_lexer = html.TagLexer(s)

    pos = 0
    it = html.ValidTokens(s)
    for tok_id, end_pos in it:
        if tok_id == html.StartTag:
            tag_lexer.Reset(pos, end_pos)

            # Capture <h2 id="foo"> first
            if tag_lexer.TagName() == 'h2':
                h2_start_right = end_pos

                section_id = tag_lexer.GetAttr('id')
                assert section_id, 'Expected id= in %r' % tag_lexer.TagString()

                # The ReadUntil* helpers pull tokens from the same iterator
                # this loop is consuming.
                h2_end_left, _ = html.ReadUntilEndTag(it, tag_lexer, 'h2')

                anchor_html = s[h2_start_right:h2_end_left]
                paren_pos = anchor_html.find('(')
                if paren_pos == -1:
                    section_name = anchor_html
                else:
                    section_name = anchor_html[:paren_pos].strip()

                # Now find the <code></code> span
                _, code_start_right = html.ReadUntilStartTag(
                    it, tag_lexer, 'code')
                css_class = tag_lexer.GetAttr('class')
                assert css_class is not None
                assert css_class.startswith(
                    'language-chapter-links-'), tag_lexer.TagString()

                code_end_left, _ = html.ReadUntilEndTag(it, tag_lexer, 'code')

                text = html.ToText(s, code_start_right, code_end_left)
                yield section_id, section_name, text

        # NOTE(review): after an <h2> section this is still the end of the
        # <h2> start tag, not the end of </code>, so `pos` is stale for one
        # token.  Presumably harmless because the token right after </code>
        # is not a start tag - confirm.
        pos = end_pos
 | 
| 391 | 
 | 
| 392 | 
 | 
class DocNode(object):
    """A node in a tree that mirrors the doc structure, for visualization.

    Attributes are plain data: a name, optional heading attrs and body text,
    and a list of child nodes that starts out empty.
    """

    def __init__(self, name, attrs=None, text=None):
        # Each node gets its own fresh list of children.
        self.children = []
        self.text = text
        self.attrs = attrs  # for h2 and h3 links
        self.name = name
 | 
| 401 | 
 | 
| 402 | 
 | 
def CardsFromIndex(sh, out_prefix):
    """Write one help card per <h2> section of the index page on stdin.

    Args:
      sh: shell name used as the file name prefix, e.g. 'osh' or 'ysh'
      out_prefix: directory the '<sh>-<section_id>' files are written to
    """
    index_html = sys.stdin.read()

    written = []
    for section_id, section_name, text in HelpTopics(index_html):
        topic = '%s-%s' % (sh, section_id)  # e.g. ysh-overview
        card_path = os.path.join(out_prefix, topic)

        with open(card_path, 'w') as card_f:
            # Only the name goes in the file; section_id is printed
            # dynamically.
            card_f.write('%s\n\n' % section_name)
            card_f.write(text)

        log('  Wrote %s', card_path)
        written.append(section_id)

    log('  (doctools/make_help) -> %d sections -> %s', len(written),
        out_prefix)
 | 
| 425 | 
 | 
| 426 | 
 | 
def CardsFromChapters(out_dir, tag_level, paths):
    """Split chapters into cards, and build a tree describing their headings.

    Args:
      out_dir: directory to write embedded cards to, or None to write nothing
      tag_level: only headings with this tag (e.g. 'h3') become topics
      paths: list of chap-*.html to read

    Returns:
      (topic_to_chap, root_node)
      topic_to_chap maps a topic ID to its chapter name, or to None when the
      card is embedded (marked with the attribute oils-embed="1").
      root_node is a DocNode tree: root -> page -> h2 -> h3.

    Raises:
      RuntimeError: if an <h3> is seen before any <h2>.
    """
    topic_to_chap = {}

    root_node = DocNode('/')
    # NOTE: not reset per page, so it carries over from the previous chapter.
    cur_h2_node = None

    for chap_path in paths:
        with open(chap_path) as chap_f:
            contents = chap_f.read()

        filename = os.path.basename(chap_path)

        tmp, _ = os.path.splitext(filename)
        assert tmp.startswith('chap-'), chap_path
        chapter_name = tmp[len('chap-'):]

        page_node = DocNode(filename)

        cards = SplitIntoCards(['h2', 'h3', 'h4'], contents)

        for tag, attrs, heading, text in cards:
            # Prefer an explicit id= attribute (only when there is exactly
            # one); otherwise derive the ID from the heading text.
            values = [v for k, v in attrs if k == 'id']
            id_value = values[0] if len(values) == 1 else None

            topic_id = (id_value if id_value else html_lib.PrettyHref(
                heading, preserve_anchor_case=True))

            if tag == 'h2':
                h2 = DocNode(topic_id, attrs=attrs)
                page_node.children.append(h2)
                cur_h2_node = h2
            elif tag == 'h3':
                if cur_h2_node is None:
                    # Report this like other fatal errors rather than failing
                    # with an AttributeError on None.
                    raise RuntimeError(
                        '%s: <h3> %r appears before any <h2>' %
                        (filename, topic_id))
                # attach text so we can see which topics have empty bodies
                h3 = DocNode(topic_id, attrs=attrs, text=text)
                cur_h2_node.children.append(h3)

            if tag != tag_level:
                continue  # only headings at tag_level become topics

            embed = ('oils-embed', '1') in attrs

            if out_dir is not None and embed:
                # indices start with _
                card_path = os.path.join(out_dir, topic_id)
                with open(card_path, 'w') as card_f:
                    card_f.write(text)

            # help builtin will show URL if there's a chapter name
            topic_to_chap[topic_id] = None if embed else chapter_name

        root_node.children.append(page_node)

    # Number of <h2> nodes across all pages.
    num_sections = sum(len(child.children) for child in root_node.children)

    log(
        '%d chapters -> (doctools/make_help) -> %d <h3> cards from %d <h2> sections to %s',
        len(paths), len(topic_to_chap), num_sections, out_dir)

    return topic_to_chap, root_node
 | 
| 496 | 
 | 
| 497 | 
 | 
class StrPool(object):
    """Assigns a gStrN variable name to each distinct string.

    var_names maps each string to its variable name.  global_strs holds one
    GLOBAL_STR(...) declaration per string, in the order they were added.
    """

    def __init__(self):
        self.unique_id = 1  # number used for the next variable name
        self.global_strs = []
        self.var_names = {}

    def Add(self, s):
        """Register s, unless it has been added before."""
        if s not in self.var_names:
            import json

            name = 'gStr%d' % self.unique_id
            self.unique_id += 1

            # Use JSON as approximation for C++ string
            decl = 'GLOBAL_STR(%s, %s)' % (name, json.dumps(s))
            self.global_strs.append(decl)

            self.var_names[s] = name
 | 
| 518 | 
 | 
| 519 | 
 | 
def WriteTopicDict(topic_dict, header_f, cc_f):
    """Write the topic -> chapter dict as a C++ header and source file.

    Args:
      topic_dict: maps topic name to chapter name; a None value becomes
        nullptr in the generated code
      header_f: file object for the declaration of help_meta::TopicMetadata()
      cc_f: file object for the string constants, the dict, and the definition
    """
    header_f.write('''
#include "mycpp/runtime.h"

namespace help_meta {
Dict<BigStr*, BigStr*>* TopicMetadata();
}
''')

    pool = StrPool()
    key_names = []
    val_names = []

    # One pass: intern each key and value, and record the C++ names in the
    # same order the dict yields them.
    for topic, chapter in topic_dict.iteritems():
        pool.Add(topic)
        key_names.append(pool.var_names[topic])

        if chapter is None:
            val_names.append('nullptr')
        else:
            pool.Add(chapter)
            val_names.append(pool.var_names[chapter])

    str_decls = '\n'.join(pool.global_strs)
    keys_cpp = ' COMMA '.join(key_names)
    vals_cpp = ' COMMA '.join(val_names)

    cc_f.write('''
#include "mycpp/runtime.h"

namespace help_meta {

%s

GLOBAL_DICT(gTopics, BigStr*, BigStr*, %d, {%s}, {%s});

Dict<BigStr*, BigStr*>* TopicMetadata() {
  return gTopics;
}
}
''' % (str_decls, len(topic_dict), keys_cpp, vals_cpp))
 | 
| 564 | 
 | 
| 565 | 
 | 
def main(argv):
    """Command line entry point.

    Usage:
      cards-from-index SH OUT_PREFIX            (index HTML is read from stdin)
      cards-from-chapters OUT_DIR PY_OUT CC_PREFIX chap-*.html...
      ref-check (toc-*.md | chap-*.html)...

    Args:
      argv: full argument list; argv[1] is the action

    Raises:
      RuntimeError: if the action is missing or unknown, or if ref-check is
        given a file that is neither .md nor .html.
    """
    if len(argv) < 2:
        raise RuntimeError(
            'Expected an action: cards-from-index, cards-from-chapters, '
            'or ref-check')
    action = argv[1]

    if action == 'cards-from-index':
        sh = argv[2]  # osh or ysh
        out_prefix = argv[3]

        # Read HTML from stdin
        # TODO: could pass a list of files to speed it up
        CardsFromIndex(sh, out_prefix)

    elif action == 'cards-from-chapters':

        out_dir = argv[2]
        py_out = argv[3]
        cc_prefix = argv[4]
        pages = argv[5:]

        topic_to_chap, _ = CardsFromChapters(out_dir, 'h3', pages)

        # Write topic dict as Python and C++

        with open(py_out, 'w') as f:
            f.write('TOPICS = %s\n' % pprint.pformat(topic_to_chap))

            f.write('''

from typing import Dict

def TopicMetadata():
  # type: () -> Dict[str, str]
  return TOPICS
''')

        h_path = cc_prefix + '.h'
        cc_path = cc_prefix + '.cc'

        with open(h_path, 'w') as header_f:
            with open(cc_path, 'w') as cc_f:
                WriteTopicDict(topic_to_chap, header_f, cc_f)

    elif action == 'ref-check':
        from doctools import cmark
        from doctools import oils_doc
        from doctools import ref_check

        chapters = []
        all_toc_nodes = []

        for path in argv[2:]:
            filename = os.path.basename(path)

            if filename.endswith('.md'):
                assert filename.startswith('toc-'), path

                # First convert to HTML.  The local is not called 'html', so
                # it doesn't shadow the imported html module.
                with open(path) as in_file:
                    toc_html = cmark.md2html(in_file.read())

                # Now highlight code, which gives debug output for the
                # language-chapter-links-* blocks.  Only that debug output is
                # used here; the returned HTML is discarded.
                box_nodes = []
                oils_doc.HighlightCode(toc_html, None, debug_out=box_nodes)
                all_toc_nodes.append({'toc': filename, 'boxes': box_nodes})

            elif filename.endswith('.html'):
                assert filename.startswith('chap-'), path
                chapters.append(path)

            else:
                raise RuntimeError('Expected toc-* or chap-*, got %r' %
                                   filename)

        # out_dir=None: build the tree without writing any cards.
        _, chap_tree = CardsFromChapters(None, 'h3', chapters)

        log('')

        # Compare TOC vs. chapters
        ref_check.Check(all_toc_nodes, chap_tree)

    else:
        raise RuntimeError('Invalid action %r' % action)
 | 
| 651 | 
 | 
| 652 | 
 | 
if __name__ == '__main__':
    # main() raises RuntimeError for fatal problems such as an invalid action.
    # Print those as a one-line message on stderr and exit with status 1; any
    # other exception type propagates.
    try:
        main(sys.argv)
    except RuntimeError as e:
        print('FATAL: %s' % e, file=sys.stderr)
        sys.exit(1)
 |