| 1 | #!/usr/bin/env python
 | 
| 2 | from __future__ import print_function
 | 
| 3 | """
 | 
| 4 | quick_ref.py
 | 
| 5 | """
 | 
| 6 | 
 | 
| 7 | import cgi
 | 
| 8 | import os
 | 
| 9 | import pprint
 | 
| 10 | import re
 | 
| 11 | import sys
 | 
| 12 | 
 | 
| 13 | 
 | 
# Level-1 headings in the table of contents: a whole line made up only of
# upper-case ASCII letters and spaces, e.g. COMMAND LANGUAGE
CAPS_RE = re.compile(r'^[A-Z ]+$')

# A topic reference inside a table-of-contents line.
#
# 1. Optional X, then a SINGLE space              (group 1)
#    followed by an optional '@' that is not captured by any group
# 2. lower-case or upper-case topic                (group 2)
# 3. Optional: A SINGLE space, then punctuation    (group 3; the pattern
#    accepts any run of non-whitespace characters here)
#
# The pattern is compiled with re.VERBOSE, which is why literal spaces are
# written as the character class [ ].

TOPIC_RE = re.compile(
    r'\b(X[ ])?\@?([a-z_\-]+|[A-Z0-9_]+)([ ]\S+)?', re.VERBOSE)

# Sections have alphabetical characters, spaces, and '/' for I/O.  They are
# turned into anchors.  Leading whitespace before the '[' is allowed, and
# group 1 is the text between the brackets.
SECTION_RE = re.compile(r'\s*\[([a-zA-Z /]+)\]')

# Can occur at the beginning of a line, or before a topic.  This is the HTML
# emitted in place of a literal "X " marker.
RED_X = '<span style="color: darkred">X </span>'
 | 
| 30 | 
 | 
| 31 | 
 | 
| 32 | def _StringToHref(s):
 | 
| 33 |   return s.replace(' ', '-')
 | 
| 34 | 
 | 
| 35 | 
 | 
def MaybeHighlightSection(line, parts):
  """Turn a leading "[Section Name]" marker into a link.

  Args:
    line: one line of table-of-contents text.
    parts: list of HTML fragments; mutated in place when a section is found.

  Returns:
    The part of the line that still has to be processed.  When the line does
    not start with a section marker (per SECTION_RE), this is the line itself
    and parts is left untouched.  Otherwise it is everything after the section
    name, beginning with the closing ']'.
  """
  found = SECTION_RE.match(line)
  if found is None:
    return line

  name = found.group(1)
  name_start, name_end = found.span(1)

  # Everything up to the name is leading whitespace plus the opening '[',
  # neither of which needs HTML escaping.
  parts.append(line[:name_start])
  parts.append(
      '<a href="#%s" class="level2">%s</a>' % (_StringToHref(name), name))

  return line[name_end:]
 | 
| 53 | 
 | 
| 54 | 
 | 
def HighlightLine(line):
  """Convert a line of text to HTML.

  Topics are highlighted and X made red.

  A leading "[Section]" marker is linked first (see MaybeHighlightSection).
  Then every TOPIC_RE match is turned into an <a href="#topic"> link, with an
  optional "X " prefix rendered as RED_X.  After the first topic, linking
  continues only while consecutive topics are separated by exactly three
  spaces (optionally followed by '@'); the rest of the line is emitted as
  escaped plain text.

  Args:
    line: one line of table-of-contents text, including its newline if any.

  Returns:
    The HTML for the line as a single string.
  """
  parts = []
  last_end = 0
  found_one = False

  line = MaybeHighlightSection(line, parts)

  for m in TOPIC_RE.finditer(line):
    have_x = m.group(1) is not None
    have_suffix = m.group(3) is not None

    # The match proper starts at the X marker when there is one.
    start = m.start(1) if have_x else m.start(2)

    prior_piece = cgi.escape(line[last_end:start])
    parts.append(prior_piece)

    # Topics on the same line must be separated by exactly THREE spaces.
    # This check has to come before anything from the current match is
    # emitted: the tail appended after the loop restarts at `start`, so
    # emitting RED_X here first would print the X twice.
    if found_one and prior_piece not in ('   ', '   @'):
      last_end = start
      break  # stop linking

    if have_x:
      parts.append(RED_X)
      # An optional '@' may sit between "X " and the topic.  It belongs to
      # neither group, so emit it explicitly instead of dropping it.
      parts.append(cgi.escape(line[m.end(1):m.start(2)]))

    # this matters because the separator is three spaces
    last_end = m.end(3) if have_suffix else m.end(2)

    topic = m.group(2)
    parts.append('<a href="#%s">%s</a>' % (topic, topic))

    if have_suffix:
      parts.append(cgi.escape(m.group(3)))

    found_one = True

  # Whatever was not consumed by a linked topic is plain text.
  parts.append(cgi.escape(line[last_end:]))

  return ''.join(parts)
 | 
| 107 | 
 | 
| 108 | 
 | 
def TableOfContents(f):
  """Render the table-of-contents text file as HTML on stdout.

  The first line of f is the page title.  Every following line is classified
  and written inside a single <pre> block:

    - blank line                 -> a bare newline
    - ALL CAPS line (CAPS_RE)    -> bold level-1 link
    - line indented by 2 spaces  -> HighlightLine()
    - line starting with "X "    -> RED_X followed by HighlightLine() of the
                                    rest
    - anything else              -> HTML-escaped text

  Args:
    f: iterable file object positioned at the start of the TOC text.

  Raises:
    KeyError: if the OIL_VERSION environment variable is not set.
  """
  # inputs: -toc.txt, -pages.txt

  # outputs:
  #   tree of HTML

  # maybe: man page for OSH usage (need to learn troff formatting!)

  # syntactic elements:
  # - toc
  #   - links to pages
  #   - (X) for not implemented
  #   - aliases:  semicolon ;
  # - pages
  #   - usage line (^Usage:)
  #   - internal links read[1]
  #     - <a href="#read"><read>
  #     - read[1]
  #
  #   - example blocks

  # generated parts:
  #  - builtin usage lines, from core/args.py
  #  - and osh usage itself

  # Language:

  ##### COMMAND LANGUAGE  (turns into <a name=>)
  ### Commands
  # case
  # if

  # Basically any line that begins with ^# ^### or ^##### is special?
  # <h1> <h2> <h3>
  # Still need links

  # TODO:
  # - Copy sh_spec.py for # parsing
  # - Copy oilshell.org Snip for running examples and showing output!

  # More stuff:
  # - command, word, arith, boolean all need intros.
  # - So does every section need a name?
  # - Maybe just highlight anything after [?
  #   - What kind of links are they?

  # Three level hierarchy:
  # CAP WORDS
  # [Title Words For Sections]
  #  problem: line breaks like [Shell Process
  #    Control]
  #  there is no valid way to mark this up, even if you could parse it!
  #    you would need a table?

  # lower-with-dashes for topics

  # TODO: Add version and so forth?
  page_title = f.readline()
  print('<h1>%s</h1>' % cgi.escape(page_title))
  print('<a name="toc"></a>')
  # doc/run.sh must set environment.
  print('<i>Version %s</i>' % os.environ['OIL_VERSION'])
  print('<pre>')

  for raw in f:
    # Blank lines are passed through as a bare newline.
    if raw.strip() == '':
      sys.stdout.write('\n')
      continue

    if CAPS_RE.match(raw):
      # Level-1 heading.  strip() removed the newline, so add it back in the
      # template.
      title = raw.strip()
      rendered = '<b><a href="#%s" class="level1">%s</a></b>\n' % (
          _StringToHref(title), cgi.escape(title))
      sys.stdout.write(rendered)
      continue

    if raw.startswith('  '):
      sys.stdout.write(HighlightLine(raw))
      continue

    if raw.startswith('X '):
      # The leading "X " is replaced by its red HTML form.
      sys.stdout.write(RED_X + HighlightLine(raw[2:]))
      continue

    sys.stdout.write(cgi.escape(raw))

  print('</pre>')
 | 
| 198 | 
 | 
| 199 | # TODO:
 | 
| 200 | # - group 1: # prefix determines h1, h2, h3
 | 
| 201 | # - group 2 is the <a name=""> -- there can be MORE THAN ONE
 | 
| 202 | #   - OSH-BINARY
 | 
| 203 | #   - Commands
 | 
| 204 | #   - for-expr
 | 
| 205 | #   - true|false
 | 
| 206 | # - group 3: the anchor text to display
 | 
| 207 | #
 | 
| 208 | 
 | 
| 209 | ## Conditional Conditional Constructs
 | 
| 210 | ## Quotes Quotes
 | 
| 211 | ### COMMAND-LANGUAGE Command Language
 | 
| 212 | 
 | 
| 213 | ### {Conditional} Conditional Constructs
 | 
| 214 | ### <Conditional> Conditional Constructs
 | 
| 215 | 
 | 
| 216 | # These have no title?  Just true?  false?
 | 
| 217 | 
 | 
| 218 | # true|false true
 | 
| 219 | 
 | 
| 220 | 
 | 
class TextOutput:
  """Writes one plain-text file per section and records where topics live.

  Attributes are set directly from the constructor arguments:
    text_dir: directory that section files are written into.
    topic_lookup: mapping that WriteFile fills with topic -> section file
        name.
  """

  def __init__(self, text_dir, topic_lookup):
    self.text_dir = text_dir
    self.topic_lookup = topic_lookup

  def WriteFile(self, section_id, topics, lines):
    """Write the lines of one section and register its topics.

    Args:
      section_id: sequence of three integers; joined with '-' to form the
          file name inside text_dir.
      topics: topic names that should map to this section's file name.
      lines: strings written to the file as-is (no newlines are added).
    """
    section_name = '%d-%d-%d' % tuple(section_id)

    with open(os.path.join(self.text_dir, section_name), 'w') as out:
      out.writelines(lines)

    for name in topics:
      self.topic_lookup[name] = section_name
 | 
| 238 | 
 | 
| 239 | 
 | 
# A heading line in the pages file: a run of '#' characters, a space, then
# "<...>" followed by optional text.
#   group 1: the '#' run
#   group 2: the text between '<' and '>'
#   group 3: the remainder of the line after '>'
# Both (.*) groups are greedy, so group 2 extends to the LAST '>' on the line.
# TODO: Also allow {} in addition to <> delimiters.
HEADING_RE = re.compile(r'(#+) <(.*)>(.*)')
 | 
| 242 | 
 | 
def Pages(f, text_out):
  """Render the help pages as HTML on stdout and split them into sections.

  Lines starting with '##' are either headings (they match HEADING_RE) or
  comments.  A heading with 5, 4 or 3 '#' characters starts a new level 1, 2
  or 3 section and is printed as <h2>, <h3> or <h4>, preceded by one
  <a name=""> anchor per topic.  If the heading has no text after the
  "<topics>" part, the topic string itself is used as the heading text.
  All other lines are HTML-escaped, printed inside <pre> blocks, and
  collected as the text of the current section.

  Each section's raw lines and topics are handed to text_out.WriteFile(),
  once when the next heading is reached and once more at end of input, so
  that the last section is written too.

  Args:
    f: iterable of lines of the pages text.
    text_out: object with a WriteFile(section_id, topics, lines) method.

  Raises:
    RuntimeError: for a heading whose number of '#' characters is not 3, 4 or
        5, or for a line starting with '###' that is not a valid heading.
  """
  print('<pre>')

  section_id = [0, 0, 0]  # L1, L2, L3
  prev_topics = []  # topics of the section currently being collected
  prev_lines = []  # raw lines of the section currently being collected

  for line in f:
    if not line.startswith('##'):  # normal line
      sys.stdout.write(cgi.escape(line))
      prev_lines.append(line)
      continue

    # heading or comment
    m = HEADING_RE.match(line)
    if not m:
      # Three or more should be a heading, not a comment.
      if line.startswith('###'):
        raise RuntimeError('Expected a heading, got %r' % line)
      continue  # a '##' comment: produces no output

    # We got a heading.  Write the previous lines
    text_out.WriteFile(section_id, prev_topics, prev_lines)
    prev_lines = []

    level, topic_str, text = m.groups()
    topics = topic_str.split()
    if not text.strip():
      text = topic_str

    if len(level) == 5:
      htag = 2
      section_id[0] += 1  # from 2.3.4 to 3.0.0
      section_id[1] = 0
      section_id[2] = 0

    elif len(level) == 4:
      htag = 3
      section_id[1] += 1  # from 2.2.3 to 2.3.0
      section_id[2] = 0

    elif len(level) == 3:
      htag = 4
      section_id[2] += 1  # from 2.2.2 to 2.2.3

    else:
      raise RuntimeError('Invalid level %r' % level)

    print('</pre>')
    for topic in topics:
      print('<a name="%s"></a>' % topic)
    print('<h%d>%s</h%d>' % (htag, text, htag))
    print('<!-- %d.%d.%d -->' % tuple(section_id))
    print('<pre>')

    prev_topics = topics

  # Sections are only flushed when the NEXT heading is seen, so the final
  # section has to be written here.  Otherwise its text file and its topic
  # lookup entries would be missing.
  text_out.WriteFile(section_id, prev_topics, prev_lines)

  print('</pre>')
 | 
| 303 | 
 | 
| 304 | 
 | 
| 305 | 
 | 
def main(argv):
  """Command-line entry point.

  Usage:
    quick_ref.py toc TOC_TXT
    quick_ref.py pages PAGES_TXT TEXT_DIR PY_OUT_PATH

  'toc' prints the table of contents as HTML on stdout.  'pages' prints the
  pages as HTML on stdout, writes per-section text files into TEXT_DIR, and
  writes a Python module defining TOPIC_LOOKUP to PY_OUT_PATH.

  Args:
    argv: full argument vector, including the program name at index 0.

  Raises:
    RuntimeError: if the action is missing or unknown, or if the action is
        given too few arguments.  The __main__ handler reports RuntimeError as
        a one-line fatal error, so bad invocations do not end in a traceback.
  """
  if len(argv) < 2:
    raise RuntimeError("Expected an action ('toc' or 'pages')")

  action = argv[1]
  if action == 'toc':
    if len(argv) < 3:
      raise RuntimeError("Action 'toc' expects 1 argument: TOC_TXT")
    with open(argv[2]) as f:
      TableOfContents(f)

  elif action == 'pages':
    if len(argv) < 5:
      raise RuntimeError(
          "Action 'pages' expects 3 arguments: PAGES_TXT TEXT_DIR PY_OUT_PATH")
    pages_txt, text_dir, py_out_path = argv[2:5]

    topic_lookup = {}
    with open(pages_txt) as f:
      text_out = TextOutput(text_dir, topic_lookup)
      Pages(f, text_out)

    # TODO: Fuzzy matching of help topics
    d = pprint.pformat(topic_lookup)
    with open(py_out_path, 'w') as f:
      f.write('TOPIC_LOOKUP = ')
      f.write(d)
      # BUG WORKAROUND: The OPy parser requires an EOL!  See opy/run.sh parser-bug.
      f.write('\n')

    print('Wrote %s/ and %s' % (text_dir, py_out_path), file=sys.stderr)

  else:
    raise RuntimeError('Invalid action %r' % action)
 | 
| 333 | 
 | 
| 334 | 
 | 
# Script entry point.
if __name__ == '__main__':
  try:
    main(sys.argv)
  except RuntimeError as e:
    # Only RuntimeError is turned into a one-line message and exit status 1;
    # any other exception type is not caught here.
    print('FATAL: %s' % e, file=sys.stderr)
    sys.exit(1)
 |