| 1 | #!/usr/bin/env python2
 | 
| 2 | """split_doc.py."""
 | 
| 3 | from __future__ import print_function
 | 
| 4 | 
 | 
| 5 | import json
 | 
| 6 | import optparse
 | 
| 7 | import re
 | 
| 8 | import sys
 | 
| 9 | 
 | 
# Both patterns are compiled in verbose mode, so the spaces written between
# tokens are ignored by the regex engine and only aid readability.  A literal
# space has to be spelled as a character class, e.g. [ ].

# Date value of the form YYYY/MM/DD; the three groups are year, month, day.
DATE_RE = re.compile(r'(\d\d\d\d) / (\d\d) / (\d\d)', flags=re.X)

# Metadata line of the form 'name: value'.  Group 1 is the name (a run of
# non-whitespace before a colon); group 2 is the rest of the line after any
# spaces that follow the colon.
META_RE = re.compile(r'(\S+): [ ]* (.*)', flags=re.X)
 | 
| 13 | 
 | 
| 14 | 
 | 
def _ParseDate(value, prefix, meta):
    """Store a YYYY/MM/DD value as meta[prefix + 'year' / 'month' / 'day']."""
    m = DATE_RE.match(value)
    if not m:
        raise RuntimeError('Invalid date %r' % value)
    for part, digits in zip(('year', 'month', 'day'), m.groups()):
        meta[prefix + part] = int(digits)


def _FirstNonBlank(f, line):
    """Starting from 'line', return the first non-blank line, or '' at EOF."""
    # readline() returns '' only at EOF (a blank line is at least '\n'), so
    # testing 'line' for truth is what keeps this loop from spinning forever
    # once the input is exhausted.
    while line and not line.strip():
        line = f.readline()
    return line


def SplitDocument(default_vals, entry_f, meta_f, content_f, strict=False):
    """Split a document into metadata JSON and content Markdown.

    Used for blog posts and index.md / cross-ref.md.

    The document may begin with a metadata block delimited by '---' lines,
    holding one 'name: value' pair per line.  'date' and 'updated_date' values
    (YYYY/MM/DD) are expanded into integer year/month/day fields; every other
    value is stored as a string.  If the first non-blank line after the
    metadata is underlined with '=' characters, it is recorded as 'title'.

    Blank lines before the first content line are skipped, and a document
    that ends before any content (including an empty file) yields empty
    content instead of hanging.

    Args:
      default_vals: dict of metadata name -> value, used only for names the
        document itself does not define.
      entry_f: readable file object for the source document.
      meta_f: writable file object that receives the metadata as JSON.
      content_f: writable file object that receives the Markdown content.
      strict: if true, the document must start with a '---' line.

    Raises:
      RuntimeError: if strict is set and the first line is not '---', if the
        metadata block is not closed, or if a metadata line or date value is
        malformed.
    """
    first_line = entry_f.readline()
    has_meta = first_line.strip() == '---'
    if strict and not has_meta:
        raise RuntimeError("Document should start with --- (got %r)" %
                           first_line)

    meta = {}

    if has_meta:
        while True:
            raw = entry_f.readline()
            if not raw:
                # EOF before the closing delimiter.
                raise RuntimeError('Metadata block is missing its closing ---')
            line = raw.strip()
            if line == '---':
                break
            m = META_RE.match(line)
            if not m:
                raise RuntimeError('Invalid metadata line %r' % line)
            name, value = m.groups()

            if name == 'date':
                _ParseDate(value, '', meta)
            elif name == 'updated_date':
                _ParseDate(value, 'updated_', meta)
            else:
                meta[name] = value

        first_nonempty = _FirstNonBlank(entry_f, entry_f.readline())
    else:
        first_nonempty = _FirstNonBlank(entry_f, first_line)

    # Invariant: first_nonempty is the first non-blank line, or '' if the
    # document ended.  Now we need to see if it's the title, i.e. whether the
    # line after it is an underline of '=' characters.
    line_two = entry_f.readline()
    if re.match('=+', line_two):
        meta['title'] = first_nonempty.strip()

    # Fill in defaults after parsing all values, so the document's own
    # metadata always wins.  items() works on both Python 2 and 3.
    for name, value in default_vals.items():
        meta.setdefault(name, value)

    json.dump(meta, meta_f, indent=2)

    # The two lines consumed while looking for the title are still content.
    content_f.write(first_nonempty)
    content_f.write(line_two)
    content_f.write(entry_f.read())

    # Append a Markdown link reference definition so the content can refer to
    # [comments-url].
    comments_url = meta.get('comments_url', '')
    if comments_url:
        content_f.write("""
[comments-url]: %s

""" % comments_url)
 | 
| 104 | 
 | 
| 105 | 
 | 
def Options():
    """Build the optparse parser for the split_doc.py command line.

    Flags:
      -v            may be repeated; each value is appended to the
                    'default_vals' list (empty when the flag is absent).
      -s, --strict  sets 'strict' to True (False when the flag is absent).
    """
    parser = optparse.OptionParser(
        'split_doc.py [options] input_file out_prefix')

    # Like awk -v
    default_vals_help = (
        "If the doc's own metadata doesn't define 'name', set it to this value"
    )
    parser.add_option('-v', dest='default_vals', action='append', default=[],
                      help=default_vals_help)

    parser.add_option('-s', '--strict', dest='strict', action='store_true',
                      default=False, help="Require metadata")

    return parser
 | 
| 124 | 
 | 
| 125 | 
 | 
def main(argv):
    """Split the document named on the command line into two output files.

    Writes <out_prefix>_meta.json and <out_prefix>_content.md.

    Args:
      argv: full argument list, including the program name at index 0.

    Raises:
      RuntimeError: if input_file / out_prefix are missing, if a -v value is
        not of the form name=value, or if SplitDocument rejects the document.
    """
    o = Options()
    opts, argv = o.parse_args(argv)

    # argv[0] is the program name, so two positional arguments means at least
    # three entries.  Check before opening anything so that a bad invocation
    # doesn't truncate existing output files.
    if len(argv) < 3:
        raise RuntimeError(
            'Expected input_file and out_prefix arguments (got %r)' %
            argv[1:])

    entry_path = argv[1]  # e.g. blog/2016/11/01.md
    out_prefix = argv[2]  # e.g _site/blog/2016/11/01

    meta_path = out_prefix + '_meta.json'
    content_path = out_prefix + '_content.md'

    default_vals = {}
    for pair in opts.default_vals:
        # Split on the first '=' only, so values may themselves contain '='.
        name, sep, value = pair.partition('=')
        if not sep:
            raise RuntimeError('Expected -v name=value (got %r)' % pair)
        default_vals[name] = value

    with \
        open(entry_path) as entry_f, \
        open(meta_path, 'w') as meta_f, \
        open(content_path, 'w') as content_f:
        SplitDocument(default_vals,
                      entry_f,
                      meta_f,
                      content_f,
                      strict=opts.strict)
 | 
| 150 | 
 | 
| 151 | 
 | 
if __name__ == '__main__':
    # RuntimeError is the only exception handled here: it is reported as a
    # single line on stderr and the process exits with status 1.  Any other
    # exception propagates unchanged.
    try:
        main(sys.argv)
    except RuntimeError as err:
        print('FATAL: %s' % err, file=sys.stderr)
        raise SystemExit(1)
 |