| 1 | #!/usr/bin/env python2
 | 
| 2 | """ref_check.py: Check integrity of doc/ref, and print metrics."""
 | 
| 3 | from __future__ import print_function
 | 
| 4 | 
 | 
| 5 | import collections
 | 
| 6 | #from pprint import pprint
 | 
| 7 | import sys
 | 
| 8 | 
 | 
| 9 | from doctools.util import log
 | 
| 10 | 
 | 
| 11 | 
 | 
def PrintTree(node, f, indent=0):
    """Write an indented, one-line-per-node outline of a DocNode tree to f.

    The original note says this is the DocNode tree from make_help.py.

    Args:
      node: Tree node exposing .name, .attrs (a sequence of 2-tuples) and
        .children (an iterable of nodes of the same kind).
      f: File-like object that print() writes each line to.
      indent: Depth of this node; every level is rendered as two spaces.
    """
    attr_text = ''
    if node.attrs:
        rendered = ['%s=%s' % kv for kv in node.attrs]
        attr_text = '(' + ', '.join(rendered) + ')'

    # The separator after the name is emitted even when there are no
    # attributes, so attribute-less nodes end with a single trailing space.
    line = '%s%s %s' % ('  ' * indent, node.name, attr_text)
    print(line, file=f)

    child_indent = indent + 1
    for child in node.children:
        PrintTree(child, f, child_indent)
 | 
| 23 | 
 | 
| 24 | 
 | 
def Check(all_toc_nodes, chap_tree):
    """Cross-check the TOC metadata against the chapter tree and log a report.

    All output goes through log(); nothing is returned.

    Args:
      all_toc_nodes: Structure of doc/ref/toc-*.md.  An iterable of dicts with
        'toc' and 'boxes' keys.  Each box has 'to_chap' and 'lines'; each line
        has 'impl' and 'topics', where 'topics' is a list of
        (topic, implemented) pairs.
      chap_tree: Structure of chap-*.html.  A node whose .children are
        chapters, whose .children are sections, whose .children are topics
        exposing .name, .attrs (key/value pairs) and .text.

    Raises:
      AssertionError: If the 'j8-escape' topic is not among the implemented
        TOC topics or not among the chapter topics.
    """
    all_topics = []  # every (topic, implemented) pair, duplicates included

    link_from = {}  # (filename, topic) -> implemented
    link_to = set()  # (chap.name, topic_id) for every topic in the chapters

    # topic -> list of TOCs that mark it as implemented
    toc_topic_check = collections.defaultdict(list)

    #
    # Walk the TOC metadata
    #

    topics_not_impl = 0
    sections_not_impl = 0

    log('TOC:')
    log('')
    for toc_node in all_toc_nodes:
        toc = toc_node['toc']
        log('  %s', toc)
        for box_node in toc_node['boxes']:
            to_chap = box_node['to_chap']
            log('    %s', to_chap)
            chap_filename = 'chap-%s.html' % to_chap

            for line_info in box_node['lines']:
                section_impl = line_info['impl']
                if not section_impl:
                    sections_not_impl += 1

                topics = line_info['topics']
                for topic, topic_impl in topics:
                    # A topic only counts as implemented when its whole
                    # section is implemented too.
                    is_implemented = topic_impl and section_impl
                    link_from[chap_filename, topic] = is_implemented

                    if is_implemented:
                        toc_topic_check[topic].append(toc)
                    else:
                        topics_not_impl += 1

                all_topics.extend(topics)

    log('')

    # Count distinct topic *names*.  De-duplicating the raw pairs would count
    # a topic twice when it is listed both as implemented and as not.
    unique_topic_names = set(name for name, _ in all_topics)

    log('TOC stats:')
    log('  All Topics: %d', len(all_topics))
    log('  Unique topics: %d', len(unique_topic_names))
    log('  Topics marked implemented: %d', len(toc_topic_check))
    log('  Topics not implemented: %d', topics_not_impl)
    log('  Sections not implemented (X): %d', sections_not_impl)
    log('')

    # Debugging aid: flip to 1 to dump the chapter tree.
    if 0:
        PrintTree(chap_tree, sys.stdout)

    #
    # Walk the Chapter Tree
    #

    num_sections = 0
    num_topics = 0
    num_topics_written = 0

    chap_topics = collections.defaultdict(list)  # topic_id -> list of chapters
    short_topics = []  # (topic_id, text) for topics with 2..min_words words

    min_words = 5  # arbitrary

    for chap in chap_tree.children:
        for section in chap.children:
            num_sections += 1

            for topic in section.children:
                num_topics += 1

                # Use the 'id' attribute when there is exactly one; otherwise
                # fall back to the node name.
                values = [v for k, v in topic.attrs if k == 'id']
                if len(values) == 1:
                    topic_id = values[0]
                else:
                    topic_id = topic.name

                chap_topics[topic_id].append(chap.name)
                # Compared against the 'chap-*.html' keys of link_from below,
                # so chap.name is presumably the chapter's file name.
                link_to.add((chap.name, topic_id))

                # split by whitespace
                num_words = len(topic.text.split())
                if num_words > min_words:
                    num_topics_written += 1
                elif num_words > 1:
                    short_topics.append((topic_id, topic.text))

    num_chapters = len(chap_tree.children)

    log('Chapter stats:')
    log('  num chapters = %d', num_chapters)
    log('  num_sections = %d', num_sections)
    log('  num_topics = %d', num_topics)

    chap_topic_set = set(chap_topics)
    log('  num unique topics = %d', len(chap_topic_set))
    log('  topics with first draft (more than %d words) = %d', min_words,
        num_topics_written)
    log('')

    log('%d in link_to set: %s', len(link_to), sorted(link_to)[:10])
    log('')
    log('%d in link_from set: %s', len(link_from), sorted(link_from)[:10])
    log('')

    index_topic_set = set(toc_topic_check)

    # Sanity check: this topic is expected on both sides (presumably a canary
    # that shows both inputs were actually parsed).
    assert 'j8-escape' in index_topic_set
    assert 'j8-escape' in chap_topic_set

    # Report on topic namespace integrity, e.g. 'help append' should go to one
    # thing
    log('Topics in multiple chapters:')
    # Sorted so the report is stable and matches the other sorted listings.
    for topic_id in sorted(chap_topics):
        chaps = chap_topics[topic_id]
        if len(chaps) > 1:
            log('  %s: %s', topic_id, ' '.join(chaps))
    log('')

    log('Duplicate topics in TOC:')
    log('')
    for topic in sorted(toc_topic_check):
        toc_list = toc_topic_check[topic]
        if len(toc_list) > 1:
            log('%20s: %s', topic, ' '.join(toc_list))
    log('')

    # Report on link integrity

    # TOC topics with X can be missing, so only implemented links can be
    # broken.
    impl_link_from = set(k for k, v in link_from.items() if v)
    broken = impl_link_from - link_to
    log('%d Broken Links:', len(broken))
    for pair in sorted(broken):
        log('  %s', pair)
    log('')

    # Chapter topics that no TOC entry points at, implemented or not.
    orphaned = link_to - set(link_from)
    log('%d Orphaned Topics:', len(orphaned))
    for pair in sorted(orphaned):
        log('  %s', pair)
    log('')

    log('Short topics:')
    for topic_id, text in short_topics:
        log('%15s  %r', topic_id, text)
    log('')
 |