1 | #!/usr/bin/env python2
|
2 | """ref_check.py: Check integrity of doc/ref, and print metrics."""
|
3 | from __future__ import print_function
|
4 |
|
5 | import collections
|
6 | #from pprint import pprint
|
7 | import sys
|
8 |
|
9 | from doctools.util import log
|
10 |
|
11 |
|
def PrintTree(node, f, indent=0):
    """Write a DocNode tree (see make_help.py) to f, one node per line.

    Each line is the node's name preceded by `indent` spaces and followed by
    a space and its attributes rendered as '(k=v, k2=v2)'.  When the node has
    no attributes the line simply ends with that single space.  Children are
    written recursively, one level deeper.

    Args:
      node: object with .name, .attrs (sequence of 2-tuples) and .children
      f: file-like object to print to
      indent: nesting depth of `node`; one space is emitted per level
    """
    attr_text = ''
    if node.attrs:
        rendered = ['%s=%s' % attr_pair for attr_pair in node.attrs]
        attr_text = '(%s)' % ', '.join(rendered)

    line = '%s%s %s' % (indent * ' ', node.name, attr_text)
    print(line, file=f)

    child_indent = indent + 1
    for child in node.children:
        PrintTree(child, f, child_indent)
|
23 |
|
24 |
|
def _WalkToc(all_toc_nodes):
    """Log the TOC outline and collect its topics and outgoing links.

    Args:
      all_toc_nodes: same structure that Check() receives

    Returns:
      A tuple (all_topics, link_from, toc_topic_check, topics_not_impl,
      sections_not_impl) where
        all_topics: every entry of every line's 'topics' list, duplicates kept
        link_from: dict of (chapter filename, topic) -> implemented
        toc_topic_check: dict of implemented topic -> list of TOCs naming it
        topics_not_impl: number of topic entries not marked implemented
        sections_not_impl: number of lines whose 'impl' flag is false
    """
    all_topics = []
    link_from = {}  # (filename, topic) -> implemented
    toc_topic_check = collections.defaultdict(list)

    topics_not_impl = 0
    sections_not_impl = 0

    log('TOC:')
    log('')
    for toc_node in all_toc_nodes:
        toc = toc_node['toc']
        log(' %s', toc)
        for box_node in toc_node['boxes']:
            to_chap = box_node['to_chap']
            log(' %s', to_chap)

            # Every topic in this box links into the same chapter file, so
            # build the name once per box rather than once per topic.
            chap_filename = 'chap-%s.html' % to_chap

            for line_info in box_node['lines']:
                section_impl = line_info['impl']
                if not section_impl:
                    sections_not_impl += 1

                topics = line_info['topics']
                for topic, topic_impl in topics:
                    # A topic only counts as implemented when both the topic
                    # and the section that contains it are marked so.
                    is_implemented = topic_impl and section_impl
                    link_from[chap_filename, topic] = is_implemented

                    if is_implemented:
                        toc_topic_check[topic].append(toc)
                    else:
                        topics_not_impl += 1

                all_topics.extend(topics)

    log('')

    return (all_topics, link_from, toc_topic_check, topics_not_impl,
            sections_not_impl)


def _WalkChapters(chap_tree, min_words):
    """Collect topics, link targets and word-count metrics from the chapters.

    Args:
      chap_tree: same structure that Check() receives
      min_words: a topic with more than this many words counts as written

    Returns:
      A tuple (chap_topics, link_to, short_topics, num_sections, num_topics,
      num_topics_written) where
        chap_topics: dict of topic_id -> list of chapter names containing it
        link_to: set of (chapter name, topic_id)
        short_topics: list of (topic_id, text) for topics with more than one
          word but no more than min_words words
    """
    chap_topics = collections.defaultdict(list)  # topic_id -> list of chapters
    link_to = set()
    short_topics = []

    num_sections = 0
    num_topics = 0
    num_topics_written = 0

    for chap in chap_tree.children:
        for section in chap.children:
            num_sections += 1

            for topic in section.children:
                num_topics += 1

                # Prefer the 'id' attribute when there is exactly one;
                # otherwise fall back to the node's name.
                values = [v for k, v in topic.attrs if k == 'id']
                topic_id = values[0] if len(values) == 1 else topic.name

                chap_topics[topic_id].append(chap.name)
                link_to.add((chap.name, topic_id))

                # split by whitespace
                num_words = len(topic.text.split())
                if num_words > min_words:
                    num_topics_written += 1
                elif num_words > 1:
                    # Topics with zero or one word are neither counted as
                    # written nor reported as short.
                    short_topics.append((topic_id, topic.text))

    return (chap_topics, link_to, short_topics, num_sections, num_topics,
            num_topics_written)


def Check(all_toc_nodes, chap_tree):
    """Cross-check the TOC against the chapter tree and log metrics.

    Logs the TOC outline, TOC and chapter statistics, topics that appear in
    more than one chapter or more than one TOC, links from implemented TOC
    topics that have no chapter target ("broken"), chapter topics that no TOC
    entry points at ("orphaned"), and topics with very little text.

    Args:
      all_toc_nodes: Structure of doc/ref/toc-*.md.  Iterable of dicts with
        keys 'toc' and 'boxes'; each box is a dict with 'to_chap' and
        'lines'; each line is a dict with 'impl' and 'topics', the latter a
        list of (topic, topic_impl) pairs.
      chap_tree: Structure of chap-*.html.  Node whose .children are
        chapters, whose .children are sections, whose .children are topics;
        topics have .name, .attrs (sequence of pairs) and .text.

    Raises:
      AssertionError: if the 'j8-escape' topic is missing from either the
        implemented TOC topics or the chapter topics.
    """
    #
    # Walk the TOC metadata
    #

    (all_topics, link_from, toc_topic_check, topics_not_impl,
     sections_not_impl) = _WalkToc(all_toc_nodes)

    # NOTE(review): all_topics holds (topic, topic_impl) pairs, so 'Unique
    # topics' below counts distinct pairs rather than distinct topic names.
    # Confirm that this is the intended metric.
    log('TOC stats:')
    log(' All Topics: %d', len(all_topics))
    log(' Unique topics: %d', len(set(all_topics)))
    log(' Topics marked implemented: %d', len(toc_topic_check))
    log(' Topics not implemented: %d', topics_not_impl)
    log(' Sections not implemented (X): %d', sections_not_impl)
    log('')

    # Debugging aid: change to 1 to dump the chapter tree.
    if 0:
        PrintTree(chap_tree, sys.stdout)

    #
    # Walk the Chapter Tree
    #

    min_words = 5  # arbitrary

    (chap_topics, link_to, short_topics, num_sections, num_topics,
     num_topics_written) = _WalkChapters(chap_tree, min_words)

    num_chapters = len(chap_tree.children)

    log('Chapter stats:')
    log(' num chapters = %d', num_chapters)
    log(' num_sections = %d', num_sections)
    log(' num_topics = %d', num_topics)

    chap_topic_set = set(chap_topics)
    log(' num unique topics = %d', len(chap_topic_set))
    log(' topics with first draft (more than %d words) = %d', min_words,
        num_topics_written)
    log('')

    # Only a sample of each set is shown to keep the output short.
    log('%d in link_to set: %s', len(link_to), sorted(link_to)[:10])
    log('')
    log('%d in link_from set: %s', len(link_from), sorted(link_from)[:10])
    log('')

    index_topic_set = set(toc_topic_check)

    # Sanity check on a topic expected to exist on both sides.
    # NOTE(review): presumably guards against an empty or mis-parsed input;
    # confirm intent.  These asserts are skipped when Python runs with -O.
    assert 'j8-escape' in index_topic_set
    assert 'j8-escape' in chap_topic_set

    # Report on topic namespace integrity, e.g. 'help append' should go to one
    # thing
    log('Topics in multiple chapters:')
    # Sorted so the report is deterministic, like the other reports below.
    for topic_id in sorted(chap_topics):
        chaps = chap_topics[topic_id]
        if len(chaps) > 1:
            log(' %s: %s', topic_id, ' '.join(chaps))
    log('')

    log('Duplicate topics in TOC:')
    log('')
    for topic in sorted(toc_topic_check):
        toc_list = toc_topic_check[topic]
        if len(toc_list) > 1:
            log('%20s: %s', topic, ' '.join(toc_list))
    log('')

    # Report on link integrity
    if 1:
        # TOC topics with X can be missing
        # .items() rather than .iteritems() so this runs on Python 2 and 3.
        impl_link_from = set(k for k, v in link_from.items() if v)
        broken = impl_link_from - link_to
        log('%d Broken Links:', len(broken))
        for pair in sorted(broken):
            log(' %s', pair)
        log('')

    # Chapter topics that no TOC entry (implemented or not) points at.
    orphaned = link_to - set(link_from)
    log('%d Orphaned Topics:', len(orphaned))
    for pair in sorted(orphaned):
        log(' %s', pair)
    log('')

    log('Short topics:')
    for topic, text in short_topics:
        log('%15s %r', topic, text)
    log('')
|