OILS / doctools / ref_check.py View on Github | oilshell.org

182 lines, 122 significant
1#!/usr/bin/env python2
2"""ref_check.py: Check integrity of doc/ref, and print metrics."""
3from __future__ import print_function
4
5import collections
6#from pprint import pprint
7import sys
8
9from doctools.util import log
10
11
def PrintTree(node, f, indent=0):
    """Write an indented outline of a DocNode tree to the file object f.

    Every node is printed on its own line as '<name> <attrs>', preceded by one
    space per nesting level.  <attrs> is a parenthesized, comma-separated
    list of key=value pairs, or empty when the node has no attrs (the
    separating space after the name is still printed).

    Args:
      node: Tree node with .name, .attrs (sequence of 2-tuples) and .children.
      f: Writable file-like object that receives the outline.
      indent: Nesting depth of node; callers normally leave this at 0.
    """
    attr_text = ''
    if node.attrs:
        attr_text = '(%s)' % ', '.join('%s=%s' % kv for kv in node.attrs)

    prefix = indent * ' '
    print('%s%s %s' % (prefix, node.name, attr_text), file=f)

    child_depth = indent + 1
    for child in node.children:
        PrintTree(child, f, child_depth)
23
24
def Check(all_toc_nodes, chap_tree):
    """Cross-check the reference TOC against the chapter tree and log a report.

    Walks the TOC metadata and the chapter tree, then logs statistics, topics
    that appear in several chapters, topics listed in several TOCs, broken
    links (implemented TOC topics with no matching chapter topic), orphaned
    topics (chapter topics no TOC links to), and topics with very little
    text.  All output goes through log(); nothing is returned.

    Args:
      all_toc_nodes: Structure of doc/ref/toc-*.md.  An iterable of dicts,
        each with a 'toc' string and a list of 'boxes'.  Every box has a
        'to_chap' value and a list of 'lines'; every line has an 'impl' flag
        and a list of 'topics', which are (topic, topic_impl) pairs.
      chap_tree: Structure of chap-*.html.  A node whose .children are
        chapters, whose .children are sections, whose .children are topics.
        Chapters are read for .name; topics for .name, .attrs (a sequence of
        (key, value) pairs) and .text.

    Raises:
      AssertionError: if the topic 'j8-escape' is missing from either the
        implemented TOC topics or the chapter topics.
    """
    all_topics = []

    link_from = {}  # (chapter filename, topic) -> implemented
    link_to = set()  # (chapter name, topic id) pairs found in the chapters

    toc_topic_check = collections.defaultdict(list)  # topic -> TOCs listing it

    #
    # Walk the TOC metadata
    #

    topics_not_impl = 0
    sections_not_impl = 0

    log('TOC:')
    log('')
    for toc_node in all_toc_nodes:
        toc = toc_node['toc']
        log(' %s', toc)
        for box_node in toc_node['boxes']:
            to_chap = box_node['to_chap']
            log(' %s', to_chap)
            # The same for every topic in this box, so compute it once.
            chap_filename = 'chap-%s.html' % to_chap

            for line_info in box_node['lines']:
                section_impl = line_info['impl']
                if not section_impl:
                    sections_not_impl += 1

                topics = line_info['topics']
                for topic, topic_impl in topics:
                    # A topic only counts as implemented when its whole
                    # section is too.
                    is_implemented = topic_impl and section_impl
                    link_from[chap_filename, topic] = is_implemented

                    if is_implemented:
                        toc_topic_check[topic].append(toc)
                    else:
                        topics_not_impl += 1

                all_topics.extend(topics)

    log('')

    log('TOC stats:')
    log(' All Topics: %d', len(all_topics))
    log(' Unique topics: %d', len(set(all_topics)))
    log(' Topics marked implemented: %d', len(toc_topic_check))
    log(' Topics not implemented: %d', topics_not_impl)
    log(' Sections not implemented (X): %d', sections_not_impl)
    log('')

    if 0:  # Debugging aid: flip to 1 to dump the chapter tree.
        PrintTree(chap_tree, sys.stdout)

    #
    # Walk the Chapter Tree
    #

    num_sections = 0
    num_topics = 0
    num_topics_written = 0

    chap_topics = collections.defaultdict(list)  # topic_id -> list of chapters
    short_topics = []

    min_words = 5  # arbitrary

    for chap in chap_tree.children:
        for section in chap.children:
            num_sections += 1

            for topic in section.children:
                num_topics += 1

                # Use the 'id' attribute when there is exactly one; otherwise
                # fall back to the node name.
                values = [v for k, v in topic.attrs if k == 'id']
                if len(values) == 1:
                    topic_id = values[0]
                else:
                    topic_id = topic.name

                chap_topics[topic_id].append(chap.name)
                link_to.add((chap.name, topic_id))

                # split by whitespace
                num_words = len(topic.text.split())
                if num_words > min_words:
                    num_topics_written += 1
                elif num_words > 1:
                    short_topics.append((topic_id, topic.text))

    num_chapters = len(chap_tree.children)

    log('Chapter stats:')
    log(' num chapters = %d', num_chapters)
    log(' num_sections = %d', num_sections)
    log(' num_topics = %d', num_topics)

    chap_topic_set = set(chap_topics)
    log(' num unique topics = %d', len(chap_topic_set))
    log(' topics with first draft (more than %d words) = %d', min_words,
        num_topics_written)
    log('')

    log('%d in link_to set: %s', len(link_to), sorted(link_to)[:10])
    log('')
    log('%d in link_from set: %s', len(link_from), sorted(link_from)[:10])
    log('')

    index_topic_set = set(toc_topic_check)

    # Sanity check on one topic that is expected on both sides.
    assert 'j8-escape' in index_topic_set, (
        "'j8-escape' not among implemented TOC topics")
    assert 'j8-escape' in chap_topic_set, (
        "'j8-escape' not among chapter topics")

    # Report on topic namespace integrity, e.g. 'help append' should go to one
    # thing.  Sorted so the report is stable from run to run.
    log('Topics in multiple chapters:')
    for topic_id in sorted(chap_topics):
        chaps = chap_topics[topic_id]
        if len(chaps) > 1:
            log(' %s: %s', topic_id, ' '.join(chaps))
    log('')

    log('Duplicate topics in TOC:')
    log('')
    for topic in sorted(toc_topic_check):
        toc_list = toc_topic_check[topic]
        if len(toc_list) > 1:
            log('%20s: %s', topic, ' '.join(toc_list))
    log('')

    # Report on link integrity.
    # TOC topics with X can be missing, so only implemented links can break.
    impl_link_from = set(k for k, v in link_from.items() if v)
    broken = impl_link_from - link_to
    log('%d Broken Links:', len(broken))
    for pair in sorted(broken):
        log(' %s', pair)
    log('')

    orphaned = link_to - set(link_from)
    log('%d Orphaned Topics:', len(orphaned))
    for pair in sorted(orphaned):
        log(' %s', pair)
    log('')

    log('Short topics:')
    for topic_id, text in short_topics:
        log('%15s %r', topic_id, text)
    log('')