1 | #!/usr/bin/env python2
|
2 | """html_lib.py.
|
3 |
|
4 | Shared between HTML processors.
|
5 |
|
6 | TODO: Write a "pull parser" API!
|
7 | """
|
8 | from __future__ import print_function
|
9 |
|
10 | import cgi
|
11 | import re
|
12 |
|
13 |
|
def AttrsToString(attrs):
    """Render a sequence of (name, value) pairs as HTML attribute text.

    Args:
      attrs: iterable of (name, value) string pairs; may be empty or None.

    Returns:
      '' when attrs is empty/None.  Otherwise each pair is rendered as
      ' name="value"' (note the leading space) and the pieces are
      concatenated, so the result can be appended directly after a tag name.

    The value is escaped for use inside a double-quoted attribute: & < > and "
    are replaced by entities.  The attribute name is emitted as-is.
    """
    if not attrs:
        return ''

    def _EscapeValue(value):
        # Same replacements as cgi.escape(value, quote=True).  Escaping '"' is
        # required because the value is wrapped in double quotes below; without
        # it a quote in the value would terminate the attribute early.
        # Spelled out rather than calling cgi.escape so this does not depend
        # on a function that newer Python versions removed.
        # '&' must be replaced first so the entities added afterwards are not
        # escaped a second time.
        return (value.replace('&', '&amp;')
                .replace('<', '&lt;')
                .replace('>', '&gt;')
                .replace('"', '&quot;'))

    # Important: there's a leading space here.
    # TODO: Change href="$help:command" to href="help.html#command"
    return ''.join(' %s="%s"' % (k, _EscapeValue(v)) for (k, v) in attrs)
|
21 |
|
22 |
|
def PrettyHref(s, preserve_anchor_case=False):
    """Convert arbitrary heading text into an anchor name without punctuation.

    Loosely modeled after GitHub's heading anchors; exact compatibility is not
    a goal.  The text is split into words on whitespace and hyphens, unwanted
    characters are dropped from each word, empty words are discarded (unlike
    GitHub), and the remaining words are joined with '-'.

    Args:
      s: the heading text.
      preserve_anchor_case: when False (the default), keep only \\w characters
        (letters, digits, underscore) and lowercase the result.  When True,
        also keep '/' (for doc/ref anchors such as List/append, cmd/append)
        and leave the case untouched.

    Returns:
      The anchor string; '' if no characters survive.
    """
    # Which characters survive inside each word.
    if preserve_anchor_case:
        wanted = r'[\w/]+'
    else:
        wanted = r'\w+'

    pieces = []
    # Words are delimited by runs of whitespace and/or hyphens.
    for word in re.split(r'[\s\-]+', s):
        cleaned = ''.join(re.findall(wanted, word))
        if cleaned:  # skip words that were all punctuation (or empty)
            pieces.append(cleaned)

    href = '-'.join(pieces)
    if preserve_anchor_case:
        return href
    return href.lower()
|