| 1 | #!/usr/bin/env python2
 | 
| 2 | """html_lib.py.
 | 
| 3 | 
 | 
| 4 | Shared between HTML processors.
 | 
| 5 | 
 | 
| 6 | TODO: Write a "pull parser" API!
 | 
| 7 | """
 | 
| 8 | from __future__ import print_function
 | 
| 9 | 
 | 
| 10 | import cgi
 | 
| 11 | import re
 | 
| 12 | 
 | 
| 13 | 
 | 
def _EscapeAttrValue(s):
    """Escape a string for use inside a double-quoted HTML attribute.

    Performs the same replacements as cgi.escape(s, True): & < > and ".
    Written out by hand so it doesn't depend on cgi.escape, which newer
    Python versions no longer provide.
    """
    # '&' must be replaced first; otherwise the entities produced by the later
    # replacements would themselves be escaped again.
    s = s.replace('&', '&amp;')
    s = s.replace('<', '&lt;')
    s = s.replace('>', '&gt;')
    # Without this, a " in the value would terminate the attribute early.
    s = s.replace('"', '&quot;')
    return s


def AttrsToString(attrs):
    """Serialize (name, value) pairs as HTML attribute text.

    Args:
      attrs: iterable of (name, value) string pairs; may be empty or None.

    Returns:
      '' when attrs is empty/None.  Otherwise a string like ' k1="v1" k2="v2"',
      where each value is HTML-escaped, including double quotes, so the result
      can be pasted directly after a tag name.
    """
    if not attrs:
        return ''

    # Important: there's a leading space here.
    # TODO: Change href="$help:command" to href="help.html#command"
    return ''.join(' %s="%s"' % (k, _EscapeAttrValue(v)) for (k, v) in attrs)
 | 
| 21 | 
 | 
| 22 | 
 | 
def PrettyHref(s, preserve_anchor_case=False):
    """Derive an anchor name from heading text, dropping special characters.

    The text is split on runs of whitespace and hyphens.  Within each piece
    only regex "word" characters (letters, digits, underscore) are kept, and
    the non-empty pieces are joined with '-'.

    This is SIMILAR to what Github does, but there's no need to be 100%
    compatible; unlike Github, empty words are removed.

    Args:
      s: heading text.
      preserve_anchor_case: if true, '/' is also kept (for doc/ref anchors
        like List/append, cmd/append) and the result is NOT lowercased.
        Note that this flag could be renamed, since it does more than
        preserve case.

    Returns:
      The anchor string; lowercased unless preserve_anchor_case is set.
    """
    # Characters that survive inside each word.
    allowed = r'[\w/]+' if preserve_anchor_case else r'\w+'

    parts = []
    # Split by whitespace or hyphen
    for word in re.split(r'[\s\-]+', s):
        cleaned = ''.join(re.findall(allowed, word))
        if cleaned:  # drop words that had nothing worth keeping
            parts.append(cleaned)

    href = '-'.join(parts)
    if preserve_anchor_case:
        return href
    return href.lower()
 |