| 1 | #!/usr/bin/env python2
 | 
| 2 | from __future__ import print_function
 | 
| 3 | 
 | 
| 4 | import unittest
 | 
| 5 | 
 | 
from lazylex import html  # module under test
 | 
| 7 | 
 | 
# Alias html.log so the helpers in this file can call it directly.
log = html.log

# Read the HTML fixture once, at import time.  The path is relative, so it is
# resolved against the current working directory of the test run.
with open('lazylex/testdata.html') as f:
    TEST_HTML = f.read()
 | 
| 12 | 
 | 
| 13 | 
 | 
def _MakeTagLexer(s):
    """Build an html.TagLexer for s, reset to span the entire string.

    Args:
      s: the tag text to lex, e.g. '<a href="x">'

    Returns:
      The TagLexer after Reset(0, len(s)) has been called on it.
    """
    tag_lexer = html.TagLexer(s)
    start_pos, end_pos = 0, len(s)
    tag_lexer.Reset(start_pos, end_pos)
    return tag_lexer
 | 
| 18 | 
 | 
| 19 | 
 | 
def _PrintTokens(lex):
    """Log the tag name of lex, then every token with the text it covers.

    Args:
      lex: a tag lexer providing TagName(), Tokens() and the source string
        as lex.s
    """
    log('')
    log('tag = %r', lex.TagName())
    for token in lex.Tokens():
        # Each token is a (token id, start offset, end offset) triple.
        tok, start, end = token
        covered = lex.s[start:end]
        log('%s %r', tok, covered)
 | 
| 25 | 
 | 
| 26 | 
 | 
class HtmlTest(unittest.TestCase):
    """Tests for the tag lexer and tokenizer in lazylex.html."""

    def testTagLexer(self):
        """Lex a handful of start tags and check attribute lookup."""
        # Invalid!
        #lex = _MakeTagLexer('< >')
        #print(lex.Tag())

        lex = _MakeTagLexer('<a>')
        _PrintTokens(lex)

        lex = _MakeTagLexer('<a novalue>')
        _PrintTokens(lex)

        # Note: we could have a different HasAttr() method
        # <a novalue> means lex.GetAttr('novalue') == None
        # https://developer.mozilla.org/en-US/docs/Web/API/Element/hasAttribute
        self.assertEqual(None, lex.GetAttr('novalue'))

        lex = _MakeTagLexer('<a href="double quoted">')
        _PrintTokens(lex)

        self.assertEqual('double quoted', lex.GetAttr('href'))
        # An attribute that is not present at all also yields None.
        self.assertEqual(None, lex.GetAttr('oops'))

        # The remaining cases are only lexed and logged; nothing is asserted
        # about unquoted values or self-closing tags here.
        lex = _MakeTagLexer('<a href=foo class="bar">')
        _PrintTokens(lex)

        lex = _MakeTagLexer('<a href=foo class="bar" />')
        _PrintTokens(lex)

    # IndexLinker in devtools/make_help.py
    #  <pre> sections in doc/html_help.py
    # TocExtractor in devtools/cmark.py

    def testPstrip(self):
        """Remove anything like this.

        <p><pstrip> </pstrip></p>
        """
        pass

    def testCommentParse(self):
        """Tokenize the whole HTML fixture and fail on any Invalid token."""
        n = len(TEST_HTML)
        for tok_id, end_pos in html._Tokens(TEST_HTML, 0, n):
            if tok_id == html.Invalid:
                # Report this as a test failure (not an error) and include
                # the position so the bad input can be located in the fixture.
                self.fail('Invalid token, end_pos = %r' % (end_pos,))
            print(tok_id)
 | 
| 75 | 
 | 
| 76 | 
 | 
# Run every TestCase in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
 |