| 1 | #!/usr/bin/env python2
 | 
| 2 | """
 | 
| 3 | split.test.py: Tests for split.py
 | 
| 4 | """
 | 
| 5 | 
 | 
| 6 | import unittest
 | 
| 7 | 
 | 
| 8 | from osh import split  # module under test
 | 
| 9 | 
 | 
| 10 | 
 | 
def _RunSplitCases(test, sp, cases):
    """Run every split case through `sp` and assert on the resulting parts.

    Args:
      test: object providing assertEqual(); callers in this file pass the
        running unittest.TestCase.
      sp: splitter exposing Split(s, allow_escape).
      cases: iterable of (expected_parts, s, allow_escape) tuples.
    """
    # Debug-output switch.  True dumps one span per line; False would print
    # the input and its spans on a single line.
    verbose = True

    for want, s, allow_escape in cases:
        spans = sp.Split(s, allow_escape)

        if verbose:
            print(repr(s))
            for span in spans:
                # Each span is formatted as a pair.
                print('  %s %s' % span)
        else:
            print('%r: %s' % (s, spans))

        got = split._SpansToParts(s, spans)
        print('PARTS %s' % got)

        failure_msg = '%r: %s != %s' % (s, want, got)
        test.assertEqual(want, got, failure_msg)
 | 
| 27 | 
 | 
| 28 | 
 | 
class SplitTest(unittest.TestCase):
    """Tests for split.IfsSplitter and split._SpansToParts.

    Most tests are tables of (expected_parts, input, allow_escape) tuples that
    are fed to _RunSplitCases() with a splitter built for one IFS setup.
    """

    # NOTE: The test methods are documented with '#' comments rather than
    # docstrings, because unittest prints the first docstring line instead of
    # the method name in verbose mode.

    def testSpansToParts(self):
        # The same input split without and with backslash escaping.
        sp = split.IfsSplitter(split.DEFAULT_IFS, '')

        s = 'one\\ two'
        spans = sp.Split(s, False)
        print(spans)

        parts = split._SpansToParts(s, spans)
        self.assertEqual(['one\\', 'two'], parts)

        spans = sp.Split(s, True)  # allow_escape
        parts = split._SpansToParts(s, spans)
        self.assertEqual(['one two'], parts)

        # NOTE: Only read builtin supports max_results.
        # TODO: Add max_results coverage where it is supported.  The
        # assertions that used to follow an early 'return' here never ran, and
        # they expected two different results from identical
        # _SpansToParts(s, spans, max_results=1) calls, so they were removed
        # rather than revived.

    def testTrailingWhitespaceBug(self):
        # Bug: these differed
        CASES = [
            (['x y'], r' x\ y', True),
            (['ab '], r' ab\ ', True),
            (['ab '], r' ab\  ', True),
        ]
        sp = split.IfsSplitter(split.DEFAULT_IFS, '')
        _RunSplitCases(self, sp, CASES)

    def testDefaultIfs(self):
        CASES = [
            ([], '', True),
            (['a'], 'a', True),
            (['a'], ' a ', True),
            (['ab'], '\tab\n', True),
            (['a', 'b'], 'a  b\n', True),
            (['a b'], r'a\ b', True),
            (['a\\', 'b'], r'a\ b', False),
            ([r'\*.sh'], r'\\*.sh', True),
            (['Aa', 'b', ' a b'], 'Aa b \\ a\\ b', True),
        ]

        sp = split.IfsSplitter(split.DEFAULT_IFS, '')
        _RunSplitCases(self, sp, CASES)

        self.assertEqual(r'a\ _b', sp.Escape('a _b'))

    def testMixedIfs(self):
        CASES = [
            ([], '', True),
            (['a', 'b'], 'a_b', True),
            (['a', 'b'], ' a b ', True),
            (['a', 'b'], 'a _ b', True),
            (['a', 'b'], '  a _ b  ', True),
            (['a', '', 'b'], 'a _ _ b', True),
            (['a', '', 'b'], 'a __ b', True),
            (['a', '', '', 'b'], 'a _  _ _  b', True),
            (['a'], '  a _ ', True),

            # NOTES:
            # - This case REQUIRES ignoring leading whitespace.  The state
            # machine can't handle it.  Contrast with the case above.
            # - We get three spans with index 1 because of the initial rule to
            # ignore whitespace, and then EMIT_EMPTY.  Seems harmless for now?
            (['', 'a'], ' _ a _ ', True),

            # Backslash escape
            (['a b'], r'a\ b', True),
            (['a\\', 'b'], r'a\ b', False),
        ]

        # IFS='_ '
        sp = split.IfsSplitter(' ', '_')
        _RunSplitCases(self, sp, CASES)

        # Raw string: '\ ' and '\_' are not valid escapes in a plain literal.
        self.assertEqual(r'a\ \_b', sp.Escape('a _b'))

    def testWhitespaceOnly(self):
        CASES = [
            ([], '', True),
            ([], '\t', True),
            (['a'], 'a\t', True),
            (['a', 'b'], '\t\ta\tb\t', True),

            # Backslash escape
            (['a\tb'], 'a\\\tb', True),
            (['a\\', 'b'], 'a\\\tb', False),
        ]

        # IFS is a single tab (first argument); the second argument is empty.
        sp = split.IfsSplitter('\t', '')
        _RunSplitCases(self, sp, CASES)

        # A space is not escaped by this splitter, but a tab is.
        self.assertEqual('a b', sp.Escape('a b'))
        self.assertEqual('a\\\tb', sp.Escape('a\tb'))

    def testOtherOnly(self):
        CASES = [
            ([], '', True),
            ([''], '_', True),
            (['a'], 'a_', True),
            (['', '', 'a', 'b'], '__a_b_', True),

            # Backslash escape
            (['a_b'], r'a\_b', True),
            (['a\\', 'b'], r'a\_b', False),
        ]

        # IFS='_' (second argument); the first argument is empty.
        sp = split.IfsSplitter('', '_')
        _RunSplitCases(self, sp, CASES)

    def testTwoOther(self):
        CASES = [
            (['a', '', 'b', '', '', 'c', 'd'], 'a__b---c_d', True),

            # Backslash escape
            (['a_-b'], r'a\_\-b', True),
            (['a\\', '\\', 'b'], r'a\_\-b', False),
        ]

        # IFS='_-' (second argument); the first argument is empty.
        sp = split.IfsSplitter('', '_-')
        _RunSplitCases(self, sp, CASES)
 | 
| 161 | 
 | 
| 162 | 
 | 
# Script entry point: hand control to the unittest runner, which discovers
# and runs the test methods defined in this module.
if __name__ == "__main__":
    unittest.main()
 |