OILS / mycpp / mylib.py View on Github | oilshell.org

529 lines, 186 significant
1"""
2mylib.py
3"""
4from __future__ import print_function
5
6try:
7 import cStringIO
8except ImportError:
9 # Python 3 doesn't have cStringIO. Our yaks/ demo currently uses
10 # mycpp/mylib.py with Python 3.
11 cStringIO = None
12 import io
13
14import sys
15
16from pylib import collections_
17try:
18 import posix_ as posix
19except ImportError:
20 # Hack for tangled dependencies.
21 import os
22 posix = os
23
24from typing import (Tuple, List, Dict, Optional, Iterator, Any, TypeVar,
25 Generic, cast, TYPE_CHECKING)
26if TYPE_CHECKING:
27 from mycpp import mops
28
29# For conditional translation
30CPP = False
31PYTHON = True
32
33# Use POSIX name directly
34STDIN_FILENO = 0
35
36
37def MaybeCollect():
38 # type: () -> None
39 pass
40
41
42def NewDict():
43 # type: () -> Dict[str, Any]
44 """Make dictionaries ordered in Python, e.g. for JSON.
45
46 In C++, our Dict implementation should be ordered.
47 """
48 return collections_.OrderedDict()
49
50
51def log(msg, *args):
52 # type: (str, *Any) -> None
53 """Print debug output to stderr."""
54 if args:
55 msg = msg % args
56 print(msg, file=sys.stderr)
57
58
59def print_stderr(s):
60 # type: (str) -> None
61 """Print a message to stderr for the user.
62
63 This should be used sparingly, since it doesn't have location info, like
64 ui.ErrorFormatter does. We use it to print fatal I/O errors that were only
65 caught at the top level.
66 """
67 print(s, file=sys.stderr)
68
69
70#
71# Byte Operations avoid excessive allocations with string algorithms
72#
73
74
75def ByteAt(s, i):
76 # type: (str, int) -> int
77 """i must be in bounds."""
78
79 # This simplifies the C++ implementation
80 assert 0 <= i, 'No negative indices'
81 assert i < len(s), 'No negative indices'
82
83 return ord(s[i])
84
85
86def ByteEquals(byte, ch):
87 # type: (int, str) -> bool
88 assert len(ch) == 1, ch
89 assert 0 <= byte < 256, byte
90
91 return byte == ord(ch)
92
93
94def ByteInSet(byte, byte_set):
95 # type: (int, str) -> bool
96 assert 0 <= byte < 256, byte
97
98 return chr(byte) in byte_set
99
100
101def JoinBytes(byte_list):
102 # type: (List[int]) -> str
103
104 return ''.join(chr(b) for b in byte_list)
105
106
107#
108# For SparseArray
109#
110
111
112def BigIntSort(keys):
113 # type: (List[mops.BigInt]) -> None
114 keys.sort(key=lambda big: big.i)
115
116
117#
118# Files
119#
120
121
122class File:
123 """
124 TODO: This should define a read/write interface, and then LineReader() and
125 Writer() can possibly inherit it, with runtime assertions
126
127 Then we allow downcasting from File -> LineReader, like we currently do in
128 C++ in gc_mylib.h.
129
130 Inheritance can't express the structural Reader/Writer pattern of Go, which
131 would be better. I suppose we could use File* everywhere, but having
132 fine-grained types is nicer. And there will be very few casts.
133 """
134 pass
135
136
137class LineReader:
138
139 def readline(self):
140 # type: () -> str
141 raise NotImplementedError()
142
143 def close(self):
144 # type: () -> None
145 raise NotImplementedError()
146
147 def isatty(self):
148 # type: () -> bool
149 raise NotImplementedError()
150
151
152if TYPE_CHECKING:
153
154 class BufLineReader(LineReader):
155
156 def __init__(self, s):
157 # type: (str) -> None
158 raise NotImplementedError()
159
160 def open(path):
161 # type: (str) -> LineReader
162
163 # TODO: should probably return mylib.File
164 # mylib.open() is currently only used in yaks/yaks_main and
165 # bin.osh_parse
166 raise NotImplementedError()
167
168else:
169 # Actual runtime
170 if cStringIO:
171 BufLineReader = cStringIO.StringIO
172 else: # Python 3
173 BufLineReader = io.StringIO
174
175 open = open
176
177
178class Writer:
179
180 def write(self, s):
181 # type: (str) -> None
182 raise NotImplementedError()
183
184 def flush(self):
185 # type: () -> None
186 raise NotImplementedError()
187
188 def isatty(self):
189 # type: () -> bool
190 raise NotImplementedError()
191
192 def close(self):
193 # type: () -> None
194 raise NotImplementedError()
195
196
197class BufWriter(Writer):
198 """Mimic StringIO API, but add clear() so we can reuse objects.
199
200 We can also add accelerators for directly writing numbers, to avoid
201 allocations when encoding JSON.
202 """
203
204 def __init__(self):
205 # type: () -> None
206 self.parts = []
207
208 def write(self, s):
209 # type: (str) -> None
210 self.parts.append(s)
211
212 def write_spaces(self, n):
213 # type: (int) -> None
214 """For JSON indenting. Avoid intermediate allocations in C++."""
215 self.parts.append(' ' * n)
216
217 def getvalue(self):
218 # type: () -> str
219 return ''.join(self.parts)
220
221 def clear(self):
222 # type: () -> None
223 del self.parts[:]
224
225 def close(self):
226 # type: () -> None
227
228 # No-op for now - we could invalidate write()?
229
230 pass
231
232
233def Stdout():
234 # type: () -> Writer
235 return sys.stdout
236
237
238def Stderr():
239 # type: () -> Writer
240 return sys.stderr
241
242
243def Stdin():
244 # type: () -> LineReader
245 return sys.stdin
246
247
248class switch(object):
249 """Translates to C switch on int.
250
251 with tagswitch(i) as case:
252 if case(42, 43):
253 print('hi')
254 elif case(99):
255 print('two')
256 else:
257 print('neither')
258 """
259
260 def __init__(self, value):
261 # type: (int) -> None
262 self.value = value
263
264 def __enter__(self):
265 # type: () -> switch
266 return self
267
268 def __exit__(self, type, value, traceback):
269 # type: (Any, Any, Any) -> bool
270 return False # Allows a traceback to occur
271
272 def __call__(self, *cases):
273 # type: (*Any) -> bool
274 return self.value in cases
275
276
277class str_switch(object):
278 """Translates to fast dispatch on string length, then memcmp()."""
279
280 def __init__(self, value):
281 # type: (str) -> None
282 self.value = value
283
284 def __enter__(self):
285 # type: () -> switch
286 return self
287
288 def __exit__(self, type, value, traceback):
289 # type: (Any, Any, Any) -> bool
290 return False # Allows a traceback to occur
291
292 def __call__(self, *cases):
293 # type: (*Any) -> bool
294 return self.value in cases
295
296
297class tagswitch(object):
298 """A ContextManager that translates to switch statement over ASDL types."""
299
300 def __init__(self, node):
301 # type: (Any) -> None
302 self.tag = node.tag()
303
304 def __enter__(self):
305 # type: () -> tagswitch
306 return self
307
308 def __exit__(self, type, value, traceback):
309 # type: (Any, Any, Any) -> bool
310 return False # Allows a traceback to occur
311
312 def __call__(self, *cases):
313 # type: (*Any) -> bool
314 return self.tag in cases
315
316
317if TYPE_CHECKING:
318 # Doesn't work
319 T = TypeVar('T')
320
321 class StackArray(Generic[T]):
322
323 def __init__(self):
324 self.items = [] # type: List[T]
325
326 def append(self, item):
327 # type: (T) -> None
328 self.items.append(item)
329
330 def pop(self):
331 # type: () -> T
332 return self.items.pop()
333
334 # Doesn't work, this is only for primitive types
335 #StackArray = NewType('StackArray', list)
336
337
338def MakeStackArray(item_type):
339 # type: (TypeVar) -> StackArray[item_type]
340 """
341 Convenience "constructor" used like this:
342
343 myarray = MakeStackArray(int)
344
345 The idiom could also be
346
347 myarray = cast('StackArray[int]', [])
348
349 But that's uglier.
350 """
351 return cast('StackArray[item_type]', [])
352
353
354if TYPE_CHECKING:
355 K = TypeVar('K')
356 V = TypeVar('V')
357
358
359def iteritems(d):
360 # type: (Dict[K, V]) -> Iterator[Tuple[K, V]]
361 """Make translation a bit easier."""
362 return d.iteritems()
363
364
365def split_once(s, delim):
366 # type: (str, str) -> Tuple[str, Optional[str]]
367 """Easier to call than split(s, 1) because of tuple unpacking."""
368
369 parts = s.split(delim, 1)
370 if len(parts) == 1:
371 no_str = None # type: Optional[str]
372 return s, no_str
373 else:
374 return parts[0], parts[1]
375
376
377def hex_lower(i):
378 # type: (int) -> str
379 return '%x' % i
380
381
382def dict_erase(d, key):
383 # type: (Dict[Any, Any], Any) -> None
384 """
385 Ensure that a key isn't in the Dict d. This makes C++ translation easier.
386 """
387 try:
388 del d[key]
389 except KeyError:
390 pass
391
392
393def str_cmp(s1, s2):
394 # type: (str, str) -> int
395 if s1 == s2:
396 return 0
397 if s1 < s2:
398 return -1
399 else:
400 return 1
401
402
403class UniqueObjects(object):
404 """A set of objects identified by their address in memory
405
406 Python's id(obj) returns the address of any object. But we don't simply
407 implement it, because it requires a uint64_t on 64-bit systems, while mycpp
408 only supports 'int'.
409
410 So we have a whole class.
411
412 Should be used for:
413
414 - Cycle detection when pretty printing, as Python's repr() does
415 - See CPython's Objects/object.c PyObject_Repr()
416 /* These methods are used to control infinite recursion in repr, str, print,
417 etc. Container objects that may recursively contain themselves,
418 e.g. builtin dictionaries and lists, should use Py_ReprEnter() and
419 Py_ReprLeave() to avoid infinite recursion.
420 */
421 - e.g. dictobject.c dict_repr() calls Py_ReprEnter() to print {...}
422 - In Python 2.7 a GLOBAL VAR is used
423
424 - It also checks for STACK OVERFLOW
425
426 - Packle serialization
427 """
428
429 def __init__(self):
430 # 64-bit id() -> small integer ID
431 self.addresses = {} # type: Dict[int, int]
432
433 def Contains(self, obj):
434 # type: (Any) -> bool
435 """ Convenience? """
436 return self.Get(obj) != -1
437
438 def MaybeAdd(self, obj):
439 # type: (Any) -> None
440 """ Convenience? """
441
442 # def AddNewObject(self, obj):
443 def Add(self, obj):
444 # type: (Any) -> None
445 """
446 Assert it isn't already there, and assign a new ID!
447
448 # Lib/pickle does:
449
450 self.memo[id(obj)] = memo_len, obj
451
452 I guess that's the object ID and a void*
453
454 Then it does:
455
456 x = self.memo.get(id(obj))
457
458 and
459
460 # If the object is already in the memo, this means it is
461 # recursive. In this case, throw away everything we put on the
462 # stack, and fetch the object back from the memo.
463 if id(obj) in self.memo:
464 write(POP + self.get(self.memo[id(obj)][0]))
465
466 BUT It only uses the numeric ID!
467 """
468 addr = id(obj)
469 assert addr not in self.addresses
470 self.addresses[addr] = len(self.addresses)
471
472 def Get(self, obj):
473 # type: (Any) -> int
474 """
475 Returns unique ID assigned
476
477 Returns -1 if it doesn't exist?
478 """
479 addr = id(obj)
480 return self.addresses.get(addr, -1)
481
482 # Note: self.memo.clear() doesn't appear to be used
483
484
485def probe(provider, name, *args):
486 # type: (str, str, Any) -> None
487 """Create a probe for use with profilers like linux perf and ebpf or dtrace."""
488 # Noop. Just a marker for mycpp to emit a DTRACE_PROBE()
489 return
490
491
492if 0:
493 # Prototype of Unix file descriptor I/O, compared with FILE* libc I/O.
494 # Doesn't seem like we need this now.
495
496 # Short versions of STDOUT_FILENO and STDERR_FILENO
497 kStdout = 1
498 kStderr = 2
499
500 def writeln(s, fd=kStdout):
501 # type: (str, int) -> None
502 """Write a line. The name is consistent with JavaScript writeln() and Rust.
503
504 e.g.
505 writeln("x = %d" % x, kStderr)
506
507 TODO: The Oil interpreter shouldn't use print() anywhere. Instead it can use
508 writeln(s) and writeln(s, kStderr)
509 """
510 posix.write(fd, s)
511 posix.write(fd, '\n')
512
513 class File(object):
514 """Custom file wrapper for Unix I/O like write() read()
515
516 Not C I/O like fwrite() fread(). There should be no flush().
517 """
518
519 def __init__(self, fd):
520 # type: (int) -> None
521 self.fd = fd
522
523 def write(self, s):
524 # type: (str) -> None
525 posix.write(self.fd, s)
526
527 def writeln(self, s):
528 # type: (str) -> None
529 writeln(s, fd=self.fd)