OILS / mycpp / mylib.py View on Github | oilshell.org

532 lines, 188 significant
1"""
2mylib.py
3"""
4from __future__ import print_function
5
6try:
7 import cStringIO
8except ImportError:
9 # Python 3 doesn't have cStringIO. Our yaks/ demo currently uses
10 # mycpp/mylib.py with Python 3.
11 cStringIO = None
12 import io
13
14import sys
15
16from pylib import collections_
17try:
18 import posix_ as posix
19except ImportError:
20 # Hack for tangled dependencies.
21 import os
22 posix = os
23
24from typing import (Tuple, List, Dict, Optional, Iterator, Any, TypeVar,
25 Generic, cast, TYPE_CHECKING)
26if TYPE_CHECKING:
27 from mycpp import mops
28
# Flags for conditional translation.  In this Python module the C++ flag is
# off and the Python flag is on.
CPP = False
PYTHON = True

# File descriptor number of standard input; uses the POSIX name directly.
STDIN_FILENO = 0
35
36
def MaybeCollect():
    # type: () -> None
    """Do nothing.  This Python implementation is a no-op."""
    return None
40
41
def NewDict():
    # type: () -> Dict[str, Any]
    """Return a new, empty ordered dictionary, e.g. for JSON.

    Python gets ordering from collections_.OrderedDict.  In C++, our Dict
    implementation should be ordered.
    """
    ordered = collections_.OrderedDict()
    return ordered
49
50
def log(msg, *args):
    # type: (str, *Any) -> None
    """Write a debug message, followed by a newline, to stderr.

    If extra arguments are passed, msg is used as a %-format string and the
    arguments are interpolated into it.  With no extra arguments, msg is
    printed verbatim (so a literal '%' is safe).
    """
    text = msg % args if args else msg
    print(text, file=sys.stderr)
57
58
def print_stderr(s):
    # type: (str) -> None
    """Show the user a message on stderr, followed by a newline.

    Use this sparingly: unlike ui.ErrorFormatter, it carries no location
    info.  We use it to print fatal I/O errors that were only caught at the
    top level.
    """
    err = sys.stderr
    print(s, file=err)
68
69
70#
71# Byte Operations avoid excessive allocations with string algorithms
72#
73
74
def ByteAt(s, i):
    # type: (str, int) -> int
    """Return the integer value of the byte at index i of s.

    i must be in bounds, i.e. 0 <= i < len(s).  Negative (from-the-end)
    indices are deliberately rejected; this simplifies the C++
    implementation.

    Raises:
      AssertionError: if i is negative or past the end of s.
    """
    assert 0 <= i, 'No negative indices'
    # The upper bound has its own message, so a too-large index is not
    # misreported as a negative one.
    assert i < len(s), 'Index out of bounds'

    return ord(s[i])
84
85
def ByteEquals(byte, ch):
    # type: (int, str) -> bool
    """Return whether the integer byte equals the code of the string ch.

    Asserts that ch has length 1 and that byte is in the range [0, 256).
    """
    assert len(ch) == 1, ch
    assert 0 <= byte < 256, byte

    expected = ord(ch)
    return expected == byte
92
93
def ByteInSet(byte, byte_set):
    # type: (int, str) -> bool
    """Return whether the character with code `byte` occurs in byte_set.

    Asserts that byte is in the range [0, 256).
    """
    assert 0 <= byte < 256, byte

    ch = chr(byte)
    return ch in byte_set
99
100
def JoinBytes(byte_list):
    # type: (List[int]) -> str
    """Build a string from a list of integer character codes."""
    chars = map(chr, byte_list)
    return ''.join(chars)
105
106
107#
108# For SparseArray
109#
110
111
def BigIntSort(keys):
    # type: (List[mops.BigInt]) -> None
    """Sort keys in place, ordering each element by its .i attribute."""

    def _sort_key(big):
        # type: (mops.BigInt) -> Any
        return big.i

    keys.sort(key=_sort_key)
115
116
117#
118# Files
119#
120
121
class File:
    """Placeholder for a future read/write file interface.

    TODO: This should define a read/write interface, and then LineReader()
    and Writer() can possibly inherit it, with runtime assertions.

    Then we allow downcasting from File -> LineReader, like we currently do
    in C++ in gc_mylib.h.

    Inheritance can't express the structural Reader/Writer pattern of Go,
    which would be better.  I suppose we could use File* everywhere, but
    having fine-grained types is nicer.  And there will be very few casts.
    """
135
136
class LineReader:
    """Interface for line-oriented input; every method must be overridden."""

    def readline(self):
        # type: () -> str
        """Return a line as a string.  Not implemented in this base class."""
        raise NotImplementedError()

    def close(self):
        # type: () -> None
        """Close the reader.  Not implemented in this base class."""
        raise NotImplementedError()

    def isatty(self):
        # type: () -> bool
        """Return whether the input is a terminal.  Not implemented here."""
        raise NotImplementedError()
150
151
if TYPE_CHECKING:
    # Declarations for the type checker only; the runtime definitions are in
    # the else branch below.

    class BufLineReader(LineReader):
        """A LineReader constructed from a string."""

        def __init__(self, s):
            # type: (str) -> None
            raise NotImplementedError()

    def open(path):
        # type: (str) -> LineReader
        """Typed signature for mylib.open(path)."""

        # TODO: should probably return mylib.File
        # mylib.open() is currently only used in yaks/yaks_main and
        # bin.osh_parse
        raise NotImplementedError()

else:
    # Actual runtime: use cStringIO when it was importable, otherwise fall
    # back to io.StringIO (Python 3).
    BufLineReader = cStringIO.StringIO if cStringIO else io.StringIO

    # Bind the builtin open() as a name in this module.
    open = open
176
177
class Writer:
    """Interface for output streams; every method must be overridden."""

    def write(self, s):
        # type: (str) -> None
        """Write the string s.  Not implemented in this base class."""
        raise NotImplementedError()

    def flush(self):
        # type: () -> None
        """Flush the writer.  Not implemented in this base class."""
        raise NotImplementedError()

    def isatty(self):
        # type: () -> bool
        """Return whether the output is a terminal.  Not implemented here."""
        raise NotImplementedError()

    def close(self):
        # type: () -> None
        """Close the writer.  Not implemented in this base class."""
        raise NotImplementedError()
195
196
class BufWriter(Writer):
    """Mimic StringIO API, but add clear() so we can reuse objects.

    We can also add accelerators for directly writing numbers, to avoid
    allocations when encoding JSON.
    """

    def __init__(self):
        # type: () -> None
        # Chunks written so far, in order.  getvalue() joins them.
        self.parts = []  # type: List[str]

    def write(self, s):
        # type: (str) -> None
        """Append s to the buffer."""
        self.parts.append(s)

    def flush(self):
        # type: () -> None
        """No-op: everything is held in memory, so there is nothing to flush.

        Defined so that code holding a generic Writer can call flush()
        without hitting the base class's NotImplementedError.
        """
        pass

    def isatty(self):
        # type: () -> bool
        """An in-memory buffer is never a terminal."""
        return False

    def write_spaces(self, n):
        # type: (int) -> None
        """For JSON indenting.  Avoid intermediate allocations in C++."""
        self.parts.append(' ' * n)

    def getvalue(self):
        # type: () -> str
        """Return everything written since creation or the last clear()."""
        return ''.join(self.parts)

    def clear(self):
        # type: () -> None
        """Empty the buffer in place so the object can be reused."""
        del self.parts[:]

    def close(self):
        # type: () -> None

        # No-op for now - we could invalidate write()?
        pass
234
235
def Stdout():
    # type: () -> Writer
    """Return the current sys.stdout object, typed as a Writer."""
    out = sys.stdout
    return out
239
240
def Stderr():
    # type: () -> Writer
    """Return the current sys.stderr object, typed as a Writer."""
    err = sys.stderr
    return err
244
245
def Stdin():
    # type: () -> LineReader
    """Return the current sys.stdin object, typed as a LineReader."""
    inp = sys.stdin
    return inp
249
250
class switch(object):
    """Context manager that translates to a C switch on an int.

    Usage:

        with switch(i) as case:
            if case(42, 43):
                print('hi')
            elif case(99):
                print('two')
            else:
                print('neither')
    """

    def __init__(self, value):
        # type: (int) -> None
        self.value = value

    def __enter__(self):
        # type: () -> switch
        # The object bound by 'as' is the switch itself; calling it tests
        # the cases.
        return self

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> bool
        # A false result lets exceptions from the body propagate, so a
        # traceback can occur.
        return False

    def __call__(self, *cases):
        # type: (*Any) -> bool
        """Return whether the switched-on value is one of the given cases."""
        matched = self.value in cases
        return matched
278
279
class str_switch(object):
    """Translates to fast dispatch on string length, then memcmp().

    Usage:

        with str_switch(s) as case:
            if case('foo', 'bar'):
                ...
            elif case('baz'):
                ...
    """

    def __init__(self, value):
        # type: (str) -> None
        self.value = value

    def __enter__(self):
        # type: () -> str_switch
        # Returns this object, so the annotation names str_switch rather
        # than the unrelated int-based 'switch' class.
        return self

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> bool
        return False  # Allows a traceback to occur

    def __call__(self, *cases):
        # type: (*Any) -> bool
        """Return whether the switched-on string equals any of the cases."""
        return self.value in cases
298
299
class tagswitch(object):
    """A ContextManager that translates to switch statement over ASDL types.

    The tag is read once, by calling node.tag(), when the object is created.
    """

    def __init__(self, node):
        # type: (Any) -> None
        self.tag = node.tag()

    def __enter__(self):
        # type: () -> tagswitch
        return self

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> bool
        # A false result lets exceptions from the body propagate, so a
        # traceback can occur.
        return False

    def __call__(self, *cases):
        # type: (*Any) -> bool
        """Return whether the node's tag is one of the given cases."""
        matched = self.tag in cases
        return matched
318
319
if TYPE_CHECKING:
    # Only the type checker sees these definitions; nothing here exists at
    # runtime.
    # Doesn't work
    T = TypeVar('T')

    class StackArray(Generic[T]):
        """Generic container with append() and pop(), backed by a list."""

        def __init__(self):
            # type: () -> None
            self.items = []  # type: List[T]

        def append(self, item):
            # type: (T) -> None
            """Add item at the end."""
            self.items.append(item)

        def pop(self):
            # type: () -> T
            """Remove and return the last item."""
            last = self.items.pop()
            return last

    # Doesn't work, this is only for primitive types
    #StackArray = NewType('StackArray', list)
339
340
def MakeStackArray(item_type):
    # type: (TypeVar) -> StackArray[item_type]
    """Convenience "constructor" for a StackArray.

    Used like this:

        myarray = MakeStackArray(int)

    The idiom could also be

        myarray = cast('StackArray[int]', [])

    But that's uglier.  At Python runtime the result is a new empty list.
    """
    empty = []
    return cast('StackArray[item_type]', empty)
355
356
if TYPE_CHECKING:
    # Type variables used only in the type comment of iteritems().
    K = TypeVar('K')
    V = TypeVar('V')


def iteritems(d):
    # type: (Dict[K, V]) -> Iterator[Tuple[K, V]]
    """Return an iterator over the (key, value) pairs of d.

    Make translation a bit easier.

    Uses d.iteritems() where it exists (Python 2 dicts).  Python 3 dicts
    have no iteritems(), so there we iterate over d.items() instead.
    """
    try:
        py2_iteritems = d.iteritems
    except AttributeError:
        # Python 3: items() is a view; wrap it to return a real iterator,
        # which is what iteritems() returned.
        return iter(d.items())
    return py2_iteritems()
366
367
def split_once(s, delim):
    # type: (str, str) -> Tuple[str, Optional[str]]
    """Split s at the first occurrence of delim.

    Easier to call than split(s, 1) because the result always unpacks into
    two values: (before, after) when delim is found, and (s, None) when it
    is not.
    """
    pieces = s.split(delim, 1)
    if len(pieces) != 1:
        head, tail = pieces[0], pieces[1]
        return head, tail

    missing = None  # type: Optional[str]
    return s, missing
378
379
def hex_lower(i):
    # type: (int) -> str
    """Format i as lowercase hexadecimal digits, with no '0x' prefix."""
    fmt = '%x'
    return fmt % i
383
384
def dict_erase(d, key):
    # type: (Dict[Any, Any], Any) -> None
    """Remove key from d if it is present; do nothing otherwise.

    Ensures that a key isn't in the Dict d.  This makes C++ translation
    easier.
    """
    if key in d:
        del d[key]
394
395
def str_cmp(s1, s2):
    # type: (str, str) -> int
    """Three-way string comparison.

    Returns 0 if s1 == s2, -1 if s1 < s2, and 1 otherwise.
    """
    # Each comparison is a bool (0 or 1), so the difference is -1, 0 or 1.
    return (s1 > s2) - (s1 < s2)
404
405
class UniqueObjects(object):
    """A set of objects identified by their address in memory

    Python's id(obj) returns the address of any object.  But we don't simply
    implement it, because it requires a uint64_t on 64-bit systems, while
    mycpp only supports 'int'.

    So we have a whole class.

    Should be used for:

    - Cycle detection when pretty printing, as Python's repr() does
      - See CPython's Objects/object.c PyObject_Repr()
        /* These methods are used to control infinite recursion in repr, str,
           print, etc.  Container objects that may recursively contain
           themselves, e.g. builtin dictionaries and lists, should use
           Py_ReprEnter() and Py_ReprLeave() to avoid infinite recursion.
        */
      - e.g. dictobject.c dict_repr() calls Py_ReprEnter() to print {...}
      - In Python 2.7 a GLOBAL VAR is used

      - It also checks for STACK OVERFLOW

    - Packle serialization

    NOTE: only id(obj) is recorded, not a reference to obj.  Callers
    presumably need to keep registered objects alive, since id() values are
    only unique among objects that exist at the same time.
    """

    def __init__(self):
        # type: () -> None
        # 64-bit id() -> small integer ID
        self.addresses = {}  # type: Dict[int, int]

    def Contains(self, obj):
        # type: (Any) -> bool
        """Return whether obj has been added.  Convenience over Get()."""
        return self.Get(obj) != -1

    def MaybeAdd(self, obj):
        # type: (Any) -> None
        """Add obj if it isn't already present; otherwise do nothing.

        Convenience for callers that don't want Add()'s assertion.
        """
        if not self.Contains(obj):
            self.Add(obj)

    # def AddNewObject(self, obj):
    def Add(self, obj):
        # type: (Any) -> None
        """
        Assert it isn't already there, and assign a new ID!

        IDs are small integers handed out in insertion order, starting at 0.

        # Lib/pickle does:

            self.memo[id(obj)] = memo_len, obj

        I guess that's the object ID and a void*

        Then it does:

            x = self.memo.get(id(obj))

        and

            # If the object is already in the memo, this means it is
            # recursive. In this case, throw away everything we put on the
            # stack, and fetch the object back from the memo.
            if id(obj) in self.memo:
                write(POP + self.get(self.memo[id(obj)][0]))

        BUT It only uses the numeric ID!
        """
        addr = id(obj)
        assert addr not in self.addresses
        # Nothing is ever removed, so the current size is the next unused ID.
        self.addresses[addr] = len(self.addresses)

    def Get(self, obj):
        # type: (Any) -> int
        """
        Returns unique ID assigned

        Returns -1 if obj was never added.
        """
        addr = id(obj)
        return self.addresses.get(addr, -1)

    # Note: self.memo.clear() doesn't appear to be used
486
487
def probe(provider, name, *args):
    # type: (str, str, *Any) -> None
    """Create a probe for use with profilers like linux perf and ebpf or dtrace.

    In Python this does nothing and returns None, whatever extra arguments
    are passed.
    """
    # Noop.  Just a marker for mycpp to emit a DTRACE_PROBE()
    return
493
494
if 0:
    # Disabled prototype of Unix file descriptor I/O, compared with FILE*
    # libc I/O.  Doesn't seem like we need this now, so nothing in this
    # branch is ever defined.

    # Short versions of STDOUT_FILENO and STDERR_FILENO
    kStdout = 1
    kStderr = 2

    def writeln(s, fd=kStdout):
        # type: (str, int) -> None
        """Write a line.  The name is consistent with JavaScript writeln() and Rust.

        e.g.
          writeln("x = %d" % x, kStderr)

        TODO: The Oil interpreter shouldn't use print() anywhere.  Instead it
        can use writeln(s) and writeln(s, kStderr)
        """
        # Two separate writes: the text, then the newline.
        for chunk in (s, '\n'):
            posix.write(fd, chunk)

    class File(object):
        """Custom file wrapper for Unix I/O like write() read()

        Not C I/O like fwrite() fread().  There should be no flush().
        """

        def __init__(self, fd):
            # type: (int) -> None
            self.fd = fd

        def write(self, s):
            # type: (str) -> None
            """Write s to this object's file descriptor."""
            posix.write(self.fd, s)

        def writeln(self, s):
            # type: (str) -> None
            """Write s and a newline, via the module-level writeln()."""
            writeln(s, self.fd)