OILS / opy / _regtest / src / tools / osh2oil.py View on Github | oilshell.org

1211 lines, 527 significant
1from __future__ import print_function
2"""
3fix.py -- Do source transformations. Somewhat like 'go fix'.
4
5TODO: Change := to =, and var/const/set
6"""
7
8import sys
9
10from asdl import const
11from core import util
12from core import word
13from osh.meta import ast, Id
14
15from _devbuild.gen import runtime_asdl
16
# Module-level shorthands so the code below can refer to these names without
# the runtime_asdl. / util. / ast. prefix.
word_style_e = runtime_asdl.word_style_e

log = util.log

# Tag namespaces compared against node.tag in the Do* methods below.
command_e = ast.command_e
redir_e = ast.redir_e
word_e = ast.word_e
word_part_e = ast.word_part_e
arith_expr_e = ast.arith_expr_e
bool_expr_e = ast.bool_expr_e
lhs_expr_e = ast.lhs_expr_e
28
29
class Cursor(object):
  """Tracks how much of the arena's source text has been emitted so far.

  Source text is addressed by integer span ID.  The cursor remembers the ID of
  the next span to emit.  Callers alternate between copying original text
  (PrintUntil) and dropping it (SkipUntil), writing their own replacement text
  to the same file object in between.
  """

  def __init__(self, arena, f):
    self.f = f
    self.arena = arena
    self.next_span_id = 0  # nothing emitted yet

  def PrintUntil(self, until_span_id):
    """Write spans [next_span_id, until_span_id) to self.f unchanged.

    Afterwards the cursor is positioned at until_span_id, even if that is
    before the previous position (in which case nothing is written).
    """
    # Callers sometimes pass span_id + 1, hence '<' rather than '!='.
    assert until_span_id < const.NO_INTEGER, (
        'Missing span ID, got %d' % until_span_id)

    span_id = self.next_span_id
    while span_id < until_span_id:
      span = self.arena.GetLineSpan(span_id)
      source_line = self.arena.GetLine(span.line_id)
      self.f.write(source_line[span.col : span.col + span.length])
      span_id += 1

    self.next_span_id = until_span_id

  def SkipUntil(self, next_span_id):
    """Drop everything before next_span_id; printing resumes at that span."""
    assert next_span_id != const.NO_INTEGER, next_span_id
    self.next_span_id = next_span_id
64
65
def PrintAsOil(arena, node, debug_spans):
  """Translate the parsed program `node` and write the result to stdout.

  Args:
    arena: holds the source lines and spans that `node` refers to.
    node: root command node; it is translated with at_top_level=True and no
      local symbol table.
    debug_spans: if true, first dump every span index and its text to stderr.
  """
  if debug_spans:
    err = sys.stderr
    for i, span in enumerate(arena.spans):
      source_line = arena.GetLine(span.line_id)
      text = source_line[span.col : span.col + span.length]
      err.write('%5d %r\n' % (i, text))
    err.write('(%d spans)\n' % len(arena.spans))

  out = sys.stdout
  printer = OilPrinter(Cursor(arena, out), arena, out)
  printer.DoCommand(node, None, at_top_level=True)  # no local symbols yet
  printer.End()
80
81
82 # Cases:
83 #
84 # - Does it look like $foo?
85 # - Pedantic mode, then:
86 # x = @split(foo) No globbing here!
87 # @split($1) or @1 ?
88 # @-foo @-1 in expression mode
89 # And then for command mode, you will have *@1 and *@foo. Split first
90 # then glob.
91 #
92 # - Nice mode, then foo
93 # --assume no-word-splitting
94 # - Does it look like $(( 1 + 2 )) ? or $(echo hi)
95 # pedantic mode: $(1 + 2) or @[echo hi] ?
96 # nice mode: $(1 + 2) or $[echo hi]
97 #
98 # - Does it look like "$foo" or "${foo:-}"? Then it's just x = foo
99 # x = foo or 'default'
100 # - Does it contain any substitutions? Then whole thing is double quoted
101 # - Otherwise single quoted
102 #
103 # PROBLEM: ~ substitution. That is disabled by "".
104 # You can turn it into $HOME I guess
105 # const foo = $HOME/hello
106 # const foo = $~/bar # hm I kind of don't like this but OK
107 # const foo = "$~/bar"
108 # const foo = [ ~/bar ][0] # does this make sense?
109 # const foo = `~/bar`
110
111 # I think ~ should be like $ -- special. Maybe even inside double quotes?
112 # Or only at the front?
113
114
115# QEFS is wrong? Because RHS never gets split! It can always be foo=$1/foo.
116# Not used because RHS not split:
117# $x -> @-x and ${x} -> @-x
118# ${x:-default} -> @-(x or 'default')
119
def _GetRhsStyle(w):
  """Choose how a word should be written as an assignment right-hand side.

  Returns a word_style_e value:
    Expr     -- the word is exactly one variable or tilde substitution, either
                bare or alone inside double quotes ($x, ${x}, "$x", ~)
    Unquoted -- the word is exactly one command or arithmetic substitution,
                either bare or alone inside double quotes.  These keep their
                $[] / $() wrappers, so they are not pure expressions.
    DQ       -- a substitution appears somewhere in the word
    SQ       -- the word contains no substitutions

  Raises:
    AssertionError: if the word has no parts.
  """
  var_sub_tags = (word_part_e.SimpleVarSub, word_part_e.BracedVarSub,
                  word_part_e.TildeSubPart)
  other_sub_tags = (word_part_e.CommandSubPart, word_part_e.ArithSubPart)
  any_sub_tags = var_sub_tags + other_sub_tags
  dq_tag = word_part_e.DoubleQuotedPart

  # Word splitting never happens on assignment, so it isn't modelled here.
  parts = w.parts
  if len(parts) == 0:
    raise AssertionError(w)

  if len(parts) == 1:
    # A lone substitution, looking through one layer of double quotes:
    #   $x  ${x}  "$x"  "${x:-default}"  ->  expression
    #   $(cmd)  "$((1 + 2))"             ->  left unquoted
    candidate = parts[0]
    if candidate.tag == dq_tag and len(candidate.parts) == 1:
      candidate = candidate.parts[0]

    if candidate.tag in var_sub_tags:
      return word_style_e.Expr
    if candidate.tag in other_sub_tags:
      return word_style_e.Unquoted

  # Any substitution, tilde subs included, forces the double quoted style.
  for part in parts:
    if part.tag == dq_tag:
      if any(inner.tag in any_sub_tags for inner in part.parts):
        return word_style_e.DQ
    elif part.tag in any_sub_tags:
      return word_style_e.DQ

  return word_style_e.SQ
167
168
169# TODO: Change to --assume, and have a default for each one?
170#
171# NICE mode: Assume that the user isn't relying on word splitting. A lot of
172# users want this!
173#
174# Problem cases:
175#
176# for name in $(find ...); do echo $name; done
177#
178# This doesn't split. Heuristic:
179#
180# This should be a bunch of flags:
181#
182# --assume 'no-word-splitting no-undefined' etc.
183# globals-defined-first-outside-func (then we can generate := vs. ::=)
184# --split-output-from-commands 'find ls' # tokenize these
185
186# Special case: "find" is assumed to produce multiple things that you will want
187# to split? But that doesn't go within function calls. Hm.
188#
189# $(find -type f) -> @[find -type f]
190
# Translation mode: NICE is the default value of OilPrinter's 'mode' argument.
NICE = 0

# Try to convert with pedantic correctness. Not sure if users will want this
# though. Most people are not super principled about their shell programs.
# But experts might want it. Experts might want to run ShellCheck first and
# quote everything, and then everything will be unquoted.
#
# "$foo" "${foo}" -> $foo $foo
# $foo -> @-foo -> split then glob?
# *@foo maybe
# $(find -type f) -> @[find -type f]

# Translation mode: in PEDANTIC mode DoAssignment emits '::=' rather than ':='
# for an assignment inside a function to a name not declared local.
PEDANTIC = 1
204
205
206class OilPrinter(object):
207 """
208 Convert osh code to oil.
209
210 - command invocations
211 - find invocations
212 - xargs
213 """
214 def __init__(self, cursor, arena, f, mode=NICE):
215 self.cursor = cursor
216 self.arena = arena
217 self.f = f
218 # In PEDANTIC mode, we translate unquoted $foo to @-foo, which means it will
219 # be split and globbed?
220 self.mode = mode
221
222 def _DebugSpid(self, spid):
223 span = self.arena.GetLineSpan(spid)
224 line = self.arena.GetLine(span.line_id)
225 # TODO: This should be factored out
226 s = line[span.col : span.col + span.length]
227 print('SPID %d = %r' % (spid, s), file=sys.stderr)
228
229 def End(self):
230 """Make sure we print until the end of the file."""
231 end_id = len(self.arena.spans)
232 self.cursor.PrintUntil(end_id)
233
234 def DoRedirect(self, node, local_symbols):
235 #print(node, file=sys.stderr)
236 self.cursor.PrintUntil(node.spids[0])
237
238 # TODO:
239 # - Do < and <& the same way.
240 # - How to handle here docs and here docs?
241 # - >> becomes >+ or >-, or maybe >>>
242
243 if node.tag == redir_e.Redir:
244 if node.fd == const.NO_INTEGER:
245 if node.op_id == Id.Redir_Great:
246 self.f.write('>') # Allow us to replace the operator
247 self.cursor.SkipUntil(node.spids[0] + 1)
248 elif node.op_id == Id.Redir_GreatAnd:
249 self.f.write('> !') # Replace >& 2 with > !2
250 spid = word.LeftMostSpanForWord(node.arg_word)
251 self.cursor.SkipUntil(spid)
252 #self.DoWordInCommand(node.arg_word)
253
254 else:
255 # NOTE: Spacing like !2>err.txt vs !2 > err.txt can be done in the
256 # formatter.
257 self.f.write('!%d ' % node.fd)
258 if node.op_id == Id.Redir_Great:
259 self.f.write('>')
260 self.cursor.SkipUntil(node.spids[0] + 1)
261 elif node.op_id == Id.Redir_GreatAnd:
262 self.f.write('> !') # Replace 1>& 2 with !1 > !2
263 spid = word.LeftMostSpanForWord(node.arg_word)
264 self.cursor.SkipUntil(spid)
265
266 self.DoWordInCommand(node.arg_word, local_symbols)
267
268 elif node.tag == redir_e.HereDoc:
269 # TODO:
270 # If do_expansion, then """, else '''
271 # HereDoc LST node needs spids for both opening and closing delimiter.
272 raise NotImplementedError(node.__class__.__name__)
273
274 else:
275 raise AssertionError(node.__class__.__name__)
276
277 # <<< 'here word'
278 # << 'here word'
279 #
280 # 2> out.txt
281 # !2 > out.txt
282
283 # cat 1<< EOF
284 # hello $name
285 # EOF
286 # cat !1 << """
287 # hello $name
288 # """
289 #
290 # cat << 'EOF'
291 # no expansion
292 # EOF
293 # cat <<- 'EOF'
294 # no expansion and indented
295 #
296 # cat << '''
297 # no expansion
298 # '''
299 # cat << '''
300 # no expansion and indented
301 # '''
302
303 # Warn about multiple here docs on a line.
304 # As an obscure feature, allow
305 # cat << \'ONE' << \"TWO"
306 # 123
307 # ONE
308 # 234
309 # TWO
310 # The _ is an indicator that it's not a string to be piped in.
311 pass
312
313 def DoAssignment(self, node, at_top_level, local_symbols):
314 """
315 local_symbols:
316 - Add every 'local' declaration to it
317 - problem: what if you have local in an "if" ?
318 - we could treat it like nested scope and see what happens? Do any
319 programs have a problem with it?
320 case/if/for/while/BraceGroup all define scopes or what?
321 You don't want inconsistency of variables that could be defined at
322 any point.
323 - or maybe you only need it within "if / case" ? Well I guess
324 for/while can break out of the loop and cause problems. A break is
325 an "if".
326
327 - for subsequent
328 """
329 # Change RHS to expression language. Bare words not allowed. foo -> 'foo'
330
331 has_rhs = False # TODO: This is on a per-variable basis.
332 # local foo -> var foo = ''
333 # readonly foo -> setconst foo
334 # export foo -> export foo
335
336 # TODO:
337 # - This depends on self.mode.
338 # - And we also need the enclosing FuncDef node to analyze.
339 # - or we need a symbol table for the current function. Forget about
340 #
341 # Oil keywords:
342 # - global : scope qualifier
343 # - var, const : mutability
344 # - setconst, export : state mutation
345 #
346 # Operators:
347 # = and :=
348 #
349 # NOTE: Bash also has "unset". Does anyone use it?
350 # You can use "delete" like Python I guess. It's not the opposite of
351 # set.
352
353 # NOTE:
354 # - We CAN tell if a variable has been defined locally.
355 # - We CANNOT tell if it's been defined globally, because different files
356 # share the same global namespace, and we can't statically figure out what
357 # files are in the program.
358 defined_locally = False # is it a local variable in this function?
359 # can't tell if global
360 # We can change it from = to := or ::= (in pedantic mode)
361 new_assign_op_e = None
362
363 if node.keyword == Id.Assign_Local:
364 # Assume that 'local' it's a declaration. In osh, it's an error if
365 # locals are redefined. In bash, it's OK to do 'local f=1; local f=2'.
366 # Could have a flag if enough people do this.
367 if at_top_level:
368 raise RuntimeError('local at top level is invalid')
369
370 if defined_locally:
371 raise RuntimeError("Can't redefine local")
372
373 keyword_spid = node.spids[0]
374 self.cursor.PrintUntil(keyword_spid)
375 self.cursor.SkipUntil(keyword_spid + 1)
376 self.f.write('var')
377
378 if local_symbols is not None:
379 for pair in node.pairs:
380 # NOTE: Not handling local a[b]=c
381 if pair.lhs.tag == lhs_expr_e.LhsName:
382 #print("REGISTERED %s" % pair.lhs.name)
383 local_symbols[pair.lhs.name] = True
384
385 elif node.keyword == Id.Assign_None:
386 self.cursor.PrintUntil(node.spids[0])
387
388 # For now, just detect whether the FIRST assignment on the line has been
389 # declared locally. We might want to split every line into separate
390 # statements.
391 if local_symbols is not None:
392 lhs0 = node.pairs[0].lhs
393 if lhs0.tag == lhs_expr_e.LhsName and lhs0.name in local_symbols:
394 defined_locally = True
395 #print("CHECKING NAME", lhs0.name, defined_locally, local_symbols)
396
397 # need semantic analysis.
398 # Would be nice to assume that it's a local though.
399 if at_top_level:
400 self.f.write('global ') # can't be redefined
401 new_assign_op_e = '::='
402 #self.f.write('global TODO := TODO') # mutate global or define it
403 elif defined_locally:
404 new_assign_op_e = ':=' # assume mutation of local
405 #self.f.write('[local mutated]')
406 else:
407 # we're in a function, but it's not defined locally.
408 self.f.write('global ') # assume mutation of local
409 if self.mode == PEDANTIC: # assume globals defined
410 new_assign_op_e = '::='
411 else:
412 new_assign_op_e = ':='
413
414 elif node.keyword == Id.Assign_Readonly:
415 # Explicit const. Assume it can't be redefined.
416 # Verb.
417 #
418 # Top level;
419 # readonly FOO=bar -> const FOO = 'bar'
420 # readonly FOO -> freeze FOO
421 # function level:
422 # readonly FOO=bar -> const global FOO ::= 'bar'
423 # readonly FOO -> freeze FOO
424 keyword_spid = node.spids[0]
425 if at_top_level:
426 self.cursor.PrintUntil(keyword_spid)
427 self.cursor.SkipUntil(keyword_spid + 1)
428 self.f.write('const') # can't be redefined
429 elif defined_locally:
430 self.f.write('setconst FOO = "bar"')
431 else:
432 self.f.write('setconst global FOO = "bar"')
433
434 elif node.keyword == Id.Assign_Declare:
435 # declare -rx foo spam=eggs
436 # export foo
437 # setconst foo
438 #
439 # spam = eggs
440 # export spam
441
442 # Have to parse the flags
443 self.f.write('TODO ')
444
445 # foo=bar spam=eggs -> foo = 'bar', spam = 'eggs'
446 n = len(node.pairs)
447 for i, pair in enumerate(node.pairs):
448 assert pair.lhs.tag == lhs_expr_e.LhsName
449
450 left_spid = pair.spids[0]
451 self.cursor.PrintUntil(left_spid)
452 # Assume skipping over one Lit_VarLike token
453 self.cursor.SkipUntil(left_spid + 1)
454
455 # Replace name. I guess it's Lit_Chars.
456 self.f.write(pair.lhs.name)
457 op = new_assign_op_e if new_assign_op_e else '='
458 self.f.write(' %s ' % op)
459
460 # foo=bar -> foo = 'bar'
461 #print('RHS', pair.rhs, file=sys.stderr)
462 if pair.rhs is None:
463 self.f.write("''") # local i -> var i = ''
464 else:
465 self.DoWordAsExpr(pair.rhs, local_symbols)
466
467 if i != n - 1:
468 self.f.write(',')
469
470 def DoCommand(self, node, local_symbols, at_top_level=False):
471 if node.tag == command_e.CommandList:
472 # TODO: How to distinguish between echo hi; echo bye; and on separate
473 # lines
474 for child in node.children:
475 self.DoCommand(child, local_symbols)
476
477 elif node.tag == command_e.SimpleCommand:
478 # How to preserve spaces between words? Do you want to do it?
479 # Well you need to test this:
480 #
481 # echo foo \
482 # bar
483
484 # TODO: Need to print until the left most part of the phrase? the phrase
485 # is a word, binding, redirect.
486 #self.cursor.PrintUntil()
487
488 if node.more_env:
489 (left_spid,) = node.more_env[0].spids
490 self.cursor.PrintUntil(left_spid)
491 self.f.write('env ')
492
493 # We only need to transform the right side, not left side.
494 for pair in node.more_env:
495 self.DoWordInCommand(pair.val, local_symbols)
496
497 # More translations:
498 # - . to source
499 # - eval to sh-eval
500
501 if node.words:
502 first_word = node.words[0]
503 ok, val, quoted = word.StaticEval(first_word)
504 word0_spid = word.LeftMostSpanForWord(first_word)
505 if ok and not quoted:
506 if val == '[':
507 last_word = node.words[-1]
508 # Check if last word is ]
509 ok, val, quoted = word.StaticEval(last_word)
510 if ok and not quoted and val == ']':
511 # Replace [ with 'test'
512 self.cursor.PrintUntil(word0_spid)
513 self.cursor.SkipUntil(word0_spid + 1)
514 self.f.write('test')
515
516 for w in node.words[1:-1]:
517 self.DoWordInCommand(w, local_symbols)
518
519 # Now omit ]
520 last_spid = word.LeftMostSpanForWord(last_word)
521 self.cursor.PrintUntil(last_spid - 1) # Get the space before
522 self.cursor.SkipUntil(last_spid + 1) # ] takes one spid
523 return
524 else:
525 raise RuntimeError('Got [ without ]')
526
527 elif val == '.':
528 self.cursor.PrintUntil(word0_spid)
529 self.cursor.SkipUntil(word0_spid + 1)
530 self.f.write('source')
531 return
532
533 for w in node.words:
534 self.DoWordInCommand(w, local_symbols)
535
536 # NOTE: This will change to "phrase"? Word or redirect.
537 for r in node.redirects:
538 self.DoRedirect(r, local_symbols)
539
540 # TODO: Print the terminator. Could be \n or ;
541 # Need to print env like PYTHONPATH = 'foo' && ls
542 # Need to print redirects:
543 # < > are the same. << is here string, and >> is assignment.
544 # append is >+
545
546 # TODO: static_eval of simple command
547 # - [ -> "test". Eliminate trailing ].
548 # - . -> source, etc.
549
550 elif node.tag == command_e.Assignment:
551 self.DoAssignment(node, at_top_level, local_symbols)
552
553 elif node.tag == command_e.Pipeline:
554 # Obscure: |& turns into |- or |+ for stderr.
555 # TODO:
556 # if ! true; then -> if not true {
557
558 # if ! echo | grep; then -> if not { echo | grep } {
559 # }
560 # not is like do {}, but it negates the return value I guess.
561
562 for child in node.children:
563 self.DoCommand(child, local_symbols)
564
565 elif node.tag == command_e.AndOr:
566 for child in node.children:
567 self.DoCommand(child, local_symbols)
568
569 elif node.tag == command_e.Sentence:
570 # 'ls &' to 'fork ls'
571 # Keep ; the same.
572 self.DoCommand(node.child, local_symbols)
573
574 # This has to be different in the function case.
575 elif node.tag == command_e.BraceGroup:
576 # { echo hi; } -> do { echo hi }
577 # For now it might be OK to keep 'do { echo hi; }
578 #left_spid, right_spid = node.spids
579 (left_spid,) = node.spids
580
581 self.cursor.PrintUntil(left_spid)
582 self.cursor.SkipUntil(left_spid + 1)
583 self.f.write('do {')
584
585 for child in node.children:
586 self.DoCommand(child, local_symbols)
587
588 elif node.tag == command_e.Subshell:
589 # (echo hi) -> shell echo hi
590 # (echo hi; echo bye) -> shell {echo hi; echo bye}
591
592 (left_spid, right_spid) = node.spids
593
594 self.cursor.PrintUntil(left_spid)
595 self.cursor.SkipUntil(left_spid + 1)
596 self.f.write('shell {')
597
598 self.DoCommand(node.child, local_symbols)
599
600 #self._DebugSpid(right_spid)
601 #self._DebugSpid(right_spid + 1)
602
603 #print('RIGHT SPID', right_spid)
604 self.cursor.PrintUntil(right_spid)
605 self.cursor.SkipUntil(right_spid + 1)
606 self.f.write('}')
607
608 elif node.tag == command_e.DParen:
609 # Just change (( )) to ( )
610 # Test it with while loop
611 self.DoArithExpr(node.child, local_symbols)
612
613 elif node.tag == command_e.DBracket:
614 # [[ 1 -eq 2 ]] to (1 == 2)
615 self.DoBoolExpr(node.expr)
616
617 elif node.tag == command_e.FuncDef:
618 # TODO: skip name
619 #self.f.write('proc %s' % node.name)
620
621 # New symbol table for every function.
622 new_local_symbols = {}
623
624 # Should be the left most span, including 'function'
625 self.cursor.PrintUntil(node.spids[0])
626
627 self.f.write('proc ')
628 self.f.write(node.name)
629 self.cursor.SkipUntil(node.spids[1])
630
631 if node.body.tag == command_e.BraceGroup:
632 # Don't add "do" like a standalone brace group. Just use {}.
633 for child in node.body.children:
634 self.DoCommand(child, new_local_symbols)
635 else:
636 pass
637 # Add {}.
638 # proc foo {
639 # shell {echo hi; echo bye}
640 # }
641 #self.DoCommand(node.body)
642
643 elif node.tag == command_e.BraceGroup:
644 for child in node.children:
645 self.DoCommand(child, local_symbols)
646
647 elif node.tag == command_e.DoGroup:
648 do_spid, done_spid = node.spids
649 self.cursor.PrintUntil(do_spid)
650 self.cursor.SkipUntil(do_spid + 1)
651 self.f.write('{')
652
653 for child in node.children:
654 self.DoCommand(child, local_symbols)
655
656 self.cursor.PrintUntil(done_spid)
657 self.cursor.SkipUntil(done_spid + 1)
658 self.f.write('}')
659
660 elif node.tag == command_e.ForEach:
661 # Need to preserve spaces between words, because there can be line
662 # wrapping.
663 # for x in a b c \
664 # d e f; do
665
666 in_spid, semi_spid = node.spids
667
668 if in_spid == const.NO_INTEGER:
669 #self.cursor.PrintUntil() # 'for x' and then space
670 self.f.write('for %s in @Argv ' % node.iter_name)
671 self.cursor.SkipUntil(node.body.spids[0])
672 else:
673 self.cursor.PrintUntil(in_spid + 1) # 'for x in' and then space
674 self.f.write('[')
675 for w in node.iter_words:
676 self.DoWordInCommand(w, local_symbols)
677 self.f.write(']')
678 #print("SKIPPING SEMI %d" % semi_spid, file=sys.stderr)
679
680 if semi_spid != const.NO_INTEGER:
681 self.cursor.PrintUntil(semi_spid)
682 self.cursor.SkipUntil(semi_spid + 1)
683
684 self.DoCommand(node.body, local_symbols)
685
686 elif node.tag == command_e.ForExpr:
687 # Change (( )) to ( ), and then _FixDoGroup
688 pass
689
690 elif node.tag == command_e.While:
691 cond = node.cond
692 if len(cond) == 1 and cond[0].tag == command_e.Sentence:
693 spid = cond[0].terminator.span_id
694 self.cursor.PrintUntil(spid)
695 self.cursor.SkipUntil(spid + 1)
696
697 self.DoCommand(node.body, local_symbols)
698
699 elif node.tag == command_e.If:
700 else_spid, fi_spid = node.spids
701
702 # if foo; then -> if foo {
703 # elif foo; then -> } elif foo {
704 for arm in node.arms:
705 elif_spid, then_spid = arm.spids
706 if elif_spid != const.NO_INTEGER:
707 self.cursor.PrintUntil(elif_spid)
708 self.f.write('} ')
709
710 cond = arm.cond
711 if len(cond) == 1 and cond[0].tag == command_e.Sentence:
712 sentence = cond[0]
713 self.DoCommand(sentence, local_symbols)
714
715 # Remove semi-colon
716 semi_spid = sentence.terminator.span_id
717 self.cursor.PrintUntil(semi_spid)
718 self.cursor.SkipUntil(semi_spid + 1)
719 else:
720 for child in arm.cond:
721 self.DoCommand(child, local_symbols)
722
723 self.cursor.PrintUntil(then_spid)
724 self.cursor.SkipUntil(then_spid + 1)
725 self.f.write('{')
726
727 for child in arm.action:
728 self.DoCommand(child, local_symbols)
729
730 # else -> } else {
731 if node.else_action:
732 self.cursor.PrintUntil(else_spid)
733 self.f.write('} ')
734 self.cursor.PrintUntil(else_spid + 1)
735 self.f.write(' {')
736
737 for child in node.else_action:
738 self.DoCommand(child, local_symbols)
739
740 # fi -> }
741 self.cursor.PrintUntil(fi_spid)
742 self.cursor.SkipUntil(fi_spid + 1)
743 self.f.write('}')
744
745 elif node.tag == command_e.Case:
746 case_spid, in_spid, esac_spid = node.spids
747 self.cursor.PrintUntil(case_spid)
748 self.cursor.SkipUntil(case_spid + 1)
749 self.f.write('matchstr')
750
751 # Reformat "$1" to $1
752 self.DoWordInCommand(node.to_match, local_symbols)
753
754 self.cursor.PrintUntil(in_spid)
755 self.cursor.SkipUntil(in_spid + 1)
756 self.f.write('{') # matchstr $var {
757
758 # each arm needs the ) and the ;; node to skip over?
759 for arm in node.arms:
760 left_spid, rparen_spid, dsemi_spid, last_spid = arm.spids
761 #print(left_spid, rparen_spid, dsemi_spid)
762
763 self.cursor.PrintUntil(left_spid)
764 # Hm maybe keep | because it's semi-deprecated? You can use
765 # reload|force-reload {
766 # }
767 # e/reload|force-reload/ {
768 # }
769 # / 'reload' or 'force-reload' / {
770 # }
771 #
772 # Yeah it's the more abbreviated syntax.
773
774 # change | to 'or'
775 for pat in arm.pat_list:
776 pass
777
778 # Skip this
779 self.cursor.PrintUntil(rparen_spid)
780 self.cursor.SkipUntil(rparen_spid + 1)
781 self.f.write(' {') # surround it with { }
782
783 for child in arm.action:
784 self.DoCommand(child, local_symbols)
785
786 if dsemi_spid != const.NO_INTEGER:
787 self.cursor.PrintUntil(dsemi_spid)
788 self.cursor.SkipUntil(dsemi_spid + 1)
789 # NOTE: indentation here will be off because ;; is likely indented
790 # with body.
791 self.f.write('}')
792 elif last_spid != const.NO_INTEGER:
793 self.cursor.PrintUntil(last_spid)
794 # NOTE: Indentation is also off here. Arbitrarily put 4 spaces.
795 self.f.write(' }\n')
796 else:
797 raise AssertionError(
798 "Expected with dsemi_spid or last_spid in case arm")
799
800 self.cursor.PrintUntil(esac_spid)
801 self.cursor.SkipUntil(esac_spid + 1)
802 self.f.write('}') # strmatch $var {
803
804 elif node.tag == command_e.NoOp:
805 pass
806
807 elif node.tag == command_e.ControlFlow:
808 # No change for break / return / continue
809 pass
810
811 elif node.tag == command_e.TimeBlock:
812 self.DoCommand(node.pipeline, local_symbols)
813
814 else:
815 #log('Command not handled: %s', node)
816 raise AssertionError(node.__class__.__name__)
817
818 def DoWordAsExpr(self, node, local_symbols):
819 style = _GetRhsStyle(node)
820 if style == word_style_e.SQ:
821 self.f.write("'")
822 self.DoWordInCommand(node, local_symbols)
823 self.f.write("'")
824 elif style == word_style_e.DQ:
825 self.f.write('"')
826 self.DoWordInCommand(node, local_symbols)
827 self.f.write('"')
828 else:
829 # "${foo:-default}" -> foo or 'default'
830 # ${foo:-default} -> @split(foo or 'default')
831 # @(foo or 'default') -- implicit split.
832
833 if word.IsVarSub(node): # ${1} or "$1"
834 # Do it in expression mode
835 pass
836 # NOTE: ArithSub with $(1 +2 ) is different than 1 + 2 because of
837 # conversion to string.
838
839 # For now, just stub it out
840 self.DoWordInCommand(node, local_symbols)
841
842 def DoWordInCommand(self, node, local_symbols):
843 """
844 New reserved symbols:
845 echo == must be changed to echo '==' because = is a reserved symbol.
846 echo @$foo -> echo "@$foo" because @ is reserved
847
848 Problems:
849 rm --verbose=true
850 rm '--verbose=true' -- is this bad?
851
852 Same with comma
853 foo, bar = 1
854
855 # I guess we can allow this
856 ls --long foo,bar
857
858 or force:
859 (foo, bar) = 1
860
861 Maybe we need a clever 'pre-lex'
862 overwhelmingly the second char will be ' '
863
864 foo/bar/foo.py
865 foo.py
866 ./hello
867 foo_bar
868 [a-zA-Z0-9] / - . _ -- filename chars
869
870
871 first word:
872 var, const, export, setconst, global
873 func, proc, do, not, shell,
874 maybe: time, coproc, etc.
875
876 = -- generic expression, = 1+2
877
878 non-filename char AFTER first word
879 cmd:
880 ' ' foo bar baz
881 '\n' foo
882 '<' foo < bar
883 '>' foo > bar
884 ! ls !2 > !1
885 | who | wc -l
886 |- who |- wc -l
887
888 expr:
889 = foo = bar
890 , a, b = x
891 [ a[x] = 1
892 ( f(x) for( while( if(
893
894 1+2 -- I think this tries to run the command
895 """
896 # Are we getting rid of word joining? Or maybe keep it but discourage and
897 # provide alternatives.
898 #
899 # You don't really have a problem with byte strings, those are b'foo', but
900 # that's in expression mode, not command mode.
901
902 # Problems:
903 # - Tilde sub can't be quoted. ls ~/foo/"foo" are incompatible with the
904 # rule.
905 # - Globs can't be quoted. ls 'foo'*.py can't be ls "foo*.py" -- it means
906 # something different.
907 # Might need to finish more of the globber to figure this out.
908
909 # What about here docs words? It's a double quoted part, but with
910 # different formatting!
911 if node.tag == word_e.CompoundWord:
912
913 # UNQUOTE simple var subs
914
915 # TODO: I think we have to print the beginning and the end?
916
917 #left_spid = word.LeftMostSpanForWord(node)
918 #right_spid = word.RightMostSpanForWord(node)
919 #right_spid = -1
920 #print('DoWordInCommand %s %s' % (left_spid, right_spid), file=sys.stderr)
921
922 # Special case for "$@". Wow this needs pattern matching!
923 # TODO:
924 # "$foo" -> $foo
925 # "${foo}" -> $foo
926
927 if (len(node.parts) == 1 and
928 node.parts[0].tag == word_part_e.DoubleQuotedPart):
929 dq_part = node.parts[0]
930
931 # TODO: Double quoted part needs left and right IDs
932 left_spid, right_spid = dq_part.spids
933 assert right_spid != const.NO_INTEGER, right_spid
934
935 if len(dq_part.parts) == 1:
936 part0 = dq_part.parts[0]
937 if part0.tag == word_part_e.SimpleVarSub:
938 vsub_part = dq_part.parts[0]
939 if vsub_part.token.id == Id.VSub_At:
940 # NOTE: This is off for double quoted part. Hack to subtract 1.
941 self.cursor.PrintUntil(left_spid)
942 self.cursor.SkipUntil(right_spid + 1) # " then $@ then "
943 self.f.write('@Argv')
944 return # Done replacing
945
946 # "$1" -> $1, "$foo" -> $foo
947 if vsub_part.token.id in (Id.VSub_Number, Id.VSub_Name):
948 self.cursor.PrintUntil(left_spid)
949 self.cursor.SkipUntil(right_spid + 1)
950 self.f.write(vsub_part.token.val)
951 return
952
953 # Single arith sub, command sub, etc.
954 # On the other hand, an unquoted one needs to turn into
955 #
956 # $(echo one two) -> @[echo one two]
957 # `echo one two` -> @[echo one two]
958 #
959 # ${var:-'the default'} -> @$(var or 'the default')
960 #
961 # $((1 + 2)) -> $(1 + 2) -- this is OK unquoted
962
963 elif part0.tag == word_part_e.BracedVarSub:
964 # Skip over quote
965 self.cursor.PrintUntil(left_spid)
966 self.cursor.SkipUntil(left_spid + 1)
967 self.DoWordPart(part0, local_symbols)
968 self.cursor.SkipUntil(right_spid + 1)
969 return
970
971 elif part0.tag == word_part_e.CommandSubPart:
972 self.cursor.PrintUntil(left_spid)
973 self.cursor.SkipUntil(left_spid + 1)
974 self.DoWordPart(part0, local_symbols)
975 self.cursor.SkipUntil(right_spid + 1)
976 return
977
978 # It's None for here docs I think.
979 #log("NODE %s", node)
980 #if left_spid is not None and left_spid >= 0:
981 #span = self.arena.GetLineSpan(span_id)
982 #print(span)
983
984 #self.cursor.PrintUntil(left_spid)
985 #pass
986
987 # TODO: 'foo'"bar" should be "foobar", etc.
988 # If any part is double quoted, you can always double quote the whole
989 # thing?
990 for part in node.parts:
991 self.DoWordPart(part, local_symbols)
992
993 #if right_spid >= 0:
994 #self.cursor.PrintUntil(right_spid)
995 #pass
996
997 else:
998 raise AssertionError(node.__class__.__name__)
999
1000 def DoWordPart(self, node, local_symbols, quoted=False):
1001 span_id = word.LeftMostSpanForPart(node)
1002 if span_id is not None and span_id != const.NO_INTEGER:
1003 span = self.arena.GetLineSpan(span_id)
1004 #print(span)
1005
1006 self.cursor.PrintUntil(span_id)
1007
1008 if node.tag == word_part_e.ArrayLiteralPart:
1009 pass
1010
1011 elif node.tag == word_part_e.EscapedLiteralPart:
1012 if quoted:
1013 pass
1014 else:
1015 # If unquoted \e, it should quoted instead. ' ' vs. \<invisible space>
1016 # Hm is this necessary though? I think the only motivation is changing
1017 # \{ and \( for macros. And ' ' to be readable/visible.
1018 t = node.token
1019 val = t.val[1:]
1020 assert len(val) == 1, val
1021 if val != '\n':
1022 self.cursor.PrintUntil(t.span_id)
1023 self.cursor.SkipUntil(t.span_id + 1)
1024 self.f.write("'%s'" % val)
1025
1026 elif node.tag == word_part_e.LiteralPart:
1027 # Print it literally.
1028 # TODO: We might want to do it all on the word level though. For
1029 # example, foo"bar" becomes "foobar" in oil.
1030 spid = node.token.span_id
1031 if spid is None:
1032 #raise RuntimeError('%s has no span_id' % node.token)
1033 # TODO: Fix word.TildeDetect to construct proper tokens.
1034 print('WARNING:%s has no span_id' % node.token, file=sys.stderr)
1035 else:
1036 self.cursor.PrintUntil(spid + 1)
1037
1038 elif node.tag == word_part_e.TildeSubPart: # No change
1039 pass
1040
1041 elif node.tag == word_part_e.SingleQuotedPart:
1042 # TODO:
1043 # '\n' is '\\n'
1044 # $'\n' is '\n'
1045 # TODO: Should print until right_spid
1046 # left_spid, right_spid = node.spids
1047 if node.tokens: # Empty string has no tokens
1048 last_spid = node.tokens[-1].span_id
1049 self.cursor.PrintUntil(last_spid + 1)
1050
1051 elif node.tag == word_part_e.DoubleQuotedPart:
1052 for part in node.parts:
1053 self.DoWordPart(part, local_symbols, quoted=True)
1054
1055 elif node.tag == word_part_e.SimpleVarSub:
1056 spid = node.token.span_id
1057 op_id = node.token.id
1058
1059 if op_id == Id.VSub_Name:
1060 self.cursor.PrintUntil(spid + 1)
1061
1062 elif op_id == Id.VSub_Number:
1063 self.cursor.PrintUntil(spid + 1)
1064
1065 elif op_id == Id.VSub_Bang: # $!
1066 self.f.write('$BgPid') # Job most recently placed in background
1067 self.cursor.SkipUntil(spid + 1)
1068
1069 elif op_id == Id.VSub_At: # $@
1070 self.f.write('$ifsjoin(Argv)')
1071 self.cursor.SkipUntil(spid + 1)
1072
1073 elif op_id == Id.VSub_Pound: # $#
1074 self.f.write('$Argc')
1075 self.cursor.SkipUntil(spid + 1)
1076
1077 elif op_id == Id.VSub_Dollar: # $$
1078 self.f.write('$Pid')
1079 self.cursor.SkipUntil(spid + 1)
1080
1081 elif op_id == Id.VSub_Star: # $*
1082 # PEDANTIC: Depends if quoted or unquoted
1083 self.f.write('$ifsjoin(Argv)')
1084 self.cursor.SkipUntil(spid + 1)
1085
1086 elif op_id == Id.VSub_Hyphen: # $*
1087 self.f.write('$Flags')
1088 self.cursor.SkipUntil(spid + 1)
1089
1090 elif op_id == Id.VSub_QMark: # $?
1091 self.f.write('$Status')
1092 self.cursor.SkipUntil(spid + 1)
1093
1094 else:
1095 raise AssertionError(op_id)
1096
1097 elif node.tag == word_part_e.BracedVarSub:
1098 left_spid, right_spid = node.spids
1099
1100 # NOTE: Why do we need this but we don't need it in command sub?
1101 self.cursor.PrintUntil(left_spid)
1102
1103 name_spid = node.token.span_id
1104 op_id = node.token.id
1105
1106 parens_needed = True
1107 if node.bracket_op:
1108 # a[1]
1109 # These two change the sigil! ${a[@]} is now @a!
1110 # a[@]
1111 # a[*]
1112 pass
1113
1114 if node.prefix_op:
1115 # len()
1116 pass
1117 if node.suffix_op:
1118 # foo.trimLeft()
1119 # foo.trimGlobLeft()
1120 # foo.trimGlobLeft(longest=True)
1121 #
1122 # python lstrip() does something different
1123
1124 # a[1:1]
1125
1126 # .replace()
1127 # .replaceGlob()
1128
1129 pass
1130
1131 if op_id == Id.VSub_QMark:
1132 self.cursor.PrintUntil(name_spid + 1)
1133
1134 if parens_needed:
1135 # Skip over left bracket and write our own.
1136 self.f.write('$(')
1137 self.cursor.SkipUntil(left_spid + 1)
1138
1139 # Placeholder for now
1140 self.cursor.PrintUntil(right_spid)
1141
1142 # Skip over right bracket and write our own.
1143 self.f.write(')')
1144 else:
1145 pass
1146
1147 self.cursor.SkipUntil(right_spid + 1)
1148
1149 elif node.tag == word_part_e.CommandSubPart:
1150 left_spid, right_spid = node.spids
1151
1152 #self.cursor.PrintUntil(left_spid)
1153 self.f.write('$[')
1154 self.cursor.SkipUntil(left_spid + 1)
1155
1156 self.DoCommand(node.command_list, local_symbols)
1157
1158 self.f.write(']')
1159 self.cursor.SkipUntil(right_spid + 1)
1160 # change to $[echo hi]
1161
1162 elif node.tag == word_part_e.ArithSubPart:
1163 left_spid, right_spid = node.spids
1164
1165 # Skip over left bracket and write our own.
1166 self.f.write('$(')
1167 self.cursor.SkipUntil(left_spid + 1)
1168
1169 # NOTE: This doesn't do anything yet.
1170 self.DoArithExpr(node.anode, local_symbols)
1171 # Placeholder for now
1172 self.cursor.PrintUntil(right_spid - 1)
1173
1174 # Skip over right bracket and write our own.
1175 self.f.write(')')
1176 self.cursor.SkipUntil(right_spid + 1)
1177
1178 else:
1179 raise AssertionError(node.__class__.__name__)
1180
1181 def DoArithExpr(self, node, local_symbols):
1182 if node.tag == arith_expr_e.ArithBinary:
1183 # Maybe I should just write the left span and right span for each word?
1184 #self.f.write(str(node.left))
1185
1186 if node.op_id == Id.Arith_Plus:
1187 # NOTE: Right isn't necessarily a word!
1188 r_id = word.LeftMostSpanForWord(node.right.w)
1189 #self.cursor.SkipUntil(r_id)
1190 #self.f.write('PLUS')
1191
1192 #self.f.write(str(node.right))
1193 elif node.tag == arith_expr_e.ArithWord:
1194 self.DoWordInCommand(node.w, local_symbols)
1195
1196 else:
1197 raise AssertionError(node.__class__.__name__)
1198
1199 def DoBoolExpr(self, node):
1200 # TODO: switch on node.tag
1201 pass
1202
1203# WordPart?
1204
1205# array_item
1206#
1207# These get turned into expressions
1208#
1209# bracket_op
1210# suffix_op
1211# prefix_op