Diffstat (limited to 'bitbake/lib')
-rw-r--r--  bitbake/lib/codegen.py              570
-rw-r--r--  bitbake/lib/ply/__init__.py           4
-rw-r--r--  bitbake/lib/ply/lex.py             1058
-rw-r--r--  bitbake/lib/ply/yacc.py            3276
-rw-r--r--  bitbake/lib/pysh/__init__.py          0
-rw-r--r--  bitbake/lib/pysh/builtin.py         710
-rw-r--r--  bitbake/lib/pysh/interp.py         1367
-rw-r--r--  bitbake/lib/pysh/lsprof.py          116
-rw-r--r--  bitbake/lib/pysh/pysh.py            167
-rw-r--r--  bitbake/lib/pysh/pyshlex.py         888
-rw-r--r--  bitbake/lib/pysh/pyshyacc.py        772
-rw-r--r--  bitbake/lib/pysh/sherrors.py         41
-rw-r--r--  bitbake/lib/pysh/subprocess_fix.py   77
13 files changed, 9046 insertions(+), 0 deletions(-)
diff --git a/bitbake/lib/codegen.py b/bitbake/lib/codegen.py
new file mode 100644
index 0000000000..be772d5107
--- /dev/null
+++ b/bitbake/lib/codegen.py
@@ -0,0 +1,570 @@
1# -*- coding: utf-8 -*-
2"""
3 codegen
4 ~~~~~~~
5
6    Extension to ast that allows ast -> python code generation.
7
8 :copyright: Copyright 2008 by Armin Ronacher.
9 :license: BSD.
10"""
11from ast import *
12
13BOOLOP_SYMBOLS = {
14 And: 'and',
15 Or: 'or'
16}
17
18BINOP_SYMBOLS = {
19 Add: '+',
20 Sub: '-',
21 Mult: '*',
22 Div: '/',
23 FloorDiv: '//',
24 Mod: '%',
25 LShift: '<<',
26 RShift: '>>',
27 BitOr: '|',
28 BitAnd: '&',
29    BitXor: '^',
      Pow: '**'
30}
31
32CMPOP_SYMBOLS = {
33 Eq: '==',
34 Gt: '>',
35 GtE: '>=',
36 In: 'in',
37 Is: 'is',
38 IsNot: 'is not',
39 Lt: '<',
40 LtE: '<=',
41 NotEq: '!=',
42 NotIn: 'not in'
43}
44
45UNARYOP_SYMBOLS = {
46 Invert: '~',
47 Not: 'not',
48 UAdd: '+',
49 USub: '-'
50}
51
52ALL_SYMBOLS = {}
53ALL_SYMBOLS.update(BOOLOP_SYMBOLS)
54ALL_SYMBOLS.update(BINOP_SYMBOLS)
55ALL_SYMBOLS.update(CMPOP_SYMBOLS)
56ALL_SYMBOLS.update(UNARYOP_SYMBOLS)
57
58def to_source(node, indent_with=' ' * 4, add_line_information=False):
59 """This function can convert a node tree back into python sourcecode.
60 This is useful for debugging purposes, especially if you're dealing with
61 custom asts not generated by python itself.
62
63 It could be that the sourcecode is evaluable when the AST itself is not
64 compilable / evaluable. The reason for this is that the AST contains some
65 more data than regular sourcecode does, which is dropped during
66 conversion.
67
68    Each level of indentation is replaced with `indent_with`. By default this
69    parameter is equal to four spaces as suggested by PEP 8, but it can be
70    adjusted to match the application's style guide.
71
72 If `add_line_information` is set to `True` comments for the line numbers
73 of the nodes are added to the output. This can be used to spot wrong line
74 number information of statement nodes.
75 """
76 generator = SourceGenerator(indent_with, add_line_information)
77 generator.visit(node)
78 return ''.join(generator.result)
79
80
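A minimal round-trip sketch of to_source() (the input snippet and the import of this file as `codegen` are illustrative; per the comments below, this generator targets Python 2.6-era ASTs):

    import ast
    import codegen   # this file, assuming it is importable

    source = "def add(a, b=1):\n    return a + b"
    tree = ast.parse(source)
    print(codegen.to_source(tree))
    # def add(a, b=1):
    #     return a + b
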
81class SourceGenerator(NodeVisitor):
82 """This visitor is able to transform a well formed syntax tree into python
83 sourcecode. For more details have a look at the docstring of the
84    `to_source` function.
85 """
86
87 def __init__(self, indent_with, add_line_information=False):
88 self.result = []
89 self.indent_with = indent_with
90 self.add_line_information = add_line_information
91 self.indentation = 0
92 self.new_lines = 0
93
94 def write(self, x):
95 if self.new_lines:
96 if self.result:
97 self.result.append('\n' * self.new_lines)
98 self.result.append(self.indent_with * self.indentation)
99 self.new_lines = 0
100 self.result.append(x)
101
102 def newline(self, node=None, extra=0):
103 self.new_lines = max(self.new_lines, 1 + extra)
104 if node is not None and self.add_line_information:
105 self.write('# line: %s' % node.lineno)
106 self.new_lines = 1
107
108 def body(self, statements):
109        self.new_lines = max(self.new_lines, 1)
110 self.indentation += 1
111 for stmt in statements:
112 self.visit(stmt)
113 self.indentation -= 1
114
115 def body_or_else(self, node):
116 self.body(node.body)
117 if node.orelse:
118 self.newline()
119 self.write('else:')
120 self.body(node.orelse)
121
122 def signature(self, node):
123 want_comma = []
124 def write_comma():
125 if want_comma:
126 self.write(', ')
127 else:
128 want_comma.append(True)
129
130 padding = [None] * (len(node.args) - len(node.defaults))
131 for arg, default in zip(node.args, padding + node.defaults):
132 write_comma()
133 self.visit(arg)
134 if default is not None:
135 self.write('=')
136 self.visit(default)
137 if node.vararg is not None:
138 write_comma()
139 self.write('*' + node.vararg)
140 if node.kwarg is not None:
141 write_comma()
142 self.write('**' + node.kwarg)
143
144 def decorators(self, node):
145 for decorator in node.decorator_list:
146 self.newline(decorator)
147 self.write('@')
148 self.visit(decorator)
149
150 # Statements
151
152 def visit_Assign(self, node):
153 self.newline(node)
154 for idx, target in enumerate(node.targets):
155 if idx:
156 self.write(', ')
157 self.visit(target)
158 self.write(' = ')
159 self.visit(node.value)
160
161 def visit_AugAssign(self, node):
162 self.newline(node)
163 self.visit(node.target)
164 self.write(BINOP_SYMBOLS[type(node.op)] + '=')
165 self.visit(node.value)
166
167 def visit_ImportFrom(self, node):
168 self.newline(node)
169        self.write('from %s%s import ' % ('.' * node.level, node.module or ''))
170 for idx, item in enumerate(node.names):
171 if idx:
172 self.write(', ')
173            self.visit(item)
174
175 def visit_Import(self, node):
176 self.newline(node)
177 for item in node.names:
178 self.write('import ')
179 self.visit(item)
180
181 def visit_Expr(self, node):
182 self.newline(node)
183 self.generic_visit(node)
184
185 def visit_FunctionDef(self, node):
186 self.newline(extra=1)
187 self.decorators(node)
188 self.newline(node)
189 self.write('def %s(' % node.name)
190 self.signature(node.args)
191 self.write('):')
192 self.body(node.body)
193
194 def visit_ClassDef(self, node):
195 have_args = []
196 def paren_or_comma():
197 if have_args:
198 self.write(', ')
199 else:
200 have_args.append(True)
201 self.write('(')
202
203 self.newline(extra=2)
204 self.decorators(node)
205 self.newline(node)
206 self.write('class %s' % node.name)
207 for base in node.bases:
208 paren_or_comma()
209 self.visit(base)
210 # XXX: the if here is used to keep this module compatible
211 # with python 2.6.
212 if hasattr(node, 'keywords'):
213 for keyword in node.keywords:
214 paren_or_comma()
215 self.write(keyword.arg + '=')
216 self.visit(keyword.value)
217 if node.starargs is not None:
218 paren_or_comma()
219 self.write('*')
220 self.visit(node.starargs)
221 if node.kwargs is not None:
222 paren_or_comma()
223 self.write('**')
224 self.visit(node.kwargs)
225 self.write(have_args and '):' or ':')
226 self.body(node.body)
227
228 def visit_If(self, node):
229 self.newline(node)
230 self.write('if ')
231 self.visit(node.test)
232 self.write(':')
233 self.body(node.body)
234 while True:
235 else_ = node.orelse
236 if len(else_) == 1 and isinstance(else_[0], If):
237 node = else_[0]
238 self.newline()
239 self.write('elif ')
240 self.visit(node.test)
241 self.write(':')
242 self.body(node.body)
243 else:
244 self.newline()
245 self.write('else:')
246 self.body(else_)
247 break
248
249 def visit_For(self, node):
250 self.newline(node)
251 self.write('for ')
252 self.visit(node.target)
253 self.write(' in ')
254 self.visit(node.iter)
255 self.write(':')
256 self.body_or_else(node)
257
258 def visit_While(self, node):
259 self.newline(node)
260 self.write('while ')
261 self.visit(node.test)
262 self.write(':')
263 self.body_or_else(node)
264
265 def visit_With(self, node):
266 self.newline(node)
267 self.write('with ')
268 self.visit(node.context_expr)
269 if node.optional_vars is not None:
270 self.write(' as ')
271 self.visit(node.optional_vars)
272 self.write(':')
273 self.body(node.body)
274
275 def visit_Pass(self, node):
276 self.newline(node)
277 self.write('pass')
278
279 def visit_Print(self, node):
280 # XXX: python 2.6 only
281 self.newline(node)
282 self.write('print ')
283 want_comma = False
284 if node.dest is not None:
285 self.write(' >> ')
286 self.visit(node.dest)
287 want_comma = True
288 for value in node.values:
289 if want_comma:
290 self.write(', ')
291 self.visit(value)
292 want_comma = True
293 if not node.nl:
294 self.write(',')
295
296 def visit_Delete(self, node):
297 self.newline(node)
298 self.write('del ')
299        for idx, target in enumerate(node.targets):
300 if idx:
301 self.write(', ')
302 self.visit(target)
303
304 def visit_TryExcept(self, node):
305 self.newline(node)
306 self.write('try:')
307 self.body(node.body)
308 for handler in node.handlers:
309 self.visit(handler)
310
311 def visit_TryFinally(self, node):
312 self.newline(node)
313 self.write('try:')
314 self.body(node.body)
315 self.newline(node)
316 self.write('finally:')
317 self.body(node.finalbody)
318
319 def visit_Global(self, node):
320 self.newline(node)
321 self.write('global ' + ', '.join(node.names))
322
323 def visit_Nonlocal(self, node):
324 self.newline(node)
325 self.write('nonlocal ' + ', '.join(node.names))
326
327 def visit_Return(self, node):
328 self.newline(node)
329 self.write('return ')
330 self.visit(node.value)
331
332 def visit_Break(self, node):
333 self.newline(node)
334 self.write('break')
335
336 def visit_Continue(self, node):
337 self.newline(node)
338 self.write('continue')
339
340 def visit_Raise(self, node):
341 # XXX: Python 2.6 / 3.0 compatibility
342 self.newline(node)
343 self.write('raise')
344 if hasattr(node, 'exc') and node.exc is not None:
345 self.write(' ')
346 self.visit(node.exc)
347 if node.cause is not None:
348 self.write(' from ')
349 self.visit(node.cause)
350 elif hasattr(node, 'type') and node.type is not None:
351 self.visit(node.type)
352 if node.inst is not None:
353 self.write(', ')
354 self.visit(node.inst)
355 if node.tback is not None:
356 self.write(', ')
357 self.visit(node.tback)
358
359 # Expressions
360
361 def visit_Attribute(self, node):
362 self.visit(node.value)
363 self.write('.' + node.attr)
364
365 def visit_Call(self, node):
366 want_comma = []
367 def write_comma():
368 if want_comma:
369 self.write(', ')
370 else:
371 want_comma.append(True)
372
373 self.visit(node.func)
374 self.write('(')
375 for arg in node.args:
376 write_comma()
377 self.visit(arg)
378 for keyword in node.keywords:
379 write_comma()
380 self.write(keyword.arg + '=')
381 self.visit(keyword.value)
382 if node.starargs is not None:
383 write_comma()
384 self.write('*')
385 self.visit(node.starargs)
386 if node.kwargs is not None:
387 write_comma()
388 self.write('**')
389 self.visit(node.kwargs)
390 self.write(')')
391
392 def visit_Name(self, node):
393 self.write(node.id)
394
395 def visit_Str(self, node):
396 self.write(repr(node.s))
397
398 def visit_Bytes(self, node):
399 self.write(repr(node.s))
400
401 def visit_Num(self, node):
402 self.write(repr(node.n))
403
404 def visit_Tuple(self, node):
405 self.write('(')
406 idx = -1
407 for idx, item in enumerate(node.elts):
408 if idx:
409 self.write(', ')
410 self.visit(item)
411 self.write(idx and ')' or ',)')
412
413 def sequence_visit(left, right):
414 def visit(self, node):
415 self.write(left)
416 for idx, item in enumerate(node.elts):
417 if idx:
418 self.write(', ')
419 self.visit(item)
420 self.write(right)
421 return visit
422
423 visit_List = sequence_visit('[', ']')
424 visit_Set = sequence_visit('{', '}')
425 del sequence_visit
426
427 def visit_Dict(self, node):
428 self.write('{')
429 for idx, (key, value) in enumerate(zip(node.keys, node.values)):
430 if idx:
431 self.write(', ')
432 self.visit(key)
433 self.write(': ')
434 self.visit(value)
435 self.write('}')
436
437 def visit_BinOp(self, node):
438 self.visit(node.left)
439 self.write(' %s ' % BINOP_SYMBOLS[type(node.op)])
440 self.visit(node.right)
441
442 def visit_BoolOp(self, node):
443 self.write('(')
444 for idx, value in enumerate(node.values):
445 if idx:
446 self.write(' %s ' % BOOLOP_SYMBOLS[type(node.op)])
447 self.visit(value)
448 self.write(')')
449
450 def visit_Compare(self, node):
451 self.write('(')
452        self.visit(node.left)
453 for op, right in zip(node.ops, node.comparators):
454            self.write(' %s ' % CMPOP_SYMBOLS[type(op)])
455 self.visit(right)
456 self.write(')')
457
458 def visit_UnaryOp(self, node):
459 self.write('(')
460 op = UNARYOP_SYMBOLS[type(node.op)]
461 self.write(op)
462 if op == 'not':
463 self.write(' ')
464 self.visit(node.operand)
465 self.write(')')
466
467 def visit_Subscript(self, node):
468 self.visit(node.value)
469 self.write('[')
470 self.visit(node.slice)
471 self.write(']')
472
473 def visit_Slice(self, node):
474 if node.lower is not None:
475 self.visit(node.lower)
476 self.write(':')
477 if node.upper is not None:
478 self.visit(node.upper)
479 if node.step is not None:
480 self.write(':')
481 if not (isinstance(node.step, Name) and node.step.id == 'None'):
482 self.visit(node.step)
483
484 def visit_ExtSlice(self, node):
485        for idx, item in enumerate(node.dims):
486 if idx:
487 self.write(', ')
488 self.visit(item)
489
490 def visit_Yield(self, node):
491 self.write('yield ')
492 self.visit(node.value)
493
494 def visit_Lambda(self, node):
495 self.write('lambda ')
496 self.signature(node.args)
497 self.write(': ')
498 self.visit(node.body)
499
500 def visit_Ellipsis(self, node):
501 self.write('Ellipsis')
502
503 def generator_visit(left, right):
504 def visit(self, node):
505 self.write(left)
506 self.visit(node.elt)
507 for comprehension in node.generators:
508 self.visit(comprehension)
509 self.write(right)
510 return visit
511
512 visit_ListComp = generator_visit('[', ']')
513 visit_GeneratorExp = generator_visit('(', ')')
514 visit_SetComp = generator_visit('{', '}')
515 del generator_visit
516
517 def visit_DictComp(self, node):
518 self.write('{')
519 self.visit(node.key)
520 self.write(': ')
521 self.visit(node.value)
522 for comprehension in node.generators:
523 self.visit(comprehension)
524 self.write('}')
525
526 def visit_IfExp(self, node):
527 self.visit(node.body)
528 self.write(' if ')
529 self.visit(node.test)
530 self.write(' else ')
531 self.visit(node.orelse)
532
533 def visit_Starred(self, node):
534 self.write('*')
535 self.visit(node.value)
536
537 def visit_Repr(self, node):
538 # XXX: python 2.6 only
539 self.write('`')
540 self.visit(node.value)
541 self.write('`')
542
543 # Helper Nodes
544
545 def visit_alias(self, node):
546 self.write(node.name)
547 if node.asname is not None:
548 self.write(' as ' + node.asname)
549
550 def visit_comprehension(self, node):
551 self.write(' for ')
552 self.visit(node.target)
553 self.write(' in ')
554 self.visit(node.iter)
555 if node.ifs:
556 for if_ in node.ifs:
557 self.write(' if ')
558 self.visit(if_)
559
560 def visit_excepthandler(self, node):
561 self.newline(node)
562 self.write('except')
563 if node.type is not None:
564 self.write(' ')
565 self.visit(node.type)
566 if node.name is not None:
567 self.write(' as ')
568 self.visit(node.name)
569 self.write(':')
570 self.body(node.body)
diff --git a/bitbake/lib/ply/__init__.py b/bitbake/lib/ply/__init__.py
new file mode 100644
index 0000000000..853a985542
--- /dev/null
+++ b/bitbake/lib/ply/__init__.py
@@ -0,0 +1,4 @@
1# PLY package
2# Author: David Beazley (dave@dabeaz.com)
3
4__all__ = ['lex','yacc']
diff --git a/bitbake/lib/ply/lex.py b/bitbake/lib/ply/lex.py
new file mode 100644
index 0000000000..267ec100fc
--- /dev/null
+++ b/bitbake/lib/ply/lex.py
@@ -0,0 +1,1058 @@
1# -----------------------------------------------------------------------------
2# ply: lex.py
3#
4# Copyright (C) 2001-2009,
5# David M. Beazley (Dabeaz LLC)
6# All rights reserved.
7#
8# Redistribution and use in source and binary forms, with or without
9# modification, are permitted provided that the following conditions are
10# met:
11#
12# * Redistributions of source code must retain the above copyright notice,
13# this list of conditions and the following disclaimer.
14# * Redistributions in binary form must reproduce the above copyright notice,
15# this list of conditions and the following disclaimer in the documentation
16# and/or other materials provided with the distribution.
17# * Neither the name of the David Beazley or Dabeaz LLC may be used to
18# endorse or promote products derived from this software without
19# specific prior written permission.
20#
21# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32# -----------------------------------------------------------------------------
33
34__version__ = "3.3"
35__tabversion__ = "3.2" # Version of table file used
36
37import re, sys, types, copy, os
38
39# This tuple contains known string types
40try:
41 # Python 2.6
42 StringTypes = (types.StringType, types.UnicodeType)
43except AttributeError:
44 # Python 3.0
45 StringTypes = (str, bytes)
46
47# Extract the code attribute of a function. Different implementations
48# are for Python 2/3 compatibility.
49
50if sys.version_info[0] < 3:
51 def func_code(f):
52 return f.func_code
53else:
54 def func_code(f):
55 return f.__code__
56
57# This regular expression is used to match valid token names
58_is_identifier = re.compile(r'^[a-zA-Z0-9_]+$')
59
60# Exception thrown when an invalid token is encountered and no default error
61# handler is defined.
62
63class LexError(Exception):
64 def __init__(self,message,s):
65 self.args = (message,)
66 self.text = s
67
68# Token class. This class is used to represent the tokens produced.
69class LexToken(object):
70 def __str__(self):
71 return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos)
72 def __repr__(self):
73 return str(self)
74
75# This object is a stand-in for a logging object created by the
76# logging module.
77
78class PlyLogger(object):
79 def __init__(self,f):
80 self.f = f
81 def critical(self,msg,*args,**kwargs):
82 self.f.write((msg % args) + "\n")
83
84 def warning(self,msg,*args,**kwargs):
85 self.f.write("WARNING: "+ (msg % args) + "\n")
86
87 def error(self,msg,*args,**kwargs):
88 self.f.write("ERROR: " + (msg % args) + "\n")
89
90 info = critical
91 debug = critical
92
93# Null logger is used when no output is generated. Does nothing.
94class NullLogger(object):
95 def __getattribute__(self,name):
96 return self
97 def __call__(self,*args,**kwargs):
98 return self
99
100# -----------------------------------------------------------------------------
101# === Lexing Engine ===
102#
103# The following Lexer class implements the lexer runtime. There are only
104# a few public methods and attributes:
105#
106# input() - Store a new string in the lexer
107# token() - Get the next token
108# clone() - Clone the lexer
109#
110# lineno - Current line number
111# lexpos - Current position in the input string
112# -----------------------------------------------------------------------------
113
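A sketch of this runtime interface (assuming `lexer` was produced by the lex() builder defined later in this file, and the input string is illustrative):

    lexer.input("3 + 4 * 10")          # input() - store a new string
    while True:
        tok = lexer.token()            # token() - next LexToken, or None at EOF
        if not tok:
            break
        print(tok.type, tok.value, tok.lineno, tok.lexpos)
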
114class Lexer:
115 def __init__(self):
116 self.lexre = None # Master regular expression. This is a list of
117 # tuples (re,findex) where re is a compiled
118 # regular expression and findex is a list
119 # mapping regex group numbers to rules
120 self.lexretext = None # Current regular expression strings
121 self.lexstatere = {} # Dictionary mapping lexer states to master regexs
122 self.lexstateretext = {} # Dictionary mapping lexer states to regex strings
123 self.lexstaterenames = {} # Dictionary mapping lexer states to symbol names
124 self.lexstate = "INITIAL" # Current lexer state
125 self.lexstatestack = [] # Stack of lexer states
126 self.lexstateinfo = None # State information
127 self.lexstateignore = {} # Dictionary of ignored characters for each state
128 self.lexstateerrorf = {} # Dictionary of error functions for each state
129 self.lexreflags = 0 # Optional re compile flags
130 self.lexdata = None # Actual input data (as a string)
131 self.lexpos = 0 # Current position in input text
132 self.lexlen = 0 # Length of the input text
133 self.lexerrorf = None # Error rule (if any)
134 self.lextokens = None # List of valid tokens
135 self.lexignore = "" # Ignored characters
136 self.lexliterals = "" # Literal characters that can be passed through
137 self.lexmodule = None # Module
138 self.lineno = 1 # Current line number
139 self.lexoptimize = 0 # Optimized mode
140
141 def clone(self,object=None):
142 c = copy.copy(self)
143
144 # If the object parameter has been supplied, it means we are attaching the
145 # lexer to a new object. In this case, we have to rebind all methods in
146 # the lexstatere and lexstateerrorf tables.
147
148 if object:
149 newtab = { }
150 for key, ritem in self.lexstatere.items():
151 newre = []
152 for cre, findex in ritem:
153 newfindex = []
154 for f in findex:
155 if not f or not f[0]:
156 newfindex.append(f)
157 continue
158 newfindex.append((getattr(object,f[0].__name__),f[1]))
159 newre.append((cre,newfindex))
160 newtab[key] = newre
161 c.lexstatere = newtab
162 c.lexstateerrorf = { }
163 for key, ef in self.lexstateerrorf.items():
164 c.lexstateerrorf[key] = getattr(object,ef.__name__)
165 c.lexmodule = object
166 return c
167
168 # ------------------------------------------------------------
169 # writetab() - Write lexer information to a table file
170 # ------------------------------------------------------------
171 def writetab(self,tabfile,outputdir=""):
172 if isinstance(tabfile,types.ModuleType):
173 return
174 basetabfilename = tabfile.split(".")[-1]
175 filename = os.path.join(outputdir,basetabfilename)+".py"
176 tf = open(filename,"w")
177 tf.write("# %s.py. This file automatically created by PLY (version %s). Don't edit!\n" % (tabfile,__version__))
178 tf.write("_tabversion = %s\n" % repr(__version__))
179 tf.write("_lextokens = %s\n" % repr(self.lextokens))
180 tf.write("_lexreflags = %s\n" % repr(self.lexreflags))
181 tf.write("_lexliterals = %s\n" % repr(self.lexliterals))
182 tf.write("_lexstateinfo = %s\n" % repr(self.lexstateinfo))
183
184 tabre = { }
185 # Collect all functions in the initial state
186 initial = self.lexstatere["INITIAL"]
187 initialfuncs = []
188 for part in initial:
189 for f in part[1]:
190 if f and f[0]:
191 initialfuncs.append(f)
192
193 for key, lre in self.lexstatere.items():
194 titem = []
195 for i in range(len(lre)):
196 titem.append((self.lexstateretext[key][i],_funcs_to_names(lre[i][1],self.lexstaterenames[key][i])))
197 tabre[key] = titem
198
199 tf.write("_lexstatere = %s\n" % repr(tabre))
200 tf.write("_lexstateignore = %s\n" % repr(self.lexstateignore))
201
202 taberr = { }
203 for key, ef in self.lexstateerrorf.items():
204 if ef:
205 taberr[key] = ef.__name__
206 else:
207 taberr[key] = None
208 tf.write("_lexstateerrorf = %s\n" % repr(taberr))
209 tf.close()
210
211 # ------------------------------------------------------------
212 # readtab() - Read lexer information from a tab file
213 # ------------------------------------------------------------
214 def readtab(self,tabfile,fdict):
215 if isinstance(tabfile,types.ModuleType):
216 lextab = tabfile
217 else:
218 if sys.version_info[0] < 3:
219 exec("import %s as lextab" % tabfile)
220 else:
221 env = { }
222 exec("import %s as lextab" % tabfile, env,env)
223 lextab = env['lextab']
224
225 if getattr(lextab,"_tabversion","0.0") != __version__:
226 raise ImportError("Inconsistent PLY version")
227
228 self.lextokens = lextab._lextokens
229 self.lexreflags = lextab._lexreflags
230 self.lexliterals = lextab._lexliterals
231 self.lexstateinfo = lextab._lexstateinfo
232 self.lexstateignore = lextab._lexstateignore
233 self.lexstatere = { }
234 self.lexstateretext = { }
235 for key,lre in lextab._lexstatere.items():
236 titem = []
237 txtitem = []
238 for i in range(len(lre)):
239 titem.append((re.compile(lre[i][0],lextab._lexreflags | re.VERBOSE),_names_to_funcs(lre[i][1],fdict)))
240 txtitem.append(lre[i][0])
241 self.lexstatere[key] = titem
242 self.lexstateretext[key] = txtitem
243 self.lexstateerrorf = { }
244 for key,ef in lextab._lexstateerrorf.items():
245 self.lexstateerrorf[key] = fdict[ef]
246 self.begin('INITIAL')
247
248 # ------------------------------------------------------------
249 # input() - Push a new string into the lexer
250 # ------------------------------------------------------------
251 def input(self,s):
252 # Pull off the first character to see if s looks like a string
253 c = s[:1]
254 if not isinstance(c,StringTypes):
255 raise ValueError("Expected a string")
256 self.lexdata = s
257 self.lexpos = 0
258 self.lexlen = len(s)
259
260 # ------------------------------------------------------------
261 # begin() - Changes the lexing state
262 # ------------------------------------------------------------
263 def begin(self,state):
264 if not state in self.lexstatere:
265 raise ValueError("Undefined state")
266 self.lexre = self.lexstatere[state]
267 self.lexretext = self.lexstateretext[state]
268 self.lexignore = self.lexstateignore.get(state,"")
269 self.lexerrorf = self.lexstateerrorf.get(state,None)
270 self.lexstate = state
271
272 # ------------------------------------------------------------
273 # push_state() - Changes the lexing state and saves old on stack
274 # ------------------------------------------------------------
275 def push_state(self,state):
276 self.lexstatestack.append(self.lexstate)
277 self.begin(state)
278
279 # ------------------------------------------------------------
280 # pop_state() - Restores the previous state
281 # ------------------------------------------------------------
282 def pop_state(self):
283 self.begin(self.lexstatestack.pop())
284
285 # ------------------------------------------------------------
286 # current_state() - Returns the current lexing state
287 # ------------------------------------------------------------
288 def current_state(self):
289 return self.lexstate
290
291 # ------------------------------------------------------------
292 # skip() - Skip ahead n characters
293 # ------------------------------------------------------------
294 def skip(self,n):
295 self.lexpos += n
296
297 # ------------------------------------------------------------
298    # token() - Return the next token from the Lexer
299 #
300 # Note: This function has been carefully implemented to be as fast
301 # as possible. Don't make changes unless you really know what
302 # you are doing
303 # ------------------------------------------------------------
304 def token(self):
305 # Make local copies of frequently referenced attributes
306 lexpos = self.lexpos
307 lexlen = self.lexlen
308 lexignore = self.lexignore
309 lexdata = self.lexdata
310
311 while lexpos < lexlen:
312 # This code provides some short-circuit code for whitespace, tabs, and other ignored characters
313 if lexdata[lexpos] in lexignore:
314 lexpos += 1
315 continue
316
317 # Look for a regular expression match
318 for lexre,lexindexfunc in self.lexre:
319 m = lexre.match(lexdata,lexpos)
320 if not m: continue
321
322 # Create a token for return
323 tok = LexToken()
324 tok.value = m.group()
325 tok.lineno = self.lineno
326 tok.lexpos = lexpos
327
328 i = m.lastindex
329 func,tok.type = lexindexfunc[i]
330
331 if not func:
332 # If no token type was set, it's an ignored token
333 if tok.type:
334 self.lexpos = m.end()
335 return tok
336 else:
337 lexpos = m.end()
338 break
339
340 lexpos = m.end()
341
342 # If token is processed by a function, call it
343
344 tok.lexer = self # Set additional attributes useful in token rules
345 self.lexmatch = m
346 self.lexpos = lexpos
347
348 newtok = func(tok)
349
350            # Every function must return a token; if it returns nothing, we move on to the next token
351 if not newtok:
352 lexpos = self.lexpos # This is here in case user has updated lexpos.
353 lexignore = self.lexignore # This is here in case there was a state change
354 break
355
356 # Verify type of the token. If not in the token map, raise an error
357 if not self.lexoptimize:
358 if not newtok.type in self.lextokens:
359 raise LexError("%s:%d: Rule '%s' returned an unknown token type '%s'" % (
360 func_code(func).co_filename, func_code(func).co_firstlineno,
361 func.__name__, newtok.type),lexdata[lexpos:])
362
363 return newtok
364 else:
365 # No match, see if in literals
366 if lexdata[lexpos] in self.lexliterals:
367 tok = LexToken()
368 tok.value = lexdata[lexpos]
369 tok.lineno = self.lineno
370 tok.type = tok.value
371 tok.lexpos = lexpos
372 self.lexpos = lexpos + 1
373 return tok
374
375 # No match. Call t_error() if defined.
376 if self.lexerrorf:
377 tok = LexToken()
378 tok.value = self.lexdata[lexpos:]
379 tok.lineno = self.lineno
380 tok.type = "error"
381 tok.lexer = self
382 tok.lexpos = lexpos
383 self.lexpos = lexpos
384 newtok = self.lexerrorf(tok)
385 if lexpos == self.lexpos:
386 # Error method didn't change text position at all. This is an error.
387 raise LexError("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:])
388 lexpos = self.lexpos
389 if not newtok: continue
390 return newtok
391
392 self.lexpos = lexpos
393 raise LexError("Illegal character '%s' at index %d" % (lexdata[lexpos],lexpos), lexdata[lexpos:])
394
395 self.lexpos = lexpos + 1
396 if self.lexdata is None:
397 raise RuntimeError("No input string given with input()")
398 return None
399
400 # Iterator interface
401 def __iter__(self):
402 return self
403
404 def next(self):
405 t = self.token()
406 if t is None:
407 raise StopIteration
408 return t
409
410 __next__ = next
411
412# -----------------------------------------------------------------------------
413# === Lex Builder ===
414#
415# The functions and classes below are used to collect lexing information
416# and build a Lexer object from it.
417# -----------------------------------------------------------------------------
418
419# -----------------------------------------------------------------------------
420# get_caller_module_dict()
421#
422# This function returns a dictionary containing all of the symbols defined within
423# a caller further down the call stack. This is used to get the environment
424# associated with the lex() call if none was provided.
425# -----------------------------------------------------------------------------
426
427def get_caller_module_dict(levels):
428 try:
429 raise RuntimeError
430 except RuntimeError:
431 e,b,t = sys.exc_info()
432 f = t.tb_frame
433 while levels > 0:
434 f = f.f_back
435 levels -= 1
436 ldict = f.f_globals.copy()
437 if f.f_globals != f.f_locals:
438 ldict.update(f.f_locals)
439
440 return ldict
441
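This frame inspection is what lets a user module define its rules at top level and call lex() with no arguments: lex() calls get_caller_module_dict(2), which lands on the caller's frame and returns its globals (overlaid with locals when the call happens inside a function). A hypothetical user module:

    import ply.lex as lex

    tokens = ('NUMBER',)
    t_NUMBER = r'\d+'
    t_ignore = ' \t'

    def t_error(t):
        t.lexer.skip(1)

    lexer = lex.lex()   # no module= argument; the rules above are found
                        # through get_caller_module_dict()
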
442# -----------------------------------------------------------------------------
443# _funcs_to_names()
444#
445# Given a list of regular expression functions, this converts it to a list
446# suitable for output to a table file
447# -----------------------------------------------------------------------------
448
449def _funcs_to_names(funclist,namelist):
450 result = []
451 for f,name in zip(funclist,namelist):
452 if f and f[0]:
453 result.append((name, f[1]))
454 else:
455 result.append(f)
456 return result
457
458# -----------------------------------------------------------------------------
459# _names_to_funcs()
460#
461# Given a list of regular expression function names, this converts it back to
462# functions.
463# -----------------------------------------------------------------------------
464
465def _names_to_funcs(namelist,fdict):
466 result = []
467 for n in namelist:
468 if n and n[0]:
469 result.append((fdict[n[0]],n[1]))
470 else:
471 result.append(n)
472 return result
473
474# -----------------------------------------------------------------------------
475# _form_master_re()
476#
477# This function takes a list of all of the regex components and attempts to
478# form the master regular expression. Given limitations in the Python re
479# module, it may be necessary to break the master regex into separate expressions.
480# -----------------------------------------------------------------------------
481
482def _form_master_re(relist,reflags,ldict,toknames):
483 if not relist: return []
484 regex = "|".join(relist)
485 try:
486 lexre = re.compile(regex,re.VERBOSE | reflags)
487
488 # Build the index to function map for the matching engine
489 lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1)
490 lexindexnames = lexindexfunc[:]
491
492 for f,i in lexre.groupindex.items():
493 handle = ldict.get(f,None)
494 if type(handle) in (types.FunctionType, types.MethodType):
495 lexindexfunc[i] = (handle,toknames[f])
496 lexindexnames[i] = f
497 elif handle is not None:
498 lexindexnames[i] = f
499 if f.find("ignore_") > 0:
500 lexindexfunc[i] = (None,None)
501 else:
502 lexindexfunc[i] = (None, toknames[f])
503
504 return [(lexre,lexindexfunc)],[regex],[lexindexnames]
505 except Exception:
506 m = int(len(relist)/2)
507 if m == 0: m = 1
508 llist, lre, lnames = _form_master_re(relist[:m],reflags,ldict,toknames)
509 rlist, rre, rnames = _form_master_re(relist[m:],reflags,ldict,toknames)
510 return llist+rlist, lre+rre, lnames+rnames
511
512# -----------------------------------------------------------------------------
513# def _statetoken(s,names)
514#
515# Given a declaration name s of the form "t_" and a dictionary whose keys are
516# state names, this function returns a tuple (states,tokenname) where states
517# is a tuple of state names and tokenname is the name of the token. For example,
518# calling this with s = "t_foo_bar_SPAM" might return (('foo','bar'),'SPAM')
519# -----------------------------------------------------------------------------
520
521def _statetoken(s,names):
522 nonstate = 1
523 parts = s.split("_")
524 for i in range(1,len(parts)):
525 if not parts[i] in names and parts[i] != 'ANY': break
526 if i > 1:
527 states = tuple(parts[1:i])
528 else:
529 states = ('INITIAL',)
530
531 if 'ANY' in states:
532 states = tuple(names)
533
534 tokenname = "_".join(parts[i:])
535 return (states,tokenname)
536
537
538# -----------------------------------------------------------------------------
539# LexerReflect()
540#
541# This class represents information needed to build a lexer as extracted from a
542# user's input file.
543# -----------------------------------------------------------------------------
544class LexerReflect(object):
545 def __init__(self,ldict,log=None,reflags=0):
546 self.ldict = ldict
547 self.error_func = None
548 self.tokens = []
549 self.reflags = reflags
550 self.stateinfo = { 'INITIAL' : 'inclusive'}
551 self.files = {}
552 self.error = 0
553
554 if log is None:
555 self.log = PlyLogger(sys.stderr)
556 else:
557 self.log = log
558
559 # Get all of the basic information
560 def get_all(self):
561 self.get_tokens()
562 self.get_literals()
563 self.get_states()
564 self.get_rules()
565
566 # Validate all of the information
567 def validate_all(self):
568 self.validate_tokens()
569 self.validate_literals()
570 self.validate_rules()
571 return self.error
572
573 # Get the tokens map
574 def get_tokens(self):
575 tokens = self.ldict.get("tokens",None)
576 if not tokens:
577 self.log.error("No token list is defined")
578 self.error = 1
579 return
580
581 if not isinstance(tokens,(list, tuple)):
582 self.log.error("tokens must be a list or tuple")
583 self.error = 1
584 return
585
586 if not tokens:
587 self.log.error("tokens is empty")
588 self.error = 1
589 return
590
591 self.tokens = tokens
592
593 # Validate the tokens
594 def validate_tokens(self):
595 terminals = {}
596 for n in self.tokens:
597 if not _is_identifier.match(n):
598 self.log.error("Bad token name '%s'",n)
599 self.error = 1
600 if n in terminals:
601 self.log.warning("Token '%s' multiply defined", n)
602 terminals[n] = 1
603
604 # Get the literals specifier
605 def get_literals(self):
606 self.literals = self.ldict.get("literals","")
607
608 # Validate literals
609 def validate_literals(self):
610 try:
611 for c in self.literals:
612 if not isinstance(c,StringTypes) or len(c) > 1:
613 self.log.error("Invalid literal %s. Must be a single character", repr(c))
614 self.error = 1
615 continue
616
617 except TypeError:
618 self.log.error("Invalid literals specification. literals must be a sequence of characters")
619 self.error = 1
620
621 def get_states(self):
622 self.states = self.ldict.get("states",None)
623 # Build statemap
624 if self.states:
625 if not isinstance(self.states,(tuple,list)):
626 self.log.error("states must be defined as a tuple or list")
627 self.error = 1
628 else:
629 for s in self.states:
630 if not isinstance(s,tuple) or len(s) != 2:
631 self.log.error("Invalid state specifier %s. Must be a tuple (statename,'exclusive|inclusive')",repr(s))
632 self.error = 1
633 continue
634 name, statetype = s
635 if not isinstance(name,StringTypes):
636 self.log.error("State name %s must be a string", repr(name))
637 self.error = 1
638 continue
639 if not (statetype == 'inclusive' or statetype == 'exclusive'):
640 self.log.error("State type for state %s must be 'inclusive' or 'exclusive'",name)
641 self.error = 1
642 continue
643 if name in self.stateinfo:
644 self.log.error("State '%s' already defined",name)
645 self.error = 1
646 continue
647 self.stateinfo[name] = statetype
648
649 # Get all of the symbols with a t_ prefix and sort them into various
650 # categories (functions, strings, error functions, and ignore characters)
651
652 def get_rules(self):
653 tsymbols = [f for f in self.ldict if f[:2] == 't_' ]
654
655 # Now build up a list of functions and a list of strings
656
657 self.toknames = { } # Mapping of symbols to token names
658 self.funcsym = { } # Symbols defined as functions
659 self.strsym = { } # Symbols defined as strings
660 self.ignore = { } # Ignore strings by state
661 self.errorf = { } # Error functions by state
662
663 for s in self.stateinfo:
664 self.funcsym[s] = []
665 self.strsym[s] = []
666
667 if len(tsymbols) == 0:
668 self.log.error("No rules of the form t_rulename are defined")
669 self.error = 1
670 return
671
672 for f in tsymbols:
673 t = self.ldict[f]
674 states, tokname = _statetoken(f,self.stateinfo)
675 self.toknames[f] = tokname
676
677 if hasattr(t,"__call__"):
678 if tokname == 'error':
679 for s in states:
680 self.errorf[s] = t
681 elif tokname == 'ignore':
682 line = func_code(t).co_firstlineno
683 file = func_code(t).co_filename
684 self.log.error("%s:%d: Rule '%s' must be defined as a string",file,line,t.__name__)
685 self.error = 1
686 else:
687 for s in states:
688 self.funcsym[s].append((f,t))
689 elif isinstance(t, StringTypes):
690 if tokname == 'ignore':
691 for s in states:
692 self.ignore[s] = t
693 if "\\" in t:
694 self.log.warning("%s contains a literal backslash '\\'",f)
695
696 elif tokname == 'error':
697 self.log.error("Rule '%s' must be defined as a function", f)
698 self.error = 1
699 else:
700 for s in states:
701 self.strsym[s].append((f,t))
702 else:
703 self.log.error("%s not defined as a function or string", f)
704 self.error = 1
705
706 # Sort the functions by line number
707 for f in self.funcsym.values():
708 if sys.version_info[0] < 3:
709 f.sort(lambda x,y: cmp(func_code(x[1]).co_firstlineno,func_code(y[1]).co_firstlineno))
710 else:
711 # Python 3.0
712 f.sort(key=lambda x: func_code(x[1]).co_firstlineno)
713
714 # Sort the strings by regular expression length
715 for s in self.strsym.values():
716 if sys.version_info[0] < 3:
717 s.sort(lambda x,y: (len(x[1]) < len(y[1])) - (len(x[1]) > len(y[1])))
718 else:
719 # Python 3.0
720 s.sort(key=lambda x: len(x[1]),reverse=True)
721
722 # Validate all of the t_rules collected
723 def validate_rules(self):
724 for state in self.stateinfo:
725 # Validate all rules defined by functions
726
727
728
729 for fname, f in self.funcsym[state]:
730 line = func_code(f).co_firstlineno
731 file = func_code(f).co_filename
732 self.files[file] = 1
733
734 tokname = self.toknames[fname]
735 if isinstance(f, types.MethodType):
736 reqargs = 2
737 else:
738 reqargs = 1
739 nargs = func_code(f).co_argcount
740 if nargs > reqargs:
741 self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__)
742 self.error = 1
743 continue
744
745 if nargs < reqargs:
746 self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__)
747 self.error = 1
748 continue
749
750 if not f.__doc__:
751 self.log.error("%s:%d: No regular expression defined for rule '%s'",file,line,f.__name__)
752 self.error = 1
753 continue
754
755 try:
756 c = re.compile("(?P<%s>%s)" % (fname,f.__doc__), re.VERBOSE | self.reflags)
757 if c.match(""):
758 self.log.error("%s:%d: Regular expression for rule '%s' matches empty string", file,line,f.__name__)
759 self.error = 1
760 except re.error:
761 _etype, e, _etrace = sys.exc_info()
762 self.log.error("%s:%d: Invalid regular expression for rule '%s'. %s", file,line,f.__name__,e)
763 if '#' in f.__doc__:
764 self.log.error("%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'",file,line, f.__name__)
765 self.error = 1
766
767 # Validate all rules defined by strings
768 for name,r in self.strsym[state]:
769 tokname = self.toknames[name]
770 if tokname == 'error':
771 self.log.error("Rule '%s' must be defined as a function", name)
772 self.error = 1
773 continue
774
775 if not tokname in self.tokens and tokname.find("ignore_") < 0:
776 self.log.error("Rule '%s' defined for an unspecified token %s",name,tokname)
777 self.error = 1
778 continue
779
780 try:
781 c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | self.reflags)
782 if (c.match("")):
783 self.log.error("Regular expression for rule '%s' matches empty string",name)
784 self.error = 1
785 except re.error:
786 _etype, e, _etrace = sys.exc_info()
787 self.log.error("Invalid regular expression for rule '%s'. %s",name,e)
788 if '#' in r:
789 self.log.error("Make sure '#' in rule '%s' is escaped with '\\#'",name)
790 self.error = 1
791
792 if not self.funcsym[state] and not self.strsym[state]:
793 self.log.error("No rules defined for state '%s'",state)
794 self.error = 1
795
796 # Validate the error function
797 efunc = self.errorf.get(state,None)
798 if efunc:
799 f = efunc
800 line = func_code(f).co_firstlineno
801 file = func_code(f).co_filename
802 self.files[file] = 1
803
804 if isinstance(f, types.MethodType):
805 reqargs = 2
806 else:
807 reqargs = 1
808 nargs = func_code(f).co_argcount
809 if nargs > reqargs:
810 self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,f.__name__)
811 self.error = 1
812
813 if nargs < reqargs:
814 self.log.error("%s:%d: Rule '%s' requires an argument", file,line,f.__name__)
815 self.error = 1
816
817 for f in self.files:
818 self.validate_file(f)
819
820
821 # -----------------------------------------------------------------------------
822 # validate_file()
823 #
824 # This checks to see if there are duplicated t_rulename() functions or strings
825 # in the parser input file. This is done using a simple regular expression
826 # match on each line in the given file.
827 # -----------------------------------------------------------------------------
828
829 def validate_file(self,filename):
830 import os.path
831 base,ext = os.path.splitext(filename)
832 if ext != '.py': return # No idea what the file is. Return OK
833
834 try:
835 f = open(filename)
836 lines = f.readlines()
837 f.close()
838 except IOError:
839 return # Couldn't find the file. Don't worry about it
840
841 fre = re.compile(r'\s*def\s+(t_[a-zA-Z_0-9]*)\(')
842 sre = re.compile(r'\s*(t_[a-zA-Z_0-9]*)\s*=')
843
844 counthash = { }
845 linen = 1
846 for l in lines:
847 m = fre.match(l)
848 if not m:
849 m = sre.match(l)
850 if m:
851 name = m.group(1)
852 prev = counthash.get(name)
853 if not prev:
854 counthash[name] = linen
855 else:
856 self.log.error("%s:%d: Rule %s redefined. Previously defined on line %d",filename,linen,name,prev)
857 self.error = 1
858 linen += 1
859
860# -----------------------------------------------------------------------------
861# lex(module)
862#
863# Build all of the regular expression rules from definitions in the supplied module
864# -----------------------------------------------------------------------------
865def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0,nowarn=0,outputdir="", debuglog=None, errorlog=None):
866 global lexer
867 ldict = None
868 stateinfo = { 'INITIAL' : 'inclusive'}
869 lexobj = Lexer()
870 lexobj.lexoptimize = optimize
871 global token,input
872
873 if errorlog is None:
874 errorlog = PlyLogger(sys.stderr)
875
876 if debug:
877 if debuglog is None:
878 debuglog = PlyLogger(sys.stderr)
879
880 # Get the module dictionary used for the lexer
881 if object: module = object
882
883 if module:
884 _items = [(k,getattr(module,k)) for k in dir(module)]
885 ldict = dict(_items)
886 else:
887 ldict = get_caller_module_dict(2)
888
889 # Collect parser information from the dictionary
890 linfo = LexerReflect(ldict,log=errorlog,reflags=reflags)
891 linfo.get_all()
892 if not optimize:
893 if linfo.validate_all():
894 raise SyntaxError("Can't build lexer")
895
896 if optimize and lextab:
897 try:
898 lexobj.readtab(lextab,ldict)
899 token = lexobj.token
900 input = lexobj.input
901 lexer = lexobj
902 return lexobj
903
904 except ImportError:
905 pass
906
907 # Dump some basic debugging information
908 if debug:
909 debuglog.info("lex: tokens = %r", linfo.tokens)
910 debuglog.info("lex: literals = %r", linfo.literals)
911 debuglog.info("lex: states = %r", linfo.stateinfo)
912
913 # Build a dictionary of valid token names
914 lexobj.lextokens = { }
915 for n in linfo.tokens:
916 lexobj.lextokens[n] = 1
917
918 # Get literals specification
919 if isinstance(linfo.literals,(list,tuple)):
920 lexobj.lexliterals = type(linfo.literals[0])().join(linfo.literals)
921 else:
922 lexobj.lexliterals = linfo.literals
923
924 # Get the stateinfo dictionary
925 stateinfo = linfo.stateinfo
926
927 regexs = { }
928 # Build the master regular expressions
929 for state in stateinfo:
930 regex_list = []
931
932 # Add rules defined by functions first
933 for fname, f in linfo.funcsym[state]:
934 line = func_code(f).co_firstlineno
935 file = func_code(f).co_filename
936 regex_list.append("(?P<%s>%s)" % (fname,f.__doc__))
937 if debug:
938 debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",fname,f.__doc__, state)
939
940 # Now add all of the simple rules
941 for name,r in linfo.strsym[state]:
942 regex_list.append("(?P<%s>%s)" % (name,r))
943 if debug:
944 debuglog.info("lex: Adding rule %s -> '%s' (state '%s')",name,r, state)
945
946 regexs[state] = regex_list
947
948 # Build the master regular expressions
949
950 if debug:
951 debuglog.info("lex: ==== MASTER REGEXS FOLLOW ====")
952
953 for state in regexs:
954 lexre, re_text, re_names = _form_master_re(regexs[state],reflags,ldict,linfo.toknames)
955 lexobj.lexstatere[state] = lexre
956 lexobj.lexstateretext[state] = re_text
957 lexobj.lexstaterenames[state] = re_names
958 if debug:
959 for i in range(len(re_text)):
960 debuglog.info("lex: state '%s' : regex[%d] = '%s'",state, i, re_text[i])
961
962 # For inclusive states, we need to add the regular expressions from the INITIAL state
963 for state,stype in stateinfo.items():
964 if state != "INITIAL" and stype == 'inclusive':
965 lexobj.lexstatere[state].extend(lexobj.lexstatere['INITIAL'])
966 lexobj.lexstateretext[state].extend(lexobj.lexstateretext['INITIAL'])
967 lexobj.lexstaterenames[state].extend(lexobj.lexstaterenames['INITIAL'])
968
969 lexobj.lexstateinfo = stateinfo
970 lexobj.lexre = lexobj.lexstatere["INITIAL"]
971 lexobj.lexretext = lexobj.lexstateretext["INITIAL"]
972 lexobj.lexreflags = reflags
973
974 # Set up ignore variables
975 lexobj.lexstateignore = linfo.ignore
976 lexobj.lexignore = lexobj.lexstateignore.get("INITIAL","")
977
978 # Set up error functions
979 lexobj.lexstateerrorf = linfo.errorf
980 lexobj.lexerrorf = linfo.errorf.get("INITIAL",None)
981 if not lexobj.lexerrorf:
982 errorlog.warning("No t_error rule is defined")
983
984 # Check state information for ignore and error rules
985 for s,stype in stateinfo.items():
986 if stype == 'exclusive':
987 if not s in linfo.errorf:
988 errorlog.warning("No error rule is defined for exclusive state '%s'", s)
989 if not s in linfo.ignore and lexobj.lexignore:
990 errorlog.warning("No ignore rule is defined for exclusive state '%s'", s)
991 elif stype == 'inclusive':
992 if not s in linfo.errorf:
993 linfo.errorf[s] = linfo.errorf.get("INITIAL",None)
994 if not s in linfo.ignore:
995 linfo.ignore[s] = linfo.ignore.get("INITIAL","")
996
997 # Create global versions of the token() and input() functions
998 token = lexobj.token
999 input = lexobj.input
1000 lexer = lexobj
1001
1002 # If in optimize mode, we write the lextab
1003 if lextab and optimize:
1004 lexobj.writetab(lextab,outputdir)
1005
1006 return lexobj
1007
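A sketch of the object= form this builder supports, with rules defined as methods and strings on an instance (the class and token names are illustrative):

    class CalcLexer(object):
        tokens = ('NUMBER', 'PLUS')
        t_PLUS = r'\+'
        t_ignore = ' \t'

        def t_NUMBER(self, t):
            r'\d+'
            t.value = int(t.value)
            return t

        def t_error(self, t):
            t.lexer.skip(1)

    m = CalcLexer()
    lexer = lex(object=m)   # method rules are rebound to the instance
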
1008# -----------------------------------------------------------------------------
1009# runmain()
1010#
1011# This runs the lexer as a main program
1012# -----------------------------------------------------------------------------
1013
1014def runmain(lexer=None,data=None):
1015 if not data:
1016 try:
1017 filename = sys.argv[1]
1018 f = open(filename)
1019 data = f.read()
1020 f.close()
1021 except IndexError:
1022 sys.stdout.write("Reading from standard input (type EOF to end):\n")
1023 data = sys.stdin.read()
1024
1025 if lexer:
1026 _input = lexer.input
1027 else:
1028 _input = input
1029 _input(data)
1030 if lexer:
1031 _token = lexer.token
1032 else:
1033 _token = token
1034
1035 while 1:
1036 tok = _token()
1037 if not tok: break
1038 sys.stdout.write("(%s,%r,%d,%d)\n" % (tok.type, tok.value, tok.lineno,tok.lexpos))
1039
1040# -----------------------------------------------------------------------------
1041# @TOKEN(regex)
1042#
1043# This decorator function can be used to set the regex expression on a function
1044# when its docstring might need to be set in an alternative way
1045# -----------------------------------------------------------------------------
1046
1047def TOKEN(r):
1048 def set_doc(f):
1049 if hasattr(r,"__call__"):
1050 f.__doc__ = r.__doc__
1051 else:
1052 f.__doc__ = r
1053 return f
1054 return set_doc
1055
1056# Alternative spelling of the TOKEN decorator
1057Token = TOKEN
1058
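Typical use of @TOKEN: build the regular expression separately, then attach it to the rule function instead of writing it in the docstring (the identifier pattern is illustrative):

    identifier = r'[a-zA-Z_][a-zA-Z0-9_]*'

    @TOKEN(identifier)
    def t_ID(t):
        # regex supplied via the decorator rather than the docstring
        return t
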
diff --git a/bitbake/lib/ply/yacc.py b/bitbake/lib/ply/yacc.py
new file mode 100644
index 0000000000..6168fd9a03
--- /dev/null
+++ b/bitbake/lib/ply/yacc.py
@@ -0,0 +1,3276 @@
1# -----------------------------------------------------------------------------
2# ply: yacc.py
3#
4# Copyright (C) 2001-2009,
5# David M. Beazley (Dabeaz LLC)
6# All rights reserved.
7#
8# Redistribution and use in source and binary forms, with or without
9# modification, are permitted provided that the following conditions are
10# met:
11#
12# * Redistributions of source code must retain the above copyright notice,
13# this list of conditions and the following disclaimer.
14# * Redistributions in binary form must reproduce the above copyright notice,
15# this list of conditions and the following disclaimer in the documentation
16# and/or other materials provided with the distribution.
17# * Neither the name of the David Beazley or Dabeaz LLC may be used to
18# endorse or promote products derived from this software without
19# specific prior written permission.
20#
21# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32# -----------------------------------------------------------------------------
33#
34# This implements an LR parser that is constructed from grammar rules defined
35# as Python functions. The grammar is specified by supplying the BNF inside
36# Python documentation strings. The inspiration for this technique was borrowed
37# from John Aycock's Spark parsing system. PLY might be viewed as a cross between
38# Spark and the GNU bison utility.
39#
40# The current implementation is only somewhat object-oriented. The
41# LR parser itself is defined in terms of an object (which allows multiple
42# parsers to co-exist). However, most of the variables used during table
43# construction are defined in terms of global variables. Users shouldn't
44# notice unless they are trying to define multiple parsers at the same
45# time using threads (in which case they should have their head examined).
46#
47# This implementation supports both SLR and LALR(1) parsing. LALR(1)
48# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
49# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
50# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
51# by the more efficient DeRemer and Pennello algorithm.
52#
53# :::::::: WARNING :::::::
54#
55# Construction of LR parsing tables is fairly complicated and expensive.
56# To make this module run fast, a *LOT* of work has been put into
57# optimization---often at the expense of readability and what might
58# be considered good Python "coding style." Modify the code at your
59# own risk!
60# ----------------------------------------------------------------------------
61
62__version__ = "3.3"
63__tabversion__ = "3.2" # Table version
64
65#-----------------------------------------------------------------------------
66# === User configurable parameters ===
67#
68# Change these to modify the default behavior of yacc (if you wish)
69#-----------------------------------------------------------------------------
70
71yaccdebug = 0 # Debugging mode. If set, yacc generates a
72                           # 'parser.out' file in the current directory
73
74debug_file = 'parser.out' # Default name of the debugging file
75tab_module = 'parsetab' # Default name of the table module
76default_lr = 'LALR' # Default LR table generation method
77
78error_count = 3 # Number of symbols that must be shifted to leave recovery mode
79
80yaccdevel = 0 # Set to True if developing yacc. This turns off optimized
81 # implementations of certain functions.
82
83resultlimit = 40 # Size limit of results when running in debug mode.
84
85pickle_protocol = 0 # Protocol to use when writing pickle files
86
87import re, types, sys, os.path
88
89# Compatibility function for python 2.6/3.0
90if sys.version_info[0] < 3:
91 def func_code(f):
92 return f.func_code
93else:
94 def func_code(f):
95 return f.__code__
96
97# Compatibility
98try:
99 MAXINT = sys.maxint
100except AttributeError:
101 MAXINT = sys.maxsize
102
103# Python 2.x/3.0 compatibility.
104def load_ply_lex():
105 if sys.version_info[0] < 3:
106 import lex
107 else:
108 import ply.lex as lex
109 return lex
110
111# This object is a stand-in for a logging object created by the
112# logging module. PLY will use this by default to create things
113# such as the parser.out file. If a user wants more detailed
114# information, they can create their own logging object and pass
115# it into PLY.
116
117class PlyLogger(object):
118 def __init__(self,f):
119 self.f = f
120 def debug(self,msg,*args,**kwargs):
121 self.f.write((msg % args) + "\n")
122 info = debug
123
124 def warning(self,msg,*args,**kwargs):
125 self.f.write("WARNING: "+ (msg % args) + "\n")
126
127 def error(self,msg,*args,**kwargs):
128 self.f.write("ERROR: " + (msg % args) + "\n")
129
130 critical = debug
131
132# Null logger is used when no output is generated. Does nothing.
133class NullLogger(object):
134 def __getattribute__(self,name):
135 return self
136 def __call__(self,*args,**kwargs):
137 return self
138
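A sketch of passing a real logging object in place of these stand-ins, as the comment above suggests (the filename is illustrative; the yacc() builder later in this file accepts debuglog= and errorlog= keyword arguments):

    import logging
    logging.basicConfig(level=logging.DEBUG, filename='parselog.txt')
    log = logging.getLogger()
    parser = yacc(debug=True, debuglog=log, errorlog=log)
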
139# Exception raised for yacc-related errors
140class YaccError(Exception): pass
141
142# Format the result message that the parser produces when running in debug mode.
143def format_result(r):
144 repr_str = repr(r)
145 if '\n' in repr_str: repr_str = repr(repr_str)
146 if len(repr_str) > resultlimit:
147 repr_str = repr_str[:resultlimit]+" ..."
148 result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str)
149 return result
150
151
152# Format stack entries when the parser is running in debug mode
153def format_stack_entry(r):
154 repr_str = repr(r)
155 if '\n' in repr_str: repr_str = repr(repr_str)
156 if len(repr_str) < 16:
157 return repr_str
158 else:
159 return "<%s @ 0x%x>" % (type(r).__name__,id(r))
160
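# Example (illustrative): format_stack_entry('abc') returns "'abc'"
# verbatim (the repr is shorter than 16 characters), while a long list
# comes back abbreviated as something like "<list @ 0x7f2c...>".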
161#-----------------------------------------------------------------------------
162# === LR Parsing Engine ===
163#
164# The following classes are used for the LR parser itself. These are not
165# used during table construction and are independent of the actual LR
166# table generation algorithm
167#-----------------------------------------------------------------------------
168
169# This class is used to hold non-terminal grammar symbols during parsing.
170# It normally has the following attributes set:
171# .type = Grammar symbol type
172# .value = Symbol value
173# .lineno = Starting line number
174# .endlineno = Ending line number (optional, set automatically)
175# .lexpos = Starting lex position
176# .endlexpos = Ending lex position (optional, set automatically)
177
178class YaccSymbol:
179 def __str__(self): return self.type
180 def __repr__(self): return str(self)
181
182# This class is a wrapper around the objects actually passed to each
183# grammar rule. Index lookup and assignment actually read and write the
184# .value attribute of the underlying YaccSymbol object.
185# The lineno() method returns the line number of a given
186# item (or 0 if not defined). The linespan() method returns
187# a tuple of (startline,endline) representing the range of lines
188# for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos)
189# representing the range of positional information for a symbol.
190
191class YaccProduction:
192 def __init__(self,s,stack=None):
193 self.slice = s
194 self.stack = stack
195 self.lexer = None
196 self.parser= None
197 def __getitem__(self,n):
198 if n >= 0: return self.slice[n].value
199 else: return self.stack[n].value
200
201 def __setitem__(self,n,v):
202 self.slice[n].value = v
203
204 def __getslice__(self,i,j):
205 return [s.value for s in self.slice[i:j]]
206
207 def __len__(self):
208 return len(self.slice)
209
210 def lineno(self,n):
211 return getattr(self.slice[n],"lineno",0)
212
213 def set_lineno(self,n,lineno):
214 self.slice[n].lineno = lineno
215
216 def linespan(self,n):
217 startline = getattr(self.slice[n],"lineno",0)
218 endline = getattr(self.slice[n],"endlineno",startline)
219 return startline,endline
220
221 def lexpos(self,n):
222 return getattr(self.slice[n],"lexpos",0)
223
224 def lexspan(self,n):
225 startpos = getattr(self.slice[n],"lexpos",0)
226 endpos = getattr(self.slice[n],"endlexpos",startpos)
227 return startpos,endpos
228
229 def error(self):
230 raise SyntaxError
231
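# Example (an illustrative sketch of how a user's grammar rule sees this
# wrapper; the rule and token names are hypothetical):
#
#     def p_expr_plus(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]                 # reads/writes the .value slots
#         startline, endline = p.linespan(1) # line range of the 'expr' symbol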
232
233# -----------------------------------------------------------------------------
234# == LRParser ==
235#
236# The LR Parsing engine.
237# -----------------------------------------------------------------------------
238
239class LRParser:
240 def __init__(self,lrtab,errorf):
241 self.productions = lrtab.lr_productions
242 self.action = lrtab.lr_action
243 self.goto = lrtab.lr_goto
244 self.errorfunc = errorf
245
246 def errok(self):
247 self.errorok = 1
248
249 def restart(self):
250 del self.statestack[:]
251 del self.symstack[:]
252 sym = YaccSymbol()
253 sym.type = '$end'
254 self.symstack.append(sym)
255 self.statestack.append(0)
256
257 def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
258 if debug or yaccdevel:
259 if isinstance(debug,int):
260 debug = PlyLogger(sys.stderr)
261 return self.parsedebug(input,lexer,debug,tracking,tokenfunc)
262 elif tracking:
263 return self.parseopt(input,lexer,debug,tracking,tokenfunc)
264 else:
265 return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)
266
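    # Example (an illustrative sketch; 'parser' and 'data' are hypothetical
    # and would come from the yacc() entry point and the application):
    #
    #     result = parser.parse(data)           # fastest path, no tracking
    #     result = parser.parse(data, debug=1)  # routes through parsedebug()
    #                                           # with a PlyLogger on stderr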
267
268 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
269 # parsedebug().
270 #
271 # This is the debugging enabled version of parse(). All changes made to the
272 # parsing engine should be made here. For the non-debugging version,
273 # copy this code to a method parseopt() and delete all of the sections
274 # enclosed in:
275 #
276 # #--! DEBUG
277 # statements
278 # #--! DEBUG
279 #
280 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
281
282 def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
283 lookahead = None # Current lookahead symbol
284 lookaheadstack = [ ] # Stack of lookahead symbols
285 actions = self.action # Local reference to action table (to avoid lookup on self.)
286 goto = self.goto # Local reference to goto table (to avoid lookup on self.)
287 prod = self.productions # Local reference to production list (to avoid lookup on self.)
288 pslice = YaccProduction(None) # Production object passed to grammar rules
289 errorcount = 0 # Used during error recovery
290
291 # --! DEBUG
292 debug.info("PLY: PARSE DEBUG START")
293 # --! DEBUG
294
295 # If no lexer was given, we will try to use the lex module
296 if not lexer:
297 lex = load_ply_lex()
298 lexer = lex.lexer
299
300 # Set up the lexer and parser objects on pslice
301 pslice.lexer = lexer
302 pslice.parser = self
303
304 # If input was supplied, pass to lexer
305 if input is not None:
306 lexer.input(input)
307
308 if tokenfunc is None:
309 # Tokenize function
310 get_token = lexer.token
311 else:
312 get_token = tokenfunc
313
314 # Set up the state and symbol stacks
315
316 statestack = [ ] # Stack of parsing states
317 self.statestack = statestack
318 symstack = [ ] # Stack of grammar symbols
319 self.symstack = symstack
320
321 pslice.stack = symstack # Put in the production
322 errtoken = None # Err token
323
324 # The start state is assumed to be (0,$end)
325
326 statestack.append(0)
327 sym = YaccSymbol()
328 sym.type = "$end"
329 symstack.append(sym)
330 state = 0
331 while 1:
332 # Get the next symbol on the input. If a lookahead symbol
333 # is already set, we just use that. Otherwise, we'll pull
334 # the next token off of the lookaheadstack or from the lexer
335
336 # --! DEBUG
337 debug.debug('')
338 debug.debug('State : %s', state)
339 # --! DEBUG
340
341 if not lookahead:
342 if not lookaheadstack:
343 lookahead = get_token() # Get the next token
344 else:
345 lookahead = lookaheadstack.pop()
346 if not lookahead:
347 lookahead = YaccSymbol()
348 lookahead.type = "$end"
349
350 # --! DEBUG
351 debug.debug('Stack : %s',
352 ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
353 # --! DEBUG
354
355 # Check the action table
356 ltype = lookahead.type
357 t = actions[state].get(ltype)
358
359 if t is not None:
360 if t > 0:
361 # shift a symbol on the stack
362 statestack.append(t)
363 state = t
364
365 # --! DEBUG
366 debug.debug("Action : Shift and goto state %s", t)
367 # --! DEBUG
368
369 symstack.append(lookahead)
370 lookahead = None
371
372 # Decrease error count on successful shift
373 if errorcount: errorcount -=1
374 continue
375
376 if t < 0:
377 # reduce a symbol on the stack, emit a production
378 p = prod[-t]
379 pname = p.name
380 plen = p.len
381
382 # Get production function
383 sym = YaccSymbol()
384 sym.type = pname # Production name
385 sym.value = None
386
387 # --! DEBUG
388 if plen:
389 debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)
390 else:
391 debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)
392
393 # --! DEBUG
394
395 if plen:
396 targ = symstack[-plen-1:]
397 targ[0] = sym
398
399 # --! TRACKING
400 if tracking:
401 t1 = targ[1]
402 sym.lineno = t1.lineno
403 sym.lexpos = t1.lexpos
404 t1 = targ[-1]
405 sym.endlineno = getattr(t1,"endlineno",t1.lineno)
406 sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
407
408 # --! TRACKING
409
410 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
411 # The code enclosed in this section is duplicated
412 # below as a performance optimization. Make sure
413 # changes get made in both locations.
414
415 pslice.slice = targ
416
417 try:
418 # Call the grammar rule with our special slice object
419 del symstack[-plen:]
420 del statestack[-plen:]
421 p.callable(pslice)
422 # --! DEBUG
423 debug.info("Result : %s", format_result(pslice[0]))
424 # --! DEBUG
425 symstack.append(sym)
426 state = goto[statestack[-1]][pname]
427 statestack.append(state)
428 except SyntaxError:
429                            # If an error was set, enter error recovery state
430 lookaheadstack.append(lookahead)
431 symstack.pop()
432 statestack.pop()
433 state = statestack[-1]
434 sym.type = 'error'
435 lookahead = sym
436 errorcount = error_count
437 self.errorok = 0
438 continue
439 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
440
441 else:
442
443 # --! TRACKING
444 if tracking:
445 sym.lineno = lexer.lineno
446 sym.lexpos = lexer.lexpos
447 # --! TRACKING
448
449 targ = [ sym ]
450
451 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
452 # The code enclosed in this section is duplicated
453 # above as a performance optimization. Make sure
454 # changes get made in both locations.
455
456 pslice.slice = targ
457
458 try:
459 # Call the grammar rule with our special slice object
460 p.callable(pslice)
461 # --! DEBUG
462 debug.info("Result : %s", format_result(pslice[0]))
463 # --! DEBUG
464 symstack.append(sym)
465 state = goto[statestack[-1]][pname]
466 statestack.append(state)
467 except SyntaxError:
468                            # If an error was set, enter error recovery state
469 lookaheadstack.append(lookahead)
470 symstack.pop()
471 statestack.pop()
472 state = statestack[-1]
473 sym.type = 'error'
474 lookahead = sym
475 errorcount = error_count
476 self.errorok = 0
477 continue
478 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
479
480 if t == 0:
481 n = symstack[-1]
482 result = getattr(n,"value",None)
483 # --! DEBUG
484 debug.info("Done : Returning %s", format_result(result))
485 debug.info("PLY: PARSE DEBUG END")
486 # --! DEBUG
487 return result
488
489            if t is None:
490
491 # --! DEBUG
492 debug.error('Error : %s',
493 ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
494 # --! DEBUG
495
496 # We have some kind of parsing error here. To handle
497 # this, we are going to push the current token onto
498 # the tokenstack and replace it with an 'error' token.
499 # If there are any synchronization rules, they may
500 # catch it.
501 #
502                # In addition to pushing the error token, we call
503 # the user defined p_error() function if this is the
504 # first syntax error. This function is only called if
505 # errorcount == 0.
506 if errorcount == 0 or self.errorok:
507 errorcount = error_count
508 self.errorok = 0
509 errtoken = lookahead
510 if errtoken.type == "$end":
511 errtoken = None # End of file!
512 if self.errorfunc:
513 global errok,token,restart
514 errok = self.errok # Set some special functions available in error recovery
515 token = get_token
516 restart = self.restart
517 if errtoken and not hasattr(errtoken,'lexer'):
518 errtoken.lexer = lexer
519 tok = self.errorfunc(errtoken)
520 del errok, token, restart # Delete special functions
521
522 if self.errorok:
523 # User must have done some kind of panic
524 # mode recovery on their own. The
525 # returned token is the next lookahead
526 lookahead = tok
527 errtoken = None
528 continue
529 else:
530 if errtoken:
531                            if hasattr(errtoken,"lineno"): lineno = errtoken.lineno
532 else: lineno = 0
533 if lineno:
534 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
535 else:
536                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
537 else:
538 sys.stderr.write("yacc: Parse error in input. EOF\n")
539 return
540
541 else:
542 errorcount = error_count
543
544 # case 1: the statestack only has 1 entry on it. If we're in this state, the
545 # entire parse has been rolled back and we're completely hosed. The token is
546 # discarded and we just keep going.
547
548 if len(statestack) <= 1 and lookahead.type != "$end":
549 lookahead = None
550 errtoken = None
551 state = 0
552 # Nuke the pushback stack
553 del lookaheadstack[:]
554 continue
555
556 # case 2: the statestack has a couple of entries on it, but we're
557 # at the end of the file. nuke the top entry and generate an error token
558
559 # Start nuking entries on the stack
560 if lookahead.type == "$end":
561 # Whoa. We're really hosed here. Bail out
562 return
563
564 if lookahead.type != 'error':
565 sym = symstack[-1]
566 if sym.type == 'error':
567 # Hmmm. Error is on top of stack, we'll just nuke input
568 # symbol and continue
569 lookahead = None
570 continue
571 t = YaccSymbol()
572 t.type = 'error'
573 if hasattr(lookahead,"lineno"):
574 t.lineno = lookahead.lineno
575 t.value = lookahead
576 lookaheadstack.append(lookahead)
577 lookahead = t
578 else:
579 symstack.pop()
580 statestack.pop()
581 state = statestack[-1] # Potential bug fix
582
583 continue
584
585 # Call an error function here
586 raise RuntimeError("yacc: internal parser error!!!\n")
587
588 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
589 # parseopt().
590 #
591 # Optimized version of parse() method. DO NOT EDIT THIS CODE DIRECTLY.
592 # Edit the debug version above, then copy any modifications to the method
593 # below while removing #--! DEBUG sections.
594 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
595
596
597 def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
598 lookahead = None # Current lookahead symbol
599 lookaheadstack = [ ] # Stack of lookahead symbols
600 actions = self.action # Local reference to action table (to avoid lookup on self.)
601 goto = self.goto # Local reference to goto table (to avoid lookup on self.)
602 prod = self.productions # Local reference to production list (to avoid lookup on self.)
603 pslice = YaccProduction(None) # Production object passed to grammar rules
604 errorcount = 0 # Used during error recovery
605
606 # If no lexer was given, we will try to use the lex module
607 if not lexer:
608 lex = load_ply_lex()
609 lexer = lex.lexer
610
611 # Set up the lexer and parser objects on pslice
612 pslice.lexer = lexer
613 pslice.parser = self
614
615 # If input was supplied, pass to lexer
616 if input is not None:
617 lexer.input(input)
618
619 if tokenfunc is None:
620 # Tokenize function
621 get_token = lexer.token
622 else:
623 get_token = tokenfunc
624
625 # Set up the state and symbol stacks
626
627 statestack = [ ] # Stack of parsing states
628 self.statestack = statestack
629 symstack = [ ] # Stack of grammar symbols
630 self.symstack = symstack
631
632 pslice.stack = symstack # Put in the production
633 errtoken = None # Err token
634
635 # The start state is assumed to be (0,$end)
636
637 statestack.append(0)
638 sym = YaccSymbol()
639 sym.type = '$end'
640 symstack.append(sym)
641 state = 0
642 while 1:
643 # Get the next symbol on the input. If a lookahead symbol
644 # is already set, we just use that. Otherwise, we'll pull
645 # the next token off of the lookaheadstack or from the lexer
646
647 if not lookahead:
648 if not lookaheadstack:
649 lookahead = get_token() # Get the next token
650 else:
651 lookahead = lookaheadstack.pop()
652 if not lookahead:
653 lookahead = YaccSymbol()
654 lookahead.type = '$end'
655
656 # Check the action table
657 ltype = lookahead.type
658 t = actions[state].get(ltype)
659
660 if t is not None:
661 if t > 0:
662 # shift a symbol on the stack
663 statestack.append(t)
664 state = t
665
666 symstack.append(lookahead)
667 lookahead = None
668
669 # Decrease error count on successful shift
670 if errorcount: errorcount -=1
671 continue
672
673 if t < 0:
674 # reduce a symbol on the stack, emit a production
675 p = prod[-t]
676 pname = p.name
677 plen = p.len
678
679 # Get production function
680 sym = YaccSymbol()
681 sym.type = pname # Production name
682 sym.value = None
683
684 if plen:
685 targ = symstack[-plen-1:]
686 targ[0] = sym
687
688 # --! TRACKING
689 if tracking:
690 t1 = targ[1]
691 sym.lineno = t1.lineno
692 sym.lexpos = t1.lexpos
693 t1 = targ[-1]
694 sym.endlineno = getattr(t1,"endlineno",t1.lineno)
695 sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
696
697 # --! TRACKING
698
699 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
700 # The code enclosed in this section is duplicated
701 # below as a performance optimization. Make sure
702 # changes get made in both locations.
703
704 pslice.slice = targ
705
706 try:
707 # Call the grammar rule with our special slice object
708 del symstack[-plen:]
709 del statestack[-plen:]
710 p.callable(pslice)
711 symstack.append(sym)
712 state = goto[statestack[-1]][pname]
713 statestack.append(state)
714 except SyntaxError:
715                            # If an error was set, enter error recovery state
716 lookaheadstack.append(lookahead)
717 symstack.pop()
718 statestack.pop()
719 state = statestack[-1]
720 sym.type = 'error'
721 lookahead = sym
722 errorcount = error_count
723 self.errorok = 0
724 continue
725 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
726
727 else:
728
729 # --! TRACKING
730 if tracking:
731 sym.lineno = lexer.lineno
732 sym.lexpos = lexer.lexpos
733 # --! TRACKING
734
735 targ = [ sym ]
736
737 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
738 # The code enclosed in this section is duplicated
739 # above as a performance optimization. Make sure
740 # changes get made in both locations.
741
742 pslice.slice = targ
743
744 try:
745 # Call the grammar rule with our special slice object
746 p.callable(pslice)
747 symstack.append(sym)
748 state = goto[statestack[-1]][pname]
749 statestack.append(state)
750 except SyntaxError:
751                            # If an error was set, enter error recovery state
752 lookaheadstack.append(lookahead)
753 symstack.pop()
754 statestack.pop()
755 state = statestack[-1]
756 sym.type = 'error'
757 lookahead = sym
758 errorcount = error_count
759 self.errorok = 0
760 continue
761 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
762
763 if t == 0:
764 n = symstack[-1]
765 return getattr(n,"value",None)
766
767            if t is None:
768
769 # We have some kind of parsing error here. To handle
770 # this, we are going to push the current token onto
771 # the tokenstack and replace it with an 'error' token.
772 # If there are any synchronization rules, they may
773 # catch it.
774 #
775                # In addition to pushing the error token, we call
776 # the user defined p_error() function if this is the
777 # first syntax error. This function is only called if
778 # errorcount == 0.
779 if errorcount == 0 or self.errorok:
780 errorcount = error_count
781 self.errorok = 0
782 errtoken = lookahead
783 if errtoken.type == '$end':
784 errtoken = None # End of file!
785 if self.errorfunc:
786 global errok,token,restart
787 errok = self.errok # Set some special functions available in error recovery
788 token = get_token
789 restart = self.restart
790 if errtoken and not hasattr(errtoken,'lexer'):
791 errtoken.lexer = lexer
792 tok = self.errorfunc(errtoken)
793 del errok, token, restart # Delete special functions
794
795 if self.errorok:
796 # User must have done some kind of panic
797 # mode recovery on their own. The
798 # returned token is the next lookahead
799 lookahead = tok
800 errtoken = None
801 continue
802 else:
803 if errtoken:
804                            if hasattr(errtoken,"lineno"): lineno = errtoken.lineno
805 else: lineno = 0
806 if lineno:
807 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
808 else:
809                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
810 else:
811 sys.stderr.write("yacc: Parse error in input. EOF\n")
812 return
813
814 else:
815 errorcount = error_count
816
817 # case 1: the statestack only has 1 entry on it. If we're in this state, the
818 # entire parse has been rolled back and we're completely hosed. The token is
819 # discarded and we just keep going.
820
821 if len(statestack) <= 1 and lookahead.type != '$end':
822 lookahead = None
823 errtoken = None
824 state = 0
825 # Nuke the pushback stack
826 del lookaheadstack[:]
827 continue
828
829 # case 2: the statestack has a couple of entries on it, but we're
830 # at the end of the file. nuke the top entry and generate an error token
831
832 # Start nuking entries on the stack
833 if lookahead.type == '$end':
834 # Whoa. We're really hosed here. Bail out
835 return
836
837 if lookahead.type != 'error':
838 sym = symstack[-1]
839 if sym.type == 'error':
840 # Hmmm. Error is on top of stack, we'll just nuke input
841 # symbol and continue
842 lookahead = None
843 continue
844 t = YaccSymbol()
845 t.type = 'error'
846 if hasattr(lookahead,"lineno"):
847 t.lineno = lookahead.lineno
848 t.value = lookahead
849 lookaheadstack.append(lookahead)
850 lookahead = t
851 else:
852 symstack.pop()
853 statestack.pop()
854 state = statestack[-1] # Potential bug fix
855
856 continue
857
858 # Call an error function here
859 raise RuntimeError("yacc: internal parser error!!!\n")
860
861 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
862 # parseopt_notrack().
863 #
864 # Optimized version of parseopt() with line number tracking removed.
865 # DO NOT EDIT THIS CODE DIRECTLY. Copy the optimized version and remove
866 # code in the #--! TRACKING sections
867 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
868
869 def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
870 lookahead = None # Current lookahead symbol
871 lookaheadstack = [ ] # Stack of lookahead symbols
872 actions = self.action # Local reference to action table (to avoid lookup on self.)
873 goto = self.goto # Local reference to goto table (to avoid lookup on self.)
874 prod = self.productions # Local reference to production list (to avoid lookup on self.)
875 pslice = YaccProduction(None) # Production object passed to grammar rules
876 errorcount = 0 # Used during error recovery
877
878 # If no lexer was given, we will try to use the lex module
879 if not lexer:
880 lex = load_ply_lex()
881 lexer = lex.lexer
882
883 # Set up the lexer and parser objects on pslice
884 pslice.lexer = lexer
885 pslice.parser = self
886
887 # If input was supplied, pass to lexer
888 if input is not None:
889 lexer.input(input)
890
891 if tokenfunc is None:
892 # Tokenize function
893 get_token = lexer.token
894 else:
895 get_token = tokenfunc
896
897 # Set up the state and symbol stacks
898
899 statestack = [ ] # Stack of parsing states
900 self.statestack = statestack
901 symstack = [ ] # Stack of grammar symbols
902 self.symstack = symstack
903
904 pslice.stack = symstack # Put in the production
905 errtoken = None # Err token
906
907 # The start state is assumed to be (0,$end)
908
909 statestack.append(0)
910 sym = YaccSymbol()
911 sym.type = '$end'
912 symstack.append(sym)
913 state = 0
914 while 1:
915 # Get the next symbol on the input. If a lookahead symbol
916 # is already set, we just use that. Otherwise, we'll pull
917 # the next token off of the lookaheadstack or from the lexer
918
919 if not lookahead:
920 if not lookaheadstack:
921 lookahead = get_token() # Get the next token
922 else:
923 lookahead = lookaheadstack.pop()
924 if not lookahead:
925 lookahead = YaccSymbol()
926 lookahead.type = '$end'
927
928 # Check the action table
929 ltype = lookahead.type
930 t = actions[state].get(ltype)
931
932 if t is not None:
933 if t > 0:
934 # shift a symbol on the stack
935 statestack.append(t)
936 state = t
937
938 symstack.append(lookahead)
939 lookahead = None
940
941 # Decrease error count on successful shift
942 if errorcount: errorcount -=1
943 continue
944
945 if t < 0:
946 # reduce a symbol on the stack, emit a production
947 p = prod[-t]
948 pname = p.name
949 plen = p.len
950
951 # Get production function
952 sym = YaccSymbol()
953 sym.type = pname # Production name
954 sym.value = None
955
956 if plen:
957 targ = symstack[-plen-1:]
958 targ[0] = sym
959
960 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
961 # The code enclosed in this section is duplicated
962 # below as a performance optimization. Make sure
963 # changes get made in both locations.
964
965 pslice.slice = targ
966
967 try:
968 # Call the grammar rule with our special slice object
969 del symstack[-plen:]
970 del statestack[-plen:]
971 p.callable(pslice)
972 symstack.append(sym)
973 state = goto[statestack[-1]][pname]
974 statestack.append(state)
975 except SyntaxError:
976                            # If an error was set, enter error recovery state
977 lookaheadstack.append(lookahead)
978 symstack.pop()
979 statestack.pop()
980 state = statestack[-1]
981 sym.type = 'error'
982 lookahead = sym
983 errorcount = error_count
984 self.errorok = 0
985 continue
986 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
987
988 else:
989
990 targ = [ sym ]
991
992 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
993 # The code enclosed in this section is duplicated
994 # above as a performance optimization. Make sure
995 # changes get made in both locations.
996
997 pslice.slice = targ
998
999 try:
1000 # Call the grammar rule with our special slice object
1001 p.callable(pslice)
1002 symstack.append(sym)
1003 state = goto[statestack[-1]][pname]
1004 statestack.append(state)
1005 except SyntaxError:
1006                            # If an error was set, enter error recovery state
1007 lookaheadstack.append(lookahead)
1008 symstack.pop()
1009 statestack.pop()
1010 state = statestack[-1]
1011 sym.type = 'error'
1012 lookahead = sym
1013 errorcount = error_count
1014 self.errorok = 0
1015 continue
1016 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1017
1018 if t == 0:
1019 n = symstack[-1]
1020 return getattr(n,"value",None)
1021
1022            if t is None:
1023
1024 # We have some kind of parsing error here. To handle
1025 # this, we are going to push the current token onto
1026 # the tokenstack and replace it with an 'error' token.
1027 # If there are any synchronization rules, they may
1028 # catch it.
1029 #
1030                # In addition to pushing the error token, we call
1031 # the user defined p_error() function if this is the
1032 # first syntax error. This function is only called if
1033 # errorcount == 0.
1034 if errorcount == 0 or self.errorok:
1035 errorcount = error_count
1036 self.errorok = 0
1037 errtoken = lookahead
1038 if errtoken.type == '$end':
1039 errtoken = None # End of file!
1040 if self.errorfunc:
1041 global errok,token,restart
1042 errok = self.errok # Set some special functions available in error recovery
1043 token = get_token
1044 restart = self.restart
1045 if errtoken and not hasattr(errtoken,'lexer'):
1046 errtoken.lexer = lexer
1047 tok = self.errorfunc(errtoken)
1048 del errok, token, restart # Delete special functions
1049
1050 if self.errorok:
1051 # User must have done some kind of panic
1052 # mode recovery on their own. The
1053 # returned token is the next lookahead
1054 lookahead = tok
1055 errtoken = None
1056 continue
1057 else:
1058 if errtoken:
1059                            if hasattr(errtoken,"lineno"): lineno = errtoken.lineno
1060 else: lineno = 0
1061 if lineno:
1062 sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
1063 else:
1064                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
1065 else:
1066 sys.stderr.write("yacc: Parse error in input. EOF\n")
1067 return
1068
1069 else:
1070 errorcount = error_count
1071
1072 # case 1: the statestack only has 1 entry on it. If we're in this state, the
1073 # entire parse has been rolled back and we're completely hosed. The token is
1074 # discarded and we just keep going.
1075
1076 if len(statestack) <= 1 and lookahead.type != '$end':
1077 lookahead = None
1078 errtoken = None
1079 state = 0
1080 # Nuke the pushback stack
1081 del lookaheadstack[:]
1082 continue
1083
1084 # case 2: the statestack has a couple of entries on it, but we're
1085 # at the end of the file. nuke the top entry and generate an error token
1086
1087 # Start nuking entries on the stack
1088 if lookahead.type == '$end':
1089 # Whoa. We're really hosed here. Bail out
1090 return
1091
1092 if lookahead.type != 'error':
1093 sym = symstack[-1]
1094 if sym.type == 'error':
1095 # Hmmm. Error is on top of stack, we'll just nuke input
1096 # symbol and continue
1097 lookahead = None
1098 continue
1099 t = YaccSymbol()
1100 t.type = 'error'
1101 if hasattr(lookahead,"lineno"):
1102 t.lineno = lookahead.lineno
1103 t.value = lookahead
1104 lookaheadstack.append(lookahead)
1105 lookahead = t
1106 else:
1107 symstack.pop()
1108 statestack.pop()
1109 state = statestack[-1] # Potential bug fix
1110
1111 continue
1112
1113 # Call an error function here
1114 raise RuntimeError("yacc: internal parser error!!!\n")
1115
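# Example (an illustrative sketch of the user-defined p_error() hook that
# the engine above calls; the recovery strategy is entirely up to the user):
#
#     def p_error(tok):
#         if tok is None:
#             sys.stderr.write("syntax error at EOF\n")
#         else:
#             sys.stderr.write("syntax error at %r\n" % (tok.value,))
#         # During recovery the errok(), token(), and restart() globals
#         # set up above are available to the handler.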
1116# -----------------------------------------------------------------------------
1117# === Grammar Representation ===
1118#
1119# The following functions, classes, and variables are used to represent and
1120# manipulate the rules that make up a grammar.
1121# -----------------------------------------------------------------------------
1122
1123import re
1124
1125# regex matching identifiers
1126_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
1127
1128# -----------------------------------------------------------------------------
1129# class Production:
1130#
1131# This class stores the raw information about a single production or grammar rule.
1132# A grammar rule refers to a specification such as this:
1133#
1134# expr : expr PLUS term
1135#
1136# Here are the basic attributes defined on all productions
1137#
1138# name - Name of the production. For example 'expr'
1139# prod - A list of symbols on the right side ['expr','PLUS','term']
1140# prec - Production precedence level
1141# number - Production number.
1142# func - Function that executes on reduce
1143# file - File where production function is defined
1144# lineno - Line number where production function is defined
1145#
1146# The following attributes are defined or optional.
1147#
1148# len - Length of the production (number of symbols on right hand side)
1149# usyms - Set of unique symbols found in the production
1150# -----------------------------------------------------------------------------
1151
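# Example (illustrative): the rule 'expr : expr PLUS term' becomes a
# Production with name='expr', prod=('expr','PLUS','term'), len=3, and
# usyms=['expr','PLUS','term'].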
1152class Production(object):
1153 reduced = 0
1154 def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0):
1155 self.name = name
1156 self.prod = tuple(prod)
1157 self.number = number
1158 self.func = func
1159 self.callable = None
1160 self.file = file
1161 self.line = line
1162 self.prec = precedence
1163
1164 # Internal settings used during table construction
1165
1166 self.len = len(self.prod) # Length of the production
1167
1168 # Create a list of unique production symbols used in the production
1169 self.usyms = [ ]
1170 for s in self.prod:
1171 if s not in self.usyms:
1172 self.usyms.append(s)
1173
1174 # List of all LR items for the production
1175 self.lr_items = []
1176 self.lr_next = None
1177
1178 # Create a string representation
1179 if self.prod:
1180 self.str = "%s -> %s" % (self.name," ".join(self.prod))
1181 else:
1182 self.str = "%s -> <empty>" % self.name
1183
1184 def __str__(self):
1185 return self.str
1186
1187 def __repr__(self):
1188 return "Production("+str(self)+")"
1189
1190 def __len__(self):
1191 return len(self.prod)
1192
1193 def __nonzero__(self):
1194 return 1
1195
1196 def __getitem__(self,index):
1197 return self.prod[index]
1198
1199 # Return the nth lr_item from the production (or None if at the end)
1200 def lr_item(self,n):
1201 if n > len(self.prod): return None
1202 p = LRItem(self,n)
1203
1204 # Precompute the list of productions immediately following. Hack. Remove later
1205 try:
1206 p.lr_after = Prodnames[p.prod[n+1]]
1207 except (IndexError,KeyError):
1208 p.lr_after = []
1209 try:
1210 p.lr_before = p.prod[n-1]
1211 except IndexError:
1212 p.lr_before = None
1213
1214 return p
1215
1216 # Bind the production function name to a callable
1217 def bind(self,pdict):
1218 if self.func:
1219 self.callable = pdict[self.func]
1220
1221# This class serves as a minimal stand-in for Production objects when
1222# reading table data from files. It only contains information
1223# actually used by the LR parsing engine, plus some additional
1224# debugging information.
1225class MiniProduction(object):
1226 def __init__(self,str,name,len,func,file,line):
1227 self.name = name
1228 self.len = len
1229 self.func = func
1230 self.callable = None
1231 self.file = file
1232 self.line = line
1233 self.str = str
1234 def __str__(self):
1235 return self.str
1236 def __repr__(self):
1237 return "MiniProduction(%s)" % self.str
1238
1239 # Bind the production function name to a callable
1240 def bind(self,pdict):
1241 if self.func:
1242 self.callable = pdict[self.func]
1243
1244
1245# -----------------------------------------------------------------------------
1246# class LRItem
1247#
1248# This class represents a specific stage of parsing a production rule. For
1249# example:
1250#
1251# expr : expr . PLUS term
1252#
1253# In the above, the "." represents the current location of the parse. Here
1254# are the basic attributes:
1255#
1256# name - Name of the production. For example 'expr'
1257# prod - A list of symbols on the right side ['expr','.', 'PLUS','term']
1258# number - Production number.
1259#
1260# lr_next    - Next LR item. For example, if we are 'expr -> expr . PLUS term'
1261# then lr_next refers to 'expr -> expr PLUS . term'
1262# lr_index - LR item index (location of the ".") in the prod list.
1263# lookaheads - LALR lookahead symbols for this item
1264# len - Length of the production (number of symbols on right hand side)
1265# lr_after - List of all productions that immediately follow
1266# lr_before - Grammar symbol immediately before
1267# -----------------------------------------------------------------------------
1268
1269class LRItem(object):
1270 def __init__(self,p,n):
1271 self.name = p.name
1272 self.prod = list(p.prod)
1273 self.number = p.number
1274 self.lr_index = n
1275 self.lookaheads = { }
1276 self.prod.insert(n,".")
1277 self.prod = tuple(self.prod)
1278 self.len = len(self.prod)
1279 self.usyms = p.usyms
1280
1281 def __str__(self):
1282 if self.prod:
1283 s = "%s -> %s" % (self.name," ".join(self.prod))
1284 else:
1285 s = "%s -> <empty>" % self.name
1286 return s
1287
1288 def __repr__(self):
1289 return "LRItem("+str(self)+")"
1290
1291# -----------------------------------------------------------------------------
1292# rightmost_terminal()
1293#
1294# Return the rightmost terminal from a list of symbols. Used in add_production()
1295# -----------------------------------------------------------------------------
1296def rightmost_terminal(symbols, terminals):
1297 i = len(symbols) - 1
1298 while i >= 0:
1299 if symbols[i] in terminals:
1300 return symbols[i]
1301 i -= 1
1302 return None
1303
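# Example (illustrative): if 'PLUS' is a terminal but 'expr' and 'term'
# are not, rightmost_terminal(['expr','PLUS','term'], terminals) returns
# 'PLUS'; it returns None when no symbol in the list is a terminal.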
1304# -----------------------------------------------------------------------------
1305# === GRAMMAR CLASS ===
1306#
1307# The following class represents the contents of the specified grammar along
1308# with various computed properties such as first sets, follow sets, LR items, etc.
1309# This data is used for critical parts of the table generation process later.
1310# -----------------------------------------------------------------------------
1311
1312class GrammarError(YaccError): pass
1313
1314class Grammar(object):
1315 def __init__(self,terminals):
1316 self.Productions = [None] # A list of all of the productions. The first
1317 # entry is always reserved for the purpose of
1318 # building an augmented grammar
1319
1320 self.Prodnames = { } # A dictionary mapping the names of nonterminals to a list of all
1321 # productions of that nonterminal.
1322
1323 self.Prodmap = { } # A dictionary that is only used to detect duplicate
1324 # productions.
1325
1326 self.Terminals = { } # A dictionary mapping the names of terminal symbols to a
1327 # list of the rules where they are used.
1328
1329 for term in terminals:
1330 self.Terminals[term] = []
1331
1332 self.Terminals['error'] = []
1333
1334 self.Nonterminals = { } # A dictionary mapping names of nonterminals to a list
1335 # of rule numbers where they are used.
1336
1337 self.First = { } # A dictionary of precomputed FIRST(x) symbols
1338
1339 self.Follow = { } # A dictionary of precomputed FOLLOW(x) symbols
1340
1341 self.Precedence = { } # Precedence rules for each terminal. Contains tuples of the
1342 # form ('right',level) or ('nonassoc', level) or ('left',level)
1343
1344        self.UsedPrecedence = { }    # Precedence rules that were actually used by the grammar.
1345 # This is only used to provide error checking and to generate
1346 # a warning about unused precedence rules.
1347
1348 self.Start = None # Starting symbol for the grammar
1349
1350
1351 def __len__(self):
1352 return len(self.Productions)
1353
1354 def __getitem__(self,index):
1355 return self.Productions[index]
1356
1357 # -----------------------------------------------------------------------------
1358 # set_precedence()
1359 #
1360 # Sets the precedence for a given terminal. assoc is the associativity such as
1361 # 'left','right', or 'nonassoc'. level is a numeric level.
1362 #
1363 # -----------------------------------------------------------------------------
1364
1365 def set_precedence(self,term,assoc,level):
1366 assert self.Productions == [None],"Must call set_precedence() before add_production()"
1367 if term in self.Precedence:
1368 raise GrammarError("Precedence already specified for terminal '%s'" % term)
1369 if assoc not in ['left','right','nonassoc']:
1370 raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
1371 self.Precedence[term] = (assoc,level)
1372
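    # Example (an illustrative sketch): the user-level table
    #
    #     precedence = (('left','PLUS','MINUS'), ('left','TIMES','DIVIDE'))
    #
    # typically arrives here as g.set_precedence('PLUS','left',1),
    # g.set_precedence('MINUS','left',1), g.set_precedence('TIMES','left',2),
    # and so on, with higher levels binding more tightly.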
1373 # -----------------------------------------------------------------------------
1374 # add_production()
1375 #
1376 # Given an action function, this function assembles a production rule and
1377 # computes its precedence level.
1378 #
1379 # The production rule is supplied as a list of symbols. For example,
1380 # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
1381 # symbols ['expr','PLUS','term'].
1382 #
1383    # Precedence is determined by the precedence of the right-most terminal
1384 # or the precedence of a terminal specified by %prec.
1385 #
1386 # A variety of error checks are performed to make sure production symbols
1387 # are valid and that %prec is used correctly.
1388 # -----------------------------------------------------------------------------
1389
1390 def add_production(self,prodname,syms,func=None,file='',line=0):
1391
1392 if prodname in self.Terminals:
1393 raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname))
1394 if prodname == 'error':
1395 raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname))
1396 if not _is_identifier.match(prodname):
1397 raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname))
1398
1399 # Look for literal tokens
1400 for n,s in enumerate(syms):
1401 if s[0] in "'\"":
1402 try:
1403 c = eval(s)
1404 if (len(c) > 1):
1405 raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname))
1406 if not c in self.Terminals:
1407 self.Terminals[c] = []
1408 syms[n] = c
1409 continue
1410 except SyntaxError:
1411 pass
1412 if not _is_identifier.match(s) and s != '%prec':
1413 raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname))
1414
1415 # Determine the precedence level
1416 if '%prec' in syms:
1417 if syms[-1] == '%prec':
1418 raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))
1419 if syms[-2] != '%prec':
1420 raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))
1421 precname = syms[-1]
1422 prodprec = self.Precedence.get(precname,None)
1423 if not prodprec:
1424 raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname))
1425 else:
1426 self.UsedPrecedence[precname] = 1
1427 del syms[-2:] # Drop %prec from the rule
1428 else:
1429 # If no %prec, precedence is determined by the rightmost terminal symbol
1430 precname = rightmost_terminal(syms,self.Terminals)
1431 prodprec = self.Precedence.get(precname,('right',0))
1432
1433 # See if the rule is already in the rulemap
1434 map = "%s -> %s" % (prodname,syms)
1435 if map in self.Prodmap:
1436 m = self.Prodmap[map]
1437 raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +
1438 "Previous definition at %s:%d" % (m.file, m.line))
1439
1440 # From this point on, everything is valid. Create a new Production instance
1441 pnumber = len(self.Productions)
1442 if not prodname in self.Nonterminals:
1443 self.Nonterminals[prodname] = [ ]
1444
1445 # Add the production number to Terminals and Nonterminals
1446 for t in syms:
1447 if t in self.Terminals:
1448 self.Terminals[t].append(pnumber)
1449 else:
1450 if not t in self.Nonterminals:
1451 self.Nonterminals[t] = [ ]
1452 self.Nonterminals[t].append(pnumber)
1453
1454 # Create a production and add it to the list of productions
1455 p = Production(pnumber,prodname,syms,prodprec,func,file,line)
1456 self.Productions.append(p)
1457 self.Prodmap[map] = p
1458
1459 # Add to the global productions list
1460 try:
1461 self.Prodnames[prodname].append(p)
1462 except KeyError:
1463 self.Prodnames[prodname] = [ p ]
1464 return 0
1465
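    # Example (illustrative): a rule supplied as
    # ['MINUS','expr','%prec','UMINUS'] takes its precedence from the
    # 'UMINUS' entry set by set_precedence(), and the trailing
    # ['%prec','UMINUS'] pair is dropped before the Production is built.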
1466 # -----------------------------------------------------------------------------
1467 # set_start()
1468 #
1469 # Sets the starting symbol and creates the augmented grammar. Production
1470 # rule 0 is S' -> start where start is the start symbol.
1471 # -----------------------------------------------------------------------------
1472
1473 def set_start(self,start=None):
1474 if not start:
1475 start = self.Productions[1].name
1476 if start not in self.Nonterminals:
1477 raise GrammarError("start symbol %s undefined" % start)
1478 self.Productions[0] = Production(0,"S'",[start])
1479 self.Nonterminals[start].append(0)
1480 self.Start = start
1481
1482 # -----------------------------------------------------------------------------
1483 # find_unreachable()
1484 #
1485 # Find all of the nonterminal symbols that can't be reached from the starting
1486 # symbol. Returns a list of nonterminals that can't be reached.
1487 # -----------------------------------------------------------------------------
1488
1489 def find_unreachable(self):
1490
1491 # Mark all symbols that are reachable from a symbol s
1492 def mark_reachable_from(s):
1493 if reachable[s]:
1494 # We've already reached symbol s.
1495 return
1496 reachable[s] = 1
1497 for p in self.Prodnames.get(s,[]):
1498 for r in p.prod:
1499 mark_reachable_from(r)
1500
1501 reachable = { }
1502 for s in list(self.Terminals) + list(self.Nonterminals):
1503 reachable[s] = 0
1504
1505 mark_reachable_from( self.Productions[0].prod[0] )
1506
1507 return [s for s in list(self.Nonterminals)
1508 if not reachable[s]]
1509
1510 # -----------------------------------------------------------------------------
1511 # infinite_cycles()
1512 #
1513 # This function looks at the various parsing rules and tries to detect
1514 # infinite recursion cycles (grammar rules where there is no possible way
1515 # to derive a string of only terminals).
1516 # -----------------------------------------------------------------------------
1517
1518 def infinite_cycles(self):
1519 terminates = {}
1520
1521 # Terminals:
1522 for t in self.Terminals:
1523 terminates[t] = 1
1524
1525 terminates['$end'] = 1
1526
1527 # Nonterminals:
1528
1529 # Initialize to false:
1530 for n in self.Nonterminals:
1531 terminates[n] = 0
1532
1533 # Then propagate termination until no change:
1534 while 1:
1535 some_change = 0
1536 for (n,pl) in self.Prodnames.items():
1537 # Nonterminal n terminates iff any of its productions terminates.
1538 for p in pl:
1539 # Production p terminates iff all of its rhs symbols terminate.
1540 for s in p.prod:
1541 if not terminates[s]:
1542 # The symbol s does not terminate,
1543 # so production p does not terminate.
1544 p_terminates = 0
1545 break
1546 else:
1547 # didn't break from the loop,
1548 # so every symbol s terminates
1549 # so production p terminates.
1550 p_terminates = 1
1551
1552 if p_terminates:
1553 # symbol n terminates!
1554 if not terminates[n]:
1555 terminates[n] = 1
1556 some_change = 1
1557 # Don't need to consider any more productions for this n.
1558 break
1559
1560 if not some_change:
1561 break
1562
1563 infinite = []
1564 for (s,term) in terminates.items():
1565 if not term:
1566 if not s in self.Prodnames and not s in self.Terminals and s != 'error':
1567 # s is used-but-not-defined, and we've already warned of that,
1568 # so it would be overkill to say that it's also non-terminating.
1569 pass
1570 else:
1571 infinite.append(s)
1572
1573 return infinite
1574
1575
1576 # -----------------------------------------------------------------------------
1577 # undefined_symbols()
1578 #
1579    # Find all symbols that were used in the grammar, but not defined as tokens or
1580    # grammar rules. Returns a list of tuples (sym, prod) where sym is the symbol
1581 # and prod is the production where the symbol was used.
1582 # -----------------------------------------------------------------------------
1583 def undefined_symbols(self):
1584 result = []
1585 for p in self.Productions:
1586 if not p: continue
1587
1588 for s in p.prod:
1589 if not s in self.Prodnames and not s in self.Terminals and s != 'error':
1590 result.append((s,p))
1591 return result
1592
1593 # -----------------------------------------------------------------------------
1594 # unused_terminals()
1595 #
1596 # Find all terminals that were defined, but not used by the grammar. Returns
1597 # a list of all symbols.
1598 # -----------------------------------------------------------------------------
1599 def unused_terminals(self):
1600 unused_tok = []
1601 for s,v in self.Terminals.items():
1602 if s != 'error' and not v:
1603 unused_tok.append(s)
1604
1605 return unused_tok
1606
1607 # ------------------------------------------------------------------------------
1608 # unused_rules()
1609 #
1610 # Find all grammar rules that were defined, but not used (maybe not reachable)
1611 # Returns a list of productions.
1612 # ------------------------------------------------------------------------------
1613
1614 def unused_rules(self):
1615 unused_prod = []
1616 for s,v in self.Nonterminals.items():
1617 if not v:
1618 p = self.Prodnames[s][0]
1619 unused_prod.append(p)
1620 return unused_prod
1621
1622 # -----------------------------------------------------------------------------
1623 # unused_precedence()
1624 #
1625 # Returns a list of tuples (term,precedence) corresponding to precedence
1626 # rules that were never used by the grammar. term is the name of the terminal
1627 # on which precedence was applied and precedence is a string such as 'left' or
1628 # 'right' corresponding to the type of precedence.
1629 # -----------------------------------------------------------------------------
1630
1631 def unused_precedence(self):
1632 unused = []
1633 for termname in self.Precedence:
1634 if not (termname in self.Terminals or termname in self.UsedPrecedence):
1635 unused.append((termname,self.Precedence[termname][0]))
1636
1637 return unused
1638
1639 # -------------------------------------------------------------------------
1640 # _first()
1641 #
1642 # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
1643 #
1644    # During execution of compute_first(), the result may be incomplete.
1645 # Afterward (e.g., when called from compute_follow()), it will be complete.
1646 # -------------------------------------------------------------------------
1647 def _first(self,beta):
1648
1649 # We are computing First(x1,x2,x3,...,xn)
1650 result = [ ]
1651 for x in beta:
1652 x_produces_empty = 0
1653
1654 # Add all the non-<empty> symbols of First[x] to the result.
1655 for f in self.First[x]:
1656 if f == '<empty>':
1657 x_produces_empty = 1
1658 else:
1659 if f not in result: result.append(f)
1660
1661 if x_produces_empty:
1662 # We have to consider the next x in beta,
1663 # i.e. stay in the loop.
1664 pass
1665 else:
1666 # We don't have to consider any further symbols in beta.
1667 break
1668 else:
1669 # There was no 'break' from the loop,
1670 # so x_produces_empty was true for all x in beta,
1671 # so beta produces empty as well.
1672 result.append('<empty>')
1673
1674 return result
1675
1676 # -------------------------------------------------------------------------
1677 # compute_first()
1678 #
1679 # Compute the value of FIRST1(X) for all symbols
1680 # -------------------------------------------------------------------------
1681 def compute_first(self):
1682 if self.First:
1683 return self.First
1684
1685 # Terminals:
1686 for t in self.Terminals:
1687 self.First[t] = [t]
1688
1689 self.First['$end'] = ['$end']
1690
1691 # Nonterminals:
1692
1693 # Initialize to the empty set:
1694 for n in self.Nonterminals:
1695 self.First[n] = []
1696
1697 # Then propagate symbols until no change:
1698 while 1:
1699 some_change = 0
1700 for n in self.Nonterminals:
1701 for p in self.Prodnames[n]:
1702 for f in self._first(p.prod):
1703 if f not in self.First[n]:
1704 self.First[n].append( f )
1705 some_change = 1
1706 if not some_change:
1707 break
1708
1709 return self.First
1710
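    # Example (illustrative): for the grammar
    #
    #     S : A B          A : 'a'          A : <empty>          B : 'b'
    #
    # this computes First['A'] = ['a','<empty>'] and, since A can derive
    # the empty string, First['S'] = ['a','b'].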
1711 # ---------------------------------------------------------------------
1712 # compute_follow()
1713 #
1714 # Computes all of the follow sets for every non-terminal symbol. The
1715 # follow set is the set of all symbols that might follow a given
1716 # non-terminal. See the Dragon book, 2nd Ed. p. 189.
1717 # ---------------------------------------------------------------------
1718 def compute_follow(self,start=None):
1719 # If already computed, return the result
1720 if self.Follow:
1721 return self.Follow
1722
1723 # If first sets not computed yet, do that first.
1724 if not self.First:
1725 self.compute_first()
1726
1727 # Add '$end' to the follow list of the start symbol
1728 for k in self.Nonterminals:
1729 self.Follow[k] = [ ]
1730
1731 if not start:
1732 start = self.Productions[1].name
1733
1734 self.Follow[start] = [ '$end' ]
1735
1736 while 1:
1737 didadd = 0
1738 for p in self.Productions[1:]:
1739 # Here is the production set
1740 for i in range(len(p.prod)):
1741 B = p.prod[i]
1742 if B in self.Nonterminals:
1743 # Okay. We got a non-terminal in a production
1744 fst = self._first(p.prod[i+1:])
1745 hasempty = 0
1746 for f in fst:
1747 if f != '<empty>' and f not in self.Follow[B]:
1748 self.Follow[B].append(f)
1749 didadd = 1
1750 if f == '<empty>':
1751 hasempty = 1
1752 if hasempty or i == (len(p.prod)-1):
1753 # Add elements of follow(a) to follow(b)
1754 for f in self.Follow[p.name]:
1755 if f not in self.Follow[B]:
1756 self.Follow[B].append(f)
1757 didadd = 1
1758 if not didadd: break
1759 return self.Follow
1760
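    # Example (illustrative): for the same grammar (S : A B, with A : 'a'
    # or <empty>, and B : 'b'), Follow['S'] = ['$end'], Follow['A'] = ['b']
    # (everything in First(B)), and Follow['B'] = ['$end'] (inherited from
    # Follow['S']).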
1761
1762 # -----------------------------------------------------------------------------
1763 # build_lritems()
1764 #
1765 # This function walks the list of productions and builds a complete set of the
1766 # LR items. The LR items are stored in two ways: First, they are uniquely
1767 # numbered and placed in the list _lritems. Second, a linked list of LR items
1768 # is built for each production. For example:
1769 #
1770 # E -> E PLUS E
1771 #
1772 # Creates the list
1773 #
1774 # [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
1775 # -----------------------------------------------------------------------------
1776
1777 def build_lritems(self):
1778 for p in self.Productions:
1779 lastlri = p
1780 i = 0
1781 lr_items = []
1782 while 1:
1783 if i > len(p):
1784 lri = None
1785 else:
1786 lri = LRItem(p,i)
1787 # Precompute the list of productions immediately following
1788 try:
1789 lri.lr_after = self.Prodnames[lri.prod[i+1]]
1790 except (IndexError,KeyError):
1791 lri.lr_after = []
1792 try:
1793 lri.lr_before = lri.prod[i-1]
1794 except IndexError:
1795 lri.lr_before = None
1796
1797 lastlri.lr_next = lri
1798 if not lri: break
1799 lr_items.append(lri)
1800 lastlri = lri
1801 i += 1
1802 p.lr_items = lr_items
1803
1804# -----------------------------------------------------------------------------
1805# == Class LRTable ==
1806#
1807# This class represents a basic table of LR parsing information.
1808# Methods for generating the tables are not defined here. They are defined
1809# in the derived class LRGeneratedTable.
1810# -----------------------------------------------------------------------------
1811
1812class VersionError(YaccError): pass
1813
1814class LRTable(object):
1815 def __init__(self):
1816 self.lr_action = None
1817 self.lr_goto = None
1818 self.lr_productions = None
1819 self.lr_method = None
1820
1821 def read_table(self,module):
1822 if isinstance(module,types.ModuleType):
1823 parsetab = module
1824 else:
1825 if sys.version_info[0] < 3:
1826 exec("import %s as parsetab" % module)
1827 else:
1828 env = { }
1829 exec("import %s as parsetab" % module, env, env)
1830 parsetab = env['parsetab']
1831
1832 if parsetab._tabversion != __tabversion__:
1833 raise VersionError("yacc table file version is out of date")
1834
1835 self.lr_action = parsetab._lr_action
1836 self.lr_goto = parsetab._lr_goto
1837
1838 self.lr_productions = []
1839 for p in parsetab._lr_productions:
1840 self.lr_productions.append(MiniProduction(*p))
1841
1842 self.lr_method = parsetab._lr_method
1843 return parsetab._lr_signature
1844
1845 def read_pickle(self,filename):
1846 try:
1847 import cPickle as pickle
1848 except ImportError:
1849 import pickle
1850
1851 in_f = open(filename,"rb")
1852
1853 tabversion = pickle.load(in_f)
1854 if tabversion != __tabversion__:
1855 raise VersionError("yacc table file version is out of date")
1856 self.lr_method = pickle.load(in_f)
1857 signature = pickle.load(in_f)
1858 self.lr_action = pickle.load(in_f)
1859 self.lr_goto = pickle.load(in_f)
1860 productions = pickle.load(in_f)
1861
1862 self.lr_productions = []
1863 for p in productions:
1864 self.lr_productions.append(MiniProduction(*p))
1865
1866 in_f.close()
1867 return signature
1868
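    # Example (an illustrative sketch; the filename is whatever picklefile=
    # argument was used when the tables were written, and grammar_module is
    # the hypothetical module holding the p_* rule functions):
    #
    #     lr = LRTable()
    #     signature = lr.read_pickle('parser.pickle')
    #     lr.bind_callables(grammar_module.__dict__)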
1869 # Bind all production function names to callable objects in pdict
1870 def bind_callables(self,pdict):
1871 for p in self.lr_productions:
1872 p.bind(pdict)
1873
1874# -----------------------------------------------------------------------------
1875# === LR Generator ===
1876#
1877# The following classes and functions are used to generate LR parsing tables on
1878# a grammar.
1879# -----------------------------------------------------------------------------
1880
1881# -----------------------------------------------------------------------------
1882# digraph()
1883# traverse()
1884#
1885# The following two functions are used to compute set valued functions
1886# of the form:
1887#
1888# F(x) = F'(x) U U{F(y) | x R y}
1889#
1890# This is used to compute the values of Read() sets as well as FOLLOW sets
1891# in LALR(1) generation.
1892#
1893# Inputs: X - An input set
1894# R - A relation
1895# FP - Set-valued function
1896# ------------------------------------------------------------------------------
1897
1898def digraph(X,R,FP):
1899 N = { }
1900 for x in X:
1901 N[x] = 0
1902 stack = []
1903 F = { }
1904 for x in X:
1905 if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
1906 return F
1907
1908def traverse(x,N,stack,F,X,R,FP):
1909 stack.append(x)
1910 d = len(stack)
1911 N[x] = d
1912    F[x] = FP(x)             # F(x) <- F'(x)
1913
1914 rel = R(x) # Get y's related to x
1915 for y in rel:
1916 if N[y] == 0:
1917 traverse(y,N,stack,F,X,R,FP)
1918 N[x] = min(N[x],N[y])
1919 for a in F.get(y,[]):
1920 if a not in F[x]: F[x].append(a)
1921 if N[x] == d:
1922 N[stack[-1]] = MAXINT
1923 F[stack[-1]] = F[x]
1924 element = stack.pop()
1925 while element != x:
1926 N[stack[-1]] = MAXINT
1927 F[stack[-1]] = F[x]
1928 element = stack.pop()
1929
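# Example (an illustrative sketch of the calling convention; later in this
# file X, R, and FP are built from the grammar's LALR transitions):
#
#     X  = ['a','b']
#     R  = lambda x: ['b'] if x == 'a' else []    # the relation x R y
#     FP = lambda x: [x.upper()]                  # the base function F'(x)
#     F  = digraph(X,R,FP)    # -> {'a': ['A','B'], 'b': ['B']}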
1930class LALRError(YaccError): pass
1931
1932# -----------------------------------------------------------------------------
1933# == LRGeneratedTable ==
1934#
1935# This class implements the LR table generation algorithm. There are no
1936# public methods except for write()
1937# -----------------------------------------------------------------------------
1938
1939class LRGeneratedTable(LRTable):
1940 def __init__(self,grammar,method='LALR',log=None):
1941 if method not in ['SLR','LALR']:
1942 raise LALRError("Unsupported method %s" % method)
1943
1944 self.grammar = grammar
1945 self.lr_method = method
1946
1947 # Set up the logger
1948 if not log:
1949 log = NullLogger()
1950 self.log = log
1951
1952 # Internal attributes
1953 self.lr_action = {} # Action table
1954 self.lr_goto = {} # Goto table
1955 self.lr_productions = grammar.Productions # Copy of grammar Production array
1956 self.lr_goto_cache = {} # Cache of computed gotos
1957 self.lr0_cidhash = {} # Cache of closures
1958
1959 self._add_count = 0 # Internal counter used to detect cycles
1960
1961        # Diagnostic information filled in by the table generator
1962 self.sr_conflict = 0
1963 self.rr_conflict = 0
1964 self.conflicts = [] # List of conflicts
1965
1966 self.sr_conflicts = []
1967 self.rr_conflicts = []
1968
1969 # Build the tables
1970 self.grammar.build_lritems()
1971 self.grammar.compute_first()
1972 self.grammar.compute_follow()
1973 self.lr_parse_table()
1974
1975 # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
1976
1977 def lr0_closure(self,I):
1978 self._add_count += 1
1979
1980 # Add everything in I to J
1981 J = I[:]
1982 didadd = 1
1983 while didadd:
1984 didadd = 0
1985 for j in J:
1986 for x in j.lr_after:
1987 if getattr(x,"lr0_added",0) == self._add_count: continue
1988 # Add B --> .G to J
1989 J.append(x.lr_next)
1990 x.lr0_added = self._add_count
1991 didadd = 1
1992
1993 return J
1994
1995 # Compute the LR(0) goto function goto(I,X) where I is a set
1996 # of LR(0) items and X is a grammar symbol. This function is written
1997 # in a way that guarantees uniqueness of the generated goto sets
1998 # (i.e. the same goto set will never be returned as two different Python
1999 # objects). With uniqueness, we can later do fast set comparisons using
2000 # id(obj) instead of element-wise comparison.
2001
2002 def lr0_goto(self,I,x):
2003 # First we look for a previously cached entry
2004 g = self.lr_goto_cache.get((id(I),x),None)
2005 if g: return g
2006
2007 # Now we generate the goto set in a way that guarantees uniqueness
2008 # of the result
2009
2010 s = self.lr_goto_cache.get(x,None)
2011 if not s:
2012 s = { }
2013 self.lr_goto_cache[x] = s
2014
2015 gs = [ ]
2016 for p in I:
2017 n = p.lr_next
2018 if n and n.lr_before == x:
2019 s1 = s.get(id(n),None)
2020 if not s1:
2021 s1 = { }
2022 s[id(n)] = s1
2023 gs.append(n)
2024 s = s1
2025 g = s.get('$end',None)
2026 if not g:
2027 if gs:
2028 g = self.lr0_closure(gs)
2029 s['$end'] = g
2030 else:
2031 s['$end'] = gs
2032 self.lr_goto_cache[(id(I),x)] = g
2033 return g
2034
2035 # Compute the LR(0) sets of item function
2036 def lr0_items(self):
2037
2038 C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]
2039 i = 0
2040 for I in C:
2041 self.lr0_cidhash[id(I)] = i
2042 i += 1
2043
2044        # Loop over the items in C and each grammar symbol
2045 i = 0
2046 while i < len(C):
2047 I = C[i]
2048 i += 1
2049
2050 # Collect all of the symbols that could possibly be in the goto(I,X) sets
2051 asyms = { }
2052 for ii in I:
2053 for s in ii.usyms:
2054 asyms[s] = None
2055
2056 for x in asyms:
2057 g = self.lr0_goto(I,x)
2058 if not g: continue
2059 if id(g) in self.lr0_cidhash: continue
2060 self.lr0_cidhash[id(g)] = len(C)
2061 C.append(g)
2062
2063 return C
2064
2065 # -----------------------------------------------------------------------------
2066 # ==== LALR(1) Parsing ====
2067 #
2068 # LALR(1) parsing is almost exactly the same as SLR except that instead of
2069 # relying upon Follow() sets when performing reductions, a more selective
2070 # lookahead set that incorporates the state of the LR(0) machine is utilized.
2071 # Thus, we mainly just have to focus on calculating the lookahead sets.
2072 #
2073    # The method used here is due to DeRemer and Pennello (1982).
2074    #
2075    # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
2076 # Lookahead Sets", ACM Transactions on Programming Languages and Systems,
2077 # Vol. 4, No. 4, Oct. 1982, pp. 615-649
2078 #
2079 # Further details can also be found in:
2080 #
2081 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
2082 # McGraw-Hill Book Company, (1985).
2083 #
2084 # -----------------------------------------------------------------------------
2085
2086 # -----------------------------------------------------------------------------
2087 # compute_nullable_nonterminals()
2088 #
2089 # Creates a dictionary containing all of the non-terminals that might produce
2090 # an empty production.
2091 # -----------------------------------------------------------------------------
2092
2093 def compute_nullable_nonterminals(self):
2094 nullable = {}
2095 num_nullable = 0
2096 while 1:
2097 for p in self.grammar.Productions[1:]:
2098 if p.len == 0:
2099 nullable[p.name] = 1
2100 continue
2101 for t in p.prod:
2102 if not t in nullable: break
2103 else:
2104 nullable[p.name] = 1
2105 if len(nullable) == num_nullable: break
2106 num_nullable = len(nullable)
2107 return nullable
2108
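    # Example: for a grammar containing
    #
    #     empty :
    #     opt   : empty
    #     opt   : ITEM
    #
    # the fixed point computed above is {'empty': 1, 'opt': 1}: 'empty'
    # has a zero-length production, and every symbol on the right hand
    # side of 'opt : empty' is itself nullable. (Rule names are
    # illustrative.)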
2109 # -----------------------------------------------------------------------------
2110 # find_nonterminal_trans(C)
2111 #
2112    # Given a set of LR(0) items, this function finds all of the non-terminal
2113 # transitions. These are transitions in which a dot appears immediately before
2114 # a non-terminal. Returns a list of tuples of the form (state,N) where state
2115 # is the state number and N is the nonterminal symbol.
2116 #
2117 # The input C is the set of LR(0) items.
2118 # -----------------------------------------------------------------------------
2119
2120 def find_nonterminal_transitions(self,C):
2121 trans = []
2122 for state in range(len(C)):
2123 for p in C[state]:
2124 if p.lr_index < p.len - 1:
2125 t = (state,p.prod[p.lr_index+1])
2126 if t[1] in self.grammar.Nonterminals:
2127 if t not in trans: trans.append(t)
2128            state = state + 1   # no effect: 'state' is rebound by the enclosing for loop
2129 return trans
2130
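    # Example: if state 0 contains the item "S' : . expr", the tuple
    # (0, 'expr') is collected, because the dot sits immediately before
    # the non-terminal 'expr'. An item such as "expr : expr . PLUS term"
    # contributes nothing, since 'PLUS' is a terminal. (Names are
    # illustrative.)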
2131 # -----------------------------------------------------------------------------
2132 # dr_relation()
2133 #
2134 # Computes the DR(p,A) relationships for non-terminal transitions. The input
2135 # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
2136 #
2137 # Returns a list of terminals.
2138 # -----------------------------------------------------------------------------
2139
2140 def dr_relation(self,C,trans,nullable):
2141 dr_set = { }
2142 state,N = trans
2143 terms = []
2144
2145 g = self.lr0_goto(C[state],N)
2146 for p in g:
2147 if p.lr_index < p.len - 1:
2148 a = p.prod[p.lr_index+1]
2149 if a in self.grammar.Terminals:
2150 if a not in terms: terms.append(a)
2151
2152 # This extra bit is to handle the start state
2153 if state == 0 and N == self.grammar.Productions[0].prod[0]:
2154 terms.append('$end')
2155
2156 return terms
2157
2158 # -----------------------------------------------------------------------------
2159 # reads_relation()
2160 #
2161 # Computes the READS() relation (p,A) READS (t,C).
2162 # -----------------------------------------------------------------------------
2163
2164 def reads_relation(self,C, trans, empty):
2165 # Look for empty transitions
2166 rel = []
2167 state, N = trans
2168
2169 g = self.lr0_goto(C[state],N)
2170 j = self.lr0_cidhash.get(id(g),-1)
2171 for p in g:
2172 if p.lr_index < p.len - 1:
2173 a = p.prod[p.lr_index + 1]
2174 if a in empty:
2175 rel.append((j,a))
2176
2177 return rel
2178
2179 # -----------------------------------------------------------------------------
2180 # compute_lookback_includes()
2181 #
2182 # Determines the lookback and includes relations
2183 #
2184 # LOOKBACK:
2185 #
2186 # This relation is determined by running the LR(0) state machine forward.
2187 # For example, starting with a production "N : . A B C", we run it forward
2188 # to obtain "N : A B C ." We then build a relationship between this final
2189 # state and the starting state. These relationships are stored in a dictionary
2190 # lookdict.
2191 #
2192 # INCLUDES:
2193 #
2194 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
2195 #
2196 # This relation is used to determine non-terminal transitions that occur
2197 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
2198 # if the following holds:
2199 #
2200 # B -> LAT, where T -> epsilon and p' -L-> p
2201 #
2202 # L is essentially a prefix (which may be empty), T is a suffix that must be
2203 # able to derive an empty string. State p' must lead to state p with the string L.
2204 #
2205 # -----------------------------------------------------------------------------
2206
2207 def compute_lookback_includes(self,C,trans,nullable):
2208
2209 lookdict = {} # Dictionary of lookback relations
2210 includedict = {} # Dictionary of include relations
2211
2212 # Make a dictionary of non-terminal transitions
2213 dtrans = {}
2214 for t in trans:
2215 dtrans[t] = 1
2216
2217 # Loop over all transitions and compute lookbacks and includes
2218 for state,N in trans:
2219 lookb = []
2220 includes = []
2221 for p in C[state]:
2222 if p.name != N: continue
2223
2224 # Okay, we have a name match. We now follow the production all the way
2225 # through the state machine until we get the . on the right hand side
2226
2227 lr_index = p.lr_index
2228 j = state
2229 while lr_index < p.len - 1:
2230 lr_index = lr_index + 1
2231 t = p.prod[lr_index]
2232
2233 # Check to see if this symbol and state are a non-terminal transition
2234 if (j,t) in dtrans:
2235 # Yes. Okay, there is some chance that this is an includes relation
2236 # the only way to know for certain is whether the rest of the
2237 # production derives empty
2238
2239 li = lr_index + 1
2240 while li < p.len:
2241                        if p.prod[li] in self.grammar.Terminals: break      # No, forget it
2242 if not p.prod[li] in nullable: break
2243 li = li + 1
2244 else:
2245 # Appears to be a relation between (j,t) and (state,N)
2246 includes.append((j,t))
2247
2248 g = self.lr0_goto(C[j],t) # Go to next set
2249 j = self.lr0_cidhash.get(id(g),-1) # Go to next state
2250
2251 # When we get here, j is the final state, now we have to locate the production
2252 for r in C[j]:
2253 if r.name != p.name: continue
2254 if r.len != p.len: continue
2255 i = 0
2256                # This loop is comparing a production ". A B C" with "A B C ."
2257 while i < r.lr_index:
2258 if r.prod[i] != p.prod[i+1]: break
2259 i = i + 1
2260 else:
2261 lookb.append((j,r))
2262 for i in includes:
2263 if not i in includedict: includedict[i] = []
2264 includedict[i].append((state,N))
2265 lookdict[(state,N)] = lookb
2266
2267 return lookdict,includedict
2268
2269 # -----------------------------------------------------------------------------
2270 # compute_read_sets()
2271 #
2272 # Given a set of LR(0) items, this function computes the read sets.
2273 #
2274 # Inputs: C = Set of LR(0) items
2275 # ntrans = Set of nonterminal transitions
2276    #          nullable = Set of nullable non-terminals
2277 #
2278 # Returns a set containing the read sets
2279 # -----------------------------------------------------------------------------
2280
2281 def compute_read_sets(self,C, ntrans, nullable):
2282 FP = lambda x: self.dr_relation(C,x,nullable)
2283 R = lambda x: self.reads_relation(C,x,nullable)
2284 F = digraph(ntrans,R,FP)
2285 return F
2286
2287 # -----------------------------------------------------------------------------
2288 # compute_follow_sets()
2289 #
2290 # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
2291 # and an include set, this function computes the follow sets
2292 #
2293 # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
2294 #
2295 # Inputs:
2296 # ntrans = Set of nonterminal transitions
2297 # readsets = Readset (previously computed)
2298 # inclsets = Include sets (previously computed)
2299 #
2300 # Returns a set containing the follow sets
2301 # -----------------------------------------------------------------------------
2302
2303 def compute_follow_sets(self,ntrans,readsets,inclsets):
2304 FP = lambda x: readsets[x]
2305 R = lambda x: inclsets.get(x,[])
2306 F = digraph(ntrans,R,FP)
2307 return F
2308
2309 # -----------------------------------------------------------------------------
2310 # add_lookaheads()
2311 #
2312 # Attaches the lookahead symbols to grammar rules.
2313 #
2314 # Inputs: lookbacks - Set of lookback relations
2315 # followset - Computed follow set
2316 #
2317 # This function directly attaches the lookaheads to productions contained
2318 # in the lookbacks set
2319 # -----------------------------------------------------------------------------
2320
2321 def add_lookaheads(self,lookbacks,followset):
2322 for trans,lb in lookbacks.items():
2323 # Loop over productions in lookback
2324 for state,p in lb:
2325 if not state in p.lookaheads:
2326 p.lookaheads[state] = []
2327 f = followset.get(trans,[])
2328 for a in f:
2329 if a not in p.lookaheads[state]: p.lookaheads[state].append(a)
2330
2331 # -----------------------------------------------------------------------------
2332 # add_lalr_lookaheads()
2333 #
2334 # This function does all of the work of adding lookahead information for use
2335 # with LALR parsing
2336 # -----------------------------------------------------------------------------
2337
2338 def add_lalr_lookaheads(self,C):
2339 # Determine all of the nullable nonterminals
2340 nullable = self.compute_nullable_nonterminals()
2341
2342 # Find all non-terminal transitions
2343 trans = self.find_nonterminal_transitions(C)
2344
2345 # Compute read sets
2346 readsets = self.compute_read_sets(C,trans,nullable)
2347
2348 # Compute lookback/includes relations
2349 lookd, included = self.compute_lookback_includes(C,trans,nullable)
2350
2351 # Compute LALR FOLLOW sets
2352 followsets = self.compute_follow_sets(trans,readsets,included)
2353
2354 # Add all of the lookaheads
2355 self.add_lookaheads(lookd,followsets)
2356
2357 # -----------------------------------------------------------------------------
2358 # lr_parse_table()
2359 #
2360 # This function constructs the parse tables for SLR or LALR
2361 # -----------------------------------------------------------------------------
2362 def lr_parse_table(self):
2363 Productions = self.grammar.Productions
2364 Precedence = self.grammar.Precedence
2365 goto = self.lr_goto # Goto array
2366 action = self.lr_action # Action array
2367 log = self.log # Logger for output
2368
2369 actionp = { } # Action production array (temporary)
2370
2371 log.info("Parsing method: %s", self.lr_method)
2372
2373 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
2374 # This determines the number of states
2375
2376 C = self.lr0_items()
2377
2378 if self.lr_method == 'LALR':
2379 self.add_lalr_lookaheads(C)
2380
2381 # Build the parser table, state by state
2382 st = 0
2383 for I in C:
2384 # Loop over each production in I
2385 actlist = [ ] # List of actions
2386 st_action = { }
2387 st_actionp = { }
2388 st_goto = { }
2389 log.info("")
2390 log.info("state %d", st)
2391 log.info("")
2392 for p in I:
2393 log.info(" (%d) %s", p.number, str(p))
2394 log.info("")
2395
2396 for p in I:
2397 if p.len == p.lr_index + 1:
2398 if p.name == "S'":
2399 # Start symbol. Accept!
2400 st_action["$end"] = 0
2401 st_actionp["$end"] = p
2402 else:
2403 # We are at the end of a production. Reduce!
2404 if self.lr_method == 'LALR':
2405 laheads = p.lookaheads[st]
2406 else:
2407 laheads = self.grammar.Follow[p.name]
2408 for a in laheads:
2409 actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
2410 r = st_action.get(a,None)
2411 if r is not None:
2412 # Whoa. Have a shift/reduce or reduce/reduce conflict
2413 if r > 0:
2414 # Need to decide on shift or reduce here
2415 # By default we favor shifting. Need to add
2416 # some precedence rules here.
2417 sprec,slevel = Productions[st_actionp[a].number].prec
2418 rprec,rlevel = Precedence.get(a,('right',0))
2419 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
2420 # We really need to reduce here.
2421 st_action[a] = -p.number
2422 st_actionp[a] = p
2423 if not slevel and not rlevel:
2424 log.info(" ! shift/reduce conflict for %s resolved as reduce",a)
2425 self.sr_conflicts.append((st,a,'reduce'))
2426 Productions[p.number].reduced += 1
2427 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2428 st_action[a] = None
2429 else:
2430 # Hmmm. Guess we'll keep the shift
2431 if not rlevel:
2432 log.info(" ! shift/reduce conflict for %s resolved as shift",a)
2433 self.sr_conflicts.append((st,a,'shift'))
2434 elif r < 0:
2435 # Reduce/reduce conflict. In this case, we favor the rule
2436 # that was defined first in the grammar file
2437 oldp = Productions[-r]
2438 pp = Productions[p.number]
2439 if oldp.line > pp.line:
2440 st_action[a] = -p.number
2441 st_actionp[a] = p
2442 chosenp,rejectp = pp,oldp
2443 Productions[p.number].reduced += 1
2444 Productions[oldp.number].reduced -= 1
2445 else:
2446 chosenp,rejectp = oldp,pp
2447 self.rr_conflicts.append((st,chosenp,rejectp))
2448 log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a])
2449 else:
2450 raise LALRError("Unknown conflict in state %d" % st)
2451 else:
2452 st_action[a] = -p.number
2453 st_actionp[a] = p
2454 Productions[p.number].reduced += 1
2455 else:
2456 i = p.lr_index
2457 a = p.prod[i+1] # Get symbol right after the "."
2458 if a in self.grammar.Terminals:
2459 g = self.lr0_goto(I,a)
2460 j = self.lr0_cidhash.get(id(g),-1)
2461 if j >= 0:
2462 # We are in a shift state
2463 actlist.append((a,p,"shift and go to state %d" % j))
2464 r = st_action.get(a,None)
2465 if r is not None:
2466 # Whoa have a shift/reduce or shift/shift conflict
2467 if r > 0:
2468 if r != j:
2469 raise LALRError("Shift/shift conflict in state %d" % st)
2470 elif r < 0:
2471 # Do a precedence check.
2472 # - if precedence of reduce rule is higher, we reduce.
2473 # - if precedence of reduce is same and left assoc, we reduce.
2474 # - otherwise we shift
2475 rprec,rlevel = Productions[st_actionp[a].number].prec
2476 sprec,slevel = Precedence.get(a,('right',0))
2477 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
2478 # We decide to shift here... highest precedence to shift
2479 Productions[st_actionp[a].number].reduced -= 1
2480 st_action[a] = j
2481 st_actionp[a] = p
2482 if not rlevel:
2483 log.info(" ! shift/reduce conflict for %s resolved as shift",a)
2484 self.sr_conflicts.append((st,a,'shift'))
2485 elif (slevel == rlevel) and (rprec == 'nonassoc'):
2486 st_action[a] = None
2487 else:
2488 # Hmmm. Guess we'll keep the reduce
2489 if not slevel and not rlevel:
2490 log.info(" ! shift/reduce conflict for %s resolved as reduce",a)
2491 self.sr_conflicts.append((st,a,'reduce'))
2492
2493 else:
2494 raise LALRError("Unknown conflict in state %d" % st)
2495 else:
2496 st_action[a] = j
2497 st_actionp[a] = p
2498
2499 # Print the actions associated with each terminal
2500 _actprint = { }
2501 for a,p,m in actlist:
2502 if a in st_action:
2503 if p is st_actionp[a]:
2504 log.info(" %-15s %s",a,m)
2505 _actprint[(a,m)] = 1
2506 log.info("")
2507 # Print the actions that were not used. (debugging)
2508 not_used = 0
2509 for a,p,m in actlist:
2510 if a in st_action:
2511 if p is not st_actionp[a]:
2512 if not (a,m) in _actprint:
2513 log.debug(" ! %-15s [ %s ]",a,m)
2514 not_used = 1
2515 _actprint[(a,m)] = 1
2516 if not_used:
2517 log.debug("")
2518
2519 # Construct the goto table for this state
2520
2521 nkeys = { }
2522 for ii in I:
2523 for s in ii.usyms:
2524 if s in self.grammar.Nonterminals:
2525 nkeys[s] = None
2526 for n in nkeys:
2527 g = self.lr0_goto(I,n)
2528 j = self.lr0_cidhash.get(id(g),-1)
2529 if j >= 0:
2530 st_goto[n] = j
2531 log.info(" %-30s shift and go to state %d",n,j)
2532
2533 action[st] = st_action
2534 actionp[st] = st_actionp
2535 goto[st] = st_goto
2536 st += 1
2537
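    # The finished tables are plain dictionaries keyed by state number.
    # A hypothetical pair of entries might look like:
    #
    #     self.lr_action[0] = {'NUMBER': 3, 'LPAREN': 4}  # positive: shift
    #     self.lr_action[7] = {'$end': -2}                # negative: reduce
    #     self.lr_goto[0]   = {'expr': 1, 'term': 2}
    #
    # A positive action means "shift and go to that state", a negative
    # action means "reduce by that rule number", and 0 means "accept".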
2538
2539 # -----------------------------------------------------------------------------
2540 # write()
2541 #
2542 # This function writes the LR parsing tables to a file
2543 # -----------------------------------------------------------------------------
2544
2545 def write_table(self,modulename,outputdir='',signature=""):
2546 basemodulename = modulename.split(".")[-1]
2547 filename = os.path.join(outputdir,basemodulename) + ".py"
2548 try:
2549 f = open(filename,"w")
2550
2551 f.write("""
2552# %s
2553# This file is automatically generated. Do not edit.
2554_tabversion = %r
2555
2556_lr_method = %r
2557
2558_lr_signature = %r
2559 """ % (filename, __tabversion__, self.lr_method, signature))
2560
2561 # Change smaller to 0 to go back to original tables
2562 smaller = 1
2563
2564 # Factor out names to try and make smaller
2565 if smaller:
2566 items = { }
2567
2568 for s,nd in self.lr_action.items():
2569 for name,v in nd.items():
2570 i = items.get(name)
2571 if not i:
2572 i = ([],[])
2573 items[name] = i
2574 i[0].append(s)
2575 i[1].append(v)
2576
2577 f.write("\n_lr_action_items = {")
2578 for k,v in items.items():
2579 f.write("%r:([" % k)
2580 for i in v[0]:
2581 f.write("%r," % i)
2582 f.write("],[")
2583 for i in v[1]:
2584 f.write("%r," % i)
2585
2586 f.write("]),")
2587 f.write("}\n")
2588
2589 f.write("""
2590_lr_action = { }
2591for _k, _v in _lr_action_items.items():
2592 for _x,_y in zip(_v[0],_v[1]):
2593 if not _x in _lr_action: _lr_action[_x] = { }
2594 _lr_action[_x][_k] = _y
2595del _lr_action_items
2596""")
2597
2598 else:
2599            f.write("\n_lr_action = { ")
2600 for k,v in self.lr_action.items():
2601 f.write("(%r,%r):%r," % (k[0],k[1],v))
2602            f.write("}\n")
2603
2604 if smaller:
2605 # Factor out names to try and make smaller
2606 items = { }
2607
2608 for s,nd in self.lr_goto.items():
2609 for name,v in nd.items():
2610 i = items.get(name)
2611 if not i:
2612 i = ([],[])
2613 items[name] = i
2614 i[0].append(s)
2615 i[1].append(v)
2616
2617 f.write("\n_lr_goto_items = {")
2618 for k,v in items.items():
2619 f.write("%r:([" % k)
2620 for i in v[0]:
2621 f.write("%r," % i)
2622 f.write("],[")
2623 for i in v[1]:
2624 f.write("%r," % i)
2625
2626 f.write("]),")
2627 f.write("}\n")
2628
2629 f.write("""
2630_lr_goto = { }
2631for _k, _v in _lr_goto_items.items():
2632 for _x,_y in zip(_v[0],_v[1]):
2633 if not _x in _lr_goto: _lr_goto[_x] = { }
2634 _lr_goto[_x][_k] = _y
2635del _lr_goto_items
2636""")
2637 else:
2638            f.write("\n_lr_goto = { ")
2639 for k,v in self.lr_goto.items():
2640 f.write("(%r,%r):%r," % (k[0],k[1],v))
2641            f.write("}\n")
2642
2643 # Write production table
2644 f.write("_lr_productions = [\n")
2645 for p in self.lr_productions:
2646 if p.func:
2647 f.write(" (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line))
2648 else:
2649 f.write(" (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len))
2650 f.write("]\n")
2651 f.close()
2652
2653 except IOError:
2654 e = sys.exc_info()[1]
2655 sys.stderr.write("Unable to create '%s'\n" % filename)
2656 sys.stderr.write(str(e)+"\n")
2657 return
2658
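    # A trimmed sketch of the generated module (all values hypothetical):
    #
    #     # parsetab.py
    #     # This file is automatically generated. Do not edit.
    #     _tabversion = '3.2'
    #     _lr_method = 'LALR'
    #     _lr_signature = '...'
    #     _lr_action_items = {'NUMBER':([0,4,],[3,3,]),}
    #     ...
    #     _lr_productions = [
    #         ("S' -> expr", "S'", 1, None, None, None),
    #     ]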
2659
2660 # -----------------------------------------------------------------------------
2661 # pickle_table()
2662 #
2663 # This function pickles the LR parsing tables to a supplied file object
2664 # -----------------------------------------------------------------------------
2665
2666 def pickle_table(self,filename,signature=""):
2667 try:
2668 import cPickle as pickle
2669 except ImportError:
2670 import pickle
2671 outf = open(filename,"wb")
2672 pickle.dump(__tabversion__,outf,pickle_protocol)
2673 pickle.dump(self.lr_method,outf,pickle_protocol)
2674 pickle.dump(signature,outf,pickle_protocol)
2675 pickle.dump(self.lr_action,outf,pickle_protocol)
2676 pickle.dump(self.lr_goto,outf,pickle_protocol)
2677
2678 outp = []
2679 for p in self.lr_productions:
2680 if p.func:
2681 outp.append((p.str,p.name, p.len, p.func,p.file,p.line))
2682 else:
2683 outp.append((str(p),p.name,p.len,None,None,None))
2684 pickle.dump(outp,outf,pickle_protocol)
2685 outf.close()
2686
2687# -----------------------------------------------------------------------------
2688# === INTROSPECTION ===
2689#
2690# The following functions and classes are used to implement the PLY
2691# introspection features followed by the yacc() function itself.
2692# -----------------------------------------------------------------------------
2693
2694# -----------------------------------------------------------------------------
2695# get_caller_module_dict()
2696#
2697# This function returns a dictionary containing all of the symbols defined within
2698# a caller further down the call stack. This is used to get the environment
2699# associated with the yacc() call if none was provided.
2700# -----------------------------------------------------------------------------
2701
2702def get_caller_module_dict(levels):
2703 try:
2704 raise RuntimeError
2705 except RuntimeError:
2706 e,b,t = sys.exc_info()
2707 f = t.tb_frame
2708 while levels > 0:
2709 f = f.f_back
2710 levels -= 1
2711 ldict = f.f_globals.copy()
2712 if f.f_globals != f.f_locals:
2713 ldict.update(f.f_locals)
2714
2715 return ldict
2716
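# Example: if module A calls helper(), and helper() in turn calls
# get_caller_module_dict(2), the result is A's globals updated with A's
# locals. This is how yacc() below picks up the caller's 'tokens',
# 'precedence' and p_* functions when no explicit module is given.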
2717# -----------------------------------------------------------------------------
2718# parse_grammar()
2719#
2720# This takes a raw grammar rule string and parses it into production data
2721# -----------------------------------------------------------------------------
2722def parse_grammar(doc,file,line):
2723 grammar = []
2724 # Split the doc string into lines
2725 pstrings = doc.splitlines()
2726 lastp = None
2727 dline = line
2728 for ps in pstrings:
2729 dline += 1
2730 p = ps.split()
2731 if not p: continue
2732 try:
2733 if p[0] == '|':
2734 # This is a continuation of a previous rule
2735 if not lastp:
2736 raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline))
2737 prodname = lastp
2738 syms = p[1:]
2739 else:
2740 prodname = p[0]
2741 lastp = prodname
2742 syms = p[2:]
2743 assign = p[1]
2744 if assign != ':' and assign != '::=':
2745 raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline))
2746
2747 grammar.append((file,dline,prodname,syms))
2748 except SyntaxError:
2749 raise
2750 except Exception:
2751 raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip()))
2752
2753 return grammar
2754
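# Example: for a rule function defined at line 10 of 'calc.py' whose
# docstring reads
#
#     expr : expr PLUS term
#          | term
#
# parse_grammar(doc, 'calc.py', 10) returns
#
#     [('calc.py', 11, 'expr', ['expr', 'PLUS', 'term']),
#      ('calc.py', 12, 'expr', ['term'])]
#
# (file and symbol names are illustrative).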
2755# -----------------------------------------------------------------------------
2756# ParserReflect()
2757#
2758# This class represents information extracted for building a parser including
2759# start symbol, error function, tokens, precedence list, action functions,
2760# etc.
2761# -----------------------------------------------------------------------------
2762class ParserReflect(object):
2763 def __init__(self,pdict,log=None):
2764 self.pdict = pdict
2765 self.start = None
2766 self.error_func = None
2767 self.tokens = None
2768 self.files = {}
2769 self.grammar = []
2770 self.error = 0
2771
2772 if log is None:
2773 self.log = PlyLogger(sys.stderr)
2774 else:
2775 self.log = log
2776
2777 # Get all of the basic information
2778 def get_all(self):
2779 self.get_start()
2780 self.get_error_func()
2781 self.get_tokens()
2782 self.get_precedence()
2783 self.get_pfunctions()
2784
2785 # Validate all of the information
2786 def validate_all(self):
2787 self.validate_start()
2788 self.validate_error_func()
2789 self.validate_tokens()
2790 self.validate_precedence()
2791 self.validate_pfunctions()
2792 self.validate_files()
2793 return self.error
2794
2795 # Compute a signature over the grammar
2796 def signature(self):
2797 try:
2798 from hashlib import md5
2799 except ImportError:
2800 from md5 import md5
2801 try:
2802 sig = md5()
2803 if self.start:
2804 sig.update(self.start.encode('latin-1'))
2805 if self.prec:
2806 sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1'))
2807 if self.tokens:
2808 sig.update(" ".join(self.tokens).encode('latin-1'))
2809 for f in self.pfuncs:
2810 if f[3]:
2811 sig.update(f[3].encode('latin-1'))
2812 except (TypeError,ValueError):
2813 pass
2814 return sig.digest()
2815
2816 # -----------------------------------------------------------------------------
2817 # validate_file()
2818 #
2819 # This method checks to see if there are duplicated p_rulename() functions
2820 # in the parser module file. Without this function, it is really easy for
2821 # users to make mistakes by cutting and pasting code fragments (and it's a real
2822 # bugger to try and figure out why the resulting parser doesn't work). Therefore,
2823 # we just do a little regular expression pattern matching of def statements
2824 # to try and detect duplicates.
2825 # -----------------------------------------------------------------------------
2826
2827 def validate_files(self):
2828 # Match def p_funcname(
2829 fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
2830
2831 for filename in self.files.keys():
2832 base,ext = os.path.splitext(filename)
2833 if ext != '.py': return 1 # No idea. Assume it's okay.
2834
2835 try:
2836 f = open(filename)
2837 lines = f.readlines()
2838 f.close()
2839 except IOError:
2840 continue
2841
2842 counthash = { }
2843 for linen,l in enumerate(lines):
2844 linen += 1
2845 m = fre.match(l)
2846 if m:
2847 name = m.group(1)
2848 prev = counthash.get(name)
2849 if not prev:
2850 counthash[name] = linen
2851 else:
2852 self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev)
2853
2854 # Get the start symbol
2855 def get_start(self):
2856 self.start = self.pdict.get('start')
2857
2858 # Validate the start symbol
2859 def validate_start(self):
2860 if self.start is not None:
2861 if not isinstance(self.start,str):
2862 self.log.error("'start' must be a string")
2863
2864 # Look for error handler
2865 def get_error_func(self):
2866 self.error_func = self.pdict.get('p_error')
2867
2868 # Validate the error function
2869 def validate_error_func(self):
2870 if self.error_func:
2871 if isinstance(self.error_func,types.FunctionType):
2872 ismethod = 0
2873 elif isinstance(self.error_func, types.MethodType):
2874 ismethod = 1
2875 else:
2876 self.log.error("'p_error' defined, but is not a function or method")
2877 self.error = 1
2878 return
2879
2880 eline = func_code(self.error_func).co_firstlineno
2881 efile = func_code(self.error_func).co_filename
2882 self.files[efile] = 1
2883
2884 if (func_code(self.error_func).co_argcount != 1+ismethod):
2885 self.log.error("%s:%d: p_error() requires 1 argument",efile,eline)
2886 self.error = 1
2887
2888 # Get the tokens map
2889 def get_tokens(self):
2890 tokens = self.pdict.get("tokens",None)
2891 if not tokens:
2892 self.log.error("No token list is defined")
2893 self.error = 1
2894 return
2895
2896 if not isinstance(tokens,(list, tuple)):
2897 self.log.error("tokens must be a list or tuple")
2898 self.error = 1
2899 return
2900
2901 if not tokens:
2902 self.log.error("tokens is empty")
2903 self.error = 1
2904 return
2905
2906 self.tokens = tokens
2907
2908 # Validate the tokens
2909 def validate_tokens(self):
2910 # Validate the tokens.
2911 if 'error' in self.tokens:
2912            self.log.error("Illegal token name 'error': it is a reserved word")
2913 self.error = 1
2914 return
2915
2916 terminals = {}
2917 for n in self.tokens:
2918 if n in terminals:
2919 self.log.warning("Token '%s' multiply defined", n)
2920 terminals[n] = 1
2921
2922 # Get the precedence map (if any)
2923 def get_precedence(self):
2924 self.prec = self.pdict.get("precedence",None)
2925
2926 # Validate and parse the precedence map
2927 def validate_precedence(self):
2928 preclist = []
2929 if self.prec:
2930 if not isinstance(self.prec,(list,tuple)):
2931 self.log.error("precedence must be a list or tuple")
2932 self.error = 1
2933 return
2934 for level,p in enumerate(self.prec):
2935 if not isinstance(p,(list,tuple)):
2936 self.log.error("Bad precedence table")
2937 self.error = 1
2938 return
2939
2940 if len(p) < 2:
2941 self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p)
2942 self.error = 1
2943 return
2944 assoc = p[0]
2945 if not isinstance(assoc,str):
2946 self.log.error("precedence associativity must be a string")
2947 self.error = 1
2948 return
2949 for term in p[1:]:
2950 if not isinstance(term,str):
2951 self.log.error("precedence items must be strings")
2952 self.error = 1
2953 return
2954 preclist.append((term,assoc,level+1))
2955 self.preclist = preclist
2956
2957 # Get all p_functions from the grammar
2958 def get_pfunctions(self):
2959 p_functions = []
2960 for name, item in self.pdict.items():
2961 if name[:2] != 'p_': continue
2962 if name == 'p_error': continue
2963 if isinstance(item,(types.FunctionType,types.MethodType)):
2964 line = func_code(item).co_firstlineno
2965 file = func_code(item).co_filename
2966 p_functions.append((line,file,name,item.__doc__))
2967
2968 # Sort all of the actions by line number
2969 p_functions.sort()
2970 self.pfuncs = p_functions
2971
2972
2973 # Validate all of the p_functions
2974 def validate_pfunctions(self):
2975 grammar = []
2976 # Check for non-empty symbols
2977 if len(self.pfuncs) == 0:
2978 self.log.error("no rules of the form p_rulename are defined")
2979 self.error = 1
2980 return
2981
2982 for line, file, name, doc in self.pfuncs:
2983 func = self.pdict[name]
2984 if isinstance(func, types.MethodType):
2985 reqargs = 2
2986 else:
2987 reqargs = 1
2988 if func_code(func).co_argcount > reqargs:
2989 self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__)
2990 self.error = 1
2991 elif func_code(func).co_argcount < reqargs:
2992 self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__)
2993 self.error = 1
2994 elif not func.__doc__:
2995 self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__)
2996 else:
2997 try:
2998 parsed_g = parse_grammar(doc,file,line)
2999 for g in parsed_g:
3000 grammar.append((name, g))
3001 except SyntaxError:
3002 e = sys.exc_info()[1]
3003 self.log.error(str(e))
3004 self.error = 1
3005
3006 # Looks like a valid grammar rule
3007            # Mark the file in which it was defined.
3008 self.files[file] = 1
3009
3010 # Secondary validation step that looks for p_ definitions that are not functions
3011 # or functions that look like they might be grammar rules.
3012
3013 for n,v in self.pdict.items():
3014 if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue
3015 if n[0:2] == 't_': continue
3016 if n[0:2] == 'p_' and n != 'p_error':
3017 self.log.warning("'%s' not defined as a function", n)
3018 if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or
3019 (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)):
3020 try:
3021 doc = v.__doc__.split(" ")
3022 if doc[1] == ':':
3023 self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix",
3024 func_code(v).co_filename, func_code(v).co_firstlineno,n)
3025 except Exception:
3026 pass
3027
3028 self.grammar = grammar
3029
3030# -----------------------------------------------------------------------------
3031# yacc(module)
3032#
3033# Build a parser
3034# -----------------------------------------------------------------------------
3035
3036def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
3037 check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file,outputdir='',
3038 debuglog=None, errorlog = None, picklefile=None):
3039
3040 global parse # Reference to the parsing method of the last built parser
3041
3042 # If pickling is enabled, table files are not created
3043
3044 if picklefile:
3045 write_tables = 0
3046
3047 if errorlog is None:
3048 errorlog = PlyLogger(sys.stderr)
3049
3050 # Get the module dictionary used for the parser
3051 if module:
3052 _items = [(k,getattr(module,k)) for k in dir(module)]
3053 pdict = dict(_items)
3054 else:
3055 pdict = get_caller_module_dict(2)
3056
3057 # Collect parser information from the dictionary
3058 pinfo = ParserReflect(pdict,log=errorlog)
3059 pinfo.get_all()
3060
3061 if pinfo.error:
3062 raise YaccError("Unable to build parser")
3063
3064 # Check signature against table files (if any)
3065 signature = pinfo.signature()
3066
3067 # Read the tables
3068 try:
3069 lr = LRTable()
3070 if picklefile:
3071 read_signature = lr.read_pickle(picklefile)
3072 else:
3073 read_signature = lr.read_table(tabmodule)
3074 if optimize or (read_signature == signature):
3075 try:
3076 lr.bind_callables(pinfo.pdict)
3077 parser = LRParser(lr,pinfo.error_func)
3078 parse = parser.parse
3079 return parser
3080 except Exception:
3081 e = sys.exc_info()[1]
3082 errorlog.warning("There was a problem loading the table file: %s", repr(e))
3083 except VersionError:
3084        e = sys.exc_info()[1]
3085 errorlog.warning(str(e))
3086 except Exception:
3087 pass
3088
3089 if debuglog is None:
3090 if debug:
3091 debuglog = PlyLogger(open(debugfile,"w"))
3092 else:
3093 debuglog = NullLogger()
3094
3095 debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)
3096
3097
3098 errors = 0
3099
3100 # Validate the parser information
3101 if pinfo.validate_all():
3102 raise YaccError("Unable to build parser")
3103
3104 if not pinfo.error_func:
3105 errorlog.warning("no p_error() function is defined")
3106
3107 # Create a grammar object
3108 grammar = Grammar(pinfo.tokens)
3109
3110 # Set precedence level for terminals
3111 for term, assoc, level in pinfo.preclist:
3112 try:
3113 grammar.set_precedence(term,assoc,level)
3114 except GrammarError:
3115 e = sys.exc_info()[1]
3116 errorlog.warning("%s",str(e))
3117
3118 # Add productions to the grammar
3119 for funcname, gram in pinfo.grammar:
3120 file, line, prodname, syms = gram
3121 try:
3122 grammar.add_production(prodname,syms,funcname,file,line)
3123 except GrammarError:
3124 e = sys.exc_info()[1]
3125 errorlog.error("%s",str(e))
3126 errors = 1
3127
3128 # Set the grammar start symbols
3129 try:
3130 if start is None:
3131 grammar.set_start(pinfo.start)
3132 else:
3133 grammar.set_start(start)
3134 except GrammarError:
3135 e = sys.exc_info()[1]
3136 errorlog.error(str(e))
3137 errors = 1
3138
3139 if errors:
3140 raise YaccError("Unable to build parser")
3141
3142 # Verify the grammar structure
3143 undefined_symbols = grammar.undefined_symbols()
3144 for sym, prod in undefined_symbols:
3145 errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym)
3146 errors = 1
3147
3148 unused_terminals = grammar.unused_terminals()
3149 if unused_terminals:
3150 debuglog.info("")
3151 debuglog.info("Unused terminals:")
3152 debuglog.info("")
3153 for term in unused_terminals:
3154 errorlog.warning("Token '%s' defined, but not used", term)
3155 debuglog.info(" %s", term)
3156
3157 # Print out all productions to the debug log
3158 if debug:
3159 debuglog.info("")
3160 debuglog.info("Grammar")
3161 debuglog.info("")
3162 for n,p in enumerate(grammar.Productions):
3163 debuglog.info("Rule %-5d %s", n, p)
3164
3165 # Find unused non-terminals
3166 unused_rules = grammar.unused_rules()
3167 for prod in unused_rules:
3168 errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name)
3169
3170 if len(unused_terminals) == 1:
3171 errorlog.warning("There is 1 unused token")
3172 if len(unused_terminals) > 1:
3173 errorlog.warning("There are %d unused tokens", len(unused_terminals))
3174
3175 if len(unused_rules) == 1:
3176 errorlog.warning("There is 1 unused rule")
3177 if len(unused_rules) > 1:
3178 errorlog.warning("There are %d unused rules", len(unused_rules))
3179
3180 if debug:
3181 debuglog.info("")
3182 debuglog.info("Terminals, with rules where they appear")
3183 debuglog.info("")
3184 terms = list(grammar.Terminals)
3185 terms.sort()
3186 for term in terms:
3187 debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))
3188
3189 debuglog.info("")
3190 debuglog.info("Nonterminals, with rules where they appear")
3191 debuglog.info("")
3192 nonterms = list(grammar.Nonterminals)
3193 nonterms.sort()
3194 for nonterm in nonterms:
3195 debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))
3196 debuglog.info("")
3197
3198 if check_recursion:
3199 unreachable = grammar.find_unreachable()
3200 for u in unreachable:
3201 errorlog.warning("Symbol '%s' is unreachable",u)
3202
3203 infinite = grammar.infinite_cycles()
3204 for inf in infinite:
3205 errorlog.error("Infinite recursion detected for symbol '%s'", inf)
3206 errors = 1
3207
3208 unused_prec = grammar.unused_precedence()
3209 for term, assoc in unused_prec:
3210 errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term)
3211 errors = 1
3212
3213 if errors:
3214 raise YaccError("Unable to build parser")
3215
3216 # Run the LRGeneratedTable on the grammar
3217 if debug:
3218 errorlog.debug("Generating %s tables", method)
3219
3220 lr = LRGeneratedTable(grammar,method,debuglog)
3221
3222 if debug:
3223 num_sr = len(lr.sr_conflicts)
3224
3225 # Report shift/reduce and reduce/reduce conflicts
3226 if num_sr == 1:
3227 errorlog.warning("1 shift/reduce conflict")
3228 elif num_sr > 1:
3229 errorlog.warning("%d shift/reduce conflicts", num_sr)
3230
3231 num_rr = len(lr.rr_conflicts)
3232 if num_rr == 1:
3233 errorlog.warning("1 reduce/reduce conflict")
3234 elif num_rr > 1:
3235 errorlog.warning("%d reduce/reduce conflicts", num_rr)
3236
3237 # Write out conflicts to the output file
3238 if debug and (lr.sr_conflicts or lr.rr_conflicts):
3239 debuglog.warning("")
3240 debuglog.warning("Conflicts:")
3241 debuglog.warning("")
3242
3243 for state, tok, resolution in lr.sr_conflicts:
3244 debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution)
3245
3246 already_reported = {}
3247 for state, rule, rejected in lr.rr_conflicts:
3248 if (state,id(rule),id(rejected)) in already_reported:
3249 continue
3250 debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
3251 debuglog.warning("rejected rule (%s) in state %d", rejected,state)
3252 errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
3253 errorlog.warning("rejected rule (%s) in state %d", rejected, state)
3254 already_reported[state,id(rule),id(rejected)] = 1
3255
3256 warned_never = []
3257 for state, rule, rejected in lr.rr_conflicts:
3258 if not rejected.reduced and (rejected not in warned_never):
3259 debuglog.warning("Rule (%s) is never reduced", rejected)
3260 errorlog.warning("Rule (%s) is never reduced", rejected)
3261 warned_never.append(rejected)
3262
3263 # Write the table file if requested
3264 if write_tables:
3265 lr.write_table(tabmodule,outputdir,signature)
3266
3267 # Write a pickled version of the tables
3268 if picklefile:
3269 lr.pickle_table(picklefile,signature)
3270
3271 # Build the parser
3272 lr.bind_callables(pinfo.pdict)
3273 parser = LRParser(lr,pinfo.error_func)
3274
3275 parse = parser.parse
3276 return parser
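# -----------------------------------------------------------------------------
# Example usage (a sketch, not part of this module): a minimal parser built
# through the yacc() entry point above, assuming the bundled package is
# importable as 'ply':
#
#     from ply import lex, yacc
#
#     tokens = ('NUMBER', 'PLUS')
#     t_PLUS = r'\+'
#     t_ignore = ' '
#
#     def t_NUMBER(t):
#         r'\d+'
#         t.value = int(t.value)
#         return t
#
#     def t_error(t):
#         t.lexer.skip(1)
#
#     def p_expr_plus(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]
#
#     def p_expr_term(p):
#         'expr : term'
#         p[0] = p[1]
#
#     def p_term_number(p):
#         'term : NUMBER'
#         p[0] = p[1]
#
#     def p_error(p):
#         pass
#
#     lexer = lex.lex()
#     parser = yacc.yacc()                        # builds or reloads the tables
#     print(parser.parse("1 + 2", lexer=lexer))   # --> 3
# -----------------------------------------------------------------------------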
diff --git a/bitbake/lib/pysh/__init__.py b/bitbake/lib/pysh/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/bitbake/lib/pysh/__init__.py
diff --git a/bitbake/lib/pysh/builtin.py b/bitbake/lib/pysh/builtin.py
new file mode 100644
index 0000000000..25ad22eb74
--- /dev/null
+++ b/bitbake/lib/pysh/builtin.py
@@ -0,0 +1,710 @@
1# builtin.py - builtins and utilities definitions for pysh.
2#
3# Copyright 2007 Patrick Mezard
4#
5# This software may be used and distributed according to the terms
6# of the GNU General Public License, incorporated herein by reference.
7
8"""Builtin and internal utilities implementations.
9
10- Beware not to use the python interpreter environment as if it were the shell
11environment. For instance, a command's working directory must be explicitly handled
12through env['PWD'] instead of relying on the python working directory.
13"""
14import errno
15import optparse
16import os
17import re
18import subprocess
19import sys
20import time
21
22def has_subprocess_bug():
23    return getattr(subprocess, 'list2cmdline', None) and \
24 ( subprocess.list2cmdline(['']) == '' or \
25 subprocess.list2cmdline(['foo|bar']) == 'foo|bar')
26
27# Detect python bug 1634343: "subprocess swallows empty arguments under win32"
28# <http://sourceforge.net/tracker/index.php?func=detail&aid=1634343&group_id=5470&atid=105470>
29# Also detect: "[ 1710802 ] subprocess must escape redirection characters under win32"
30# <http://sourceforge.net/tracker/index.php?func=detail&aid=1710802&group_id=5470&atid=105470>
31if has_subprocess_bug():
32 import subprocess_fix
33 subprocess.list2cmdline = subprocess_fix.list2cmdline
34
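# Example of the faulty behaviour being probed: a correct list2cmdline
# must quote empty and metacharacter-containing arguments, e.g.
#
#     list2cmdline(['a', '', 'b'])   # --> 'a "" b'
#     list2cmdline(['foo|bar'])      # --> '"foo|bar"'
#
# whereas the buggy versions return 'a  b' and 'foo|bar' respectively.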
35from sherrors import *
36
37class NonExitingParser(optparse.OptionParser):
38 """OptionParser default behaviour upon error is to print the error message and
39 exit. Raise a utility error instead.
40 """
41 def error(self, msg):
42 raise UtilityError(msg)
43
44#-------------------------------------------------------------------------------
45# set special builtin
46#-------------------------------------------------------------------------------
47OPT_SET = NonExitingParser(usage="set - set or unset options and positional parameters")
48OPT_SET.add_option( '-f', action='store_true', dest='has_f', default=False,
49 help='The shell shall disable pathname expansion.')
50OPT_SET.add_option('-e', action='store_true', dest='has_e', default=False,
51 help="""When this option is on, if a simple command fails for any of the \
52 reasons listed in Consequences of Shell Errors or returns an exit status \
53 value >0, and is not part of the compound list following a while, until, \
54 or if keyword, and is not a part of an AND or OR list, and is not a \
55 pipeline preceded by the ! reserved word, then the shell shall immediately \
56 exit.""")
57OPT_SET.add_option('-x', action='store_true', dest='has_x', default=False,
58 help="""The shell shall write to standard error a trace for each command \
59 after it expands the command and before it executes it. It is unspecified \
60 whether the command that turns tracing off is traced.""")
61
62def builtin_set(name, args, interp, env, stdin, stdout, stderr, debugflags):
63 if 'debug-utility' in debugflags:
64 print interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
65
66 option, args = OPT_SET.parse_args(args)
67 env = interp.get_env()
68
69 if option.has_f:
70 env.set_opt('-f')
71 if option.has_e:
72 env.set_opt('-e')
73 if option.has_x:
74 env.set_opt('-x')
75 return 0
76
77#-------------------------------------------------------------------------------
78# shift special builtin
79#-------------------------------------------------------------------------------
80def builtin_shift(name, args, interp, env, stdin, stdout, stderr, debugflags):
81 if 'debug-utility' in debugflags:
82 print interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
83
84 params = interp.get_env().get_positional_args()
85 if args:
86 try:
87 n = int(args[0])
88 if n > len(params):
89 raise ValueError()
90 except ValueError:
91 return 1
92 else:
93 n = 1
94
95 params[:n] = []
96 interp.get_env().set_positional_args(params)
97 return 0
98
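# Example: with positional parameters ('a', 'b', 'c'), "shift 2" leaves
# ('c',) and returns 0; plain "shift" behaves like "shift 1"; "shift 4"
# exceeds the parameter count and returns 1, leaving them untouched.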
99#-------------------------------------------------------------------------------
100# export special builtin
101#-------------------------------------------------------------------------------
102OPT_EXPORT = NonExitingParser(usage="export - set the export attribute for variables")
103OPT_EXPORT.add_option('-p', action='store_true', dest='has_p', default=False)
104
105def builtin_export(name, args, interp, env, stdin, stdout, stderr, debugflags):
106 if 'debug-utility' in debugflags:
107 print interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
108
109 option, args = OPT_EXPORT.parse_args(args)
110 if option.has_p:
111 raise NotImplementedError()
112
113 for arg in args:
114 try:
115 name, value = arg.split('=', 1)
116 except ValueError:
117 name, value = arg, None
118 env = interp.get_env().export(name, value)
119
120 return 0
121
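# Example: "export PATH=/usr/bin" splits into ('PATH', '/usr/bin'),
# while "export FOO" passes ('FOO', None) to env.export(), presumably
# marking the existing variable for export without changing its value.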
122#-------------------------------------------------------------------------------
123# return special builtin
124#-------------------------------------------------------------------------------
125def builtin_return(name, args, interp, env, stdin, stdout, stderr, debugflags):
126 if 'debug-utility' in debugflags:
127 print interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
128 res = 0
129 if args:
130 try:
131 res = int(args[0])
132 except ValueError:
133 res = 0
134 if not 0<=res<=255:
135 res = 0
136
137 # BUG: should be last executed command exit code
138 raise ReturnSignal(res)
139
140#-------------------------------------------------------------------------------
141# trap special builtin
142#-------------------------------------------------------------------------------
143def builtin_trap(name, args, interp, env, stdin, stdout, stderr, debugflags):
144 if 'debug-utility' in debugflags:
145 print interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
146 if len(args) < 2:
147 stderr.write('trap: usage: trap [[arg] signal_spec ...]\n')
148 return 2
149
150 action = args[0]
151 for sig in args[1:]:
152 try:
153 env.traps[sig] = action
154 except Exception, e:
155 stderr.write('trap: %s\n' % str(e))
156 return 0
157
158#-------------------------------------------------------------------------------
159# unset special builtin
160#-------------------------------------------------------------------------------
161OPT_UNSET = NonExitingParser("unset - unset values and attributes of variables and functions")
162OPT_UNSET.add_option( '-f', action='store_true', dest='has_f', default=False)
163OPT_UNSET.add_option( '-v', action='store_true', dest='has_v', default=False)
164
165def builtin_unset(name, args, interp, env, stdin, stdout, stderr, debugflags):
166 if 'debug-utility' in debugflags:
167 print interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
168
169 option, args = OPT_UNSET.parse_args(args)
170
171 status = 0
172 env = interp.get_env()
173 for arg in args:
174 try:
175 if option.has_f:
176 env.remove_function(arg)
177 else:
178 del env[arg]
179 except KeyError:
180 pass
181 except VarAssignmentError:
182 status = 1
183
184 return status
185
186#-------------------------------------------------------------------------------
187# wait special builtin
188#-------------------------------------------------------------------------------
189def builtin_wait(name, args, interp, env, stdin, stdout, stderr, debugflags):
190 if 'debug-utility' in debugflags:
191 print interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
192
193 return interp.wait([int(arg) for arg in args])
194
195#-------------------------------------------------------------------------------
196# cat utility
197#-------------------------------------------------------------------------------
198def utility_cat(name, args, interp, env, stdin, stdout, stderr, debugflags):
199 if 'debug-utility' in debugflags:
200 print interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
201
202 if not args:
203 args = ['-']
204
205 status = 0
206 for arg in args:
207 if arg == '-':
208 data = stdin.read()
209 else:
210 path = os.path.join(env['PWD'], arg)
211 try:
212 f = file(path, 'rb')
213 try:
214 data = f.read()
215 finally:
216 f.close()
217 except IOError, e:
218 if e.errno != errno.ENOENT:
219 raise
220 status = 1
221 continue
222 stdout.write(data)
223 stdout.flush()
224 return status
225
226#-------------------------------------------------------------------------------
227# cd utility
228#-------------------------------------------------------------------------------
229OPT_CD = NonExitingParser("cd - change the working directory")
230
231def utility_cd(name, args, interp, env, stdin, stdout, stderr, debugflags):
232 if 'debug-utility' in debugflags:
233 print interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
234
235 option, args = OPT_CD.parse_args(args)
236 env = interp.get_env()
237
238 directory = None
239 printdir = False
240 if not args:
241 home = env.get('HOME')
242        if home:
243            directory = home
244        else:
245            # HOME unset or empty: behaviour is unspecified, do nothing
246            return 0
247 elif len(args)==1:
248 directory = args[0]
249 if directory=='-':
250 if 'OLDPWD' not in env:
251 raise UtilityError("OLDPWD not set")
252 printdir = True
253 directory = env['OLDPWD']
254 else:
255 raise UtilityError("too many arguments")
256
257 curpath = None
258 # Absolute directories will be handled correctly by the os.path.join call.
259 if not directory.startswith('.') and not directory.startswith('..'):
260 cdpaths = env.get('CDPATH', '.').split(';')
261 for cdpath in cdpaths:
262 p = os.path.join(cdpath, directory)
263 if os.path.isdir(p):
264 curpath = p
265 break
266
267 if curpath is None:
268 curpath = directory
269    curpath = os.path.join(env['PWD'], curpath)
270
271 env['OLDPWD'] = env['PWD']
272 env['PWD'] = curpath
273 if printdir:
274 stdout.write('%s\n' % curpath)
275 return 0
276
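# Example: with env['CDPATH'] == '.;/usr/src' and args == ['project'],
# the loop above tries './project' and then '/usr/src/project', keeping
# the first existing directory; an operand starting with '.' or '..'
# skips the CDPATH search entirely. (Paths are illustrative.)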
277#-------------------------------------------------------------------------------
278# colon utility
279#-------------------------------------------------------------------------------
280def utility_colon(name, args, interp, env, stdin, stdout, stderr, debugflags):
281 if 'debug-utility' in debugflags:
282 print interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
283 return 0
284
285#-------------------------------------------------------------------------------
286# echo utility
287#-------------------------------------------------------------------------------
288def utility_echo(name, args, interp, env, stdin, stdout, stderr, debugflags):
289 if 'debug-utility' in debugflags:
290 print interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
291
292 # Echo only takes arguments, no options. Use printf if you need fancy stuff.
293 output = ' '.join(args) + '\n'
294 stdout.write(output)
295 stdout.flush()
296 return 0
297
298#-------------------------------------------------------------------------------
299# egrep utility
300#-------------------------------------------------------------------------------
301# egrep is usually a shell script.
302# Unfortunately, pysh does not support shell scripts *with arguments* right now,
303# so the mapping to 'grep -E' is implemented here, assuming grep is available.
304def utility_egrep(name, args, interp, env, stdin, stdout, stderr, debugflags):
305 if 'debug-utility' in debugflags:
306 print interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
307
308 return run_command('grep', ['-E'] + args, interp, env, stdin, stdout,
309 stderr, debugflags)
310
311#-------------------------------------------------------------------------------
312# env utility
313#-------------------------------------------------------------------------------
314def utility_env(name, args, interp, env, stdin, stdout, stderr, debugflags):
315 if 'debug-utility' in debugflags:
316 print interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
317
318 if args and args[0]=='-i':
319 raise NotImplementedError('env: -i option is not implemented')
320
321 i = 0
322 for arg in args:
323 if '=' not in arg:
324 break
325 # Update the current environment
326 name, value = arg.split('=', 1)
327 env[name] = value
328 i += 1
329
330 if args[i:]:
331 # Find then execute the specified interpreter
332 utility = env.find_in_path(args[i])
333 if not utility:
334 return 127
335 args[i:i+1] = utility
336 name = args[i]
337 args = args[i+1:]
338 try:
339 return run_command(name, args, interp, env, stdin, stdout, stderr,
340 debugflags)
341 except UtilityError:
342            stderr.write('env: failed to execute %s\n' % ' '.join([name]+args))
343 return 126
344 else:
345 for pair in env.get_variables().iteritems():
346 stdout.write('%s=%s\n' % pair)
347 return 0
348
349#-------------------------------------------------------------------------------
350# exit utility
351#-------------------------------------------------------------------------------
352def utility_exit(name, args, interp, env, stdin, stdout, stderr, debugflags):
353 if 'debug-utility' in debugflags:
354 print interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
355
356 res = None
357 if args:
358 try:
359 res = int(args[0])
360 except ValueError:
361 res = None
362 if not 0<=res<=255:
363 res = None
364
365 if res is None:
366 # BUG: should be last executed command exit code
367 res = 0
368
369 raise ExitSignal(res)
370
371#-------------------------------------------------------------------------------
372# fgrep utility
373#-------------------------------------------------------------------------------
374# see egrep
375def utility_fgrep(name, args, interp, env, stdin, stdout, stderr, debugflags):
376 if 'debug-utility' in debugflags:
377 print interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
378
379 return run_command('grep', ['-F'] + args, interp, env, stdin, stdout,
380 stderr, debugflags)
381
382#-------------------------------------------------------------------------------
383# gunzip utility
384#-------------------------------------------------------------------------------
385# see egrep
386def utility_gunzip(name, args, interp, env, stdin, stdout, stderr, debugflags):
387 if 'debug-utility' in debugflags:
388        interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
389
390 return run_command('gzip', ['-d'] + args, interp, env, stdin, stdout,
391 stderr, debugflags)
392
393#-------------------------------------------------------------------------------
394# kill utility
395#-------------------------------------------------------------------------------
396def utility_kill(name, args, interp, env, stdin, stdout, stderr, debugflags):
397 if 'debug-utility' in debugflags:
398        interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
399
400 for arg in args:
401 pid = int(arg)
402 status = subprocess.call(['pskill', '/T', str(pid)],
403 shell=True,
404 stdout=subprocess.PIPE,
405 stderr=subprocess.PIPE)
406 # pskill is asynchronous, hence the stupid polling loop
407 while 1:
408 p = subprocess.Popen(['pslist', str(pid)],
409 shell=True,
410 stdout=subprocess.PIPE,
411 stderr=subprocess.STDOUT)
412 output = p.communicate()[0]
413 if ('process %d was not' % pid) in output:
414 break
415 time.sleep(1)
416 return status
417
418#-------------------------------------------------------------------------------
419# mkdir utility
420#-------------------------------------------------------------------------------
421OPT_MKDIR = NonExitingParser("mkdir - make directories.")
422OPT_MKDIR.add_option('-p', action='store_true', dest='has_p', default=False)
423
424def utility_mkdir(name, args, interp, env, stdin, stdout, stderr, debugflags):
425 if 'debug-utility' in debugflags:
426        interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
427
428 # TODO: implement umask
429 # TODO: implement proper utility error report
430 option, args = OPT_MKDIR.parse_args(args)
431 for arg in args:
432 path = os.path.join(env['PWD'], arg)
433 if option.has_p:
434 try:
435 os.makedirs(path)
436            except OSError, e:
437 if e.errno != errno.EEXIST:
438 raise
439 else:
440 os.mkdir(path)
441 return 0
442
443#-------------------------------------------------------------------------------
444# netstat utility
445#-------------------------------------------------------------------------------
446def utility_netstat(name, args, interp, env, stdin, stdout, stderr, debugflags):
447    # Do you really expect me to implement netstat?
448 # This empty form is enough for Mercurial tests since it's
449 # supposed to generate nothing upon success. Faking this test
450 # is not a big deal either.
451 if 'debug-utility' in debugflags:
452        interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
453 return 0
454
455#-------------------------------------------------------------------------------
456# pwd utility
457#-------------------------------------------------------------------------------
458OPT_PWD = NonExitingParser("pwd - return working directory name")
459OPT_PWD.add_option('-L', action='store_true', dest='has_L', default=True,
460 help="""If the PWD environment variable contains an absolute pathname of \
461 the current directory that does not contain the filenames dot or dot-dot, \
462 pwd shall write this pathname to standard output. Otherwise, the -L option \
463 shall behave as the -P option.""")
464OPT_PWD.add_option('-P', action='store_false', dest='has_L',
465 help="""The absolute pathname written shall not contain filenames that, in \
466 the context of the pathname, refer to files of type symbolic link.""")
467
468def utility_pwd(name, args, interp, env, stdin, stdout, stderr, debugflags):
469 if 'debug-utility' in debugflags:
470        interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
471
472 option, args = OPT_PWD.parse_args(args)
473 stdout.write('%s\n' % env['PWD'])
474 return 0
475
476#-------------------------------------------------------------------------------
477# printf utility
478#-------------------------------------------------------------------------------
479RE_UNESCAPE = re.compile(r'(\\x[0-9a-fA-F]{2}|\\[0-7]{1,3}|\\.)')
480
481def utility_printf(name, args, interp, env, stdin, stdout, stderr, debugflags):
482 if 'debug-utility' in debugflags:
483        interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
484
485 def replace(m):
486 assert m.group()
487 g = m.group()[1:]
488 if g.startswith('x'):
489 return chr(int(g[1:], 16))
490 if len(g) <= 3 and len([c for c in g if c in '01234567']) == len(g):
491 # Yay, an octal number
492 return chr(int(g, 8))
493 return {
494 'a': '\a',
495 'b': '\b',
496 'f': '\f',
497 'n': '\n',
498 'r': '\r',
499 't': '\t',
500 'v': '\v',
501 '\\': '\\',
502 }.get(g)
503
504 # Convert escape sequences
505 format = re.sub(RE_UNESCAPE, replace, args[0])
506 stdout.write(format % tuple(args[1:]))
507 return 0
508
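# Illustration: a standalone sketch of the escape pass above (the names
# below are hypothetical and unused elsewhere in this module). Hex escapes
# need two hex digits, octal escapes take one to three digits, and the
# remaining escapes map through a dictionary, as in utility_printf.
_RE_UNESCAPE_DEMO = re.compile(r'(\\x[0-9a-fA-F]{2}|\\[0-7]{1,3}|\\.)')

def _unescape_demo(s):
    def replace(m):
        g = m.group()[1:]
        if g.startswith('x'):
            return chr(int(g[1:], 16))
        if all(c in '01234567' for c in g):
            return chr(int(g, 8))
        return {'a': '\a', 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r',
                't': '\t', 'v': '\v', '\\': '\\'}.get(g)
    return re.sub(_RE_UNESCAPE_DEMO, replace, s)

assert _unescape_demo(r'a\tb\n') == 'a\tb\n'
assert _unescape_demo(r'\x41\101') == 'AA'   # hex and octal spellings of 'A'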
509#-------------------------------------------------------------------------------
510# true utility
511#-------------------------------------------------------------------------------
512def utility_true(name, args, interp, env, stdin, stdout, stderr, debugflags):
513 if 'debug-utility' in debugflags:
514        interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
515 return 0
516
517#-------------------------------------------------------------------------------
518# sed utility
519#-------------------------------------------------------------------------------
520RE_SED = re.compile(r'^s(.).*\1[a-zA-Z]*$')
521
522# cygwin sed fails with some expressions when they do not end with a single space.
523# See the unit tests for details. Interestingly, the same expressions work
524# perfectly in the cygwin shell.
525def utility_sed(name, args, interp, env, stdin, stdout, stderr, debugflags):
526 if 'debug-utility' in debugflags:
527        interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
528
529 # Scan pattern arguments and append a space if necessary
530 for i in xrange(len(args)):
531 if not RE_SED.search(args[i]):
532 continue
533 args[i] = args[i] + ' '
534
535 return run_command(name, args, interp, env, stdin, stdout,
536 stderr, debugflags)
537
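# Illustration: RE_SED only recognizes arguments shaped like a substitution
# expression with an arbitrary delimiter, so the workaround leaves other
# arguments alone. The pattern is deliberately loose; note that it can also
# match an ordinary word such as 'status', a tolerated false positive.
assert RE_SED.search('s/foo/bar/g')
assert RE_SED.search('s,a,b,')
assert not RE_SED.search('-e')
assert RE_SED.search('status')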
538#-------------------------------------------------------------------------------
539# sleep utility
540#-------------------------------------------------------------------------------
541def utility_sleep(name, args, interp, env, stdin, stdout, stderr, debugflags):
542 if 'debug-utility' in debugflags:
543        interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
544 time.sleep(int(args[0]))
545 return 0
546
547#-------------------------------------------------------------------------------
548# sort utility
549#-------------------------------------------------------------------------------
550OPT_SORT = NonExitingParser("sort - sort, merge, or sequence check text files")
551
552def utility_sort(name, args, interp, env, stdin, stdout, stderr, debugflags):
553
554 def sort(path):
555 if path == '-':
556 lines = stdin.readlines()
557 else:
558 try:
559 f = file(path)
560 try:
561 lines = f.readlines()
562 finally:
563 f.close()
564 except IOError, e:
565 stderr.write(str(e) + '\n')
566 return 1
567
568 if lines and lines[-1][-1]!='\n':
569 lines[-1] = lines[-1] + '\n'
570 return lines
571
572 if 'debug-utility' in debugflags:
573        interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
574
575 option, args = OPT_SORT.parse_args(args)
576 alllines = []
577
578 if len(args)<=0:
579 args += ['-']
580
581    # Load the lines of all input files
582 curdir = os.getcwd()
583 try:
584 os.chdir(env['PWD'])
585 for path in args:
586 alllines += sort(path)
587 finally:
588 os.chdir(curdir)
589
590 alllines.sort()
591 for line in alllines:
592 stdout.write(line)
593 return 0
594
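# Illustration: the sort above is a plain lexicographic line sort; repairing
# a missing final newline first keeps the last line comparable with the
# others (the variable below is hypothetical).
_demo_lines = ['b\n', 'a']
if _demo_lines[-1][-1] != '\n':
    _demo_lines[-1] = _demo_lines[-1] + '\n'
_demo_lines.sort()
assert _demo_lines == ['a\n', 'b\n']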
595#-------------------------------------------------------------------------------
596# hg utility
597#-------------------------------------------------------------------------------
598
599hgcommands = [
600 'add',
601 'addremove',
602 'commit', 'ci',
603 'debugrename',
604 'debugwalk',
605 'falabala', # Dummy command used in a mercurial test
606 'incoming',
607 'locate',
608 'pull',
609 'push',
610 'qinit',
611 'remove', 'rm',
612 'rename', 'mv',
613 'revert',
614 'showconfig',
615 'status', 'st',
616 'strip',
617 ]
618
619def rewriteslashes(name, args):
620 # Several hg commands output file paths, rewrite the separators
621 if len(args) > 1 and name.lower().endswith('python') \
622 and args[0].endswith('hg'):
623 for cmd in hgcommands:
624 if cmd in args[1:]:
625 return True
626
627 # svn output contains many paths with OS specific separators.
628 # Normalize these to unix paths.
629 base = os.path.basename(name)
630 if base.startswith('svn'):
631 return True
632
633 return False
634
635def rewritehg(output):
636 if not output:
637 return output
638 # Rewrite os specific messages
639 output = output.replace(': The system cannot find the file specified',
640 ': No such file or directory')
641 output = re.sub(': Access is denied.*$', ': Permission denied', output)
642 output = output.replace(': No connection could be made because the target machine actively refused it',
643 ': Connection refused')
644 return output
645
646
647def run_command(name, args, interp, env, stdin, stdout,
648 stderr, debugflags):
649 # Execute the command
650 if 'debug-utility' in debugflags:
651        interp.log(' '.join([name, str(args), interp['PWD']]) + '\n')
652
653 hgbin = interp.options().hgbinary
654 ishg = hgbin and ('hg' in name or args and 'hg' in args[0])
655 unixoutput = 'cygwin' in name or ishg
656
657 exec_env = env.get_variables()
658 try:
659        # BUG: comparing file descriptors is clearly not a reliable way to
660        # tell whether they point to the same underlying object. But within
661        # pysh's limited scope this is usually right: we do not expect
662        # complicated redirections besides the usual 2>&1.
663        # Still, there is one case we cannot deal with: when stdout and
664        # stderr are redirected *by the pysh caller*. This is the reason for
665        # the --redirect pysh() option.
666        # Now, we want to know whether they are the same because we sometimes
667        # need to transform the command output, mostly removing CR-LF to
668        # ensure it is unix-like. Cygwin utilities are a special case because
669        # they explicitly set their output streams to binary mode, so we have
670        # nothing to do. For all other commands, we have to guess whether
671        # they are sending text data, in which case the transformation must
672        # be done. Again, the NUL character test is unreliable but should be
673        # enough for the hg tests.
674 redirected = stdout.fileno()==stderr.fileno()
675 if not redirected:
676 p = subprocess.Popen([name] + args, cwd=env['PWD'], env=exec_env,
677 stdin=stdin, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
678 else:
679 p = subprocess.Popen([name] + args, cwd=env['PWD'], env=exec_env,
680 stdin=stdin, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
681 out, err = p.communicate()
682 except WindowsError, e:
683 raise UtilityError(str(e))
684
685 if not unixoutput:
686 def encode(s):
687 if '\0' in s:
688 return s
689 return s.replace('\r\n', '\n')
690 else:
691 encode = lambda s: s
692
693 if rewriteslashes(name, args):
694 encode1_ = encode
695 def encode(s):
696 s = encode1_(s)
697 s = s.replace('\\\\', '\\')
698 s = s.replace('\\', '/')
699 return s
700
701 if ishg:
702 encode2_ = encode
703 def encode(s):
704 return rewritehg(encode2_(s))
705
706 stdout.write(encode(out))
707 if not redirected:
708 stderr.write(encode(err))
709 return p.returncode
710
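# Illustration: the encode() chain in run_command() is built by nesting
# closures, each stage wrapping the previous one. A standalone sketch of the
# CR-LF and slash stages (hypothetical helpers, unused elsewhere):

def _crlf_to_lf(s):
    # Data containing NUL is assumed binary and passed through untouched
    if '\0' in s:
        return s
    return s.replace('\r\n', '\n')

def _forward_slashes(s):
    return s.replace('\\\\', '\\').replace('\\', '/')

def _compose(f, g):
    return lambda s: g(f(s))

_encode_demo = _compose(_crlf_to_lf, _forward_slashes)
assert _encode_demo('a\\b\r\n') == 'a/b\n'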
diff --git a/bitbake/lib/pysh/interp.py b/bitbake/lib/pysh/interp.py
new file mode 100644
index 0000000000..efe5181e1e
--- /dev/null
+++ b/bitbake/lib/pysh/interp.py
@@ -0,0 +1,1367 @@
1# interp.py - shell interpreter for pysh.
2#
3# Copyright 2007 Patrick Mezard
4#
5# This software may be used and distributed according to the terms
6# of the GNU General Public License, incorporated herein by reference.
7
8"""Implement the shell interpreter.
9
10Most references are made to "The Open Group Base Specifications Issue 6".
11<http://www.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html>
12"""
13# TODO: document the fact that input streams must implement fileno() so Popen works correctly.
14# It requires non-stdin streams to be implemented as files. Still to be tested...
15# DOC: pathsep is used in PATH instead of ':'. Clearly, there are path syntax issues here.
16# TODO: stop command execution upon error.
17# TODO: sort out the filename/io_number mess. It should be possible to use filenames only.
18# TODO: review subshell implementation
19# TODO: test environment cloning for non-special builtins
20# TODO: set -x should not rebuild commands from tokens, assignments/redirections are lost
21# TODO: unit test for variable assignment
22# TODO: test error management wrt error type/utility type
23# TODO: test for binary output everywhere
24# BUG: debug-parsing does not pass log file to PLY. Maybe a PLY upgrade is necessary.
25import base64
26import cPickle as pickle
27import errno
28import glob
29import os
30import re
31import subprocess
32import sys
33import tempfile
34
35try:
36 s = set()
37 del s
38except NameError:
39    from sets import Set as set
40
41import builtin
42from sherrors import *
43import pyshlex
44import pyshyacc
45
46def mappend(func, *args, **kargs):
47 """Like map but assume func returns a list. Returned lists are merged into
48 a single one.
49 """
50 return reduce(lambda a,b: a+b, map(func, *args, **kargs), [])
51
52class FileWrapper:
53 """File object wrapper to ease debugging.
54
55 Allow mode checking and implement file duplication through a simple
56 reference counting scheme. Not sure the latter is really useful since
57 only real file descriptors can be used.
58 """
59 def __init__(self, mode, file, close=True):
60 if mode not in ('r', 'w', 'a'):
61 raise IOError('invalid mode: %s' % mode)
62 self._mode = mode
63 self._close = close
64 if isinstance(file, FileWrapper):
65            if not file._refcount or file._refcount[0] <= 0:
66 raise IOError(0, 'Error')
67 self._refcount = file._refcount
68 self._refcount[0] += 1
69 self._file = file._file
70 else:
71 self._refcount = [1]
72 self._file = file
73
74 def dup(self):
75 return FileWrapper(self._mode, self, self._close)
76
77 def fileno(self):
78 """fileno() should be only necessary for input streams."""
79 return self._file.fileno()
80
81 def read(self, size=-1):
82 if self._mode!='r':
83 raise IOError(0, 'Error')
84 return self._file.read(size)
85
86 def readlines(self, *args, **kwargs):
87 return self._file.readlines(*args, **kwargs)
88
89 def write(self, s):
90 if self._mode not in ('w', 'a'):
91 raise IOError(0, 'Error')
92 return self._file.write(s)
93
94 def flush(self):
95 self._file.flush()
96
97 def close(self):
98 if not self._refcount:
99 return
100 assert self._refcount[0] > 0
101
102 self._refcount[0] -= 1
103 if self._refcount[0] == 0:
104 self._mode = 'c'
105 if self._close:
106 self._file.close()
107        self._refcount = None # forget the counter so a later close() or __del__ is a no-op
108
109 def mode(self):
110 return self._mode
111
112 def __getattr__(self, name):
113 if name == 'name':
114 self.name = getattr(self._file, name)
115 return self.name
116 else:
117 raise AttributeError(name)
118
119 def __del__(self):
120 self.close()
121
122
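# Illustration: duplicated wrappers share one reference counter, so the
# underlying file is only closed when the last duplicate is closed
# (hypothetical helper, not called anywhere):
def _filewrapper_demo():
    from StringIO import StringIO
    w1 = FileWrapper('w', StringIO())
    w2 = w1.dup()
    w1.close()                  # refcount 2 -> 1, file stays open
    w2.write('still open\n')    # the duplicate is still usable
    w2.close()                  # refcount 1 -> 0, the file is closed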
123def win32_open_devnull(mode):
124 return open('NUL', mode)
125
126
127class Redirections:
128 """Stores open files and their mapping to pseudo-sh file descriptor.
129 """
130 # BUG: redirections are not handled correctly: 1>&3 2>&3 3>&4 does
131    # not make 1 redirect to 4
132 def __init__(self, stdin=None, stdout=None, stderr=None):
133 self._descriptors = {}
134 if stdin is not None:
135 self._add_descriptor(0, stdin)
136 if stdout is not None:
137 self._add_descriptor(1, stdout)
138 if stderr is not None:
139 self._add_descriptor(2, stderr)
140
141 def add_here_document(self, interp, name, content, io_number=None):
142 if io_number is None:
143 io_number = 0
144
145 if name==pyshlex.unquote_wordtree(name):
146 content = interp.expand_here_document(('TOKEN', content))
147
148 # Write document content in a temporary file
149 tmp = tempfile.TemporaryFile()
150 try:
151 tmp.write(content)
152 tmp.flush()
153 tmp.seek(0)
154 self._add_descriptor(io_number, FileWrapper('r', tmp))
155 except:
156 tmp.close()
157 raise
158
159 def add(self, interp, op, filename, io_number=None):
160 if op not in ('<', '>', '>|', '>>', '>&'):
161 # TODO: add descriptor duplication and here_documents
162 raise RedirectionError('Unsupported redirection operator "%s"' % op)
163
164 if io_number is not None:
165 io_number = int(io_number)
166
167 if (op == '>&' and filename.isdigit()) or filename=='-':
168 # No expansion for file descriptors, quote them if you want a filename
169 fullname = filename
170 else:
171 if filename.startswith('/'):
172 # TODO: win32 kludge
173 if filename=='/dev/null':
174 fullname = 'NUL'
175 else:
176 # TODO: handle absolute pathnames, they are unlikely to exist on the
177 # current platform (win32 for instance).
178 raise NotImplementedError()
179 else:
180 fullname = interp.expand_redirection(('TOKEN', filename))
181 if not fullname:
182 raise RedirectionError('%s: ambiguous redirect' % filename)
183 # Build absolute path based on PWD
184 fullname = os.path.join(interp.get_env()['PWD'], fullname)
185
186 if op=='<':
187 return self._add_input_redirection(interp, fullname, io_number)
188 elif op in ('>', '>|'):
189 clobber = ('>|'==op)
190 return self._add_output_redirection(interp, fullname, io_number, clobber)
191 elif op=='>>':
192 return self._add_output_appending(interp, fullname, io_number)
193 elif op=='>&':
194 return self._dup_output_descriptor(fullname, io_number)
195
196 def close(self):
197 if self._descriptors is not None:
198 for desc in self._descriptors.itervalues():
199 desc.flush()
200 desc.close()
201 self._descriptors = None
202
203 def stdin(self):
204 return self._descriptors[0]
205
206 def stdout(self):
207 return self._descriptors[1]
208
209 def stderr(self):
210 return self._descriptors[2]
211
212 def clone(self):
213 clone = Redirections()
214 for desc, fileobj in self._descriptors.iteritems():
215 clone._descriptors[desc] = fileobj.dup()
216 return clone
217
218 def _add_output_redirection(self, interp, filename, io_number, clobber):
219 if io_number is None:
220            # io_number defaults to standard output
221 io_number = 1
222
223 if not clobber and interp.get_env().has_opt('-C') and os.path.isfile(filename):
224            # File already exists in no-clobber mode, bail out
225 raise RedirectionError('File "%s" already exists' % filename)
226
227 # Open and register
228 self._add_file_descriptor(io_number, filename, 'w')
229
230 def _add_output_appending(self, interp, filename, io_number):
231 if io_number is None:
232 io_number = 1
233 self._add_file_descriptor(io_number, filename, 'a')
234
235 def _add_input_redirection(self, interp, filename, io_number):
236 if io_number is None:
237 io_number = 0
238 self._add_file_descriptor(io_number, filename, 'r')
239
240 def _add_file_descriptor(self, io_number, filename, mode):
241 try:
242 if filename.startswith('/'):
243 if filename=='/dev/null':
244 f = win32_open_devnull(mode+'b')
245 else:
246 # TODO: handle absolute pathnames, they are unlikely to exist on the
247 # current platform (win32 for instance).
248 raise NotImplementedError('cannot open absolute path %s' % repr(filename))
249 else:
250 f = file(filename, mode+'b')
251 except IOError, e:
252 raise RedirectionError(str(e))
253
254 wrapper = None
255 try:
256 wrapper = FileWrapper(mode, f)
257 f = None
258 self._add_descriptor(io_number, wrapper)
259 except:
260 if f: f.close()
261 if wrapper: wrapper.close()
262 raise
263
264 def _dup_output_descriptor(self, source_fd, dest_fd):
265 if source_fd is None:
266 source_fd = 1
267 self._dup_file_descriptor(source_fd, dest_fd, 'w')
268
269 def _dup_file_descriptor(self, source_fd, dest_fd, mode):
270 source_fd = int(source_fd)
271 if source_fd not in self._descriptors:
272 raise RedirectionError('"%s" is not a valid file descriptor' % str(source_fd))
273 source = self._descriptors[source_fd]
274
275 if source.mode()!=mode:
276 raise RedirectionError('Descriptor %s cannot be duplicated in mode "%s"' % (str(source), mode))
277
278 if dest_fd=='-':
279 # Close the source descriptor
280 del self._descriptors[source_fd]
281 source.close()
282 else:
283 dest_fd = int(dest_fd)
284 if dest_fd not in self._descriptors:
285 raise RedirectionError('Cannot replace file descriptor %s' % str(dest_fd))
286
287 dest = self._descriptors[dest_fd]
288 if dest.mode()!=mode:
289                raise RedirectionError('Descriptor %s cannot be redirected in mode "%s"' % (str(dest), mode))
290
291 self._descriptors[dest_fd] = source.dup()
292 dest.close()
293
294 def _add_descriptor(self, io_number, file):
295 io_number = int(io_number)
296
297 if io_number in self._descriptors:
298 # Close the current descriptor
299 d = self._descriptors[io_number]
300 del self._descriptors[io_number]
301 d.close()
302
303 self._descriptors[io_number] = file
304
305 def __str__(self):
306 names = [('%d=%r' % (k, getattr(v, 'name', None))) for k,v
307 in self._descriptors.iteritems()]
308 names = ','.join(names)
309 return 'Redirections(%s)' % names
310
311 def __del__(self):
312 self.close()
313
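# Illustration: a '2>&1' redirection expressed against the classes above.
# For the '>&' form with a digit target, the interp argument is never
# consulted, so None suffices in this sketch (hypothetical helper):
def _redirections_demo():
    out = FileWrapper('w', sys.stdout, False)
    err = FileWrapper('w', sys.stderr, False)
    redirs = Redirections(stdout=out, stderr=err)
    redirs.add(None, '>&', '1', 2)                 # 2>&1
    assert redirs.stderr() is not redirs.stdout()  # a dup, not the same object
    redirs.close()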
314def cygwin_to_windows_path(path):
315 """Turn /cygdrive/c/foo into c:/foo, or return path if it
316 is not a cygwin path.
317 """
318 if not path.startswith('/cygdrive/'):
319 return path
320 path = path[len('/cygdrive/'):]
321 path = path[:1] + ':' + path[1:]
322 return path
323
324def win32_to_unix_path(path):
325 if path is not None:
326 path = path.replace('\\', '/')
327 return path
328
329_RE_SHEBANG = re.compile(r'^\#!\s?([^\s]+)(?:\s([^\s]+))?')
330_SHEBANG_CMDS = {
331 '/usr/bin/env': 'env',
332 '/bin/sh': 'pysh',
333 'python': 'python',
334}
335
336def resolve_shebang(path, ignoreshell=False):
337 """Return a list of arguments as shebang interpreter call or an empty list
338 if path does not refer to an executable script.
339 See <http://www.opengroup.org/austin/docs/austin_51r2.txt>.
340
341    ignoreshell - set to True to ignore sh shebangs; an empty list is returned instead.
342 """
343 try:
344 f = file(path)
345 try:
346 # At most 80 characters in the first line
347 header = f.read(80).splitlines()[0]
348 finally:
349 f.close()
350
351 m = _RE_SHEBANG.search(header)
352 if not m:
353 return []
354 cmd, arg = m.group(1,2)
355 if os.path.isfile(cmd):
356 # Keep this one, the hg script for instance contains a weird windows
357 # shebang referencing the current python install.
358 cmdfile = os.path.basename(cmd).lower()
359 if cmdfile == 'python.exe':
360 cmd = 'python'
361 pass
362 elif cmd not in _SHEBANG_CMDS:
363 raise CommandNotFound('Unknown interpreter "%s" referenced in '\
364 'shebang' % header)
365 cmd = _SHEBANG_CMDS.get(cmd)
366 if cmd is None or (ignoreshell and cmd == 'pysh'):
367 return []
368 if arg is None:
369 return [cmd, win32_to_unix_path(path)]
370 return [cmd, arg, win32_to_unix_path(path)]
371 except IOError, e:
372 if e.errno!=errno.ENOENT and \
373 (e.errno!=errno.EPERM and not os.path.isdir(path)): # Opening a directory raises EPERM
374 raise
375 return []
376
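# Illustration: the shebang regexp splits the first line into an interpreter
# and one optional argument; resolve_shebang() then maps the interpreter
# through _SHEBANG_CMDS, so '#!/usr/bin/env python' on a script yields
# ['env', 'python', <script path>].
assert _RE_SHEBANG.search('#!/usr/bin/env python').group(1, 2) == \
    ('/usr/bin/env', 'python')
assert _RE_SHEBANG.search('#!/bin/sh').group(1, 2) == ('/bin/sh', None)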
377def win32_find_in_path(name, path):
378 if isinstance(path, str):
379 path = path.split(os.pathsep)
380
381 exts = os.environ.get('PATHEXT', '').lower().split(os.pathsep)
382 for p in path:
383 p_name = os.path.join(p, name)
384
385 prefix = resolve_shebang(p_name)
386 if prefix:
387 return prefix
388
389 for ext in exts:
390 p_name_ext = p_name + ext
391 if os.path.exists(p_name_ext):
392 return [win32_to_unix_path(p_name_ext)]
393 return []
394
395class Traps(dict):
396 def __setitem__(self, key, value):
397 if key not in ('EXIT',):
398 raise NotImplementedError()
399 super(Traps, self).__setitem__(key, value)
400
401# IFS whitespace character class
402_IFS_WHITESPACES = (' ', '\t', '\n')
403
404class Environment:
405 """Environment holds environment variables, export table, function
406 definitions and whatever is defined in 2.12 "Shell Execution Environment",
407 redirection excepted.
408 """
409 def __init__(self, pwd):
410 self._opt = set() #Shell options
411
412 self._functions = {}
413 self._env = {'?': '0', '#': '0'}
414 self._exported = set([
415 'HOME', 'IFS', 'PATH'
416 ])
417
418 # Set environment vars with side-effects
419 self._ifs_ws = None # Set of IFS whitespace characters
420 self._ifs_re = None # Regular expression used to split between words using IFS classes
421 self['IFS'] = ''.join(_IFS_WHITESPACES) #Default environment values
422 self['PWD'] = pwd
423 self.traps = Traps()
424
425 def clone(self, subshell=False):
426 env = Environment(self['PWD'])
427 env._opt = set(self._opt)
428 for k,v in self.get_variables().iteritems():
429 if k in self._exported:
430 env.export(k,v)
431 elif subshell:
432 env[k] = v
433
434 if subshell:
435 env._functions = dict(self._functions)
436
437 return env
438
439 def __getitem__(self, key):
440 if key in ('@', '*', '-', '$'):
441 raise NotImplementedError('%s is not implemented' % repr(key))
442 return self._env[key]
443
444 def get(self, key, defval=None):
445 try:
446 return self[key]
447 except KeyError:
448 return defval
449
450 def __setitem__(self, key, value):
451 if key=='IFS':
452 # Update the whitespace/non-whitespace classes
453 self._update_ifs(value)
454 elif key=='PWD':
455 pwd = os.path.abspath(value)
456 if not os.path.isdir(pwd):
457 raise VarAssignmentError('Invalid directory %s' % value)
458 value = pwd
459 elif key in ('?', '!'):
460 value = str(int(value))
461 self._env[key] = value
462
463 def __delitem__(self, key):
464 if key in ('IFS', 'PWD', '?'):
465 raise VarAssignmentError('%s cannot be unset' % key)
466 del self._env[key]
467
468 def __contains__(self, item):
469 return item in self._env
470
471 def set_positional_args(self, args):
472        """Set the content of 'args' as positional arguments from 1 to len(args).
473        Return the previous arguments as a list of strings.
474 """
475 # Save and remove previous arguments
476 prevargs = []
477 for i in xrange(int(self._env['#'])):
478 i = str(i+1)
479 prevargs.append(self._env[i])
480 del self._env[i]
481 self._env['#'] = '0'
482
483 #Set new ones
484 for i,arg in enumerate(args):
485 self._env[str(i+1)] = str(arg)
486 self._env['#'] = str(len(args))
487
488 return prevargs
489
490 def get_positional_args(self):
491 return [self._env[str(i+1)] for i in xrange(int(self._env['#']))]
492
493 def get_variables(self):
494 return dict(self._env)
495
496 def export(self, key, value=None):
497 if value is not None:
498 self[key] = value
499 self._exported.add(key)
500
501 def get_exported(self):
502 return [(k,self._env.get(k)) for k in self._exported]
503
504 def split_fields(self, word):
505 if not self._ifs_ws or not word:
506 return [word]
507 return re.split(self._ifs_re, word)
508
509 def _update_ifs(self, value):
510        """Update the split_fields related variables when the IFS character
511        set is changed.
512 """
513 # TODO: handle NULL IFS
514
515 # Separate characters in whitespace and non-whitespace
516 chars = set(value)
517 ws = [c for c in chars if c in _IFS_WHITESPACES]
518 nws = [c for c in chars if c not in _IFS_WHITESPACES]
519
520 # Keep whitespaces in a string for left and right stripping
521 self._ifs_ws = ''.join(ws)
522
523 # Build a regexp to split fields
524 trailing = '[' + ''.join([re.escape(c) for c in ws]) + ']'
525 if nws:
526            # One non-whitespace separator, optionally surrounded by whitespace
527 nws = '[' + ''.join([re.escape(c) for c in nws]) + ']'
528 nws = '(?:' + trailing + '*' + nws + trailing + '*' + '|' + trailing + '+)'
529 else:
530            # Whitespace-only IFS: split on whitespace runs
531 nws = trailing + '+'
532 self._ifs_re = re.compile(nws)
533
534 def has_opt(self, opt, val=None):
535 return (opt, val) in self._opt
536
537 def set_opt(self, opt, val=None):
538 self._opt.add((opt, val))
539
540 def find_in_path(self, name, pwd=False):
541 path = self._env.get('PATH', '').split(os.pathsep)
542 if pwd:
543 path[:0] = [self['PWD']]
544 if os.name == 'nt':
545            return win32_find_in_path(name, path)
546 else:
547 raise NotImplementedError()
548
549 def define_function(self, name, body):
550 if not is_name(name):
551 raise ShellSyntaxError('%s is not a valid function name' % repr(name))
552 self._functions[name] = body
553
554 def remove_function(self, name):
555 del self._functions[name]
556
557 def is_function(self, name):
558 return name in self._functions
559
560 def get_function(self, name):
561 return self._functions.get(name)
562
563
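# Illustration: field splitting with the Environment above. The default IFS
# collapses whitespace runs into one separator; with a mixed IFS every
# non-whitespace separator bounds a field, so consecutive separators yield
# empty fields (hypothetical helper, not called anywhere):
def _ifs_demo():
    env = Environment(os.getcwd())
    assert env.split_fields('foo  bar') == ['foo', 'bar']
    env['IFS'] = ': '
    assert env.split_fields('a::b') == ['a', '', 'b']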
564name_charset = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'
565name_charset = dict(zip(name_charset,name_charset))
566
567def match_name(s):
568    """Return the longest prefix of s made only of name-allowed
569    characters.
570    """
571 for i,c in enumerate(s):
572 if c not in name_charset:
573 return s[:i]
574 return s
575
576def is_name(s):
577 return len([c for c in s if c not in name_charset])<=0
578
579def is_special_param(c):
580 return len(c)==1 and c in ('@','*','#','?','-','$','!','0')
581
582def utility_not_implemented(name, *args, **kwargs):
583 raise NotImplementedError('%s utility is not implemented' % name)
584
585
586class Utility:
587 """Define utilities properties:
588 func -- utility callable. See builtin module for utility samples.
589 is_special -- see XCU 2.8.
590 """
591 def __init__(self, func, is_special=0):
592 self.func = func
593 self.is_special = bool(is_special)
594
595
596def encodeargs(args):
597 def encodearg(s):
598        # base64.encodestring returns newline-wrapped lines; strip the newlines
599        s = base64.encodestring(s).replace('\n', '')
600        return s
601
602 s = pickle.dumps(args)
603 return encodearg(s)
604
605def decodeargs(s):
606 s = base64.decodestring(s)
607 return pickle.loads(s)
608
609
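# Illustration: arguments for the asynchronous re-invocation (see _asynclist
# below) are pickled and base64-encoded into a single newline-free token
# that can travel on a command line (hypothetical variable):
_payload_demo = encodeargs(({'PATH': '/bin'}, ['echo', 'hi']))
assert '\n' not in _payload_demo
assert decodeargs(_payload_demo) == ({'PATH': '/bin'}, ['echo', 'hi'])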
610class GlobError(Exception):
611 pass
612
613class Options:
614 def __init__(self):
615 # True if Mercurial operates with binary streams
616 self.hgbinary = True
617
618class Interpreter:
619    # Implementation is very basic: the execute() method just makes a DFS on the
620    # AST and executes nodes one by one. Nodes are tuples (name, obj) where name
621    # is a string identifier and obj the AST element returned by the parser.
622    #
623    # Handlers are named after the node identifiers.
624 # TODO: check node names and remove the switch in execute with some
625 # dynamic getattr() call to find node handlers.
626 """Shell interpreter.
627
628 The following debugging flags can be passed:
629 debug-parsing - enable PLY debugging.
630 debug-tree - print the generated AST.
631 debug-cmd - trace command execution before word expansion, plus exit status.
632 debug-utility - trace utility execution.
633 """
634
635 # List supported commands.
636 COMMANDS = {
637 'cat': Utility(builtin.utility_cat,),
638 'cd': Utility(builtin.utility_cd,),
639 ':': Utility(builtin.utility_colon,),
640 'echo': Utility(builtin.utility_echo),
641 'env': Utility(builtin.utility_env),
642 'exit': Utility(builtin.utility_exit),
643 'export': Utility(builtin.builtin_export, is_special=1),
644 'egrep': Utility(builtin.utility_egrep),
645 'fgrep': Utility(builtin.utility_fgrep),
646 'gunzip': Utility(builtin.utility_gunzip),
647 'kill': Utility(builtin.utility_kill),
648 'mkdir': Utility(builtin.utility_mkdir),
649 'netstat': Utility(builtin.utility_netstat),
650 'printf': Utility(builtin.utility_printf),
651 'pwd': Utility(builtin.utility_pwd),
652 'return': Utility(builtin.builtin_return, is_special=1),
653 'sed': Utility(builtin.utility_sed,),
654 'set': Utility(builtin.builtin_set,),
655 'shift': Utility(builtin.builtin_shift,),
656 'sleep': Utility(builtin.utility_sleep,),
657 'sort': Utility(builtin.utility_sort,),
658 'trap': Utility(builtin.builtin_trap, is_special=1),
659 'true': Utility(builtin.utility_true),
660 'unset': Utility(builtin.builtin_unset, is_special=1),
661 'wait': Utility(builtin.builtin_wait, is_special=1),
662 }
663
664 def __init__(self, pwd, debugflags = [], env=None, redirs=None, stdin=None,
665 stdout=None, stderr=None, opts=Options()):
666 self._env = env
667 if self._env is None:
668 self._env = Environment(pwd)
669 self._children = {}
670
671 self._redirs = redirs
672 self._close_redirs = False
673
674 if self._redirs is None:
675 if stdin is None:
676 stdin = sys.stdin
677 if stdout is None:
678 stdout = sys.stdout
679 if stderr is None:
680 stderr = sys.stderr
681 stdin = FileWrapper('r', stdin, False)
682 stdout = FileWrapper('w', stdout, False)
683 stderr = FileWrapper('w', stderr, False)
684 self._redirs = Redirections(stdin, stdout, stderr)
685 self._close_redirs = True
686
687 self._debugflags = list(debugflags)
688 self._logfile = sys.stderr
689 self._options = opts
690
691 def close(self):
692 """Must be called when the interpreter is no longer used."""
693 script = self._env.traps.get('EXIT')
694 if script:
695 try:
696 self.execute_script(script=script)
697 except:
698 pass
699
700 if self._redirs is not None and self._close_redirs:
701 self._redirs.close()
702 self._redirs = None
703
704 def log(self, s):
705 self._logfile.write(s)
706 self._logfile.flush()
707
708 def __getitem__(self, key):
709 return self._env[key]
710
711 def __setitem__(self, key, value):
712 self._env[key] = value
713
714 def options(self):
715 return self._options
716
717 def redirect(self, redirs, ios):
718 def add_redir(io):
719 if isinstance(io, pyshyacc.IORedirect):
720 redirs.add(self, io.op, io.filename, io.io_number)
721 else:
722 redirs.add_here_document(self, io.name, io.content, io.io_number)
723
724 map(add_redir, ios)
725 return redirs
726
727 def execute_script(self, script=None, ast=None, sourced=False,
728 scriptpath=None):
729        """If script is not None, parse the input. Otherwise take the supplied
730        AST, then execute it.
731        Return the script exit status.
732 """
733 try:
734 if scriptpath is not None:
735 self._env['0'] = os.path.abspath(scriptpath)
736
737 if script is not None:
738 debug_parsing = ('debug-parsing' in self._debugflags)
739 cmds, script = pyshyacc.parse(script, True, debug_parsing)
740 if 'debug-tree' in self._debugflags:
741 pyshyacc.print_commands(cmds, self._logfile)
742 self._logfile.flush()
743 else:
744 cmds, script = ast, ''
745
746 status = 0
747 for cmd in cmds:
748 try:
749 status = self.execute(cmd)
750 except ExitSignal, e:
751 if sourced:
752 raise
753 status = int(e.args[0])
754 return status
755 except ShellError:
756 self._env['?'] = 1
757 raise
758 if 'debug-utility' in self._debugflags or 'debug-cmd' in self._debugflags:
759 self.log('returncode ' + str(status)+ '\n')
760 return status
761 except CommandNotFound, e:
762            print >>self._redirs.stderr(), str(e)
763            self._redirs.stderr().flush()
764 # Command not found by non-interactive shell
765 # return 127
766 raise
767 except RedirectionError, e:
768 # TODO: should be handled depending on the utility status
769            print >>self._redirs.stderr(), str(e)
770            self._redirs.stderr().flush()
771 # Command not found by non-interactive shell
772 # return 127
773 raise
774
775 def dotcommand(self, env, args):
776 if len(args) < 1:
777 raise ShellError('. expects at least one argument')
778 path = args[0]
779 if '/' not in path:
780 found = env.find_in_path(args[0], True)
781 if found:
782 path = found[0]
783 script = file(path).read()
784 return self.execute_script(script=script, sourced=True)
785
786 def execute(self, token, redirs=None):
787        """Execute an AST subtree with supplied redirections overriding the default
788 interpreter ones.
789 Return the exit status.
790 """
791 if not token:
792 return 0
793
794 if redirs is None:
795 redirs = self._redirs
796
797 if isinstance(token, list):
798 # Commands sequence
799 res = 0
800 for t in token:
801 res = self.execute(t, redirs)
802 return res
803
804 type, value = token
805 status = 0
806 if type=='simple_command':
807 redirs_copy = redirs.clone()
808 try:
809 # TODO: define and handle command return values
810 # TODO: implement set -e
811 status = self._execute_simple_command(value, redirs_copy)
812 finally:
813 redirs_copy.close()
814 elif type=='pipeline':
815 status = self._execute_pipeline(value, redirs)
816 elif type=='and_or':
817 status = self._execute_and_or(value, redirs)
818 elif type=='for_clause':
819 status = self._execute_for_clause(value, redirs)
820 elif type=='while_clause':
821 status = self._execute_while_clause(value, redirs)
822 elif type=='function_definition':
823 status = self._execute_function_definition(value, redirs)
824 elif type=='brace_group':
825 status = self._execute_brace_group(value, redirs)
826 elif type=='if_clause':
827 status = self._execute_if_clause(value, redirs)
828 elif type=='subshell':
829 status = self.subshell(ast=value.cmds, redirs=redirs)
830 elif type=='async':
831 status = self._asynclist(value)
832 elif type=='redirect_list':
833 redirs_copy = self.redirect(redirs.clone(), value.redirs)
834 try:
835 status = self.execute(value.cmd, redirs_copy)
836 finally:
837 redirs_copy.close()
838 else:
839 raise NotImplementedError('Unsupported token type ' + type)
840
841 if status < 0:
842 status = 255
843 return status
844
845 def _execute_if_clause(self, if_clause, redirs):
846 cond_status = self.execute(if_clause.cond, redirs)
847 if cond_status==0:
848 return self.execute(if_clause.if_cmds, redirs)
849 else:
850 return self.execute(if_clause.else_cmds, redirs)
851
852 def _execute_brace_group(self, group, redirs):
853 status = 0
854 for cmd in group.cmds:
855 status = self.execute(cmd, redirs)
856 return status
857
858 def _execute_function_definition(self, fundef, redirs):
859 self._env.define_function(fundef.name, fundef.body)
860 return 0
861
862 def _execute_while_clause(self, while_clause, redirs):
863 status = 0
864 while 1:
865 cond_status = 0
866 for cond in while_clause.condition:
867 cond_status = self.execute(cond, redirs)
868
869 if cond_status:
870 break
871
872 for cmd in while_clause.cmds:
873 status = self.execute(cmd, redirs)
874
875 return status
876
877 def _execute_for_clause(self, for_clause, redirs):
878 if not is_name(for_clause.name):
879 raise ShellSyntaxError('%s is not a valid name' % repr(for_clause.name))
880 items = mappend(self.expand_token, for_clause.items)
881
882 status = 0
883 for item in items:
884 self._env[for_clause.name] = item
885 for cmd in for_clause.cmds:
886 status = self.execute(cmd, redirs)
887 return status
888
889 def _execute_and_or(self, or_and, redirs):
890 res = self.execute(or_and.left, redirs)
891 if (or_and.op=='&&' and res==0) or (or_and.op!='&&' and res!=0):
892 res = self.execute(or_and.right, redirs)
893 return res
894
895 def _execute_pipeline(self, pipeline, redirs):
896 if len(pipeline.commands)==1:
897 status = self.execute(pipeline.commands[0], redirs)
898 else:
899 # Execute all commands one after the other
900 status = 0
901 inpath, outpath = None, None
902 try:
903                # Command inputs and outputs cannot really be plugged together
904                # as done by a real shell. Run commands sequentially and chain
905                # their input/output through temporary files.
906 tmpfd, inpath = tempfile.mkstemp()
907 os.close(tmpfd)
908 tmpfd, outpath = tempfile.mkstemp()
909 os.close(tmpfd)
910
911 inpath = win32_to_unix_path(inpath)
912 outpath = win32_to_unix_path(outpath)
913
914 for i, cmd in enumerate(pipeline.commands):
915 call_redirs = redirs.clone()
916 try:
917 if i!=0:
918 call_redirs.add(self, '<', inpath)
919 if i!=len(pipeline.commands)-1:
920 call_redirs.add(self, '>', outpath)
921
922 status = self.execute(cmd, call_redirs)
923
924 # Chain inputs/outputs
925 inpath, outpath = outpath, inpath
926 finally:
927 call_redirs.close()
928 finally:
929 if inpath: os.remove(inpath)
930 if outpath: os.remove(outpath)
931
932 if pipeline.reverse_status:
933 status = int(not status)
934 self._env['?'] = status
935 return status
936
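    # Illustration: for 'a | b | c' the loop above degenerates to strictly
    # sequential execution with two scratch files swapped after every stage:
    #
    #     a          > tmp1      (i == 0: no stdin redirection)
    #     b   < tmp1 > tmp2      (middle stage: read previous, write next)
    #     c   < tmp2             (last stage: no stdout redirection)
    #
    # so no two commands of a pipeline ever run concurrently here.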
937 def _execute_function(self, name, args, interp, env, stdin, stdout, stderr, *others):
938 assert interp is self
939
940 func = env.get_function(name)
941 #Set positional parameters
942 prevargs = None
943 try:
944 prevargs = env.set_positional_args(args)
945 try:
946 redirs = Redirections(stdin.dup(), stdout.dup(), stderr.dup())
947 try:
948 status = self.execute(func, redirs)
949 finally:
950 redirs.close()
951 except ReturnSignal, e:
952 status = int(e.args[0])
953 env['?'] = status
954 return status
955 finally:
956 #Reset positional parameters
957 if prevargs is not None:
958 env.set_positional_args(prevargs)
959
960 def _execute_simple_command(self, token, redirs):
961 """Can raise ReturnSignal when return builtin is called, ExitSignal when
962 exit is called, and other shell exceptions upon builtin failures.
963 """
964 debug_command = 'debug-cmd' in self._debugflags
965 if debug_command:
966 self.log('word' + repr(token.words) + '\n')
967 self.log('assigns' + repr(token.assigns) + '\n')
968 self.log('redirs' + repr(token.redirs) + '\n')
969
970 is_special = None
971 env = self._env
972
973 try:
974 # Word expansion
975 args = []
976 for word in token.words:
977 args += self.expand_token(word)
978 if is_special is None and args:
979 is_special = env.is_function(args[0]) or \
980 (args[0] in self.COMMANDS and self.COMMANDS[args[0]].is_special)
981
982 if debug_command:
983 self.log('_execute_simple_command' + str(args) + '\n')
984
985 if not args:
986                # Redirections happen in a subshell
987 redirs = redirs.clone()
988 elif not is_special:
989 env = self._env.clone()
990
991 # Redirections
992 self.redirect(redirs, token.redirs)
993
994 # Variables assignments
995 res = 0
996 for type,(k,v) in token.assigns:
997 status, expanded = self.expand_variable((k,v))
998 if status is not None:
999 res = status
1000 if args:
1001 env.export(k, expanded)
1002 else:
1003 env[k] = expanded
1004
1005 if args and args[0] in ('.', 'source'):
1006 res = self.dotcommand(env, args[1:])
1007 elif args:
1008 if args[0] in self.COMMANDS:
1009 command = self.COMMANDS[args[0]]
1010 elif env.is_function(args[0]):
1011 command = Utility(self._execute_function, is_special=True)
1012 else:
1013 if not '/' in args[0].replace('\\', '/'):
1014 cmd = env.find_in_path(args[0])
1015 if not cmd:
1016 # TODO: test error code on unknown command => 127
1017 raise CommandNotFound('Unknown command: "%s"' % args[0])
1018 else:
1019 # Handle commands like '/cygdrive/c/foo.bat'
1020 cmd = cygwin_to_windows_path(args[0])
1021 if not os.path.exists(cmd):
1022 raise CommandNotFound('%s: No such file or directory' % args[0])
1023 shebang = resolve_shebang(cmd)
1024 if shebang:
1025 cmd = shebang
1026 else:
1027 cmd = [cmd]
1028 args[0:1] = cmd
1029 command = Utility(builtin.run_command)
1030
1031 # Command execution
1032 if 'debug-cmd' in self._debugflags:
1033 self.log('redirections ' + str(redirs) + '\n')
1034
1035 res = command.func(args[0], args[1:], self, env,
1036 redirs.stdin(), redirs.stdout(),
1037 redirs.stderr(), self._debugflags)
1038
1039 if self._env.has_opt('-x'):
1040 # Trace command execution in shell environment
1041 # BUG: would be hard to reproduce a real shell behaviour since
1042 # the AST is not annotated with source lines/tokens.
1043 self._redirs.stdout().write(' '.join(args))
1044
1045 except ReturnSignal:
1046 raise
1047 except ShellError, e:
1048 if is_special or isinstance(e, (ExitSignal,
1049 ShellSyntaxError, ExpansionError)):
1050 raise e
1051 self._redirs.stderr().write(str(e)+'\n')
1052 return 1
1053
1054 return res
1055
1056 def expand_token(self, word):
1057 """Expand a word as specified in [2.6 Word Expansions]. Return the list
1058 of expanded words.
1059 """
1060 status, wtrees = self._expand_word(word)
1061 return map(pyshlex.wordtree_as_string, wtrees)
1062
1063 def expand_variable(self, word):
1064 """Return a status code (or None if no command expansion occurred)
1065 and a single word.
1066 """
1067 status, wtrees = self._expand_word(word, pathname=False, split=False)
1068 words = map(pyshlex.wordtree_as_string, wtrees)
1069 assert len(words)==1
1070 return status, words[0]
1071
1072 def expand_here_document(self, word):
1073 """Return the expanded document as a single word. The here document is
1074 assumed to be unquoted.
1075 """
1076 status, wtrees = self._expand_word(word, pathname=False,
1077 split=False, here_document=True)
1078 words = map(pyshlex.wordtree_as_string, wtrees)
1079 assert len(words)==1
1080 return words[0]
1081
1082 def expand_redirection(self, word):
1083 """Return a single word."""
1084 return self.expand_variable(word)[1]
1085
1086 def get_env(self):
1087 return self._env
1088
1089 def _expand_word(self, token, pathname=True, split=True, here_document=False):
1090 wtree = pyshlex.make_wordtree(token[1], here_document=here_document)
1091
1092 # TODO: implement tilde expansion
1093 def expand(wtree):
1094 """Return a pseudo wordtree: the tree or its subelements can be empty
1095            lists when no value results from the expansion.
1096 """
1097 status = None
1098 for part in wtree:
1099 if not isinstance(part, list):
1100 continue
1101 if part[0]in ("'", '\\'):
1102 continue
1103 elif part[0] in ('`', '$('):
1104 status, result = self._expand_command(part)
1105 part[:] = result
1106 elif part[0] in ('$', '${'):
1107 part[:] = self._expand_parameter(part, wtree[0]=='"', split)
1108 elif part[0] in ('', '"'):
1109 status, result = expand(part)
1110 part[:] = result
1111 else:
1112 raise NotImplementedError('%s expansion is not implemented'
1113 % part[0])
1114            # [] is returned when an expansion results in no field,
1115 # like an empty $@
1116 wtree = [p for p in wtree if p != []]
1117 if len(wtree) < 3:
1118 return status, []
1119 return status, wtree
1120
1121 status, wtree = expand(wtree)
1122 if len(wtree) == 0:
1123 return status, wtree
1124 wtree = pyshlex.normalize_wordtree(wtree)
1125
1126 if split:
1127 wtrees = self._split_fields(wtree)
1128 else:
1129 wtrees = [wtree]
1130
1131 if pathname:
1132 wtrees = mappend(self._expand_pathname, wtrees)
1133
1134 wtrees = map(self._remove_quotes, wtrees)
1135 return status, wtrees
1136
1137 def _expand_command(self, wtree):
1138 # BUG: there is something to do with backslashes and quoted
1139 # characters here
1140 command = pyshlex.wordtree_as_string(wtree[1:-1])
1141 status, output = self.subshell_output(command)
1142 return status, ['', output, '']
1143
1144 def _expand_parameter(self, wtree, quoted=False, split=False):
1145 """Return a valid wtree or an empty list when no parameter results."""
1146 # Get the parameter name
1147 # TODO: implement weird expansion rules with ':'
1148 name = pyshlex.wordtree_as_string(wtree[1:-1])
1149 if not is_name(name) and not is_special_param(name):
1150 raise ExpansionError('Bad substitution "%s"' % name)
1151 # TODO: implement special parameters
1152 if name in ('@', '*'):
1153 args = self._env.get_positional_args()
1154 if len(args) == 0:
1155 return []
1156 if len(args)<2:
1157 return ['', ''.join(args), '']
1158
1159 sep = self._env.get('IFS', '')[:1]
1160 if split and quoted and name=='@':
1161 # Introduce a new token to tell the caller that these parameters
1162 # cause a split as specified in 2.5.2
1163 return ['@'] + args + ['']
1164 else:
1165 return ['', sep.join(args), '']
1166
1167 return ['', self._env.get(name, ''), '']
1168
1169 def _split_fields(self, wtree):
1170 def is_empty(split):
1171 return split==['', '', '']
1172
1173 def split_positional(quoted):
1174            # Return a list of wtrees split according to positional-parameter rules.
1175 # All remaining '@' groups are removed.
1176 assert quoted[0]=='"'
1177
1178 splits = [[]]
1179 for part in quoted:
1180 if not isinstance(part, list) or part[0]!='@':
1181 splits[-1].append(part)
1182 else:
1183                    # Empty or single-argument lists were dealt with already
1184 assert len(part)>3
1185 # First argument must join with the beginning part of the original word
1186 splits[-1].append(part[1])
1187 # Create double-quotes expressions for every argument after the first
1188 for arg in part[2:-1]:
1189 splits[-1].append('"')
1190 splits.append(['"', arg])
1191 return splits
1192
1193        # At this point, all expansions but pathnames have occurred. Only quoted
1194 # and positional sequences remain. Thus, all candidates for field splitting
1195 # are in the tree root, or are positional splits ('@') and lie in root
1196 # children.
1197 if not wtree or wtree[0] not in ('', '"'):
1198 # The whole token is quoted or empty, nothing to split
1199 return [wtree]
1200
1201 if wtree[0]=='"':
1202 wtree = ['', wtree, '']
1203
1204 result = [['', '']]
1205 for part in wtree[1:-1]:
1206 if isinstance(part, list):
1207 if part[0]=='"':
1208 splits = split_positional(part)
1209 if len(splits)<=1:
1210 result[-1] += [part, '']
1211 else:
1212 # Terminate the current split
1213 result[-1] += [splits[0], '']
1214 result += splits[1:-1]
1215 # Create a new split
1216 result += [['', splits[-1], '']]
1217 else:
1218 result[-1] += [part, '']
1219 else:
1220 splits = self._env.split_fields(part)
1221 if len(splits)<=1:
1222 # No split
1223 result[-1][-1] += part
1224 else:
1225 # Terminate the current resulting part and create a new one
1226 result[-1][-1] += splits[0]
1227 result[-1].append('')
1228 result += [['', r, ''] for r in splits[1:-1]]
1229 result += [['', splits[-1]]]
1230 result[-1].append('')
1231
1232 # Leading and trailing empty groups come from leading/trailing blanks
1233 if result and is_empty(result[-1]):
1234 result[-1:] = []
1235 if result and is_empty(result[0]):
1236 result[:1] = []
1237 return result
1238
1239 def _expand_pathname(self, wtree):
1240 """See [2.6.6 Pathname Expansion]."""
1241 if self._env.has_opt('-f'):
1242 return [wtree]
1243
1244 # All expansions have been performed, only quoted sequences should remain
1245 # in the tree. Generate the pattern by folding the tree, escaping special
1246        # characters when they appear quoted.
1247 special_chars = '*?[]'
1248
1249 def make_pattern(wtree):
1250 subpattern = []
1251 for part in wtree[1:-1]:
1252 if isinstance(part, list):
1253 part = make_pattern(part)
1254 elif wtree[0]!='':
1255 for c in part:
1256 # Meta-characters cannot be quoted
1257 if c in special_chars:
1258 raise GlobError()
1259 subpattern.append(part)
1260 return ''.join(subpattern)
1261
1262 def pwd_glob(pattern):
1263 cwd = os.getcwd()
1264 os.chdir(self._env['PWD'])
1265 try:
1266 return glob.glob(pattern)
1267 finally:
1268 os.chdir(cwd)
1269
1270 #TODO: check working directory issues here wrt relative patterns
1271 try:
1272 pattern = make_pattern(wtree)
1273 paths = pwd_glob(pattern)
1274 except GlobError:
1275            # BUG: Meta-characters were found in quoted sequences. They should
1276            # have been used literally, but this is unsupported by the current
1277            # glob module. Instead we consider that the whole tree must be used
1278            # literally and therefore there is no point in globbing. This is
1279            # wrong when unquoted meta-characters are mixed with quoted ones in
1280            # the same pattern, like: < foo*"py*" >
1281 paths = []
1282
1283 if not paths:
1284 return [wtree]
1285 return [['', path, ''] for path in paths]
1286
1287 def _remove_quotes(self, wtree):
1288 """See [2.6.7 Quote Removal]."""
1289
1290 def unquote(wtree):
1291 unquoted = []
1292 for part in wtree[1:-1]:
1293 if isinstance(part, list):
1294 part = unquote(part)
1295 unquoted.append(part)
1296 return ''.join(unquoted)
1297
1298 return ['', unquote(wtree), '']
1299
1300 def subshell(self, script=None, ast=None, redirs=None):
1301        """Execute the script or AST in a subshell, using redirs when
1302        supplied, or a clone of the interpreter redirections otherwise.
1303        """
1304 if redirs:
1305 sub_redirs = redirs
1306 else:
1307            sub_redirs = self._redirs.clone()
1308
1309 subshell = None
1310 try:
1311 subshell = Interpreter(None, self._debugflags, self._env.clone(True),
1312 sub_redirs, opts=self._options)
1313 return subshell.execute_script(script, ast)
1314 finally:
1315 if not redirs: sub_redirs.close()
1316 if subshell: subshell.close()
1317
1318 def subshell_output(self, script):
1319        """Execute the script in a subshell and return its exit status and captured output."""
1320 # Create temporary file to capture subshell output
1321 tmpfd, tmppath = tempfile.mkstemp()
1322 try:
1323 tmpfile = os.fdopen(tmpfd, 'wb')
1324 stdout = FileWrapper('w', tmpfile)
1325
1326 redirs = Redirections(self._redirs.stdin().dup(),
1327 stdout,
1328 self._redirs.stderr().dup())
1329 try:
1330 status = self.subshell(script=script, redirs=redirs)
1331 finally:
1332 redirs.close()
1333 redirs = None
1334
1335 # Extract subshell standard output
1336 tmpfile = open(tmppath, 'rb')
1337 try:
1338 output = tmpfile.read()
1339 return status, output.rstrip('\n')
1340 finally:
1341 tmpfile.close()
1342 finally:
1343 os.remove(tmppath)
1344
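    # Illustration: this is the engine behind command substitution.
    # _expand_command() above routes both `cmd` and $(cmd) forms here, and
    # the rstrip('\n') mirrors the POSIX rule that trailing newlines are
    # removed from substitution results. For a hypothetical Interpreter
    # instance 'it':
    #
    #     status, output = it.subshell_output('echo foo')
    #     # status == 0, output == 'foo'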
1345 def _asynclist(self, cmd):
1346 args = (self._env.get_variables(), cmd)
1347 arg = encodeargs(args)
1348        assert len(arg) < 30*1024
1349 cmd = ['pysh.bat', '--ast', '-c', arg]
1350 p = subprocess.Popen(cmd, cwd=self._env['PWD'])
1351 self._children[p.pid] = p
1352 self._env['!'] = p.pid
1353 return 0
1354
1355 def wait(self, pids=None):
1356 if not pids:
1357 pids = self._children.keys()
1358
1359 status = 127
1360 for pid in pids:
1361 if pid not in self._children:
1362 continue
1363 p = self._children.pop(pid)
1364 status = p.wait()
1365
1366 return status
1367
diff --git a/bitbake/lib/pysh/lsprof.py b/bitbake/lib/pysh/lsprof.py
new file mode 100644
index 0000000000..b1831c22a7
--- /dev/null
+++ b/bitbake/lib/pysh/lsprof.py
@@ -0,0 +1,116 @@
1#! /usr/bin/env python
2
3import sys
4from _lsprof import Profiler, profiler_entry
5
6__all__ = ['profile', 'Stats']
7
8def profile(f, *args, **kwds):
9    """Run f(*args, **kwds) under the profiler and return a Stats object."""
10 p = Profiler()
11 p.enable(subcalls=True, builtins=True)
12 try:
13 f(*args, **kwds)
14 finally:
15 p.disable()
16 return Stats(p.getstats())
17
18
19class Stats(object):
20    """Hold and format the profiler entries collected by profile()."""
21
22 def __init__(self, data):
23 self.data = data
24
25 def sort(self, crit="inlinetime"):
26        """Sort the entries (and their inlined call lists) by the given criterion."""
27 if crit not in profiler_entry.__dict__:
28 raise ValueError("Can't sort by %s" % crit)
29 self.data.sort(lambda b, a: cmp(getattr(a, crit),
30 getattr(b, crit)))
31 for e in self.data:
32 if e.calls:
33 e.calls.sort(lambda b, a: cmp(getattr(a, crit),
34 getattr(b, crit)))
35
36 def pprint(self, top=None, file=None, limit=None, climit=None):
37        """Pretty-print the profile entries, with optional row limits."""
38 if file is None:
39 file = sys.stdout
40 d = self.data
41 if top is not None:
42 d = d[:top]
43 cols = "% 12s %12s %11.4f %11.4f %s\n"
44 hcols = "% 12s %12s %12s %12s %s\n"
45 cols2 = "+%12s %12s %11.4f %11.4f + %s\n"
46 file.write(hcols % ("CallCount", "Recursive", "Total(ms)",
47 "Inline(ms)", "module:lineno(function)"))
48 count = 0
49 for e in d:
50 file.write(cols % (e.callcount, e.reccallcount, e.totaltime,
51 e.inlinetime, label(e.code)))
52 count += 1
53 if limit is not None and count == limit:
54 return
55 ccount = 0
56 if e.calls:
57 for se in e.calls:
58 file.write(cols % ("+%s" % se.callcount, se.reccallcount,
59 se.totaltime, se.inlinetime,
60 "+%s" % label(se.code)))
61 count += 1
62 ccount += 1
63 if limit is not None and count == limit:
64 return
65 if climit is not None and ccount == climit:
66 break
67
68 def freeze(self):
69 """Replace all references to code objects with string
70 descriptions; this makes it possible to pickle the instance."""
71
72 # this code is probably rather ickier than it needs to be!
73 for i in range(len(self.data)):
74 e = self.data[i]
75 if not isinstance(e.code, str):
76 self.data[i] = type(e)((label(e.code),) + e[1:])
77 if e.calls:
78 for j in range(len(e.calls)):
79 se = e.calls[j]
80 if not isinstance(se.code, str):
81 e.calls[j] = type(se)((label(se.code),) + se[1:])
82
83_fn2mod = {}
84
85def label(code):
86 if isinstance(code, str):
87 return code
88 try:
89 mname = _fn2mod[code.co_filename]
90 except KeyError:
91 for k, v in sys.modules.items():
92 if v is None:
93 continue
94 if not hasattr(v, '__file__'):
95 continue
96 if not isinstance(v.__file__, str):
97 continue
98 if v.__file__.startswith(code.co_filename):
99 mname = _fn2mod[code.co_filename] = k
100 break
101 else:
102 mname = _fn2mod[code.co_filename] = '<%s>'%code.co_filename
103
104 return '%s:%d(%s)' % (mname, code.co_firstlineno, code.co_name)
105
106
107if __name__ == '__main__':
108 import os
109 sys.argv = sys.argv[1:]
110 if not sys.argv:
111 print >> sys.stderr, "usage: lsprof.py <script> <arguments...>"
112 sys.exit(2)
113 sys.path.insert(0, os.path.abspath(os.path.dirname(sys.argv[0])))
114 stats = profile(execfile, sys.argv[0], globals(), locals())
115 stats.sort()
116 stats.pprint()
diff --git a/bitbake/lib/pysh/pysh.py b/bitbake/lib/pysh/pysh.py
new file mode 100644
index 0000000000..b4e6145b51
--- /dev/null
+++ b/bitbake/lib/pysh/pysh.py
@@ -0,0 +1,167 @@
1# pysh.py - command processing for pysh.
2#
3# Copyright 2007 Patrick Mezard
4#
5# This software may be used and distributed according to the terms
6# of the GNU General Public License, incorporated herein by reference.
7
8import optparse
9import os
10import sys
11
12import interp
13
14SH_OPT = optparse.OptionParser(prog='pysh', usage="%prog [OPTIONS]", version='0.1')
15SH_OPT.add_option('-c', action='store_true', dest='command_string', default=None,
16 help='A string that shall be interpreted by the shell as one or more commands')
17SH_OPT.add_option('--redirect-to', dest='redirect_to', default=None,
18 help='Redirect script commands stdout and stderr to the specified file')
19# See utility_command in builtin.py about the reason for this flag.
20SH_OPT.add_option('--redirected', dest='redirected', action='store_true', default=False,
21 help='Tell the interpreter that stdout and stderr are actually the same objects, which is really stdout')
22SH_OPT.add_option('--debug-parsing', action='store_true', dest='debug_parsing', default=False,
23 help='Trace PLY execution')
24SH_OPT.add_option('--debug-tree', action='store_true', dest='debug_tree', default=False,
25 help='Display the generated syntax tree.')
26SH_OPT.add_option('--debug-cmd', action='store_true', dest='debug_cmd', default=False,
27 help='Trace command execution before parameters expansion and exit status.')
28SH_OPT.add_option('--debug-utility', action='store_true', dest='debug_utility', default=False,
29 help='Trace utility calls, after parameters expansions')
30SH_OPT.add_option('--ast', action='store_true', dest='ast', default=False,
31 help='Encoded commands to execute in a subprocess')
32SH_OPT.add_option('--profile', action='store_true', default=False,
33 help='Profile pysh run')
34
35
36def split_args(args):
37 # Separate shell arguments from command ones
38    # Just stop at the first argument not starting with a dash. This is admittedly
39    # broken: it ignores files starting with a dash and may swallow option values
40    # meant for the command file, but neither is supposed to happen for now.
41 command_index = len(args)
42 for i,arg in enumerate(args):
43 if not arg.startswith('-'):
44 command_index = i
45 break
46
47 return args[:command_index], args[command_index:]
48
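
For instance, with illustrative argument lists:

    split_args(['-c', 'echo hello'])
    # -> (['-c'], ['echo hello'])
    split_args(['--debug-cmd', 'script.sh', '-v'])
    # -> (['--debug-cmd'], ['script.sh', '-v'])
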
49
50def fixenv(env):
51 path = env.get('PATH')
52 if path is not None:
53 parts = path.split(os.pathsep)
54 # Remove Windows utilities from PATH, they are useless at best and
55 # some of them (find) may be confused with other utilities.
56 parts = [p for p in parts if 'system32' not in p.lower()]
57 env['PATH'] = os.pathsep.join(parts)
58 if env.get('HOME') is None:
59 # Several utilities, including cvsps, cannot work without
60 # a defined HOME directory.
61 env['HOME'] = os.path.expanduser('~')
62 return env
63
64def _sh(cwd, shargs, cmdargs, options, debugflags=None, env=None):
65    if os.name == 'nt' and os.environ.get('PYSH_TEXT') != '1':
66        import msvcrt  # Windows-only: set binary mode on the standard streams
67        for fp in (sys.stdin, sys.stdout, sys.stderr):
68            msvcrt.setmode(fp.fileno(), os.O_BINARY)
69
70 hgbin = os.environ.get('PYSH_HGTEXT') != '1'
71
72 if debugflags is None:
73 debugflags = []
74 if options.debug_parsing: debugflags.append('debug-parsing')
75 if options.debug_utility: debugflags.append('debug-utility')
76 if options.debug_cmd: debugflags.append('debug-cmd')
77 if options.debug_tree: debugflags.append('debug-tree')
78
79 if env is None:
80 env = fixenv(dict(os.environ))
81 if cwd is None:
82 cwd = os.getcwd()
83
84 if not cmdargs:
85 # Nothing to do
86 return 0
87
88 ast = None
89 command_file = None
90 if options.command_string:
91 input = cmdargs[0]
92 if not options.ast:
93 input += '\n'
94 else:
95 args, input = interp.decodeargs(input), None
96 env, ast = args
97 cwd = env.get('PWD', cwd)
98 else:
99 command_file = cmdargs[0]
100 arguments = cmdargs[1:]
101
102 prefix = interp.resolve_shebang(command_file, ignoreshell=True)
103 if prefix:
104 input = ' '.join(prefix + [command_file] + arguments)
105 else:
106 # Read commands from file
107 f = file(command_file)
108 try:
109 # Trailing newline to help the parser
110 input = f.read() + '\n'
111 finally:
112 f.close()
113
114 redirect = None
115 try:
116 if options.redirected:
117 stdout = sys.stdout
118 stderr = stdout
119 elif options.redirect_to:
120 redirect = open(options.redirect_to, 'wb')
121 stdout = redirect
122 stderr = redirect
123 else:
124 stdout = sys.stdout
125 stderr = sys.stderr
126
127 # TODO: set arguments to environment variables
128 opts = interp.Options()
129 opts.hgbinary = hgbin
130 ip = interp.Interpreter(cwd, debugflags, stdout=stdout, stderr=stderr,
131 opts=opts)
132 try:
133 # Export given environment in shell object
134 for k,v in env.iteritems():
135 ip.get_env().export(k,v)
136 return ip.execute_script(input, ast, scriptpath=command_file)
137 finally:
138 ip.close()
139 finally:
140 if redirect is not None:
141 redirect.close()
142
143def sh(cwd=None, args=None, debugflags=None, env=None):
144 if args is None:
145 args = sys.argv[1:]
146 shargs, cmdargs = split_args(args)
147 options, shargs = SH_OPT.parse_args(shargs)
148
149 if options.profile:
150 import lsprof
151 p = lsprof.Profiler()
152 p.enable(subcalls=True)
153 try:
154 return _sh(cwd, shargs, cmdargs, options, debugflags, env)
155 finally:
156 p.disable()
157 stats = lsprof.Stats(p.getstats())
158 stats.sort()
159 stats.pprint(top=10, file=sys.stderr, climit=5)
160 else:
161 return _sh(cwd, shargs, cmdargs, options, debugflags, env)
162
163def main():
164 sys.exit(sh())
165
166if __name__=='__main__':
167 main()
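
A usage sketch of the module entry points (the script name is hypothetical, and
actual execution relies on the interp module imported above):

    import pysh

    # Equivalent of "sh -c 'echo hello'"; the interpreter exit status is returned.
    status = pysh.sh(args=['-c', 'echo hello'])

    # Run a script file with arguments instead of a command string.
    status = pysh.sh(args=['myscript.sh', 'arg1', 'arg2'])
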
diff --git a/bitbake/lib/pysh/pyshlex.py b/bitbake/lib/pysh/pyshlex.py
new file mode 100644
index 0000000000..b977b5e869
--- /dev/null
+++ b/bitbake/lib/pysh/pyshlex.py
@@ -0,0 +1,888 @@
1# pyshlex.py - PLY compatible lexer for pysh.
2#
3# Copyright 2007 Patrick Mezard
4#
5# This software may be used and distributed according to the terms
6# of the GNU General Public License, incorporated herein by reference.
7
8# TODO:
9# - review all "char in 'abc'" snippets: the empty string can be matched
10# - test line continuations within quoted/expansion strings
11# - eof is buggy wrt sublexers
12# - the lexer cannot really work in pull mode as it would be required to run
13# PLY in pull mode. It was designed to work incrementally and it would not be
14# that hard to enable pull mode.
15import re
16try:
17 s = set()
18 del s
19except NameError:
20    from sets import Set as set
21
22from ply import lex
23from sherrors import *
24
25class NeedMore(Exception):
26 pass
27
28def is_blank(c):
29 return c in (' ', '\t')
30
31_RE_DIGITS = re.compile(r'^\d+$')
32
33def are_digits(s):
34 return _RE_DIGITS.search(s) is not None
35
36_OPERATORS = dict([
37 ('&&', 'AND_IF'),
38 ('||', 'OR_IF'),
39 (';;', 'DSEMI'),
40 ('<<', 'DLESS'),
41 ('>>', 'DGREAT'),
42 ('<&', 'LESSAND'),
43 ('>&', 'GREATAND'),
44 ('<>', 'LESSGREAT'),
45 ('<<-', 'DLESSDASH'),
46 ('>|', 'CLOBBER'),
47 ('&', 'AMP'),
48 (';', 'COMMA'),
49 ('<', 'LESS'),
50 ('>', 'GREATER'),
51 ('(', 'LPARENS'),
52 (')', 'RPARENS'),
53])
54
55#Make a function to silence pychecker "Local variable shadows global"
56def make_partial_ops():
57 partials = {}
58 for k in _OPERATORS:
59 for i in range(1, len(k)+1):
60 partials[k[:i]] = None
61 return partials
62
63_PARTIAL_OPERATORS = make_partial_ops()
64
65def is_partial_op(s):
66 """Return True if s matches a non-empty subpart of an operator starting
67 at its first character.
68 """
69 return s in _PARTIAL_OPERATORS
70
71def is_op(s):
72 """If s matches an operator, returns the operator identifier. Return None
73 otherwise.
74 """
75 return _OPERATORS.get(s)
76
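
For example:

    is_partial_op('<')   # -> True: '<' starts '<', '<<', '<&', '<>' and '<<-'
    is_op('<<')          # -> 'DLESS'
    is_op('<<-')         # -> 'DLESSDASH'
    is_op('<=')          # -> None: not a shell operator
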
77_RESERVEDS = dict([
78 ('if', 'If'),
79 ('then', 'Then'),
80 ('else', 'Else'),
81 ('elif', 'Elif'),
82 ('fi', 'Fi'),
83 ('do', 'Do'),
84 ('done', 'Done'),
85 ('case', 'Case'),
86 ('esac', 'Esac'),
87 ('while', 'While'),
88 ('until', 'Until'),
89 ('for', 'For'),
90 ('{', 'Lbrace'),
91 ('}', 'Rbrace'),
92 ('!', 'Bang'),
93 ('in', 'In'),
94 ('|', 'PIPE'),
95])
96
97def get_reserved(s):
98 return _RESERVEDS.get(s)
99
100_RE_NAME = re.compile(r'^[0-9a-zA-Z_]+$')
101
102def is_name(s):
103 return _RE_NAME.search(s) is not None
104
105def find_chars(seq, chars):
106 for i,v in enumerate(seq):
107 if v in chars:
108 return i,v
109 return -1, None
110
111class WordLexer:
112    """WordLexer parses quoted or expansion expressions and returns an expression
113    tree. The input string can be any well-formed sequence beginning with a quoting
114    or expansion character. Embedded expressions are handled recursively. The
115    resulting tree is made of lists and strings. Lists represent quoted or
116    expansion expressions: each list's first element is the opening separator and
117    its last one the closing separator. In between can be any number of strings
118    or lists for sub-expressions. Non-quoted/expansion expressions can be written
119    as strings or as lists with empty strings as starting and ending delimiters.
120 """
121
122 NAME_CHARSET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'
123 NAME_CHARSET = dict(zip(NAME_CHARSET, NAME_CHARSET))
124
125 SPECIAL_CHARSET = '@*#?-$!0'
126
127 #Characters which can be escaped depends on the current delimiters
128 ESCAPABLE = {
129 '`': set(['$', '\\', '`']),
130 '"': set(['$', '\\', '`', '"']),
131 "'": set(),
132 }
133
134 def __init__(self, heredoc = False):
135 # _buffer is the unprocessed input characters buffer
136 self._buffer = []
137 # _stack is empty or contains a quoted list being processed
138 # (this is the DFS path to the quoted expression being evaluated).
139 self._stack = []
140 self._escapable = None
141 # True when parsing unquoted here documents
142 self._heredoc = heredoc
143
144 def add(self, data, eof=False):
145 """Feed the lexer with more data. If the quoted expression can be
146 delimited, return a tuple (expr, remaining) containing the expression
147 tree and the unconsumed data.
148 Otherwise, raise NeedMore.
149 """
150 self._buffer += list(data)
151 self._parse(eof)
152
153 result = self._stack[0]
154 remaining = ''.join(self._buffer)
155 self._stack = []
156 self._buffer = []
157 return result, remaining
158
159 def _is_escapable(self, c, delim=None):
160 if delim is None:
161 if self._heredoc:
162                # Backslashes work as if they were double quoted in unquoted
163                # here-documents
164 delim = '"'
165 else:
166 if len(self._stack)<=1:
167 return True
168 delim = self._stack[-2][0]
169
170 escapables = self.ESCAPABLE.get(delim, None)
171 return escapables is None or c in escapables
172
173 def _parse_squote(self, buf, result, eof):
174 if not buf:
175 raise NeedMore()
176 try:
177 pos = buf.index("'")
178 except ValueError:
179 raise NeedMore()
180 result[-1] += ''.join(buf[:pos])
181 result += ["'"]
182 return pos+1, True
183
184 def _parse_bquote(self, buf, result, eof):
185 if not buf:
186 raise NeedMore()
187
188 if buf[0]=='\n':
189 #Remove line continuations
190 result[:] = ['', '', '']
191 elif self._is_escapable(buf[0]):
192 result[-1] += buf[0]
193 result += ['']
194 else:
195 #Keep as such
196 result[:] = ['', '\\'+buf[0], '']
197
198 return 1, True
199
200 def _parse_dquote(self, buf, result, eof):
201 if not buf:
202 raise NeedMore()
203 pos, sep = find_chars(buf, '$\\`"')
204 if pos==-1:
205 raise NeedMore()
206
207 result[-1] += ''.join(buf[:pos])
208 if sep=='"':
209 result += ['"']
210 return pos+1, True
211 else:
212 #Keep everything until the separator and defer processing
213 return pos, False
214
215 def _parse_command(self, buf, result, eof):
216 if not buf:
217 raise NeedMore()
218
219 chars = '$\\`"\''
220 if result[0] == '$(':
221 chars += ')'
222 pos, sep = find_chars(buf, chars)
223 if pos == -1:
224 raise NeedMore()
225
226 result[-1] += ''.join(buf[:pos])
227 if (result[0]=='$(' and sep==')') or (result[0]=='`' and sep=='`'):
228 result += [sep]
229 return pos+1, True
230 else:
231 return pos, False
232
233 def _parse_parameter(self, buf, result, eof):
234 if not buf:
235 raise NeedMore()
236
237 pos, sep = find_chars(buf, '$\\`"\'}')
238 if pos==-1:
239 raise NeedMore()
240
241 result[-1] += ''.join(buf[:pos])
242 if sep=='}':
243 result += [sep]
244 return pos+1, True
245 else:
246 return pos, False
247
248 def _parse_dollar(self, buf, result, eof):
249 sep = result[0]
250 if sep=='$':
251 if not buf:
252 #TODO: handle empty $
253 raise NeedMore()
254 if buf[0]=='(':
255 if len(buf)==1:
256 raise NeedMore()
257
258 if buf[1]=='(':
259 result[0] = '$(('
260 buf[:2] = []
261 else:
262 result[0] = '$('
263 buf[:1] = []
264
265 elif buf[0]=='{':
266 result[0] = '${'
267 buf[:1] = []
268 else:
269 if buf[0] in self.SPECIAL_CHARSET:
270 result[-1] = buf[0]
271 read = 1
272 else:
273 for read,c in enumerate(buf):
274 if c not in self.NAME_CHARSET:
275 break
276 else:
277 if not eof:
278 raise NeedMore()
279 read += 1
280
281 result[-1] += ''.join(buf[0:read])
282
283 if not result[-1]:
284 result[:] = ['', result[0], '']
285 else:
286 result += ['']
287 return read,True
288
289 sep = result[0]
290 if sep=='$(':
291 parsefunc = self._parse_command
292 elif sep=='${':
293 parsefunc = self._parse_parameter
294 else:
295 raise NotImplementedError()
296
297 pos, closed = parsefunc(buf, result, eof)
298 return pos, closed
299
300 def _parse(self, eof):
301 buf = self._buffer
302 stack = self._stack
303 recurse = False
304
305 while 1:
306 if not stack or recurse:
307 if not buf:
308 raise NeedMore()
309 if buf[0] not in ('"\\`$\''):
310 raise ShellSyntaxError('Invalid quoted string sequence')
311 stack.append([buf[0], ''])
312 buf[:1] = []
313 recurse = False
314
315 result = stack[-1]
316 if result[0]=="'":
317 parsefunc = self._parse_squote
318 elif result[0]=='\\':
319 parsefunc = self._parse_bquote
320 elif result[0]=='"':
321 parsefunc = self._parse_dquote
322 elif result[0]=='`':
323 parsefunc = self._parse_command
324 elif result[0][0]=='$':
325 parsefunc = self._parse_dollar
326 else:
327 raise NotImplementedError()
328
329 read, closed = parsefunc(buf, result, eof)
330
331 buf[:read] = []
332 if closed:
333 if len(stack)>1:
334 #Merge in parent expression
335 parsed = stack.pop()
336 stack[-1] += [parsed]
337 stack[-1] += ['']
338 else:
339 break
340 else:
341 recurse = True
342
343def normalize_wordtree(wtree):
344 """Fold back every literal sequence (delimited with empty strings) into
345 parent sequence.
346 """
347 def normalize(wtree):
348 result = []
349 for part in wtree[1:-1]:
350 if isinstance(part, list):
351 part = normalize(part)
352 if part[0]=='':
353 #Move the part content back at current level
354 result += part[1:-1]
355 continue
356 elif not part:
357 #Remove empty strings
358 continue
359 result.append(part)
360 if not result:
361 result = ['']
362 return [wtree[0]] + result + [wtree[-1]]
363
364 return normalize(wtree)
365
366
367def make_wordtree(token, here_document=False):
368 """Parse a delimited token and return a tree similar to the ones returned by
369 WordLexer. token may contain any combinations of expansion/quoted fields and
370 non-ones.
371 """
372 tree = ['']
373 remaining = token
374 delimiters = '\\$`'
375 if not here_document:
376 delimiters += '\'"'
377
378 while 1:
379 pos, sep = find_chars(remaining, delimiters)
380 if pos==-1:
381 tree += [remaining, '']
382 return normalize_wordtree(tree)
383 tree.append(remaining[:pos])
384 remaining = remaining[pos:]
385
386 try:
387 result, remaining = WordLexer(heredoc = here_document).add(remaining, True)
388 except NeedMore:
389            raise ShellSyntaxError('Invalid token "%s"' % token)
390 tree.append(result)
391
392
393def wordtree_as_string(wtree):
394 """Rewrite an expression tree generated by make_wordtree as string."""
395 def visit(node, output):
396 for child in node:
397 if isinstance(child, list):
398 visit(child, output)
399 else:
400 output.append(child)
401
402 output = []
403 visit(wtree, output)
404 return ''.join(output)
405
406
407def unquote_wordtree(wtree):
408 """Fold the word tree while removing quotes everywhere. Other expansion
409 sequences are joined as such.
410 """
411 def unquote(wtree):
412 unquoted = []
413 if wtree[0] in ('', "'", '"', '\\'):
414 wtree = wtree[1:-1]
415
416 for part in wtree:
417 if isinstance(part, list):
418 part = unquote(part)
419 unquoted.append(part)
420 return ''.join(unquoted)
421
422 return unquote(wtree)
423
424
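
Concretely, quoting is preserved in the word tree and removed by
unquote_wordtree:

    tree = make_wordtree('"a b"')
    # tree == ['', ['"', 'a b', '"'], '']
    unquote_wordtree(tree)
    # -> 'a b'
    unquote_wordtree(make_wordtree("'$HOME'"))
    # -> '$HOME' (the quotes go away, the literal text stays)
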
425class HereDocLexer:
426    """HereDocLexer delimits here-document content, from just after the starting
427    newline (not included) up to the closing delimiter line (included).
428 """
429 def __init__(self, op, delim):
430 assert op in ('<<', '<<-')
431 if not delim:
432 raise ShellSyntaxError('invalid here document delimiter %s' % str(delim))
433
434 self._op = op
435 self._delim = delim
436 self._buffer = []
437 self._token = []
438
439 def add(self, data, eof):
440 """If the here-document was delimited, return a tuple (content, remaining).
441 Raise NeedMore() otherwise.
442 """
443 self._buffer += list(data)
444 self._parse(eof)
445 token = ''.join(self._token)
446 remaining = ''.join(self._buffer)
447        self._token, self._buffer = [], []
448 return token, remaining
449
450 def _parse(self, eof):
451 while 1:
452 #Look for first unescaped newline. Quotes may be ignored
453 escaped = False
454 for i,c in enumerate(self._buffer):
455 if escaped:
456 escaped = False
457 elif c=='\\':
458 escaped = True
459 elif c=='\n':
460 break
461 else:
462 i = -1
463
464 if i==-1 or self._buffer[i]!='\n':
465 if not eof:
466 raise NeedMore()
467 #No more data, maybe the last line is closing delimiter
468 line = ''.join(self._buffer)
469 eol = ''
470 self._buffer[:] = []
471 else:
472 line = ''.join(self._buffer[:i])
473 eol = self._buffer[i]
474 self._buffer[:i+1] = []
475
476 if self._op=='<<-':
477 line = line.lstrip('\t')
478
479 if line==self._delim:
480 break
481
482 self._token += [line, eol]
483 if i==-1:
484 break
485
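
For example, delimiting a two-line here-document:

    lexer = HereDocLexer('<<', 'EOF')
    content, remaining = lexer.add('hello\nworld\nEOF\n', eof=True)
    # content == 'hello\nworld\n'
    # remaining == ''
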
486class Token:
487 #TODO: check this is still in use
488 OPERATOR = 'OPERATOR'
489 WORD = 'WORD'
490
491 def __init__(self):
492 self.value = ''
493 self.type = None
494
495 def __getitem__(self, key):
496        #Behave like a two-element tuple
497 if key==0:
498 return self.type
499 if key==1:
500 return self.value
501 raise IndexError(key)
502
503
504class HereDoc:
505 def __init__(self, op, name=None):
506 self.op = op
507 self.name = name
508 self.pendings = []
509
510TK_COMMA = 'COMMA'
511TK_AMPERSAND = 'AMP'
512TK_OP = 'OP'
513TK_TOKEN = 'TOKEN'
514TK_COMMENT = 'COMMENT'
515TK_NEWLINE = 'NEWLINE'
516TK_IONUMBER = 'IO_NUMBER'
517TK_ASSIGNMENT = 'ASSIGNMENT_WORD'
518TK_HERENAME = 'HERENAME'
519
520class Lexer:
521 """Main lexer.
522
523    Call add() to feed in data; delimited tokens are delivered through on_token().
524 """
525    # Here-document handling makes the whole thing more complex because it basically
526    # forces tokens to be reordered: here-document content must come right after the
527    # operator and the here-document name, while other tokens may still follow the
528    # here-document expression on the same line.
529 #
530 # So, here-doc states are basically:
531 # *self._state==ST_NORMAL
532 # - self._heredoc.op is None: no here-document
533 # - self._heredoc.op is not None but name is: here-document operator matched,
534 # waiting for the document name/delimiter
535 # - self._heredoc.op and name are not None: here-document is ready, following
536 # tokens are being stored and will be pushed again when the document is
537 # completely parsed.
538 # *self._state==ST_HEREDOC
539 # - The here-document is being delimited by self._herelexer. Once it is done
540 # the content is pushed in front of the pending token list then all these
541 # tokens are pushed once again.
542 ST_NORMAL = 'ST_NORMAL'
543 ST_OP = 'ST_OP'
544 ST_BACKSLASH = 'ST_BACKSLASH'
545 ST_QUOTED = 'ST_QUOTED'
546 ST_COMMENT = 'ST_COMMENT'
547 ST_HEREDOC = 'ST_HEREDOC'
548
549 #Match end of backquote strings
550 RE_BACKQUOTE_END = re.compile(r'(?<!\\)(`)')
551
552 def __init__(self, parent_state = None):
553 self._input = []
554 self._pos = 0
555
556 self._token = ''
557 self._type = TK_TOKEN
558
559 self._state = self.ST_NORMAL
560 self._parent_state = parent_state
561 self._wordlexer = None
562
563 self._heredoc = HereDoc(None)
564 self._herelexer = None
565
566    ### Following attributes are not used for delimiting tokens and can safely
567    ### be changed after here-document detection (see _push_token)
568
569 # Count the number of tokens following a 'For' reserved word. Needed to
570 # return an 'In' reserved word if it comes in third place.
571 self._for_count = None
572
573 def add(self, data, eof=False):
574 """Feed the lexer with data.
575
576 When eof is set to True, returns unconsumed data or raise if the lexer
577 is in the middle of a delimiting operation.
578 Raise NeedMore otherwise.
579 """
580 self._input += list(data)
581 self._parse(eof)
582 self._input[:self._pos] = []
583 return ''.join(self._input)
584
585 def _parse(self, eof):
586 while self._state:
587 if self._pos>=len(self._input):
588 if not eof:
589 raise NeedMore()
590 elif self._state not in (self.ST_OP, self.ST_QUOTED, self.ST_HEREDOC):
591 #Delimit the current token and leave cleanly
592 self._push_token('')
593 break
594 else:
595 #Let the sublexer handle the eof themselves
596 pass
597
598 if self._state==self.ST_NORMAL:
599 self._parse_normal()
600 elif self._state==self.ST_COMMENT:
601 self._parse_comment()
602 elif self._state==self.ST_OP:
603 self._parse_op(eof)
604 elif self._state==self.ST_QUOTED:
605 self._parse_quoted(eof)
606 elif self._state==self.ST_HEREDOC:
607 self._parse_heredoc(eof)
608 else:
609 assert False, "Unknown state " + str(self._state)
610
611 if self._heredoc.op is not None:
612 raise ShellSyntaxError('missing here-document delimiter')
613
614 def _parse_normal(self):
615 c = self._input[self._pos]
616 if c=='\n':
617 self._push_token(c)
618 self._token = c
619 self._type = TK_NEWLINE
620 self._push_token('')
621 self._pos += 1
622 elif c in ('\\', '\'', '"', '`', '$'):
623 self._state = self.ST_QUOTED
624 elif is_partial_op(c):
625 self._push_token(c)
626
627 self._type = TK_OP
628 self._token += c
629 self._pos += 1
630 self._state = self.ST_OP
631 elif is_blank(c):
632 self._push_token(c)
633
634 #Discard blanks
635 self._pos += 1
636 elif self._token:
637 self._token += c
638 self._pos += 1
639 elif c=='#':
640 self._state = self.ST_COMMENT
641 self._type = TK_COMMENT
642 self._pos += 1
643 else:
644 self._pos += 1
645 self._token += c
646
647 def _parse_op(self, eof):
648 assert self._token
649
650 while 1:
651 if self._pos>=len(self._input):
652 if not eof:
653 raise NeedMore()
654 c = ''
655 else:
656 c = self._input[self._pos]
657
658 op = self._token + c
659 if c and is_partial_op(op):
660 #Still parsing an operator
661 self._token = op
662 self._pos += 1
663 else:
664 #End of operator
665 self._push_token(c)
666 self._state = self.ST_NORMAL
667 break
668
669 def _parse_comment(self):
670 while 1:
671 if self._pos>=len(self._input):
672 raise NeedMore()
673
674 c = self._input[self._pos]
675 if c=='\n':
676 #End of comment, do not consume the end of line
677 self._state = self.ST_NORMAL
678 break
679 else:
680 self._token += c
681 self._pos += 1
682
683 def _parse_quoted(self, eof):
684 """Precondition: the starting backquote/dollar is still in the input queue."""
685 if not self._wordlexer:
686 self._wordlexer = WordLexer()
687
688 if self._pos<len(self._input):
689 #Transfer input queue character into the subparser
690 input = self._input[self._pos:]
691 self._pos += len(input)
692
693 wtree, remaining = self._wordlexer.add(input, eof)
694 self._wordlexer = None
695 self._token += wordtree_as_string(wtree)
696
697 #Put unparsed character back in the input queue
698 if remaining:
699 self._input[self._pos:self._pos] = list(remaining)
700 self._state = self.ST_NORMAL
701
702 def _parse_heredoc(self, eof):
703 assert not self._token
704
705 if self._herelexer is None:
706 self._herelexer = HereDocLexer(self._heredoc.op, self._heredoc.name)
707
708 if self._pos<len(self._input):
709 #Transfer input queue character into the subparser
710 input = self._input[self._pos:]
711 self._pos += len(input)
712
713 self._token, remaining = self._herelexer.add(input, eof)
714
715 #Reset here-document state
716 self._herelexer = None
717 heredoc, self._heredoc = self._heredoc, HereDoc(None)
718 if remaining:
719 self._input[self._pos:self._pos] = list(remaining)
720 self._state = self.ST_NORMAL
721
722 #Push pending tokens
723 heredoc.pendings[:0] = [(self._token, self._type, heredoc.name)]
724 for token, type, delim in heredoc.pendings:
725 self._token = token
726 self._type = type
727 self._push_token(delim)
728
729 def _push_token(self, delim):
730 if not self._token:
731 return 0
732
733 if self._heredoc.op is not None:
734 if self._heredoc.name is None:
735 #Here-document name
736 if self._type!=TK_TOKEN:
737 raise ShellSyntaxError("expecting here-document name, got '%s'" % self._token)
738 self._heredoc.name = unquote_wordtree(make_wordtree(self._token))
739 self._type = TK_HERENAME
740 else:
741 #Capture all tokens until the newline starting the here-document
742 if self._type==TK_NEWLINE:
743 assert self._state==self.ST_NORMAL
744 self._state = self.ST_HEREDOC
745
746 self._heredoc.pendings.append((self._token, self._type, delim))
747 self._token = ''
748 self._type = TK_TOKEN
749 return 1
750
751        # BEWARE: do not change the lexer state from here to the end of the function:
752        # when parsing between a here-document operator and the end of the line,
753        # tokens are stored in self._heredoc.pendings and therefore never
754        # reach the section below.
755
756 #Check operators
757 if self._type==TK_OP:
758 #False positive because of partial op matching
759 op = is_op(self._token)
760 if not op:
761 self._type = TK_TOKEN
762 else:
763 #Map to the specific operator
764 self._type = op
765 if self._token in ('<<', '<<-'):
766 #Done here rather than in _parse_op because there is no need
767 #to change the parser state since we are still waiting for
768 #the here-document name
769 if self._heredoc.op is not None:
770 raise ShellSyntaxError("syntax error near token '%s'" % self._token)
771 assert self._heredoc.op is None
772 self._heredoc.op = self._token
773
774 if self._type==TK_TOKEN:
775 if '=' in self._token and not delim:
776 if self._token.startswith('='):
777 #Token is a WORD... a TOKEN that is.
778 pass
779 else:
780 prev = self._token[:self._token.find('=')]
781 if is_name(prev):
782 self._type = TK_ASSIGNMENT
783 else:
784 #Just a token (unspecified)
785 pass
786 else:
787 reserved = get_reserved(self._token)
788 if reserved is not None:
789 if reserved=='In' and self._for_count!=2:
790 #Sorry, not a reserved word after all
791 pass
792 else:
793 self._type = reserved
794 if reserved in ('For', 'Case'):
795 self._for_count = 0
796 elif are_digits(self._token) and delim in ('<', '>'):
797 #Detect IO_NUMBER
798 self._type = TK_IONUMBER
799 elif self._token==';':
800 self._type = TK_COMMA
801 elif self._token=='&':
802 self._type = TK_AMPERSAND
803 elif self._type==TK_COMMENT:
804 #Comments are not part of sh grammar, ignore them
805 self._token = ''
806 self._type = TK_TOKEN
807 return 0
808
809 if self._for_count is not None:
810 #Track token count in 'For' expression to detect 'In' reserved words.
811 #Can only be in third position, no need to go beyond
812 self._for_count += 1
813 if self._for_count==3:
814 self._for_count = None
815
816 self.on_token((self._token, self._type))
817 self._token = ''
818 self._type = TK_TOKEN
819 return 1
820
821 def on_token(self, token):
822 raise NotImplementedError
823
824
825tokens = [
826 TK_TOKEN,
827# To silence yacc unused token warnings
828# TK_COMMENT,
829 TK_NEWLINE,
830 TK_IONUMBER,
831 TK_ASSIGNMENT,
832 TK_HERENAME,
833]
834
835#Add specific operators
836tokens += _OPERATORS.values()
837#Add reserved words
838tokens += _RESERVEDS.values()
839
840class PLYLexer(Lexer):
841 """Bridge Lexer and PLY lexer interface."""
842 def __init__(self):
843 Lexer.__init__(self)
844 self._tokens = []
845 self._current = 0
846 self.lineno = 0
847
848 def on_token(self, token):
849 value, type = token
850
851 self.lineno = 0
852 t = lex.LexToken()
853 t.value = value
854 t.type = type
855 t.lexer = self
856 t.lexpos = 0
857 t.lineno = 0
858
859 self._tokens.append(t)
860
861 def is_empty(self):
862 return not bool(self._tokens)
863
864 #PLY compliant interface
865 def token(self):
866 if self._current>=len(self._tokens):
867 return None
868 t = self._tokens[self._current]
869 self._current += 1
870 return t
871
872
873def get_tokens(s):
874 """Parse the input string and return a tuple (tokens, unprocessed) where
875 tokens is a list of parsed tokens and unprocessed is the part of the input
876 string left untouched by the lexer.
877 """
878 lexer = PLYLexer()
879 untouched = lexer.add(s, True)
880 tokens = []
881 while 1:
882 token = lexer.token()
883 if token is None:
884 break
885 tokens.append(token)
886
887 tokens = [(t.value, t.type) for t in tokens]
888 return tokens, untouched
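
For example, lexing a small script:

    tokens, remaining = get_tokens('echo hello; ls | wc -l\n')
    # tokens == [('echo', 'TOKEN'), ('hello', 'TOKEN'), (';', 'COMMA'),
    #            ('ls', 'TOKEN'), ('|', 'PIPE'), ('wc', 'TOKEN'),
    #            ('-l', 'TOKEN'), ('\n', 'NEWLINE')]
    # remaining == ''
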
diff --git a/bitbake/lib/pysh/pyshyacc.py b/bitbake/lib/pysh/pyshyacc.py
new file mode 100644
index 0000000000..3d9510c0c3
--- /dev/null
+++ b/bitbake/lib/pysh/pyshyacc.py
@@ -0,0 +1,772 @@
1# pyshyacc.py - PLY grammar definition for pysh
2#
3# Copyright 2007 Patrick Mezard
4#
5# This software may be used and distributed according to the terms
6# of the GNU General Public License, incorporated herein by reference.
7
8"""PLY grammar file.
9"""
10import sys
11
12import pyshlex
13tokens = pyshlex.tokens
14
15from ply import yacc
16import sherrors
17
18class IORedirect:
19 def __init__(self, op, filename, io_number=None):
20 self.op = op
21 self.filename = filename
22 self.io_number = io_number
23
24class HereDocument:
25 def __init__(self, op, name, content, io_number=None):
26 self.op = op
27 self.name = name
28 self.content = content
29 self.io_number = io_number
30
31def make_io_redirect(p):
32 """Make an IORedirect instance from the input 'io_redirect' production."""
33 name, io_number, io_target = p
34 assert name=='io_redirect'
35
36 if io_target[0]=='io_file':
37 io_type, io_op, io_file = io_target
38 return IORedirect(io_op, io_file, io_number)
39 elif io_target[0]=='io_here':
40 io_type, io_op, io_name, io_content = io_target
41 return HereDocument(io_op, io_name, io_content, io_number)
42 else:
43        assert False, "Invalid IO redirection token %s" % repr(io_target[0])
44
45class SimpleCommand:
46 """
47 assigns contains (name, value) pairs.
48 """
49 def __init__(self, words, redirs, assigns):
50 self.words = list(words)
51 self.redirs = list(redirs)
52 self.assigns = list(assigns)
53
54class Pipeline:
55 def __init__(self, commands, reverse_status=False):
56 self.commands = list(commands)
57 assert self.commands #Grammar forbids this
58 self.reverse_status = reverse_status
59
60class AndOr:
61 def __init__(self, op, left, right):
62 self.op = str(op)
63 self.left = left
64 self.right = right
65
66class ForLoop:
67 def __init__(self, name, items, cmds):
68 self.name = str(name)
69 self.items = list(items)
70 self.cmds = list(cmds)
71
72class WhileLoop:
73 def __init__(self, condition, cmds):
74 self.condition = list(condition)
75 self.cmds = list(cmds)
76
77class UntilLoop:
78 def __init__(self, condition, cmds):
79 self.condition = list(condition)
80 self.cmds = list(cmds)
81
82class FunDef:
83 def __init__(self, name, body):
84 self.name = str(name)
85 self.body = body
86
87class BraceGroup:
88 def __init__(self, cmds):
89 self.cmds = list(cmds)
90
91class IfCond:
92 def __init__(self, cond, if_cmds, else_cmds):
93 self.cond = list(cond)
94 self.if_cmds = if_cmds
95 self.else_cmds = else_cmds
96
97class Case:
98 def __init__(self, name, items):
99 self.name = name
100 self.items = items
101
102class SubShell:
103 def __init__(self, cmds):
104 self.cmds = cmds
105
106class RedirectList:
107 def __init__(self, cmd, redirs):
108 self.cmd = cmd
109 self.redirs = list(redirs)
110
111def get_production(productions, ptype):
112 """productions must be a list of production tuples like (name, obj) where
113 name is the production string identifier.
114    Return the first production named 'ptype'. Raise KeyError if none can be
115 found.
116 """
117 for production in productions:
118 if production is not None and production[0]==ptype:
119 return production
120 raise KeyError(ptype)
121
122#-------------------------------------------------------------------------------
123# PLY grammar definition
124#-------------------------------------------------------------------------------
125
126def p_multiple_commands(p):
127 """multiple_commands : newline_sequence
128 | complete_command
129 | multiple_commands complete_command"""
130 if len(p)==2:
131 if p[1] is not None:
132 p[0] = [p[1]]
133 else:
134 p[0] = []
135 else:
136 p[0] = p[1] + [p[2]]
137
138def p_complete_command(p):
139 """complete_command : list separator
140 | list"""
141 if len(p)==3 and p[2] and p[2][1] == '&':
142 p[0] = ('async', p[1])
143 else:
144 p[0] = p[1]
145
146def p_list(p):
147 """list : list separator_op and_or
148 | and_or"""
149 if len(p)==2:
150 p[0] = [p[1]]
151 else:
152 #if p[2]!=';':
153 # raise NotImplementedError('AND-OR list asynchronous execution is not implemented')
154 p[0] = p[1] + [p[3]]
155
156def p_and_or(p):
157 """and_or : pipeline
158 | and_or AND_IF linebreak pipeline
159 | and_or OR_IF linebreak pipeline"""
160 if len(p)==2:
161 p[0] = p[1]
162 else:
163 p[0] = ('and_or', AndOr(p[2], p[1], p[4]))
164
165def p_maybe_bang_word(p):
166 """maybe_bang_word : Bang"""
167 p[0] = ('maybe_bang_word', p[1])
168
169def p_pipeline(p):
170 """pipeline : pipe_sequence
171 | bang_word pipe_sequence"""
172 if len(p)==3:
173 p[0] = ('pipeline', Pipeline(p[2][1:], True))
174 else:
175 p[0] = ('pipeline', Pipeline(p[1][1:]))
176
177def p_pipe_sequence(p):
178 """pipe_sequence : command
179 | pipe_sequence PIPE linebreak command"""
180 if len(p)==2:
181 p[0] = ['pipe_sequence', p[1]]
182 else:
183 p[0] = p[1] + [p[4]]
184
185def p_command(p):
186 """command : simple_command
187 | compound_command
188 | compound_command redirect_list
189 | function_definition"""
190
191 if p[1][0] in ( 'simple_command',
192 'for_clause',
193 'while_clause',
194 'until_clause',
195 'case_clause',
196 'if_clause',
197 'function_definition',
198 'subshell',
199 'brace_group',):
200 if len(p) == 2:
201 p[0] = p[1]
202 else:
203 p[0] = ('redirect_list', RedirectList(p[1], p[2][1:]))
204 else:
205 raise NotImplementedError('%s command is not implemented' % repr(p[1][0]))
206
207def p_compound_command(p):
208 """compound_command : brace_group
209 | subshell
210 | for_clause
211 | case_clause
212 | if_clause
213 | while_clause
214 | until_clause"""
215 p[0] = p[1]
216
217def p_subshell(p):
218 """subshell : LPARENS compound_list RPARENS"""
219 p[0] = ('subshell', SubShell(p[2][1:]))
220
221def p_compound_list(p):
222 """compound_list : term
223 | newline_list term
224 | term separator
225 | newline_list term separator"""
226 productions = p[1:]
227 try:
228 sep = get_production(productions, 'separator')
229 if sep[1]!=';':
230 raise NotImplementedError()
231 except KeyError:
232 pass
233 term = get_production(productions, 'term')
234 p[0] = ['compound_list'] + term[1:]
235
236def p_term(p):
237 """term : term separator and_or
238 | and_or"""
239 if len(p)==2:
240 p[0] = ['term', p[1]]
241 else:
242 if p[2] is not None and p[2][1] == '&':
243 p[0] = ['term', ('async', p[1][1:])] + [p[3]]
244 else:
245 p[0] = p[1] + [p[3]]
246
247def p_maybe_for_word(p):
248 # Rearrange 'For' priority wrt TOKEN. See p_for_word
249 """maybe_for_word : For"""
250 p[0] = ('maybe_for_word', p[1])
251
252def p_for_clause(p):
253 """for_clause : for_word name linebreak do_group
254 | for_word name linebreak in sequential_sep do_group
255 | for_word name linebreak in wordlist sequential_sep do_group"""
256 productions = p[1:]
257 do_group = get_production(productions, 'do_group')
258 try:
259 items = get_production(productions, 'in')[1:]
260 except KeyError:
261 raise NotImplementedError('"in" omission is not implemented')
262
263 try:
264 items = get_production(productions, 'wordlist')[1:]
265 except KeyError:
266 items = []
267
268 name = p[2]
269 p[0] = ('for_clause', ForLoop(name, items, do_group[1:]))
270
271def p_name(p):
272 """name : token""" #Was NAME instead of token
273 p[0] = p[1]
274
275def p_in(p):
276 """in : In"""
277 p[0] = ('in', p[1])
278
279def p_wordlist(p):
280 """wordlist : wordlist token
281 | token"""
282 if len(p)==2:
283 p[0] = ['wordlist', ('TOKEN', p[1])]
284 else:
285 p[0] = p[1] + [('TOKEN', p[2])]
286
287def p_case_clause(p):
288 """case_clause : Case token linebreak in linebreak case_list Esac
289 | Case token linebreak in linebreak case_list_ns Esac
290 | Case token linebreak in linebreak Esac"""
291 if len(p) < 8:
292 items = []
293 else:
294 items = p[6][1:]
295 name = p[2]
296 p[0] = ('case_clause', Case(name, [c[1] for c in items]))
297
298def p_case_list_ns(p):
299 """case_list_ns : case_list case_item_ns
300 | case_item_ns"""
301 p_case_list(p)
302
303def p_case_list(p):
304 """case_list : case_list case_item
305 | case_item"""
306 if len(p)==2:
307 p[0] = ['case_list', p[1]]
308 else:
309 p[0] = p[1] + [p[2]]
310
311def p_case_item_ns(p):
312 """case_item_ns : pattern RPARENS linebreak
313 | pattern RPARENS compound_list linebreak
314 | LPARENS pattern RPARENS linebreak
315 | LPARENS pattern RPARENS compound_list linebreak"""
316 p_case_item(p)
317
318def p_case_item(p):
319 """case_item : pattern RPARENS linebreak DSEMI linebreak
320 | pattern RPARENS compound_list DSEMI linebreak
321 | LPARENS pattern RPARENS linebreak DSEMI linebreak
322 | LPARENS pattern RPARENS compound_list DSEMI linebreak"""
323 if len(p) < 7:
324 name = p[1][1:]
325 else:
326 name = p[2][1:]
327
328 try:
329 cmds = get_production(p[1:], "compound_list")[1:]
330 except KeyError:
331 cmds = []
332
333 p[0] = ('case_item', (name, cmds))
334
335def p_pattern(p):
336 """pattern : token
337 | pattern PIPE token"""
338 if len(p)==2:
339 p[0] = ['pattern', ('TOKEN', p[1])]
340 else:
341 p[0] = p[1] + [('TOKEN', p[2])]
342
343def p_maybe_if_word(p):
344 # Rearrange 'If' priority wrt TOKEN. See p_if_word
345 """maybe_if_word : If"""
346 p[0] = ('maybe_if_word', p[1])
347
348def p_maybe_then_word(p):
349 # Rearrange 'Then' priority wrt TOKEN. See p_then_word
350 """maybe_then_word : Then"""
351 p[0] = ('maybe_then_word', p[1])
352
353def p_if_clause(p):
354 """if_clause : if_word compound_list then_word compound_list else_part Fi
355 | if_word compound_list then_word compound_list Fi"""
356 else_part = []
357 if len(p)==7:
358 else_part = p[5]
359 p[0] = ('if_clause', IfCond(p[2][1:], p[4][1:], else_part))
360
361def p_else_part(p):
362 """else_part : Elif compound_list then_word compound_list else_part
363 | Elif compound_list then_word compound_list
364 | Else compound_list"""
365 if len(p)==3:
366 p[0] = p[2][1:]
367 else:
368 else_part = []
369 if len(p)==6:
370 else_part = p[5]
371 p[0] = ('elif', IfCond(p[2][1:], p[4][1:], else_part))
372
373def p_while_clause(p):
374 """while_clause : While compound_list do_group"""
375 p[0] = ('while_clause', WhileLoop(p[2][1:], p[3][1:]))
376
377def p_maybe_until_word(p):
378 # Rearrange 'Until' priority wrt TOKEN. See p_until_word
379 """maybe_until_word : Until"""
380 p[0] = ('maybe_until_word', p[1])
381
382def p_until_clause(p):
383 """until_clause : until_word compound_list do_group"""
384 p[0] = ('until_clause', UntilLoop(p[2][1:], p[3][1:]))
385
386def p_function_definition(p):
387 """function_definition : fname LPARENS RPARENS linebreak function_body"""
388 p[0] = ('function_definition', FunDef(p[1], p[5]))
389
390def p_function_body(p):
391 """function_body : compound_command
392 | compound_command redirect_list"""
393 if len(p)!=2:
394        raise NotImplementedError('function redirection lists are not implemented')
395 p[0] = p[1]
396
397def p_fname(p):
398 """fname : TOKEN""" #Was NAME instead of token
399 p[0] = p[1]
400
401def p_brace_group(p):
402 """brace_group : Lbrace compound_list Rbrace"""
403 p[0] = ('brace_group', BraceGroup(p[2][1:]))
404
405def p_maybe_done_word(p):
406 #See p_assignment_word for details.
407 """maybe_done_word : Done"""
408 p[0] = ('maybe_done_word', p[1])
409
410def p_maybe_do_word(p):
411 """maybe_do_word : Do"""
412 p[0] = ('maybe_do_word', p[1])
413
414def p_do_group(p):
415 """do_group : do_word compound_list done_word"""
416 #Do group contains a list of AndOr
417 p[0] = ['do_group'] + p[2][1:]
418
419def p_simple_command(p):
420 """simple_command : cmd_prefix cmd_word cmd_suffix
421 | cmd_prefix cmd_word
422 | cmd_prefix
423 | cmd_name cmd_suffix
424 | cmd_name"""
425 words, redirs, assigns = [], [], []
426 for e in p[1:]:
427 name = e[0]
428 if name in ('cmd_prefix', 'cmd_suffix'):
429 for sube in e[1:]:
430 subname = sube[0]
431 if subname=='io_redirect':
432 redirs.append(make_io_redirect(sube))
433 elif subname=='ASSIGNMENT_WORD':
434 assigns.append(sube)
435 else:
436 words.append(sube)
437 elif name in ('cmd_word', 'cmd_name'):
438 words.append(e)
439
440 cmd = SimpleCommand(words, redirs, assigns)
441 p[0] = ('simple_command', cmd)
442
443def p_cmd_name(p):
444 """cmd_name : TOKEN"""
445 p[0] = ('cmd_name', p[1])
446
447def p_cmd_word(p):
448 """cmd_word : token"""
449 p[0] = ('cmd_word', p[1])
450
451def p_maybe_assignment_word(p):
452 #See p_assignment_word for details.
453 """maybe_assignment_word : ASSIGNMENT_WORD"""
454 p[0] = ('maybe_assignment_word', p[1])
455
456def p_cmd_prefix(p):
457 """cmd_prefix : io_redirect
458 | cmd_prefix io_redirect
459 | assignment_word
460 | cmd_prefix assignment_word"""
461 try:
462 prefix = get_production(p[1:], 'cmd_prefix')
463 except KeyError:
464 prefix = ['cmd_prefix']
465
466 try:
467 value = get_production(p[1:], 'assignment_word')[1]
468 value = ('ASSIGNMENT_WORD', value.split('=', 1))
469 except KeyError:
470 value = get_production(p[1:], 'io_redirect')
471 p[0] = prefix + [value]
472
473def p_cmd_suffix(p):
474 """cmd_suffix : io_redirect
475 | cmd_suffix io_redirect
476 | token
477 | cmd_suffix token
478 | maybe_for_word
479 | cmd_suffix maybe_for_word
480 | maybe_done_word
481 | cmd_suffix maybe_done_word
482 | maybe_do_word
483 | cmd_suffix maybe_do_word
484 | maybe_until_word
485 | cmd_suffix maybe_until_word
486 | maybe_assignment_word
487 | cmd_suffix maybe_assignment_word
488 | maybe_if_word
489 | cmd_suffix maybe_if_word
490 | maybe_then_word
491 | cmd_suffix maybe_then_word
492 | maybe_bang_word
493 | cmd_suffix maybe_bang_word"""
494 try:
495 suffix = get_production(p[1:], 'cmd_suffix')
496 token = p[2]
497 except KeyError:
498 suffix = ['cmd_suffix']
499 token = p[1]
500
501 if isinstance(token, tuple):
502 if token[0]=='io_redirect':
503 p[0] = suffix + [token]
504 else:
505 #Convert maybe_* to TOKEN if necessary
506 p[0] = suffix + [('TOKEN', token[1])]
507 else:
508 p[0] = suffix + [('TOKEN', token)]
509
510def p_redirect_list(p):
511 """redirect_list : io_redirect
512 | redirect_list io_redirect"""
513 if len(p) == 2:
514 p[0] = ['redirect_list', make_io_redirect(p[1])]
515 else:
516 p[0] = p[1] + [make_io_redirect(p[2])]
517
518def p_io_redirect(p):
519 """io_redirect : io_file
520 | IO_NUMBER io_file
521 | io_here
522 | IO_NUMBER io_here"""
523 if len(p)==3:
524 p[0] = ('io_redirect', p[1], p[2])
525 else:
526 p[0] = ('io_redirect', None, p[1])
527
528def p_io_file(p):
529 #Return the tuple (operator, filename)
530 """io_file : LESS filename
531 | LESSAND filename
532 | GREATER filename
533 | GREATAND filename
534 | DGREAT filename
535 | LESSGREAT filename
536 | CLOBBER filename"""
537 #Extract the filename from the file
538 p[0] = ('io_file', p[1], p[2][1])
539
540def p_filename(p):
541 #Return the filename
542 """filename : TOKEN"""
543 p[0] = ('filename', p[1])
544
545def p_io_here(p):
546 """io_here : DLESS here_end
547 | DLESSDASH here_end"""
548 p[0] = ('io_here', p[1], p[2][1], p[2][2])
549
550def p_here_end(p):
551 """here_end : HERENAME TOKEN"""
552 p[0] = ('here_document', p[1], p[2])
553
554def p_newline_sequence(p):
555 # Nothing in the grammar can handle leading NEWLINE productions, so add
556    # this one with the lowest possible priority relative to newline_list.
557 """newline_sequence : newline_list"""
558 p[0] = None
559
560def p_newline_list(p):
561 """newline_list : NEWLINE
562 | newline_list NEWLINE"""
563 p[0] = None
564
565def p_linebreak(p):
566 """linebreak : newline_list
567 | empty"""
568 p[0] = None
569
570def p_separator_op(p):
571 """separator_op : COMMA
572 | AMP"""
573 p[0] = p[1]
574
575def p_separator(p):
576 """separator : separator_op linebreak
577 | newline_list"""
578 if len(p)==2:
579 #Ignore newlines
580 p[0] = None
581 else:
582 #Keep the separator operator
583 p[0] = ('separator', p[1])
584
585def p_sequential_sep(p):
586 """sequential_sep : COMMA linebreak
587 | newline_list"""
588 p[0] = None
589
590# Low priority TOKEN => for_word conversion.
591# Let maybe_for_word be used as a token when necessary in higher priority
592# rules.
593def p_for_word(p):
594 """for_word : maybe_for_word"""
595 p[0] = p[1]
596
597def p_if_word(p):
598 """if_word : maybe_if_word"""
599 p[0] = p[1]
600
601def p_then_word(p):
602 """then_word : maybe_then_word"""
603 p[0] = p[1]
604
605def p_done_word(p):
606 """done_word : maybe_done_word"""
607 p[0] = p[1]
608
609def p_do_word(p):
610 """do_word : maybe_do_word"""
611 p[0] = p[1]
612
613def p_until_word(p):
614 """until_word : maybe_until_word"""
615 p[0] = p[1]
616
617def p_assignment_word(p):
618 """assignment_word : maybe_assignment_word"""
619 p[0] = ('assignment_word', p[1][1])
620
621def p_bang_word(p):
622 """bang_word : maybe_bang_word"""
623 p[0] = ('bang_word', p[1][1])
624
625def p_token(p):
626 """token : TOKEN
627 | Fi"""
628 p[0] = p[1]
629
630def p_empty(p):
631 'empty :'
632 p[0] = None
633
634# Error rule for syntax errors
635def p_error(p):
636 msg = []
637 w = msg.append
638 w('%r\n' % p)
639 w('followed by:\n')
640 for i in range(5):
641 n = yacc.token()
642 if not n:
643 break
644 w(' %r\n' % n)
645 raise sherrors.ShellSyntaxError(''.join(msg))
646
647# Build the parser
648try:
649 import pyshtables
650except ImportError:
651 yacc.yacc(tabmodule = 'pyshtables')
652else:
653 yacc.yacc(tabmodule = 'pysh.pyshtables', write_tables = 0, debug = 0)
654
655
656def parse(input, eof=False, debug=False):
657 """Parse a whole script at once and return the generated AST and unconsumed
658 data in a tuple.
659
660 NOTE: eof is probably meaningless for now, the parser being unable to work
661 in pull mode. It should be set to True.
662 """
663 lexer = pyshlex.PLYLexer()
664 remaining = lexer.add(input, eof)
665 if lexer.is_empty():
666 return [], remaining
667 if debug:
668 debug = 2
669 return yacc.parse(lexer=lexer, debug=debug), remaining
670
671#-------------------------------------------------------------------------------
672# AST rendering helpers
673#-------------------------------------------------------------------------------
674
675def format_commands(v):
676    """Return a tree made of strings and lists, making command trees easier
677    to display.
678 """
679 if isinstance(v, list):
680 return [format_commands(c) for c in v]
681 if isinstance(v, tuple):
682 if len(v)==2 and isinstance(v[0], str) and not isinstance(v[1], str):
683 if v[0] == 'async':
684 return ['AsyncList', map(format_commands, v[1])]
685 else:
686 #Avoid decomposing tuples like ('pipeline', Pipeline(...))
687 return format_commands(v[1])
688 return format_commands(list(v))
689 elif isinstance(v, IfCond):
690 name = ['IfCond']
691 name += ['if', map(format_commands, v.cond)]
692 name += ['then', map(format_commands, v.if_cmds)]
693 name += ['else', map(format_commands, v.else_cmds)]
694 return name
695 elif isinstance(v, ForLoop):
696 name = ['ForLoop']
697 name += [repr(v.name)+' in ', map(str, v.items)]
698 name += ['commands', map(format_commands, v.cmds)]
699 return name
700 elif isinstance(v, AndOr):
701 return [v.op, format_commands(v.left), format_commands(v.right)]
702 elif isinstance(v, Pipeline):
703 name = 'Pipeline'
704 if v.reverse_status:
705 name = '!' + name
706 return [name, format_commands(v.commands)]
707 elif isinstance(v, SimpleCommand):
708 name = ['SimpleCommand']
709 if v.words:
710 name += ['words', map(str, v.words)]
711 if v.assigns:
712 assigns = [tuple(a[1]) for a in v.assigns]
713 name += ['assigns', map(str, assigns)]
714 if v.redirs:
715 name += ['redirs', map(format_commands, v.redirs)]
716 return name
717 elif isinstance(v, RedirectList):
718 name = ['RedirectList']
719 if v.redirs:
720 name += ['redirs', map(format_commands, v.redirs)]
721 name += ['command', format_commands(v.cmd)]
722 return name
723 elif isinstance(v, IORedirect):
724 return ' '.join(map(str, (v.io_number, v.op, v.filename)))
725 elif isinstance(v, HereDocument):
726 return ' '.join(map(str, (v.io_number, v.op, repr(v.name), repr(v.content))))
727 elif isinstance(v, SubShell):
728 return ['SubShell', map(format_commands, v.cmds)]
729 else:
730 return repr(v)
731
732def print_commands(cmds, output=sys.stdout):
733 """Pretty print a command tree."""
734 def print_tree(cmd, spaces, output):
735 if isinstance(cmd, list):
736 for c in cmd:
737 print_tree(c, spaces + 3, output)
738 else:
739 print >>output, ' '*spaces + str(cmd)
740
741 formatted = format_commands(cmds)
742 print_tree(formatted, 0, output)
743
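
A sketch tying parse() and print_commands() together; the rendered tree below
is illustrative, not the exact output:

    import pyshyacc

    cmds, remaining = pyshyacc.parse('echo hello && ls\n', eof=True)
    pyshyacc.print_commands(cmds)
    # Prints an indented tree along the lines of:
    #    &&
    #       SimpleCommand words [('cmd_name', 'echo'), ('TOKEN', 'hello')]
    #       SimpleCommand words [('cmd_name', 'ls')]
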
744
745def stringify_commands(cmds):
746 """Serialize a command tree as a string.
747
748    The returned string is not pretty; it is currently used for unit tests only.
749 """
750 def stringify(value):
751 output = []
752 if isinstance(value, list):
753 formatted = []
754 for v in value:
755 formatted.append(stringify(v))
756 formatted = ' '.join(formatted)
757 output.append(''.join(['<', formatted, '>']))
758 else:
759 output.append(value)
760 return ' '.join(output)
761
762 return stringify(format_commands(cmds))
763
764
765def visit_commands(cmds, callable):
766 """Visit the command tree and execute callable on every Pipeline and
767    SimpleCommand instance.
768 """
769 if isinstance(cmds, (tuple, list)):
770 map(lambda c: visit_commands(c,callable), cmds)
771 elif isinstance(cmds, (Pipeline, SimpleCommand)):
772 callable(cmds)
diff --git a/bitbake/lib/pysh/sherrors.py b/bitbake/lib/pysh/sherrors.py
new file mode 100644
index 0000000000..1d5bd53b3a
--- /dev/null
+++ b/bitbake/lib/pysh/sherrors.py
@@ -0,0 +1,41 @@
1# sherrors.py - shell errors and signals
2#
3# Copyright 2007 Patrick Mezard
4#
5# This software may be used and distributed according to the terms
6# of the GNU General Public License, incorporated herein by reference.
7
8"""Define shell exceptions and error codes.
9"""
10
11class ShellError(Exception):
12 pass
13
14class ShellSyntaxError(ShellError):
15 pass
16
17class UtilityError(ShellError):
18 """Raised upon utility syntax error (option or operand error)."""
19 pass
20
21class ExpansionError(ShellError):
22 pass
23
24class CommandNotFound(ShellError):
25 """Specified command was not found."""
26 pass
27
28class RedirectionError(ShellError):
29 pass
30
31class VarAssignmentError(ShellError):
32 """Variable assignment error."""
33 pass
34
35class ExitSignal(ShellError):
36 """Exit signal."""
37 pass
38
39class ReturnSignal(ShellError):
40    """Return signal."""
41 pass \ No newline at end of file
diff --git a/bitbake/lib/pysh/subprocess_fix.py b/bitbake/lib/pysh/subprocess_fix.py
new file mode 100644
index 0000000000..46eca22802
--- /dev/null
+++ b/bitbake/lib/pysh/subprocess_fix.py
@@ -0,0 +1,77 @@
1# subprocess - Subprocesses with accessible I/O streams
2#
3# For more information about this module, see PEP 324.
4#
5# This module should remain compatible with Python 2.2, see PEP 291.
6#
7# Copyright (c) 2003-2005 by Peter Astrand <astrand@lysator.liu.se>
8#
9# Licensed to PSF under a Contributor Agreement.
10# See http://www.python.org/2.4/license for licensing details.
11
12def list2cmdline(seq):
13 """
14 Translate a sequence of arguments into a command line
15 string, using the same rules as the MS C runtime:
16
17 1) Arguments are delimited by white space, which is either a
18 space or a tab.
19
20 2) A string surrounded by double quotation marks is
21 interpreted as a single argument, regardless of white space
22 contained within. A quoted string can be embedded in an
23 argument.
24
25 3) A double quotation mark preceded by a backslash is
26 interpreted as a literal double quotation mark.
27
28 4) Backslashes are interpreted literally, unless they
29 immediately precede a double quotation mark.
30
31 5) If backslashes immediately precede a double quotation mark,
32 every pair of backslashes is interpreted as a literal
33 backslash. If the number of backslashes is odd, the last
34 backslash escapes the next double quotation mark as
35 described in rule 3.
36 """
37
38 # See
39 # http://msdn.microsoft.com/library/en-us/vccelng/htm/progs_12.asp
40 result = []
41 needquote = False
42 for arg in seq:
43 bs_buf = []
44
45 # Add a space to separate this argument from the others
46 if result:
47 result.append(' ')
48
49 needquote = (" " in arg) or ("\t" in arg) or ("|" in arg) or arg == ""
50 if needquote:
51 result.append('"')
52
53 for c in arg:
54 if c == '\\':
55 # Don't know if we need to double yet.
56 bs_buf.append(c)
57 elif c == '"':
58                # Double backslashes.
59 result.append('\\' * len(bs_buf)*2)
60 bs_buf = []
61 result.append('\\"')
62 else:
63 # Normal char
64 if bs_buf:
65 result.extend(bs_buf)
66 bs_buf = []
67 result.append(c)
68
69        # Add remaining backslashes, if any.
70 if bs_buf:
71 result.extend(bs_buf)
72
73 if needquote:
74 result.extend(bs_buf)
75 result.append('"')
76
77 return ''.join(result)
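
For example (results shown as Python string literals):

    list2cmdline(['a b', 'c"d', ''])
    # -> '"a b" c\\"d ""'
    list2cmdline(['grep', 'a|b'])
    # -> 'grep "a|b"'

The second case shows the local change that presumably motivated this copy:
arguments containing '|' are quoted, which the stock list2cmdline of the time
did not do.
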