| Index: third_party/lit/lit/ShUtil.py
|
| diff --git a/third_party/lit/lit/ShUtil.py b/third_party/lit/lit/ShUtil.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..1945ba723bcd63b215d60b33b21b9a3c7157823c
|
| --- /dev/null
|
| +++ b/third_party/lit/lit/ShUtil.py
|
| @@ -0,0 +1,355 @@
|
| +from __future__ import absolute_import
|
| +import itertools
|
| +
|
| +import lit.util
|
| +from lit.ShCommands import Command, Pipeline, Seq
|
| +
|
| +class ShLexer:
|
| + def __init__(self, data, win32Escapes = False):
|
| + self.data = data
|
| + self.pos = 0
|
| + self.end = len(data)
|
| + self.win32Escapes = win32Escapes
|
| +
|
| + def eat(self):
|
| + c = self.data[self.pos]
|
| + self.pos += 1
|
| + return c
|
| +
|
| + def look(self):
|
| + return self.data[self.pos]
|
| +
|
| + def maybe_eat(self, c):
|
| + """
|
| + maybe_eat(c) - Consume the character c if it is the next character,
|
| + returning True if a character was consumed. """
|
| + if self.data[self.pos] == c:
|
| + self.pos += 1
|
| + return True
|
| + return False
|
| +
|
| + def lex_arg_fast(self, c):
|
| + # Get the leading whitespace free section.
|
| + chunk = self.data[self.pos - 1:].split(None, 1)[0]
|
| +
|
| + # If it has special characters, the fast path failed.
|
| + if ('|' in chunk or '&' in chunk or
|
| + '<' in chunk or '>' in chunk or
|
| + "'" in chunk or '"' in chunk or
|
| + ';' in chunk or '\\' in chunk):
|
| + return None
|
| +
|
| + self.pos = self.pos - 1 + len(chunk)
|
| + return chunk
|
| +
|
| + def lex_arg_slow(self, c):
|
| + if c in "'\"":
|
| + str = self.lex_arg_quoted(c)
|
| + else:
|
| + str = c
|
| + while self.pos != self.end:
|
| + c = self.look()
|
| + if c.isspace() or c in "|&;":
|
| + break
|
| + elif c in '><':
|
| + # This is an annoying case; we treat '2>' as a single token so
|
| + # we don't have to track whitespace tokens.
|
| +
|
| + # If the parse string isn't an integer, do the usual thing.
|
| + if not str.isdigit():
|
| + break
|
| +
|
| + # Otherwise, lex the operator and convert to a redirection
|
| + # token.
|
| + num = int(str)
|
| + tok = self.lex_one_token()
|
| + assert isinstance(tok, tuple) and len(tok) == 1
|
| + return (tok[0], num)
|
| + elif c == '"':
|
| + self.eat()
|
| + str += self.lex_arg_quoted('"')
|
| + elif c == "'":
|
| + self.eat()
|
| + str += self.lex_arg_quoted("'")
|
| + elif not self.win32Escapes and c == '\\':
|
| + # Outside of a string, '\\' escapes everything.
|
| + self.eat()
|
| + if self.pos == self.end:
|
| + lit.util.warning(
|
| + "escape at end of quoted argument in: %r" % self.data)
|
| + return str
|
| + str += self.eat()
|
| + else:
|
| + str += self.eat()
|
| + return str
|
| +
|
| + def lex_arg_quoted(self, delim):
|
| + str = ''
|
| + while self.pos != self.end:
|
| + c = self.eat()
|
| + if c == delim:
|
| + return str
|
| + elif c == '\\' and delim == '"':
|
| + # Inside a '"' quoted string, '\\' only escapes the quote
|
| + # character and backslash, otherwise it is preserved.
|
| + if self.pos == self.end:
|
| + lit.util.warning(
|
| + "escape at end of quoted argument in: %r" % self.data)
|
| + return str
|
| + c = self.eat()
|
| + if c == '"': #
|
| + str += '"'
|
| + elif c == '\\':
|
| + str += '\\'
|
| + else:
|
| + str += '\\' + c
|
| + else:
|
| + str += c
|
| + lit.util.warning("missing quote character in %r" % self.data)
|
| + return str
|
| +
|
| + def lex_arg_checked(self, c):
|
| + pos = self.pos
|
| + res = self.lex_arg_fast(c)
|
| + end = self.pos
|
| +
|
| + self.pos = pos
|
| + reference = self.lex_arg_slow(c)
|
| + if res is not None:
|
| + if res != reference:
|
| + raise ValueError("Fast path failure: %r != %r" % (
|
| + res, reference))
|
| + if self.pos != end:
|
| + raise ValueError("Fast path failure: %r != %r" % (
|
| + self.pos, end))
|
| + return reference
|
| +
|
| + def lex_arg(self, c):
|
| + return self.lex_arg_fast(c) or self.lex_arg_slow(c)
|
| +
|
| + def lex_one_token(self):
|
| + """
|
| + lex_one_token - Lex a single 'sh' token. """
|
| +
|
| + c = self.eat()
|
| + if c == ';':
|
| + return (c,)
|
| + if c == '|':
|
| + if self.maybe_eat('|'):
|
| + return ('||',)
|
| + return (c,)
|
| + if c == '&':
|
| + if self.maybe_eat('&'):
|
| + return ('&&',)
|
| + if self.maybe_eat('>'):
|
| + return ('&>',)
|
| + return (c,)
|
| + if c == '>':
|
| + if self.maybe_eat('&'):
|
| + return ('>&',)
|
| + if self.maybe_eat('>'):
|
| + return ('>>',)
|
| + return (c,)
|
| + if c == '<':
|
| + if self.maybe_eat('&'):
|
| + return ('<&',)
|
| + if self.maybe_eat('>'):
|
| + return ('<<',)
|
| + return (c,)
|
| +
|
| + return self.lex_arg(c)
|
| +
|
| + def lex(self):
|
| + while self.pos != self.end:
|
| + if self.look().isspace():
|
| + self.eat()
|
| + else:
|
| + yield self.lex_one_token()
|
| +
|
| +###
|
| +
|
| +class ShParser:
|
| + def __init__(self, data, win32Escapes = False, pipefail = False):
|
| + self.data = data
|
| + self.pipefail = pipefail
|
| + self.tokens = ShLexer(data, win32Escapes = win32Escapes).lex()
|
| +
|
| + def lex(self):
|
| + for item in self.tokens:
|
| + return item
|
| + return None
|
| +
|
| + def look(self):
|
| + token = self.lex()
|
| + if token is not None:
|
| + self.tokens = itertools.chain([token], self.tokens)
|
| + return token
|
| +
|
| + def parse_command(self):
|
| + tok = self.lex()
|
| + if not tok:
|
| + raise ValueError("empty command!")
|
| + if isinstance(tok, tuple):
|
| + raise ValueError("syntax error near unexpected token %r" % tok[0])
|
| +
|
| + args = [tok]
|
| + redirects = []
|
| + while 1:
|
| + tok = self.look()
|
| +
|
| + # EOF?
|
| + if tok is None:
|
| + break
|
| +
|
| + # If this is an argument, just add it to the current command.
|
| + if isinstance(tok, str):
|
| + args.append(self.lex())
|
| + continue
|
| +
|
| + # Otherwise see if it is a terminator.
|
| + assert isinstance(tok, tuple)
|
| + if tok[0] in ('|',';','&','||','&&'):
|
| + break
|
| +
|
| + # Otherwise it must be a redirection.
|
| + op = self.lex()
|
| + arg = self.lex()
|
| + if not arg:
|
| + raise ValueError("syntax error near token %r" % op[0])
|
| + redirects.append((op, arg))
|
| +
|
| + return Command(args, redirects)
|
| +
|
| + def parse_pipeline(self):
|
| + negate = False
|
| +
|
| + commands = [self.parse_command()]
|
| + while self.look() == ('|',):
|
| + self.lex()
|
| + commands.append(self.parse_command())
|
| + return Pipeline(commands, negate, self.pipefail)
|
| +
|
| + def parse(self):
|
| + lhs = self.parse_pipeline()
|
| +
|
| + while self.look():
|
| + operator = self.lex()
|
| + assert isinstance(operator, tuple) and len(operator) == 1
|
| +
|
| + if not self.look():
|
| + raise ValueError(
|
| + "missing argument to operator %r" % operator[0])
|
| +
|
| + # FIXME: Operator precedence!!
|
| + lhs = Seq(lhs, operator[0], self.parse_pipeline())
|
| +
|
| + return lhs
|
| +
|
| +###
|
| +
|
| +import unittest
|
| +
|
| +class TestShLexer(unittest.TestCase):
|
| + def lex(self, str, *args, **kwargs):
|
| + return list(ShLexer(str, *args, **kwargs).lex())
|
| +
|
| + def test_basic(self):
|
| + self.assertEqual(self.lex('a|b>c&d<e;f'),
|
| + ['a', ('|',), 'b', ('>',), 'c', ('&',), 'd',
|
| + ('<',), 'e', (';',), 'f'])
|
| +
|
| + def test_redirection_tokens(self):
|
| + self.assertEqual(self.lex('a2>c'),
|
| + ['a2', ('>',), 'c'])
|
| + self.assertEqual(self.lex('a 2>c'),
|
| + ['a', ('>',2), 'c'])
|
| +
|
| + def test_quoting(self):
|
| + self.assertEqual(self.lex(""" 'a' """),
|
| + ['a'])
|
| + self.assertEqual(self.lex(""" "hello\\"world" """),
|
| + ['hello"world'])
|
| + self.assertEqual(self.lex(""" "hello\\'world" """),
|
| + ["hello\\'world"])
|
| + self.assertEqual(self.lex(""" "hello\\\\world" """),
|
| + ["hello\\world"])
|
| + self.assertEqual(self.lex(""" he"llo wo"rld """),
|
| + ["hello world"])
|
| + self.assertEqual(self.lex(""" a\\ b a\\\\b """),
|
| + ["a b", "a\\b"])
|
| + self.assertEqual(self.lex(""" "" "" """),
|
| + ["", ""])
|
| + self.assertEqual(self.lex(""" a\\ b """, win32Escapes = True),
|
| + ['a\\', 'b'])
|
| +
|
| +class TestShParse(unittest.TestCase):
|
| + def parse(self, str):
|
| + return ShParser(str).parse()
|
| +
|
| + def test_basic(self):
|
| + self.assertEqual(self.parse('echo hello'),
|
| + Pipeline([Command(['echo', 'hello'], [])], False))
|
| + self.assertEqual(self.parse('echo ""'),
|
| + Pipeline([Command(['echo', ''], [])], False))
|
| + self.assertEqual(self.parse("""echo -DFOO='a'"""),
|
| + Pipeline([Command(['echo', '-DFOO=a'], [])], False))
|
| + self.assertEqual(self.parse('echo -DFOO="a"'),
|
| + Pipeline([Command(['echo', '-DFOO=a'], [])], False))
|
| +
|
| + def test_redirection(self):
|
| + self.assertEqual(self.parse('echo hello > c'),
|
| + Pipeline([Command(['echo', 'hello'],
|
| + [((('>'),), 'c')])], False))
|
| + self.assertEqual(self.parse('echo hello > c >> d'),
|
| + Pipeline([Command(['echo', 'hello'], [(('>',), 'c'),
|
| + (('>>',), 'd')])], False))
|
| + self.assertEqual(self.parse('a 2>&1'),
|
| + Pipeline([Command(['a'], [(('>&',2), '1')])], False))
|
| +
|
| + def test_pipeline(self):
|
| + self.assertEqual(self.parse('a | b'),
|
| + Pipeline([Command(['a'], []),
|
| + Command(['b'], [])],
|
| + False))
|
| +
|
| + self.assertEqual(self.parse('a | b | c'),
|
| + Pipeline([Command(['a'], []),
|
| + Command(['b'], []),
|
| + Command(['c'], [])],
|
| + False))
|
| +
|
| + def test_list(self):
|
| + self.assertEqual(self.parse('a ; b'),
|
| + Seq(Pipeline([Command(['a'], [])], False),
|
| + ';',
|
| + Pipeline([Command(['b'], [])], False)))
|
| +
|
| + self.assertEqual(self.parse('a & b'),
|
| + Seq(Pipeline([Command(['a'], [])], False),
|
| + '&',
|
| + Pipeline([Command(['b'], [])], False)))
|
| +
|
| + self.assertEqual(self.parse('a && b'),
|
| + Seq(Pipeline([Command(['a'], [])], False),
|
| + '&&',
|
| + Pipeline([Command(['b'], [])], False)))
|
| +
|
| + self.assertEqual(self.parse('a || b'),
|
| + Seq(Pipeline([Command(['a'], [])], False),
|
| + '||',
|
| + Pipeline([Command(['b'], [])], False)))
|
| +
|
| + self.assertEqual(self.parse('a && b || c'),
|
| + Seq(Seq(Pipeline([Command(['a'], [])], False),
|
| + '&&',
|
| + Pipeline([Command(['b'], [])], False)),
|
| + '||',
|
| + Pipeline([Command(['c'], [])], False)))
|
| +
|
| + self.assertEqual(self.parse('a; b'),
|
| + Seq(Pipeline([Command(['a'], [])], False),
|
| + ';',
|
| + Pipeline([Command(['b'], [])], False)))
|
| +
|
| +if __name__ == '__main__':
|
| + unittest.main()
|
|
|