Skip to content
Permalink
51665236fa
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
855 lines (689 sloc) 23.1 KB
#!/usr/bin/env python
#
# Copyright 2008, Google Inc.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""pump v0.2.0 - Pretty Useful for Meta Programming.
A tool for preprocessor meta programming. Useful for generating
repetitive boilerplate code. Especially useful for writing C++
classes, functions, macros, and templates that need to work with
various number of arguments.
USAGE:
pump.py SOURCE_FILE
EXAMPLES:
pump.py foo.cc.pump
Converts foo.cc.pump to foo.cc.
GRAMMAR:
CODE ::= ATOMIC_CODE*
ATOMIC_CODE ::= $var ID = EXPRESSION
| $var ID = [[ CODE ]]
| $range ID EXPRESSION..EXPRESSION
| $for ID SEPARATOR [[ CODE ]]
| $($)
| $ID
| $(EXPRESSION)
| $if EXPRESSION [[ CODE ]] ELSE_BRANCH
| [[ CODE ]]
| RAW_CODE
SEPARATOR ::= RAW_CODE | EMPTY
ELSE_BRANCH ::= $else [[ CODE ]]
| $elif EXPRESSION [[ CODE ]] ELSE_BRANCH
| EMPTY
EXPRESSION has Python syntax.
"""
__author__ = 'wan@google.com (Zhanyong Wan)'
import os
import re
import sys
TOKEN_TABLE = [
(re.compile(r'\$var\s+'), '$var'),
(re.compile(r'\$elif\s+'), '$elif'),
(re.compile(r'\$else\s+'), '$else'),
(re.compile(r'\$for\s+'), '$for'),
(re.compile(r'\$if\s+'), '$if'),
(re.compile(r'\$range\s+'), '$range'),
(re.compile(r'\$[_A-Za-z]\w*'), '$id'),
(re.compile(r'\$\(\$\)'), '$($)'),
(re.compile(r'\$'), '$'),
(re.compile(r'\[\[\n?'), '[['),
(re.compile(r'\]\]\n?'), ']]'),
]
class Cursor:
"""Represents a position (line and column) in a text file."""
def __init__(self, line=-1, column=-1):
self.line = line
self.column = column
def __eq__(self, rhs):
return self.line == rhs.line and self.column == rhs.column
def __ne__(self, rhs):
return not self == rhs
def __lt__(self, rhs):
return self.line < rhs.line or (
self.line == rhs.line and self.column < rhs.column)
def __le__(self, rhs):
return self < rhs or self == rhs
def __gt__(self, rhs):
return rhs < self
def __ge__(self, rhs):
return rhs <= self
def __str__(self):
if self == Eof():
return 'EOF'
else:
return '%s(%s)' % (self.line + 1, self.column)
def __add__(self, offset):
return Cursor(self.line, self.column + offset)
def __sub__(self, offset):
return Cursor(self.line, self.column - offset)
def Clone(self):
"""Returns a copy of self."""
return Cursor(self.line, self.column)
# Special cursor to indicate the end-of-file.
def Eof():
"""Returns the special cursor to denote the end-of-file."""
return Cursor(-1, -1)
class Token:
"""Represents a token in a Pump source file."""
def __init__(self, start=None, end=None, value=None, token_type=None):
if start is None:
self.start = Eof()
else:
self.start = start
if end is None:
self.end = Eof()
else:
self.end = end
self.value = value
self.token_type = token_type
def __str__(self):
return 'Token @%s: \'%s\' type=%s' % (
self.start, self.value, self.token_type)
def Clone(self):
"""Returns a copy of self."""
return Token(self.start.Clone(), self.end.Clone(), self.value,
self.token_type)
def StartsWith(lines, pos, string):
"""Returns True iff the given position in lines starts with 'string'."""
return lines[pos.line][pos.column:].startswith(string)
def FindFirstInLine(line, token_table):
best_match_start = -1
for (regex, token_type) in token_table:
m = regex.search(line)
if m:
# We found regex in lines
if best_match_start < 0 or m.start() < best_match_start:
best_match_start = m.start()
best_match_length = m.end() - m.start()
best_match_token_type = token_type
if best_match_start < 0:
return None
return (best_match_start, best_match_length, best_match_token_type)
def FindFirst(lines, token_table, cursor):
"""Finds the first occurrence of any string in strings in lines."""
start = cursor.Clone()
cur_line_number = cursor.line
for line in lines[start.line:]:
if cur_line_number == start.line:
line = line[start.column:]
m = FindFirstInLine(line, token_table)
if m:
# We found a regex in line.
(start_column, length, token_type) = m
if cur_line_number == start.line:
start_column += start.column
found_start = Cursor(cur_line_number, start_column)
found_end = found_start + length
return MakeToken(lines, found_start, found_end, token_type)
cur_line_number += 1
# We failed to find str in lines
return None
def SubString(lines, start, end):
"""Returns a substring in lines."""
if end == Eof():
end = Cursor(len(lines) - 1, len(lines[-1]))
if start >= end:
return ''
if start.line == end.line:
return lines[start.line][start.column:end.column]
result_lines = ([lines[start.line][start.column:]] +
lines[start.line + 1:end.line] +
[lines[end.line][:end.column]])
return ''.join(result_lines)
def StripMetaComments(str):
"""Strip meta comments from each line in the given string."""
# First, completely remove lines containing nothing but a meta
# comment, including the trailing \n.
str = re.sub(r'^\s*\$\$.*\n', '', str)
# Then, remove meta comments from contentful lines.
return re.sub(r'\s*\$\$.*', '', str)
def MakeToken(lines, start, end, token_type):
"""Creates a new instance of Token."""
return Token(start, end, SubString(lines, start, end), token_type)
def ParseToken(lines, pos, regex, token_type):
line = lines[pos.line][pos.column:]
m = regex.search(line)
if m and not m.start():
return MakeToken(lines, pos, pos + m.end(), token_type)
else:
print 'ERROR: %s expected at %s.' % (token_type, pos)
sys.exit(1)
ID_REGEX = re.compile(r'[_A-Za-z]\w*')
EQ_REGEX = re.compile(r'=')
REST_OF_LINE_REGEX = re.compile(r'.*?(?=$|\$\$)')
OPTIONAL_WHITE_SPACES_REGEX = re.compile(r'\s*')
WHITE_SPACE_REGEX = re.compile(r'\s')
DOT_DOT_REGEX = re.compile(r'\.\.')
def Skip(lines, pos, regex):
line = lines[pos.line][pos.column:]
m = re.search(regex, line)
if m and not m.start():
return pos + m.end()
else:
return pos
def SkipUntil(lines, pos, regex, token_type):
line = lines[pos.line][pos.column:]
m = re.search(regex, line)
if m:
return pos + m.start()
else:
print ('ERROR: %s expected on line %s after column %s.' %
(token_type, pos.line + 1, pos.column))
sys.exit(1)
def ParseExpTokenInParens(lines, pos):
def ParseInParens(pos):
pos = Skip(lines, pos, OPTIONAL_WHITE_SPACES_REGEX)
pos = Skip(lines, pos, r'\(')
pos = Parse(pos)
pos = Skip(lines, pos, r'\)')
return pos
def Parse(pos):
pos = SkipUntil(lines, pos, r'\(|\)', ')')
if SubString(lines, pos, pos + 1) == '(':
pos = Parse(pos + 1)
pos = Skip(lines, pos, r'\)')
return Parse(pos)
else:
return pos
start = pos.Clone()
pos = ParseInParens(pos)
return MakeToken(lines, start, pos, 'exp')
def RStripNewLineFromToken(token):
if token.value.endswith('\n'):
return Token(token.start, token.end, token.value[:-1], token.token_type)
else:
return token
def TokenizeLines(lines, pos):
while True:
found = FindFirst(lines, TOKEN_TABLE, pos)
if not found:
yield MakeToken(lines, pos, Eof(), 'code')
return
if found.start == pos:
prev_token = None
prev_token_rstripped = None
else:
prev_token = MakeToken(lines, pos, found.start, 'code')
prev_token_rstripped = RStripNewLineFromToken(prev_token)
if found.token_type == '$var':
if prev_token_rstripped:
yield prev_token_rstripped
yield found
id_token = ParseToken(lines, found.end, ID_REGEX, 'id')
yield id_token
pos = Skip(lines, id_token.end, OPTIONAL_WHITE_SPACES_REGEX)
eq_token = ParseToken(lines, pos, EQ_REGEX, '=')
yield eq_token
pos = Skip(lines, eq_token.end, r'\s*')
if SubString(lines, pos, pos + 2) != '[[':
exp_token = ParseToken(lines, pos, REST_OF_LINE_REGEX, 'exp')
yield exp_token
pos = Cursor(exp_token.end.line + 1, 0)
elif found.token_type == '$for':
if prev_token_rstripped:
yield prev_token_rstripped
yield found
id_token = ParseToken(lines, found.end, ID_REGEX, 'id')
yield id_token
pos = Skip(lines, id_token.end, WHITE_SPACE_REGEX)
elif found.token_type == '$range':
if prev_token_rstripped:
yield prev_token_rstripped
yield found
id_token = ParseToken(lines, found.end, ID_REGEX, 'id')
yield id_token
pos = Skip(lines, id_token.end, OPTIONAL_WHITE_SPACES_REGEX)
dots_pos = SkipUntil(lines, pos, DOT_DOT_REGEX, '..')
yield MakeToken(lines, pos, dots_pos, 'exp')
yield MakeToken(lines, dots_pos, dots_pos + 2, '..')
pos = dots_pos + 2
new_pos = Cursor(pos.line + 1, 0)
yield MakeToken(lines, pos, new_pos, 'exp')
pos = new_pos
elif found.token_type == '$':
if prev_token:
yield prev_token
yield found
exp_token = ParseExpTokenInParens(lines, found.end)
yield exp_token
pos = exp_token.end
elif (found.token_type == ']]' or found.token_type == '$if' or
found.token_type == '$elif' or found.token_type == '$else'):
if prev_token_rstripped:
yield prev_token_rstripped
yield found
pos = found.end
else:
if prev_token:
yield prev_token
yield found
pos = found.end
def Tokenize(s):
"""A generator that yields the tokens in the given string."""
if s != '':
lines = s.splitlines(True)
for token in TokenizeLines(lines, Cursor(0, 0)):
yield token
class CodeNode:
def __init__(self, atomic_code_list=None):
self.atomic_code = atomic_code_list
class VarNode:
def __init__(self, identifier=None, atomic_code=None):
self.identifier = identifier
self.atomic_code = atomic_code
class RangeNode:
def __init__(self, identifier=None, exp1=None, exp2=None):
self.identifier = identifier
self.exp1 = exp1
self.exp2 = exp2
class ForNode:
def __init__(self, identifier=None, sep=None, code=None):
self.identifier = identifier
self.sep = sep
self.code = code
class ElseNode:
def __init__(self, else_branch=None):
self.else_branch = else_branch
class IfNode:
def __init__(self, exp=None, then_branch=None, else_branch=None):
self.exp = exp
self.then_branch = then_branch
self.else_branch = else_branch
class RawCodeNode:
def __init__(self, token=None):
self.raw_code = token
class LiteralDollarNode:
def __init__(self, token):
self.token = token
class ExpNode:
def __init__(self, token, python_exp):
self.token = token
self.python_exp = python_exp
def PopFront(a_list):
head = a_list[0]
a_list[:1] = []
return head
def PushFront(a_list, elem):
a_list[:0] = [elem]
def PopToken(a_list, token_type=None):
token = PopFront(a_list)
if token_type is not None and token.token_type != token_type:
print 'ERROR: %s expected at %s' % (token_type, token.start)
print 'ERROR: %s found instead' % (token,)
sys.exit(1)
return token
def PeekToken(a_list):
if not a_list:
return None
return a_list[0]
def ParseExpNode(token):
python_exp = re.sub(r'([_A-Za-z]\w*)', r'self.GetValue("\1")', token.value)
return ExpNode(token, python_exp)
def ParseElseNode(tokens):
def Pop(token_type=None):
return PopToken(tokens, token_type)
next = PeekToken(tokens)
if not next:
return None
if next.token_type == '$else':
Pop('$else')
Pop('[[')
code_node = ParseCodeNode(tokens)
Pop(']]')
return code_node
elif next.token_type == '$elif':
Pop('$elif')
exp = Pop('code')
Pop('[[')
code_node = ParseCodeNode(tokens)
Pop(']]')
inner_else_node = ParseElseNode(tokens)
return CodeNode([IfNode(ParseExpNode(exp), code_node, inner_else_node)])
elif not next.value.strip():
Pop('code')
return ParseElseNode(tokens)
else:
return None
def ParseAtomicCodeNode(tokens):
def Pop(token_type=None):
return PopToken(tokens, token_type)
head = PopFront(tokens)
t = head.token_type
if t == 'code':
return RawCodeNode(head)
elif t == '$var':
id_token = Pop('id')
Pop('=')
next = PeekToken(tokens)
if next.token_type == 'exp':
exp_token = Pop()
return VarNode(id_token, ParseExpNode(exp_token))
Pop('[[')
code_node = ParseCodeNode(tokens)
Pop(']]')
return VarNode(id_token, code_node)
elif t == '$for':
id_token = Pop('id')
next_token = PeekToken(tokens)
if next_token.token_type == 'code':
sep_token = next_token
Pop('code')
else:
sep_token = None
Pop('[[')
code_node = ParseCodeNode(tokens)
Pop(']]')
return ForNode(id_token, sep_token, code_node)
elif t == '$if':
exp_token = Pop('code')
Pop('[[')
code_node = ParseCodeNode(tokens)
Pop(']]')
else_node = ParseElseNode(tokens)
return IfNode(ParseExpNode(exp_token), code_node, else_node)
elif t == '$range':
id_token = Pop('id')
exp1_token = Pop('exp')
Pop('..')
exp2_token = Pop('exp')
return RangeNode(id_token, ParseExpNode(exp1_token),
ParseExpNode(exp2_token))
elif t == '$id':
return ParseExpNode(Token(head.start + 1, head.end, head.value[1:], 'id'))
elif t == '$($)':
return LiteralDollarNode(head)
elif t == '$':
exp_token = Pop('exp')
return ParseExpNode(exp_token)
elif t == '[[':
code_node = ParseCodeNode(tokens)
Pop(']]')
return code_node
else:
PushFront(tokens, head)
return None
def ParseCodeNode(tokens):
atomic_code_list = []
while True:
if not tokens:
break
atomic_code_node = ParseAtomicCodeNode(tokens)
if atomic_code_node:
atomic_code_list.append(atomic_code_node)
else:
break
return CodeNode(atomic_code_list)
def ParseToAST(pump_src_text):
"""Convert the given Pump source text into an AST."""
tokens = list(Tokenize(pump_src_text))
code_node = ParseCodeNode(tokens)
return code_node
class Env:
def __init__(self):
self.variables = []
self.ranges = []
def Clone(self):
clone = Env()
clone.variables = self.variables[:]
clone.ranges = self.ranges[:]
return clone
def PushVariable(self, var, value):
# If value looks like an int, store it as an int.
try:
int_value = int(value)
if ('%s' % int_value) == value:
value = int_value
except Exception:
pass
self.variables[:0] = [(var, value)]
def PopVariable(self):
self.variables[:1] = []
def PushRange(self, var, lower, upper):
self.ranges[:0] = [(var, lower, upper)]
def PopRange(self):
self.ranges[:1] = []
def GetValue(self, identifier):
for (var, value) in self.variables:
if identifier == var:
return value
print 'ERROR: meta variable %s is undefined.' % (identifier,)
sys.exit(1)
def EvalExp(self, exp):
try:
result = eval(exp.python_exp)
except Exception, e:
print 'ERROR: caught exception %s: %s' % (e.__class__.__name__, e)
print ('ERROR: failed to evaluate meta expression %s at %s' %
(exp.python_exp, exp.token.start))
sys.exit(1)
return result
def GetRange(self, identifier):
for (var, lower, upper) in self.ranges:
if identifier == var:
return (lower, upper)
print 'ERROR: range %s is undefined.' % (identifier,)
sys.exit(1)
class Output:
def __init__(self):
self.string = ''
def GetLastLine(self):
index = self.string.rfind('\n')
if index < 0:
return ''
return self.string[index + 1:]
def Append(self, s):
self.string += s
def RunAtomicCode(env, node, output):
if isinstance(node, VarNode):
identifier = node.identifier.value.strip()
result = Output()
RunAtomicCode(env.Clone(), node.atomic_code, result)
value = result.string
env.PushVariable(identifier, value)
elif isinstance(node, RangeNode):
identifier = node.identifier.value.strip()
lower = int(env.EvalExp(node.exp1))
upper = int(env.EvalExp(node.exp2))
env.PushRange(identifier, lower, upper)
elif isinstance(node, ForNode):
identifier = node.identifier.value.strip()
if node.sep is None:
sep = ''
else:
sep = node.sep.value
(lower, upper) = env.GetRange(identifier)
for i in range(lower, upper + 1):
new_env = env.Clone()
new_env.PushVariable(identifier, i)
RunCode(new_env, node.code, output)
if i != upper:
output.Append(sep)
elif isinstance(node, RawCodeNode):
output.Append(node.raw_code.value)
elif isinstance(node, IfNode):
cond = env.EvalExp(node.exp)
if cond:
RunCode(env.Clone(), node.then_branch, output)
elif node.else_branch is not None:
RunCode(env.Clone(), node.else_branch, output)
elif isinstance(node, ExpNode):
value = env.EvalExp(node)
output.Append('%s' % (value,))
elif isinstance(node, LiteralDollarNode):
output.Append('$')
elif isinstance(node, CodeNode):
RunCode(env.Clone(), node, output)
else:
print 'BAD'
print node
sys.exit(1)
def RunCode(env, code_node, output):
for atomic_code in code_node.atomic_code:
RunAtomicCode(env, atomic_code, output)
def IsSingleLineComment(cur_line):
return '//' in cur_line
def IsInPreprocessorDirective(prev_lines, cur_line):
if cur_line.lstrip().startswith('#'):
return True
return prev_lines and prev_lines[-1].endswith('\\')
def WrapComment(line, output):
loc = line.find('//')
before_comment = line[:loc].rstrip()
if before_comment == '':
indent = loc
else:
output.append(before_comment)
indent = len(before_comment) - len(before_comment.lstrip())
prefix = indent*' ' + '// '
max_len = 80 - len(prefix)
comment = line[loc + 2:].strip()
segs = [seg for seg in re.split(r'(\w+\W*)', comment) if seg != '']
cur_line = ''
for seg in segs:
if len((cur_line + seg).rstrip()) < max_len:
cur_line += seg
else:
if cur_line.strip() != '':
output.append(prefix + cur_line.rstrip())
cur_line = seg.lstrip()
if cur_line.strip() != '':
output.append(prefix + cur_line.strip())
def WrapCode(line, line_concat, output):
indent = len(line) - len(line.lstrip())
prefix = indent*' ' # Prefix of the current line
max_len = 80 - indent - len(line_concat) # Maximum length of the current line
new_prefix = prefix + 4*' ' # Prefix of a continuation line
new_max_len = max_len - 4 # Maximum length of a continuation line
# Prefers to wrap a line after a ',' or ';'.
segs = [seg for seg in re.split(r'([^,;]+[,;]?)', line.strip()) if seg != '']
cur_line = '' # The current line without leading spaces.
for seg in segs:
# If the line is still too long, wrap at a space.
while cur_line == '' and len(seg.strip()) > max_len:
seg = seg.lstrip()
split_at = seg.rfind(' ', 0, max_len)
output.append(prefix + seg[:split_at].strip() + line_concat)
seg = seg[split_at + 1:]
prefix = new_prefix
max_len = new_max_len
if len((cur_line + seg).rstrip()) < max_len:
cur_line = (cur_line + seg).lstrip()
else:
output.append(prefix + cur_line.rstrip() + line_concat)
prefix = new_prefix
max_len = new_max_len
cur_line = seg.lstrip()
if cur_line.strip() != '':
output.append(prefix + cur_line.strip())
def WrapPreprocessorDirective(line, output):
WrapCode(line, ' \\', output)
def WrapPlainCode(line, output):
WrapCode(line, '', output)
def IsMultiLineIWYUPragma(line):
return re.search(r'/\* IWYU pragma: ', line)
def IsHeaderGuardIncludeOrOneLineIWYUPragma(line):
return (re.match(r'^#(ifndef|define|endif\s*//)\s*[\w_]+\s*$', line) or
re.match(r'^#include\s', line) or
# Don't break IWYU pragmas, either; that causes iwyu.py problems.
re.search(r'// IWYU pragma: ', line))
def WrapLongLine(line, output):
line = line.rstrip()
if len(line) <= 80:
output.append(line)
elif IsSingleLineComment(line):
if IsHeaderGuardIncludeOrOneLineIWYUPragma(line):
# The style guide made an exception to allow long header guard lines,
# includes and IWYU pragmas.
output.append(line)
else:
WrapComment(line, output)
elif IsInPreprocessorDirective(output, line):
if IsHeaderGuardIncludeOrOneLineIWYUPragma(line):
# The style guide made an exception to allow long header guard lines,
# includes and IWYU pragmas.
output.append(line)
else:
WrapPreprocessorDirective(line, output)
elif IsMultiLineIWYUPragma(line):
output.append(line)
else:
WrapPlainCode(line, output)
def BeautifyCode(string):
lines = string.splitlines()
output = []
for line in lines:
WrapLongLine(line, output)
output2 = [line.rstrip() for line in output]
return '\n'.join(output2) + '\n'
def ConvertFromPumpSource(src_text):
"""Return the text generated from the given Pump source text."""
ast = ParseToAST(StripMetaComments(src_text))
output = Output()
RunCode(Env(), ast, output)
return BeautifyCode(output.string)
def main(argv):
if len(argv) == 1:
print __doc__
sys.exit(1)
file_path = argv[-1]
output_str = ConvertFromPumpSource(file(file_path, 'r').read())
if file_path.endswith('.pump'):
output_file_path = file_path[:-5]
else:
output_file_path = '-'
if output_file_path == '-':
print output_str,
else:
output_file = file(output_file_path, 'w')
output_file.write('// This file was GENERATED by command:\n')
output_file.write('// %s %s\n' %
(os.path.basename(__file__), os.path.basename(file_path)))
output_file.write('// DO NOT EDIT BY HAND!!!\n\n')
output_file.write(output_str)
output_file.close()
if __name__ == '__main__':
main(sys.argv)