#!/usr/bin/env python
# -*- encoding: utf-8
## indenter.py - Top-level indenter for all files
##
## Copyright ©2013 Ben Longbons <b.r.longbons@gmail.com>
##
## This file is part of The Mana World (Athena server)
##
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import print_function
from collections import namedtuple
import cStringIO
import string
import subprocess
import sys
# Settings.
class LexSettings:
    # Formatting knobs for the lex/yacc sections, read as class attributes.

    # Extra columns added after the widest left-hand entry of a Table
    # before the right-hand column starts.
    pad = 2
    # The right-hand column of a Table starts on a multiple of this width.
    indent = 4
    # When true, free-standing code in the definitions section is wrapped
    # in '%{' ... '%}' on output.
    brace = True
    # Number of spaces prepended to every re-indented line of embedded code.
    nested_indent = 0 # 4
# Code.
Location = namedtuple('Location', ('name', 'line', 'column', 'text'))


def _diagnostic(self, level, msg, to):
    ''' Print a compiler-style diagnostic for this location to stderr.

    Three lines are written: a 'file:line:column: level: msg' header,
    the source text of the line, and a marker line with '^' under this
    column followed by one '~' per column up to `to` (when given).
    `to` must lie on the same line of the same file, at or after self.
    '''
    header = '{file}:{line}:{column}: {level}: {msg}'.format(
        file=self.name, line=self.line, column=self.column,
        level=level, msg=msg)
    print(header, file=sys.stderr)
    print(self.text, file=sys.stderr)
    if not to:
        # no end location: the marked range is just this one column
        to = self
    else:
        assert to.name == self.name
        assert to.line == self.line
        assert to.column >= self.column
    marker = ' ' * (self.column - 1) + '^' + '~' * (to.column - self.column)
    print(marker, file=sys.stderr)


def error(self, msg, to=None):
    ''' Report an error at this location (does not exit).
    '''
    self._diagnostic('error', msg, to)


def warning(self, msg, to=None):
    ''' Report a warning at this location.
    '''
    self._diagnostic('warning', msg, to)


def note(self, msg, to=None):
    ''' Report an informational note at this location.
    '''
    self._diagnostic('note', msg, to)


def fatal(self, msg, to=None):
    ''' Report an error at this location, then exit with status 1.
    '''
    self.error(msg, to)
    sys.exit(1)


# Attach the helpers as methods of Location, then drop the module-level
# names so they are reachable only through Location instances.
for _method in (_diagnostic, error, warning, note, fatal):
    setattr(Location, _method.__name__, _method)
del _method, _diagnostic, error, warning, note, fatal
class Reader(object):
    ''' Character-at-a-time reader over a line-oriented stream.

    One line is buffered at a time; a missing newline on the last line
    is added so that every line ends in a newline.  The reader tracks
    the line number and a 0-based column of the current character.
    '''
    __slots__ = ('_name', '_stream', '_buffer', '_line', '_column')

    def __init__(self, name, stream, line=1, column=1):
        ''' Create a new character reader that is smart with lines.

        name is used for diagnostics; stream must provide readline().
        line and column (both 1-based) give the position of the first
        character of the stream, for input cut out of a larger file.
        '''
        self._name = name
        self._stream = stream
        # Start on a fake newline "before" the first line so that the
        # adv() below loads the first real line and lands on `line`.
        self._buffer = '\n'
        self._line = line - 1
        self._column = 0
        column -= 1
        self.adv()
        if self._buffer:
            # Pad with spaces so that column numbers match the original
            # file.  An empty stream is left as EOF: padding it would
            # produce a buffer shorter than the column being indexed.
            self._buffer = ' ' * column + self._buffer
        self._column = column
        # no skew on input (actually belongs below)

    def get(self):
        ''' Fetch the current character, or falsy on EOF
        '''
        if self._buffer:
            return self._buffer[self._column]
        else:
            return None # less prone to accidental errors than ''

    def loc(self):
        ''' Fetch the Location of the current character.
        '''
        # internally we store 0-based, but users want 1-based
        # also, cut off the newline
        return Location(self._name, self._line, self._column + 1,
                self._buffer[:-1])

    def adv(self):
        ''' Advance to the next character, loading a new line if needed.

        At EOF this is a no-op, so callers that advance once too often
        keep seeing a falsy get() instead of an IndexError.
        '''
        if not self._buffer:
            return
        if self._buffer[self._column] == '\n':
            self._buffer = self._stream.readline()
            self._line += 1
            self._column = 0
            if self._buffer and not self._buffer.endswith('\n'):
                # normalize a final line that lacks its newline
                self._buffer += '\n'
        else:
            self._column += 1
def string_reader(s, name='<string>', line=1, column=1):
    ''' Build a Reader over the in-memory string s.

    name, line and column are passed through to Reader unchanged.
    '''
    stream = cStringIO.StringIO(s)
    return Reader(name, stream, line, column)
def take_while(b, r, s):
    ''' Move characters from r onto b for as long as they belong to s.

    Stops (without consuming) at EOF or at the first character that is
    not in s.
    '''
    assert isinstance(b, bytearray)
    assert isinstance(r, Reader)
    allowed = frozenset(s)
    ch = r.get()
    while ch and ch in allowed:
        b += ch
        r.adv()
        ch = r.get()
def take_mlc(b, r):
    ''' Consume the rest of a /* multi-line comment */ from r onto b.

    Reads up to and including the first '*/'.  If the input ends before
    the comment is closed, a fatal diagnostic is reported at the end of
    input (exit status 1) instead of failing on the EOF marker.
    '''
    assert isinstance(b, bytearray)
    assert isinstance(r, Reader)
    star = False
    while True:
        c = r.get()
        if not c:
            # EOF inside the comment: get() returned None, which can be
            # neither appended to b nor advanced past.
            r.loc().fatal('Unterminated /* comment')
        r.adv()
        b += c
        if star and c == '/':
            return
        star = c == '*'
def take_slc(b, r):
    ''' Consume the rest of a // single-line comment from r onto b.

    Stops in front of the terminating newline, which is left in the
    reader.  A newline directly preceded by a backslash continues the
    comment on the next line.  If the input ends while the comment is
    still being continued, a fatal diagnostic is reported (exit status 1).
    '''
    assert isinstance(b, bytearray)
    assert isinstance(r, Reader)
    bs = False
    while True:
        c = r.get()
        if not c:
            # Only reachable after a backslash-newline on the last line;
            # None can be neither appended to b nor advanced past.
            r.loc().fatal('Unexpected end of file in // comment')
        # if c == '\n': return
        if c == '\n' and not bs:
            return
        r.adv()
        b += c
        bs = c == '\\'
def take_char(b, r):
    ''' Consume the rest of a character literal from r onto b.

    Reads up to and including the first unescaped single quote; a
    backslash escapes the character that follows it.  If the input ends
    first, a fatal diagnostic is reported (exit status 1).
    '''
    assert isinstance(b, bytearray)
    assert isinstance(r, Reader)
    bs = False
    while True:
        c = r.get()
        if not c:
            # EOF inside the literal: None can be neither appended to b
            # nor advanced past.
            r.loc().fatal('Unterminated character literal')
        r.adv()
        b += c
        if not bs and c == '\'':
            return
        # an escaped backslash does not escape the next character
        bs = not bs and c == '\\'
def take_str(b, r):
    ''' Consume the rest of a string literal from r onto b.

    Reads up to and including the first unescaped double quote; a
    backslash escapes the character that follows it.  If the input ends
    first, a fatal diagnostic is reported (exit status 1).
    '''
    assert isinstance(b, bytearray)
    assert isinstance(r, Reader)
    bs = False
    while True:
        c = r.get()
        if not c:
            # EOF inside the literal: None can be neither appended to b
            # nor advanced past.
            r.loc().fatal('Unterminated string literal')
        r.adv()
        b += c
        if not bs and c == '"':
            return
        # an escaped backslash does not escape the next character
        bs = not bs and c == '\\'
def round_up(i, a):
    ''' Return i raised to the next multiple of a (i itself if already one).
    '''
    remainder = i % a
    if not remainder:
        return i
    return i + (a - remainder)
class Table:
    ''' Buffered two-column output.

    Rows are collected with put1 (a plain line) and put2 (left and
    right parts); flush writes them to stdout with every non-empty
    right part starting in the same column.
    '''
    def __init__(self):
        self.buf = []
        self.size = 0

    def put1(self, line):
        ''' Queue a single-column line (trailing whitespace removed).
        '''
        self.buf.append((line.rstrip(), ''))

    def put2(self, left, right):
        ''' Queue a two-column row and track the widest left part.
        '''
        row = (left.rstrip(), right.strip())
        self.buf.append(row)
        # rows whose right part is empty do not influence the alignment
        if row[1]:
            self.size = max(self.size, len(row[0]))

    def flush(self):
        ''' Write all queued rows to stdout and reset the table.
        '''
        # widest left part, plus padding, rounded up to the indent width
        self.size = round_up(self.size + LexSettings.pad, LexSettings.indent)
        for left, right in self.buf:
            if right:
                gap = ' ' * (self.size - len(left))
                sys.stdout.writelines([left, gap, right, '\n'])
            else:
                sys.stdout.writelines([left, '\n'])
        # clear in place, keeping the same list object
        self.buf[:] = []
        self.size = 0
def _put_code_block(table, code):
    ''' Re-indent free-standing code and queue it on table.

    The code is wrapped in '%{' / '%}' when LexSettings.brace is set.
    '''
    if LexSettings.brace:
        table.put1('%{')
    for line2 in indent_cpp_slop(code):
        table.put1(LexSettings.nested_indent * ' ' + line2)
    if LexSettings.brace:
        table.put1('%}')


def format_lex_or_yacc_definitions():
    ''' Format the definitions section (mostly used for options actually).

    Reads stdin up to (and including) the first '%%' line and writes the
    formatted section, followed by a blank line and '%%', to stdout.
    Code from '%{ ... %}' blocks, indented lines, and '%top' / '%code' /
    '%union' blocks is passed through indent_cpp_slop; name definitions
    are aligned in two columns.
    '''
    table = Table()
    in_code = False
    code = ''
    for line in sys.stdin:
        if line == '%%\n':
            break
        if line == '%{\n':
            in_code = True
            continue
        if in_code:
            if line == '%}\n':
                in_code = False
                continue
            code += line
            continue
        if not line.strip() or line != line.lstrip():
            # starts with whitespace or is an empty line ('\n')
            code += line
            continue
        # A directive or definition line: first emit any code collected
        # since the previous one.
        if code.strip():
            _put_code_block(table, code)
            table.put1('')
        elif code:
            # only blank lines were collected: collapse them into one
            table.put1('')
        code = ''
        if line.startswith('%'):
            # %top is flex, %code and %union are bison
            union = line.startswith('%union')
            if union or line.startswith('%top') or line.startswith('%code'):
                # TODO fix stupidity when in strings or comments
                count = line.count('{')
                if union:
                    assert count <= 1
                    # drop the '%' so that the indenter sees a C union;
                    # it is put back on the first output line below
                    code += line[1:]
                else:
                    if count:
                        assert count == 1
                        code += line[line.find('{'):]
                        table.put1(line[:line.find('{')])
                    else:
                        table.put1(line.rstrip())
                assert line.count('}') == 0
                # collect the body until the braces balance
                for line in sys.stdin:
                    count += line.count('{') - line.count('}')
                    code += line
                    assert count >= 0
                    if count == 0:
                        break
                if union:
                    first = True
                    for line2 in indent_cpp_slop(code):
                        if first:
                            line2 = '%' + line2
                            first = False
                        table.put1(line2)
                else:
                    for line2 in indent_cpp_slop(code):
                        table.put1(LexSettings.nested_indent * ' ' + line2)
                code = ''
            else:
                table.put1(line)
        elif line[0].isalpha() or line[0] == '_':
            parts = line.split(None, 1)
            if len(parts) == 2:
                table.put2(*parts)
            else:
                # a name without an expansion: keep the line as it is
                # rather than failing on the missing second column
                table.put1(line)
        else:
            table.put1(line)
    assert not in_code
    if code.strip():
        # Code directly before '%%' (or end of input) has no following
        # directive line to trigger its output, so emit it here; without
        # this it would be silently dropped.
        _put_code_block(table, code)
    table.flush()
    sys.stdout.write('\n%%\n')
def _lex_pattern_end(line):
    ''' Return the index in line at which the leading lex pattern ends.

    The pattern ends at the first whitespace that is not escaped, not
    inside "..." or [...], and not inside parentheses.  Returns 0 when
    the line starts with whitespace and len(line) when no such
    whitespace exists.
    '''
    end = len(line)
    i = 0
    depth = 0
    while i < end:
        c = line[i]
        if c == '\\':
            # an escape: skip the backslash and whatever follows it
            i += 2
            continue
        if not depth and c.isspace():
            break
        if c == '"':
            i += 1
            while i < end and line[i] != '"':
                if line[i] == '\\':
                    i += 1
                i += 1
        elif c == '[':
            i += 1
            if i < end and line[i] == '^':
                i += 1
            while i < end and line[i] != ']':
                # an escaped character, notably '\]', does not close
                # the character class
                if line[i] == '\\':
                    i += 1
                i += 1
        elif c == '(':
            depth += 1
        elif c == ')':
            assert depth
            depth -= 1
        i += 1
    return min(i, end)


def format_lex_rules():
    ''' Format the rule section of a lex file.

    Reads stdin up to (and including) the next '%%' line.  Each rule is
    split into its pattern and its action; the action (extended over
    following lines until its braces balance) is passed through
    indent_cpp_slop, and patterns and actions are aligned in two
    columns.  Lines starting with whitespace become empty lines.

    Raises NotImplementedError for rules with a <start condition>.
    '''
    table = Table()
    for line in sys.stdin:
        if line == '%%\n':
            break
        if line.startswith('<') and not line.startswith('<<'):
            raise NotImplementedError('start conditions not yet supported')
        i = _lex_pattern_end(line)
        if not i:
            table.put1('')
            continue
        pattern = line[:i]
        rule = line[i:]
        # pull in continuation lines until the action's braces balance
        count = rule.count('{') - rule.count('}')
        while count:
            blah = next(sys.stdin)
            rule += blah
            count += blah.count('{') - blah.count('}')
        rules = indent_cpp_slop(rule)
        table.put2(pattern, rules[0])
        for extra in rules[1:]:
            table.put1(extra)
    table.flush()
    sys.stdout.write('%%\n')
def _quoted_token_end(line, quote, what):
    ''' Return the index just past the quoted literal that starts line.

    line[0] is the opening quote.  A backslash escapes the character
    after it, so an escaped quote does not close the literal.  Raises
    Exception('broken ' + what) if the literal is not closed on line.
    '''
    escaped = False
    for i, c in enumerate(line):
        if escaped:
            # this character is escaped and cannot close the literal
            escaped = False
            continue
        if c == '\\':
            escaped = True
        elif i and c == quote:
            return i + 1
    raise Exception('broken ' + what)


def format_yacc_rules():
    ''' Format the rule section of a yacc file from stdin to stdout.

    Reads up to (and including) the next '%%' line.

    tokens are any of:
        word
        word[namedref]
        'c'
        "str"
        { code }
    break before {
    break twice before a : or |
    break twice before and thrice after a ;
    put a softspace after everything except ;
    '''
    sys.stdout.write('\n')
    softspace = '' # NOT reset by new lines
    for line in sys.stdin:
        if line == '%%\n':
            break
        line = line.strip()
        while line:
            if line.startswith("'") or line.startswith('"'):
                if line[0] == "'":
                    i = _quoted_token_end(line, "'", 'char')
                else:
                    i = _quoted_token_end(line, '"', 'string')
                word = line[:i]
                line = line[i:].lstrip()
                sys.stdout.writelines([softspace, word])
                softspace = ' '
                continue
            if line.startswith(':'):
                line = line[1:].lstrip()
                sys.stdout.write('\n\n:')
                softspace = ' '
                continue
            if line.startswith('{'):
                line += '\n'
                lines = ''
                # TODO fix braces in comments and strings
                lo = 1      # where to resume scanning in the current line
                behold = 1  # number of braces still open
                while behold:
                    i = line.find('}', lo)
                    if i == -1:
                        # no more closers here: count the openers, keep
                        # the whole line, and continue on the next one
                        behold += line[lo:].count('{')
                        lines += line
                        line = next(sys.stdin)
                        lo = 0
                    else:
                        behold -= 1
                        i += 1
                        behold += line[lo:i].count('{')
                        lo = i
                # the part of the last line up to the final closer
                lines += line[:lo]
                for line2 in indent_cpp_slop(lines):
                    sys.stdout.writelines(['\n', line2])
                line = line[lo:].strip()
                softspace = ' '
                continue
            if line.startswith(';'):
                line = line[1:].lstrip()
                sys.stdout.write('\n\n;\n\n\n')
                softspace = ''
                continue
            if line.startswith('|'):
                line = line[1:].lstrip()
                sys.stdout.write('\n\n|')
                softspace = ' '
                continue
            # screw comments
            # split on any whitespace so that tabs separate tokens too
            parts = line.split(None, 1)
            word = parts[0]
            line = parts[1].lstrip() if len(parts) > 1 else ''
            if word.endswith(':'):
                # 'name:' is the word 'name' followed by a ':' token;
                # push the colon back to be handled on the next round
                word = word[:-1]
                line = ':' + line
            sys.stdout.writelines([softspace, word])
            softspace = ' '
            continue
        # while line
    # for line in stdin
    sys.stdout.write('%%\n')
def format_lex():
    ''' Format a whole lex file from stdin to stdout.

    The three sections are handled in order by
    format_lex_or_yacc_definitions, format_lex_rules and format_cc.

    A lex file is a series of sections.
    In the initial section:
        If it begins with whitespace, it is indented code
        It might be a /* comment */
        It might be a #line
        It might be a %s, %x, %pointer, %array, %option %[a-z][0-9].*
        It might be a %{ codeblock %}
        It might be a %top { codeblock }
        It might be a name and an expansion
        A %% switches to the second section
    In a comment:
        */ is the end
    In a codeblock:
        if it started with %{, %} ends it
        if it started with %top{, } ends it if it matches the nesting
    In section 2's header:
        there may be %{ %} sections, possibly nested
        there may also be indented code
        there may be unindented code if it's inside the %{ %}
    In section 2 proper:
        pattern action
        <sc>pattern action
        <sc>{
            pattern action
        }
        a %% switches to section 3
    In section 3:
        everything is just C code
    '''
    format_lex_or_yacc_definitions()
    format_lex_rules()
    # the rest of stdin is plain code; format_cc ends by calling sys.exit
    format_cc()
def format_yacc():
    ''' Format a whole yacc file from stdin to stdout.

    The three sections are handled in order by
    format_lex_or_yacc_definitions, format_yacc_rules and format_cc.

    A yacc file is a series of sections.
    In the initial section:
        whitespace and comments are ignored.
        %someoption
        =
        |
        ;
        name
        name:
        int
        'char'
        "string"
        <*>
        <>
        <something>
        %{ prologue %}
        { braced code }
        [ bracketed identifier ]
        %% switch to section 2
    In the second section:
        is actually the same! wtf?
        But in practice:
        name:
            symbol 'c' "str" { code }
        |
            symbol 'c' "str" { code } /* in any order */
        ;
        any name may instead be name[namedref]
        code may additionally contain $$, $1, $namedref
    In section 3:
        everything is C code.
    '''
    format_lex_or_yacc_definitions()
    format_yacc_rules()
    # the rest of stdin is plain code; format_cc ends by calling sys.exit
    format_cc()
def format_cc():
    ''' Pipe the rest of stdin through the external 'indenter-cpp' program.

    The child writes to our stdout directly, so anything we buffered is
    flushed first.  Exits the process with the child's exit status.
    '''
    sys.stdout.flush()
    child = subprocess.Popen(['indenter-cpp'], stdin=subprocess.PIPE, stdout=None)
    pipe = child.stdin
    pipe.writelines(sys.stdin)
    pipe.close()
    status = child.wait()
    sys.exit(status)
def indent_cpp_slop(code):
    ''' Run a code fragment through 'indenter-cpp' and return its lines.

    The output is stripped of surrounding whitespace and split on
    newlines, so the result always has at least one element.  If the
    child fails, the whole process exits with the child's status.
    '''
    child = subprocess.Popen(['indenter-cpp'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    output, _ = child.communicate(code)
    status = child.wait()
    if status:
        sys.exit(status)
    return output.strip().split('\n')
operators = {
'#', '##',
'+', '++', '+=',
'-', '--', '-=', '->', '->*',
'*', '*=',
'/', '/=',
'%', '%=',
'=', '==',
'!', '!=',
'~',
'|', '||', '|=',
'&', '&&', '&=',
'^', '^=',
'<', '<=', '<<', '<<=',
'>', '>=', '>>', '>>=',
'.', '..', '.*', '...',
':', '::',
'(', ')',
'[', ']',
'{', '}',
'?',
',', ';',
'//', '/*', # comments are specially handled at end
}
operators = {
k: {v[len(k):] for v in operators if v is not k and v.startswith(k)}
for k in operators
}
# *please* don't use any of these except and, or, and not
operator_map = {
'and': '&&',
'and_eq': '&=',
'bitand': '&',
'bitor': '|',
'compl': '~',
'not': '!',
'not_eq': '!=',
'or': '||',
'or_eq': '|=',
'xor': '^',
'xor_eq': '^=',
}
num1 = string.digits
num_x = num1 + '.\''
ident1 = string.ascii_letters + '_$@' # $@ for bison
ident_x = ident1 + string.digits
class CxxLexer(object):
    ''' Tokenizer with one token of lookahead on top of a Reader.

    The current token is a triple (whitespace before it, token text,
    Location of its first character).  At end of input the token text
    is None.
    '''
    __slots__ = ('_reader', '_w', '_t', '_l', '_namespaces', '_classes')
    def __init__(self, reader):
        ''' Wrap reader and load the first token.
        '''
        self._reader = reader
        self.adv()
        self._namespaces = []
        self._classes = []
    def get2(self):
        ''' Return the current (whitespace, token, location) triple.
        '''
        return self._w, self._t, self._l
    def adv(self):
        ''' Replace the current token with the next one from the reader.
        '''
        self._w, self._t, self._l = self.pull()
    def pull(self):
        ''' Read one token from the reader.

        Returns (whitespace, token, location).  At end of input the
        result is ('\\n', None, location): whitespace collected up to
        that point is replaced by a single newline.
        Exits with status 1 on a character that cannot start a token.
        '''
        r = self._reader
        # whitespace before the token
        white = bytearray()
        while True:
            c = r.get()
            if not c:
                return '\n', None, r.loc()
            if not c.isspace():
                break
            white += c
            r.adv()
        white = str(white)
        # the token itself, starting with the character found above
        black = bytearray()
        black += c
        l = r.loc()
        r.adv()
        if c in operators:
            # extend as long as the next character continues a known
            # operator spelling (operators maps spelling -> suffixes)
            while True:
                c = r.get()
                if not c or c.isspace():
                    break
                op = operators[str(black)]
                if c not in op:
                    break
                black += c
                r.adv()
            # comment openers swallow the rest of the comment
            if black == '/*':
                take_mlc(black, r)
            if black == '//':
                take_slc(black, r)
        elif c in num1:
            take_while(black, r, num_x)
            c = r.get()
            # identifier characters directly after the digits are taken
            # as part of the same token
            if c in ident1:
                black += c
                r.adv()
                take_while(black, r, ident_x)
        elif c in ident1:
            take_while(black, r, ident_x)
            c = r.get()
            # an encoding prefix directly followed by a string literal
            if black in ('L', 'u8', 'u', 'U') and c == '"':
                black += c
                r.adv()
                take_str(black, r)
        elif c == '\'':
            take_char(black, r)
        elif c == '"':
            take_str(black, r)
        else:
            l.error('Unknown character: %r' % c)
            sys.exit(1)
        black = str(black)
        return white, black, l
class Flavored(object):
    ''' A token tagged with its syntactic role (its "flavor").

    The role is expressed by the subclass; _str holds the token text.
    '''
    __slots__ = ('_str',)

    def __init__(self, s):
        self._str = s


class Control(Flavored):
    ''' Control-flow keyword such as if, for, while, try.
    '''
    __slots__ = ()


class Binary(Flavored):
    ''' Binary operator; also used for , ; ? and :
    '''
    __slots__ = ()


class Unary(Flavored):
    ''' Prefix operator or prefix-like keyword (return, new, ...).
    '''
    __slots__ = ()


class Postfix(Flavored):
    ''' Postfix operator (postfix ++ and --, and ...).
    '''
    __slots__ = ()


class Type(Flavored):
    ''' Token that is part of a type.
    '''
    __slots__ = ()


class Value(Flavored):
    ''' Token that acts like a value.
    '''
    __slots__ = ()


class Literal(Value):
    ''' Numeric, character or string literal.
    '''
    __slots__ = ()


class TypeExpr(Flavored):
    ''' Keyword that computes a type (decltype).
    '''
    __slots__ = ()


class Attr(Flavored):
    ''' Specifier or attribute such as static, inline, extern.
    '''
    __slots__ = ()


class Def(Flavored):
    ''' Keyword that introduces a definition (class, namespace, ...).
    '''
    __slots__ = ()


class MatchHead(Flavored):
    ''' Opening bracket.

    _tail_char is the text of the closing bracket that must match it;
    _purpose is extra information about what the bracket pair is for.
    '''
    __slots__ = ('_tail_char', '_purpose')

    def __init__(self, s, t, p):
        Flavored.__init__(self, s)
        assert isinstance(t, str)
        self._tail_char = t
        self._purpose = p


class MatchTail(Flavored):
    ''' Closing bracket, linked to the MatchHead it closes.
    '''
    __slots__ = ('_head_obj',)

    def __init__(self, s, head):
        Flavored.__init__(self, s)
        assert isinstance(head, MatchHead)
        self._head_obj = head

    @property
    def _purpose(self):
        return self._head_obj._purpose


class MatchTail2(Flavored):
    ''' A single token that closes two brackets at once ('>>').

    Its purpose is that of the outer bracket.
    '''
    __slots__ = ('_head_inner', '_head_outer')

    def __init__(self, s, head_inner, head_outer):
        Flavored.__init__(self, s)
        assert isinstance(head_inner, MatchHead)
        assert isinstance(head_outer, MatchHead)
        self._head_inner = head_inner
        self._head_outer = head_outer

    @property
    def _purpose(self):
        return self._head_outer._purpose
class CxxFormatter(object):
    ''' Classifies tokens into flavors and decides their whitespace.

    _w and _t hold the whitespace and the flavored form of the previous
    token (they are assigned by the driving loop, not by this class).
    _scopes is the stack of currently open brackets (MatchHead objects).
    '''
    __slots__ = ('_lexer', '_w', '_t', '_types', '_values', '_type_expressions', '_scopes')
    def __init__(self, lexer):
        ''' Set up the formatter with the built-in type and value names.
        '''
        assert isinstance(lexer, CxxLexer)
        self._lexer = lexer
        self._w = None
        self._t = None
        self._scopes = []
        # identifiers that are always types
        self._types = {
            'auto',
            'bool',
            'char',
            'char16_t',
            'char32_t',
            'double',
            'float',
            'int',
            'long',
            'short',
            'signed',
            'unsigned',
            'void',
            'wchar_t',
        }
        # identifiers that are always values
        self._values = {
            'alignof',
            'const_cast',
            'dynamic_cast',
            'false',
            'nullptr',
            'reinterpret_cast',
            'sizeof',
            'static_cast',
            'this',
            'typeid',
            'true',
        }
        # identifiers that compute a type
        self._type_expressions = {
            'decltype',
        }
    # the following two functions should *generally* not access self
    # but they do need to a bit
    def flavor2(self, w, t, l):
        ''' Given the next token and its whitespace, calculate the flavor.

        w is the whitespace before the token, t its text, and l its
        Location (used for diagnostics).  Returns a Flavored object, or
        None for comments and for tokens longer than two characters
        that start with '#'.  Bracket tokens push to / pop from
        self._scopes.

        Note: the need to know the preceding whitespace is a hack
        (but a pretty good one!)

        self._w and self._t still contain the previous token.
        '''
        if t.startswith('//') or t.startswith('/*') or (t.startswith('#') and len(t) > 2):
            return None
        if t[0] in num1 or t[0] == '\'':
            return Literal(t)
        if '"' in t:
            if self._t and self._t._str == 'extern':
                # extern "C"
                return Attr(t)
            return Literal(t)
        # alternative operator keywords are classified as their symbol
        o = operator_map.get(t, t)
        if o in {
            '#',
            '!',
            '~',
        }:
            return Unary(t)
        if o in {
            '##',
            '+=',
            '-=',
            '->',
            '->*',
            '*=',
            '/',
            '/=',
            '%',
            '%=',
            '=',
            '==',
            '!=',
            '|',
            '||',
            '|=',
            '&=',
            '^',
            '^=',
            '<=',
            '<<',
            '<<=',
            '>=',
            '>>=',
            '.',
            '..',
            '.*',
            '::',
        }:
            return Binary(t)
        # a '<' preceded by whitespace is a comparison; one that is not
        # falls through and is treated as an opening bracket below
        if o == '<':
            if w:
                return Binary(t)
        # opening brackets: remember them until the closer arrives
        u = {'<': '>', '(': ')', '{': '}', '[': ']'}.get(o)
        if u is not None:
            rv = MatchHead(t, u, None) # fix this, it is CRITICAL
            self._scopes.append(rv)
            return rv
        # '>' and '>>' only close brackets if the innermost open
        # bracket is a '<'; otherwise they are operators
        if o == '>' or o == '>>':
            if not self._scopes or self._scopes[-1]._str != '<':
                return Binary(t)
            if o == '>>':
                assert len(self._scopes) >= 2
                assert self._scopes[-1]._str == '<'
                assert self._scopes[-2]._str == '<'
                return MatchTail2(t, self._scopes.pop(), self._scopes.pop())
        # closing brackets must match the innermost open bracket
        if o in {'>', ')', '}', ']'}:
            if not self._scopes:
                l.fatal('Unexpected %r' % t)
            if self._scopes[-1]._tail_char != t:
                l.fatal('Expected %r, got %r' % (self._scopes[-1]._tail_char, t))
            return MatchTail(t, self._scopes.pop())
        if o == '...':
            return Postfix(t)
        # directly after a type, these are part of the type
        if o in {'*', '&', '&&'}:
            if isinstance(self._t, Type):
                return Type(t)
        # unary or binary, depending on the flavor of the previous token
        if o in {'+', '-', '*', '&', '&&'}:
            # && is a gcc extension for address-of-a-label
            if isinstance(self._t, (Unary, Binary, Control, MatchHead)):
                return Unary(t)
            elif isinstance(self._t, (Value, Postfix, MatchTail, MatchTail2)):
                return Binary(t)
            else:
                l.fatal('Not sure how to handle ambiguous unary/binary after instance of %r' % self._t.__class__.__name__)
        # prefix or postfix, depending on the flavor of the previous token
        if o in {'--', '++'}:
            if isinstance(self._t, (Unary, Binary, Control, MatchHead)):
                return Unary(t)
            elif isinstance(self._t, (Value, Postfix, MatchTail, MatchTail2)):
                return Postfix(t)
            else:
                l.fatal('Not sure how to handle ambiguous prefix/postfix after instance of %r' % self._t.__class__.__name__)
        if o in {',', ';'}:
            return Binary(t)
        if o == '?':
            return Binary(t)
        if o == ':':
            return Binary(t)
        # every operator has been handled: what remains is an identifier
        assert t == o
        assert t[0] in ident1
        assert all(c in ident_x for c in t[1:])
        # keywords!
        if t == '__attribute__':
            return Attr(t)
        if t in {
            'alignas',
            'constexpr',
            'explicit',
            'export',
            'extern',
            'friend',
            'inline',
            'mutable',
            'register',
            'static',
            'thread_local',
            'virtual',
        }:
            return Attr(t)
        if t in {
            'const',
            'volatile',
        }:
            if self._t is None or isinstance(self._t, (Attr, Binary, MatchTail, MatchTail2)): # ; is binary
                return Attr(t)
            if isinstance(self._t, Def):
                # trailing function
                return Attr(t)
            return Type(t)
        if t in {'final', 'override'}:
            # theoretically, should only do this for thingy
            return Attr(t)
        if t == 'noexcept':
            if isinstance(self._t, (Attr, MatchTail, MatchTail2)):
                return Attr(t)
            else:
                return Value(t)
        if t == 'asm':
            return Value(t)
        if t in {'delete', 'default'} and isinstance(self._t, Binary): # = delete
            return Value(t)
        if t in {'new', 'delete'}:
            return Unary(t)
        if t in {'case', 'goto', 'return', 'throw'}:
            return Unary(t)
        if t in {'default', 'public', 'private', 'protected'}:
            return Value(t)
        if t in {'break', 'continue'}:
            return Value(t)
        if t in {'try', 'catch', 'do', 'else', 'for', 'if', 'switch', 'while'}:
            return Control(t)
        if t in {'class', 'enum', 'struct', 'typename', 'union'}:
            return Def(t)
        if t == 'static_assert':
            return Value(t)
        if t == 'operator':
            return Value(t)
        if t == 'namespace':
            return Def(t)
        if t == 'template':
            return Def(t)
        if t == 'typedef':
            return Def(t)
        if t == 'using':
            return Unary(t)
        if t in self._type_expressions:
            return TypeExpr(t)
        # types, values, and keywords that act like one of those
        if t in self._types:
            return Type(t)
        if t in self._values:
            return Value(t)
        # unknown identifiers are classified by capitalization, ignoring
        # underscores: ALL_CAPS is a value, Capitalized is a type, and
        # anything else is a value
        u = t.replace('_', '')
        if u.isupper():
            return Value(t)
        if u and u[0].isupper():
            return Type(t)
        return Value(t)
    def whitespace(self, pt, t):
        ''' Given a token and its flavor, calculate its whitespace.

        pt is the flavored previous token and t the flavored current
        token (None for tokens without a flavor).  The list of
        adjustment functions is currently empty, so the whitespace
        stored in self._w is returned unchanged.
        '''
        w = self._w # TODO set to '' instead to force calculation
        for func in [
        ]:
            w = func(w, pt, t)
        return w
def format_ii():
    ''' Run stdin through the built-in lexer and formatter to stdout.

    Every token is written preceded by the whitespace the formatter
    chooses for it; a final newline is written at end of input.
    '''
    lexer = CxxLexer(Reader('<stdin>', sys.stdin))
    formatter = CxxFormatter(lexer)
    while True:
        space, raw, where = lexer.get2()
        assert isinstance(space, str)
        if raw is None:
            # end of input
            break
        assert isinstance(raw, str)
        lexer.adv()
        previous = formatter._t
        flavored = formatter.flavor2(space, raw, where)
        formatter._w = space
        # tokens without a flavor do not replace the previous token
        if flavored is not None:
            formatter._t = flavored
        space = formatter.whitespace(previous, flavored)
        text = raw if flavored is None else flavored._str
        sys.stdout.writelines([space, text])
    sys.stdout.write('\n')
# Command-line mode flag -> function that formats stdin to stdout.
exts = {
    '-lpp': format_lex,
    '-ypp': format_yacc,
    '-cpp': format_cc,
    '-ipp': format_ii,
}

if __name__ == '__main__':
    if len(sys.argv) != 2:
        # fill in the program name; the placeholder used to be printed
        # as a literal '%s'
        sys.exit('Usage: %s -ext < input.ext > output.ext' % sys.argv[0])
    func = exts.get(sys.argv[1])
    if not func:
        sys.exit('Bad -ext')
    func()