#!/usr/bin/env python
# -*- encoding: utf-8
## indenter.py - Top-level indenter for all files
##
## Copyright ©2013 Ben Longbons <b.r.longbons@gmail.com>
##
## This file is part of The Mana World (Athena server)
##
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import print_function
from collections import namedtuple
import cStringIO
import string
import subprocess
import sys
# Settings.
class LexSettings:
    ''' Layout knobs for the lex/yacc formatters (used as a plain namespace).
    '''
    # Column alignment, consumed by Table.flush(): `pad` is added to the
    # widest left-hand cell, then the result is rounded up to a multiple
    # of `indent`.
    indent = 4
    pad = 2

    # Code blocks in the definitions section: whether to wrap them in
    # '%{' / '%}' lines, and how many spaces to prefix each code line with.
    brace = True
    nested_indent = 0 # 4
# Code.
class Location(namedtuple('Location', ('name', 'line', 'column', 'text'))):
    ''' A 1-based (name, line, column) position plus the text of that line.

        The error/warning/note methods print a compiler-style diagnostic
        to stderr: a 'name:line:column: level: msg' header, the source
        line, and a '^~~~' marker underneath it.
    '''
    # keep instances as light as the bare namedtuple (no per-instance dict)
    __slots__ = ()

    def _diagnostic(self, level, msg, to):
        ''' Print one diagnostic of the given level.

            `to`, when given, is a Location on the same line at or after
            this one; the underline then extends from self to `to`.
        '''
        print('{file}:{line}:{column}: {level}: {msg}'.format(
                file=self.name, line=self.line, column=self.column,
                level=level, msg=msg),
            file=sys.stderr)
        print(self.text, file=sys.stderr)
        if not to:
            # no range requested: underline just the one column
            to = self
        else:
            assert to.name == self.name
            assert to.line == self.line
            assert to.column >= self.column
        caret = ' ' * (self.column - 1) + '^'
        print(caret + '~' * (to.column - self.column), file=sys.stderr)

    def error(self, msg, to=None):
        ''' Report `msg` at this location with level 'error'. '''
        self._diagnostic('error', msg, to)

    def warning(self, msg, to=None):
        ''' Report `msg` at this location with level 'warning'. '''
        self._diagnostic('warning', msg, to)

    def note(self, msg, to=None):
        ''' Report `msg` at this location with level 'note'. '''
        self._diagnostic('note', msg, to)
class Reader(object):
    ''' Character-at-a-time reader over a line-oriented stream.

        Holds one line at a time in a buffer and tracks the current
        line and column. Every buffered line ends in '\\n' (one is
        appended if the stream's last line lacks it); an empty buffer
        means EOF.
    '''
    __slots__ = ('_name', '_stream', '_buffer', '_line', '_column')

    def __init__(self, name, stream, line=1, column=1):
        ''' Create a new character reader that is smart with lines.

            name:   reported in Locations produced by loc()
            stream: any object with a readline() method
            line, column: 1-based position to assign to the first
                    character read from the stream
        '''
        self._name = name
        self._stream = stream
        # Pretend we sit on the newline of the line *before* the first
        # one, so that a single adv() loads the first real line.
        self._buffer = '\n'
        self._line = line - 1
        self._column = 0
        self.adv()
        column -= 1
        # Left-pad the first line so the first real character sits at the
        # requested column. At EOF the buffer must stay empty, otherwise
        # get() would hand out the padding as if it were input.
        if self._buffer:
            self._buffer = ' ' * column + self._buffer
        self._column = column
        # original note: no skew on input (actually belongs below)

    def get(self):
        ''' Fetch the current character, or falsy on EOF
        '''
        if not self._buffer:
            return None # less prone to accidental errors than ''
        return self._buffer[self._column]

    def loc(self):
        ''' Fetch the Location of the current character.
        '''
        # internally we store 0-based, but users want 1-based
        # also, cut off the newline
        return Location(self._name, self._line, self._column + 1,
                self._buffer[:-1])

    def adv(self):
        ''' Step to the next character, loading a new line when needed.

            Does nothing once EOF has been reached.
        '''
        if not self._buffer:
            # already at EOF; indexing the empty buffer would raise
            return
        if self._buffer[self._column] != '\n':
            self._column += 1
            return
        self._buffer = self._stream.readline()
        self._line += 1
        self._column = 0
        # normalise a final line that lacks its newline
        if self._buffer and not self._buffer.endswith('\n'):
            self._buffer += '\n'
def string_reader(s, name='<string>', line=1, column=1):
    ''' Build a Reader over the in-memory string `s`.

        `name`, `line` and `column` are passed through to Reader unchanged.
    '''
    stream = cStringIO.StringIO(s)
    return Reader(name, stream, line, column)
def take_while(b, r, s):
    ''' Append characters from reader `r` to `b` while they belong to `s`.

        Stops, without consuming it, at the first character that is not
        in `s`, or at EOF. `b` is extended in place.
    '''
    assert isinstance(b, bytearray)
    assert isinstance(r, Reader)
    allowed = frozenset(s)
    c = r.get()
    while c and c in allowed:
        b += c
        r.adv()
        c = r.get()
def take_mlc(b, r):
    ''' Consume the rest of a /* ... */ comment from `r` into `b`.

        Expects the opening '/*' to have been consumed already; reads up
        to and including the closing '*/'. `b` is extended in place.

        Raises EOFError if the input ends before the comment is closed.
    '''
    assert isinstance(b, bytearray)
    assert isinstance(r, Reader)
    star = False
    while True:
        c = r.get()
        if not c:
            # get() is falsy at EOF; going on would fail obscurely
            raise EOFError('unterminated /* comment')
        r.adv()
        b += c
        if star and c == '/':
            return
        # remember whether this character could start the closing '*/'
        star = c == '*'
def take_slc(b, r):
    ''' Consume the rest of a // comment from `r` into `b`.

        Expects the opening '//' to have been consumed already. Reads up
        to and including the newline that ends the comment; a newline
        directly preceded by a backslash continues the comment onto the
        next line. If the input ends first, the comment ends there.
        `b` is extended in place.
    '''
    assert isinstance(b, bytearray)
    assert isinstance(r, Reader)
    bs = False
    while True:
        c = r.get()
        if not c:
            # EOF right after a backslash-newline: nothing more to take
            return
        r.adv()
        b += c
        if c == '\n' and not bs:
            return
        bs = c == '\\'
def _take_quoted(b, r, quote, what):
    ''' Consume characters from `r` into `b` through the closing `quote`.

        A backslash escapes the character after it, so an escaped quote
        does not end the literal. `what` names the literal kind for the
        error message.

        Raises EOFError if the input ends before the closing quote.
    '''
    bs = False
    while True:
        c = r.get()
        if not c:
            # get() is falsy at EOF; going on would fail obscurely
            raise EOFError('unterminated %s literal' % what)
        r.adv()
        b += c
        if not bs and c == quote:
            return
        # a backslash only escapes when it is not itself escaped
        bs = not bs and c == '\\'

def take_char(b, r):
    ''' Consume the rest of a character literal (opening ' already taken).

        `b` is extended in place, up to and including the closing quote.
        Raises EOFError if the literal is never closed.
    '''
    assert isinstance(b, bytearray)
    assert isinstance(r, Reader)
    _take_quoted(b, r, '\'', 'character')

def take_str(b, r):
    ''' Consume the rest of a string literal (opening " already taken).

        `b` is extended in place, up to and including the closing quote.
        Raises EOFError if the literal is never closed.
    '''
    assert isinstance(b, bytearray)
    assert isinstance(r, Reader)
    _take_quoted(b, r, '"', 'string')
def round_up(i, a):
    ''' Return `i` rounded up to the next multiple of `a`.

        A value that is already a multiple of `a` is returned unchanged.
    '''
    remainder = i % a
    if not remainder:
        return i
    return i + (a - remainder)
class Table:
    ''' Buffered two-column output.

        Rows are collected with put1/put2 and written to stdout by
        flush(), with every right-hand cell starting at the same column.
    '''
    def __init__(self):
        # list of (left, right) rows; right is '' for single-cell rows
        self.buf = []
        # widest left cell among rows that have a right cell
        self.size = 0

    def put1(self, line):
        ''' Queue a full-width row (trailing whitespace removed). '''
        self.buf.append((line.rstrip(), ''))

    def put2(self, left, right):
        ''' Queue a two-cell row; `right` will be aligned on flush. '''
        row = (left.rstrip(), right.strip())
        self.buf.append(row)
        # rows with an empty right cell do not influence the alignment
        if row[1] and len(row[0]) > self.size:
            self.size = len(row[0])

    def flush(self):
        ''' Write all queued rows to stdout and reset the table. '''
        self.size += LexSettings.pad
        self.size = round_up(self.size, LexSettings.indent)
        for left, right in self.buf:
            if right:
                gap = ' ' * (self.size - len(left))
                sys.stdout.writelines([left, gap, right, '\n'])
            else:
                sys.stdout.writelines([left, '\n'])
        del self.buf[:]
        self.size = 0
def _emit_pending_code(table, code, trailing_blank=True):
    ''' Queue accumulated verbatim code (indented lines, %{ %} bodies).

        Real code is re-indented through indent_cpp_slop and wrapped in
        '%{' / '%}' when LexSettings.brace is set; a run of blank lines
        collapses into a single blank row. `trailing_blank` controls
        whether a blank row follows.
    '''
    if code.strip():
        if LexSettings.brace:
            table.put1('%{')
        for cline in indent_cpp_slop(code):
            table.put1(LexSettings.nested_indent * ' ' + cline)
        if LexSettings.brace:
            table.put1('%}')
        if trailing_blank:
            table.put1('')
    elif code and trailing_blank:
        table.put1('')

def _format_braced_directive(table, line, union):
    ''' Handle %union / %top / %code, whose body is a { braced } block.

        `line` is the directive line; following lines are pulled from
        stdin until the braces balance. Braces are counted naively.
    '''
    code = bytearray()
    # TODO fix stupidity when in strings or comments
    count = line.count('{')
    if union:
        assert count <= 1
        # drop the '%' so the indenter sees a plain C++ 'union { ... }'
        code += line[1:]
    elif count:
        assert count == 1
        brace = line.find('{')
        code += line[brace:]
        table.put1(line[:brace])
    else:
        table.put1(line.rstrip())
    assert line.count('}') == 0
    for line in sys.stdin:
        count += line.count('{') - line.count('}')
        code += line
        assert count >= 0
        if count == 0:
            break
    if union:
        first = True
        for cline in indent_cpp_slop(code):
            if first:
                # put back the '%' removed above
                cline = '%' + cline
                first = False
            table.put1(cline)
    else:
        for cline in indent_cpp_slop(code):
            table.put1(LexSettings.nested_indent * ' ' + cline)

def format_lex_or_yacc_definitions():
    ''' definitions section (mostly used for options actually)

        Reads stdin up to the first '%%' line and writes the formatted
        section to stdout, followed by a blank line and '%%'.
        Name/expansion definitions are aligned in two columns.
    '''
    table = Table()
    in_code = False
    code = bytearray()
    for line in sys.stdin:
        if line == '%%\n':
            break
        if line == '%{\n':
            in_code = True
            continue
        if in_code:
            if line == '%}\n':
                in_code = False
                continue
            code += line
            continue
        if not line.strip() or line != line.lstrip():
            # starts with whitespace or is an empty line ('\n')
            code += line
            continue
        _emit_pending_code(table, code)
        code = bytearray()
        if line.startswith('%'):
            # %top is flex, %code and %union are bison
            union = line.startswith('%union')
            if union or line.startswith(('%top', '%code')):
                _format_braced_directive(table, line, union)
            else:
                table.put1(line)
        elif line[0].isalpha() or line[0] == '_':
            fields = line.split(None, 1)
            if len(fields) == 2:
                table.put2(*fields)
            else:
                # a bare name with no expansion: nothing to align
                table.put1(line)
        else:
            table.put1(line)
    assert not in_code
    # Code sitting right before '%%' would otherwise be lost. Blank lines
    # there are still dropped, as '%%' gets its own blank line below.
    _emit_pending_code(table, code, trailing_blank=False)
    table.flush()
    sys.stdout.write('\n%%\n')
def _lex_pattern_end(line):
    ''' Return the index in `line` just past the lex pattern.

        The pattern ends at the first whitespace that is outside
        parentheses, "quoted strings" and [character classes]; a
        backslash always protects the character after it. Returns 0 for
        a line that starts with whitespace.

        Raises IndexError if a string or character class is left open.
    '''
    i = 0
    depth = 0
    while i < len(line):
        c = line[i]
        if c == '\\':
            # skip the backslash and whatever it escapes
            i += 2
            continue
        if not depth and c.isspace():
            break
        if c == '"':
            i += 1
            while line[i] != '"':
                if line[i] == '\\':
                    i += 1
                i += 1
        elif c == '[':
            i += 1
            if line[i] == '^':
                i += 1
            while line[i] != ']':
                if line[i] == '\\':
                    # an escaped ']' does not close the class
                    i += 1
                i += 1
        elif c == '(':
            depth += 1
        elif c == ')':
            assert depth
            depth -= 1
        i += 1
    return i

def format_lex_rules():
    ''' rule section

        Reads stdin up to the next '%%' line. Each rule is split into
        pattern and action, the action is re-indented through
        indent_cpp_slop, and actions are aligned in a common column.
        Lines that start with whitespace are kept (trailing whitespace
        removed).

        Raises NotImplementedError for rules with <start conditions>.
    '''
    table = Table()
    for line in sys.stdin:
        if line == '%%\n':
            break
        if line.startswith('<') and not line.startswith('<<'):
            raise NotImplementedError('start conditions not yet supported')
        end = _lex_pattern_end(line)
        if not end:
            # blank or indented line: no pattern to split off
            table.put1(line)
            continue
        pattern = line[:end]
        rule = bytearray(line[end:])
        # pull in following lines until the action's braces balance
        count = rule.count('{') - rule.count('}')
        while count:
            more = next(sys.stdin)
            rule += more
            count += more.count('{') - more.count('}')
        rules = indent_cpp_slop(rule)
        table.put2(pattern, rules[0])
        for extra in rules[1:]:
            table.put1(extra)
    table.flush()
    sys.stdout.write('%%\n')
def _yacc_quoted_end(line, quote, what):
    ''' Return the index just past the literal that starts `line`.

        `line[0]` is the opening `quote`. A backslash escapes the
        character after it, so an escaped quote does not end the literal.

        Raises Exception('broken <what>') if no closing quote is found.
    '''
    escaped = False
    for i, c in enumerate(line):
        if escaped:
            # this character is protected; the escape ends here
            escaped = False
            continue
        if c == '\\':
            escaped = True
        elif i and c == quote:
            return i + 1
    raise Exception('broken ' + what)

def format_yacc_rules():
    '''
    Format the yacc rules section: read stdin up to the next '%%' line
    and write the result to stdout.

    tokens are any of:
        word
        word[namedref]
        'c'
        "str"
        { code }
    break before {
    break twice before a : or |
    break twice before and thrice after a ;
    put a softspace after everything except ;
    '''
    sys.stdout.write('\n')
    softspace = '' # NOT reset by new lines
    for line in sys.stdin:
        if line == '%%\n':
            break
        line = line.strip()
        while line:
            head = line[0]
            if head == "'" or head == '"':
                what = 'char' if head == "'" else 'string'
                end = _yacc_quoted_end(line, head, what)
                word = line[:end]
                line = line[end:].lstrip()
                sys.stdout.writelines([softspace, word])
                softspace = ' '
                continue
            if head == ':':
                line = line[1:].lstrip()
                sys.stdout.write('\n\n:')
                softspace = ' '
                continue
            if head == '{':
                line += '\n'
                lines = bytearray()
                # TODO fix braces in comments and strings
                lo = 1      # where to resume scanning within `line`
                behold = 1  # number of braces currently open
                while behold:
                    i = line.find('}', lo)
                    if i == -1:
                        # no close on this line: take it whole, read on
                        behold += line[lo:].count('{')
                        lines += line
                        line = next(sys.stdin)
                        lo = 0
                    else:
                        behold -= 1
                        i += 1
                        behold += line[lo:i].count('{')
                        lo = i
                lines += line[:lo]
                for line2 in indent_cpp_slop(lines):
                    sys.stdout.writelines(['\n', line2])
                line = line[lo:].strip()
                softspace = ' '
                continue
            if head == ';':
                line = line[1:].lstrip()
                sys.stdout.write('\n\n;\n\n\n')
                softspace = ''
                continue
            if head == '|':
                line = line[1:].lstrip()
                sys.stdout.write('\n\n|')
                softspace = ' '
                continue
            # screw comments
            # split on any whitespace, so tab-separated tokens work too
            parts = line.split(None, 1)
            word = parts[0]
            line = parts[1] if len(parts) > 1 else ''
            if word.endswith(':'):
                # 'name:' -> emit 'name' now, handle the ':' next round
                word = word[:-1]
                line = ':' + line
            sys.stdout.writelines([softspace, word])
            softspace = ' '
        # while line
    # for line in stdin
    sys.stdout.write('%%\n')
def format_lex():
    '''
    Format a whole lex file from stdin to stdout, section by section.

    A lex file is a series of sections.

    Section 1 (definitions); each item is one of:
        indented code (the line begins with whitespace)
        a /* comment */, which ends at */
        a #line
        a %s, %x, %pointer, %array, %option %[a-z][0-9].*
        a %{ codeblock %}, which ends at %}
        a %top { codeblock }, which ends at the } matching the nesting
        a name and an expansion
    A %% switches to the second section.

    Section 2 header:
        there may be %{ %} sections, possibly nested
        there may also be indented code
        there may be unindented code if it's inside the %{ %}
    Section 2 proper:
        pattern action
        <sc>pattern action
        <sc>{
            pattern action
        }
    A %% switches to section 3.

    Section 3:
        everything is just C code
    '''
    # each stage consumes its own section of stdin, in file order
    for stage in (format_lex_or_yacc_definitions, format_lex_rules, format_cc):
        stage()
def format_yacc():
    '''
    Format a whole yacc file from stdin to stdout, section by section.

    A yacc file is a series of sections.

    Section 1; whitespace and comments are ignored, tokens are:
        %someoption
        =
        |
        ;
        name
        name:
        int
        'char'
        "string"
        <*>
        <>
        <something>
        %{ prologue %}
        { braced code }
        [ bracketed identifier ]
    A %% switches to section 2.

    Section 2 is actually the same! wtf?
    But in practice:
        name:
            symbol 'c' "str" { code }
        |
            symbol 'c' "str" { code } /* in any order */
        ;
    any name may instead be name[namedref]
    code may additionally contain $$, $1, $namedref

    Section 3:
        everything is C code.
    '''
    # each stage consumes its own section of stdin, in file order
    for stage in (format_lex_or_yacc_definitions, format_yacc_rules, format_cc):
        stage()
def format_cc():
    ''' Pipe the rest of stdin through the external 'indenter-cpp' program.

        The child writes straight to our stdout, so our own pending
        output is flushed first. Exits the process with the child's
        exit status.
    '''
    sys.stdout.flush()
    child = subprocess.Popen(['indenter-cpp'], stdin=subprocess.PIPE, stdout=None)
    pipe = child.stdin
    pipe.writelines(sys.stdin)
    pipe.close()
    status = child.wait()
    sys.exit(status)
def indent_cpp_slop(code):
    ''' Run a code fragment through 'indenter-cpp'; return its output lines.

        Leading and trailing whitespace of the whole output is stripped
        before splitting, so the result always has at least one element.
    '''
    child = subprocess.Popen(['indenter-cpp'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    formatted, _ = child.communicate(code)
    return formatted.strip().split('\n')
# Every operator / punctuator spelling the lexer recognises.
operators = {
    '#', '##',
    '+', '++', '+=',
    '-', '--', '-=', '->', '->*',
    '*', '*=',
    '/', '/=',
    '%', '%=',
    '=', '==',
    '!', '!=',
    '~',
    '|', '||', '|=',
    '&', '&&', '&=',
    '^', '^=',
    '<', '<=', '<<', '<<=',
    '>', '>=', '>>', '>>=',
    '.', '..', '.*', '...',
    ':', '::',
    '(', ')',
    '[', ']',
    '{', '}',
    '?',
    ',', ';',
    '//', '/*', # comments are specially handled at end
}
# Re-key as {spelling: set of suffixes that extend it to a longer spelling},
# e.g. '-' maps to {'-', '=', '>', '>*'}. The lexer grows an operator one
# character at a time for as long as the next character is in this set.
operators = {
    k: {v[len(k):] for v in operators if v != k and v.startswith(k)}
    for k in operators
}

# Character classes: first character / following characters of a number
# and of an identifier.
num1 = string.digits
num_x = num1 + '.\''
ident1 = string.ascii_letters + '_$@' # $@ for bison
ident_x = ident1 + string.digits
class CxxLexer(object):
    ''' Tokenizer over a Reader, with one token of lookahead.

        get() returns the current (whitespace, token, flags) triple and
        adv() moves to the next one. The token is None at end of input.
    '''
    __slots__ = ('_reader', '_w', '_t', '_f', '_namespaces', '_classes')

    def __init__(self, reader):
        self._reader = reader
        # load the first token right away so get() is usable immediately
        self.adv()
        self._namespaces = []
        self._classes = []

    def get(self):
        ''' Return the current (whitespace, token, flags) triple. '''
        return self._w, self._t, self._f

    def adv(self):
        ''' Replace the current triple with the next one from the reader. '''
        self._w, self._t, self._f = self.pull()

    def pull(self):
        ''' Read one token; return (preceding whitespace, token, None).

            At end of input the result is ('\\n', None, None). An
            unrecognised character is reported at its location and the
            process exits with status 1.
        '''
        reader = self._reader

        # leading whitespace
        white = bytearray()
        c = reader.get()
        while c and c.isspace():
            white += c
            reader.adv()
            c = reader.get()
        if not c:
            return '\n', None, None

        # the token proper; what follows depends on its first character
        black = bytearray()
        black += c
        where = reader.loc()
        reader.adv()
        if c in operators:
            # extend as long as the next character continues a known operator
            c = reader.get()
            while c and not c.isspace() and c in operators[str(black)]:
                black += c
                reader.adv()
                c = reader.get()
            if black == '/*':
                take_mlc(black, reader)
            elif black == '//':
                take_slc(black, reader)
        elif c in num1:
            take_while(black, reader, num_x)
            c = reader.get()
            if c in ident1:
                # letters directly after the digits belong to the number
                black += c
                reader.adv()
                take_while(black, reader, ident_x)
        elif c in ident1:
            take_while(black, reader, ident_x)
            c = reader.get()
            if black in ('L', 'u8', 'u', 'U') and c == '"':
                # encoding prefix glued to a string literal
                black += c
                reader.adv()
                take_str(black, reader)
        elif c == '\'':
            take_char(black, reader)
        elif c == '"':
            take_str(black, reader)
        else:
            where.error('Unknown character: %r' % c)
            sys.exit(1)
        # c is the first char of the next thing
        return white, black, None
def whitespace(w, cur, prev):
    ''' Choose the whitespace to emit in front of a token.

        w:    the whitespace found in the input
        cur:  (token, flags) pair of the token being emitted
        prev: (token, flags) pair of the previous token

        Currently returns `w` unchanged.
    '''
    # Unpack here, not in the signature: tuple parameters were removed
    # from the language (PEP 3113). This still rejects non-pairs.
    t, f = cur
    pt, pf = prev
    return w
def format_ii():
    ''' Re-emit stdin token by token with whitespace chosen by whitespace().

        Writes each token to stdout preceded by its whitespace, and ends
        the output with a newline if the last token did not. Input with
        no tokens produces no output.
    '''
    lexer = CxxLexer(Reader('<stdin>', sys.stdin))
    # previous token and its flags; None until the first token is written
    pt = None
    pf = None
    while True:
        w, t, f = lexer.get()
        if not t:
            break
        lexer.adv()
        w = whitespace(w, (t, f), (pt, pf))
        sys.stdout.writelines([w, t])
        pt, pf = t, f
    # pt is still None when the input had no tokens at all
    if pt is not None and not pt.endswith('\n'):
        sys.stdout.write('\n')
# Command-line mode flag -> formatter; each reads stdin and writes stdout.
exts = {
    '-lpp': format_lex,
    '-ypp': format_yacc,
    '-cpp': format_cc,
    '-ipp': format_ii,
}

if __name__ == '__main__':
    if len(sys.argv) != 2:
        # fill in the program name; the '%s' used to be printed literally
        sys.exit('Usage: %s -ext < input.ext > output.ext' % sys.argv[0])
    func = exts.get(sys.argv[1])
    if not func:
        sys.exit('Bad -ext')
    func()