#!/usr/bin/env python
# -*- encoding: utf-8
##    indenter.py - Top-level indenter for all files
##
##    Copyright ©2013 Ben Longbons
##
##    This file is part of The Mana World (Athena server)
##
##    This program is free software: you can redistribute it and/or modify
##    it under the terms of the GNU General Public License as published by
##    the Free Software Foundation, either version 3 of the License, or
##    (at your option) any later version.
##
##    This program is distributed in the hope that it will be useful,
##    but WITHOUT ANY WARRANTY; without even the implied warranty of
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##    GNU General Public License for more details.
##
##    You should have received a copy of the GNU General Public License
##    along with this program.  If not, see <http://www.gnu.org/licenses/>.


# Settings.

class LexSettings:
    ''' Layout knobs shared by the lex/yacc formatters.
    '''
    # Spaces added after the longest left column before the right column.
    pad = 2
    # The right column always starts on a multiple of this many spaces.
    indent = 4
    # Whether loose code in the definitions section is wrapped in %{ ... %}.
    brace = True
    # Extra indentation applied to each line of nested code blocks.
    nested_indent = 0 # 4


# Code.
import subprocess
import sys


def round_up(i, a):
    ''' Return the smallest multiple of `a` that is not less than `i`.
    '''
    m = i % a
    if m:
        i += (a - m)
    return i


class Table:
    ''' Aligned output

    Rows are queued with put1() (a full-width line) or put2() (a left and
    a right cell) and written to sys.stdout by flush(), with every right
    cell starting in the same column.
    '''
    def __init__(self):
        self.buf = []   # queued (left, right) pairs; right == '' for put1
        self.size = 0   # widest left cell that has a non-empty right cell

    def put1(self, line):
        ''' Queue a single unaligned line (trailing whitespace dropped).
        '''
        self.buf.append((line.rstrip(), ''))

    def put2(self, left, right):
        ''' Queue a two-column row; `right` is aligned on flush().
        '''
        left = left.rstrip()
        right = right.strip()
        self.buf.append((left, right))
        # Rows with an empty right cell do not influence the column width.
        if right and len(left) > self.size:
            self.size = len(left)

    def flush(self):
        ''' Write all queued rows to sys.stdout and reset the table.
        '''
        column = round_up(self.size + LexSettings.pad, LexSettings.indent)
        for left, right in self.buf:
            if not right:
                sys.stdout.writelines([left, '\n'])
            else:
                need = column - len(left)
                sys.stdout.writelines([left, ' ' * need, right, '\n'])
        del self.buf[:]
        self.size = 0


def _put_code_block(table, code):
    ''' Queue `code` (a str of C++ source) on `table`, run through
    indent-cpp and wrapped in %{ ... %} when LexSettings.brace is set.
    '''
    if LexSettings.brace:
        table.put1('%{')
    for line2 in indent_cpp_slop(code):
        table.put1(LexSettings.nested_indent * ' ' + line2)
    if LexSettings.brace:
        table.put1('%}')


def format_lex_or_yacc_definitions():
    ''' definitions section (mostly used for options actually)

    Reads sys.stdin up to (and including) the first '%%' line and writes
    the formatted section, followed by a blank line and '%%', to
    sys.stdout.  Loose code (inside %{ %}, indented, or blank lines) is
    collected and emitted as one reindented block as soon as a
    non-code line follows it.
    '''
    table = Table()
    in_code = False
    code = []   # pending raw code lines, joined before reindenting
    for line in sys.stdin:
        if line == '%%\n':
            break
        if line == '%{\n':
            in_code = True
            continue
        if in_code:
            if line == '%}\n':
                in_code = False
                continue
            code.append(line)
            continue
        if not line.strip() or line != line.lstrip():
            # starts with whitespace or is an empty line ('\n')
            code.append(line)
            continue

        # A real definition line: first emit whatever code preceded it.
        pending = ''.join(code)
        if pending.strip():
            _put_code_block(table, pending)
            table.put1('')
        elif pending:
            # only blank lines were collected: collapse them into one
            table.put1('')
        code = []

        if line.startswith('%'):
            # %top is flex, %code and %union are bison
            union = line.startswith('%union')
            if union or line.startswith('%top') or line.startswith('%code'):
                # TODO fix stupidity when in strings or comments
                count = line.count('{')
                if union:
                    assert count <= 1
                    # Drop the '%' so indent-cpp sees a plain C++ union;
                    # it is put back on the first output line below.
                    code.append(line[1:])
                else:
                    if count:
                        assert count == 1
                        brace_at = line.find('{')
                        code.append(line[brace_at:])
                        table.put1(line[:brace_at])
                    else:
                        table.put1(line.rstrip())
                assert line.count('}') == 0
                # Swallow lines until the braces balance again.
                for line in sys.stdin:
                    count += line.count('{') - line.count('}')
                    code.append(line)
                    assert count >= 0
                    if count == 0:
                        break
                if union:
                    first = True
                    for line2 in indent_cpp_slop(''.join(code)):
                        if first:
                            line2 = '%' + line2
                            first = False
                        table.put1(line2)
                else:
                    for line2 in indent_cpp_slop(''.join(code)):
                        table.put1(LexSettings.nested_indent * ' ' + line2)
                code = []
            else:
                table.put1(line)
        elif line[0].isalpha() or line[0] == '_':
            parts = line.split(None, 1)
            if len(parts) == 2:
                table.put2(parts[0], parts[1])
            else:
                # a bare name with no expansion: nothing to align
                table.put1(line)
        else:
            table.put1(line)

    assert not in_code
    # Code that runs right up to the '%%' has no following definition line
    # to trigger its emission, so emit it here instead of dropping it.
    pending = ''.join(code)
    if pending.strip():
        _put_code_block(table, pending)
    table.flush()
    sys.stdout.write('\n%%\n')


def _lex_pattern_end(line):
    ''' Return the index in `line` where the lex pattern stops.

    The pattern ends at the first whitespace that is outside quotes,
    character classes and parentheses, and is not backslash-escaped.
    If no such whitespace exists, len(line) is returned.  Raises
    ValueError when a quote or character class is left open.
    '''
    end = len(line)
    i = 0
    depth = 0   # nesting level of '(' ... ')'
    try:
        while i < end:
            c = line[i]
            if c == '\\':
                # skip the backslash and the character it escapes
                i += 2
                continue
            if not depth and c.isspace():
                return i
            if c == '"':
                i += 1
                while line[i] != '"':
                    if line[i] == '\\':
                        i += 1
                    i += 1
            elif c == '[':
                i += 1
                if line[i] == '^':
                    i += 1
                while line[i] != ']':
                    if line[i] == '\\':
                        i += 1
                    i += 1
            elif c == '(':
                depth += 1
            elif c == ')':
                assert depth
                depth -= 1
            i += 1
    except IndexError:
        raise ValueError('unterminated quote or character class in '
                         'pattern: %r' % line)
    return end


def format_lex_rules():
    ''' rule section

    Reads sys.stdin up to the next '%%' line.  Each rule is split into
    its pattern and its action; the action (extended over following
    lines until its braces balance) is reindented with indent-cpp and
    aligned in a column after the patterns.  Writes the result and a
    closing '%%' to sys.stdout.

    Raises NotImplementedError for rules with a start condition.
    '''
    table = Table()
    for line in sys.stdin:
        if line == '%%\n':
            break
        # '<<EOF>>' is a pattern, any other leading '<' is a start condition
        if line.startswith('<') and not line.startswith('<<'):
            raise NotImplementedError('start conditions not yet supported')
        split_at = _lex_pattern_end(line)
        pattern = line[:split_at]
        rule = [line[split_at:]]
        count = rule[0].count('{') - rule[0].count('}')
        while count:
            more = next(sys.stdin)
            rule.append(more)
            count += more.count('{') - more.count('}')
        rules = indent_cpp_slop(''.join(rule))
        table.put2(pattern, rules[0])
        for line2 in rules[1:]:
            table.put1(line2)
    table.flush()
    sys.stdout.write('%%\n')


def _quoted_end(text, quote):
    ''' Return the index just past the literal opened by text[0].

    `quote` is the closing delimiter.  A backslash escapes the character
    after it.  Returns -1 when the literal is not closed within `text`.
    '''
    escaped = False
    for i in range(1, len(text)):
        c = text[i]
        if escaped:
            escaped = False
        elif c == '\\':
            escaped = True
        elif c == quote:
            return i + 1
    return -1


def _read_braced_action(line):
    ''' Collect a '{ ... }' action that starts at line[0].

    Further lines are pulled from sys.stdin until the braces balance.
    Returns (code, rest): the action text including both outer braces,
    and the stripped remainder of the line on which it closed.
    '''
    line += '\n'
    chunks = []
    # TODO fix braces in comments and strings
    lo = 1      # scan position; starts after the opening brace
    depth = 1
    while depth:
        i = line.find('}', lo)
        if i == -1:
            depth += line[lo:].count('{')
            chunks.append(line)
            line = next(sys.stdin)
            lo = 0
        else:
            depth -= 1
            i += 1
            depth += line[lo:i].count('{')
            lo = i
    chunks.append(line[:lo])
    return ''.join(chunks), line[lo:].strip()


def format_yacc_rules():
    ''' Reformat the rules section of a yacc file.

    Reads sys.stdin up to the next '%%' line and writes to sys.stdout.

    tokens are any of:
        word
        word[namedref]
        'c'
        "str"
        { code }

    break before {
    break twice before a : or |
    break twice before and thrice after a ;
    put a softspace after everything except ;

    Raises ValueError for a character or string literal that is not
    closed on its own line.
    '''
    out = sys.stdout
    out.write('\n')
    softspace = '' # NOT reset by new lines
    for line in sys.stdin:
        if line == '%%\n':
            break
        line = line.strip()
        while line:
            head = line[0]
            if head == "'" or head == '"':
                end = _quoted_end(line, head)
                if end == -1:
                    if head == "'":
                        raise ValueError('broken char')
                    raise ValueError('broken string')
                word = line[:end]
                line = line[end:].lstrip()
                out.writelines([softspace, word])
                softspace = ' '
                continue
            if head == ':':
                line = line[1:].lstrip()
                out.write('\n\n:')
                softspace = ' '
                continue
            if head == '{':
                code, line = _read_braced_action(line)
                for line2 in indent_cpp_slop(code):
                    out.writelines(['\n', line2])
                softspace = ' '
                continue
            if head == ';':
                line = line[1:].lstrip()
                out.write('\n\n;\n\n\n')
                softspace = ''
                continue
            if head == '|':
                line = line[1:].lstrip()
                out.write('\n\n|')
                softspace = ' '
                continue
            # screw comments
            parts = line.split(None, 1)
            word = parts[0]
            line = parts[1] if len(parts) > 1 else ''
            if word.endswith(':'):
                # 'name:' -> emit 'name' now, handle the ':' next round
                word = word[:-1]
                line = ':' + line
            out.writelines([softspace, word])
            softspace = ' '
        # while line
    # for line in stdin
    out.write('%%\n')


def format_lex():
    ''' Format a whole lex file from sys.stdin to stdout.

    A lex file is a series of sections, separated by '%%' lines.

    In the initial section a line may be:
        indented code (it begins with whitespace)
        a /* comment */
        a #line
        a %s, %x, %pointer, %array, %option ...
        a %{ codeblock %}
        a %top { codeblock }
        a name and an expansion

    A %{ codeblock ends at %}; a %top{ codeblock ends at the } that
    matches the nesting.

    In section 2 each rule is a pattern followed by an action.

    In section 3 everything is just C code.
    '''
    format_lex_or_yacc_definitions()
    format_lex_rules()
    format_cc()


def format_yacc():
    ''' Format a whole yacc file from sys.stdin to stdout.

    A yacc file is a series of sections, separated by '%%' lines.

    The initial section holds % directives, %{ prologue %} blocks and
    { braced code }; whitespace and comments are ignored by yacc.

    The second section holds the grammar, in practice shaped like:
        name: symbol 'c' "str" { code }
            | symbol 'c' "str" { code } /* in any order */
            ;
    any name may instead be name[namedref], and code may additionally
    contain $$, $1, $namedref.

    In section 3 everything is C code.
    '''
    format_lex_or_yacc_definitions()
    format_yacc_rules()
    format_cc()


def format_cc():
    ''' Pipe the rest of sys.stdin through indent-cpp.

    The child inherits this process's stdout, so it writes its result
    directly after whatever has been written so far.
    '''
    # The child writes to the same file descriptor behind our back, so
    # anything still sitting in our buffer must go out first.
    sys.stdout.flush()
    tail = subprocess.Popen(['indent-cpp'], stdin=subprocess.PIPE,
                            stdout=None, universal_newlines=True)
    try:
        tail.stdin.writelines(sys.stdin)
    finally:
        # Closing stdin signals end of input; then wait so the child's
        # output is complete before we return.
        tail.stdin.close()
        tail.wait()


def indent_cpp_slop(code):
    ''' Run `code` through indent-cpp and return the output lines.

    `code` is normally a str; a bytearray or bytes object is accepted
    too.  The output is stripped of surrounding whitespace before being
    split, so the result always has at least one element.
    '''
    if isinstance(code, bytearray):
        code = bytes(code)
    if not isinstance(code, str):
        code = code.decode('utf-8')
    # universal_newlines gives text-mode pipes on Python 3 and is
    # harmless on Python 2.
    proc = subprocess.Popen(['indent-cpp'], stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE, universal_newlines=True)
    return proc.communicate(code)[0].strip().split('\n')


def format_ii():
    ''' Format an -ipp file: currently copied through unchanged.
    '''
    format_passthrough()


def format_passthrough():
    ''' Copy sys.stdin to sys.stdout unchanged.
    '''
    for line in sys.stdin:
        sys.stdout.write(line)


# Command-line selector -> formatter for that kind of file.
exts = {
    '-lpp': format_lex,
    '-ypp': format_yacc,
    '-cpp': format_cc,
    '-ipp': format_ii,
}


def main(argv=None):
    ''' Command-line entry point: `indenter.py -ext < input > output`.

    `argv` defaults to sys.argv.  Exits with a message when the argument
    count is wrong or the selector is not a key of `exts`.
    '''
    if argv is None:
        argv = sys.argv
    if len(argv) != 2:
        prog = argv[0] if argv else 'indenter.py'
        sys.exit('Usage: %s -ext < input.ext > output.ext' % prog)
    func = exts.get(argv[1])
    if not func:
        sys.exit('Bad -ext')
    func()


if __name__ == '__main__':
    main()