diff options
author | Ben Longbons <b.r.longbons@gmail.com> | 2013-12-19 22:02:04 -0800 |
---|---|---|
committer | Ben Longbons <b.r.longbons@gmail.com> | 2013-12-23 11:52:55 -0800 |
commit | 7081836d86e910efbd3b670904aa2be326d29bb2 (patch) | |
tree | 6aa59ec576d223d49cfd1c7b2dcf361d0d6552e5 /tools/indenter | |
parent | 98225193b93906351dfe2ad594a390997e301584 (diff) | |
download | tmwa-7081836d86e910efbd3b670904aa2be326d29bb2.tar.gz tmwa-7081836d86e910efbd3b670904aa2be326d29bb2.tar.bz2 tmwa-7081836d86e910efbd3b670904aa2be326d29bb2.tar.xz tmwa-7081836d86e910efbd3b670904aa2be326d29bb2.zip |
Refactor the lex indenter so that it can (almost) format yacc files
Diffstat (limited to 'tools/indenter')
-rwxr-xr-x | tools/indenter | 274 |
1 files changed, 274 insertions, 0 deletions
#!/usr/bin/env python
# -*- encoding: utf-8
##    indenter.py - Top-level indenter for all files
##
##    Copyright ©2013 Ben Longbons <b.r.longbons@gmail.com>
##
##    This file is part of The Mana World (Athena server)
##
##    This program is free software: you can redistribute it and/or modify
##    it under the terms of the GNU General Public License as published by
##    the Free Software Foundation, either version 3 of the License, or
##    (at your option) any later version.
##
##    This program is distributed in the hope that it will be useful,
##    but WITHOUT ANY WARRANTY; without even the implied warranty of
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##    GNU General Public License for more details.
##
##    You should have received a copy of the GNU General Public License
##    along with this program.  If not, see <http://www.gnu.org/licenses/>.

import subprocess
import sys


# Settings.
class LexSettings:
    # Minimum number of spaces between a definition name and its expansion.
    pad = 2
    # The expansion column is rounded up to a multiple of this.
    indent = 4
    # Whether re-emitted code blocks are wrapped in '%{' / '%}' lines.
    brace = True
    # Extra indentation applied to code emitted inside a block.
    nested_indent = 0 # 4


# Code.

def round_up(i, a):
    '''Return i rounded up to the next multiple of a (i itself if aligned).
    '''
    m = i % a
    if m:
        i += (a - m)
    return i


class Table(object):
    ''' Aligned output

    Rows are buffered until flush() so that every two-column row can be
    aligned on the widest left-hand column seen in the batch.
    '''
    def __init__(self):
        self.buf = []
        self.size = 0

    def put1(self, line):
        '''Buffer a single-column row; trailing whitespace is dropped.
        '''
        line = line.rstrip()
        self.buf.append((line, ''))

    def put2(self, left, right):
        '''Buffer a two-column row.

        Only rows that really have a right-hand part widen the column.
        '''
        left = left.rstrip()
        right = right.strip()
        self.buf.append((left, right))
        if right and len(left) > self.size:
            self.size = len(left)

    def flush(self):
        '''Write all buffered rows to sys.stdout and reset the table.
        '''
        self.size += LexSettings.pad
        self.size = round_up(self.size, LexSettings.indent)
        for l, r in self.buf:
            if not r:
                sys.stdout.writelines([l, '\n'])
            else:
                need = self.size - len(l)
                sys.stdout.writelines([l, ' ' * need, r, '\n'])
        del self.buf[:]
        self.size = 0


def _put_pending_code(table, code, blank_after=True):
    '''Emit the accumulated raw code lines (a list of str) into table.

    Real code is run through indent-cpp and, depending on
    LexSettings.brace, wrapped in %{ %}.  Whitespace-only input collapses
    to a single blank row.  blank_after controls the separating blank row.
    '''
    text = ''.join(code)
    if text.strip():
        if LexSettings.brace:
            table.put1('%{')
        for line2 in indent_cpp_slop(text):
            table.put1(LexSettings.nested_indent * ' ' + line2)
        if LexSettings.brace:
            table.put1('%}')
        if blank_after:
            table.put1('')
    elif text and blank_after:
        table.put1('')


def _put_braced_directive(table, line, union):
    '''Handle a %top / %code / %union directive and its { } body.

    line is the directive line itself; following lines are consumed from
    sys.stdin until the braces balance.  The body is formatted with
    indent-cpp.  For %union the keyword is formatted together with the
    body (minus the '%', which is put back on the first output line).
    '''
    # TODO fix stupidity when in strings or comments
    count = line.count('{')
    code = []
    if union:
        assert count <= 1
        code.append(line[1:])
    else:
        if count:
            assert count == 1
            brace = line.find('{')
            code.append(line[brace:])
            table.put1(line[:brace])
        else:
            table.put1(line)
    assert line.count('}') == 0
    for line in sys.stdin:
        count += line.count('{') - line.count('}')
        code.append(line)
        assert count >= 0
        if count == 0:
            break
    formatted = indent_cpp_slop(''.join(code))
    if union:
        # str.split always yields at least one element.
        formatted[0] = '%' + formatted[0]
        prefix = ''
    else:
        prefix = LexSettings.nested_indent * ' '
    for line2 in formatted:
        table.put1(prefix + line2)


def format_lex_or_yacc_definitions():
    '''definitions section (mostly used for options actually)

    Reads sys.stdin up to (and including) the first '%%' line and writes
    the formatted section followed by a blank line and '%%'.
    '''
    table = Table()
    in_code = False
    code = []  # raw code lines waiting to be formatted
    for line in sys.stdin:
        if line == '%%\n':
            break
        if line == '%{\n':
            in_code = True
            continue
        if in_code:
            if line == '%}\n':
                in_code = False
                continue
            code.append(line)
            continue
        if not line.strip() or line != line.lstrip():
            # starts with whitespace or is an empty line ('\n')
            code.append(line)
            continue
        _put_pending_code(table, code)
        code = []

        if line.startswith('%'):
            # %top is flex, %code and %union are bison
            union = line.startswith('%union')
            if union or line.startswith('%top') or line.startswith('%code'):
                _put_braced_directive(table, line, union)
            else:
                table.put1(line)
        elif line[0].isalpha() or line[0] == '_':
            parts = line.split(None, 1)
            if len(parts) == 2:
                table.put2(*parts)
            else:
                # A bare name without an expansion: keep it as-is rather
                # than crashing on the missing second column.
                table.put1(line)
        else:
            table.put1(line)

    assert not in_code
    # Code sitting directly before '%%' must not be lost.  The separator
    # written below already supplies the blank line, so none is added here.
    _put_pending_code(table, code, blank_after=False)
    table.flush()
    sys.stdout.write('\n%%\n')


def _split_lex_rule(line):
    '''Split a lex rule line into (pattern, action).

    The pattern ends at the first whitespace that is not escaped and not
    inside a "string", a [class] or ( ) group.  If the line ends while a
    construct is still open, the whole line is taken as the pattern.
    '''
    end = len(line)
    i = 0
    depth = 0
    while i < end:
        ch = line[i]
        if ch == '\\':
            # Skip the backslash and whatever it escapes.
            i += 2
            continue
        if not depth and ch.isspace():
            break
        if ch == '"':
            i += 1
            while i < end and line[i] != '"':
                if line[i] == '\\':
                    i += 1
                i += 1
        elif ch == '[':
            i += 1
            if i < end and line[i] == '^':
                i += 1
            while i < end and line[i] != ']':
                if line[i] == '\\':
                    i += 1
                i += 1
        elif ch == '(':
            depth += 1
        elif ch == ')':
            assert depth
            depth -= 1
        i += 1
    i = min(i, end)
    return line[:i], line[i:].strip()


def format_lex_rules():
    '''rule section

    Copies rule lines from sys.stdin to sys.stdout up to the next '%%'
    line (consumed) and then writes '%%'.  Start conditions are rejected.
    '''
    for line in sys.stdin:
        if line == '%%\n':
            break
        if line.startswith('<') and not line.startswith('<<'):
            raise NotImplementedError('start conditions not yet supported')
        # The pattern/action split is not used for alignment yet; the scan
        # is kept because it still asserts on an unbalanced ')'.
        _split_lex_rule(line)

        sys.stdout.write(line)

    sys.stdout.write('%%\n')


def format_yacc_rules():
    '''Copy the grammar rules through unchanged, up to and including '%%'.

    Stopping at the delimiter leaves the epilogue on sys.stdin for
    format_cc().  If the input has no second '%%', everything is copied
    and nothing is added.
    '''
    for line in sys.stdin:
        sys.stdout.write(line)
        if line == '%%\n':
            break


def format_lex():
    '''
    A lex file is a series of sections.

    In the initial section:
        If it begins with whitespace, it is indented code
        It might be a /* comment */
        It might be a #line
        It might be a %s, %x, %pointer, %array, %option %[a-z][0-9].*
        It might be a %{ codeblock %}
        It might be a %top { codeblock }
        It might be a name and an expansion
        A %% switches to the second section

    In a comment:
        */ is the end

    In a codeblock:
        if it started with %{, %} ends it
        if it started with %top{, } ends it if it matches the nesting

    In section 2's header:
        there may be %{ %} sections, possibly nested
        there may also be indented code
        there may be unindented code if it's inside the %{ %}

    In section 2 proper:
        pattern action
        <sc>pattern action
        <sc>{
            pattern action
        }
        a %% switches to section 3

    In section 3:
        everything is just C code
    '''

    format_lex_or_yacc_definitions()
    format_lex_rules()
    format_cc()


def format_yacc():
    '''Format a yacc/bison file: definitions, rules, then the epilogue.
    '''
    format_lex_or_yacc_definitions()
    format_yacc_rules()
    format_cc()


def format_cc():
    '''Pipe the rest of sys.stdin through indent-cpp.

    The child writes straight to the inherited stdout, so our own buffered
    output is flushed first to keep the ordering, and the child is waited
    for so that its output is complete before we return.
    '''
    sys.stdout.flush()
    tail = subprocess.Popen(['indent-cpp'], stdin=subprocess.PIPE,
                            stdout=None, universal_newlines=True)
    try:
        tail.stdin.writelines(sys.stdin)
    finally:
        tail.stdin.close()
        status = tail.wait()
    if status:
        sys.exit('indent-cpp failed with status %d' % status)


def indent_cpp_slop(code):
    '''Run code (a str) through indent-cpp; return the output lines.

    Leading and trailing whitespace of the whole output is stripped, so
    the result always has at least one (possibly empty) element.
    '''
    proc = subprocess.Popen(['indent-cpp'], stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE, universal_newlines=True)
    return proc.communicate(code)[0].strip().split('\n')


def format_ii():
    '''Format an -ipp input: currently copied through unchanged.
    '''
    format_passthrough()


def format_passthrough():
    '''Copy the rest of sys.stdin to sys.stdout verbatim.
    '''
    for line in sys.stdin:
        sys.stdout.write(line)


# Command-line flag -> formatter.
exts = {
    '-lpp': format_lex,
    '-ypp': format_yacc,
    '-cpp': format_cc,
    '-ipp': format_ii,
}


def main(argv=None):
    '''Command-line entry point: indenter -ext < input.ext > output.ext
    '''
    if argv is None:
        argv = sys.argv
    if len(argv) != 2:
        prog = argv[0] if argv else 'indenter'
        sys.exit('Usage: %s -ext < input.ext > output.ext' % prog)
    func = exts.get(argv[1])
    if not func:
        sys.exit('Bad -ext')
    func()


if __name__ == '__main__':
    main()