Refactor the lex indenter so that it can (almost) format yacc files

author: Ben Longbons <b.r.longbons@gmail.com> 2013-12-19 22:02:04 -0800
committer: Ben Longbons <b.r.longbons@gmail.com> 2013-12-23 11:52:55 -0800
commit: 7081836d86e910efbd3b670904aa2be326d29bb2 (patch)
tree: 6aa59ec576d223d49cfd1c7b2dcf361d0d6552e5
parent: 98225193b93906351dfe2ad594a390997e301584 (diff)
download: tmwa-7081836d86e910efbd3b670904aa2be326d29bb2.tar.gz
tmwa-7081836d86e910efbd3b670904aa2be326d29bb2.tar.bz2
tmwa-7081836d86e910efbd3b670904aa2be326d29bb2.tar.xz
tmwa-7081836d86e910efbd3b670904aa2be326d29bb2.zip
5 files changed, 278 insertions, 202 deletions
diff --git a/real.make b/real.make
index 58eae29..0177de0 100644
--- a/real.make
+++ b/real.make
@@ -386,11 +386,11 @@ dist: dist/tmwa-${VERSION_FULL}-src.tar dist/tmwa-${VERSION_FULL}-bundled.tar
 
 format: format-cpp format-hpp format-lpp format-ypp
 format-cpp:
-	cd ${SRC_DIR} && apply-filter indent-cpp ${REAL_SOURCES}
+	cd ${SRC_DIR} && apply-filter 'indenter -cpp' ${REAL_SOURCES}
 format-hpp:
-	cd ${SRC_DIR} && apply-filter indent-cpp ${REAL_HEADERS}
+	cd ${SRC_DIR} && apply-filter 'indenter -cpp' ${REAL_HEADERS}
 format-lpp:
-	cd ${SRC_DIR} && apply-filter indent-lpp ${LEXERS}
+	cd ${SRC_DIR} && apply-filter 'indenter -lpp' ${LEXERS}
 format-ypp:
-	cd ${SRC_DIR} && apply-filter indent-ypp ${PARSERS}
+	cd ${SRC_DIR} && apply-filter 'indenter -ypp' ${PARSERS}
 .PHONY: format format-cpp format-hpp format-lpp format-ypp
diff --git a/tools/indent-ipp b/tools/indent-ipp
deleted file mode 100755
index d868679..0000000
--- a/tools/indent-ipp
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/sh
-exec cat
diff --git a/tools/indent-lpp b/tools/indent-lpp
deleted file mode 100755
index 0d24faf..0000000
--- a/tools/indent-lpp
+++ /dev/null
@@ -1,194 +0,0 @@
-#!/usr/bin/env python
-# -*- encoding: utf-8
-##    indent - Top-level indenter for lex files
-##
-##    Copyright ©2013 Ben Longbons <b.r.longbons@gmail.com>
-##
-##    This file is part of The Mana World (Athena server)
-##
-##    This program is free software: you can redistribute it and/or modify
-##    it under the terms of the GNU General Public License as published by
-##    the Free Software Foundation, either version 3 of the License, or
-##    (at your option) any later version.
-##
-##    This program is distributed in the hope that it will be useful,
-##    but WITHOUT ANY WARRANTY; without even the implied warranty of
-##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-##    GNU General Public License for more details.
-##
-##    You should have received a copy of the GNU General Public License
-##    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-'''
-A lex file is a series of sections.
-
-In the initial section:
-    If it begins with whitespace, it is indented code
-    It might be a /* comment */
-    It might be a #line
-    It might be a %s, %x, %pointer, %array, %option %[a-z][0-9].*
-    It might be a %{ codeblock %}
-    It might be a %top { codeblock }
-    It might be a name and an expansion
-    A %% switches to the second section
-
-In a comment:
-    */ is the end
-
-In a codeblock:
-    if it started with %{, %} ends it
-    if it started with %top{, } ends it if it matches the nesting
-
-In section 2's header:
-    there may be %{ %} sections, possibly nested
-    there may also be indented code
-    there may be unindented code if it's inside the %{ %}
-
-In section 2 proper:
-    pattern         action
-    <sc>pattern     action
-    <sc>{
-        pattern     action
-    }
-    a %% switches to section 3
-
-In section 3:
-    everything is just C code
-'''
-
-# Settings.
-pad = 2
-indent = 4
-brace = True
-
-# Code.
-import subprocess
-import sys
-
-class Table:
-    ''' Aligned output
-    '''
-    def __init__(self):
-        self.buf = []
-        self.size = 0
-    def put1(self, line):
-        line = line.rstrip()
-        self.buf.append((line, ''))
-    def put2(self, left, right):
-        left = left.rstrip()
-        right = right.strip()
-        self.buf.append((left, right))
-        if right and len(left) > self.size:
-            self.size = len(left)
-    def flush(self):
-        self.size += pad
-        if self.size % indent:
-            self.size += indent - self.size % indent
-        for l, r in self.buf:
-            if not r:
-                sys.stdout.writelines([l, '\n'])
-            else:
-                need = self.size - len(l)
-                sys.stdout.writelines([l, ' ' * need, r, '\n'])
-        del self.buf[:]
-        self.size = 0
-
-table = Table()
-# definitions section (mostly used for options actually)
-in_code = False
-code = bytearray()
-for line in sys.stdin:
-    if line == '%%\n':
-        break
-    if line == '%{\n':
-        in_code = True
-        continue
-    if in_code:
-        if line == '%}\n':
-            in_code = False
-            continue
-        code += line
-        continue
-    if not line.strip() or line != line.lstrip():
-        # starts with whitespace or is an empty line ('\n')
-        code += line
-        continue
-    if code.strip():
-        if brace:
-            table.put1('%{')
-        for line2 in subprocess.Popen(['indent-cpp'], stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate(code)[0].strip().split('\n'):
-            # this looks bad actually
-            table.put1(0 * indent * ' ' + line2)
-        if brace:
-            table.put1('%}')
-            table.put1('')
-        code = bytearray()
-    elif code:
-        table.put1('')
-
-    if line.startswith('%'):
-        if line.startswith('%top'):
-            raise NotImplementedError('top block not yet supported')
-        table.put1(line)
-    elif line[0].isalpha() or line[0] == '_':
-        table.put2(*line.split(None, 1))
-    else:
-        table.put1(line)
-
-assert not in_code
-del code
-del in_code
-table.flush()
-sys.stdout.write('\n%%\n')
-
-# rule section
-for line in sys.stdin:
-    if line == '%%\n':
-        break
-    if line.startswith('<') and not line.startswith('<<'):
-        raise NotImplementedError('start conditions not yet supported')
-    i = 0
-    p = 0
-    bs = False
-    while True:
-        if bs:
-            bs = False
-            i += 1
-            continue
-        if line[i] == '\\':
-            bs = True
-            i += 1
-            continue
-        if not p and line[i].isspace():
-            break
-        if line[i] == '"':
-            i += 1
-            while line[i] != '"':
-                if line[i] == '\\':
-                    i += 1
-                i += 1
-        elif line[i] == '[':
-            i += 1
-            if line[i] == '^':
-                i += 1
-            while line[i] != ']':
-                i += 1
-        elif line[i] == '(':
-            p += 1
-        elif line[i] == ')':
-            assert p
-            p -= 1
-        i += 1
-    del bs
-    del p
-    pattern = line[:i]
-    rule = line[i:].strip()
-    del i
-
-    sys.stdout.write(line)
-
-sys.stdout.write('%%\n')
-
-# 3rd section is just copied verbatim ... IF it exists
-tail = subprocess.Popen(['indent-cpp'], stdin=subprocess.PIPE, stdout=None)
-tail.stdin.writelines(sys.stdin)
diff --git a/tools/indent-ypp b/tools/indent-ypp
deleted file mode 100755
index d868679..0000000
--- a/tools/indent-ypp
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/sh
-exec cat
diff --git a/tools/indenter b/tools/indenter
new file mode 100755
index 0000000..56d707c
--- /dev/null
+++ b/tools/indenter
@@ -0,0 +1,274 @@
+#!/usr/bin/env python
+# -*- encoding: utf-8
+##    indenter.py - Top-level indenter for all files
+##
+##    Copyright ©2013 Ben Longbons <b.r.longbons@gmail.com>
+##
+##    This file is part of The Mana World (Athena server)
+##
+##    This program is free software: you can redistribute it and/or modify
+##    it under the terms of the GNU General Public License as published by
+##    the Free Software Foundation, either version 3 of the License, or
+##    (at your option) any later version.
+##
+##    This program is distributed in the hope that it will be useful,
+##    but WITHOUT ANY WARRANTY; without even the implied warranty of
+##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+##    GNU General Public License for more details.
+##
+##    You should have received a copy of the GNU General Public License
+##    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# Settings.
+class LexSettings: # layout knobs read by Table.flush and the definitions formatter
+    pad = 2 # columns added after the longest left-hand entry before aligning
+    indent = 4 # the aligned column is rounded up to a multiple of this
+    brace = True # wrap reformatted definition-section code in %{ ... %}
+    nested_indent = 0 # 4; number of spaces prepended to each reformatted code line
+
+# Code.
+import subprocess
+import sys
+
+def round_up(i, a):
+    'Return i raised to the next multiple of a (unchanged if already one); a > 0.'
+    remainder = i % a
+    padding = (a - remainder) if remainder else 0
+    return i + padding
+
+class Table:
+    ''' Buffer one- and two-column rows; flush() writes them to stdout
+        with the right-hand column aligned. '''
+    def __init__(self):
+        self.size = 0 # widest left cell among rows that have a right cell
+        self.buf = [] # pending (left, right) pairs; right is '' for plain rows
+    def put1(self, line):
+        'Queue a single-column row, dropping trailing whitespace.'
+        self.buf.append((line.rstrip(), ''))
+    def put2(self, left, right):
+        'Queue a two-column row and track the widest left cell.'
+        row = (left.rstrip(), right.strip())
+        self.buf.append(row)
+        if row[1]:
+            self.size = max(self.size, len(row[0]))
+    def flush(self):
+        'Write every queued row to stdout, then reset the table.'
+        column = round_up(self.size + LexSettings.pad, LexSettings.indent)
+        for left, right in self.buf:
+            if right:
+                gap = ' ' * (column - len(left))
+                sys.stdout.writelines([left, gap, right, '\n'])
+            else:
+                sys.stdout.writelines([left, '\n'])
+        del self.buf[:]
+        self.size = 0
+
+def format_lex_or_yacc_definitions():
+    'Reformat the definitions section: everything on stdin up to the first %% line.'
+    table = Table()
+    in_code = False # True between a '%{' line and the matching '%}' line
+    code = bytearray() # raw code lines gathered since the last flush_code()
+    def flush_code(code): # queue gathered code, reformatted by indent-cpp, on the table
+        if code.strip():
+            if LexSettings.brace:
+                table.put1('%{')
+            for line2 in indent_cpp_slop(code):
+                table.put1(LexSettings.nested_indent * ' ' + line2)
+            if LexSettings.brace:
+                table.put1('%}')
+                table.put1('')
+        elif code:
+            table.put1('') # whitespace-only input collapses to one blank line
+    for line in sys.stdin:
+        if line == '%%\n':
+            break
+        if line == '%{\n':
+            in_code = True
+            continue
+        if in_code:
+            if line == '%}\n':
+                in_code = False
+                continue
+            code += line
+            continue
+        if not line.strip() or line != line.lstrip():
+            # starts with whitespace or is an empty line ('\n')
+            code += line
+            continue
+        flush_code(code)
+        code = bytearray()
+
+        if line.startswith('%'):
+            # %top is flex, %code and %union are bison
+            union = line.startswith('%union')
+            if union or line.startswith('%top') or line.startswith('%code'):
+                # TODO fix stupidity when in strings or comments
+                count = line.count('{') # running brace depth of the block
+                if union:
+                    assert count <= 1
+                    code += line[1:] # drop the '%' so indent-cpp sees a plain C union
+                elif count:
+                    assert count == 1
+                    code += line[line.find('{'):]
+                    table.put1(line[:line.find('{')])
+                else:
+                    table.put1(line.rstrip())
+                assert line.count('}') == 0
+                for line in sys.stdin: # consume lines until the braces balance
+                    count += line.count('{') - line.count('}')
+                    code += line
+                    assert count >= 0
+                    if count == 0:
+                        break
+                if union:
+                    first = True
+                    for line2 in indent_cpp_slop(code):
+                        if first:
+                            line2 = '%' + line2 # restore the '%' removed above
+                            first = False
+                        table.put1(line2)
+                else:
+                    for line2 in indent_cpp_slop(code):
+                        table.put1(LexSettings.nested_indent * ' ' + line2)
+                code = bytearray()
+            else:
+                table.put1(line)
+        elif line[0].isalpha() or line[0] == '_':
+            table.put2(*line.split(None, 1))
+        else:
+            table.put1(line)
+
+    assert not in_code
+    if code.strip(): # code directly before %% has no following line to trigger a flush
+        flush_code(code)
+    table.flush()
+    sys.stdout.write('\n%%\n')
+
+def format_lex_rules():
+    'Copy the rules section (up to the next %% line or EOF) through, scanning each pattern.'
+    for line in sys.stdin:
+        if line == '%%\n':
+            break
+        if line.startswith('<') and not line.startswith('<<'):
+            raise NotImplementedError('start conditions not yet supported')
+        i = 0 # scan position within line
+        p = 0 # current depth of unclosed '(' groups
+        bs = False # True when the previous character was a backslash
+        while True: # NOTE(review): no bounds check; an unterminated quote/class/group raises IndexError
+            if bs:
+                bs = False
+                i += 1 # skip the escaped character without interpreting it
+                continue
+            if line[i] == '\\':
+                bs = True
+                i += 1
+                continue
+            if not p and line[i].isspace():
+                break # first whitespace outside parentheses ends the pattern
+            if line[i] == '"':
+                i += 1
+                while line[i] != '"': # skip a quoted string, stepping over backslash escapes
+                    if line[i] == '\\':
+                        i += 1
+                    i += 1
+            elif line[i] == '[':
+                i += 1
+                if line[i] == '^':
+                    i += 1
+                while line[i] != ']': # stops at the first ']'; no escape handling inside the class
+                    i += 1
+            elif line[i] == '(':
+                p += 1
+            elif line[i] == ')':
+                assert p
+                p -= 1
+            i += 1
+        del bs
+        del p
+        pattern = line[:i] # NOTE(review): pattern and rule are computed but not used yet
+        rule = line[i:].strip()
+        del i
+
+        sys.stdout.write(line) # the line is emitted exactly as it was read
+
+    sys.stdout.write('%%\n')
+
+def format_yacc_rules(): # grammar rules are not reformatted yet
+    format_passthrough() # TODO only until %%; as written this copies everything left on stdin
+
+def format_lex():
+    ''' Format a lex file read from stdin, one section at a time.
+    A lex file is a series of sections.
+
+    In the initial section:
+        If it begins with whitespace, it is indented code
+        It might be a /* comment */
+        It might be a #line
+        It might be a %s, %x, %pointer, %array, %option %[a-z][0-9].*
+        It might be a %{ codeblock %}
+        It might be a %top { codeblock }
+        It might be a name and an expansion
+        A %% switches to the second section
+
+    In a comment:
+        */ is the end
+
+    In a codeblock:
+        if it started with %{, %} ends it
+        if it started with %top{, } ends it if it matches the nesting
+
+    In section 2's header:
+        there may be %{ %} sections, possibly nested
+        there may also be indented code
+        there may be unindented code if it's inside the %{ %}
+
+    In section 2 proper:
+        pattern         action
+        <sc>pattern     action
+        <sc>{
+            pattern     action
+        }
+        a %% switches to section 3
+
+    In section 3:
+        everything is just C code
+    '''
+
+    format_lex_or_yacc_definitions() # section 1, up to the first %%
+    format_lex_rules() # section 2, up to the second %%
+    format_cc() # section 3: whatever input is left goes through indent-cpp
+
+def format_yacc(): # yacc/bison counterpart of format_lex, reading stdin
+    format_lex_or_yacc_definitions() # declarations, up to the first %%
+    format_yacc_rules() # currently a passthrough of all remaining input
+    format_cc() # pipes whatever input is left through indent-cpp
+
+def format_cc(): # run the rest of stdin through indent-cpp, which writes to our stdout
+    sys.stdout.flush() # our buffered output must land before the child writes to the same stdout
+    subprocess.Popen(['indent-cpp'], stdin=subprocess.PIPE, stdout=None).communicate(sys.stdin.read()) # feeds input, closes the pipe, waits for exit
+
+def indent_cpp_slop(code): # pipe code through indent-cpp; return its output, stripped, as a list of lines
+    return subprocess.Popen(['indent-cpp'], stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate(code)[0].strip().split('\n')
+
+def format_ii(): # handler for the '-ipp' flag; the input is not reformatted
+    format_passthrough() # copy stdin to stdout unchanged
+
+def format_passthrough():
+    'Copy the remainder of stdin to stdout unchanged.'
+    sys.stdout.writelines(sys.stdin)
+
+exts = { # command-line flag -> formatter; each formatter reads stdin and writes stdout
+        '-lpp': format_lex,
+        '-ypp': format_yacc,
+        '-cpp': format_cc,
+        '-ipp': format_ii,
+}
+
+if __name__ == '__main__':
+    # sys is already imported at module level; argv[0] fills in the usage line.
+    if len(sys.argv) != 2:
+        sys.exit('Usage: %s -ext < input.ext > output.ext' % sys.argv[0])
+    func = exts.get(sys.argv[1])
+    if not func:
+        sys.exit('Bad -ext')
+    func()
author	Ben Longbons <b.r.longbons@gmail.com>	2013-12-19 22:02:04 -0800
committer	Ben Longbons <b.r.longbons@gmail.com>	2013-12-23 11:52:55 -0800
commit	7081836d86e910efbd3b670904aa2be326d29bb2 (patch)
tree	6aa59ec576d223d49cfd1c7b2dcf361d0d6552e5
parent	98225193b93906351dfe2ad594a390997e301584 (diff)
download	tmwa-7081836d86e910efbd3b670904aa2be326d29bb2.tar.gz tmwa-7081836d86e910efbd3b670904aa2be326d29bb2.tar.bz2 tmwa-7081836d86e910efbd3b670904aa2be326d29bb2.tar.xz tmwa-7081836d86e910efbd3b670904aa2be326d29bb2.zip