summaryrefslogblamecommitdiff
path: root/tools/indenter
blob: 4e17a15b464f23842666886a96aadcc19e262265 (plain) (tree)



















































































































































                                                                                                                                                  
                   







































                                                                           
                                  
             








                                                      
 
                 


                            




































































































                                                         











































                                                                     




































                                                        
































                                                                                                                                   
#!/usr/bin/env python
# -*- encoding: utf-8
##    indenter.py - Top-level indenter for all files
##
##    Copyright ©2013 Ben Longbons <b.r.longbons@gmail.com>
##
##    This file is part of The Mana World (Athena server)
##
##    This program is free software: you can redistribute it and/or modify
##    it under the terms of the GNU General Public License as published by
##    the Free Software Foundation, either version 3 of the License, or
##    (at your option) any later version.
##
##    This program is distributed in the hope that it will be useful,
##    but WITHOUT ANY WARRANTY; without even the implied warranty of
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##    GNU General Public License for more details.
##
##    You should have received a copy of the GNU General Public License
##    along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Settings.
class LexSettings:
    ''' Tunable layout constants read by the formatters in this file.
    '''
    # Multiple that the right-hand column position is rounded up to.
    indent = 4
    # Extra columns added after the longest left-hand entry before rounding.
    pad = 2
    # Number of spaces prefixed to each line of reindented embedded code
    # (the alternative value noted by the original author is 4).
    nested_indent = 0
    # When true, reindented loose code is wrapped in '%{' / '%}' lines.
    brace = True

# Code.
import subprocess
import sys

def round_up(i, a):
    ''' Return i raised to the next multiple of a (i itself if already one).
    '''
    remainder = i % a
    return i if remainder == 0 else i + a - remainder

class Table:
    ''' Buffer of output rows whose right-hand parts are aligned on flush.

        Rows are kept as (left, right) pairs in self.buf; self.size tracks
        the longest left part among the rows that have a right part.
    '''
    def __init__(self):
        self.buf = []
        self.size = 0

    def put1(self, line):
        ''' Queue a single-column row (trailing whitespace removed).
        '''
        self.buf.append((line.rstrip(), ''))

    def put2(self, left, right):
        ''' Queue a two-column row; an empty right part does not affect
            the column width.
        '''
        row = (left.rstrip(), right.strip())
        self.buf.append(row)
        if row[1]:
            self.size = max(self.size, len(row[0]))

    def flush(self):
        ''' Write every queued row to stdout and reset the table.
        '''
        # Column where the right-hand parts start: widest left part plus
        # padding, rounded up to the indent multiple.
        width = round_up(self.size + LexSettings.pad, LexSettings.indent)
        self.size = width
        for left, right in self.buf:
            if right:
                filler = ' ' * (width - len(left))
                sys.stdout.writelines([left, filler, right, '\n'])
            else:
                sys.stdout.writelines([left, '\n'])
        del self.buf[:]
        self.size = 0

def _put_pending_code(table, code, separate=True):
    ''' Queue accumulated loose code, reindented, as single-column rows.

        The code is wrapped in '%{' / '%}' when LexSettings.brace is set;
        'separate' adds an empty row after the closing '%}'.
    '''
    if LexSettings.brace:
        table.put1('%{')
    for line2 in indent_cpp_slop(code):
        table.put1(LexSettings.nested_indent * ' ' + line2)
    if LexSettings.brace:
        table.put1('%}')
        if separate:
            table.put1('')

def format_lex_or_yacc_definitions():
    ''' Reformat the definitions section (mostly used for options actually).

        Reads stdin up to and including the first '%%' line and writes the
        reformatted section, followed by a blank line and '%%', to stdout.

        Lines between '%{' and '%}', blank lines and lines starting with
        whitespace are collected as code and passed through indent-cpp.
        '%union', '%top' and '%code' directives have their braced body
        reindented; other '%' lines are copied; lines starting with a
        letter or underscore are laid out as two aligned columns.
    '''
    table = Table()
    in_code = False
    code = bytearray()
    for line in sys.stdin:
        if line == '%%\n':
            break
        if line == '%{\n':
            in_code = True
            continue
        if in_code:
            if line == '%}\n':
                in_code = False
                continue
            code += line
            continue
        if not line.strip() or line != line.lstrip():
            # starts with whitespace or is an empty line ('\n')
            code += line
            continue
        # A non-code line: first emit whatever code was collected so far.
        if code.strip():
            _put_pending_code(table, code)
        elif code:
            # only blank lines were collected; collapse them into one
            table.put1('')
        code = bytearray()

        if line.startswith('%'):
            # %top is flex, %code and %union are bison
            union = line.startswith('%union')
            if union or line.startswith(('%top', '%code')):
                # TODO braces inside strings or comments are miscounted
                count = line.count('{')
                if union:
                    assert count <= 1
                    # drop the '%' so indent-cpp sees a plain 'union';
                    # it is put back on the first output line below
                    code += line[1:]
                else:
                    if count:
                        assert count == 1
                        code += line[line.find('{'):]
                        table.put1(line[:line.find('{')])
                    else:
                        table.put1(line.rstrip())
                assert line.count('}') == 0
                # consume following lines until the braces balance
                for line in sys.stdin:
                    count += line.count('{') - line.count('}')
                    code += line
                    assert count >= 0
                    if count == 0:
                        break
                if union:
                    first = True
                    for line2 in indent_cpp_slop(code):
                        if first:
                            line2 = '%' + line2
                            first = False
                        table.put1(line2)
                else:
                    for line2 in indent_cpp_slop(code):
                        table.put1(LexSettings.nested_indent * ' ' + line2)
                code = bytearray()
            else:
                table.put1(line)
        elif line[0].isalpha() or line[0] == '_':
            fields = line.split(None, 1)
            if len(fields) == 2:
                table.put2(*fields)
            else:
                # a lone word has no right-hand column to align
                table.put1(line)
        else:
            table.put1(line)

    assert not in_code
    # Code directly followed by '%%' has not been emitted yet; without this
    # it would be silently dropped.  Pending blank lines are not emitted
    # because the separator written below already starts with a newline.
    if code.strip():
        _put_pending_code(table, code, separate=False)
    table.flush()
    sys.stdout.write('\n%%\n')

def format_lex_rules():
    ''' Reformat the rule section of a lex file.

        Reads stdin up to and including the next '%%' line.  Each rule is
        split into its pattern and its action; the action is passed
        through indent-cpp and the first action line is aligned in a
        second column next to the pattern.  Writes the result followed by
        '%%' to stdout.

        Raises NotImplementedError for a line that starts with '<' but
        not with '<<'.
    '''
    table = Table()
    for line in sys.stdin:
        if line == '%%\n':
            break
        # lines starting with '<<' are let through
        # (presumably for flex's <<EOF>> -- confirm)
        if line.startswith('<') and not line.startswith('<<'):
            raise NotImplementedError('start conditions not yet supported')
        # Find i, the index of the first whitespace character that ends
        # the pattern.  p is the current parenthesis depth and bs records
        # that the previous character was a backslash.
        i = 0
        p = 0
        bs = False
        while True:
            if bs:
                # the character after a backslash is skipped unexamined
                bs = False
                i += 1
                continue
            if line[i] == '\\':
                bs = True
                i += 1
                continue
            if not p and line[i].isspace():
                # whitespace outside parentheses ends the pattern
                break
            if line[i] == '"':
                # skip to the closing quote, stepping over escaped chars
                i += 1
                while line[i] != '"':
                    if line[i] == '\\':
                        i += 1
                    i += 1
            elif line[i] == '[':
                # skip to the first ']' after an optional leading '^'
                i += 1
                if line[i] == '^':
                    i += 1
                while line[i] != ']':
                    i += 1
            elif line[i] == '(':
                p += 1
            elif line[i] == ')':
                assert p
                p -= 1
            i += 1
        del bs
        del p
        pattern = line[:i]
        rule = bytearray(line[i:])
        del i
        # keep appending input lines until the action's braces balance
        count = rule.count('{') - rule.count('}')
        while count:
            blah = next(sys.stdin)
            rule += blah
            count += blah.count('{') - blah.count('}')
        rules = indent_cpp_slop(rule)
        # first action line shares the row with the pattern, the rest
        # are single-column rows
        table.put2(pattern, rules[0])
        for line in rules[1:]:
            table.put1(line)

    table.flush()
    sys.stdout.write('%%\n')

def _quoted_token_end(text, quote, what):
    ''' Return the index just past the quoted token that starts text.

        text[0] is the opening quote; a backslash escapes the character
        after it.  Raises Exception('broken ' + what) if text has no
        closing quote.
    '''
    escaped = False
    for i, c in enumerate(text):
        if escaped:
            # this character is escaped, so it can not close the token
            escaped = False
        elif c == '\\':
            escaped = True
        elif i and c == quote:
            return i + 1
    raise Exception('broken ' + what)

def format_yacc_rules():
    '''
        Reformat the rules section of a yacc file.

        Reads stdin up to and including the next '%%' line and writes the
        reformatted rules followed by '%%' to stdout.

        tokens are any of:
        word
        word[namedref]
        'c'
        "str"
        { code }
        break before {
        break twice before a : or |
        break twice before and thrice after a ;
        put a softspace after everything except ;

        Raises Exception for an unterminated 'c' or "str" token.
    '''
    sys.stdout.write('\n')
    softspace = '' # NOT reset by new lines
    for line in sys.stdin:
        if line == '%%\n':
            break
        line = line.strip()
        while line:
            if line[0] in '\'"':
                what = 'char' if line[0] == "'" else 'string'
                i = _quoted_token_end(line, line[0], what)
                word = line[:i]
                line = line[i:].lstrip()
                sys.stdout.writelines([softspace, word])
                softspace = ' '
                continue
            if line.startswith(':'):
                line = line[1:].lstrip()
                sys.stdout.write('\n\n:')
                softspace = ' '
                continue
            if line.startswith('{'):
                line += '\n'
                lines = bytearray()
                # TODO fix braces in comments and strings
                # lo is where scanning resumes in the current line,
                # behold is the number of braces still open.
                lo = 1
                behold = 1
                while behold:
                    i = line.find('}', lo)
                    if i == -1:
                        # no closer left on this line: take all of it
                        # and carry on with the next input line
                        behold += line[lo:].count('{')
                        lines += line
                        line = next(sys.stdin)
                        lo = 0
                    else:
                        behold -= 1
                        i += 1
                        behold += line[lo:i].count('{')
                        lo = i
                lines += line[:lo]
                for line2 in indent_cpp_slop(lines):
                    sys.stdout.writelines(['\n', line2])
                line = line[lo:].strip()
                softspace = ' '
                continue
            if line.startswith(';'):
                line = line[1:].lstrip()
                sys.stdout.write('\n\n;\n\n\n')
                softspace = ''
                continue
            if line.startswith('|'):
                line = line[1:].lstrip()
                sys.stdout.write('\n\n|')
                softspace = ' '
                continue
            # screw comments
            # Plain word: split on any whitespace, not just a space.
            fields = line.split(None, 1)
            word = fields[0]
            line = fields[1] if len(fields) > 1 else ''
            if word.endswith(':'):
                # keep the name and hand the colon back to the ':' branch
                word = word[:-1]
                line = ':' + line
            sys.stdout.writelines([softspace, word])
            softspace = ' '
            continue
        # while line
    # for line in stdin
    sys.stdout.write('%%\n')

def format_lex():
    '''
    Reformat a whole lex file read from stdin.

    A lex file consists of sections separated by %% lines.

    Section 1 (definitions) may hold:
        indented code (any line that begins with whitespace)
        a /* comment */, which runs until */
        a #line
        a directive: %s, %x, %pointer, %array, %option %[a-z][0-9].*
        a %{ codeblock %}, which runs until %}
        a %top { codeblock }, which runs until the } matching the nesting
        a name followed by its expansion
        a %% line, which starts section 2

    Section 2 opens with a header that may hold:
        %{ %} sections, possibly nested
        indented code
        unindented code, as long as it is inside the %{ %}

    The body of section 2 is made of:
        pattern         action
        <sc>pattern     action
        <sc>{
            pattern     action
        }
        a %% line, which starts section 3

    Section 3 is plain C code.
    '''
    # one formatter per section, run in file order
    stages = (format_lex_or_yacc_definitions, format_lex_rules, format_cc)
    for stage in stages:
        stage()

def format_yacc():
    ''' Reformat a whole yacc file read from stdin.

    A yacc file consists of sections separated by %% lines.

    Section 1 ignores whitespace and comments and may hold:
        %someoption
        =
        |
        ;
        name
        name:
        int

        'char'
        "string"
        <*>
        <>
        <something>
        %{ prologue %}
        { braced code }
        [ bracketed identifier ]
        a %% line, which starts section 2

    Section 2 formally accepts exactly the same things, but in practice
    it looks like this:

        name:
            symbol 'c' "str" { code }
        |
            symbol 'c' "str" { code } /* in any order */
        ;

        Any name may be written name[namedref] instead, and the code may
        also contain $$, $1, $namedref.

    Section 3 is plain C code.
    '''
    # one formatter per section, run in file order
    stages = (format_lex_or_yacc_definitions, format_yacc_rules, format_cc)
    for stage in stages:
        stage()

def format_cc():
    ''' Pipe the rest of stdin through the external indent-cpp command.

        The child inherits our stdout and writes its result there itself.

        Raises subprocess.CalledProcessError if indent-cpp exits with a
        non-zero status.
    '''
    # Anything still buffered by us has to reach the shared stdout before
    # the child starts writing to it, or the output ends up out of order.
    sys.stdout.flush()
    tail = subprocess.Popen(['indent-cpp'], stdin=subprocess.PIPE, stdout=None)
    try:
        tail.stdin.writelines(sys.stdin)
    finally:
        # Closing the pipe gives the child its end-of-file; waiting makes
        # sure its output is complete before we return.
        tail.stdin.close()
        status = tail.wait()
    if status:
        raise subprocess.CalledProcessError(status, 'indent-cpp')

def indent_cpp_slop(code):
    ''' Run code through the external indent-cpp command.

        Returns the command's output, with surrounding whitespace removed,
        as a list of lines.
    '''
    indenter = subprocess.Popen(['indent-cpp'], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    formatted, _ = indenter.communicate(code)
    return formatted.strip().split('\n')

def format_ii():
    ''' Handler for the '-ipp' mode: copy stdin to stdout unchanged.
    '''
    format_passthrough()

def format_passthrough():
    ''' Copy every line of stdin to stdout without modification.
    '''
    sys.stdout.writelines(sys.stdin)

# Maps the command-line mode argument to the formatter that handles it.
exts = {
        '-lpp': format_lex,
        '-ypp': format_yacc,
        '-cpp': format_cc,
        '-ipp': format_ii,
}

if __name__ == '__main__':
    import sys
    # Exactly one argument is expected: the mode, one of the keys of exts.
    if len(sys.argv) != 2:
        # the program name has to be interpolated, otherwise a literal
        # '%s' is shown to the user
        sys.exit('Usage: %s -ext < input.ext > output.ext' % sys.argv[0])
    func = exts.get(sys.argv[1])
    if not func:
        sys.exit('Bad -ext')
    func()