From f71413176f32ef642824456544ecbda5933a0944 Mon Sep 17 00:00:00 2001
From: Ben Longbons <b.r.longbons@gmail.com>
Date: Mon, 23 Dec 2013 19:40:38 -0800
Subject: Add non-indenting indenter for C++

Also fix a bunch of bugs that I had not noticed before.
---
 real.make                           |  41 +++--
 src/common/human_time_diff_test.cpp |   1 -
 src/map/magic-interpreter-lexer.lpp |   1 +
 tools/indent-cpp                    |   2 -
 tools/indenter                      | 293 ++++++++++++++++++++++++++++++++++--
 tools/indenter-cpp                  |   9 ++
 tools/maybe-mv                      |   4 +-
 7 files changed, 328 insertions(+), 23 deletions(-)
 delete mode 100755 tools/indent-cpp
 create mode 100755 tools/indenter-cpp

diff --git a/real.make b/real.make
index 0177de0..d99a823 100644
--- a/real.make
+++ b/real.make
@@ -53,6 +53,7 @@
 # 5. Remove the few (obvious) bits that are hard-coded for TMWA.
 # 6. Handle testing better. I'm guessing I should actually compile just
 #   one foo_test.cpp file into each executable test ...
+# 7. Refactor into several files after all. We need extensibility!
 #
 # IWBNMI:
 # 1. Add 'make check' and 'make installcheck'.
@@ -123,6 +124,8 @@ GEN_HEADERS := \
     $(patsubst %.ypp,%.hpp,${PARSERS})
 REAL_SOURCES := $(shell cd ${SRC_DIR}; find src/ -name '*.cpp')
 REAL_HEADERS := $(shell cd ${SRC_DIR}; find src/ -name '*.hpp' -o -name '*.tcc')
+REAL_SOURCES := $(filter-out ${GEN_SOURCES},${REAL_SOURCES})
+REAL_HEADERS := $(filter-out ${GEN_HEADERS},${REAL_HEADERS})
 SOURCES := ${GEN_SOURCES} ${REAL_SOURCES}
 HEADERS := ${GEN_HEADERS} ${REAL_HEADERS}
 DEPENDS := $(patsubst src/%.cpp,obj/%.d,${SOURCES})
@@ -231,7 +234,8 @@ mostlyclean:
 	rm -rf obj conf-raw
 clean: mostlyclean
 	rm -rf bin
-distclean: clean
+distclean: clean gen-clean
+gen-clean:
 	rm -f ${GEN_SOURCES} ${GEN_HEADERS}
 
 %.cpp: %.lpp
@@ -384,13 +388,30 @@ dist/%-bundled.tar: dist/%-src.tar dist/%-attoconf-only.tar
 dist: dist/tmwa-${VERSION_FULL}-src.tar dist/tmwa-${VERSION_FULL}-bundled.tar
 .PHONY: dist
 
-format: format-cpp format-hpp format-lpp format-ypp
-format-cpp:
-	cd ${SRC_DIR} && apply-filter 'indenter -cpp' ${REAL_SOURCES}
-format-hpp:
-	cd ${SRC_DIR} && apply-filter 'indenter -cpp' ${REAL_HEADERS}
-format-lpp:
-	cd ${SRC_DIR} && apply-filter 'indenter -lpp' ${LEXERS}
-format-ypp:
-	cd ${SRC_DIR} && apply-filter 'indenter -ypp' ${PARSERS}
+# lpp and ypp are (currently) very slow, so do them first (parallel)
+format: format-lpp format-ypp format-cpp format-hpp
+format-cpp: $(patsubst src/%,obj/%.formatted,${REAL_SOURCES})
+format-hpp: $(patsubst src/%,obj/%.formatted,${REAL_HEADERS})
+format-lpp: $(patsubst src/%,obj/%.formatted,${LEXERS})
+format-ypp: $(patsubst src/%,obj/%.formatted,${PARSERS})
+# obj/FILE.formatted is a stamp, touched after src/FILE has gone through
+# apply-filter. Each indenter mode also runs tools/indenter-cpp, so both
+# scripts are prerequisites: editing either one reformats everything.
+# $(call FORMAT_IN_PLACE,mode) expands to the three recipe lines.
+define FORMAT_IN_PLACE
+$(MKDIR_FIRST)
+cd ${SRC_DIR} && apply-filter 'indenter -$1' $<
+touch $@
+endef
+INDENTER_SCRIPTS := tools/indenter tools/indenter-cpp
+obj/%.cpp.formatted: src/%.cpp ${INDENTER_SCRIPTS}
+	$(call FORMAT_IN_PLACE,cpp)
+obj/%.hpp.formatted: src/%.hpp ${INDENTER_SCRIPTS}
+	$(call FORMAT_IN_PLACE,cpp)
+obj/%.tcc.formatted: src/%.tcc ${INDENTER_SCRIPTS}
+	$(call FORMAT_IN_PLACE,cpp)
+obj/%.lpp.formatted: src/%.lpp ${INDENTER_SCRIPTS}
+	$(call FORMAT_IN_PLACE,lpp)
+obj/%.ypp.formatted: src/%.ypp ${INDENTER_SCRIPTS}
+	$(call FORMAT_IN_PLACE,ypp)
 .PHONY: format format-cpp format-hpp format-lpp format-ypp
diff --git a/src/common/human_time_diff_test.cpp b/src/common/human_time_diff_test.cpp
index d11a116..d3ddad1 100644
--- a/src/common/human_time_diff_test.cpp
+++ b/src/common/human_time_diff_test.cpp
@@ -81,4 +81,3 @@ TEST(humantimediff, multiple)
     EXPECT_EQ(0, diff.second);
     EXPECT_FALSE(extract("1y2y", &diff));
 }
-
diff --git a/src/map/magic-interpreter-lexer.lpp b/src/map/magic-interpreter-lexer.lpp
index 3625ee3..786088e 100644
--- a/src/map/magic-interpreter-lexer.lpp
+++ b/src/map/magic-interpreter-lexer.lpp
@@ -153,3 +153,4 @@
 .                       FPRINTF(stderr, "%s: Unexpected character in line %d\n", MAGIC_CONFIG_FILE, magic_frontend_lineno);
 
 %%
+// nothing to see here, move along
diff --git a/tools/indent-cpp b/tools/indent-cpp
deleted file mode 100755
index 610d623..0000000
--- a/tools/indent-cpp
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/sh
-pp-indent | bs-align
diff --git a/tools/indenter b/tools/indenter
index 4e17a15..0f0f31d 100755
--- a/tools/indenter
+++ b/tools/indenter
@@ -19,6 +19,16 @@
 ##    You should have received a copy of the GNU General Public License
 ##    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
+
+from __future__ import print_function
+
+from collections import namedtuple
+import cStringIO
+import string
+import subprocess
+import sys
+
+
 # Settings.
 class LexSettings:
     pad = 2
@@ -26,9 +36,146 @@ class LexSettings:
     brace = True
     nested_indent = 0 # 4
 
+
 # Code.
-import subprocess
-import sys
+Location = namedtuple('Location', ('name', 'line', 'column', 'text'))
+if 1:
+    def _diagnostic(self, level, msg, to):
+        print('{file}:{line}:{column}: {level}: {msg}'.format(
+                file=self.name, line=self.line, column=self.column,
+                level=level, msg=msg),
+            file=sys.stderr)
+        print(self.text, file=sys.stderr)
+        if to:
+            assert to.name == self.name
+            assert to.line == self.line
+            assert to.column >= self.column
+        else:
+            to = self
+        print(' ' * (self.column - 1) + '^' + '~' * (to.column - self.column), file=sys.stderr)
+    def error(self, msg, to=None):
+        self._diagnostic('error', msg, to)
+    def warning(self, msg, to=None):
+        self._diagnostic('warning', msg, to)
+    def note(self, msg, to=None):
+        self._diagnostic('note', msg, to)
+    Location._diagnostic = _diagnostic
+    Location.error = error
+    Location.warning = warning
+    Location.note = note
+    del _diagnostic, error, warning, note
+
+
+class Reader(object):
+    __slots__ = ('_name', '_stream', '_buffer', '_line', '_column')
+    def __init__(self, name, stream, line=1, column=1):
+        ''' Create a new character reader that is smart with lines.
+        '''
+        self._name = name
+        self._stream = stream
+        self._buffer = '\n'
+        self._line = line - 1
+        self._column = 0
+
+        column -= 1
+        self.adv()
+        self._buffer = ' ' * column + self._buffer
+        self._column = column
+        # no skew on input (actually belongs below)
+
+    def get(self):
+        ''' Fetch the current character, or falsy on EOF
+        '''
+        if self._buffer:
+            return self._buffer[self._column]
+        else:
+            return None # less prone to accidental errors than ''
+
+    def loc(self):
+        ''' Fetch the Location of the current character.
+        '''
+        # internally we store 0-based, but users want 1-based
+        # also, cut off the newline
+        return Location(self._name, self._line, self._column + 1,
+                self._buffer[:-1])
+
+    def adv(self):
+        if self._buffer[self._column] == '\n':
+            self._buffer = self._stream.readline()
+            self._line += 1
+            self._column = 0
+            if self._buffer and not self._buffer.endswith('\n'):
+                self._buffer += '\n'
+        else:
+            self._column += 1
+
+def string_reader(s, name='<string>', line=1, column=1):
+    return Reader(name, cStringIO.StringIO(s), line, column)
+
+def take_while(b, r, s):
+    assert isinstance(b, bytearray)
+    assert isinstance(r, Reader)
+    s = frozenset(s)
+    while True:
+        c = r.get()
+        if not c or c not in s:
+            break
+        b += c
+        r.adv()
+
+def take_mlc(b, r):
+    ''' Append the rest of a /* comment to b, through the closing */. '''
+    assert isinstance(b, bytearray)
+    assert isinstance(r, Reader)
+    star = False
+    for c in iter(r.get, None): # get() returns None at EOF, ending the loop
+        r.adv()
+        b += c
+        if star and c == '/':
+            return
+        star = c == '*'
+    sys.exit('{0.name}:{0.line}:{0.column}: error: unterminated /* comment'.format(r.loc()))
+
+def take_slc(b, r):
+    ''' Append the rest of a // comment to b, through its final newline.
+        A backslash right before the newline continues the comment. '''
+    assert isinstance(b, bytearray)
+    assert isinstance(r, Reader)
+    bs = False
+    # get() returns None at EOF, so a continued last line just ends there
+    for c in iter(r.get, None):
+        r.adv()
+        b += c
+        if c == '\n' and not bs:
+            return
+        bs = c == '\\'
+
+def take_char(b, r):
+    ''' Append the rest of a character literal to b, through the closing quote. '''
+    assert isinstance(b, bytearray)
+    assert isinstance(r, Reader)
+    bs = False # whether the previous character was an unescaped backslash
+    for c in iter(r.get, None): # get() returns None at EOF, ending the loop
+        r.adv()
+        b += c
+        if not bs and c == '\'':
+            return
+        bs = not bs and c == '\\'
+    sys.exit('{0.name}:{0.line}:{0.column}: error: unterminated character literal'.format(r.loc()))
+
+def take_str(b, r):
+    ''' Append the rest of a string literal to b, through the closing quote. '''
+    assert isinstance(b, bytearray)
+    assert isinstance(r, Reader)
+    bs = False # whether the previous character was an unescaped backslash
+    for c in iter(r.get, None): # get() returns None at EOF, ending the loop
+        r.adv()
+        b += c
+        if not bs and c == '"':
+            return
+        bs = not bs and c == '\\'
+    sys.exit('{0.name}:{0.line}:{0.column}: error: unterminated string literal'.format(r.loc()))
+
 
 def round_up(i, a):
     m = i % a
@@ -87,7 +234,7 @@ def format_lex_or_yacc_definitions():
         if code.strip():
             if LexSettings.brace:
                 table.put1('%{')
-            for line2 in subprocess.Popen(['indent-cpp'], stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate(code)[0].strip().split('\n'):
+            for line2 in indent_cpp_slop(code):
                 table.put1(LexSettings.nested_indent * ' ' + line2)
             if LexSettings.brace:
                 table.put1('%}')
@@ -184,6 +331,9 @@ def format_lex_rules():
                 assert p
                 p -= 1
             i += 1
+        if not i:
+            table.put1('')
+            continue
         del bs
         del p
         pattern = line[:i]
@@ -390,18 +540,143 @@ def format_yacc():
     format_cc()
 
 def format_cc():
-    tail = subprocess.Popen(['indent-cpp'], stdin=subprocess.PIPE, stdout=None)
+    sys.stdout.flush()
+    tail = subprocess.Popen(['indenter-cpp'], stdin=subprocess.PIPE, stdout=None)
     tail.stdin.writelines(sys.stdin)
+    tail.stdin.close()
+    sys.exit(tail.wait())
 
 def indent_cpp_slop(code):
-    return subprocess.Popen(['indent-cpp'], stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate(code)[0].strip().split('\n')
+    return subprocess.Popen(['indenter-cpp'], stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate(code)[0].strip().split('\n')
+
+operators = {
+    '#', '##',
+    '+', '++', '+=',
+    '-', '--', '-=', '->', '->*',
+    '*', '*=',
+    '/', '/=',
+    '%', '%=',
+    '=', '==',
+    '!', '!=',
+    '~',
+    '|', '||', '|=',
+    '&', '&&', '&=',
+    '^', '^=',
+    '<', '<=', '<<', '<<=',
+    '>', '>=', '>>', '>>=',
+    '.', '..', '.*', '...',
+    ':', '::',
+    '(', ')',
+    '[', ']',
+    '{', '}',
+    '?',
+    ',', ';',
+
+    '//', '/*', # comments are specially handled at end
+}
+operators = {
+    k: {v[len(k):] for v in operators if v is not k and v.startswith(k)}
+    for k in operators
+}
+
+num1 = string.digits
+num_x = num1 + '.\''
+ident1 = string.ascii_letters + '_$@' # $@ for bison
+ident_x = ident1 + string.digits
+
+class CxxLexer(object):
+    __slots__ = ('_reader', '_w', '_t', '_f', '_namespaces', '_classes')
+
+    def __init__(self, reader):
+        self._reader = reader
+        self.adv()
+        self._namespaces = []
+        self._classes = []
+
+    def get(self):
+        return self._w, self._t, self._f
+
+    def adv(self):
+        self._w, self._t, self._f = self.pull()
+
+    def pull(self):
+        r = self._reader
+
+        white = bytearray()
+        while True:
+            c = r.get()
+            if not c:
+                return '\n', None, None
+            if not c.isspace():
+                break
+            white += c
+            r.adv()
+
+        black = bytearray()
+        black += c
+        l = r.loc()
+        r.adv()
+
+        if c in operators:
+            while True:
+                c = r.get()
+                if not c or c.isspace():
+                    break
+                op = operators[str(black)]
+                if c not in op:
+                    break
+                black += c
+                r.adv()
+            if black == '/*':
+                take_mlc(black, r)
+            if black == '//':
+                take_slc(black, r)
+        elif c in num1:
+            take_while(black, r, num_x)
+            c = r.get()
+            if c in ident1:
+                black += c
+                r.adv()
+                take_while(black, r, ident_x)
+        elif c in ident1:
+            take_while(black, r, ident_x)
+            c = r.get()
+            if black in ('L', 'u8', 'u', 'U') and c == '"':
+                black += c
+                r.adv()
+                take_str(black, r)
+        elif c == '\'':
+            take_char(black, r)
+        elif c == '"':
+            take_str(black, r)
+        else:
+            l.error('Unknown character: %r' % c)
+            sys.exit(1)
+
+        # c is the first char of the next thing
+        return white, black, None
+
+def whitespace(w, (t, f), (pt, pf)):
+    return w
 
 def format_ii():
-    format_passthrough()
+    ''' Copy C++ from stdin to stdout token by token, via whitespace(). '''
+    r = Reader('<stdin>', sys.stdin)
+    l = CxxLexer(r)
+    pt = None
+    pf = None
+    while True:
+        w, t, f = l.get()
+        if not t:
+            break
+        l.adv()
+
-def format_passthrough():
-    for line in sys.stdin:
-        sys.stdout.write(line)
+        w = whitespace(w, (t, f), (pt, pf))
+        sys.stdout.writelines([w, t])
+        pt, pf = t, f
+    # pt is still None when the input held no tokens; write nothing then
+    if pt is not None and not pt.endswith('\n'):
+        sys.stdout.write('\n')
 
 exts = {
         '-lpp': format_lex,
diff --git a/tools/indenter-cpp b/tools/indenter-cpp
new file mode 100755
index 0000000..bcdf2e0
--- /dev/null
+++ b/tools/indenter-cpp
@@ -0,0 +1,9 @@
+#!/bin/bash -e
+# this is the implementation of indenter -cpp
+set -o pipefail
+expand |
+sed -e 's:^ *//:&&:' -e 's:^ *#://#:' |
+indenter -ipp |
+sed -e 's:^ *// *#:#:' -e 's:^\( *//\) *//:\1:' |
+pp-indent |
+bs-align
diff --git a/tools/maybe-mv b/tools/maybe-mv
index 725b86b..996052f 100755
--- a/tools/maybe-mv
+++ b/tools/maybe-mv
@@ -1,8 +1,10 @@
 #!/bin/bash -eu
 # Replace one file with another, but maybe don't update the timestamp
-if cmp -s "$1" "$2"
+if cmp "$1" "$2"
 then
+    echo rm "$1"
     rm "$1"
 else
+    echo mv "$1" "$2"
     mv "$1" "$2"
 fi
-- 
cgit v1.2.3-70-g09d2