From f71413176f32ef642824456544ecbda5933a0944 Mon Sep 17 00:00:00 2001 From: Ben Longbons Date: Mon, 23 Dec 2013 19:40:38 -0800 Subject: Add non-indenting indenter for C++ Fix a bunch of bugs that none of me noticed before. --- real.make | 41 +++-- src/common/human_time_diff_test.cpp | 1 - src/map/magic-interpreter-lexer.lpp | 1 + tools/indent-cpp | 2 - tools/indenter | 293 ++++++++++++++++++++++++++++++++++-- tools/indenter-cpp | 9 ++ tools/maybe-mv | 4 +- 7 files changed, 328 insertions(+), 23 deletions(-) delete mode 100755 tools/indent-cpp create mode 100755 tools/indenter-cpp diff --git a/real.make b/real.make index 0177de0..d99a823 100644 --- a/real.make +++ b/real.make @@ -53,6 +53,7 @@ # 5. Remove the few (obvious) bits that are hard-coded for TMWA. # 6. Handle testing better. I'm guessing I should actually compile just # one foo_test.cpp file into each executable test ... +# 7. Refactor into several files after all. We need extensibility! # # IWBNMI: # 1. Add 'make check' and 'make installcheck'. @@ -123,6 +124,8 @@ GEN_HEADERS := \ $(patsubst %.ypp,%.hpp,${PARSERS}) REAL_SOURCES := $(shell cd ${SRC_DIR}; find src/ -name '*.cpp') REAL_HEADERS := $(shell cd ${SRC_DIR}; find src/ -name '*.hpp' -o -name '*.tcc') +REAL_SOURCES := $(filter-out ${GEN_SOURCES},${REAL_SOURCES}) +REAL_HEADERS := $(filter-out ${GEN_HEADERS},${REAL_HEADERS}) SOURCES := ${GEN_SOURCES} ${REAL_SOURCES} HEADERS := ${GEN_HEADERS} ${REAL_HEADERS} DEPENDS := $(patsubst src/%.cpp,obj/%.d,${SOURCES}) @@ -231,7 +234,8 @@ mostlyclean: rm -rf obj conf-raw clean: mostlyclean rm -rf bin -distclean: clean +distclean: clean gen-clean +gen-clean: rm -f ${GEN_SOURCES} ${GEN_HEADERS} %.cpp: %.lpp @@ -384,13 +388,30 @@ dist/%-bundled.tar: dist/%-src.tar dist/%-attoconf-only.tar dist: dist/tmwa-${VERSION_FULL}-src.tar dist/tmwa-${VERSION_FULL}-bundled.tar .PHONY: dist -format: format-cpp format-hpp format-lpp format-ypp -format-cpp: - cd ${SRC_DIR} && apply-filter 'indenter -cpp' ${REAL_SOURCES} -format-hpp: - cd ${SRC_DIR} && apply-filter 'indenter -cpp' ${REAL_HEADERS} -format-lpp: - cd ${SRC_DIR} && apply-filter 'indenter -lpp' ${LEXERS} -format-ypp: - cd ${SRC_DIR} && apply-filter 'indenter -ypp' ${PARSERS} +# lpp and ypp are (currently) very slow, so do them first (parallel) +format: format-lpp format-ypp format-cpp format-hpp +format-cpp: $(patsubst src/%,obj/%.formatted,${REAL_SOURCES}) +format-hpp: $(patsubst src/%,obj/%.formatted,${REAL_HEADERS}) +format-lpp: $(patsubst src/%,obj/%.formatted,${LEXERS}) +format-ypp: $(patsubst src/%,obj/%.formatted,${PARSERS}) +obj/%.cpp.formatted: src/%.cpp tools/indenter + $(MKDIR_FIRST) + cd ${SRC_DIR} && apply-filter 'indenter -cpp' $< + touch $@ +obj/%.hpp.formatted: src/%.hpp tools/indenter + $(MKDIR_FIRST) + cd ${SRC_DIR} && apply-filter 'indenter -cpp' $< + touch $@ +obj/%.tcc.formatted: src/%.tcc tools/indenter + $(MKDIR_FIRST) + cd ${SRC_DIR} && apply-filter 'indenter -cpp' $< + touch $@ +obj/%.lpp.formatted: src/%.lpp tools/indenter + $(MKDIR_FIRST) + cd ${SRC_DIR} && apply-filter 'indenter -lpp' $< + touch $@ +obj/%.ypp.formatted: src/%.ypp tools/indenter + $(MKDIR_FIRST) + cd ${SRC_DIR} && apply-filter 'indenter -ypp' $< + touch $@ .PHONY: format format-cpp format-hpp format-lpp format-ypp diff --git a/src/common/human_time_diff_test.cpp b/src/common/human_time_diff_test.cpp index d11a116..d3ddad1 100644 --- a/src/common/human_time_diff_test.cpp +++ b/src/common/human_time_diff_test.cpp @@ -81,4 +81,3 @@ TEST(humantimediff, multiple) EXPECT_EQ(0, diff.second); EXPECT_FALSE(extract("1y2y", &diff)); } - diff --git a/src/map/magic-interpreter-lexer.lpp b/src/map/magic-interpreter-lexer.lpp index 3625ee3..786088e 100644 --- a/src/map/magic-interpreter-lexer.lpp +++ b/src/map/magic-interpreter-lexer.lpp @@ -153,3 +153,4 @@ . FPRINTF(stderr, "%s: Unexpected character in line %d\n", MAGIC_CONFIG_FILE, magic_frontend_lineno); %% +// nothing to see here, move along diff --git a/tools/indent-cpp b/tools/indent-cpp deleted file mode 100755 index 610d623..0000000 --- a/tools/indent-cpp +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/sh -pp-indent | bs-align diff --git a/tools/indenter b/tools/indenter index 4e17a15..0f0f31d 100755 --- a/tools/indenter +++ b/tools/indenter @@ -19,6 +19,16 @@ ## You should have received a copy of the GNU General Public License ## along with this program. If not, see . + +from __future__ import print_function + +from collections import namedtuple +import cStringIO +import string +import subprocess +import sys + + # Settings. class LexSettings: pad = 2 @@ -26,9 +36,146 @@ class LexSettings: brace = True nested_indent = 0 # 4 + # Code. -import subprocess -import sys +Location = namedtuple('Location', ('name', 'line', 'column', 'text')) +if 1: + def _diagnostic(self, level, msg, to): + print('{file}:{line}:{column}: {level}: {msg}'.format( + file=self.name, line=self.line, column=self.column, + level=level, msg=msg), + file=sys.stderr) + print(self.text, file=sys.stderr) + if to: + assert to.name == self.name + assert to.line == self.line + assert to.column >= self.column + else: + to = self + print(' ' * (self.column - 1) + '^' + '~' * (to.column - self.column), file=sys.stderr) + def error(self, msg, to=None): + self._diagnostic('error', msg, to) + def warning(self, msg, to=None): + self._diagnostic('warning', msg, to) + def note(self, msg, to=None): + self._diagnostic('note', msg, to) + Location._diagnostic = _diagnostic + Location.error = error + Location.warning = warning + Location.note = note + del _diagnostic, error, warning, note + + +class Reader(object): + __slots__ = ('_name', '_stream', '_buffer', '_line', '_column') + def __init__(self, name, stream, line=1, column=1): + ''' Create a new character reader that is smart with lines. + ''' + self._name = name + self._stream = stream + self._buffer = '\n' + self._line = line - 1 + self._column = 0 + + column -= 1 + self.adv() + self._buffer = ' ' * column + self._buffer + self._column = column + # no skew on input (actually belongs below) + + def get(self): + ''' Fetch the current character, or falsy on EOF + ''' + if self._buffer: + return self._buffer[self._column] + else: + return None # less prone to accidental errors than '' + + def loc(self): + ''' Fetch the Location of the current character. + ''' + # internally we store 0-based, but users want 1-based + # also, cut off the newline + return Location(self._name, self._line, self._column + 1, + self._buffer[:-1]) + + def adv(self): + if self._buffer[self._column] == '\n': + self._buffer = self._stream.readline() + self._line += 1 + self._column = 0 + if self._buffer and not self._buffer.endswith('\n'): + self._buffer += '\n' + else: + self._column += 1 + +def string_reader(s, name='', line=1, column=1): + return Reader(name, cStringIO.StringIO(s), line, column) + +def take_while(b, r, s): + assert isinstance(b, bytearray) + assert isinstance(r, Reader) + s = frozenset(s) + while True: + c = r.get() + if not c or c not in s: + break + b += c + r.adv() + +def take_mlc(b, r): + assert isinstance(b, bytearray) + assert isinstance(r, Reader) + + star = False + while True: + c = r.get() + r.adv() + b += c + if star and c == '/': + return + star = c == '*' + +def take_slc(b, r): + assert isinstance(b, bytearray) + assert isinstance(r, Reader) + + bs = False + while True: + c = r.get() + # if c == '\n': return + r.adv() + b += c + if c == '\n' and not bs: + return + bs = c == '\\' + +def take_char(b, r): + assert isinstance(b, bytearray) + assert isinstance(r, Reader) + + bs = False + while True: + c = r.get() + r.adv() + b += c + if not bs and c == '\'': + return + bs = not bs and c == '\\' + +def take_str(b, r): + assert isinstance(b, bytearray) + assert isinstance(r, Reader) + + bs = False + while True: + c = r.get() + r.adv() + b += c + if not bs and c == '"': + return + bs = not bs and c == '\\' + def round_up(i, a): m = i % a @@ -87,7 +234,7 @@ def format_lex_or_yacc_definitions(): if code.strip(): if LexSettings.brace: table.put1('%{') - for line2 in subprocess.Popen(['indent-cpp'], stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate(code)[0].strip().split('\n'): + for line2 in indent_cpp_slop(code): table.put1(LexSettings.nested_indent * ' ' + line2) if LexSettings.brace: table.put1('%}') @@ -184,6 +331,9 @@ def format_lex_rules(): assert p p -= 1 i += 1 + if not i: + table.put1('') + continue del bs del p pattern = line[:i] @@ -390,18 +540,143 @@ def format_yacc(): format_cc() def format_cc(): - tail = subprocess.Popen(['indent-cpp'], stdin=subprocess.PIPE, stdout=None) + sys.stdout.flush() + tail = subprocess.Popen(['indenter-cpp'], stdin=subprocess.PIPE, stdout=None) tail.stdin.writelines(sys.stdin) + tail.stdin.close() + sys.exit(tail.wait()) def indent_cpp_slop(code): - return subprocess.Popen(['indent-cpp'], stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate(code)[0].strip().split('\n') + return subprocess.Popen(['indenter-cpp'], stdin=subprocess.PIPE, stdout=subprocess.PIPE).communicate(code)[0].strip().split('\n') + +operators = { + '#', '##', + '+', '++', '+=', + '-', '--', '-=', '->', '->*', + '*', '*=', + '/', '/=', + '%', '%=', + '=', '==', + '!', '!=', + '~', + '|', '||', '|=', + '&', '&&', '&=', + '^', '^=', + '<', '<=', '<<', '<<=', + '>', '>=', '>>', '>>=', + '.', '..', '.*', '...', + ':', '::', + '(', ')', + '[', ']', + '{', '}', + '?', + ',', ';', + + '//', '/*', # comments are specially handled at end +} +operators = { + k: {v[len(k):] for v in operators if v is not k and v.startswith(k)} + for k in operators +} + +num1 = string.digits +num_x = num1 + '.\'' +ident1 = string.ascii_letters + '_$@' # $@ for bison +ident_x = ident1 + string.digits + +class CxxLexer(object): + __slots__ = ('_reader', '_w', '_t', '_f', '_namespaces', '_classes') + + def __init__(self, reader): + self._reader = reader + self.adv() + self._namespaces = [] + self._classes = [] + + def get(self): + return self._w, self._t, self._f + + def adv(self): + self._w, self._t, self._f = self.pull() + + def pull(self): + r = self._reader + + white = bytearray() + while True: + c = r.get() + if not c: + return '\n', None, None + if not c.isspace(): + break + white += c + r.adv() + + black = bytearray() + black += c + l = r.loc() + r.adv() + + if c in operators: + while True: + c = r.get() + if not c or c.isspace(): + break + op = operators[str(black)] + if c not in op: + break + black += c + r.adv() + if black == '/*': + take_mlc(black, r) + if black == '//': + take_slc(black, r) + elif c in num1: + take_while(black, r, num_x) + c = r.get() + if c in ident1: + black += c + r.adv() + take_while(black, r, ident_x) + elif c in ident1: + take_while(black, r, ident_x) + c = r.get() + if black in ('L', 'u8', 'u', 'U') and c == '"': + black += c + r.adv() + take_str(black, r) + elif c == '\'': + take_char(black, r) + elif c == '"': + take_str(black, r) + else: + l.error('Unknown character: %r' % c) + sys.exit(1) + + # c is the first char of the next thing + return white, black, None + +def whitespace(w, (t, f), (pt, pf)): + return w def format_ii(): - format_passthrough() + r = Reader('', sys.stdin) + l = CxxLexer(r) + pt = None + pf = None + while True: + w, t, f = l.get() + if not t: + break + l.adv() -def format_passthrough(): - for line in sys.stdin: - sys.stdout.write(line) + w = whitespace(w, (t, f), (pt, pf)) + sys.stdout.writelines([w, t]) + #print('w:', repr(str(w))) + #print('t:', t) + pt, pf = t, f + if not pt.endswith('\n'): + sys.stdout.write('\n') exts = { '-lpp': format_lex, diff --git a/tools/indenter-cpp b/tools/indenter-cpp new file mode 100755 index 0000000..bcdf2e0 --- /dev/null +++ b/tools/indenter-cpp @@ -0,0 +1,9 @@ +#!/bin/bash -e +# this is the implementation of indenter -cpp +set -o pipefail +expand | +sed -e 's:^ *//:&&:' -e 's:^ *#://#:' | +indenter -ipp | +sed -e 's:^ *// *#:#:' -e 's:^\( *//\) *//:\1:' | +pp-indent | +bs-align diff --git a/tools/maybe-mv b/tools/maybe-mv index 725b86b..996052f 100755 --- a/tools/maybe-mv +++ b/tools/maybe-mv @@ -1,8 +1,10 @@ #!/bin/bash -eu # Replace one file with another, but maybe don't update the timestamp -if cmp -s "$1" "$2" +if cmp "$1" "$2" then + echo rm "$1" rm "$1" else + echo mv "$1" "$2" mv "$1" "$2" fi -- cgit v1.2.3-60-g2f50