// aligncsv - align the columns of comma-separated db files.
//
// C++ reimplementation of the former tools/aligncsv.py.  The main advantage
// of this version is that it properly handles commas inside braces, as in
// {script arg1, arg2;}.

#include <cstddef>
#include <cstdio>
#include <string>
#include <vector>

#include <unistd.h>

// this configuration puts 3-6 spaces between entries (excluding headers)
// and rounds the start of each field up to 4, for easier manual indenting
const size_t min_pad = 3;
const size_t align_pad = 4;

// Fold one parsed line into the running per-column widths.
//
// line:  the fields of one input line; a single trailing space is removed
//        from every field in place.
// sizes: sizes[i] is the widest "field + comma + min_pad" seen so far in
//        column i; it is grown (new columns start at 1, i.e. just the comma)
//        and updated here.
//
// Comment/header lines (first field starting with '#' or "//") are cleaned up
// but never widen a column.  Empty fields do not widen a column either, so
// trailing commas, which some db.txt files rely on, can be kept.
void add_pieces(std::vector<std::string>& line, std::vector<size_t>& sizes)
{
    const size_t num_sizes = line.size();
    if (!num_sizes)
        return;

    // The parser leaves at most one trailing space on a field; drop it for
    // every kind of line so no field is written as "text ,".
    for (size_t i = 0; i < num_sizes; ++i)
    {
        std::string& field = line[i];
        if (!field.empty() && field[field.size() - 1] == ' ')
            field.resize(field.size() - 1);
    }

    const std::string& first = line[0];
    const bool is_comment = !first.empty()
        && (first[0] == '#'
            || (first.size() >= 2 && first[0] == '/' && first[1] == '/'));
    if (is_comment)
        return;

    if (num_sizes > sizes.size())
        sizes.resize(num_sizes, 1);
    for (size_t i = 0; i < num_sizes; ++i)
    {
        if (line[i].empty())
            continue;
        // mandatory padding and comma
        const size_t elt_size = line[i].size() + min_pad + 1;
        if (elt_size > sizes[i])
            sizes[i] = elt_size;
    }
}

// Read comma-separated text from `in` and write it to `out` with every
// column padded to a common, align_pad-rounded width.
//
// The arguments may be the same file - the whole file is stored in memory,
// and in that case the file is rewound and truncated before it is rewritten.
// `name` is only used as the prefix of perror() messages.
//
// Parsing rules:
//  * tabs count as spaces, and each run of whitespace becomes one space;
//  * whitespace at the start of a line or right after a separating comma
//    is dropped;
//  * commas between '{' and the next '}' (or end of line) do not separate;
//  * blank lines are preserved.
//
// On any I/O error a message is printed with perror() and the function
// returns early.  A read error is detected before the output is touched,
// so an in-place run cannot replace a file with a partial copy of itself.
void aligncsv(FILE *in, FILE *out, const char *name)
{
    typedef std::vector<std::string> row_t;

    std::vector<row_t> contents;
    bool newline = true;
    bool can_split = true;
    bool can_have_whitespace = false;
    int c;

    while ((c = fgetc(in)) != EOF)
    {
        if (c == '}' || c == '\n')
            can_split = true;
        if (c == '\n')
        {
            if (newline)
            {
                // preserve consecutive blank lines
                contents.push_back(row_t());
            }
            newline = true;
            // whitespace state must not leak into the next line, otherwise
            // indentation survives only when the previous line happened to
            // end in a non-space character
            can_have_whitespace = false;
            continue;
        }
        if (c == '{')
            can_split = false;
        if (c == '\t')
            c = ' ';
        if (c == ' ')
        {
            if (!can_have_whitespace)
                continue;
            can_have_whitespace = false;
        }
        else
            can_have_whitespace = true;

        if (newline)
        {
            // start the line with one empty field so that the very first
            // character goes through the same comma handling as the rest
            contents.push_back(row_t(1));
            newline = false;
        }
        if (can_split && c == ',')
        {
            can_have_whitespace = false;
            contents.back().push_back(std::string());
        }
        else
            contents.back().back() += static_cast<char>(c);
    }
    if (ferror(in))
    {
        perror(name);
        return;
    }

    // at this point, each entry in a line:
    // * does not start with whitespace
    // * has one space in place of any previous run of whitespace
    // * may end in a single space
    // The last is fixed during add_pieces
    std::vector<size_t> pieces;
    for (size_t r = 0; r < contents.size(); ++r)
        add_pieces(contents[r], pieces);
    for (size_t i = 0; i < pieces.size(); ++i)
        if (size_t trail = pieces[i] % align_pad)
            pieces[i] += align_pad - trail;

    if (in == out)
    {
        if (fseek(out, 0, SEEK_SET) != 0
            || ftruncate(fileno(out), 0) == -1)
        {
            perror(name);
            return;
        }
    }

    std::string text;
    for (size_t r = 0; r < contents.size(); ++r)
    {
        const row_t& row = contents[r];
        const size_t num_elems = row.size();
        text.clear();
        for (size_t i = 0; i < num_elems; ++i)
        {
            // FIXME widths are counted in bytes, so multi-byte UTF-8
            // characters are over-counted (here AND in add_pieces)
            text += row[i];
            if (i + 1 == num_elems)
                break;
            text += ',';
            // nothing follows a trailing comma, so padding after it would
            // only produce trailing whitespace
            if (i + 2 == num_elems && row[i + 1].empty())
                continue;
            // comment lines never widen `pieces`, so they may have more
            // fields than there are known columns; those get no padding
            const size_t width = i < pieces.size() ? pieces[i] : 0;
            const size_t used = row[i].size() + 1;
            if (used < width)
                text.append(width - used, ' ');
        }
        text += '\n';
        if (fwrite(text.data(), 1, text.size(), out) != text.size())
        {
            perror(name);
            return;
        }
    }
}
// Implemented earlier in this file.
void aligncsv(FILE *in, FILE *out, const char *name);

// Entry point.
//
// With no arguments, standard input is aligned onto standard output.
// Otherwise every argument is treated as a file name and rewritten in place.
// A file that cannot be opened is reported and skipped so the remaining
// files are still processed.
//
// Returns 0 on success and 1 if any file could not be opened or if flushing
// or closing an output failed.  Errors inside aligncsv itself are reported by
// that function but cannot be observed here because it returns void.
int main(int argc, char **argv)
{
    int status = 0;

    if (argc == 1)
    {
        aligncsv(stdin, stdout, "");
        // stdout is buffered; a failed write may only show up on flush
        if (fflush(stdout) == EOF)
        {
            perror("stdout");
            status = 1;
        }
    }
    for (int i = 1; i < argc; ++i)
    {
        FILE *f = fopen(argv[i], "r+");
        if (!f)
        {
            perror(argv[i]);
            status = 1;
            continue;
        }
        aligncsv(f, f, argv[i]);
        // buffered output is written out by fclose, so this is where a
        // full disk or similar write error is finally reported
        if (fclose(f) == EOF)
        {
            perror(argv[i]);
            status = 1;
        }
    }
    return status;
}
- -import os -import sys - -tabs=True -additionalspaces = 5 - -fname = sys.argv[1] -if not os.path.exists(fname): - print "that file doesn't exist" - exit(0); - -f=open(fname,"r"); -lines=f.readlines() -f.close(); -length=0 - -for line in lines: - length=max(length, len(line.split(","))) - -print "# number of entries =",length - - -#setup text array -textarray=range(len(lines)+1) -for x in range(len(lines)+1): - textarray[x] = range(length) - -for x in range(length): - textarray[-1][x] = 0 - -#find the longest entry in each line in each position -for lineno in range(len(lines)): - if not lines[lineno].strip().startswith("//") or lines[lineno].strip().startswith("//id"): - sp=lines[lineno].split(",") - for pieceno in range(len(sp)): - sp[pieceno] = sp[pieceno].strip() + "," #for the comma add a char - textarray[-1][pieceno] = max(len(sp[pieceno]),textarray[-1][pieceno]) - -if tabs: - #make it divisable by 8 (tabs work then) - for pieceno in range(length): - if (textarray[-1][pieceno] %8) !=0: - textarray[-1][pieceno] = (((textarray[-1][pieceno])/8)*8)+8 - -for lineno in range(len(lines)): - if not lines[lineno].strip().startswith("//") or lines[lineno].strip().startswith("//id"): - sp=lines[lineno].split(",") - for pieceno in range(length): - textarray[lineno][pieceno] = "" - if pieceno