diff options
author | Ben Longbons <b.r.longbons@gmail.com> | 2011-09-16 14:36:23 -0700 |
---|---|---|
committer | Ben Longbons <b.r.longbons@gmail.com> | 2011-09-16 14:48:22 -0700 |
commit | 5a8ecb60b34d0dd95db088ab787fd836311f7074 (patch) | |
tree | a4025c72e844d1674bf65a616804c94692295226 | |
parent | 44d941ffff786617eaac9999e88941299772b32c (diff) | |
download | tools-5a8ecb60b34d0dd95db088ab787fd836311f7074.tar.gz tools-5a8ecb60b34d0dd95db088ab787fd836311f7074.tar.bz2 tools-5a8ecb60b34d0dd95db088ab787fd836311f7074.tar.xz tools-5a8ecb60b34d0dd95db088ab787fd836311f7074.zip |
Reimplement aligncsv in C++ instead of Python.
The main advantage of the new version is that it does not split a field at commas inside braces, so it properly handles script blocks such as
{script arg1, arg2;}
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | aligncsv.cpp | 177 | ||||
-rwxr-xr-x | aligncsv.py | 88 |
3 files changed, 178 insertions, 88 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..00e2a6a --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/aligncsv diff --git a/aligncsv.cpp b/aligncsv.cpp new file mode 100644 index 0000000..e65c260 --- /dev/null +++ b/aligncsv.cpp @@ -0,0 +1,177 @@ +#include <cerrno> +#include <cstdio> +#include <cstddef> + +#include <vector> +#include <string> + +// this configuration puts 3-6 spaces between entries (excluding headers) +// and rounds the start of each field up to 4, for easier manual indenting +const size_t min_pad = 3; +const size_t align_pad = 4; + +void add_pieces(std::vector<std::string>& line, std::vector<size_t>& sizes) +{ + // This would get rid of trailing commas, + // but that would break certain db.txt files. + // Instead we'll have to manually check whether it's empty when checking length +// if (!line.empty() && line.back().empty()) +// line.pop_back(); + size_t num_sizes = line.size(); + if (!num_sizes) // line.empty() + return; + if (line[0].size() >= 2 + && (line[0][0] == '#' + || (line[0][0] == '/' + && line[0][1] == '/'))) + return; + + if (num_sizes > sizes.size()) + sizes.resize(num_sizes, 1UL); + for (size_t i = 0; i < num_sizes; ++i) + { + size_t elt_size = line[i].size(); + if (!elt_size)// line[i].empty() + continue; + if (line[i][elt_size - 1] == ' ') + line[i].resize(--elt_size); + // mandatory padding and comma + elt_size += min_pad + 1; + if (elt_size > sizes[i]) + // always true if we expanded sizes + sizes[i] = elt_size; + } +} + +// the arguments may be the same file - the whole file is stored in memory +void aligncsv(FILE *in, FILE *out, const char *name) +{ + bool newline = true; + bool can_split = true; + bool can_have_whitespace = false; + int c; + std::vector<std::vector<std::string> > contents; + + while ((c = fgetc(in)) != -1) + { + if (c == '}' || c == '\n') + can_split = true; + if (c == '\n') + { + if (newline) + { + // preserve consecutive blank lines + contents.push_back(std::vector<std::string>()); 
+ } + newline = true; + continue; + } + if (c == '{') + can_split = false; + if (c == '\t') + c = ' '; + if (c == ' ') + { + if (!can_have_whitespace) + continue; + can_have_whitespace = false; + } + else + can_have_whitespace = true; + if (newline) + { + contents.push_back(std::vector<std::string>(1, std::string(1, c))); + newline = false; + } + else + { + if (can_split && c == ',') + { + can_have_whitespace = false; + contents.back().push_back(std::string()); + } + else + contents.back().back() += c; + } + } + + typedef std::vector<std::vector<std::string> >::iterator outer_it; + typedef std::vector<std::vector<std::string> >::const_iterator outer_cit; + typedef std::vector<size_t>::iterator pieces_it; + // at this point, each entry in a line: + // * does not start with whitespace + // * has one space in place of any previous run of whitespace + // * may end in a single space + // The last is fixed during add_pieces + std::vector<size_t> pieces; + for (outer_it it = contents.begin(), end = contents.end(); it != end; ++it) + add_pieces(*it, pieces); + for (pieces_it it = pieces.begin(), end = pieces.end(); it != end; ++it) + if (size_t trail = *it % align_pad) + *it += align_pad - trail; + + if (in == out) + { + //rewind(out); + if (fseek(out, 0, SEEK_SET) == -1) + { + perror(name); + return; + } + if (ftruncate(fileno(out), 0) == -1) + { + perror(name); + return; + } + } + for (outer_cit oit = contents.begin(), oend = contents.end(); oit != oend; ++oit) + { + const std::vector<std::string>& inner = *oit; + size_t num_elems = inner.size(); + // we have previously guaranteed that pieces[i].size() >= num_elems + for (size_t i = 0; i < num_elems; ++i) + { + // FIXME handle UTF-8 characters (here AND above?) 
+ if (fputs(inner[i].c_str(), out) == -1) + { + perror(name); + return; + } + if (i != num_elems - 1) + { + if (fputc(',', out) == -1) + { + perror(name); + return; + } + size_t elem_length = inner[i].size() + 1; + while (elem_length++ < pieces[i]) + { + if (fputc(' ', out) == -1) + { + perror(name); + return; + } + } + } + } + fputc('\n', out); + } +} + +int main(int argc, char **argv) +{ + if (argc == 1) + aligncsv(stdin, stdout, "<stdio>"); + for (int i = 1; i < argc; ++i) + { + FILE *f = fopen(argv[i], "r+"); + if (!f) + { + perror(argv[i]); + continue; + } + aligncsv(f, f, argv[i]); + fclose(f); + } +} diff --git a/aligncsv.py b/aligncsv.py deleted file mode 100755 index 54bcd9d..0000000 --- a/aligncsv.py +++ /dev/null @@ -1,88 +0,0 @@ -#!/usr/bin/python -# this formats a csv file to a serious whitespace intended format. - -import os -import sys - -tabs=True -additionalspaces = 5 - -fname = sys.argv[1] -if not os.path.exists(fname): - print "that file doesn't exist" - exit(0); - -f=open(fname,"r"); -lines=f.readlines() -f.close(); -length=0 - -for line in lines: - length=max(length, len(line.split(","))) - -print "# number of entries =",length - - -#setup text array -textarray=range(len(lines)+1) -for x in range(len(lines)+1): - textarray[x] = range(length) - -for x in range(length): - textarray[-1][x] = 0 - -#find the longest entry in each line in each position -for lineno in range(len(lines)): - if not lines[lineno].strip().startswith("//") or lines[lineno].strip().startswith("//id"): - sp=lines[lineno].split(",") - for pieceno in range(len(sp)): - sp[pieceno] = sp[pieceno].strip() + "," #for the comma add a char - textarray[-1][pieceno] = max(len(sp[pieceno]),textarray[-1][pieceno]) - -if tabs: - #make it divisable by 8 (tabs work then) - for pieceno in range(length): - if (textarray[-1][pieceno] %8) !=0: - textarray[-1][pieceno] = (((textarray[-1][pieceno])/8)*8)+8 - -for lineno in range(len(lines)): - if not lines[lineno].strip().startswith("//") or 
lines[lineno].strip().startswith("//id"): - sp=lines[lineno].split(",") - for pieceno in range(length): - textarray[lineno][pieceno] = "" - if pieceno<len(sp): - sp[pieceno]= sp[pieceno].strip() - if pieceno<len(sp)-1: - sp[pieceno]= sp[pieceno] + "," - - if (tabs): - n=(textarray[-1][pieceno]-len(sp[pieceno])) - textarray[lineno][pieceno] = sp[pieceno] - if (n%8) != 0: - textarray[lineno][pieceno] += "\t"*((n/8)+1) - else: - textarray[lineno][pieceno] += "\t"*((n/8)) - else: - n=(textarray[-1][pieceno]-len(sp[pieceno])+additionalspaces) - textarray[lineno][pieceno] = " "*(n) + sp[pieceno] - else: - for pieceno in range(length): - textarray[lineno][pieceno] = "" - textarray[lineno][0]=lines[lineno].strip() - - -fname = sys.argv[2] -if not os.path.exists(fname): - print "that file doesn't exist" - exit(0); -else: - f=open(fname,"w"); - for line in textarray[:-1]: - for piece in line: - f.write(piece) - f.write("\n") - f.close() - - - - |