summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ben Longbons <b.r.longbons@gmail.com> 2011-09-16 14:36:23 -0700
committer Ben Longbons <b.r.longbons@gmail.com> 2011-09-16 14:48:22 -0700
commit 5a8ecb60b34d0dd95db088ab787fd836311f7074 (patch)
tree a4025c72e844d1674bf65a616804c94692295226
parent 44d941ffff786617eaac9999e88941299772b32c (diff)
download tools-5a8ecb60b34d0dd95db088ab787fd836311f7074.tar.gz
tools-5a8ecb60b34d0dd95db088ab787fd836311f7074.tar.bz2
tools-5a8ecb60b34d0dd95db088ab787fd836311f7074.tar.xz
tools-5a8ecb60b34d0dd95db088ab787fd836311f7074.zip
Reimplement aligncsv in C++ instead of python.
The main advantage of the new one is that it properly handles {script arg1, arg2;}
-rw-r--r-- .gitignore 1
-rw-r--r-- aligncsv.cpp 177
-rwxr-xr-x aligncsv.py 88
3 files changed, 178 insertions, 88 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..00e2a6a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/aligncsv
diff --git a/aligncsv.cpp b/aligncsv.cpp
new file mode 100644
index 0000000..e65c260
--- /dev/null
+++ b/aligncsv.cpp
@@ -0,0 +1,177 @@
#include <unistd.h>

#include <cerrno>
#include <cstddef>
#include <cstdio>

#include <string>
#include <vector>
+
// Layout configuration: every field is followed by its comma and at least
// min_pad spaces, and the start of each column is rounded up to a multiple
// of align_pad. Together this puts 3-6 spaces between entries (comment and
// header lines excluded), which makes manual indenting easier.
const size_t min_pad = 3;
const size_t align_pad = 4;

// Returns true when the first field marks its line as a comment or header,
// i.e. it starts with '#' or with "//".
static bool is_comment_field(const std::string& first)
{
    if (first.empty())
        return false;
    if (first[0] == '#')
        return true;
    return first.size() >= 2 && first[0] == '/' && first[1] == '/';
}

// Fold one parsed line into the running per-column widths.
//
// line:  the fields of one input line; a single trailing space is removed
//        from each field of a non-comment line (modified in place).
// sizes: minimum width of each column, counting the field text, its comma
//        and min_pad spaces. Grown so that it has an entry for every column
//        of every line seen so far, including comment lines.
//
// Comment/header lines never widen a column, but they do extend `sizes`
// (with the minimal width 1) so that callers may index it by any column
// of any line.
void add_pieces(std::vector<std::string>& line, std::vector<size_t>& sizes)
{
    // Trailing commas are deliberately kept: dropping an empty last field
    // would break certain db.txt files. Empty fields are skipped below
    // instead of being popped here.
    const size_t num_fields = line.size();
    if (num_fields == 0)
        return;

    // Grow before the comment check, otherwise a comment line with more
    // fields than any data line leaves `sizes` too short for it.
    if (num_fields > sizes.size())
        sizes.resize(num_fields, 1UL);

    if (is_comment_field(line[0]))
        return;

    for (size_t i = 0; i < num_fields; ++i)
    {
        std::string& field = line[i];
        if (field.empty())
            continue;
        // Callers are expected to have collapsed whitespace runs, so at
        // most one trailing space needs removing.
        if (field[field.size() - 1] == ' ')
            field.resize(field.size() - 1);
        // field text + comma + mandatory padding
        const size_t needed = field.size() + 1 + min_pad;
        if (needed > sizes[i])
            sizes[i] = needed;
    }
}
+
+// the arguments may be the same file - the whole file is stored in memory
+void aligncsv(FILE *in, FILE *out, const char *name)
+{
+ bool newline = true;
+ bool can_split = true;
+ bool can_have_whitespace = false;
+ int c;
+ std::vector<std::vector<std::string> > contents;
+
+ while ((c = fgetc(in)) != -1)
+ {
+ if (c == '}' || c == '\n')
+ can_split = true;
+ if (c == '\n')
+ {
+ if (newline)
+ {
+ // preserve consecutive blank lines
+ contents.push_back(std::vector<std::string>());
+ }
+ newline = true;
+ continue;
+ }
+ if (c == '{')
+ can_split = false;
+ if (c == '\t')
+ c = ' ';
+ if (c == ' ')
+ {
+ if (!can_have_whitespace)
+ continue;
+ can_have_whitespace = false;
+ }
+ else
+ can_have_whitespace = true;
+ if (newline)
+ {
+ contents.push_back(std::vector<std::string>(1, std::string(1, c)));
+ newline = false;
+ }
+ else
+ {
+ if (can_split && c == ',')
+ {
+ can_have_whitespace = false;
+ contents.back().push_back(std::string());
+ }
+ else
+ contents.back().back() += c;
+ }
+ }
+
+ typedef std::vector<std::vector<std::string> >::iterator outer_it;
+ typedef std::vector<std::vector<std::string> >::const_iterator outer_cit;
+ typedef std::vector<size_t>::iterator pieces_it;
+ // at this point, each entry in a line:
+ // * does not start with whitespace
+ // * has one space in place of any previous run of whitespace
+ // * may end in a single space
+ // The last is fixed during add_pieces
+ std::vector<size_t> pieces;
+ for (outer_it it = contents.begin(), end = contents.end(); it != end; ++it)
+ add_pieces(*it, pieces);
+ for (pieces_it it = pieces.begin(), end = pieces.end(); it != end; ++it)
+ if (size_t trail = *it % align_pad)
+ *it += align_pad - trail;
+
+ if (in == out)
+ {
+ //rewind(out);
+ if (fseek(out, 0, SEEK_SET) == -1)
+ {
+ perror(name);
+ return;
+ }
+ if (ftruncate(fileno(out), 0) == -1)
+ {
+ perror(name);
+ return;
+ }
+ }
+ for (outer_cit oit = contents.begin(), oend = contents.end(); oit != oend; ++oit)
+ {
+ const std::vector<std::string>& inner = *oit;
+ size_t num_elems = inner.size();
+ // we have previously guaranteed that pieces[i].size() >= num_elems
+ for (size_t i = 0; i < num_elems; ++i)
+ {
+ // FIXME handle UTF-8 characters (here AND above?)
+ if (fputs(inner[i].c_str(), out) == -1)
+ {
+ perror(name);
+ return;
+ }
+ if (i != num_elems - 1)
+ {
+ if (fputc(',', out) == -1)
+ {
+ perror(name);
+ return;
+ }
+ size_t elem_length = inner[i].size() + 1;
+ while (elem_length++ < pieces[i])
+ {
+ if (fputc(' ', out) == -1)
+ {
+ perror(name);
+ return;
+ }
+ }
+ }
+ }
+ fputc('\n', out);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ if (argc == 1)
+ aligncsv(stdin, stdout, "<stdio>");
+ for (int i = 1; i < argc; ++i)
+ {
+ FILE *f = fopen(argv[i], "r+");
+ if (!f)
+ {
+ perror(argv[i]);
+ continue;
+ }
+ aligncsv(f, f, argv[i]);
+ fclose(f);
+ }
+}
diff --git a/aligncsv.py b/aligncsv.py
deleted file mode 100755
index 54bcd9d..0000000
--- a/aligncsv.py
+++ /dev/null
@@ -1,88 +0,0 @@
-#!/usr/bin/python
-# this formats a csv file to a serious whitespace intended format.
-
-import os
-import sys
-
-tabs=True  # True: follow each piece with tab characters; False: put spaces in front of each piece
-additionalspaces = 5  # extra spaces per piece, only used when tabs is False
-
-fname = sys.argv[1]  # first argument: the file to read
-if not os.path.exists(fname):
- print "that file doesn't exist"
- exit(0);  # NOTE(review): exit status is 0 although the input file is missing
-
-f=open(fname,"r");
-lines=f.readlines()
-f.close();
-length=0
-
-for line in lines:
- length=max(length, len(line.split(",")))  # largest number of comma-separated pieces on any line
-
-print "# number of entries =",length
-
-
-#setup text array
-textarray=range(len(lines)+1)  # one row per input line plus a last row for the column widths; needs range() to return a list (Python 2)
-for x in range(len(lines)+1):
- textarray[x] = range(length)
-
-for x in range(length):
- textarray[-1][x] = 0  # the last row starts with every column width at 0
-
-#find the longest entry in each line in each position
-for lineno in range(len(lines)):
- if not lines[lineno].strip().startswith("//") or lines[lineno].strip().startswith("//id"):  # skip "//" lines, but keep lines starting with "//id"
- sp=lines[lineno].split(",")
- for pieceno in range(len(sp)):
- sp[pieceno] = sp[pieceno].strip() + "," #for the comma add a char
- textarray[-1][pieceno] = max(len(sp[pieceno]),textarray[-1][pieceno])  # widest piece seen so far in this column
-
-if tabs:
- #make it divisible by 8 (tabs work then)
- for pieceno in range(length):
- if (textarray[-1][pieceno] %8) !=0:
- textarray[-1][pieceno] = (((textarray[-1][pieceno])/8)*8)+8  # round up to the next multiple of 8; "/" is assumed to be Python 2 integer division
-
-for lineno in range(len(lines)):
- if not lines[lineno].strip().startswith("//") or lines[lineno].strip().startswith("//id"):  # same line filter as in the width pass above
- sp=lines[lineno].split(",")
- for pieceno in range(length):
- textarray[lineno][pieceno] = ""
- if pieceno<len(sp):
- sp[pieceno]= sp[pieceno].strip()
- if pieceno<len(sp)-1:
- sp[pieceno]= sp[pieceno] + ","  # every piece except the last one of the line gets its comma back
-
- if (tabs):
- n=(textarray[-1][pieceno]-len(sp[pieceno]))  # distance from the end of this piece to the column width
- textarray[lineno][pieceno] = sp[pieceno]
- if (n%8) != 0:
- textarray[lineno][pieceno] += "\t"*((n/8)+1)
- else:
- textarray[lineno][pieceno] += "\t"*((n/8))
- else:
- n=(textarray[-1][pieceno]-len(sp[pieceno])+additionalspaces)
- textarray[lineno][pieceno] = " "*(n) + sp[pieceno]
- else:
- for pieceno in range(length):
- textarray[lineno][pieceno] = ""
- textarray[lineno][0]=lines[lineno].strip()  # NOTE(review): presumably the branch for skipped "//" lines (nesting was lost in this listing) - the stripped line is stored whole in the first column
-
-
-fname = sys.argv[2]  # second argument: the file to write
-if not os.path.exists(fname):  # NOTE(review): the output file has to exist already, otherwise nothing is written
- print "that file doesn't exist"
- exit(0);
-else:
- f=open(fname,"w");
- for line in textarray[:-1]:  # every row except the last one, which holds the widths
- for piece in line:
- f.write(piece)
- f.write("\n")
- f.close()
-
-
-
-