summaryrefslogtreecommitdiff
path: root/aligncsv.cpp
diff options
context:
space:
mode:
author Ben Longbons <b.r.longbons@gmail.com> 2011-09-16 14:36:23 -0700
committer Ben Longbons <b.r.longbons@gmail.com> 2011-09-16 14:48:22 -0700
commit 5a8ecb60b34d0dd95db088ab787fd836311f7074 (patch)
tree a4025c72e844d1674bf65a616804c94692295226 /aligncsv.cpp
parent 44d941ffff786617eaac9999e88941299772b32c (diff)
downloadtools-5a8ecb60b34d0dd95db088ab787fd836311f7074.tar.gz
tools-5a8ecb60b34d0dd95db088ab787fd836311f7074.tar.bz2
tools-5a8ecb60b34d0dd95db088ab787fd836311f7074.tar.xz
tools-5a8ecb60b34d0dd95db088ab787fd836311f7074.zip
Reimplement aligncsv in C++ instead of python.
The main advantage of the new one is that it properly handles {script arg1, arg2;}
Diffstat (limited to 'aligncsv.cpp')
-rw-r--r-- aligncsv.cpp 177
1 files changed, 177 insertions, 0 deletions
diff --git a/aligncsv.cpp b/aligncsv.cpp
new file mode 100644
index 0000000..e65c260
--- /dev/null
+++ b/aligncsv.cpp
@@ -0,0 +1,177 @@
+#include <unistd.h>
+
+#include <cerrno>
+#include <cstdio>
+#include <cstddef>
+
+#include <vector>
+#include <string>
+
+// Spacing configuration for the aligned output.
+// this configuration puts 3-6 spaces between entries (excluding headers)
+// and rounds the start of each field up to 4, for easier manual indenting
+// min_pad: spaces that must follow the comma after every measured field
+const size_t min_pad = 3;
+// align_pad: each column width is rounded up to a multiple of this
+const size_t align_pad = 4;
+
+/**
+ * Fold one parsed line into the running table of column widths.
+ *
+ * @param line   fields of a single input line; one trailing space is
+ *               stripped in place from every non-empty field of a data line
+ * @param sizes  per-column widths, each covering the field text, its comma
+ *               and min_pad spaces; grown to the number of fields in `line`
+ *               (new columns start at 1) and raised where this line is wider
+ *
+ * Empty lines and comment lines (first field starting with '#' or "//")
+ * are left untouched and do not influence `sizes`.
+ */
+void add_pieces(std::vector<std::string>& line, std::vector<size_t>& sizes)
+{
+    // This would get rid of trailing commas,
+    // but that would break certain db.txt files.
+    // Instead, empty fields are simply skipped while measuring below.
+//    if (!line.empty() && line.back().empty())
+//        line.pop_back();
+    const size_t num_fields = line.size();
+    if (num_fields == 0)
+        return;
+
+    // Only the "//" test needs two characters; a first field that is just
+    // "#" is a comment as well and must not widen the first column.
+    const std::string& first = line[0];
+    const bool hash_comment = !first.empty() && first[0] == '#';
+    const bool slash_comment = first.size() >= 2
+        && first[0] == '/' && first[1] == '/';
+    if (hash_comment || slash_comment)
+        return;
+
+    if (num_fields > sizes.size())
+        sizes.resize(num_fields, 1UL);
+    for (size_t i = 0; i < num_fields; ++i)
+    {
+        std::string& field = line[i];
+        if (field.empty())
+            continue;
+        // callers are expected to have collapsed runs of whitespace,
+        // so at most one trailing space has to be removed
+        if (field[field.size() - 1] == ' ')
+            field.resize(field.size() - 1);
+        // mandatory padding and comma
+        const size_t width = field.size() + min_pad + 1;
+        if (width > sizes[i])
+            // always true if we expanded sizes
+            sizes[i] = width;
+    }
+}
+
+typedef std::vector<std::string> csv_line;
+typedef std::vector<csv_line> csv_lines;
+
+// Read `in` up to end-of-file or a read error, appending one entry to
+// `contents` per input line, split into fields at commas outside {...}.
+// Whitespace is normalised on the way in, so that each field:
+//  * does not start with whitespace
+//  * has one space in place of any previous run of whitespace
+//  * may end in a single space (removed later by add_pieces)
+static void read_lines(FILE *in, csv_lines& contents)
+{
+    bool newline = true;                // next character opens a new line
+    bool can_have_whitespace = false;   // previous kept char was not a space
+    size_t brace_depth = 0;             // > 0 while inside { script, args; }
+    int c;
+
+    while ((c = fgetc(in)) != EOF)
+    {
+        if (c == '\n')
+        {
+            if (newline)
+            {
+                // preserve consecutive blank lines
+                contents.push_back(csv_line());
+            }
+            newline = true;
+            // every line starts outside braces and with no pending space,
+            // so indentation is dropped like any other leading whitespace
+            brace_depth = 0;
+            can_have_whitespace = false;
+            continue;
+        }
+        // count nesting so that an inner '}' does not re-enable splitting
+        if (c == '{')
+            ++brace_depth;
+        else if (c == '}' && brace_depth != 0)
+            --brace_depth;
+        if (c == '\t')
+            c = ' ';
+        if (c == ' ')
+        {
+            if (!can_have_whitespace)
+                continue;
+            can_have_whitespace = false;
+        }
+        else
+            can_have_whitespace = true;
+
+        if (newline)
+        {
+            // open the line with one empty field, so that a line which
+            // begins with ',' is split like any other
+            contents.push_back(csv_line(1, std::string()));
+            newline = false;
+        }
+        if (brace_depth == 0 && c == ',')
+        {
+            can_have_whitespace = false;
+            contents.back().push_back(std::string());
+        }
+        else
+            contents.back().back() += static_cast<char>(c);
+    }
+}
+
+// Write every line to `out`, padding each field except the last of a line
+// to the width of its column. Returns false as soon as a write fails.
+static bool write_lines(FILE *out, const csv_lines& contents,
+                        const std::vector<size_t>& widths)
+{
+    for (csv_lines::const_iterator it = contents.begin();
+         it != contents.end(); ++it)
+    {
+        const csv_line& fields = *it;
+        const size_t num_elems = fields.size();
+        for (size_t i = 0; i < num_elems; ++i)
+        {
+            // FIXME handle UTF-8 characters (here AND while measuring?)
+            if (fputs(fields[i].c_str(), out) == EOF)
+                return false;
+            if (i + 1 == num_elems)
+                break;
+            if (fputc(',', out) == EOF)
+                return false;
+            // Comment lines are not measured by add_pieces, so they may
+            // have more fields than there are columns: no padding then.
+            const size_t width = i < widths.size() ? widths[i] : 0;
+            for (size_t used = fields[i].size() + 1; used < width; ++used)
+            {
+                if (fputc(' ', out) == EOF)
+                    return false;
+            }
+        }
+        if (fputc('\n', out) == EOF)
+            return false;
+    }
+    return true;
+}
+
+/**
+ * Re-align the comma-separated fields of a text file into padded columns.
+ *
+ * @param in    stream to read; consumed up to end-of-file
+ * @param out   stream to write; may be the same stream as `in`, in which
+ *              case it is rewound and truncated once everything is read
+ *              (the whole file is stored in memory)
+ * @param name  prefix for the messages printed with perror() on failure
+ *
+ * Failures are reported on stderr only. After a read error nothing is
+ * written, so an in-place run does not replace the file with partial data.
+ */
+void aligncsv(FILE *in, FILE *out, const char *name)
+{
+    csv_lines contents;
+    read_lines(in, contents);
+    if (ferror(in))
+    {
+        perror(name);
+        return;
+    }
+
+    // widest field per column, then rounded up to a multiple of align_pad
+    std::vector<size_t> pieces;
+    for (csv_lines::iterator it = contents.begin();
+         it != contents.end(); ++it)
+        add_pieces(*it, pieces);
+    for (std::vector<size_t>::iterator it = pieces.begin();
+         it != pieces.end(); ++it)
+    {
+        if (size_t trail = *it % align_pad)
+            *it += align_pad - trail;
+    }
+
+    if (in == out)
+    {
+        // rewriting in place: start again from an empty file
+        if (fseek(out, 0, SEEK_SET) != 0
+            || ftruncate(fileno(out), 0) == -1)
+        {
+            perror(name);
+            return;
+        }
+    }
+
+    // flush here so that a deferred write error is reported under `name`
+    if (!write_lines(out, contents, pieces) || fflush(out) == EOF)
+        perror(name);
+}
+
+/**
+ * Align every file named on the command line in place; with no arguments,
+ * filter standard input to standard output instead.
+ *
+ * @return 0 if every named file could be opened and closed without error,
+ *         1 otherwise (a message is printed for each failing file)
+ */
+int main(int argc, char **argv)
+{
+    int status = 0;
+
+    if (argc == 1)
+        aligncsv(stdin, stdout, "<stdio>");
+    for (int i = 1; i < argc; ++i)
+    {
+        // "r+" because the same stream is read and then rewritten
+        FILE *f = fopen(argv[i], "r+");
+        if (!f)
+        {
+            perror(argv[i]);
+            status = 1;
+            continue;
+        }
+        aligncsv(f, f, argv[i]);
+        // any output still buffered is written out by fclose, so a
+        // failure here means the file may be incomplete
+        if (fclose(f) == EOF)
+        {
+            perror(argv[i]);
+            status = 1;
+        }
+    }
+    return status;
+}