summaryrefslogtreecommitdiff
path: root/tools/aligncsv.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'tools/aligncsv.cpp')
-rw-r--r--tools/aligncsv.cpp183
1 files changed, 0 insertions, 183 deletions
diff --git a/tools/aligncsv.cpp b/tools/aligncsv.cpp
deleted file mode 100644
index ef75ac00..00000000
--- a/tools/aligncsv.cpp
+++ /dev/null
@@ -1,183 +0,0 @@
-#include <cerrno>
-#include <cstdio>
-#include <cstddef>
-
-#include <unistd.h>
-
-#include <vector>
-#include <string>
-
-// this configuration puts 2-5 spaces between entries (excluding headers)
-// and rounds the start of each field up to 4, for easier manual indenting
-// but force each field to be at least size 8
-const size_t min_pad = 2;
-const size_t align_pad = 4;
-const size_t min_size = 8;
-
-void add_pieces(std::vector<std::string>& line, std::vector<size_t>& sizes)
-{
- // This would get rid of trailing commas,
- // but that would break certain db.txt files.
- // Instead we'll have to manually check whether it's empty when checking length
-// if (!line.empty() && line.back().empty())
-// line.pop_back();
- size_t num_sizes = line.size();
- if (!num_sizes) // line.empty()
- return;
- if (line[0].size() >= 2
- && (line[0][0] == '#'
- || (line[0][0] == '/'
- && line[0][1] == '/')))
- return;
-
- if (num_sizes > sizes.size())
- sizes.resize(num_sizes, 1UL);
- for (size_t i = 0; i < num_sizes; ++i)
- {
- size_t elt_size = line[i].size();
- if (!elt_size)// line[i].empty()
- continue;
- if (line[i][elt_size - 1] == ' ')
- line[i].resize(--elt_size);
- // mandatory padding and comma
- elt_size += min_pad + 1;
- if (elt_size < min_size)
- elt_size = min_size;
- if (elt_size > sizes[i])
- // always true if we expanded sizes
- sizes[i] = elt_size;
- }
-}
-
-// the arguments may be the same file - the whole file is stored in memory
-void aligncsv(FILE *in, FILE *out, const char *name)
-{
- bool newline = true;
- bool can_split = true;
- bool can_have_whitespace = false;
- int c;
- std::vector<std::vector<std::string> > contents;
-
- while ((c = fgetc(in)) != -1)
- {
- if (c == '}' || c == '\n')
- can_split = true;
- if (c == '\n')
- {
- if (newline)
- {
- // preserve consecutive blank lines
- contents.push_back(std::vector<std::string>());
- }
- newline = true;
- continue;
- }
- if (c == '{')
- can_split = false;
- if (c == '\t')
- c = ' ';
- if (c == ' ')
- {
- if (!can_have_whitespace)
- continue;
- can_have_whitespace = false;
- }
- else
- can_have_whitespace = true;
- if (newline)
- {
- contents.push_back(std::vector<std::string>(1, std::string(1, c)));
- newline = false;
- }
- else
- {
- if (can_split && c == ',')
- {
- can_have_whitespace = false;
- contents.back().push_back(std::string());
- }
- else
- contents.back().back() += c;
- }
- }
-
- typedef std::vector<std::vector<std::string> >::iterator outer_it;
- typedef std::vector<std::vector<std::string> >::const_iterator outer_cit;
- typedef std::vector<size_t>::iterator pieces_it;
- // at this point, each entry in a line:
- // * does not start with whitespace
- // * has one space in place of any previous run of whitespace
- // * may end in a single space
- // The last is fixed during add_pieces
- std::vector<size_t> pieces;
- for (outer_it it = contents.begin(), end = contents.end(); it != end; ++it)
- add_pieces(*it, pieces);
- for (pieces_it it = pieces.begin(), end = pieces.end(); it != end; ++it)
- if (size_t trail = *it % align_pad)
- *it += align_pad - trail;
-
- if (in == out)
- {
- //rewind(out);
- if (fseek(out, 0, SEEK_SET) == -1)
- {
- perror(name);
- return;
- }
- if (ftruncate(fileno(out), 0) == -1)
- {
- perror(name);
- return;
- }
- }
- for (outer_cit oit = contents.begin(), oend = contents.end(); oit != oend; ++oit)
- {
- const std::vector<std::string>& inner = *oit;
- size_t num_elems = inner.size();
- // we have previously guaranteed that pieces[i].size() >= num_elems
- for (size_t i = 0; i < num_elems; ++i)
- {
- // FIXME handle UTF-8 characters (here AND above?)
- if (fputs(inner[i].c_str(), out) == -1)
- {
- perror(name);
- return;
- }
- if (i != num_elems - 1)
- {
- if (fputc(',', out) == -1)
- {
- perror(name);
- return;
- }
- size_t elem_length = inner[i].size() + 1;
- while (elem_length++ < pieces[i])
- {
- if (fputc(' ', out) == -1)
- {
- perror(name);
- return;
- }
- }
- }
- }
- fputc('\n', out);
- }
-}
-
-int main(int argc, char **argv)
-{
- if (argc == 1)
- aligncsv(stdin, stdout, "<stdio>");
- for (int i = 1; i < argc; ++i)
- {
- FILE *f = fopen(argv[i], "r+");
- if (!f)
- {
- perror(argv[i]);
- continue;
- }
- aligncsv(f, f, argv[i]);
- fclose(f);
- }
-}