#include <cerrno>
#include <cstdio>
#include <cstddef>

#include <unistd.h>

#include <vector>
#include <string>

// this configuration puts 3-6 spaces between entries (excluding headers)
// and rounds the start of each field up to 4, for easier manual indenting
const size_t min_pad = 3;
const size_t align_pad = 4;

// Fold one parsed line into the running per-column widths.
//
// line:  the fields of a single input line; a single trailing space on a
//        field is stripped in place.
// sizes: widest field seen so far for each column, counted as
//        field length + 1 (comma) + min_pad. Grown as needed; columns that
//        have only ever held empty fields keep the placeholder width 1.
//
// Blank lines and comment lines (first field at least two characters long
// and starting with '#' or "//") are ignored entirely: they neither grow
// `sizes` nor get their trailing spaces stripped.
void add_pieces(std::vector<std::string>& line, std::vector<size_t>& sizes)
{
    // This would get rid of trailing commas,
    // but that would break certain db.txt files.
    // Instead we'll have to manually check whether it's empty when checking length
    // if (!line.empty() && line.back().empty())
    //     line.pop_back();
    const size_t num_sizes = line.size();
    if (!num_sizes) // line.empty()
        return;

    const std::string& first = line[0];
    if (first.size() >= 2
            && (first[0] == '#' || (first[0] == '/' && first[1] == '/')))
        return;

    if (num_sizes > sizes.size())
        sizes.resize(num_sizes, 1UL);

    for (size_t i = 0; i < num_sizes; ++i)
    {
        size_t elt_size = line[i].size();
        if (!elt_size) // line[i].empty()
            continue;
        if (line[i][elt_size - 1] == ' ')
            line[i].resize(--elt_size);
        // mandatory padding and comma
        elt_size += min_pad + 1;
        if (elt_size > sizes[i])
            // always true if we expanded sizes
            sizes[i] = elt_size;
    }
}

// Read comma-separated text from `in`, then write it to `out` with every
// column padded to a common width.
//
// in:   stream to read; consumed up to end-of-file.
// out:  stream to write. May be the same FILE as `in` (opened for update),
//       in which case the file is rewound and truncated before writing.
// name: label passed to perror() when an I/O call fails.
//
// Parsing rules: tabs count as spaces, runs of spaces collapse to one,
// spaces at the start of a field are dropped, and commas between '{' and
// the next '}' (or end of line) do not split fields.
//
// On any I/O failure a message is printed with perror() and the function
// returns early; nothing is reported to the caller.
//
// the arguments may be the same file - the whole file is stored in memory
void aligncsv(FILE *in, FILE *out, const char *name)
{
    bool newline = true;
    bool can_split = true;
    bool can_have_whitespace = false;
    int c;
    std::vector<std::vector<std::string> > contents;

    while ((c = fgetc(in)) != EOF)
    {
        if (c == '}' || c == '\n')
            can_split = true;
        if (c == '\n')
        {
            if (newline)
            {
                // preserve consecutive blank lines
                contents.push_back(std::vector<std::string>());
            }
            newline = true;
            // The next line starts a fresh field, so leading whitespace
            // must be dropped there as well. Without this reset, a line
            // following one that ended in a non-space character would keep
            // one leading space.
            can_have_whitespace = false;
            continue;
        }
        if (c == '{')
            can_split = false;
        if (c == '\t')
            c = ' ';
        if (c == ' ')
        {
            if (!can_have_whitespace)
                continue;
            can_have_whitespace = false;
        }
        else
            can_have_whitespace = true;

        const char ch = static_cast<char>(c);
        if (newline)
        {
            contents.push_back(std::vector<std::string>(1, std::string(1, ch)));
            newline = false;
        }
        else if (can_split && c == ',')
        {
            can_have_whitespace = false;
            contents.back().push_back(std::string());
        }
        else
            contents.back().back() += ch;
    }

    // fgetc() returns EOF for both end-of-file and a read error. Stop here
    // on error so that a partially read file is never truncated and
    // rewritten below.
    if (ferror(in))
    {
        perror(name);
        return;
    }

    typedef std::vector<std::vector<std::string> >::iterator outer_it;
    typedef std::vector<std::vector<std::string> >::const_iterator outer_cit;
    typedef std::vector<size_t>::iterator pieces_it;

    // at this point, each entry in a line:
    // * does not start with whitespace
    // * has one space in place of any previous run of whitespace
    // * may end in a single space
    // The last is fixed during add_pieces
    std::vector<size_t> pieces;

    for (outer_it it = contents.begin(), end = contents.end(); it != end; ++it)
        add_pieces(*it, pieces);
    // round every column width up to a multiple of align_pad
    for (pieces_it it = pieces.begin(), end = pieces.end(); it != end; ++it)
        if (size_t trail = *it % align_pad)
            *it += align_pad - trail;

    if (in == out)
    {
        //rewind(out);
        if (fseek(out, 0, SEEK_SET) == -1)
        {
            perror(name);
            return;
        }
        if (ftruncate(fileno(out), 0) == -1)
        {
            perror(name);
            return;
        }
    }

    for (outer_cit oit = contents.begin(), oend = contents.end(); oit != oend; ++oit)
    {
        const std::vector<std::string>& inner = *oit;
        const size_t num_elems = inner.size();
        for (size_t i = 0; i < num_elems; ++i)
        {
            const std::string& elem = inner[i];
            // FIXME handle UTF-8 characters (here AND above?)
            if (fputs(elem.c_str(), out) == EOF)
            {
                perror(name);
                return;
            }
            if (i + 1 == num_elems)
                continue; // no comma or padding after the last field

            if (fputc(',', out) == EOF)
            {
                perror(name);
                return;
            }
            size_t elem_length = elem.size() + 1;
            // Comment lines are skipped by add_pieces, so they may have more
            // fields than `pieces` has columns. Such columns have no
            // measured width; give them the minimum padding instead of
            // indexing past the end of `pieces`.
            const size_t width = i < pieces.size()
                ? pieces[i]
                : elem_length + min_pad;
            while (elem_length++ < width)
            {
                if (fputc(' ', out) == EOF)
                {
                    perror(name);
                    return;
                }
            }
        }
        if (fputc('\n', out) == EOF)
        {
            perror(name);
            return;
        }
    }
}

// With no arguments, filter stdin to stdout. Otherwise rewrite each named
// file in place; a file that cannot be opened is reported and skipped.
int main(int argc, char **argv)
{
    if (argc == 1)
        aligncsv(stdin, stdout, "<stdio>");
    for (int i = 1; i < argc; ++i)
    {
        FILE *f = fopen(argv[i], "r+");
        if (!f)
        {
            perror(argv[i]);
            continue;
        }
        aligncsv(f, f, argv[i]);
        // Buffered output is flushed at close, so a write error can first
        // show up here.
        if (fclose(f) == EOF)
            perror(argv[i]);
    }
}