summaryrefslogtreecommitdiff
path: root/src/sexpr/lexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/sexpr/lexer.cpp')
-rw-r--r--src/sexpr/lexer.cpp228
1 files changed, 228 insertions, 0 deletions
diff --git a/src/sexpr/lexer.cpp b/src/sexpr/lexer.cpp
new file mode 100644
index 0000000..8c1c380
--- /dev/null
+++ b/src/sexpr/lexer.cpp
@@ -0,0 +1,228 @@
+#include "lexer.hpp"
+// lexer.cpp - tokenize a stream of S-expressions
+//
+// Copyright © 2014 Ben Longbons <b.r.longbons@gmail.com>
+//
+// This file is part of The Mana World (Athena server)
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Affero General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Affero General Public License for more details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+#include "../strings/mstring.hpp"
+
+#include "../io/cxxstdio.hpp"
+
+#include "../poison.hpp"
+
+namespace sexpr
+{
+    // Read the next lexeme from the input.
+    //
+    // Leading whitespace is skipped, then the first significant character
+    // selects the kind of lexeme:
+    //   '('              TOK_OPEN; its position is pushed onto _depth
+    //   ')'              TOK_CLOSE; pops _depth, or TOK_ERROR if nothing is open
+    //   '"'              TOK_STRING; reads up to the closing quote and decodes
+    //                    backslash escapes (including two-digit \xNN)
+    //   quote/backslash  TOK_ERROR ("forbidden character")
+    //   anything else    TOK_TOKEN; a bare word running up to a delimiter
+    //
+    // For non-error lexemes the text is left in _string; _span.begin and
+    // _span.end are updated as characters are read.
+    // Returns TOK_EOF when the input is exhausted and no '(' is still open;
+    // returns TOK_ERROR after reporting a message through error() otherwise.
+    Lexeme Lexer::_adv()
+    {
+        XString whitespace = " \t\n\r\v\f";
+        // Skip whitespace, leaving _span.begin on the first significant char.
+        while (true)
+        {
+            if (!_in.get(_span.begin))
+            {
+                // End of input is only acceptable when every '(' was closed.
+                if (!_depth.empty())
+                {
+                    _depth.back().error("Unmatched '('");
+                    return TOK_ERROR;
+                }
+                return TOK_EOF;
+            }
+            char co = _span.begin.ch();
+            if (!whitespace.contains(co))
+                break;
+            _in.adv();
+        }
+
+        char co = _span.begin.ch();
+        _in.adv();
+        _span.end = _span.begin;
+        switch (co)
+        {
+        case '(':
+            _string = "(";
+            // Remember where this paren opened, for "Unmatched '('" reports.
+            _depth.push_back(_span.end);
+            return TOK_OPEN;
+        case ')':
+            _string = ")";
+            if (_depth.empty())
+            {
+                _span.end.error("Unmatched ')'");
+                return TOK_ERROR;
+            }
+            _depth.pop_back();
+            return TOK_CLOSE;
+        case '"':
+            {
+                MString collect;
+                // read until " and consume it
+                // but handle \s
+                while (true)
+                {
+                    if (!_in.get(_span.end))
+                    {
+                        _span.error("EOF in string literal");
+                        return TOK_ERROR;
+                    }
+                    char ch = _span.end.ch();
+                    _in.adv();
+                    if (ch == '"')
+                        break;
+
+                    if (ch != '\\')
+                    {
+                        collect += ch;
+                        continue;
+                    }
+
+                    // A backslash was seen: the next char picks the escape.
+                    if (!_in.get(_span.end))
+                    {
+                        _span.end.error("EOF at backslash in string");
+                        return TOK_ERROR;
+                    }
+                    ch = _span.end.ch();
+                    _in.adv();
+                    switch (ch)
+                    {
+                    default:
+                        _span.end.error("Unknown backslash sequence");
+                        return TOK_ERROR;
+                    case 'a': collect += '\a'; break;
+                    case 'b': collect += '\b'; break;
+                    case 'e': collect += '\e'; break;
+                    case 'f': collect += '\f'; break;
+                    case 'n': collect += '\n'; break;
+                    case 'r': collect += '\r'; break;
+                    case 't': collect += '\t'; break;
+                    case 'v': collect += '\v'; break;
+                    case '\\': collect += '\\'; break;
+                    case '\"': collect += '\"'; break;
+                    case 'x':
+                        {
+                            // Exactly two hex digits, most significant first.
+                            unsigned char tmp = 0;
+                            for (int i = 0; i < 2; ++i)
+                            {
+                                tmp *= 16;
+                                if (!_in.get(_span.end))
+                                {
+                                    _span.end.error("EOF after \\x in string");
+                                    return TOK_ERROR;
+                                }
+                                char cx = _span.end.ch();
+                                _in.adv();
+                                if ('0' <= cx && cx <= '9')
+                                    tmp += cx - '0';
+                                else if ('A' <= cx && cx <= 'F')
+                                    tmp += cx - 'A' + 10;
+                                // Accept the whole lowercase range a-f; the
+                                // upper bound used to be 'a', which rejected
+                                // b-f as non-hex.
+                                else if ('a' <= cx && cx <= 'f')
+                                    tmp += cx - 'a' + 10;
+                                else
+                                {
+                                    _span.end.error("Non-hex char after \\x");
+                                    return TOK_ERROR;
+                                }
+                            }
+                            collect += tmp;
+                        }
+                        break;
+                    }
+                }
+                _string = AString(collect);
+                return TOK_STRING;
+            }
+        case '\'':
+        case '\\':
+            _span.end.error("forbidden character");
+            return TOK_ERROR;
+        default:
+            // this includes integers - they are differentiated in parsing
+            {
+                MString collect;
+                collect += co;
+                // read until whitespace, (, ), ", or EOF
+                io::LineChar tmp;
+                while (_in.get(tmp))
+                {
+                    char ct = tmp.ch();
+                    if (ct == '\'' || ct == '\\')
+                        // error later
+                        break;
+                    if (ct == '(' || ct == ')' || ct == '"')
+                        break;
+                    if (whitespace.contains(ct))
+                        break;
+                    collect += ct;
+                    // Only consume the char once it is known to belong here.
+                    _span.end = tmp;
+                    _in.adv();
+                }
+                _string = AString(collect);
+                // NOTE(review): the message is reported but TOK_TOKEN is still
+                // returned - confirm this is intended to be non-fatal.
+                if (!_string.is_print())
+                    _span.error("String is not entirely printable");
+                return TOK_TOKEN;
+            }
+        }
+    }
+
+    // Spell one character the way it should appear inside a quoted string.
+    //
+    // BEL, BS, ESC, FF, CR, TAB, VT, backslash and double quote become
+    // their two-character backslash escapes. Newline and printable ASCII
+    // (' ' through '~') are returned unchanged. Any other byte becomes
+    // \xNN with two lowercase hex digits.
+    VString<4> escape(char c)
+    {
+        // Newline is emitted as-is rather than as a backslash escape.
+        if (c == '\n')
+            return c;
+
+        switch (c)
+        {
+        case '\a': return {"\\a"};
+        case '\b': return {"\\b"};
+        case '\e': return {"\\e"};
+        case '\f': return {"\\f"};
+        case '\r': return {"\\r"};
+        case '\t': return {"\\t"};
+        case '\v': return {"\\v"};
+        case '\\': return {"\\\\"};
+        case '\"': return {"\\\""};
+        }
+
+        const bool printable = ' ' <= c && c <= '~';
+        if (printable)
+            return c;
+        // Go through uint8_t so a negative char formats as a single byte.
+        return STRNPRINTF(5, "\\x%02x", static_cast<uint8_t>(c));
+    }
+    // Quote a whole string: the result is a double quote, then the
+    // escape(char) spelling of every character of s in order, then a
+    // closing double quote.
+    AString escape(XString s)
+    {
+        MString quoted;
+        quoted += '"';
+        for (char ch : s)
+        {
+            quoted += escape(ch);
+        }
+        quoted += '"';
+        return AString(quoted);
+    }
+
+    // Give the display name of a lexeme kind: "EOF", "OPEN", "CLOSE",
+    // "STRING" or "TOKEN". Every other value yields "ERROR".
+    ZString token_name(Lexeme tok)
+    {
+        if (tok == TOK_EOF)
+            return ZString("EOF");
+        if (tok == TOK_OPEN)
+            return ZString("OPEN");
+        if (tok == TOK_CLOSE)
+            return ZString("CLOSE");
+        if (tok == TOK_STRING)
+            return ZString("STRING");
+        if (tok == TOK_TOKEN)
+            return ZString("TOKEN");
+        // Anything not matched above is reported as an error.
+        return ZString("ERROR");
+    }
+} // namespace sexpr