From fe3d4ce758822d65a0a5d617b7b77df2dbc972d8 Mon Sep 17 00:00:00 2001 From: Ben Longbons Date: Sun, 16 Mar 2014 14:55:57 -0700 Subject: Implement new magic frontend using sexpr --- src/sexpr/lexer.cpp | 228 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 228 insertions(+) create mode 100644 src/sexpr/lexer.cpp (limited to 'src/sexpr/lexer.cpp') diff --git a/src/sexpr/lexer.cpp b/src/sexpr/lexer.cpp new file mode 100644 index 0000000..8c1c380 --- /dev/null +++ b/src/sexpr/lexer.cpp @@ -0,0 +1,228 @@ +#include "lexer.hpp" +// lexer.cpp - tokenize a stream of S-expressions +// +// Copyright © 2014 Ben Longbons +// +// This file is part of The Mana World (Athena server) +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>.
+ +#include "../strings/mstring.hpp" + +#include "../io/cxxstdio.hpp" + +#include "../poison.hpp" + +namespace sexpr +{ + Lexeme Lexer::_adv() + { + XString whitespace = " \t\n\r\v\f"; + while (true) + { + if (!_in.get(_span.begin)) + { + if (!_depth.empty()) + { + _depth.back().error("Unmatched '('"); + return TOK_ERROR; + } + return TOK_EOF; + } + char co = _span.begin.ch(); + if (!whitespace.contains(co)) + break; + _in.adv(); + } + + char co = _span.begin.ch(); + _in.adv(); + _span.end = _span.begin; + switch (co) + { + case '(': + _string = "("; + _depth.push_back(_span.end); + return TOK_OPEN; + case ')': + _string = ")"; + if (_depth.empty()) + { + _span.end.error("Unmatched ')'"); + return TOK_ERROR; + } + _depth.pop_back(); + return TOK_CLOSE; + case '"': + { + MString collect; + // read until " and consume it + // but handle \s + while (true) + { + if (!_in.get(_span.end)) + { + _span.error("EOF in string literal"); + return TOK_ERROR; + } + char ch = _span.end.ch(); + _in.adv(); + if (ch == '"') + break; + + if (ch != '\\') + { + collect += ch; + continue; + } + + if (!_in.get(_span.end)) + { + _span.end.error("EOF at backslash in string"); + return TOK_ERROR; + } + ch = _span.end.ch(); + _in.adv(); + switch (ch) + { + default: + _span.end.error("Unknown backslash sequence"); + return TOK_ERROR; + case 'a': collect += '\a'; break; + case 'b': collect += '\b'; break; + case 'e': collect += '\e'; break; + case 'f': collect += '\f'; break; + case 'n': collect += '\n'; break; + case 'r': collect += '\r'; break; + case 't': collect += '\t'; break; + case 'v': collect += '\v'; break; + case '\\': collect += '\\'; break; + case '\"': collect += '\"'; break; + case 'x': + { + unsigned char tmp = 0; + for (int i = 0; i < 2; ++i) + { + tmp *= 16; + if (!_in.get(_span.end)) + { + _span.end.error("EOF after \\x in string"); + return TOK_ERROR; + } + char cx = _span.end.ch(); + _in.adv(); + if ('0' <= cx && cx <= '9') + tmp += cx - '0'; + else if ('A' <= cx && cx <= 
'F') + tmp += cx - 'A' + 10; + else if ('a' <= cx && cx <= 'a') + tmp += cx - 'a' + 10; + else + { + _span.end.error("Non-hex char after \\x"); + return TOK_ERROR; + } + } + collect += tmp; + } + } + } + _string = AString(collect); + return TOK_STRING; + } + case '\'': + case '\\': + _span.end.error("forbidden character"); + return TOK_ERROR; + default: + // this includes integers - they are differentiated in parsing + { + MString collect; + collect += co; + // read until whitespace, (, ), ", or EOF + io::LineChar tmp; + while (_in.get(tmp)) + { + char ct = tmp.ch(); + if (ct == '\'' || ct == '\\') + // error later + break; + if (ct == '(' || ct == ')' || ct == '"') + break; + if (whitespace.contains(ct)) + break; + collect += ct; + _span.end = tmp; + _in.adv(); + } + _string = AString(collect); + if (!_string.is_print()) + _span.error("String is not entirely printable"); + return TOK_TOKEN; + } + } + } + + VString<4> escape(char c) + { + switch (c) + { + case '\a': return {"\\a"}; + case '\b': return {"\\b"}; + case '\e': return {"\\e"}; + case '\f': return {"\\f"}; + //case '\n': return {"\\n"}; + case '\r': return {"\\r"}; + case '\t': return {"\\t"}; + case '\v': return {"\\v"}; + case '\\': return {"\\\\"}; + case '\"': return {"\\\""}; + default: + if (c == '\n') + return c; + if (' ' <= c && c <= '~') + return c; + else + return STRNPRINTF(5, "\\x%02x", static_cast(c)); + } + } + AString escape(XString s) + { + MString m; + m += '"'; + for (char c : s) + m += escape(c); + m += '"'; + return AString(m); + } + + ZString token_name(Lexeme tok) + { + switch (tok) + { + case TOK_EOF: + return ZString("EOF"); + case TOK_OPEN: + return ZString("OPEN"); + case TOK_CLOSE: + return ZString("CLOSE"); + case TOK_STRING: + return ZString("STRING"); + case TOK_TOKEN: + return ZString("TOKEN"); + default: + return ZString("ERROR"); + } + } +} // namespace sexpr -- cgit v1.2.3-60-g2f50