blob: ea1890e7e37c6b236f9edbaf3e576215fb1a39b6 (
plain) (
tree)
|
|
#include "lexer.hpp"
// lexer.cpp - tokenize a stream of S-expressions
//
// Copyright © 2014 Ben Longbons <b.r.longbons@gmail.com>
//
// This file is part of The Mana World (Athena server)
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "../strings/mstring.hpp"
#include "../strings/vstring.hpp"
#include "../strings/literal.hpp"
#include "../io/cxxstdio.hpp"
#include "../poison.hpp"
namespace tmwa
{
namespace sexpr
{
// Read the next lexeme from the input.
//
// Leading whitespace is skipped, then the first significant character
// decides the kind of lexeme:
//   '('        -> TOK_OPEN, and its position is pushed onto _depth
//   ')'        -> TOK_CLOSE, popping _depth (TOK_ERROR if _depth is empty)
//   '"'        -> TOK_STRING, with backslash escapes decoded into _string
//   '\'', '\\' -> TOK_ERROR ("forbidden character")
//   otherwise  -> TOK_TOKEN, a bare word collected into _string
//
// When _in.get() fails while skipping whitespace, TOK_EOF is returned,
// unless some '(' is still open, in which case an error is reported at the
// most recently opened one and TOK_ERROR is returned.
//
// On return, _span.begin refers to the first character of the lexeme and
// _span.end to the last character that was examined for it.
Lexeme Lexer::_adv()
{
    LString whitespace = " \t\n\r\v\f"_s;
    // Skip whitespace; leaves _span.begin on the first significant char.
    while (true)
    {
        if (!_in.get(_span.begin))
        {
            if (!_depth.empty())
            {
                _depth.back().error("Unmatched '('"_s);
                return TOK_ERROR;
            }
            return TOK_EOF;
        }
        char co = _span.begin.ch();
        if (!whitespace.contains(co))
            break;
        _in.adv();
    }
    char co = _span.begin.ch();
    _in.adv();
    _span.end = _span.begin;
    switch (co)
    {
    case '(':
        _string = "("_s;
        // remember where this paren was opened, for error reporting
        _depth.push_back(_span.end);
        return TOK_OPEN;
    case ')':
        _string = ")"_s;
        if (_depth.empty())
        {
            _span.end.error("Unmatched ')'"_s);
            return TOK_ERROR;
        }
        _depth.pop_back();
        return TOK_CLOSE;
    case '"':
        {
            MString collect;
            // read until " and consume it
            // but handle \s
            while (true)
            {
                if (!_in.get(_span.end))
                {
                    _span.error("EOF in string literal"_s);
                    return TOK_ERROR;
                }
                char ch = _span.end.ch();
                _in.adv();
                if (ch == '"')
                    break;
                if (ch != '\\')
                {
                    collect += ch;
                    continue;
                }
                // ch was a backslash: decode the escape that follows
                if (!_in.get(_span.end))
                {
                    _span.end.error("EOF at backslash in string"_s);
                    return TOK_ERROR;
                }
                ch = _span.end.ch();
                _in.adv();
                switch (ch)
                {
                default:
                    _span.end.error("Unknown backslash sequence"_s);
                    return TOK_ERROR;
                case 'a': collect += '\a'; break;
                case 'b': collect += '\b'; break;
                case 'e': collect += '\e'; break;
                case 'f': collect += '\f'; break;
                case 'n': collect += '\n'; break;
                case 'r': collect += '\r'; break;
                case 't': collect += '\t'; break;
                case 'v': collect += '\v'; break;
                case '\\': collect += '\\'; break;
                case '\"': collect += '\"'; break;
                case 'x':
                    {
                        // exactly two hex digits, most significant first
                        unsigned char tmp = 0;
                        for (int i = 0; i < 2; ++i)
                        {
                            tmp *= 16;
                            if (!_in.get(_span.end))
                            {
                                _span.end.error("EOF after \\x in string"_s);
                                return TOK_ERROR;
                            }
                            char cx = _span.end.ch();
                            _in.adv();
                            if ('0' <= cx && cx <= '9')
                                tmp += cx - '0';
                            else if ('A' <= cx && cx <= 'F')
                                tmp += cx - 'A' + 10;
                            // lowercase digits run a..f, same as uppercase
                            else if ('a' <= cx && cx <= 'f')
                                tmp += cx - 'a' + 10;
                            else
                            {
                                _span.end.error("Non-hex char after \\x"_s);
                                return TOK_ERROR;
                            }
                        }
                        collect += tmp;
                    }
                    break;
                }
            }
            _string = AString(collect);
            return TOK_STRING;
        }
    case '\'':
    case '\\':
        _span.end.error("forbidden character"_s);
        return TOK_ERROR;
    default:
        // this includes integers - they are differentiated in parsing
        {
            MString collect;
            collect += co;
            // read until whitespace, (, ), ", or EOF
            // The terminating character is only peeked at (into tmp), not
            // consumed, so the next call sees it as the start of a lexeme.
            io::LineChar tmp;
            while (_in.get(tmp))
            {
                char ct = tmp.ch();
                if (ct == '\'' || ct == '\\')
                    // error later
                    break;
                if (ct == '(' || ct == ')' || ct == '"')
                    break;
                if (whitespace.contains(ct))
                    break;
                collect += ct;
                _span.end = tmp;
                _in.adv();
            }
            _string = AString(collect);
            // A non-printable token is reported, but still returned as
            // TOK_TOKEN rather than TOK_ERROR.
            if (!_string.is_print())
                _span.error("String is not entirely printable"_s);
            return TOK_TOKEN;
        }
    }
}
// Produce the textual form of one character as it should appear between
// the quotes of a string literal.
//
// Characters with a named backslash escape use it (newline excepted: it is
// emitted as-is), printable ASCII is emitted as-is, and anything else
// becomes a two-digit lowercase "\xNN" escape of its byte value.
VString<4> escape(char c)
{
    switch (c)
    {
    // the two characters that are special inside a literal
    case '\\': return "\\\\"_s;
    case '\"': return "\\\""_s;
    // control characters that have a mnemonic escape
    case '\a': return "\\a"_s;
    case '\b': return "\\b"_s;
    case '\e': return "\\e"_s;
    case '\f': return "\\f"_s;
    case '\r': return "\\r"_s;
    case '\t': return "\\t"_s;
    case '\v': return "\\v"_s;
    // newline is deliberately left unescaped
    case '\n': return c;
    default:
        break;
    }

    const bool printable_ascii = !(c < ' ' || '~' < c);
    if (printable_ascii)
        return c;
    return STRNPRINTF(5, "\\x%02x"_fmt, static_cast<uint8_t>(c));
}
// Build a double-quoted literal from s: an opening quote, the per-character
// escape of every char of s in order, then a closing quote.
AString escape(XString s)
{
    const char quote = '"';

    MString quoted;
    quoted += quote;
    for (const char each : s)
    {
        quoted += escape(each);
    }
    quoted += quote;

    return AString(quoted);
}
// Map a lexeme kind to its display name.
//
// Any value that is not one of EOF / OPEN / CLOSE / STRING / TOKEN is
// reported as "ERROR".
LString token_name(Lexeme tok)
{
    if (tok == TOK_EOF)
        return "EOF"_s;
    if (tok == TOK_OPEN)
        return "OPEN"_s;
    if (tok == TOK_CLOSE)
        return "CLOSE"_s;
    if (tok == TOK_STRING)
        return "STRING"_s;
    if (tok == TOK_TOKEN)
        return "TOKEN"_s;
    return "ERROR"_s;
}
} // namespace sexpr
} // namespace tmwa
|