summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFlavioJS <FlavioJS@54d463be-8e91-2dee-dedb-b68131a5f0ec>2007-12-09 22:08:31 +0000
committerFlavioJS <FlavioJS@54d463be-8e91-2dee-dedb-b68131a5f0ec>2007-12-09 22:08:31 +0000
commitb83d9633d70ee986fa52cf0445c228190ddf0b13 (patch)
treea50f200e4a322fc92decd9ef4537077b087fb109
parentfa26ed23b8bb0ae3eeca8a916930d0a8053d3a41 (diff)
downloadhercules-b83d9633d70ee986fa52cf0445c228190ddf0b13.tar.gz
hercules-b83d9633d70ee986fa52cf0445c228190ddf0b13.tar.bz2
hercules-b83d9633d70ee986fa52cf0445c228190ddf0b13.tar.xz
hercules-b83d9633d70ee986fa52cf0445c228190ddf0b13.zip
* Added a generic 'delimiter-separated values' string parser to strlib.c/h.
git-svn-id: https://rathena.svn.sourceforge.net/svnroot/rathena/trunk@11878 54d463be-8e91-2dee-dedb-b68131a5f0ec
-rw-r--r--Changelog-Trunk.txt2
-rw-r--r--src/common/strlib.c337
-rw-r--r--src/common/strlib.h36
3 files changed, 375 insertions, 0 deletions
diff --git a/Changelog-Trunk.txt b/Changelog-Trunk.txt
index 742677b67..6f7461546 100644
--- a/Changelog-Trunk.txt
+++ b/Changelog-Trunk.txt
@@ -3,6 +3,8 @@ Date Added
AS OF SVN REV. 5091, WE ARE NOW USING TRUNK. ALL UNTESTED BUGFIXES/FEATURES GO INTO TRUNK.
IF YOU HAVE A WORKING AND TESTED BUGFIX PUT IT INTO STABLE AS WELL AS TRUNK.
+2007/12/09
+ * Added a generic 'delimiter-separated values' string parser to strlib.c/h. [FlavioJS]
2007/12/07
* Fixed ensembles skills.
* removed the timer heap correction code when the timers overflow since
diff --git a/src/common/strlib.c b/src/common/strlib.c
index db47c969b..da1fddd8c 100644
--- a/src/common/strlib.c
+++ b/src/common/strlib.c
@@ -3,6 +3,7 @@
#include "../common/cbasetypes.h"
#include "../common/malloc.h"
+#include "../common/showmsg.h"
#include "strlib.h"
#include <stdio.h>
@@ -362,6 +363,342 @@ int strline(const char* str, size_t pos)
return line;
}
+
+
+/////////////////////////////////////////////////////////////////////
+/// Parses a delim-separated string.
+/// Starts parsing at startoff and fills the out_pos array with the start and
+/// end positions in the string of the line and fields (that fit the array).
+/// Returns the number of fields or -1 if an error occurs.
+///
+/// out_pos can be NULL (npos is then treated as 0).
+/// Positions out_pos[0] and out_pos[1] are for the line start and end
+/// positions. If a line terminator is found, the end position is placed there
+/// (the terminator itself is not consumed).
+/// The next values of the array are the start and end positions of the fields.
+/// out_pos[2] and out_pos[3] for the first field, out_pos[4] and out_pos[5]
+/// for the second field and so on.
+/// Unfilled positions are set to -1.
+///
+/// With SV_ESCAPE_C the escape sequences inside fields are validated and
+/// skipped: \x followed by hex digits, an octal escape starting with a digit
+/// from 0 to 3 followed by up to two more octal digits, or a backslash
+/// followed by one of the characters in SV_ESCAPE_C_SUPPORTED.
+///
+/// @param str String to parse (NULL yields 0 fields)
+/// @param len Length of the string
+/// @param startoff Where to start parsing
+/// @param delim Field delimiter
+/// @param out_pos Array of resulting positions
+/// @param npos Size of the pos array
+/// @param opt Options that determine the parsing behaviour
+/// @return Number of fields in the string or -1 if an error occurred
+int sv_parse(const char* str, int len, int startoff, char delim, int* out_pos, int npos, enum e_svopt opt)
+{
+	int i;
+	int count;
+	enum {
+		START_OF_FIELD,
+		PARSING_FIELD,
+		PARSING_C_ESCAPE,
+		END_OF_FIELD,
+		TERMINATE,
+		END
+	} state;
+
+	// check pos/npos
+	if( out_pos == NULL ) npos = 0;
+	for( i = 0; i < npos; ++i )
+		out_pos[i] = -1;
+
+	// check opt
+	if( delim == '\n' && (opt&(SV_TERMINATE_CRLF|SV_TERMINATE_LF)) )
+	{
+		ShowError("sv_parse: delimiter '\\n' is not compatible with options SV_TERMINATE_LF or SV_TERMINATE_CRLF.\n");
+		return -1;// error
+	}
+	if( delim == '\r' && (opt&(SV_TERMINATE_CRLF|SV_TERMINATE_CR)) )
+	{
+		ShowError("sv_parse: delimiter '\\r' is not compatible with options SV_TERMINATE_CR or SV_TERMINATE_CRLF.\n");
+		return -1;// error
+	}
+
+	// check str
+	if( str == NULL )
+		return 0;// nothing to parse
+
+#define IS_END() ( i >= len )
+#define IS_DELIM() ( str[i] == delim )
+#define IS_TERMINATOR() ( \
+	((opt&SV_TERMINATE_LF) && str[i] == '\n') || \
+	((opt&SV_TERMINATE_CR) && str[i] == '\r') || \
+	((opt&SV_TERMINATE_CRLF) && i+1 < len && str[i] == '\r' && str[i+1] == '\n') )
+#define IS_C_ESCAPE() ( (opt&SV_ESCAPE_C) && str[i] == '\\' )
+// wrapped in do{}while(0) so each macro always expands to a single statement
+#define SET_FIELD_START() do{ if( npos > count*2+2 ) out_pos[count*2+2] = i; }while(0)
+#define SET_FIELD_END() do{ if( npos > count*2+3 ) out_pos[count*2+3] = i; ++count; }while(0)
+
+	i = startoff;
+	count = 0;
+	state = START_OF_FIELD;
+	if( npos > 0 ) out_pos[0] = startoff;// start
+	while( state != END )
+	{
+		if( npos > 1 ) out_pos[1] = i;// end (tracks the current position until parsing stops)
+		switch( state )
+		{
+		case START_OF_FIELD:// record start of field and start parsing it
+			SET_FIELD_START();
+			state = PARSING_FIELD;
+			break;
+
+		case PARSING_FIELD:// skip field character
+			if( IS_END() || IS_DELIM() || IS_TERMINATOR() )
+				state = END_OF_FIELD;
+			else if( IS_C_ESCAPE() )
+				state = PARSING_C_ESCAPE;
+			else
+				++i;// normal character
+			break;
+
+		case PARSING_C_ESCAPE:// skip escape sequence (validates it too)
+			{
+				++i;// '\\'
+				if( IS_END() )
+				{
+					ShowError("sv_parse: empty escape sequence\n");
+					return -1;
+				}
+				if( str[i] == 'x' )
+				{// hex escape
+					++i;// 'x'
+					if( IS_END() || !ISXDIGIT(str[i]) )
+					{
+						ShowError("sv_parse: \\x with no following hex digits\n");
+						return -1;
+					}
+					do{
+						++i;// hex digit
+					}while( !IS_END() && ISXDIGIT(str[i]));
+				}
+				else if( str[i] >= '0' && str[i] <= '3' )
+				{// octal escape (255=0377, so the first digit goes up to 3)
+					++i;// octal digit
+					if( !IS_END() && str[i] >= '0' && str[i] <= '7' )
+						++i;// octal digit
+					if( !IS_END() && str[i] >= '0' && str[i] <= '7' )
+						++i;// octal digit
+				}
+				else if( str[i] != '\0' && strchr(SV_ESCAPE_C_SUPPORTED, str[i]) )
+				{// supported escape character (strchr would also match the terminator, hence the '\0' check)
+					++i;
+				}
+				else
+				{
+					ShowError("sv_parse: unknown escape sequence \\%c\n", str[i]);
+					return -1;
+				}
+				state = PARSING_FIELD;
+				break;
+			}
+
+		case END_OF_FIELD:// record end of field and continue
+			SET_FIELD_END();
+			if( IS_END() )
+				state = END;
+			else if( IS_DELIM() )
+			{
+				++i;// delim
+				state = START_OF_FIELD;
+			}
+			else if( IS_TERMINATOR() )
+				state = TERMINATE;
+			else
+				state = START_OF_FIELD;
+			break;
+
+		case TERMINATE:// the terminator is left unconsumed; the line end position points at it
+			state = END;
+			break;
+		}
+	}
+
+#undef IS_END
+#undef IS_DELIM
+#undef IS_TERMINATOR
+#undef IS_C_ESCAPE
+#undef SET_FIELD_START
+#undef SET_FIELD_END
+
+	return count;
+}
+
+/// Writes a C-style escaped copy of src into out_dest.
+/// NUL becomes \000, CR becomes \r, LF becomes \n and a backslash is doubled.
+/// Any other character listed in escapes is written as a three digit octal
+/// escape; everything else is copied unchanged.
+/// out_dest should be len*4+1 in size and is always NUL-terminated.
+///
+/// @param out_dest Destination buffer (NULL makes the call a no-op returning 0)
+/// @param src Source string (NULL produces an empty string)
+/// @param len Length of the source string
+/// @param escapes Extra characters to be escaped (NULL means none)
+/// @return Length of the escaped string
+size_t sv_escape_c(char* out_dest, const char* src, size_t len, const char* escapes)
+{
+	const char* extra;
+	size_t in;
+	size_t out = 0;
+
+	if( out_dest == NULL )
+		return 0;// no buffer to write to
+	if( src == NULL )
+	{// no input, produce an empty string
+		out_dest[0] = '\0';
+		return 0;
+	}
+	extra = ( escapes != NULL ) ? escapes : "";
+
+	for( in = 0; in < len; ++in )
+	{
+		const char ch = src[in];
+		const unsigned char raw = (unsigned char)ch;
+
+		if( ch == '\0' )
+		{// NUL is always written as a full octal escape
+			out_dest[out++] = '\\';
+			out_dest[out++] = '0';
+			out_dest[out++] = '0';
+			out_dest[out++] = '0';
+		}
+		else if( ch == '\r' )
+		{// carriage return
+			out_dest[out++] = '\\';
+			out_dest[out++] = 'r';
+		}
+		else if( ch == '\n' )
+		{// line feed
+			out_dest[out++] = '\\';
+			out_dest[out++] = 'n';
+		}
+		else if( ch == '\\' )
+		{// the escape character itself
+			out_dest[out++] = '\\';
+			out_dest[out++] = '\\';
+		}
+		else if( strchr(extra, ch) != NULL )
+		{// caller-requested escape, three octal digits
+			out_dest[out++] = '\\';
+			out_dest[out++] = (char)('0' + ((raw >> 6) & 7));
+			out_dest[out++] = (char)('0' + ((raw >> 3) & 7));
+			out_dest[out++] = (char)('0' + (raw & 7));
+		}
+		else
+		{// plain character
+			out_dest[out++] = ch;
+		}
+	}
+	out_dest[out] = '\0';
+	return out;
+}
+
+/// Unescapes src to out_dest according to the format of the C compiler.
+/// Returns the length of the unescaped string.
+/// out_dest should be len+1 in size and can be the same buffer as src.
+///
+/// Recognized sequences: \x followed by one or more hex digits, an octal
+/// escape starting with a digit from 0 to 3 followed by up to two more octal
+/// digits, and a backslash followed by a single character. Unknown single
+/// character escapes produce a warning and are copied without the backslash.
+/// Malformed input only produces warnings; the function never fails.
+///
+/// @param out_dest Destination buffer (NULL makes the call a no-op returning 0)
+/// @param src Source string (NULL produces an empty string)
+/// @param len Length of the source string
+/// @return Length of the unescaped string
+size_t sv_unescape_c(char* out_dest, const char* src, size_t len)
+{
+	// value of each hex digit character, 0 for everything else
+	static const unsigned char low2hex[256] = {
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0x0?
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0x1?
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0x2?
+		0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0,// 0x3?
+		0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0x4?
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0x5?
+		0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0x6?
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0x7?
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0x8?
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0x9?
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0xA?
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0xB?
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0xC?
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0xD?
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,// 0xE?
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // 0xF?
+	};
+	size_t i;
+	size_t j;
+
+	if( out_dest == NULL )
+		return 0;// nothing to do
+	if( src == NULL )
+	{// nothing to unescape
+		*out_dest = 0;
+		return 0;
+	}
+
+	// j never gets ahead of i, which is what makes in-place unescaping safe
+	for( i = 0, j = 0; i < len; )
+	{
+		if( src[i] == '\\' )
+		{
+			++i;// '\\'
+			if( i >= len )
+				ShowWarning("sv_unescape_c: empty escape sequence\n");
+			else if( src[i] == 'x' )
+			{// hex escape sequence
+				unsigned char c = 0;
+				unsigned char inrange = 1;
+
+				++i;// 'x'
+				if( i >= len || !ISXDIGIT(src[i]) )
+				{
+					ShowWarning("sv_unescape_c: \\x with no following hex digits\n");
+					continue;
+				}
+				do{
+					if( c > 0x0F && inrange )
+					{// one more digit would not fit in a single byte
+						ShowWarning("sv_unescape_c: hex escape sequence out of range\n");
+						inrange = 0;
+					}
+					c = (unsigned char)((c<<4)|low2hex[(unsigned char)src[i++]]);// hex digit is 4 bits wide
+				}while( i < len && ISXDIGIT(src[i]) );// consume every following hex digit
+				out_dest[j++] = (char)c;
+			}
+			else if( src[i] == '0' || src[i] == '1' || src[i] == '2' || src[i] == '3' )
+			{// octal escape sequence (255=0377)
+				unsigned char c = src[i]-'0';
+				++i;// '0', '1', '2' or '3'
+				if( i < len && src[i] >= '0' && src[i] <= '7' )
+				{
+					c = (c<<3)|(src[i]-'0');
+					++i;// octal digit
+				}
+				if( i < len && src[i] >= '0' && src[i] <= '7' )
+				{
+					c = (c<<3)|(src[i]-'0');
+					++i;// octal digit
+				}
+				out_dest[j++] = (char)c;
+			}
+			else
+			{// other escape sequence
+				// strchr also matches the string terminator, so '\0' is checked separately
+				if( src[i] == '\0' || strchr(SV_ESCAPE_C_SUPPORTED, src[i]) == NULL )
+					ShowWarning("sv_unescape_c: unknown escape sequence \\%c\n", src[i]);
+				switch( src[i] )
+				{
+				case 'a': out_dest[j++] = '\a'; break;
+				case 'b': out_dest[j++] = '\b'; break;
+				case 't': out_dest[j++] = '\t'; break;
+				case 'n': out_dest[j++] = '\n'; break;
+				case 'v': out_dest[j++] = '\v'; break;
+				case 'f': out_dest[j++] = '\f'; break;
+				case 'r': out_dest[j++] = '\r'; break;
+				case '?': out_dest[j++] = '\?'; break;
+				default: out_dest[j++] = src[i]; break;// quote, backslash or unknown: keep the character itself
+				}
+				++i;// escaped character
+			}
+		}
+		else
+			out_dest[j++] = src[i++];// normal character
+	}
+	out_dest[j] = 0;
+	return j;
+}
+
+
+
/////////////////////////////////////////////////////////////////////
// StringBuf - dynamic string
//
diff --git a/src/common/strlib.h b/src/common/strlib.h
index 31e364e2a..d1fb20d49 100644
--- a/src/common/strlib.h
+++ b/src/common/strlib.h
@@ -46,6 +46,42 @@ int safesnprintf(char* buf, size_t sz, const char* fmt, ...);
/// Lines start at 1.
int strline(const char* str, size_t pos);
+
+
+/// Option flags for sv_parse; the values are distinct bits that can be OR-ed together.
+enum e_svopt
+{
+	// no escape handling and no line terminator (the default)
+	SV_NOESCAPE_NOTERMINATE = 0x00,
+	// backslash escape sequences are recognized as in C source
+	SV_ESCAPE_C = 0x01,
+	// characters that end the line: LF, CR+LF pair, CR
+	SV_TERMINATE_LF = 0x02,
+	SV_TERMINATE_CRLF = 0x04,
+	SV_TERMINATE_CR = 0x08,
+};
+
+/// Characters accepted after a backslash as single-character escapes (hex and octal escapes are handled separately).
+#define SV_ESCAPE_C_SUPPORTED "abtnvfr\?\"'\\"
+
+/// Parses a delim-separated string, stopping at a line terminator if one is enabled in opt.
+/// Starts parsing at startoff and fills out_pos (may be NULL) with the line
+/// start/end positions followed by the start/end of each field that fits in npos.
+/// Returns the number of fields or -1 if an error occurs.
+int sv_parse(const char* str, int len, int startoff, char delim, int* out_pos, int npos, enum e_svopt opt);
+
+/// Escapes src to out_dest with C-style backslash sequences; characters listed in escapes become octal escapes.
+/// Returns the length of the escaped string (0 if out_dest or src is NULL).
+/// out_dest should be len*4+1 in size.
+size_t sv_escape_c(char* out_dest, const char* src, size_t len, const char* escapes);
+
+/// Unescapes src to out_dest, turning C-style backslash sequences back into single characters.
+/// Returns the length of the unescaped string.
+/// out_dest should be len+1 in size and can be the same buffer as src.
+size_t sv_unescape_c(char* out_dest, const char* src, size_t len);
+
+
+
/// StringBuf - dynamic string
struct StringBuf
{