From 42da97d7979f5db56d50072dfd7787ebf549ab1b Mon Sep 17 00:00:00 2001 From: Haru Date: Fri, 20 Sep 2013 04:28:50 +0200 Subject: Added regular expression matching script commands and operators - The script command pcre_match and the operator ~= will return the number of regular expression matches in a given string (roughly equivalent to the php function preg_match or the perl operator =~) - The operator ~! is the opposite of ~= (roughly equivalent to the perl operator !~) - See script_commands and npc/custom/test.txt for more information. Signed-off-by: Haru --- doc/script_commands.txt | 66 ++++++++++++++++++++++++++------- npc/custom/test.txt | 17 ++++++++- src/map/script.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++-- src/map/script.h | 4 ++ 4 files changed, 166 insertions(+), 18 deletions(-) diff --git a/doc/script_commands.txt b/doc/script_commands.txt index dc09256c5..5e3f62a69 100644 --- a/doc/script_commands.txt +++ b/doc/script_commands.txt @@ -725,6 +725,12 @@ other, but you can not compare numbers to strings. > - True if the first value greater than the second value. < - True if the first value is less than the second value. != - True if the first value IS NOT equal to the second one. + ~= - True if the second value (as regular expression) matches the first + value. Both values must be strings. See the script function pcre_match + for more details and advanced features. + ~! - True if the second value (as regular expression) DOES NOT match the + first value. Both values must be strings. See script function pcre_match + for more details and advanced features. Examples: @@ -732,9 +738,9 @@ Examples: 1<2 is True while 1>2 is False. @x>2 is True if @x is equal to 3. But it isn't true if @x is 2. -Only '==' and '!=' have been tested for comparing strings. Since there's -no way to code a seriously complex data structure in this language, trying -to sort strings by alphabet would be pointless anyway. +Only '==', '!=', '~=' and '~!' 
have been tested for comparing strings. Since +there's no way to code a seriously complex data structure in this language, +trying to sort strings by alphabet would be pointless anyway. Comparisons can be stacked in the same condition: @@ -910,6 +916,8 @@ Precedence | Description | Associativity --------------------------------------------------------------------------- 7 | == Equal to | Left to right | != Not equal to | + | ~= Regexp match | + | ~! Regexp non-match | --------------------------------------------------------------------------- 8 | & Bitwise AND | Left to right --------------------------------------------------------------------------- @@ -7234,15 +7242,54 @@ script is used. --------------------------------------- +*pcre_match("<string>","<regex>"); + +This command is only available if the server is compiled with regular +expressions library enabled. + +The string <string> will be searched for a match to the regular expression +<regex>, and the number of matches will be returned. + +An alternative way to invoke this command is to use the operators '~=' or '~!'. +The operator '~=' is exactly the same as pcre_match, while the operator '~!' +will return 1 if no matches were found, or 0 if at least one match was found. + + if (pcre_match("string", "regex")) mes "There was a match."; + if ("string" ~= "regex") mes "There was a match."; + if ("string" ~! "regex") mes "There were no matches."; + +You can find more usage examples in the test script npc/custom/test.txt. + +Using regular expressions is high wizardry. But with this high wizardry +comes unparalleled power of text manipulation. For an explanation of what +a regular expression pattern is, see a few web pages: + +http://www.regular-expressions.info/ +http://www.weitz.de/regex-coach/ + +Additionally, the following temporary variables will be filled (unless the +command is invoked as '~!'): + +- $@regexmatchcount: The number of matches detected, including any + parenthesized capture-groups. 
+- $@regexmatch$[0]: The part of <string> that matched the full pattern. +- $@regexmatch$[1 .. $@regexmatchcount]: The parts of <string> that matched + each of the parenthesized capture-groups in <regex>. + +A capture group is a part of a regex enclosed in (parentheses) in order to +store in a variable the part of the expression that was matched by that part of +the regex. For more details, see the links above, as this is not intended to be +a regex tutorial. + +--------------------------------------- + *defpattern <set number>,"<regular expression pattern>","<event label>"; *activatepset <set number>; *deactivatepset <set number>; *deletepset <set number>; This set of commands is only available if the server is compiled with -regular expressions library enabled. Default compilation and most binary -distributions aren't, which is probably bad, since these, while complex to -use, are quite fascinating. +regular expressions library enabled. They will make the NPC object listen for text spoken publicly by players and match it against regular expression patterns, then trigger labels @@ -7266,13 +7313,6 @@ pattern set number in this case will deactivate all pattern sets defined. 'deletepset' will delete a pattern set from memory, so you can create a new pattern set in its place. -Using regular expressions is high wizardry. But with this high wizardry -comes unparalleled power of text manipulation. For an explanation of what -a regular expression pattern is, see a few web pages: - -http://www.regular-expressions.info/ -http://www.weitz.de/regex-coach/ - For an example of this in use, see doc/sample/npc_test_pcre.txt With this you could, for example, automatically punish players for asking diff --git a/npc/custom/test.txt b/npc/custom/test.txt index 00f9c376e..6d1c6b49f 100644 --- a/npc/custom/test.txt +++ b/npc/custom/test.txt @@ -335,7 +335,7 @@ OnInit: callsub(OnCheck, "Order of < and <<", .@x); - // ==, != operators + // ==, !=, ~=, ~! 
operators .@x = (0 == 0); // true .@y = (1 == 0); // false callsub(OnCheck, "== operator", .@x); @@ -344,6 +344,21 @@ OnInit: .@y = (1 != 1); // false callsub(OnCheck, "!= operator", .@x); callsub(OnCheck, "!= operator", .@y, 0); + .@x$ = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. " + "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. " + "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. " + "Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."; + .@y = (.@x$ ~= "^Lorem.*, ([a-z]*).*(Duis).* ([a-z.]*)$"); + callsub(OnCheck, "~= operator", .@y, 4); + callsub(OnCheck, "~= operator", $@regexmatchcount, 4); + if( $@regexmatchcount == 4 ) { + callsub(OnCheck, "~= operator", $@regexmatch$[0], .@x$); + callsub(OnCheck, "~= operator", $@regexmatch$[1], "quis"); + callsub(OnCheck, "~= operator", $@regexmatch$[2], "Duis"); + callsub(OnCheck, "~= operator", $@regexmatch$[3], "laborum."); + } + .@y = (.@x$ ~! "^Not Lorem.*, ([a-z]*).*(Duis).* ([a-z.]*)$"); + callsub(OnCheck, "~! operator", .@y); // Associativity of ==, != .@x = (1 == 0 == 0); // (1 == 0) == 0 --> 0 == 0 --> 1 diff --git a/src/map/script.c b/src/map/script.c index 17eba6b21..41453e366 100644 --- a/src/map/script.c +++ b/src/map/script.c @@ -112,6 +112,10 @@ const char* script_op2name(int op) { RETURN_OP_NAME(C_SUB_POST); RETURN_OP_NAME(C_ADD_PRE); RETURN_OP_NAME(C_SUB_PRE); +#ifdef PCRE_SUPPORT + RETURN_OP_NAME(C_RE_EQ); + RETURN_OP_NAME(C_RE_NE); +#endif // PCRE_SUPPORT default: ShowDebug("script_op2name: unexpected op=%d\n", op); @@ -1224,6 +1228,10 @@ const char* script_parse_subexpr(const char* p,int limit) { || (op=C_XOR, opl=4, len=1,*p=='^') // ^ || (op=C_EQ, opl=6, len=2,*p=='=' && p[1]=='=') // == || (op=C_NE, opl=6, len=2,*p=='!' 
&& p[1]=='=') // != +#ifdef PCRE_SUPPORT + || (op=C_RE_EQ, opl=6, len=2,*p=='~' && p[1]=='=') // ~= + || (op=C_RE_NE, opl=6, len=2,*p=='~' && p[1]=='!') // ~! +#endif // PCRE_SUPPORT || (op=C_R_SHIFT,opl=8, len=2,*p=='>' && p[1]=='>') // >> || (op=C_GE, opl=7, len=2,*p=='>' && p[1]=='=') // >= || (op=C_GT, opl=7, len=1,*p=='>') // > @@ -3405,6 +3413,8 @@ void op_3(struct script_state* st, int op) /// s1 GE s2 -> i /// s1 LT s2 -> i /// s1 LE s2 -> i +/// s1 RE_EQ s2 -> i +/// s1 RE_NE s2 -> i /// s1 ADD s2 -> s void op_2str(struct script_state* st, int op, const char* s1, const char* s2) { @@ -3417,6 +3427,72 @@ void op_2str(struct script_state* st, int op, const char* s1, const char* s2) case C_GE: a = (strcmp(s1,s2) >= 0); break; case C_LT: a = (strcmp(s1,s2) < 0); break; case C_LE: a = (strcmp(s1,s2) <= 0); break; +#ifdef PCRE_SUPPORT + case C_RE_EQ: + case C_RE_NE: + { + int inputlen = (int)strlen(s1); + pcre *compiled_regex; + pcre_extra *extra_regex; + const char *pcre_error, *pcre_match; + int pcre_erroroffset, offsetcount, i; + int offsets[256*3]; // (max_capturing_groups+1)*3 + + compiled_regex = libpcre->compile(s2, 0, &pcre_error, &pcre_erroroffset, NULL); + + if( compiled_regex == NULL ) { + ShowError("script:op2_str: Invalid regex '%s'.\n", s2); + script->reportsrc(st); + script_pushnil(st); + st->state = END; + return; + } + + extra_regex = libpcre->study(compiled_regex, 0, &pcre_error); + + if( pcre_error != NULL ) { + libpcre->free(compiled_regex); + ShowError("script:op2_str: Unable to optimize the regex '%s': %s\n", s2, pcre_error); + script->reportsrc(st); + script_pushnil(st); + st->state = END; + return; + } + + offsetcount = libpcre->exec(compiled_regex, extra_regex, s1, inputlen, 0, 0, offsets, 256*3); + + if( offsetcount == 0 ) { + offsetcount = 256; + } else if( offsetcount == PCRE_ERROR_NOMATCH ) { + offsetcount = 0; + } else if( offsetcount < 0 ) { + libpcre->free(compiled_regex); + if( extra_regex != NULL ) + libpcre->free(extra_regex); 
+ ShowWarning("script:op2_str: Unable to process the regex '%s'.\n", s2); + script->reportsrc(st); + script_pushnil(st); + st->state = END; + return; + } + + if( op == C_RE_EQ ) { + for( i = 0; i < offsetcount; i++ ) { + libpcre->get_substring(s1, offsets, offsetcount, i, &pcre_match); + mapreg->setregstr(reference_uid(script->add_str("$@regexmatch$"), i), pcre_match); + libpcre->free_substring(pcre_match); + } + mapreg->setreg(script->add_str("$@regexmatchcount"), i); + a = offsetcount; + } else { // C_RE_NE + a = (offsetcount == 0); + } + libpcre->free(compiled_regex); + if( extra_regex != NULL ) + libpcre->free(extra_regex); + } + break; +#endif // PCRE_SUPPORT case C_ADD: { char* buf = (char *)aMalloc((strlen(s1)+strlen(s2)+1)*sizeof(char)); @@ -3988,6 +4064,10 @@ void run_script_main(struct script_state *st) { case C_LOR: case C_R_SHIFT: case C_L_SHIFT: +#ifdef PCRE_SUPPORT + case C_RE_EQ: + case C_RE_NE: +#endif // PCRE_SUPPORT script->op_2(st, c); break; @@ -18500,10 +18580,18 @@ BUILDIN(shopcount) { // declarations that were supposed to be exported from npc_chat.c #ifdef PCRE_SUPPORT - BUILDIN(defpattern); - BUILDIN(activatepset); - BUILDIN(deactivatepset); - BUILDIN(deletepset); +BUILDIN(defpattern); +BUILDIN(activatepset); +BUILDIN(deactivatepset); +BUILDIN(deletepset); + +BUILDIN(pcre_match) { + const char *input = script_getstr(st, 2); + const char *regex = script_getstr(st, 3); + + script->op_2str(st, C_RE_EQ, input, regex); + return true; +} #endif /** @@ -18867,6 +18955,7 @@ void script_parse_builtin(void) { BUILDIN_DEF(activatepset,"i"), // Activate a pattern set [MouseJstr] BUILDIN_DEF(deactivatepset,"i"), // Deactive a pattern set [MouseJstr] BUILDIN_DEF(deletepset,"i"), // Delete a pattern set [MouseJstr] + BUILDIN_DEF(pcre_match,"ss"), #endif BUILDIN_DEF(dispbottom,"s"), //added from jA [Lupus] BUILDIN_DEF(getusersname,""), diff --git a/src/map/script.h b/src/map/script.h index cf7f22aa9..eed0dbf1d 100644 --- a/src/map/script.h +++ 
b/src/map/script.h @@ -213,6 +213,10 @@ typedef enum c_op { C_SUB_POST, // a-- C_ADD_PRE, // ++a C_SUB_PRE, // --a +#ifdef PCRE_SUPPORT + C_RE_EQ, // ~= + C_RE_NE, // ~! +#endif // PCRE_SUPPORT } c_op; enum hQueueOpt { -- cgit v1.2.3-70-g09d2