summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHaru <haru@dotalux.com>2013-09-20 04:28:50 +0200
committerHaru <haru@dotalux.com>2014-03-17 17:15:02 +0100
commit42da97d7979f5db56d50072dfd7787ebf549ab1b (patch)
tree8946b2b7e5319512b01c0116b20099d263d00196
parent1cf8ea92f8e8e992617addc371272c78e60df219 (diff)
downloadhercules-42da97d7979f5db56d50072dfd7787ebf549ab1b.tar.gz
hercules-42da97d7979f5db56d50072dfd7787ebf549ab1b.tar.bz2
hercules-42da97d7979f5db56d50072dfd7787ebf549ab1b.tar.xz
hercules-42da97d7979f5db56d50072dfd7787ebf549ab1b.zip
Added regular expression matching script commands and operators
- The script command pcre_match and the operator ~= will return the number of regular expression matches in a given string (roughly equivalent to the PHP function preg_match or the Perl operator =~)
- The operator ~! is the opposite of ~= (roughly equivalent to the Perl operator !~)
- See doc/script_commands.txt and npc/custom/test.txt for more information.

Signed-off-by: Haru <haru@dotalux.com>
-rw-r--r--doc/script_commands.txt66
-rw-r--r--npc/custom/test.txt17
-rw-r--r--src/map/script.c97
-rw-r--r--src/map/script.h4
4 files changed, 166 insertions, 18 deletions
diff --git a/doc/script_commands.txt b/doc/script_commands.txt
index dc09256c5..5e3f62a69 100644
--- a/doc/script_commands.txt
+++ b/doc/script_commands.txt
@@ -725,6 +725,12 @@ other, but you can not compare numbers to strings.
> - True if the first value is greater than the second value.
< - True if the first value is less than the second value.
!= - True if the first value IS NOT equal to the second one.
+ ~= - True if the second value (as regular expression) matches the first
+ value. Both values must be strings. See the script function pcre_match
+ for more details and advanced features.
+ ~! - True if the second value (as regular expression) DOES NOT match the
+ first value. Both values must be strings. See the script function
+ pcre_match for more details and advanced features.
Examples:
@@ -732,9 +738,9 @@ Examples:
1<2 is True while 1>2 is False.
@x>2 is True if @x is equal to 3. But it isn't true if @x is 2.
-Only '==' and '!=' have been tested for comparing strings. Since there's
-no way to code a seriously complex data structure in this language, trying
-to sort strings by alphabet would be pointless anyway.
+Only '==', '!=', '~=' and '~!' have been tested for comparing strings. Since
+there's no way to code a seriously complex data structure in this language,
+trying to sort strings by alphabet would be pointless anyway.
Comparisons can be stacked in the same condition:
@@ -910,6 +916,8 @@ Precedence | Description | Associativity
---------------------------------------------------------------------------
7 | == Equal to | Left to right
| != Not equal to |
+ | ~= Regexp match |
+ | ~! Regexp non-match |
---------------------------------------------------------------------------
8 | & Bitwise AND | Left to right
---------------------------------------------------------------------------
@@ -7234,15 +7242,54 @@ script is used.
---------------------------------------
+*pcre_match("<string>","<regex>");
+
+This command is only available if the server is compiled with the regular
+expressions library enabled.
+
+The string <string> will be searched for a match to the regular expression
+<regex>, and the number of matches will be returned.
+
+An alternative way to invoke this command is to use the operators '~=' or '~!'.
+The operator '~=' is exactly the same as pcre_match, while the operator '~!'
+will return 1 if no matches were found, or 0 if at least one match was found.
+
+ if (pcre_match("string", "regex")) mes "There was a match.";
+ if ("string" ~= "regex") mes "There was a match.";
+ if ("string" ~! "regex") mes "There were no matches.";
+
+You can find more usage examples in the test script npc/custom/test.txt.
+
+Using regular expressions is high wizardry. But with this high wizardry
+comes unparalleled power of text manipulation. For an explanation of what
+a regular expression pattern is, see a few web pages:
+
+http://www.regular-expressions.info/
+http://www.weitz.de/regex-coach/
+
+Additionally, the following temporary variables will be filled (unless the
+command is invoked as '~!'):
+
+- $@regexmatchcount: The number of matches detected, counting the full match
+ plus one for each parenthesized capture-group.
+- $@regexmatch$[0]: The part of <string> that matched the full <regex> pattern.
+- $@regexmatch$[1 .. $@regexmatchcount - 1]: The parts of <string> that
+ matched each of the parenthesized capture-groups in <regex>.
+
+A capture group is a part of a regex enclosed in (parentheses) in order to
+store in a variable the part of the string that was matched by that part of
+the regex. For more details, see the links above, as this is not intended to be
+a regex tutorial.
+
+---------------------------------------
+
*defpattern <set number>,"<regular expression pattern>","<event label>";
*activatepset <set number>;
*deactivatepset <set number>;
*deletepset <set number>;
This set of commands is only available if the server is compiled with
-regular expressions library enabled. Default compilation and most binary
-distributions aren't, which is probably bad, since these, while complex to
-use, are quite fascinating.
+the regular expressions library enabled.
They will make the NPC object listen for text spoken publicly by players
and match it against regular expression patterns, then trigger labels
@@ -7266,13 +7313,6 @@ pattern set number in this case will deactivate all pattern sets defined.
'deletepset' will delete a pattern set from memory, so you can create a
new pattern set in its place.
-Using regular expressions is high wizardry. But with this high wizardry
-comes unparalleled power of text manipulation. For an explanation of what
-a regular expression pattern is, see a few web pages:
-
-http://www.regular-expressions.info/
-http://www.weitz.de/regex-coach/
-
For an example of this in use, see doc/sample/npc_test_pcre.txt
With this you could, for example, automatically punish players for asking
diff --git a/npc/custom/test.txt b/npc/custom/test.txt
index 00f9c376e..6d1c6b49f 100644
--- a/npc/custom/test.txt
+++ b/npc/custom/test.txt
@@ -335,7 +335,7 @@ OnInit:
callsub(OnCheck, "Order of < and <<", .@x);
- // ==, != operators
+ // ==, !=, ~=, ~! operators
.@x = (0 == 0); // true
.@y = (1 == 0); // false
callsub(OnCheck, "== operator", .@x);
@@ -344,6 +344,21 @@ OnInit:
.@y = (1 != 1); // false
callsub(OnCheck, "!= operator", .@x);
callsub(OnCheck, "!= operator", .@y, 0);
+ .@x$ = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. "
+ "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. "
+ "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. "
+ "Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";
+ .@y = (.@x$ ~= "^Lorem.*, ([a-z]*).*(Duis).* ([a-z.]*)$");
+ callsub(OnCheck, "~= operator", .@y, 4);
+ callsub(OnCheck, "~= operator", $@regexmatchcount, 4);
+ if( $@regexmatchcount == 4 ) {
+ callsub(OnCheck, "~= operator", $@regexmatch$[0], .@x$);
+ callsub(OnCheck, "~= operator", $@regexmatch$[1], "quis");
+ callsub(OnCheck, "~= operator", $@regexmatch$[2], "Duis");
+ callsub(OnCheck, "~= operator", $@regexmatch$[3], "laborum.");
+ }
+ .@y = (.@x$ ~! "^Not Lorem.*, ([a-z]*).*(Duis).* ([a-z.]*)$");
+ callsub(OnCheck, "~! operator", .@y);
// Associativity of ==, !=
.@x = (1 == 0 == 0); // (1 == 0) == 0 --> 0 == 0 --> 1
diff --git a/src/map/script.c b/src/map/script.c
index 17eba6b21..41453e366 100644
--- a/src/map/script.c
+++ b/src/map/script.c
@@ -112,6 +112,10 @@ const char* script_op2name(int op) {
RETURN_OP_NAME(C_SUB_POST);
RETURN_OP_NAME(C_ADD_PRE);
RETURN_OP_NAME(C_SUB_PRE);
+#ifdef PCRE_SUPPORT
+ RETURN_OP_NAME(C_RE_EQ);
+ RETURN_OP_NAME(C_RE_NE);
+#endif // PCRE_SUPPORT
default:
ShowDebug("script_op2name: unexpected op=%d\n", op);
@@ -1224,6 +1228,10 @@ const char* script_parse_subexpr(const char* p,int limit) {
|| (op=C_XOR, opl=4, len=1,*p=='^') // ^
|| (op=C_EQ, opl=6, len=2,*p=='=' && p[1]=='=') // ==
|| (op=C_NE, opl=6, len=2,*p=='!' && p[1]=='=') // !=
+#ifdef PCRE_SUPPORT
+ || (op=C_RE_EQ, opl=6, len=2,*p=='~' && p[1]=='=') // ~=
+ || (op=C_RE_NE, opl=6, len=2,*p=='~' && p[1]=='!') // ~!
+#endif // PCRE_SUPPORT
|| (op=C_R_SHIFT,opl=8, len=2,*p=='>' && p[1]=='>') // >>
|| (op=C_GE, opl=7, len=2,*p=='>' && p[1]=='=') // >=
|| (op=C_GT, opl=7, len=1,*p=='>') // >
@@ -3405,6 +3413,8 @@ void op_3(struct script_state* st, int op)
/// s1 GE s2 -> i
/// s1 LT s2 -> i
/// s1 LE s2 -> i
+/// s1 RE_EQ s2 -> i
+/// s1 RE_NE s2 -> i
/// s1 ADD s2 -> s
void op_2str(struct script_state* st, int op, const char* s1, const char* s2)
{
@@ -3417,6 +3427,72 @@ void op_2str(struct script_state* st, int op, const char* s1, const char* s2)
case C_GE: a = (strcmp(s1,s2) >= 0); break;
case C_LT: a = (strcmp(s1,s2) < 0); break;
case C_LE: a = (strcmp(s1,s2) <= 0); break;
+#ifdef PCRE_SUPPORT
+ case C_RE_EQ:
+ case C_RE_NE:
+ {
+ int inputlen = (int)strlen(s1);
+ pcre *compiled_regex;
+ pcre_extra *extra_regex;
+ const char *pcre_error, *pcre_match;
+ int pcre_erroroffset, offsetcount, i;
+ int offsets[256*3]; // (max_capturing_groups+1)*3
+
+ compiled_regex = libpcre->compile(s2, 0, &pcre_error, &pcre_erroroffset, NULL);
+
+ if( compiled_regex == NULL ) {
+ ShowError("script:op2_str: Invalid regex '%s'.\n", s2);
+ script->reportsrc(st);
+ script_pushnil(st);
+ st->state = END;
+ return;
+ }
+
+ extra_regex = libpcre->study(compiled_regex, 0, &pcre_error);
+
+ if( pcre_error != NULL ) {
+ libpcre->free(compiled_regex);
+ ShowError("script:op2_str: Unable to optimize the regex '%s': %s\n", s2, pcre_error);
+ script->reportsrc(st);
+ script_pushnil(st);
+ st->state = END;
+ return;
+ }
+
+ offsetcount = libpcre->exec(compiled_regex, extra_regex, s1, inputlen, 0, 0, offsets, 256*3);
+
+ if( offsetcount == 0 ) {
+ offsetcount = 256;
+ } else if( offsetcount == PCRE_ERROR_NOMATCH ) {
+ offsetcount = 0;
+ } else if( offsetcount < 0 ) {
+ libpcre->free(compiled_regex);
+ if( extra_regex != NULL )
+ libpcre->free(extra_regex);
+ ShowWarning("script:op2_str: Unable to process the regex '%s'.\n", s2);
+ script->reportsrc(st);
+ script_pushnil(st);
+ st->state = END;
+ return;
+ }
+
+ if( op == C_RE_EQ ) {
+ for( i = 0; i < offsetcount; i++ ) {
+ libpcre->get_substring(s1, offsets, offsetcount, i, &pcre_match);
+ mapreg->setregstr(reference_uid(script->add_str("$@regexmatch$"), i), pcre_match);
+ libpcre->free_substring(pcre_match);
+ }
+ mapreg->setreg(script->add_str("$@regexmatchcount"), i);
+ a = offsetcount;
+ } else { // C_RE_NE
+ a = (offsetcount == 0);
+ }
+ libpcre->free(compiled_regex);
+ if( extra_regex != NULL )
+ libpcre->free(extra_regex);
+ }
+ break;
+#endif // PCRE_SUPPORT
case C_ADD:
{
char* buf = (char *)aMalloc((strlen(s1)+strlen(s2)+1)*sizeof(char));
@@ -3988,6 +4064,10 @@ void run_script_main(struct script_state *st) {
case C_LOR:
case C_R_SHIFT:
case C_L_SHIFT:
+#ifdef PCRE_SUPPORT
+ case C_RE_EQ:
+ case C_RE_NE:
+#endif // PCRE_SUPPORT
script->op_2(st, c);
break;
@@ -18500,10 +18580,18 @@ BUILDIN(shopcount) {
// declarations that were supposed to be exported from npc_chat.c
#ifdef PCRE_SUPPORT
- BUILDIN(defpattern);
- BUILDIN(activatepset);
- BUILDIN(deactivatepset);
- BUILDIN(deletepset);
+BUILDIN(defpattern);
+BUILDIN(activatepset);
+BUILDIN(deactivatepset);
+BUILDIN(deletepset);
+
+BUILDIN(pcre_match) {
+ const char *input = script_getstr(st, 2);
+ const char *regex = script_getstr(st, 3);
+
+ script->op_2str(st, C_RE_EQ, input, regex);
+ return true;
+}
#endif
/**
@@ -18867,6 +18955,7 @@ void script_parse_builtin(void) {
BUILDIN_DEF(activatepset,"i"), // Activate a pattern set [MouseJstr]
BUILDIN_DEF(deactivatepset,"i"), // Deactive a pattern set [MouseJstr]
BUILDIN_DEF(deletepset,"i"), // Delete a pattern set [MouseJstr]
+ BUILDIN_DEF(pcre_match,"ss"),
#endif
BUILDIN_DEF(dispbottom,"s"), //added from jA [Lupus]
BUILDIN_DEF(getusersname,""),
diff --git a/src/map/script.h b/src/map/script.h
index cf7f22aa9..eed0dbf1d 100644
--- a/src/map/script.h
+++ b/src/map/script.h
@@ -213,6 +213,10 @@ typedef enum c_op {
C_SUB_POST, // a--
C_ADD_PRE, // ++a
C_SUB_PRE, // --a
+#ifdef PCRE_SUPPORT
+ C_RE_EQ, // ~=
+ C_RE_NE, // ~!
+#endif // PCRE_SUPPORT
} c_op;
enum hQueueOpt {