From 07a3d8dbd5d391d7be813f51bb7c941a5276aee3 Mon Sep 17 00:00:00 2001 From: ewewukek Date: Fri, 23 Feb 2024 17:24:41 +0300 Subject: Switch to PCRE2 engine --- configure.ac | 2 + src/jpcre2.hpp | 5119 +++++++++++++++++++++++++++++++++++++ src/lintmanager.cpp | 3 +- src/rules/baseclass.cpp | 7 +- src/rules/brackets.cpp | 2 +- src/rules/constructor.cpp | 2 +- src/rules/constructorbrackets.cpp | 5 +- src/rules/copyconstructor.cpp | 17 +- src/rules/debug.cpp | 4 +- src/rules/dump.cpp | 4 +- src/rules/final.cpp | 6 +- src/rules/formatting.cpp | 4 +- src/rules/include.cpp | 3 +- src/rules/license.cpp | 6 +- src/rules/packet.cpp | 3 +- src/rules/po.cpp | 3 +- src/rules/translation.cpp | 2 +- src/rules/virtual.cpp | 4 +- src/rules/xml.cpp | 3 +- src/stringutils.cpp | 27 +- src/stringutils.h | 7 +- src/template.hpp | 1 - 22 files changed, 5182 insertions(+), 52 deletions(-) create mode 100644 src/jpcre2.hpp diff --git a/configure.ac b/configure.ac index 5944ab8..dd52d17 100755 --- a/configure.ac +++ b/configure.ac @@ -25,6 +25,8 @@ AC_CHECK_FUNCS([atexit floor getcwd gethostbyname memset mkdir select socket]) AC_HEADER_STDC AC_CHECK_HEADERS([arpa/inet.h fcntl.h malloc.h netdb.h netinet/in.h stdlib.h string.h sys/socket.h unistd.h]) +LIBS="$LIBS -lpcre2-8" + AC_CONFIG_FILES([ Makefile src/Makefile diff --git a/src/jpcre2.hpp b/src/jpcre2.hpp new file mode 100644 index 0000000..0736275 --- /dev/null +++ b/src/jpcre2.hpp @@ -0,0 +1,5119 @@ +/* ***************************************************************************** + * ******************* C++ wrapper for PCRE2 Library **************************** + * ***************************************************************************** + * Copyright (c) Md. 
Jahidul Hamid + * + * ----------------------------------------------------------------------------- + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * The names of its contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * Disclaimer: + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * */ + +/** @file jpcre2.hpp + * @brief Main header file for JPCRE2 library to be included by programs that uses its functionalities. + * It includes the `pcre2.h` header, therefore you shouldn't include `pcre2.h`, neither should you define `PCRE2_CODE_UNIT_WIDTH` before including + * `jpcre2.hpp`. 
+ * If your `pcre2.h` header is not in standard include paths, you may include `pcre2.h` with correct path before including `jpcre2.hpp` + * manually. In this case you will have to define `PCRE2_CODE_UNIT_WIDTH` before including `pcre2.h`. + * Make sure to link required PCRE2 libraries when compiling. + * + * @author [Md Jahidul Hamid](https://github.com/neurobin) + */ + +#ifndef JPCRE2_HPP +#define JPCRE2_HPP + +#ifndef PCRE2_CODE_UNIT_WIDTH + +///@def PCRE2_CODE_UNIT_WIDTH +///This macro does not have any significance in JPCRE2 context. +///It is defined as 0 by default. Defining it before including jpcre2.hpp +///will override the default (discouraged as it will make it harder for you to detect problems), +///but still it will have no effect in a JPCRE2 perspective. +///Defining it with an invalid value will yield to compile error. +#define PCRE2_CODE_UNIT_WIDTH 0 +#endif + +//previous inclusion of pcre2.h will be respected and we won't try to include it twice. +//Thus one can pre-include pcre2.h from an arbitrary/non-standard path. 
+#ifndef PCRE2_MAJOR + #include // pcre2 header +#endif +#include // std::string, std::wstring +#include // std::vector +#include // std::map +#include // std::fprintf +#include // CHAR_BIT +#include // std::abort() + +#if __cplusplus >= 201103L || _MSVC_LANG >= 201103L + #define JPCRE2_USE_MINIMUM_CXX_11 1 + #include + #ifndef JPCRE2_USE_FUNCTION_POINTER_CALLBACK + #include // std::function + #endif +#else + #define JPCRE2_USE_MINIMUM_CXX_11 0 +#endif + +#define JPCRE2_UNUSED(x) ((void)(x)) +#if defined(NDEBUG) || defined(JPCRE2_NDEBUG) + #define JPCRE2_ASSERT(cond, msg) ((void)0) + #define JPCRE2_VECTOR_DATA_ASSERT(cond, name) ((void)0) +#else + #define JPCRE2_ASSERT(cond, msg) jpcre2::jassert(cond, msg, __FILE__, __LINE__) + #define JPCRE2_VECTOR_DATA_ASSERT(cond, name) jpcre2::_jvassert(cond, name, __FILE__, __LINE__) +#endif + +// In Windows, Windows.h defines ERROR macro +// It conflicts with our jpcre2::ERROR namespace +#ifdef ERROR +#undef ERROR +#endif + + +/** @namespace jpcre2 + * Top level namespace of JPCRE2. + * + * All functions, classes/structs, constants, enums that are provided by JPCRE2 belong to this namespace while + * **PCRE2** structs, functions, constants remain outside of its scope. + * + * If you want to use any PCRE2 functions or constants, + * remember that they are in the global scope and should be used as such. + */ +namespace jpcre2 { + + +///Define for JPCRE2 version. +///It can be used to support changes in different versions of the lib. +#define JPCRE2_VERSION 103201L + +/** @namespace jpcre2::INFO + * Namespace to provide information about JPCRE2 library itself. + * Contains constant Strings with version info. 
+ */ +namespace INFO { + static const char NAME[] = "JPCRE2"; ///< Name of the project + static const char FULL_VERSION[] = "10.32.01"; ///< Full version string + static const char VERSION_GENRE[] = "10"; ///< Generation, depends on original PCRE2 version + static const char VERSION_MAJOR[] = "32"; ///< Major version, updated when API change is made + static const char VERSION_MINOR[] = "01"; ///< Minor version, includes bug fix or minor feature upgrade + static const char VERSION_PRE_RELEASE[] = ""; ///< Alpha or beta (testing) release version +} + + +typedef PCRE2_SIZE SIZE_T; ///< Used for match count and vector size +typedef uint32_t Uint; ///< Used for options (bitwise operation) +typedef uint8_t Ush; ///< 8 bit unsigned integer. +typedef std::vector VecOff; ///< vector of size_t. +typedef std::vector VecOpt; ///< vector for Uint option values. + +/// @namespace jpcre2::ERROR +/// Namespace for error codes. +namespace ERROR { + /** Error numbers for JPCRE2. + * JPCRE2 error numbers are positive integers while + * PCRE2 error numbers are negative integers. + */ + enum { + INVALID_MODIFIER = 2, ///< Invalid modifier was detected + INSUFFICIENT_OVECTOR = 3 ///< Ovector was not big enough during a match + }; +} + + +/** These constants provide JPCRE2 options. + */ +enum { + NONE = 0x0000000u, ///< Option 0 (zero) + FIND_ALL = 0x0000002u, ///< Find all during match (global match) + JIT_COMPILE = 0x0000004u ///< Perform JIT compilation for optimization +}; + + +//enableif and is_same implementation +template +struct EnableIf{}; +template +struct EnableIf{typedef T Type;}; + +template +struct IsSame{ static const bool value = false; }; +template +struct IsSame{ static const bool value = true; }; + + +///JPCRE2 assert function. +///Aborts with an error message if condition fails. +///@param cond boolean condition +///@param msg message (std::string) +///@param f file where jassert was called. +///@param line line number where jassert was called. 
+static inline void jassert(bool cond, const char* msg, const char* f, size_t line){ + if(!cond) { + std::fprintf(stderr,"\n\tE: AssertionFailure\n%s\nAssertion failed in file: %s\t at line: %u\n", msg, f, (unsigned)line); + std::abort(); + } +} + +static inline void _jvassert(bool cond, char const * name, const char* f, size_t line){ + jassert(cond, (std::string("ValueError: \n\ + Required data vector of type ")+std::string(name)+" is empty.\n\ + Your MatchEvaluator callback function is not\n\ + compatible with existing data!!\n\ + You are trying to use a vector that does not\n\ + have any match data. Either call nreplace() or replace()\n\ + with true or perform a match with appropriate\n\ + callback function. For more details, refer to\n\ + the doc in MatchEvaluator section.").c_str(), f, line); +} + +static inline std::string _tostdstring(unsigned x){ + char buf[128]; + int written = std::sprintf(buf, "%u", x); + return (written > 0) ? std::string(buf, buf + written) : std::string(); +} + + +////////////////////////// The following are type and function mappings from PCRE2 interface to JPCRE2 interface ///////////////////////// + +//forward declaration + +template struct Pcre2Type; +template struct Pcre2Func; + +//PCRE2 types +//These templated types will be used in place of actual types +template struct Pcre2Type {}; + +template<> struct Pcre2Type<8>{ + //typedefs used + typedef PCRE2_UCHAR8 Pcre2Uchar; + typedef PCRE2_SPTR8 Pcre2Sptr; + typedef pcre2_code_8 Pcre2Code; + typedef pcre2_compile_context_8 CompileContext; + typedef pcre2_match_data_8 MatchData; + typedef pcre2_general_context_8 GeneralContext; + typedef pcre2_match_context_8 MatchContext; + typedef pcre2_jit_callback_8 JitCallback; + typedef pcre2_jit_stack_8 JitStack; +}; + +template<> struct Pcre2Type<16>{ + //typedefs used + typedef PCRE2_UCHAR16 Pcre2Uchar; + typedef PCRE2_SPTR16 Pcre2Sptr; + typedef pcre2_code_16 Pcre2Code; + typedef pcre2_compile_context_16 CompileContext; + typedef 
pcre2_match_data_16 MatchData; + typedef pcre2_general_context_16 GeneralContext; + typedef pcre2_match_context_16 MatchContext; + typedef pcre2_jit_callback_16 JitCallback; + typedef pcre2_jit_stack_16 JitStack; +}; + +template<> struct Pcre2Type<32>{ + //typedefs used + typedef PCRE2_UCHAR32 Pcre2Uchar; + typedef PCRE2_SPTR32 Pcre2Sptr; + typedef pcre2_code_32 Pcre2Code; + typedef pcre2_compile_context_32 CompileContext; + typedef pcre2_match_data_32 MatchData; + typedef pcre2_general_context_32 GeneralContext; + typedef pcre2_match_context_32 MatchContext; + typedef pcre2_jit_callback_32 JitCallback; + typedef pcre2_jit_stack_32 JitStack; +}; + +//wrappers for PCRE2 functions +template struct Pcre2Func{}; + +//8-bit version +template<> struct Pcre2Func<8> { + static Pcre2Type<8>::CompileContext* compile_context_create(Pcre2Type<8>::GeneralContext *gcontext){ + return pcre2_compile_context_create_8(gcontext); + } + static void compile_context_free(Pcre2Type<8>::CompileContext *ccontext){ + pcre2_compile_context_free_8(ccontext); + } + static Pcre2Type<8>::CompileContext* compile_context_copy(Pcre2Type<8>::CompileContext* ccontext){ + return pcre2_compile_context_copy_8(ccontext); + } + static const unsigned char * maketables(Pcre2Type<8>::GeneralContext* gcontext){ + return pcre2_maketables_8(gcontext); + } + static int set_character_tables(Pcre2Type<8>::CompileContext * ccontext, const unsigned char * table){ + return pcre2_set_character_tables_8(ccontext, table); + } + static Pcre2Type<8>::Pcre2Code * compile(Pcre2Type<8>::Pcre2Sptr pattern, + PCRE2_SIZE length, + uint32_t options, + int *errorcode, + PCRE2_SIZE *erroroffset, + Pcre2Type<8>::CompileContext *ccontext){ + return pcre2_compile_8(pattern, length, options, errorcode, erroroffset, ccontext); + } + static int jit_compile(Pcre2Type<8>::Pcre2Code *code, uint32_t options){ + return pcre2_jit_compile_8(code, options); + } + static int substitute( const Pcre2Type<8>::Pcre2Code *code, + 
Pcre2Type<8>::Pcre2Sptr subject, + PCRE2_SIZE length, + PCRE2_SIZE startoffset, + uint32_t options, + Pcre2Type<8>::MatchData *match_data, + Pcre2Type<8>::MatchContext *mcontext, + Pcre2Type<8>::Pcre2Sptr replacement, + PCRE2_SIZE rlength, + Pcre2Type<8>::Pcre2Uchar *outputbuffer, + PCRE2_SIZE *outlengthptr){ + return pcre2_substitute_8( code, subject, length, startoffset, options, match_data, + mcontext, replacement, rlength, outputbuffer, outlengthptr); + } + //~ static int substring_get_bynumber(Pcre2Type<8>::MatchData *match_data, + //~ uint32_t number, + //~ Pcre2Type<8>::Pcre2Uchar **bufferptr, + //~ PCRE2_SIZE *bufflen){ + //~ return pcre2_substring_get_bynumber_8(match_data, number, bufferptr, bufflen); + //~ } + //~ static int substring_get_byname(Pcre2Type<8>::MatchData *match_data, + //~ Pcre2Type<8>::Pcre2Sptr name, + //~ Pcre2Type<8>::Pcre2Uchar **bufferptr, + //~ PCRE2_SIZE *bufflen){ + //~ return pcre2_substring_get_byname_8(match_data, name, bufferptr, bufflen); + //~ } + //~ static void substring_free(Pcre2Type<8>::Pcre2Uchar *buffer){ + //~ pcre2_substring_free_8(buffer); + //~ } + //~ static Pcre2Type<8>::Pcre2Code * code_copy(const Pcre2Type<8>::Pcre2Code *code){ + //~ return pcre2_code_copy_8(code); + //~ } + static void code_free(Pcre2Type<8>::Pcre2Code *code){ + pcre2_code_free_8(code); + } + static int get_error_message( int errorcode, + Pcre2Type<8>::Pcre2Uchar *buffer, + PCRE2_SIZE bufflen){ + return pcre2_get_error_message_8(errorcode, buffer, bufflen); + } + static Pcre2Type<8>::MatchData * match_data_create_from_pattern( + const Pcre2Type<8>::Pcre2Code *code, + Pcre2Type<8>::GeneralContext *gcontext){ + return pcre2_match_data_create_from_pattern_8(code, gcontext); + + } + static int match( const Pcre2Type<8>::Pcre2Code *code, + Pcre2Type<8>::Pcre2Sptr subject, + PCRE2_SIZE length, + PCRE2_SIZE startoffset, + uint32_t options, + Pcre2Type<8>::MatchData *match_data, + Pcre2Type<8>::MatchContext *mcontext){ + return pcre2_match_8(code, 
subject, length, startoffset, options, match_data, mcontext); + } + static void match_data_free(Pcre2Type<8>::MatchData *match_data){ + pcre2_match_data_free_8(match_data); + } + static PCRE2_SIZE * get_ovector_pointer(Pcre2Type<8>::MatchData *match_data){ + return pcre2_get_ovector_pointer_8(match_data); + } + static int pattern_info(const Pcre2Type<8>::Pcre2Code *code, uint32_t what, void *where){ + return pcre2_pattern_info_8(code, what, where); + } + static int set_newline(Pcre2Type<8>::CompileContext *ccontext, uint32_t value){ + return pcre2_set_newline_8(ccontext, value); + } + //~ static void jit_stack_assign(Pcre2Type<8>::MatchContext *mcontext, + //~ Pcre2Type<8>::JitCallback callback_function, + //~ void *callback_data){ + //~ pcre2_jit_stack_assign_8(mcontext, callback_function, callback_data); + //~ } + //~ static Pcre2Type<8>::JitStack *jit_stack_create(PCRE2_SIZE startsize, PCRE2_SIZE maxsize, + //~ Pcre2Type<8>::GeneralContext *gcontext){ + //~ return pcre2_jit_stack_create_8(startsize, maxsize, gcontext); + //~ } + //~ static void jit_stack_free(Pcre2Type<8>::JitStack *jit_stack){ + //~ pcre2_jit_stack_free_8(jit_stack); + //~ } + //~ static void jit_free_unused_memory(Pcre2Type<8>::GeneralContext *gcontext){ + //~ pcre2_jit_free_unused_memory_8(gcontext); + //~ } + //~ static Pcre2Type<8>::MatchContext *match_context_create(Pcre2Type<8>::GeneralContext *gcontext){ + //~ return pcre2_match_context_create_8(gcontext); + //~ } + //~ static Pcre2Type<8>::MatchContext *match_context_copy(Pcre2Type<8>::MatchContext *mcontext){ + //~ return pcre2_match_context_copy_8(mcontext); + //~ } + //~ static void match_context_free(Pcre2Type<8>::MatchContext *mcontext){ + //~ pcre2_match_context_free_8(mcontext); + //~ } + static uint32_t get_ovector_count(Pcre2Type<8>::MatchData *match_data){ + return pcre2_get_ovector_count_8(match_data); + } +}; + +//16-bit version +template<> struct Pcre2Func<16> { + static Pcre2Type<16>::CompileContext* 
compile_context_create(Pcre2Type<16>::GeneralContext *gcontext){ + return pcre2_compile_context_create_16(gcontext); + } + static void compile_context_free(Pcre2Type<16>::CompileContext *ccontext){ + pcre2_compile_context_free_16(ccontext); + } + static Pcre2Type<16>::CompileContext* compile_context_copy(Pcre2Type<16>::CompileContext* ccontext){ + return pcre2_compile_context_copy_16(ccontext); + } + static const unsigned char * maketables(Pcre2Type<16>::GeneralContext* gcontext){ + return pcre2_maketables_16(gcontext); + } + static int set_character_tables(Pcre2Type<16>::CompileContext * ccontext, const unsigned char * table){ + return pcre2_set_character_tables_16(ccontext, table); + } + static Pcre2Type<16>::Pcre2Code * compile(Pcre2Type<16>::Pcre2Sptr pattern, + PCRE2_SIZE length, + uint32_t options, + int *errorcode, + PCRE2_SIZE *erroroffset, + Pcre2Type<16>::CompileContext *ccontext){ + return pcre2_compile_16(pattern, length, options, errorcode, erroroffset, ccontext); + } + static int jit_compile(Pcre2Type<16>::Pcre2Code *code, uint32_t options){ + return pcre2_jit_compile_16(code, options); + } + static int substitute( const Pcre2Type<16>::Pcre2Code *code, + Pcre2Type<16>::Pcre2Sptr subject, + PCRE2_SIZE length, + PCRE2_SIZE startoffset, + uint32_t options, + Pcre2Type<16>::MatchData *match_data, + Pcre2Type<16>::MatchContext *mcontext, + Pcre2Type<16>::Pcre2Sptr replacement, + PCRE2_SIZE rlength, + Pcre2Type<16>::Pcre2Uchar *outputbuffer, + PCRE2_SIZE *outlengthptr){ + return pcre2_substitute_16( code, subject, length, startoffset, options, match_data, + mcontext, replacement, rlength, outputbuffer, outlengthptr); + } + //~ static int substring_get_bynumber(Pcre2Type<16>::MatchData *match_data, + //~ uint32_t number, + //~ Pcre2Type<16>::Pcre2Uchar **bufferptr, + //~ PCRE2_SIZE *bufflen){ + //~ return pcre2_substring_get_bynumber_16(match_data, number, bufferptr, bufflen); + //~ } + //~ static int substring_get_byname(Pcre2Type<16>::MatchData 
*match_data, + //~ Pcre2Type<16>::Pcre2Sptr name, + //~ Pcre2Type<16>::Pcre2Uchar **bufferptr, + //~ PCRE2_SIZE *bufflen){ + //~ return pcre2_substring_get_byname_16(match_data, name, bufferptr, bufflen); + //~ } + //~ static void substring_free(Pcre2Type<16>::Pcre2Uchar *buffer){ + //~ pcre2_substring_free_16(buffer); + //~ } + //~ static Pcre2Type<16>::Pcre2Code * code_copy(const Pcre2Type<16>::Pcre2Code *code){ + //~ return pcre2_code_copy_16(code); + //~ } + static void code_free(Pcre2Type<16>::Pcre2Code *code){ + pcre2_code_free_16(code); + } + static int get_error_message( int errorcode, + Pcre2Type<16>::Pcre2Uchar *buffer, + PCRE2_SIZE bufflen){ + return pcre2_get_error_message_16(errorcode, buffer, bufflen); + } + static Pcre2Type<16>::MatchData * match_data_create_from_pattern( + const Pcre2Type<16>::Pcre2Code *code, + Pcre2Type<16>::GeneralContext *gcontext){ + return pcre2_match_data_create_from_pattern_16(code, gcontext); + + } + static int match( const Pcre2Type<16>::Pcre2Code *code, + Pcre2Type<16>::Pcre2Sptr subject, + PCRE2_SIZE length, + PCRE2_SIZE startoffset, + uint32_t options, + Pcre2Type<16>::MatchData *match_data, + Pcre2Type<16>::MatchContext *mcontext){ + return pcre2_match_16(code, subject, length, startoffset, options, match_data, mcontext); + } + static void match_data_free(Pcre2Type<16>::MatchData *match_data){ + pcre2_match_data_free_16(match_data); + } + static PCRE2_SIZE * get_ovector_pointer(Pcre2Type<16>::MatchData *match_data){ + return pcre2_get_ovector_pointer_16(match_data); + } + static int pattern_info(const Pcre2Type<16>::Pcre2Code *code, uint32_t what, void *where){ + return pcre2_pattern_info_16(code, what, where); + } + static int set_newline(Pcre2Type<16>::CompileContext *ccontext, uint32_t value){ + return pcre2_set_newline_16(ccontext, value); + } + //~ static void jit_stack_assign(Pcre2Type<16>::MatchContext *mcontext, + //~ Pcre2Type<16>::JitCallback callback_function, + //~ void *callback_data){ + //~ 
pcre2_jit_stack_assign_16(mcontext, callback_function, callback_data); + //~ } + //~ static Pcre2Type<16>::JitStack *jit_stack_create(PCRE2_SIZE startsize, PCRE2_SIZE maxsize, + //~ Pcre2Type<16>::GeneralContext *gcontext){ + //~ return pcre2_jit_stack_create_16(startsize, maxsize, gcontext); + //~ } + //~ static void jit_stack_free(Pcre2Type<16>::JitStack *jit_stack){ + //~ pcre2_jit_stack_free_16(jit_stack); + //~ } + //~ static void jit_free_unused_memory(Pcre2Type<16>::GeneralContext *gcontext){ + //~ pcre2_jit_free_unused_memory_16(gcontext); + //~ } + //~ static Pcre2Type<16>::MatchContext *match_context_create(Pcre2Type<16>::GeneralContext *gcontext){ + //~ return pcre2_match_context_create_16(gcontext); + //~ } + //~ static Pcre2Type<16>::MatchContext *match_context_copy(Pcre2Type<16>::MatchContext *mcontext){ + //~ return pcre2_match_context_copy_16(mcontext); + //~ } + //~ static void match_context_free(Pcre2Type<16>::MatchContext *mcontext){ + //~ pcre2_match_context_free_16(mcontext); + //~ } + static uint32_t get_ovector_count(Pcre2Type<16>::MatchData *match_data){ + return pcre2_get_ovector_count_16(match_data); + } +}; + +//32-bit version +template<> struct Pcre2Func<32> { + static Pcre2Type<32>::CompileContext* compile_context_create(Pcre2Type<32>::GeneralContext *gcontext){ + return pcre2_compile_context_create_32(gcontext); + } + static void compile_context_free(Pcre2Type<32>::CompileContext *ccontext){ + pcre2_compile_context_free_32(ccontext); + } + static Pcre2Type<32>::CompileContext* compile_context_copy(Pcre2Type<32>::CompileContext* ccontext){ + return pcre2_compile_context_copy_32(ccontext); + } + static const unsigned char * maketables(Pcre2Type<32>::GeneralContext* gcontext){ + return pcre2_maketables_32(gcontext); + } + static int set_character_tables(Pcre2Type<32>::CompileContext * ccontext, const unsigned char * table){ + return pcre2_set_character_tables_32(ccontext, table); + } + static Pcre2Type<32>::Pcre2Code * 
compile(Pcre2Type<32>::Pcre2Sptr pattern, + PCRE2_SIZE length, + uint32_t options, + int *errorcode, + PCRE2_SIZE *erroroffset, + Pcre2Type<32>::CompileContext *ccontext){ + return pcre2_compile_32(pattern, length, options, errorcode, erroroffset, ccontext); + } + static int jit_compile(Pcre2Type<32>::Pcre2Code *code, uint32_t options){ + return pcre2_jit_compile_32(code, options); + } + static int substitute( const Pcre2Type<32>::Pcre2Code *code, + Pcre2Type<32>::Pcre2Sptr subject, + PCRE2_SIZE length, + PCRE2_SIZE startoffset, + uint32_t options, + Pcre2Type<32>::MatchData *match_data, + Pcre2Type<32>::MatchContext *mcontext, + Pcre2Type<32>::Pcre2Sptr replacement, + PCRE2_SIZE rlength, + Pcre2Type<32>::Pcre2Uchar *outputbuffer, + PCRE2_SIZE *outlengthptr){ + return pcre2_substitute_32( code, subject, length, startoffset, options, match_data, + mcontext, replacement, rlength, outputbuffer, outlengthptr); + } + //~ static int substring_get_bynumber(Pcre2Type<32>::MatchData *match_data, + //~ uint32_t number, + //~ Pcre2Type<32>::Pcre2Uchar **bufferptr, + //~ PCRE2_SIZE *bufflen){ + //~ return pcre2_substring_get_bynumber_32(match_data, number, bufferptr, bufflen); + //~ } + //~ static int substring_get_byname(Pcre2Type<32>::MatchData *match_data, + //~ Pcre2Type<32>::Pcre2Sptr name, + //~ Pcre2Type<32>::Pcre2Uchar **bufferptr, + //~ PCRE2_SIZE *bufflen){ + //~ return pcre2_substring_get_byname_32(match_data, name, bufferptr, bufflen); + //~ } + //~ static void substring_free(Pcre2Type<32>::Pcre2Uchar *buffer){ + //~ pcre2_substring_free_32(buffer); + //~ } + //~ static Pcre2Type<32>::Pcre2Code * code_copy(const Pcre2Type<32>::Pcre2Code *code){ + //~ return pcre2_code_copy_32(code); + //~ } + static void code_free(Pcre2Type<32>::Pcre2Code *code){ + pcre2_code_free_32(code); + } + static int get_error_message( int errorcode, + Pcre2Type<32>::Pcre2Uchar *buffer, + PCRE2_SIZE bufflen){ + return pcre2_get_error_message_32(errorcode, buffer, bufflen); + } + static 
Pcre2Type<32>::MatchData * match_data_create_from_pattern( + const Pcre2Type<32>::Pcre2Code *code, + Pcre2Type<32>::GeneralContext *gcontext){ + return pcre2_match_data_create_from_pattern_32(code, gcontext); + + } + static int match( const Pcre2Type<32>::Pcre2Code *code, + Pcre2Type<32>::Pcre2Sptr subject, + PCRE2_SIZE length, + PCRE2_SIZE startoffset, + uint32_t options, + Pcre2Type<32>::MatchData *match_data, + Pcre2Type<32>::MatchContext *mcontext){ + return pcre2_match_32(code, subject, length, startoffset, options, match_data, mcontext); + } + static void match_data_free(Pcre2Type<32>::MatchData *match_data){ + pcre2_match_data_free_32(match_data); + } + static PCRE2_SIZE * get_ovector_pointer(Pcre2Type<32>::MatchData *match_data){ + return pcre2_get_ovector_pointer_32(match_data); + } + static int pattern_info(const Pcre2Type<32>::Pcre2Code *code, uint32_t what, void *where){ + return pcre2_pattern_info_32(code, what, where); + } + static int set_newline(Pcre2Type<32>::CompileContext *ccontext, uint32_t value){ + return pcre2_set_newline_32(ccontext, value); + } + //~ static void jit_stack_assign(Pcre2Type<32>::MatchContext *mcontext, + //~ Pcre2Type<32>::JitCallback callback_function, + //~ void *callback_data){ + //~ pcre2_jit_stack_assign_32(mcontext, callback_function, callback_data); + //~ } + //~ static Pcre2Type<32>::JitStack *jit_stack_create(PCRE2_SIZE startsize, PCRE2_SIZE maxsize, + //~ Pcre2Type<32>::GeneralContext *gcontext){ + //~ return pcre2_jit_stack_create_32(startsize, maxsize, gcontext); + //~ } + //~ static void jit_stack_free(Pcre2Type<32>::JitStack *jit_stack){ + //~ pcre2_jit_stack_free_32(jit_stack); + //~ } + //~ static void jit_free_unused_memory(Pcre2Type<32>::GeneralContext *gcontext){ + //~ pcre2_jit_free_unused_memory_32(gcontext); + //~ } + //~ static Pcre2Type<32>::MatchContext *match_context_create(Pcre2Type<32>::GeneralContext *gcontext){ + //~ return pcre2_match_context_create_32(gcontext); + //~ } + //~ static 
Pcre2Type<32>::MatchContext *match_context_copy(Pcre2Type<32>::MatchContext *mcontext){ + //~ return pcre2_match_context_copy_32(mcontext); + //~ } + //~ static void match_context_free(Pcre2Type<32>::MatchContext *mcontext){ + //~ pcre2_match_context_free_32(mcontext); + //~ } + static uint32_t get_ovector_count(Pcre2Type<32>::MatchData *match_data){ + return pcre2_get_ovector_count_32(match_data); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + +///Class to take a std::string modifier value with null safety. +///You don't need to make an instance of this class to pass modifier, +///just pass std::string or char const*, whatever seems feasible, +///implicit conversion will kick in and take care of things for you. +class Modifier{ + std::string mod; + + public: + ///Default constructor. + Modifier(){} + + ///Constructor that takes a std::string. + ///@param x std::string as a reference. + Modifier(std::string const& x):mod(x){} + + ///Constructor that takes char const * (null safety is provided by this one) + ///@param x char const * + Modifier(char const *x):mod(x?x:""){} + + ///Returns the modifier string + ///@return modifier string (std::string) + std::string str() const { return mod; } + + ///Returns the c_str() of modifier string + ///@return char const * + char const * c_str() const { return mod.c_str(); } + + ///Returns the length of the modifier string + ///@return length + SIZE_T length() const{ return mod.length(); } + + ///operator[] overload to access character by index. + ///@param i index + ///@return character at index i. + char operator[](SIZE_T i) const { return mod[i]; } +}; + + +// Namespace for modifier constants. +// For each modifier constant there is a jpcre2::Uint option value. +// Some modifiers may have multiple values set together (ORed in bitwise operation) and +// thus they may include other modifiers. 
Such an example is the 'n' modifier. It is combined together with 'u'. +namespace MOD { + + // Define modifiers for compile + // String of compile modifier characters for PCRE2 options + static const char C_N[] = "eijmnsuxADJU"; + // Array of compile modifier values for PCRE2 options + // Uint is being used in getModifier() in for loop to get the number of element in this array, + // be sure to chnage there if you change here. + static const jpcre2::Uint C_V[12] = { PCRE2_MATCH_UNSET_BACKREF, // Modifier e + PCRE2_CASELESS, // Modifier i + PCRE2_ALT_BSUX | PCRE2_MATCH_UNSET_BACKREF, // Modifier j + PCRE2_MULTILINE, // Modifier m + PCRE2_UTF | PCRE2_UCP, // Modifier n (includes u) + PCRE2_DOTALL, // Modifier s + PCRE2_UTF, // Modifier u + PCRE2_EXTENDED, // Modifier x + PCRE2_ANCHORED, // Modifier A + PCRE2_DOLLAR_ENDONLY, // Modifier D + PCRE2_DUPNAMES, // Modifier J + PCRE2_UNGREEDY // Modifier U + }; + + + // String of compile modifier characters for JPCRE2 options + static const char CJ_N[] = "S"; + // Array of compile modifier values for JPCRE2 options + static const jpcre2::Uint CJ_V[1] = { JIT_COMPILE, // Modifier S + }; + + + // Define modifiers for replace + // String of action (replace) modifier characters for PCRE2 options + static const char R_N[] = "eEgx"; + // Array of action (replace) modifier values for PCRE2 options + static const jpcre2::Uint R_V[4] = { PCRE2_SUBSTITUTE_UNSET_EMPTY, // Modifier e + PCRE2_SUBSTITUTE_UNKNOWN_UNSET | PCRE2_SUBSTITUTE_UNSET_EMPTY, // Modifier E (includes e) + PCRE2_SUBSTITUTE_GLOBAL, // Modifier g + PCRE2_SUBSTITUTE_EXTENDED // Modifier x + }; + + + // String of action (replace) modifier characters for JPCRE2 options + static const char RJ_N[] = ""; + // Array of action (replace) modifier values for JPCRE2 options + static const jpcre2::Uint RJ_V[1] = { NONE //placeholder + }; + + // Define modifiers for match + // String of action (match) modifier characters for PCRE2 options + static const char M_N[] = "A"; + // 
Array of action (match) modifier values for PCRE2 options + static const jpcre2::Uint M_V[1] = { PCRE2_ANCHORED // Modifier A + }; + + + // String of action (match) modifier characters for JPCRE2 options + static const char MJ_N[] = "g"; + // Array of action (match) modifier values for JPCRE2 options + static const jpcre2::Uint MJ_V[1] = { FIND_ALL, // Modifier g + }; + + static inline void toOption(Modifier const& mod, bool x, + Uint const * J_V, char const * J_N, SIZE_T SJ, + Uint const * V, char const * N, SIZE_T S, + Uint* po, Uint* jo, + int* en, SIZE_T* eo + ){ + //loop through mod + SIZE_T n = mod.length(); + for (SIZE_T i = 0; i < n; ++i) { + //First check for JPCRE2 mods + for(SIZE_T j = 0; j < SJ; ++j){ + if(J_N[j] == mod[i]) { + if(x) *jo |= J_V[j]; + else *jo &= ~J_V[j]; + goto endfor; + } + } + + //Now check for PCRE2 mods + for(SIZE_T j = 0; j< S; ++j){ + if(N[j] == mod[i]){ + if(x) *po |= V[j]; + else *po &= ~V[j]; + goto endfor; + } + } + + //Modifier didn't match, invalid modifier + *en = (int)ERROR::INVALID_MODIFIER; + *eo = (int)mod[i]; + + endfor:; + } + } + + static inline void toMatchOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo){ + toOption(mod, x, + MJ_V, MJ_N, sizeof(MJ_V)/sizeof(Uint), + M_V, M_N, sizeof(M_V)/sizeof(Uint), + po, jo, en, eo); + } + + static inline void toReplaceOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo){ + toOption(mod, x, + RJ_V, RJ_N, sizeof(RJ_V)/sizeof(Uint), + R_V, R_N, sizeof(R_V)/sizeof(Uint), + po, jo, en, eo); + } + + static inline void toCompileOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo){ + toOption(mod, x, + CJ_V, CJ_N, sizeof(CJ_V)/sizeof(Uint), + C_V, C_N, sizeof(C_V)/sizeof(Uint), + po, jo, en, eo); + } + + static inline std::string fromOption(Uint const * J_V, char const * J_N, SIZE_T SJ, + Uint const * V, char const * N, SIZE_T S, + Uint po, Uint jo + ){ + std::string mod; + //Calculate PCRE2 mod + for(SIZE_T i = 
0; i < S; ++i){ + if( (V[i] & po) != 0 && + (V[i] & po) == V[i]) //One option can include other + mod += N[i]; + } + //Calculate JPCRE2 mod + for(SIZE_T i = 0; i < SJ; ++i){ + if( (J_V[i] & jo) != 0 && + (J_V[i] & jo) == J_V[i]) //One option can include other + mod += J_N[i]; + } + return mod; + } + + static inline std::string fromMatchOption(Uint po, Uint jo){ + return fromOption(MJ_V, MJ_N, sizeof(MJ_V)/sizeof(Uint), + M_V, M_N, sizeof(M_V)/sizeof(Uint), + po, jo); + } + + static inline std::string fromReplaceOption(Uint po, Uint jo){ + return fromOption(RJ_V, RJ_N, sizeof(RJ_V)/sizeof(Uint), + R_V, R_N, sizeof(R_V)/sizeof(Uint), + po, jo); + } + + static inline std::string fromCompileOption(Uint po, Uint jo){ + return fromOption(CJ_V, CJ_N, sizeof(CJ_V)/sizeof(Uint), + C_V, C_N, sizeof(C_V)/sizeof(Uint), + po, jo); + } + +} //MOD namespace ends + +///Lets you create custom modifier tables. +///An instance of this class can be passed to +///match, replace or compile related class objects. 
+class ModifierTable{ + + std::string tabjms; + std::string tabms; + std::string tabjrs; + std::string tabrs; + std::string tabjcs; + std::string tabcs; + VecOpt tabjmv; + VecOpt tabmv; + VecOpt tabjrv; + VecOpt tabrv; + VecOpt tabjcv; + VecOpt tabcv; + + void toOption(Modifier const& mod, bool x, + VecOpt const& J_V, std::string const& J_N, + VecOpt const& V, std::string const& N, + Uint* po, Uint* jo, int* en, SIZE_T* eo + ) const{ + SIZE_T SJ = J_V.size(); + SIZE_T S = V.size(); + JPCRE2_ASSERT(SJ == J_N.length(), ("ValueError: Modifier character and value table must be of the same size (" + _tostdstring(SJ) + " == " + _tostdstring(J_N.length()) + ").").c_str()); + JPCRE2_ASSERT(S == N.length(), ("ValueError: Modifier character and value table must be of the same size (" + _tostdstring(S) + " == " + _tostdstring(N.length()) + ").").c_str()); + MOD::toOption(mod, x, + J_V.empty()?0:&J_V[0], J_N.c_str(), SJ, + V.empty()?0:&V[0], N.c_str(), S, + po, jo, en, eo + ); + } + + std::string fromOption(VecOpt const& J_V, std::string const& J_N, + VecOpt const& V, std::string const& N, + Uint po, Uint jo) const{ + SIZE_T SJ = J_V.size(); + SIZE_T S = V.size(); + JPCRE2_ASSERT(SJ == J_N.length(), ("ValueError: Modifier character and value table must be of the same size (" + _tostdstring(SJ) + " == " + _tostdstring(J_N.length()) + ").").c_str()); + JPCRE2_ASSERT(S == N.length(), ("ValueError: Modifier character and value table must be of the same size (" + _tostdstring(S) + " == " + _tostdstring(N.length()) + ").").c_str()); + return MOD::fromOption(J_V.empty()?0:&J_V[0], J_N.c_str(), SJ, + V.empty()?0:&V[0], N.c_str(), S, + po, jo); + } + + void parseModifierTable(std::string& tabjs, VecOpt& tabjv, + std::string& tab_s, VecOpt& tab_v, + std::string const& tabs, VecOpt const& tabv); + public: + + ///Default constructor that creates an empty modifier table. + ModifierTable(){} + + ///@overload + ///@param deflt Initialize with default table if true, otherwise keep empty. 
+ ModifierTable(bool deflt){ + if(deflt) setAllToDefault(); + } + + ///Reset the match modifier table to its initial (empty) state including memory. + ///@return A reference to the calling ModifierTable object. + ModifierTable& resetMatchModifierTable(){ + std::string().swap(tabjms); + std::string().swap(tabms); + VecOpt().swap(tabjmv); + VecOpt().swap(tabmv); + return *this; + } + + ///Reset the replace modifier table to its initial (empty) state including memory. + ///@return A reference to the calling ModifierTable object. + ModifierTable& resetReplaceModifierTable(){ + std::string().swap(tabjrs); + std::string().swap(tabrs); + VecOpt().swap(tabjrv); + VecOpt().swap(tabrv); + return *this; + } + + ///Reset the compile modifier table to its initial (empty) state including memory. + ///@return A reference to the calling ModifierTable object. + ModifierTable& resetCompileModifierTable(){ + std::string().swap(tabjcs); + std::string().swap(tabcs); + VecOpt().swap(tabjcv); + VecOpt().swap(tabcv); + return *this; + } + + ///Reset the modifier tables to their initial (empty) state including memory. + ///@return A reference to the calling ModifierTable object. + ModifierTable& reset(){ + resetMatchModifierTable(); + resetReplaceModifierTable(); + resetCompileModifierTable(); + return *this; + } + + ///Clear the match modifier table to its initial (empty) state. + ///Memory may retain for further use. + ///@return A reference to the calling ModifierTable object. + ModifierTable& clearMatchModifierTable(){ + tabjms.clear(); + tabms.clear(); + tabjmv.clear(); + tabmv.clear(); + return *this; + } + + ///Clear the replace modifier table to its initial (empty) state. + ///Memory may retain for further use. + ///@return A reference to the calling ModifierTable object. + ModifierTable& clearReplaceModifierTable(){ + tabjrs.clear(); + tabrs.clear(); + tabjrv.clear(); + tabrv.clear(); + return *this; + } + + ///Clear the compile modifier table to its initial (empty) state. 
+ ///Memory may retain for further use. + ///@return A reference to the calling ModifierTable object. + ModifierTable& clearCompileModifierTable(){ + tabjcs.clear(); + tabcs.clear(); + tabjcv.clear(); + tabcv.clear(); + return *this; + } + + ///Clear the modifier tables to their initial (empty) state. + ///Memory may retain for further use. + ///@return A reference to the calling ModifierTable object. + ModifierTable& clear(){ + clearMatchModifierTable(); + clearReplaceModifierTable(); + clearCompileModifierTable(); + return *this; + } + + ///Modifier parser for match related options. + ///@param mod modifier string + ///@param x whether to add or remove the modifers. + ///@param po pointer to PCRE2 match option that will be modified. + ///@param jo pointer to JPCRE2 match option that will be modified. + ///@param en where to put the error number. + ///@param eo where to put the error offset. + void toMatchOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo) const { + toOption(mod, x,tabjmv,tabjms,tabmv, tabms,po,jo,en,eo); + } + + ///Modifier parser for replace related options. + ///@param mod modifier string + ///@param x whether to add or remove the modifers. + ///@param po pointer to PCRE2 replace option that will be modified. + ///@param jo pointer to JPCRE2 replace option that will be modified. + ///@param en where to put the error number. + ///@param eo where to put the error offset. + void toReplaceOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo) const { + return toOption(mod, x,tabjrv,tabjrs,tabrv,tabrs,po,jo,en,eo); + } + + ///Modifier parser for compile related options. + ///@param mod modifier string + ///@param x whether to add or remove the modifers. + ///@param po pointer to PCRE2 compile option that will be modified. + ///@param jo pointer to JPCRE2 compile option that will be modified. + ///@param en where to put the error number. + ///@param eo where to put the error offset. 
+ void toCompileOption(Modifier const& mod, bool x, Uint* po, Uint* jo, int* en, SIZE_T* eo) const { + return toOption(mod, x,tabjcv,tabjcs,tabcv,tabcs,po,jo,en,eo); + } + + ///Take match related option value and convert to modifier string. + ///@param po PCRE2 option. + ///@param jo JPCRE2 option. + ///@return modifier string (std::string) + std::string fromMatchOption(Uint po, Uint jo) const { + return fromOption(tabjmv,tabjms,tabmv,tabms,po,jo); + } + + ///Take replace related option value and convert to modifier string. + ///@param po PCRE2 option. + ///@param jo JPCRE2 option. + ///@return modifier string (std::string) + std::string fromReplaceOption(Uint po, Uint jo) const { + return fromOption(tabjrv,tabjrs,tabrv,tabrs,po,jo); + } + + ///Take compile related option value and convert to modifier string. + ///@param po PCRE2 option. + ///@param jo JPCRE2 option. + ///@return modifier string (std::string) + std::string fromCompileOption(Uint po, Uint jo) const { + return fromOption(tabjcv,tabjcs,tabcv,tabcs,po,jo); + } + + ///Set modifier table for match. + ///Takes a string and a vector of sequential options. + ///@param tabs modifier string (list of modifiers) + ///@param tabv vector of Uint (options). + ///@return A reference to the calling ModifierTable object. + ModifierTable& setMatchModifierTable(std::string const& tabs, VecOpt const& tabv){ + parseModifierTable(tabjms, tabjmv, tabms, tabmv, tabs, tabv); + return *this; + } + + ///Set modifier table for match. + ///Takes a string and an array of sequential options. + ///@param tabs modifier string (list of modifiers) + ///@param tabvp array of Uint (options). If null, table is set to empty. + ///@return A reference to the calling ModifierTable object. 
+ ModifierTable& setMatchModifierTable(std::string const& tabs, const Uint* tabvp){ + if(tabvp) { + VecOpt tabv(tabvp, tabvp + tabs.length()); + setMatchModifierTable(tabs, tabv); + } else clearMatchModifierTable(); + return *this; + } + + ///@overload + ///... + ///This one takes modifier and value by array. + ///If the arrays are not of the same length, the behavior is undefined. + ///If any of the argument is null, the table is set empty. + ///@param tabsp modifier string (list of modifiers). + ///@param tabvp array of Uint (options). + ///@return A reference to the calling ModifierTable object. + ModifierTable& setMatchModifierTable(const char* tabsp, const Uint* tabvp){ + if(tabsp && tabvp) { + std::string tabs(tabsp); + VecOpt tabv(tabvp, tabvp + tabs.length()); + setMatchModifierTable(tabs, tabv); + } else clearMatchModifierTable(); + return *this; + } + + ///Set modifier table for replace. + ///Takes a string and a vector of sequential options. + ///@param tabs modifier string (list of modifiers) + ///@param tabv vector of Uint (options). + ///@return A reference to the calling ModifierTable object. + ModifierTable& setReplaceModifierTable(std::string const& tabs, VecOpt const& tabv){ + parseModifierTable(tabjrs, tabjrv, tabrs, tabrv, tabs, tabv); + return *this; + } + + ///Set modifier table for replace. + ///Takes a string and an array of sequential options. + ///@param tabs modifier string (list of modifiers) + ///@param tabvp array of Uint (options). If null, table is set to empty. + ///@return A reference to the calling ModifierTable object. + ModifierTable& setReplaceModifierTable(std::string const& tabs, const Uint* tabvp){ + if(tabvp) { + VecOpt tabv(tabvp, tabvp + tabs.length()); + setReplaceModifierTable(tabs, tabv); + } else clearReplaceModifierTable(); + return *this; + } + + ///@overload + ///... + ///This one takes modifier and value by array. + ///If the arrays are not of the same length, the behavior is undefined. 
+ ///If any of the argument is null, the table is set empty. + ///@param tabsp modifier string (list of modifiers). + ///@param tabvp array of Uint (options). + ///@return A reference to the calling ModifierTable object. + ModifierTable& setReplaceModifierTable(const char* tabsp, const Uint* tabvp){ + if(tabsp && tabvp) { + std::string tabs(tabsp); + VecOpt tabv(tabvp, tabvp + tabs.length()); + setReplaceModifierTable(tabs, tabv); + } else clearReplaceModifierTable(); + return *this; + } + + ///Set modifier table for compile. + ///Takes a string and a vector of sequential options. + ///@param tabs modifier string (list of modifiers) + ///@param tabv vector of Uint (options). + ///@return A reference to the calling ModifierTable object. + ModifierTable& setCompileModifierTable(std::string const& tabs, VecOpt const& tabv){ + parseModifierTable(tabjcs, tabjcv, tabcs, tabcv, tabs, tabv); + return *this; + } + + ///Set modifier table for compile. + ///Takes a string and an array of sequential options. + ///@param tabs modifier string (list of modifiers) + ///@param tabvp array of Uint (options). If null, table is set to empty. + ///@return A reference to the calling ModifierTable object. + ModifierTable& setCompileModifierTable(std::string const& tabs, const Uint* tabvp){ + if(tabvp) { + VecOpt tabv(tabvp, tabvp + tabs.length()); + setCompileModifierTable(tabs, tabv); + } else clearCompileModifierTable(); + return *this; + } + + ///@overload + ///... + ///This one takes modifier and value by array. + ///If the arrays are not of the same length, the behavior is undefined. + ///If any of the argument is null, the table is set empty. + ///@param tabsp modifier string (list of modifiers). + ///@param tabvp array of Uint (options). + ///@return A reference to the calling ModifierTable object. 
+ ModifierTable& setCompileModifierTable(const char* tabsp, const Uint* tabvp){ + if(tabsp && tabvp) { + std::string tabs(tabsp); + VecOpt tabv(tabvp, tabvp + tabs.length()); + setCompileModifierTable(tabs, tabv); + } else clearCompileModifierTable(); + return *this; + } + + ///Set match modifier table to default (copies the built-in MOD::MJ_N/MJ_V and MOD::M_N/M_V tables). + ///@return A reference to the calling ModifierTable object. + ModifierTable& setMatchModifierTableToDefault(){ + tabjms = std::string(MOD::MJ_N, MOD::MJ_N + sizeof(MOD::MJ_V)/sizeof(Uint)); + tabms = std::string(MOD::M_N, MOD::M_N + sizeof(MOD::M_V)/sizeof(Uint)); + tabjmv = VecOpt(MOD::MJ_V, MOD::MJ_V + sizeof(MOD::MJ_V)/sizeof(Uint)); + tabmv = VecOpt(MOD::M_V, MOD::M_V + sizeof(MOD::M_V)/sizeof(Uint)); + return *this; + } + + ///Set replace modifier table to default (copies the built-in MOD::RJ_N/RJ_V and MOD::R_N/R_V tables). + ///@return A reference to the calling ModifierTable object. + ModifierTable& setReplaceModifierTableToDefault(){ + tabjrs = std::string(MOD::RJ_N, MOD::RJ_N + sizeof(MOD::RJ_V)/sizeof(Uint)); + tabrs = std::string(MOD::R_N, MOD::R_N + sizeof(MOD::R_V)/sizeof(Uint)); + tabjrv = VecOpt(MOD::RJ_V, MOD::RJ_V + sizeof(MOD::RJ_V)/sizeof(Uint)); + tabrv = VecOpt(MOD::R_V, MOD::R_V + sizeof(MOD::R_V)/sizeof(Uint)); + return *this; + } + + ///Set compile modifier table to default (copies the built-in MOD::CJ_N/CJ_V and MOD::C_N/C_V tables). + ///@return A reference to the calling ModifierTable object. + ModifierTable& setCompileModifierTableToDefault(){ + tabjcs = std::string(MOD::CJ_N, MOD::CJ_N + sizeof(MOD::CJ_V)/sizeof(Uint)); + tabcs = std::string(MOD::C_N, MOD::C_N + sizeof(MOD::C_V)/sizeof(Uint)); + tabjcv = VecOpt(MOD::CJ_V, MOD::CJ_V + sizeof(MOD::CJ_V)/sizeof(Uint)); + tabcv = VecOpt(MOD::C_V, MOD::C_V + sizeof(MOD::C_V)/sizeof(Uint)); + return *this; + } + + ///Set all tables to default. + ///@return A reference to the calling ModifierTable object. 
+ ModifierTable& setAllToDefault(){ + setMatchModifierTableToDefault(); + setReplaceModifierTableToDefault(); + setCompileModifierTableToDefault(); + return *this; + } +}; + + +//These message strings are used for error/warning message construction. +//take care to prevent multiple definition +template struct MSG{ + static std::basic_string INVALID_MODIFIER(void); + static std::basic_string INSUFFICIENT_OVECTOR(void); +}; +//specialization +template<> inline std::basic_string MSG::INVALID_MODIFIER(){ return "Invalid modifier: "; } +template<> inline std::basic_string MSG::INVALID_MODIFIER(){ return L"Invalid modifier: "; } +template<> inline std::basic_string MSG::INSUFFICIENT_OVECTOR(){ return "ovector wasn't big enough"; } +template<> inline std::basic_string MSG::INSUFFICIENT_OVECTOR(){ return L"ovector wasn't big enough"; } +#if JPCRE2_USE_MINIMUM_CXX_11 +template<> inline std::basic_string MSG::INVALID_MODIFIER(){ return u"Invalid modifier: "; } +template<> inline std::basic_string MSG::INVALID_MODIFIER(){ return U"Invalid modifier: "; } +template<> inline std::basic_string MSG::INSUFFICIENT_OVECTOR(){ return u"ovector wasn't big enough"; } +template<> inline std::basic_string MSG::INSUFFICIENT_OVECTOR(){ return U"ovector wasn't big enough"; } +#endif + +///struct to select the types. +/// +///@tparam Char_T Character type (`char`, `wchar_t`, `char16_t`, `char32_t`) +///@tparam Map Optional parameter (Only `>= C++11`) to specify a map container (`std::map`, `std::unordered_map` etc..). Default is `std::map`. +/// +///The character type (`Char_T`) must be in accordance with the PCRE2 library you are linking against. +///If not sure which library you need, link against all 3 PCRE2 libraries and they will be used as needed. +/// +///If you want to be specific, then here's the rule: +/// +///1. If `Char_T` is 8 bit, you need 8 bit PCRE2 library +///2. If `Char_T` is 16 bit, you need 16 bit PCRE2 library +///3. 
If `Char_T` is 32 bit, you need 32 bit PCRE2 library +///4. if `Char_T` is not 8 or 16 or 32 bit, you will get compile error. +/// +///In `>= C++11` you get an additional optional template parameter to specify a map container. +///For example, you can use `std::unordered_map` instead of the default `std::map`: +/// ```cpp +/// #include +/// typedef jpcre2::select jp; +/// ``` +/// +///We will use the following typedef throughout this doc: +///```cpp +///typedef jpcre2::select jp; +///``` +#if JPCRE2_USE_MINIMUM_CXX_11 +template class Map=std::map> +#else +template +#endif +struct select{ + + ///Typedef for character (`char`, `wchar_t`, `char16_t`, `char32_t`) + typedef Char_T Char; + + //typedef Char_T Char; + ///Typedef for string (`std::string`, `std::wstring`, `std::u16string`, `std::u32string`). + ///Defined as `std::basic_string`. + ///May be this list will make more sense: + ///Character | String + ///--------- | ------- + ///char | std::string + ///wchar_t | std::wstring + ///char16_t | std::u16string (>=C++11) + ///char32_t | std::u32string (>=C++11) + typedef typename std::basic_string String; + + #if JPCRE2_USE_MINIMUM_CXX_11 + ///Map for Named substrings. + typedef class Map MapNas; + ///Substring name to Substring number map. + typedef class Map MapNtN; + #else + ///Map for Named substrings. + typedef typename std::map MapNas; + ///Substring name to Substring number map. + typedef typename std::map MapNtN; + #endif + + ///Allow spelling mistake of MapNtN as MapNtn. + typedef MapNtN MapNtn; + + ///Vector for Numbered substrings (Sub container). + typedef typename std::vector NumSub; + ///Vector of matches with named substrings. + typedef typename std::vector VecNas; + ///Vector of substring name to substring number map. + typedef typename std::vector VecNtN; + ///Allow spelling mistake of VecNtN as VecNtn. + typedef VecNtN VecNtn; + ///Vector of matches with numbered substrings. 
+ typedef typename std::vector VecNum; + + //These are to shorten the code + typedef typename Pcre2Type::Pcre2Uchar Pcre2Uchar; + typedef typename Pcre2Type::Pcre2Sptr Pcre2Sptr; + typedef typename Pcre2Type::Pcre2Code Pcre2Code; + typedef typename Pcre2Type::CompileContext CompileContext; + typedef typename Pcre2Type::MatchData MatchData; + typedef typename Pcre2Type::GeneralContext GeneralContext; + typedef typename Pcre2Type::MatchContext MatchContext; + typedef typename Pcre2Type::JitCallback JitCallback; + typedef typename Pcre2Type::JitStack JitStack; + + template + static String toString(T); //prevent implicit type conversion of T + + ///Converts a Char_T to jpcre2::select::String + ///@param a Char_T + ///@return jpcre2::select::String + static String toString(Char a){ + return a?String(1, a):String(); + } + + ///@overload + ///... + ///Converts a Char_T const * to jpcre2::select::String + ///@param a Char_T const * + ///@return jpcre2::select::String + static String toString(Char const *a){ + return a?String(a):String(); + } + + ///@overload + ///... + ///Converts a Char_T* to jpcre2::select::String + ///@param a Char_T const * + ///@return jpcre2::select::String + static String toString(Char* a){ + return a?String(a):String(); + } + + ///@overload + ///... + ///Converts a PCRE2_UCHAR to String + ///@param a PCRE2_UCHAR + ///@return jpcre2::select::String + static String toString(Pcre2Uchar* a) { + return a?String((Char*) a):String(); + } + + ///Retruns error message from PCRE2 error number + ///@param err_num error number (negative) + ///@return message as jpcre2::select::String. 
+ static String getPcre2ErrorMessage(int err_num) { + Pcre2Uchar buffer[sizeof(Char)*CHAR_BIT*1024]; + Pcre2Func::get_error_message(err_num, buffer, sizeof(buffer)/sizeof(Pcre2Uchar)); //PCRE2 takes the buffer length in code units, not bytes + return toString((Pcre2Uchar*) buffer); + } + + ///Returns error message (either JPCRE2 or PCRE2) from error number and error offset + ///@param err_num error number (negative for PCRE2, positive for JPCRE2) + ///@param err_off error offset + ///@return message as jpcre2::select::String. + static String getErrorMessage(int err_num, int err_off) { + if(err_num == (int)ERROR::INVALID_MODIFIER){ + return MSG::INVALID_MODIFIER() + toString((Char)err_off); + } else if(err_num == (int)ERROR::INSUFFICIENT_OVECTOR){ + return MSG::INSUFFICIENT_OVECTOR(); + } else if(err_num != 0) { + return getPcre2ErrorMessage((int) err_num); + } else return String(); + } + + //forward declaration + class Regex; + class RegexMatch; + class RegexReplace; + class MatchEvaluator; + + /** Provides public constructors to create RegexMatch objects. + * Every RegexMatch object should be associated with a Regex object. + * This class stores a pointer to its associated Regex object, thus when + * the content of the associated Regex object is changed, there will be no need to + * set the pointer again. 
+ * + * Examples: + * + * ```cpp + * jp::Regex re; + * jp::RegexMatch rm; + * rm.setRegexObject(&re); + * rm.match("subject", "g"); // 0 match + * re.compile("\\w"); + * rm.match(); // 7 matches + * ``` + */ + class RegexMatch { + + private: + + friend class MatchEvaluator; + + Regex const *re; + + String m_subject; + String const *m_subject_ptr; + Uint match_opts; + Uint jpcre2_match_opts; + MatchContext *mcontext; + ModifierTable const * modtab; + MatchData * mdata; + + PCRE2_SIZE _start_offset; //name collision, use _ at start + + VecNum* vec_num; + VecNas* vec_nas; + VecNtN* vec_ntn; + + VecOff* vec_soff; + VecOff* vec_eoff; + + bool getNumberedSubstrings(int, Pcre2Sptr, PCRE2_SIZE*, uint32_t); + + bool getNamedSubstrings(int, int, Pcre2Sptr, Pcre2Sptr, PCRE2_SIZE*); + + void init_vars() { + re = 0; + vec_num = 0; + vec_nas = 0; + vec_ntn = 0; + vec_soff = 0; + vec_eoff = 0; + match_opts = 0; + jpcre2_match_opts = 0; + error_number = 0; + error_offset = 0; + _start_offset = 0; + m_subject_ptr = &m_subject; + mcontext = 0; + modtab = 0; + mdata = 0; + } + + void onlyCopy(RegexMatch const &rm){ + re = rm.re; //only pointer should be copied + + //pointer to subject may point to m_subject or other user data + m_subject_ptr = (rm.m_subject_ptr == &rm.m_subject) ? &m_subject //not &rm.m_subject + : rm.m_subject_ptr; + + //underlying data of vectors are not handled by RegexMatch + //thus it's safe to just copy the pointers. 
+ vec_num = rm.vec_num; + vec_nas = rm.vec_nas; + vec_ntn = rm.vec_ntn; + vec_soff = rm.vec_soff; + vec_eoff = rm.vec_eoff; + + match_opts = rm.match_opts; + jpcre2_match_opts = rm.jpcre2_match_opts; + error_number = rm.error_number; + error_offset = rm.error_offset; + _start_offset = rm._start_offset; + mcontext = rm.mcontext; + modtab = rm.modtab; + mdata = rm.mdata; + } + + void deepCopy(RegexMatch const &rm){ + m_subject = rm.m_subject; + onlyCopy(rm); + } + + #if JPCRE2_USE_MINIMUM_CXX_11 + void deepMove(RegexMatch& rm){ + m_subject = std::move_if_noexcept(rm.m_subject); + onlyCopy(rm); + } + #endif + + friend class Regex; + + protected: + + int error_number; + PCRE2_SIZE error_offset; + + public: + + ///Default constructor. + RegexMatch(){ + init_vars(); + } + + ///@overload + ///... + ///Creates a RegexMatch object associating a Regex object. + ///Underlying data is not modified. + ///@param r pointer to a Regex object + RegexMatch(Regex const *r) { + init_vars(); + re = r; + } + + ///@overload + ///... + ///Copy constructor. + ///@param rm Reference to RegexMatch object + RegexMatch(RegexMatch const &rm){ + init_vars(); + deepCopy(rm); + } + + ///Overloaded copy-assignment operator. + ///@param rm RegexMatch object + ///@return A reference to the calling RegexMatch object. + virtual RegexMatch& operator=(RegexMatch const &rm){ + if(this == &rm) return *this; + deepCopy(rm); + return *this; + } + + #if JPCRE2_USE_MINIMUM_CXX_11 + ///@overload + ///... + ///Move constructor. + ///This constructor steals resources from the argument. + ///It leaves the argument in a valid but indeterminate sate. + ///The indeterminate state can be returned to normal by calling reset() on that object. + ///@param rm rvalue reference to a RegexMatch object + RegexMatch(RegexMatch&& rm){ + init_vars(); + deepMove(rm); + } + + ///@overload + ///... + ///Overloaded move-assignment operator. + ///This constructor steals resources from the argument. 
+ ///It leaves the argument in a valid but indeterminate state. + ///The indeterminate state can be returned to normal by calling reset() on that object. + ///@param rm rvalue reference to a RegexMatch object + ///@return A reference to the calling RegexMatch object. + virtual RegexMatch& operator=(RegexMatch&& rm){ + if(this == &rm) return *this; + deepMove(rm); + return *this; + } + #endif + + ///Destructor. + ///The body is empty: nothing is explicitly freed here. + virtual ~RegexMatch() {} + + ///Reset all class variables to their default (initial) state including memory. + ///Data in the vectors is retained (as it's external); only the pointers to them are nulled by init_vars(). + ///You will need to pass vector pointers again after calling this function to get match results. + ///@return Reference to the calling RegexMatch object. + virtual RegexMatch& reset() { + String().swap(m_subject); //not ptr , external string won't be modified. + init_vars(); + return *this; + } + + ///Clear all class variables (may retain some memory for further use). + ///Data in the vectors is retained (as it's external); only the pointers to them are nulled by init_vars(). + ///You will need to pass vector pointers again after calling this function to get match results. + ///@return Reference to the calling RegexMatch object. + virtual RegexMatch& clear(){ + m_subject.clear(); //not ptr , external string won't be modified. + init_vars(); + return *this; + } + + ///Reset match related errors to zero. + ///If you want to examine the error status of a function call in the method chain, + ///add this function just before your target function so that the error is set to zero + ///before that target function is called, and leave everything out after the target + ///function so that there will be no additional errors from other function calls. 
+ ///@return A reference to the RegexMatch object + ///@see Regex::resetErrors() + ///@see RegexReplace::resetErrors() + virtual RegexMatch& resetErrors(){ + error_number = 0; + error_offset = 0; + return *this; + } + + /// Returns the last error number + ///@return Last error number + virtual int getErrorNumber() const { + return error_number; + } + + /// Returns the last error offset + ///@return Last error offset + virtual int getErrorOffset() const { + return (int)error_offset; + } + + /// Returns the last error message + ///@return Last error message + virtual String getErrorMessage() const { + #if JPCRE2_USE_MINIMUM_CXX_11 + return select::getErrorMessage(error_number, error_offset); + #else + return select::getErrorMessage(error_number, error_offset); + #endif + } + + ///Get subject string (by value). + ///@return subject string + ///@see RegexReplace::getSubject() + virtual String getSubject() const { + return *m_subject_ptr; + } + + ///Get pointer to subject string. + ///Data can not be changed with this pointer. + ///@return constant subject string pointer + ///@see RegexReplace::getSubjectPointer() + virtual String const * getSubjectPointer() const { + return m_subject_ptr; + } + + + /// Calculate modifier string from PCRE2 and JPCRE2 options and return it. + /// + /// Do remember that modifiers (or PCRE2 and JPCRE2 options) do not change or get initialized + /// as long as you don't do that explicitly. Calling RegexMatch::setModifier() will re-set them. + /// + /// **Mixed or combined modifier**. + /// + /// Some modifier may include other modifiers i.e they have the same meaning of some modifiers + /// combined together. For example, the 'n' modifier includes the 'u' modifier and together they + /// are equivalent to `PCRE2_UTF | PCRE2_UCP`. When you set a modifier like this, both options + /// get set, and when you remove the 'n' modifier (with `RegexMatch::changeModifier()`), both will get removed. 
+ ///@return Calculated modifier string (std::string) + ///@see Regex::getModifier() + ///@see RegexReplace::getModifier() + virtual std::string getModifier() const { + return modtab ? modtab->fromMatchOption(match_opts, jpcre2_match_opts) + : MOD::fromMatchOption(match_opts, jpcre2_match_opts); + } + + ///Get the modifier table that is set, + ///@return pointer to constant ModifierTable. + virtual ModifierTable const* getModifierTable(){ + return modtab; + } + + + ///Get PCRE2 option + ///@return PCRE2 option for match operation + ///@see Regex::getPcre2Option() + ///@see RegexReplace::getPcre2Option() + virtual Uint getPcre2Option() const { + return match_opts; + } + + /// Get JPCRE2 option + ///@return JPCRE2 options for math operation + ///@see Regex::getJpcre2Option() + ///@see RegexReplace::getJpcre2Option() + virtual Uint getJpcre2Option() const { + return jpcre2_match_opts; + } + + /// Get offset from where match will start in the subject. + /// @return Start offset + virtual PCRE2_SIZE getStartOffset() const { + return _start_offset; + } + + ///Get pre-set match start offset vector pointer. + ///The pointer must be set with RegexMatch::setMatchStartOffsetVector() beforehand + ///for this to work i.e it is just a convenience method to get the pre-set vector pointer. + ///@return pointer to the const match start offset vector + virtual VecOff const* getMatchStartOffsetVector() const { + return vec_soff; + } + + ///Get pre-set match end offset vector pointer. + ///The pointer must be set with RegexMatch::setMatchEndOffsetVector() beforehand + ///for this to work i.e it is just a convenience method to get the pre-set vector pointer. + ///@return pointer to the const end offset vector + virtual VecOff const* getMatchEndOffsetVector() const { + return vec_eoff; + } + + ///Get a pointer to the associated Regex object. + ///If no actual Regex object is associated, null is returned. + ///@return A pointer to the associated constant Regex object or null. 
+ virtual Regex const * getRegexObject() const { + return re; + } + + ///Get pointer to numbered substring vector. + ///@return Pointer to const numbered substring vector. + virtual VecNum const* getNumberedSubstringVector() const { + return vec_num; + } + + ///Get pointer to named substring vector. + ///@return Pointer to const named substring vector. + virtual VecNas const* getNamedSubstringVector() const { + return vec_nas; + } + + ///Get pointer to name to number map vector. + ///@return Pointer to const name to number map vector. + virtual VecNtN const* getNameToNumberMapVector() const { + return vec_ntn; + } + + ///Set the associated regex object. + ///Null pointer unsets it. + ///Underlying data is not modified. + ///@param r Pointer to a Regex object. + ///@return Reference to the calling RegexMatch object. + virtual RegexMatch& setRegexObject(Regex const *r){ + re = r; + return *this; + } + + /// Set a pointer to the numbered substring vector. + /// Null pointer unsets it. + /// + /// This vector will be filled with numbered (indexed) captured groups. + /// @param v pointer to the numbered substring vector + /// @return Reference to the calling RegexMatch object + virtual RegexMatch& setNumberedSubstringVector(VecNum* v) { + vec_num = v; + return *this; + } + + /// Set a pointer to the named substring vector. + /// Null pointer unsets it. + /// + /// This vector will be populated with named captured groups. + /// @param v pointer to the named substring vector + /// @return Reference to the calling RegexMatch object + virtual RegexMatch& setNamedSubstringVector(VecNas* v) { + vec_nas = v; + return *this; + } + + /// Set a pointer to the name to number map vector. + /// Null pointer unsets it. + /// + /// This vector will be populated with name to number map for captured groups. 
+ /// @param v pointer to the name to number map vector + /// @return Reference to the calling RegexMatch object + virtual RegexMatch& setNameToNumberMapVector(VecNtN* v) { + vec_ntn = v; + return *this; + } + + /// Set the pointer to a vector to store the offsets where matches + /// start in the subject. + /// Null pointer unsets it. + /// @param v Pointer to a jpcre2::VecOff vector (std::vector) + /// @return Reference to the calling RegexMatch object + virtual RegexMatch& setMatchStartOffsetVector(VecOff* v){ + vec_soff = v; + return *this; + } + + /// Set the pointer to a vector to store the offsets where matches + /// end in the subject. + /// Null pointer unsets it. + /// @param v Pointer to a VecOff vector (std::vector) + /// @return Reference to the calling RegexMatch object + virtual RegexMatch& setMatchEndOffsetVector(VecOff* v){ + vec_eoff = v; + return *this; + } + + ///Set the subject string for match. + ///This makes a copy of the subject string. + /// @param s Subject string + /// @return Reference to the calling RegexMatch object + /// @see RegexReplace::setSubject() + virtual RegexMatch& setSubject(String const &s) { + m_subject = s; + m_subject_ptr = &m_subject; //must overwrite + return *this; + } + + ///@overload + ///... + /// Works with the original without modifying it. Null pointer unsets the subject. + /// @param s Pointer to subject string + /// @return Reference to the calling RegexMatch object + /// @see RegexReplace::setSubject() + virtual RegexMatch& setSubject(String const *s) { + if(s) m_subject_ptr = s; + else { + m_subject.clear(); + m_subject_ptr = &m_subject; + } + return *this; + } + + + /// Set the modifier (resets all JPCRE2 and PCRE2 options) by calling RegexMatch::changeModifier(). + /// Re-initializes the option bits for PCRE2 and JPCRE2 options, then parses the modifier to set their equivalent options. + /// @param s Modifier string. 
+ /// @return Reference to the calling RegexMatch object + /// @see RegexReplace::setModifier() + /// @see Regex::setModifier() + virtual RegexMatch& setModifier(Modifier const& s) { + match_opts = 0; + jpcre2_match_opts = 0; + changeModifier(s, true); + return *this; + } + + ///Set a custom modifier table to be used. Only the pointer is stored. + ///@param mdt pointer to ModifierTable object. + ///@return Reference to the calling RegexMatch object. + virtual RegexMatch& setModifierTable(ModifierTable const * mdt){ + modtab = mdt; + return *this; + } + + /// Set JPCRE2 options for match (replaces all previously set JPCRE2 match options) + /// @param x Option value + /// @return Reference to the calling RegexMatch object + /// @see RegexReplace::setJpcre2Option() + /// @see Regex::setJpcre2Option() + virtual RegexMatch& setJpcre2Option(Uint x) { + jpcre2_match_opts = x; + return *this; + } + + ///Set PCRE2 options for match (overwrites all previously set PCRE2 match options) + /// @param x Option value + /// @return Reference to the calling RegexMatch object + /// @see RegexReplace::setPcre2Option() + /// @see Regex::setPcre2Option() + virtual RegexMatch& setPcre2Option(Uint x) { + match_opts = x; + return *this; + } + + /// Set whether to perform global match (adds or removes the FIND_ALL bit; other JPCRE2 options are kept) + /// @param x True or False + /// @return Reference to the calling RegexMatch object + virtual RegexMatch& setFindAll(bool x) { + jpcre2_match_opts = x?jpcre2_match_opts | FIND_ALL:jpcre2_match_opts & ~FIND_ALL; + return *this; + } + + ///@overload + ///... + ///This function just calls RegexMatch::setFindAll(bool x) with `true` as the parameter + ///@return Reference to the calling RegexMatch object + virtual RegexMatch& setFindAll() { + return setFindAll(true); + } + + /// Set offset from where match starts. + /// When FIND_ALL is set, a global match would not be performed on all positions on the subject, + /// rather it will be performed from the start offset and onwards.
+ /// @param offset Start offset + /// @return Reference to the calling RegexMatch object + virtual RegexMatch& setStartOffset(PCRE2_SIZE offset) { + _start_offset = offset; + return *this; + } + + ///Set the match context. + ///You can create match context using the native PCRE2 API. + ///The memory is not handled by RegexMatch object and not freed. + ///User will be responsible for freeing the memory of the match context. + ///@param match_context Pointer to the match context. + ///@return Reference to the calling RegexMatch object + virtual RegexMatch& setMatchContext(MatchContext *match_context){ + mcontext = match_context; + return *this; + } + + ///Return pointer to the match context that was previously set with setMatchContext(). + ///Handling memory is the caller's responsibility. + ///NOTE(review): this accessor is not const-qualified, so it cannot be called through a const RegexMatch. + ///@return pointer to the match context (default: null). + virtual MatchContext* getMatchContext(){ + return mcontext; + } + + ///Set the match data block to be used. + ///The memory is not handled by RegexMatch object and not freed. + ///User will be responsible for freeing the memory of the match data block. + ///@param madt Pointer to a match data block. + ///@return Reference to the calling RegexMatch object + virtual RegexMatch& setMatchDataBlock(MatchData* madt){ + mdata = madt; + return *this; + } + + ///Get the pointer to the match data block that was set previously with setMatchDataBlock(). + ///Handling memory is the caller's responsibility. + ///@return pointer to the match data (default: null). + virtual MatchData* getMatchDataBlock(){ + return mdata; + } + + /// Parse modifier and add/remove equivalent PCRE2 and JPCRE2 options. + /// This function does not initialize or re-initialize options. + /// If you want to set options from scratch, initialize them to 0 before calling this function. + /// If an invalid modifier is detected, then the error number for the RegexMatch + /// object will be jpcre2::ERROR::INVALID_MODIFIER and error offset will be the modifier character.
+ /// You can get the message with RegexMatch::getErrorMessage() function. + /// The modifier table set with setModifierTable() is used when one is set, MOD::toMatchOption() otherwise. + /// @param mod Modifier string. + /// @param x true to add the options named by the modifier, false to remove them + /// @return Reference to the calling RegexMatch object + /// @see Regex::changeModifier() + /// @see RegexReplace::changeModifier() + virtual RegexMatch& changeModifier(Modifier const& mod, bool x){ + modtab ? modtab->toMatchOption(mod, x, &match_opts, &jpcre2_match_opts, &error_number, &error_offset) + : MOD::toMatchOption(mod, x, &match_opts, &jpcre2_match_opts, &error_number, &error_offset); + return *this; + } + + /// Add or remove a JPCRE2 option (other JPCRE2 option bits are left unchanged) + /// @param opt JPCRE2 option value + /// @param x Add the option if it's true, remove otherwise. + /// @return Reference to the calling RegexMatch object + /// @see RegexReplace::changeJpcre2Option() + /// @see Regex::changeJpcre2Option() + virtual RegexMatch& changeJpcre2Option(Uint opt, bool x) { + jpcre2_match_opts = x ? jpcre2_match_opts | opt : jpcre2_match_opts & ~opt; + return *this; + } + + /// Add or remove a PCRE2 option (other PCRE2 option bits are left unchanged) + /// @param opt PCRE2 option value + /// @param x Add the option if it's true, remove otherwise. + /// @return Reference to the calling RegexMatch object + /// @see RegexReplace::changePcre2Option() + /// @see Regex::changePcre2Option() + virtual RegexMatch& changePcre2Option(Uint opt, bool x) { + match_opts = x ? match_opts | opt : match_opts & ~opt; + return *this; + } + + /// Parse modifier string and add equivalent PCRE2 and JPCRE2 options. + /// This is just a wrapper of the original function RegexMatch::changeModifier() + /// @param mod Modifier string.
+ /// @return Reference to the calling RegexMatch object + /// @see RegexReplace::addModifier() + /// @see Regex::addModifier() + virtual RegexMatch& addModifier(Modifier const& mod){ + return changeModifier(mod, true); + } + + /// Add option to existing JPCRE2 options for match (bitwise OR; existing options are kept) + /// @param x Option value + /// @return Reference to the calling RegexMatch object + /// @see RegexReplace::addJpcre2Option() + /// @see Regex::addJpcre2Option() + virtual RegexMatch& addJpcre2Option(Uint x) { + jpcre2_match_opts |= x; + return *this; + } + + /// Add option to existing PCRE2 options for match (bitwise OR; existing options are kept) + /// @param x Option value + /// @return Reference to the calling RegexMatch object + /// @see RegexReplace::addPcre2Option() + /// @see Regex::addPcre2Option() + virtual RegexMatch& addPcre2Option(Uint x) { + match_opts |= x; + return *this; + } + + + /// Perform match operation using info from class variables and return the match count and + /// store the results in specified vectors. + /// + /// Note: This function uses pcre2_match() function to do the match. + ///@return Match count + virtual SIZE_T match(void); + }; + + + ///This class contains a typedef of a function pointer or a templated function wrapper (`std::function`) + ///to provide callback function to the `MatchEvaluator`. + ///`std::function` is used when `>=C++11` is being used, otherwise a function pointer is used. + ///You can force using function pointer instead of `std::function` when `>=C++11` is used by defining the macro + ///`JPCRE2_USE_FUNCTION_POINTER_CALLBACK` before including jpcre2.hpp. + ///If you are using a lambda function with captures, you must use the `std::function` approach. + /// + ///The callback function takes exactly three positional arguments: + ///@tparam T1 The first argument must be `jp::NumSub const &` aka `std::vector const &` (or `void*` if not needed). + ///@tparam T2 The second argument must be `jp::MapNas const &` aka `std::map const &` (or `void*` if not needed).
+ ///@tparam T3 The third argument must be `jp::MapNtN const &` aka `std::map const &` (or `void*` if not needed). + /// + /// **Examples:** + /// ```cpp + /// typedef jpcre2::select jp; + /// jp::String myCallback1(jp::NumSub const &m1, void*, void*){ + /// return "("+m1[0]+")"; + /// } + /// + /// jp::String myCallback2(jp::NumSub const &m1, jp::MapNas const &m2, void*){ + /// return "("+m1[0]+"/"+m2.at("total")+")"; + /// } + /// //Now you can pass these functions in MatchEvaluator constructors to create a match evaluator + /// jp::MatchEvaluator me1(myCallback1); + /// + /// //Examples with lambda (>=C++11) + /// jp::MatchEvaluator me2([](jp::NumSub const &m1, void*, void*) + /// { + /// return "("+m1[0]+")"; + /// }); + /// ``` + ///@see MatchEvaluator + template + struct MatchEvaluatorCallback{ + #if !defined JPCRE2_USE_FUNCTION_POINTER_CALLBACK && JPCRE2_USE_MINIMUM_CXX_11 + typedef std::function Callback; + #else + typedef String (*Callback)(T1,T2,T3); + #endif + }; + + ///Provides some default static callback functions. + ///The primary goal of this class is to provide the default + ///callback function for the MatchEvaluator default constructor, which is + ///callback::erase. + ///This class does not allow object instantiation (its constructors and destructor are private). + struct callback{ + ///Callback function that removes the matched part/s in the subject string + /// and takes all match vectors as argument. + ///Even though this function itself does not use the vectors, it still takes them + ///so that the caller can perform a match and populate all the match data to perform + ///further evaluation of other callback functions without doing the match again. + ///@param num jp::NumSub vector. + ///@param nas jp::MapNas map. + ///@param ntn jp::MapNtN map. + ///@return empty string.
+ static String eraseFill(NumSub const &num, MapNas const &nas, MapNtN const &ntn){ + return String(); + } + + ///Callback function that removes the matched part/s in the subject string + ///and does not take any match vector. + ///This is a minimum cost pattern deleting callback function. + /// + ///It's the default callback function when you instantiate + ///a MatchEvaluator object with its default constructor: + ///```cpp + ///MatchEvaluator me; + ///``` + ///@return empty string. + static String erase(void*, void*, void*){ + return String(); + } + + ///Callback function for populating match vectors that does not modify the subject string. + ///It always returns the total matched part and thus the subject string remains the same. + ///@param num jp::NumSub vector. + ///@param nas jp::MapNas map (not used by this function). + ///@param ntn jp::MapNtN map (not used by this function; NOTE(review): spelled `MapNtn` in the signature, presumably an alias of `MapNtN` - confirm). + ///@return total match (group 0) of current match. + static String fill(NumSub const &num, MapNas const &nas, MapNtn const &ntn){ + return num[0]; + } + + private: + //prevent object instantiation: constructors and destructor are only declared, and privately. + callback(); + callback(callback const &); + #if JPCRE2_USE_MINIMUM_CXX_11 + callback(callback&&); + #endif + ~callback(); + }; + + ///This class inherits RegexMatch and provides a similar functionality. + ///All public member functions from RegexMatch class are publicly available except the following: + ///* setNumberedSubstringVector + ///* setNamedSubstringVector + ///* setNameToNumberMapVector + ///* setMatchStartOffsetVector + ///* setMatchEndOffsetVector + /// + ///The use of above functions is not allowed as the vectors are created according to the callback function you pass. + /// + ///Apart from the default, Regex pointer, copy and move constructors, each constructor of this class takes a callback function as argument (see `MatchEvaluatorCallback`). + /// + ///It provides a MatchEvaluator::nreplace() function to perform replace operation using native JPCRE2 approach + ///and `MatchEvaluator::replace()` function for PCRE2 compatible replace operation.
+ /// + ///An instance of this class can also be passed to the `RegexReplace::nreplace()` or `RegexReplace::replace()` function to perform replacement + ///according to this match evaluator. + /// + ///Match data is stored in vectors, and the vectors are populated according to the callback functions. + ///Populated vector data is never deleted automatically, but it gets overwritten. Vector data can be manually cleared + ///by calling `MatchEvaluator::clearMatchData()`. If the capacities of those match vectors should + ///be shrunk too instead of just clearing them, use `MatchEvaluator::resetMatchData()` instead. + /// + /// # Re-usability of Match Data + /// Match data populated with a callback function that takes only a jp::NumSub vector is not compatible + /// with the data created according to a callback function with a jp::MapNas vector. + /// Because, for this latter callback, jp::MapNas data is required but is not available (only jp::NumSub is available). + /// In such cases, previous match data can not be used to perform a new replacement operation with this second callback function. + /// + /// To populate the match vectors, one must call the `MatchEvaluator::match()` or `MatchEvaluator::nreplace()` function; they will populate + /// vectors with match data according to the callback function. + /// + /// ## Example: + /// + /// ```cpp + /// jp::String callback5(NumSub const &m, void*, MapNtn const &n){ + /// return m[0]; + /// } + /// jp::String callback4(void*, void*, MapNtn const &n){ + /// return std::to_string(n.at("name")); //position of group 'name'. + /// } + /// jp::String callback2(void*, MapNas const &m, void*){ + /// return m.at("name"); //substring by name + /// } + /// + /// jp::MatchEvaluator me; + /// me.setRegexObject(&re).setSubject("string").setCallback(callback5).nreplace(); + /// //In above, nreplace() populates jp::NumSub and jp::MapNtn with match data.
+ /// + /// me.setCallback(callback4).nreplace(false); + /// //the above uses the previous match result (note the 'false') which is OK, + /// //because callback4 requires jp::MapNtn which was made available in the previous operation. + /// + /// //but the following is not OK: (assertion failure) + /// me.setCallback(callback2).nreplace(false); + /// //because callback2 requires jp::MapNas data which is not available. + /// //now, this is OK: + /// me.setCallback(callback2).nreplace(); + /// //because it will recreate the match data including this one (jp::MapNas). + /// ``` + /// + /// # Replace options + /// MatchEvaluator cannot take replace options. + /// Replace options are taken directly by the replace functions: `nreplace()` and `replace()`. + /// + /// # Using as a match object + /// As it's just a subclass of RegexMatch, it can do all the things that RegexMatch can do, with some restrictions: + /// * matching options are modified to strip off options that are unsuitable for replacement (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT). + /// * match depends on the callback function. Only those vectors will be populated that are implemented by the callback functions so far + /// (multiple callback functions will set multiple match data vectors.) + /// * match vectors are internal to this class, you cannot set them manually (without a callback function). (You can get pointers to these vectors + /// with `getNumberedSubstringVector()` and related functions).
+ /// + ///@see MatchEvaluatorCallback + ///@see RegexReplace::nreplace() + class MatchEvaluator: virtual public RegexMatch{ + private: + friend class RegexReplace; + + VecNum vec_num; + VecNas vec_nas; + VecNtN vec_ntn; + VecOff vec_soff; + VecOff vec_eoff; + int callbackn; + typename MatchEvaluatorCallback::Callback callback0; + typename MatchEvaluatorCallback::Callback callback1; + typename MatchEvaluatorCallback::Callback callback2; + typename MatchEvaluatorCallback::Callback callback3; + typename MatchEvaluatorCallback::Callback callback4; + typename MatchEvaluatorCallback::Callback callback5; + typename MatchEvaluatorCallback::Callback callback6; + typename MatchEvaluatorCallback::Callback callback7; + //Q: Why do the callback names seem random? Are they random? + //A: No, they are not random: NumSub = 1, MapNas = 2, MapNtn = 4, thus: + // NumSub + MapNas = 3 + // NumSub + MapNtn = 5 + // MapNas + MapNtn = 6 + // NumSub + MapNas + MapNtn = 7 + //Q: Why is it like this? + //A: It's historical. Once, there were not this many callback declarations, there was only one (a templated one). + // The nreplace function itself used to calculate a mode value according to available vectors + // and determine what kind of callback function needed to be called. + //Q: Why did that change? + //A: We had some compatibility issues with the single templated callback. + // Also, this approach proved to be more readable and robust.
+ + PCRE2_SIZE buffer_size; + + + //Bring the evaluator to its default state: callback::erase as the active (mode 0) callback, + //all other callback slots empty, offset vectors bound to this object's own storage, buffer_size 0. + void init(){ + callbackn = 0; + callback0 = callback::erase; + callback1 = 0; + callback2 = 0; + callback3 = 0; + callback4 = 0; + callback5 = 0; + callback6 = 0; + callback7 = 0; + setMatchStartOffsetVector(&vec_soff); + setMatchEndOffsetVector(&vec_eoff); + buffer_size = 0; + } + + //Bind the match vectors needed by the active callback mode (callbackn) to this object's own vectors. + //Vectors that the active mode does not need are left as they are. + void setVectorPointersAccordingToCallback(){ + switch(callbackn){ + case 0: break; + case 1: setNumberedSubstringVector(&vec_num);break; + case 2: setNamedSubstringVector(&vec_nas);break; + case 3: setNumberedSubstringVector(&vec_num).setNamedSubstringVector(&vec_nas);break; + case 4: setNameToNumberMapVector(&vec_ntn);break; + case 5: setNumberedSubstringVector(&vec_num).setNameToNumberMapVector(&vec_ntn);break; + case 6: setNamedSubstringVector(&vec_nas).setNameToNumberMapVector(&vec_ntn);break; + case 7: setNumberedSubstringVector(&vec_num).setNamedSubstringVector(&vec_nas).setNameToNumberMapVector(&vec_ntn);break; + } + } + + //Copy the callbacks, the callback mode and the buffer size from `me`, then re-bind the vector pointers to this object. + //The match data itself is not copied here (see deepCopy() and deepMove()). + void onlyCopy(MatchEvaluator const &me){ + callbackn = me.callbackn; + callback0 = me.callback0; + callback1 = me.callback1; + callback2 = me.callback2; + callback3 = me.callback3; + callback4 = me.callback4; + callback5 = me.callback5; + callback6 = me.callback6; + callback7 = me.callback7; + //must update the pointers to point to this class vectors.
+ setVectorPointersAccordingToCallback(); + //The offset vectors are otherwise bound only in init(), which the assignment operators do not call. + //NOTE(review): RegexMatch::operator= is not visible here; presumably it copies the raw pointers, which would leave them aimed at me's vectors - re-binding is harmless either way. + setMatchStartOffsetVector(&vec_soff); + setMatchEndOffsetVector(&vec_eoff); + buffer_size = me.buffer_size; + } + + //Copy the match data vectors from `me`, then everything that onlyCopy() handles. + void deepCopy(MatchEvaluator const &me) { + vec_num = me.vec_num; + vec_nas = me.vec_nas; + vec_ntn = me.vec_ntn; + vec_soff = me.vec_soff; + vec_eoff = me.vec_eoff; + onlyCopy(me); + } + + #if JPCRE2_USE_MINIMUM_CXX_11 + //Move counterpart of deepCopy(): the match data vectors go through std::move_if_noexcept, the rest is copied by onlyCopy(). + void deepMove(MatchEvaluator& me){ + vec_num = std::move_if_noexcept(me.vec_num); + vec_nas = std::move_if_noexcept(me.vec_nas); + vec_ntn = std::move_if_noexcept(me.vec_ntn); + vec_soff = std::move_if_noexcept(me.vec_soff); + vec_eoff = std::move_if_noexcept(me.vec_eoff); + onlyCopy(me); + } + #endif + + //prevent public access to some functions: these private overloads hide the public RegexMatch setters and forward to them. + MatchEvaluator& setNumberedSubstringVector(VecNum* v){ + RegexMatch::setNumberedSubstringVector(v); + return *this; + } + MatchEvaluator& setNamedSubstringVector(VecNas* v){ + RegexMatch::setNamedSubstringVector(v); + return *this; + } + MatchEvaluator& setNameToNumberMapVector(VecNtN* v){ + RegexMatch::setNameToNumberMapVector(v); + return *this; + } + MatchEvaluator& setMatchStartOffsetVector(VecOff* v){ + RegexMatch::setMatchStartOffsetVector(v); + return *this; + } + MatchEvaluator& setMatchEndOffsetVector(VecOff* v){ + RegexMatch::setMatchEndOffsetVector(v); + return *this; + } + + public: + + ///Default constructor. + ///Sets callback::erase as the callback function. + ///Removes matched part/s from the subject string if the callback is not + ///changed. + /// ```cpp + /// jp::Regex re("\\s*string"); + /// jp::MatchEvaluator me; + /// std::cout<< + /// me.setRegexObject(&re) + /// .setSubject("I am a string") + /// .nreplace(); + /// //The above will delete ' string' from the subject + /// //thus the result will be 'I am a' + /// ``` + explicit + MatchEvaluator():RegexMatch(){ + init(); + } + + ///@overload + ///... + ///Constructor taking a Regex object pointer. + ///It sets the associated Regex object and + ///initializes the MatchEvaluator object with + ///callback::erase callback function.
+ ///Underlying data is not modified. + ///@param r constant Regex pointer. + explicit + MatchEvaluator(Regex const *r):RegexMatch(r){ + init(); + } + + ///@overload + ///... + ///Constructor taking a callback function. + ///It calls the corresponding MatchEvaluator::setCallback() function to set the callback function. + ///NOTE(review): in this copy all callback constructors read identically because the template argument list after `MatchEvaluatorCallback` is missing; presumably each overload names a different NumSub/MapNas/MapNtN combination - confirm against the upstream header. + ///@param mef Callback function. + explicit + MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){ + init(); + setCallback(mef); + } + + ///@overload + /// ... + ///It calls the corresponding MatchEvaluator::setCallback() function to set the callback function. + ///@param mef Callback function. + explicit + MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){ + init(); + setCallback(mef); + } + + ///@overload + /// ... + ///It calls the corresponding MatchEvaluator::setCallback() function to set the callback function. + ///@param mef Callback function. + explicit + MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){ + init(); + setCallback(mef); + } + + ///@overload + /// ... + ///It calls the corresponding MatchEvaluator::setCallback() function to set the callback function. + ///@param mef Callback function. + explicit + MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){ + init(); + setCallback(mef); + } + + ///@overload + /// ... + ///It calls the corresponding MatchEvaluator::setCallback() function to set the callback function. + ///@param mef Callback function. + explicit + MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){ + init(); + setCallback(mef); + } + + ///@overload + /// ... + ///It calls the corresponding MatchEvaluator::setCallback() function to set the callback function. + ///@param mef Callback function. + explicit + MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){ + init(); + setCallback(mef); + } + + + ///@overload + /// ...
+ ///It calls the corresponding MatchEvaluator::setCallback() function to set the callback function. + ///@param mef Callback function. + explicit + MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){ + init(); + setCallback(mef); + } + + + + ///@overload + /// ... + ///It calls the corresponding MatchEvaluator::setCallback() function to set the callback function. + ///@param mef Callback function. + explicit + MatchEvaluator(typename MatchEvaluatorCallback::Callback mef): RegexMatch(){ + init(); + setCallback(mef); + } + + + + ///@overload + /// ... + ///Copy constructor. + ///Copies the RegexMatch part, the match data vectors and the callbacks of the argument. + ///@param me Reference to MatchEvaluator object + MatchEvaluator(MatchEvaluator const &me): RegexMatch(me){ + init(); + deepCopy(me); + } + + ///Overloaded copy-assignment operator + ///Self-assignment is detected and is a no-op. + ///@param me MatchEvaluator object + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& operator=(MatchEvaluator const &me){ + if(this == &me) return *this; + RegexMatch::operator=(me); + deepCopy(me); + return *this; + } + + #if JPCRE2_USE_MINIMUM_CXX_11 + + ///@overload + /// ... + ///Move constructor. + ///This constructor steals resources from the argument. + ///It leaves the argument in a valid but indeterminate state. + ///The indeterminate state can be returned to normal by calling reset() on that object. + ///The base part is initialized from std::move(me): a named rvalue reference is an lvalue, so passing plain `me` would copy-construct the base instead of moving it. + ///deepMove() afterwards only takes MatchEvaluator's own members from the argument. + ///@param me rvalue reference to a MatchEvaluator object + MatchEvaluator(MatchEvaluator&& me): RegexMatch(std::move(me)){ + init(); + deepMove(me); + } + + ///@overload + ///... + ///Overloaded move-assignment operator. + ///It steals resources from the argument. + ///It leaves the argument in a valid but indeterminate state. + ///The indeterminate state can be returned to normal by calling reset() on that object. + ///@param me rvalue reference to a MatchEvaluator object + ///@return A reference to the calling MatchEvaluator object.
+ ///@see MatchEvaluator(MatchEvaluator&& me) + MatchEvaluator& operator=(MatchEvaluator&& me){ + if(this == &me) return *this; + //std::move is required: a named rvalue reference is an lvalue, so plain `me` would copy-assign the base part. + RegexMatch::operator=(std::move(me)); + //only MatchEvaluator's own members are taken from me below. + deepMove(me); + return *this; + } + + #endif + + virtual ~MatchEvaluator(){} + + ///Member function to set a callback function with no vector reference. + ///Callback function is always overwritten. The implemented vectors are set to be filled with match data. + ///Other vectors that were set previously are not unset, and thus they will be filled with match data too + ///when `match()` or `nreplace()` is called. + ///@param mef Callback function. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ + callback0 = mef; + callbackn = 0; + return *this; + } + + ///@overload + /// ... + ///Sets a callback function with a jp::NumSub vector. + ///You will be working with a reference to the constant vector. + ///@param mef Callback function. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ + callback1 = mef; + callbackn = 1; + setNumberedSubstringVector(&vec_num); + return *this; + } + + ///@overload + /// ... + ///Sets a callback function with a jp::NumSub and jp::MapNas. + ///You will be working with constant references to the vectors. + ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead: + ///```cpp + ///map_nas["word"]; //wrong + ///map_nas.at("word"); //ok + ///``` + ///If you want to use `[]` operator with maps, make a copy: + ///```cpp + ///jp::MapNas mn = map_nas; + ///mn["word"]; //ok + ///``` + ///@param mef Callback function. + ///@return A reference to the calling MatchEvaluator object.
+ MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ + callback3 = mef; + callbackn = 3; + setNumberedSubstringVector(&vec_num); + setNamedSubstringVector(&vec_nas); + return *this; + } + + ///@overload + /// ... + ///Sets a callback function with a jp::NumSub and jp::MapNtN. + ///You will be working with constant references to the vectors. + ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead: + ///```cpp + ///map_ntn["word"]; //wrong + ///map_ntn.at("word"); //ok + ///``` + ///If you want to use `[]` operator with maps, make a copy: + ///```cpp + ///jp::MapNtN mn = map_ntn; + ///mn["word"]; //ok + ///``` + ///@param mef Callback function. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ + callback5 = mef; + callbackn = 5; + setNumberedSubstringVector(&vec_num); + setNameToNumberMapVector(&vec_ntn); + return *this; + } + + + ///@overload + /// ... + ///Sets a callback function with a jp::NumSub, jp::MapNas, jp::MapNtN. + ///You will be working with constant references to the vectors. + ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead: + ///```cpp + ///map_nas["word"]; //wrong + ///map_nas.at("word"); //ok + ///``` + ///If you want to use `[]` operator with maps, make a copy: + ///```cpp + ///jp::MapNas mn = map_nas; + ///mn["word"]; //ok + ///``` + ///@param mef Callback function. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ + callback7 = mef; + callbackn = 7; + setNumberedSubstringVector(&vec_num); + setNamedSubstringVector(&vec_nas); + setNameToNumberMapVector(&vec_ntn); + return *this; + } + + ///@overload + /// ... + ///Sets a callback function with a jp::MapNas. + ///You will be working with a constant reference to the map.
+ ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead: + ///```cpp + ///map_nas["word"]; //wrong + ///map_nas.at("word"); //ok + ///``` + ///If you want to use `[]` operator with maps, make a copy: + ///```cpp + ///jp::MapNas mn = map_nas; + ///mn["word"]; //ok + ///``` + ///@param mef Callback function. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ + callback2 = mef; + callbackn = 2; + setNamedSubstringVector(&vec_nas); + return *this; + } + + ///@overload + /// ... + ///Sets a callback function with a jp::MapNas, jp::MapNtN. + ///You will be working with constant references to the maps. + ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead: + ///```cpp + ///map_nas["word"]; //wrong + ///map_nas.at("word"); //ok + ///``` + ///If you want to use `[]` operator with maps, make a copy: + ///```cpp + ///jp::MapNas mn = map_nas; + ///mn["word"]; //ok + ///``` + ///@param mef Callback function. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ + callback6 = mef; + callbackn = 6; + setNamedSubstringVector(&vec_nas); + setNameToNumberMapVector(&vec_ntn); + return *this; + } + + ///@overload + /// ... + ///Sets a callback function with a jp::MapNtN. + ///You will be working with a constant reference to the map. + ///For maps, you won't be able to use `[]` operator with reference to constant map, use at() instead: + ///```cpp + ///map_ntn["word"]; //wrong + ///map_ntn.at("word"); //ok + ///``` + ///If you want to use `[]` operator with maps, make a copy: + ///```cpp + ///jp::MapNtN mn = map_ntn; + ///mn["word"]; //ok + ///``` + ///@param mef Callback function. + ///@return A reference to the calling MatchEvaluator object.
+ MatchEvaluator& setCallback(typename MatchEvaluatorCallback::Callback mef){ + callback4 = mef; + callbackn = 4; + setNameToNumberMapVector(&vec_ntn); + return *this; + } + + ///Clear match data. + ///It clears all match data from all vectors (without shrinking). + ///For shrinking the vectors, use `resetMatchData()`. + ///A call to `match()` or nreplace() will be required to produce match data again. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& clearMatchData(){ + vec_num.clear(); + vec_nas.clear(); + vec_ntn.clear(); + vec_soff.clear(); + vec_eoff.clear(); + return *this; + } + + ///Reset match data to initial state. + ///It deletes all match data from all vectors, shrinking their capacity (each vector is swapped with an empty temporary). + ///A call to `match()` or nreplace() will be required to produce match data again. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& resetMatchData(){ + VecNum().swap(vec_num); + VecNas().swap(vec_nas); + VecNtN().swap(vec_ntn); + VecOff().swap(vec_soff); + VecOff().swap(vec_eoff); + return *this; + } + + + ///Reset MatchEvaluator to initial state including memory. + ///The callback is reset as well: init() makes callback::erase the active callback again. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& reset(){ + RegexMatch::reset(); + resetMatchData(); + init(); + return *this; + } + + ///Clears MatchEvaluator. + ///Returns everything to initial state (some memory may be retained for further and faster use). + ///The callback is reset as well: init() makes callback::erase the active callback again. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& clear(){ + RegexMatch::clear(); + clearMatchData(); + init(); + return *this; + } + + ///Call RegexMatch::resetErrors(). + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& resetErrors(){ + RegexMatch::resetErrors(); + return *this; + } + + ///Call RegexMatch::setRegexObject(r). + ///@param r constant Regex object pointer + ///@return A reference to the calling MatchEvaluator object.
+ MatchEvaluator& setRegexObject (Regex const *r){ + RegexMatch::setRegexObject(r); + return *this; + } + + ///Call RegexMatch::setSubject(String const &s). + ///Like the other forwarding wrappers here, it returns MatchEvaluator& (not RegexMatch&) so that chained calls keep the derived type. + ///@param s subject string. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setSubject (String const &s){ + RegexMatch::setSubject(s); + return *this; + } + + ///@overload + ///Call RegexMatch::setSubject(String const *s). + ///@param s constant subject string by pointer + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setSubject (String const *s){ + RegexMatch::setSubject(s); + return *this; + } + + ///Call RegexMatch::setModifier(Modifier const& s). + ///@param s modifier string. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setModifier (Modifier const& s){ + RegexMatch::setModifier(s); + return *this; + } + + ///Call RegexMatch::setModifierTable(ModifierTable const * mdt). + ///@param mdt pointer to ModifierTable object. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setModifierTable (ModifierTable const * mdt){ + RegexMatch::setModifierTable(mdt); + return *this; + } + + ///Call RegexMatch::setJpcre2Option(Uint x). + ///@param x JPCRE2 option value. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setJpcre2Option (Uint x){ + RegexMatch::setJpcre2Option(x); + return *this; + } + + ///Call RegexMatch::setPcre2Option (Uint x). + ///@param x PCRE2 option value. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setPcre2Option (Uint x){ + RegexMatch::setPcre2Option(x); + return *this; + } + + ///Call RegexMatch::setFindAll(bool x). + ///@param x true if global match, false otherwise. + ///@return A reference to the calling MatchEvaluator object. + MatchEvaluator& setFindAll (bool x){ + RegexMatch::setFindAll(x); + return *this; + } + + ///Call RegexMatch::setFindAll(). + ///@return A reference to the calling MatchEvaluator object.
+        MatchEvaluator& setFindAll(){
+            RegexMatch::setFindAll();
+            return *this;
+        }
+
+        ///Call RegexMatch::setStartOffset (PCRE2_SIZE offset).
+        ///@param offset match start offset in the subject.
+        ///@return A reference to the calling MatchEvaluator object.
+        MatchEvaluator& setStartOffset (PCRE2_SIZE offset){
+            RegexMatch::setStartOffset(offset);
+            return *this;
+        }
+
+        ///Call RegexMatch::setMatchContext(MatchContext *match_context).
+        ///@param match_context pointer to match context.
+        ///@return A reference to the calling MatchEvaluator object.
+        MatchEvaluator& setMatchContext (MatchContext *match_context){
+            RegexMatch::setMatchContext(match_context);
+            return *this;
+        }
+
+        ///Call RegexMatch::setMatchDataBlock(MatchData* mdt).
+        ///@param mdt pointer to match data block
+        ///@return A reference to the calling MatchEvaluator object.
+        MatchEvaluator& setMatchDataBlock(MatchData* mdt){
+            RegexMatch::setMatchDataBlock(mdt);
+            return *this;
+        }
+
+        ///Set the buffer size that will be used by pcre2_substitute (replace()).
+        ///If the buffer size proves to be enough to fit the resultant string
+        ///from each match (not the total resultant string), it will yield one less call
+        ///to pcre2_substitute for each match.
+        ///@param x buffer size.
+        ///@return A reference to the calling MatchEvaluator object.
+        MatchEvaluator& setBufferSize(PCRE2_SIZE x){
+            buffer_size = x;
+            return *this;
+        }
+
+        ///Get the initial buffer size that is being used by internal function pcre2_substitute.
+        ///Const-qualified so that it can also be queried on a const MatchEvaluator.
+        ///@return buffer_size
+        PCRE2_SIZE getBufferSize() const {
+            return buffer_size;
+        }
+
+        ///Call RegexMatch::changeModifier(Modifier const& mod, bool x).
+        ///@param mod modifier string.
+        ///@param x true (add) or false (remove).
+        ///@return A reference to the calling MatchEvaluator object.
+        MatchEvaluator& changeModifier (Modifier const& mod, bool x){
+            RegexMatch::changeModifier(mod, x);
+            return *this;
+        }
+
+        ///Call RegexMatch::changeJpcre2Option(Uint opt, bool x).
+        ///@param opt JPCRE2 option
+        ///@param x true (add) or false (remove).
+        ///@return A reference to the calling MatchEvaluator object.
+        MatchEvaluator& changeJpcre2Option(Uint opt, bool x){
+            this->RegexMatch::changeJpcre2Option(opt, x);
+            return *this;
+        }
+
+        ///Forward to RegexMatch::changePcre2Option(Uint opt, bool x).
+        ///@param opt PCRE2 option.
+        ///@param x true (add) or false (remove).
+        ///@return A reference to the calling MatchEvaluator object.
+        MatchEvaluator& changePcre2Option(Uint opt, bool x){
+            this->RegexMatch::changePcre2Option(opt, x);
+            return *this;
+        }
+
+        ///Forward to RegexMatch::addModifier(Modifier const& mod).
+        ///@param mod modifier string.
+        ///@return A reference to the calling MatchEvaluator object.
+        MatchEvaluator& addModifier(Modifier const& mod){
+            this->RegexMatch::addModifier(mod);
+            return *this;
+        }
+
+        ///Forward to RegexMatch::addJpcre2Option(Uint x).
+        ///@param x JPCRE2 option.
+        ///@return A reference to the calling MatchEvaluator object.
+        MatchEvaluator& addJpcre2Option(Uint x){
+            this->RegexMatch::addJpcre2Option(x);
+            return *this;
+        }
+
+        ///Forward to RegexMatch::addPcre2Option(Uint x).
+        ///@param x PCRE2 option.
+        ///@return A reference to the calling MatchEvaluator object.
+        MatchEvaluator& addPcre2Option(Uint x){
+            this->RegexMatch::addPcre2Option(x);
+            return *this;
+        }
+
+        ///Run the match and report how many matches were found.
+        ///The partial-matching options (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT) are
+        ///considered bad options for a replacement operation, so they are switched
+        ///off before RegexMatch::match() is called.
+        ///@return match count.
+        SIZE_T match(){
+            Uint const partial_opts = PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT;
+            this->RegexMatch::changePcre2Option(partial_opts, false);
+            return this->RegexMatch::match();
+        }
+
+        ///Perform regex replace with this match evaluator.
+        ///This is a JPCRE2 native replace function (thus the name nreplace).
+        ///It uses the `MatchEvaluatorCallback` function that was set with a constructor or `MatchEvaluator::setCallback()` function
+        ///to generate the replacement strings on the fly.
+        ///The string returned by the callback function will be treated as literal and will
+        ///not go through any further processing.
+        ///
+        ///This function performs a new match every time it is called unless it is passed a boolean `false` as the first argument.
+        ///To use existing match data that was created by a previous `MatchEvaluator::nreplace()` or `MatchEvaluator::match()`, call this
+        ///function with boolean `false` as the first argument.
+        ///
+        ///## Complexity
+        /// 1. Changes in replace related options take effect without a re-match.
+        /// 2. Changes in match related options (e.g. start offset) need a re-match to take effect.
+        /// 3. To re-use existing match data, the callback function must be compatible with the data, otherwise an assertion error occurs.
+        /// 4. If the associated Regex object or subject string changes, a new match must be performed;
+        ///    trying to use the existing match data in such cases is undefined behavior.
+        ///
+        ///@param do_match Perform a new matching operation if true, otherwise use existing match data.
+        ///@param jro JPCRE2 replace options.
+        ///@param counter Pointer to a counter to store the number of replacements done.
+        ///@return resultant string after replace.
+        ///@see MatchEvaluator.
+        ///@see MatchEvaluatorCallback.
+        String nreplace(bool do_match=true, Uint jro=0, SIZE_T* counter=0);
+
+        ///PCRE2 compatible replace function that uses this MatchEvaluator.
+        ///Performs regex replace with pcre2_substitute function
+        ///by generating the replacement strings dynamically with MatchEvaluator callback.
+        ///The string returned by callback function is processed by internal pcre2_substitute, thus allowing
+        ///all options that are provided by PCRE2 itself.
+        ///
+        ///This function performs a new match every time it is called unless it is passed a boolean `false` as the first argument.
+        ///
+        ///## Complexity
+        /// 1. Changes in replace related options take effect without a re-match.
+        /// 2. Changes in match related options (e.g. start offset) need a re-match to take effect.
+        /// 3. To re-use existing match data, the callback function must be compatible with the data, otherwise an assertion error occurs.
+        /// 4. If the associated Regex object or subject string changes, a new match must be performed;
+        ///    trying to use the existing match data in such cases is undefined behavior.
+        ///
+        ///@param do_match perform a new match if true, otherwise use existing data.
+        ///@param ro replace related PCRE2 options.
+        ///@param counter Pointer to a counter to store the number of replacements done.
+        ///@return resultant string after replacement.
+        String replace(bool do_match=true, Uint ro=0, SIZE_T* counter=0);
+    };
+
+    /** Provides public constructors to create RegexReplace objects.
+     * Every RegexReplace object should be associated with a Regex object.
+     * This class stores a pointer to its associated Regex object, thus when
+     * the content of the associated Regex object is changed, there's no need to
+     * set the pointer again.
+     *
+     * Examples:
+     *
+     * ```cpp
+     * jp::Regex re;
+     * jp::RegexReplace rr;
+     * rr.setRegexObject(&re);
+     * rr.setSubject("subject").setReplaceWith("me");
+     * rr.replace(); // returns 'subject'
+     * re.compile("\\w+");
+     * rr.replace(); // replaces 'subject' with 'me' i.e returns 'me'
+     * ```
+     */
+    class RegexReplace {
+
+    private:
+
+        friend class Regex;
+
+        Regex const *re; //associated Regex object, only the pointer is stored
+
+        String r_subject; //internal storage for a subject passed by reference
+        String *r_subject_ptr; //preplace method modifies it in-place
+        String r_replw; //internal storage for a replacement string passed by reference
+        String const *r_replw_ptr; //points to r_replw or to a string supplied by the user
+        Uint replace_opts;
+        Uint jpcre2_replace_opts;
+        PCRE2_SIZE buffer_size;
+        PCRE2_SIZE _start_offset;
+        MatchData *mdata;
+        MatchContext *mcontext;
+        ModifierTable const * modtab;
+        SIZE_T last_replace_count; //internal replace counter
+        SIZE_T* last_replace_counter; //points to last_replace_count or to a counter supplied by the user
+
+        ///Put every member except the two internal strings into its default state.
+        ///The internal pointers are aimed at this object's own storage and
+        ///replace_opts starts out as PCRE2_SUBSTITUTE_OVERFLOW_LENGTH.
+        void init_vars() {
+            re = 0;
+            r_subject_ptr = &r_subject;
+            r_replw_ptr = &r_replw;
+            replace_opts = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH;
+            jpcre2_replace_opts = 0;
+            buffer_size = 0;
+            error_number = 0;
+            error_offset = 0;
+            _start_offset = 0;
+            mdata = 0;
+            mcontext = 0;
+            modtab = 0;
+            last_replace_count = 0;
+            last_replace_counter = &last_replace_count;
+        }
+
+        ///Copy everything except the contents of r_subject and r_replw.
+        ///The two strings are transferred by deepCopy()/deepMove() so that a move
+        ///does not have to copy them. A pointer that refers to a member of `rr`
+        ///is redirected to the matching member of this object; a pointer to
+        ///user data is copied as it is.
+        ///@param rr object to copy from
+        void onlyCopy(RegexReplace const &rr){
+            re = rr.re; //only pointer should be copied.
+
+            //rr.r_subject_ptr may point to rr.r_subject or other user data
+            r_subject_ptr = (rr.r_subject_ptr == &rr.r_subject) ? &r_subject //not rr.r_subject
+                                                                  : rr.r_subject_ptr; //other user data
+
+            //rr.r_replw_ptr may point to rr.r_replw or other user data
+            r_replw_ptr = (rr.r_replw_ptr == &rr.r_replw) ? &r_replw //not rr.r_replw
+                                                           : rr.r_replw_ptr; //other user data
+
+            replace_opts = rr.replace_opts;
+            jpcre2_replace_opts = rr.jpcre2_replace_opts;
+            buffer_size = rr.buffer_size;
+            error_number = rr.error_number;
+            error_offset = rr.error_offset;
+            _start_offset = rr._start_offset;
+            mdata = rr.mdata;
+            mcontext = rr.mcontext;
+            modtab = rr.modtab;
+            last_replace_count = rr.last_replace_count;
+            last_replace_counter = (rr.last_replace_counter == &rr.last_replace_count) ? &last_replace_count
+                                                                                       : rr.last_replace_counter;
+        }
+
+        ///Copy both internal strings, then everything else via onlyCopy().
+        ///@param rr object to copy from
+        void deepCopy(RegexReplace const &rr){
+            r_subject = rr.r_subject;
+            r_replw = rr.r_replw;
+            onlyCopy(rr);
+        }
+
+        #if JPCRE2_USE_MINIMUM_CXX_11
+        ///Move both internal strings out of `rr`, then copy everything else via onlyCopy().
+        ///onlyCopy() compares addresses only, so it is safe to call after the strings were moved.
+        ///@param rr object to steal from
+        void deepMove(RegexReplace& rr){
+            r_subject = std::move_if_noexcept(rr.r_subject);
+            r_replw = std::move_if_noexcept(rr.r_replw);
+            onlyCopy(rr);
+        }
+        #endif
+
+
+    protected:
+
+        int error_number;
+        PCRE2_SIZE error_offset;
+
+    public:
+
+        ///Default constructor
+        RegexReplace(){
+            init_vars();
+        }
+
+        ///@overload
+        /// ...
+        ///Creates a RegexReplace object associating a Regex object.
+        ///Regex object is not modified.
+        ///@param r pointer to a Regex object
+        RegexReplace(Regex const *r) {
+            init_vars();
+            re = r;
+        }
+
+        ///@overload
+        ///...
+        ///Copy constructor.
+        ///@param rr RegexReplace object reference
+        RegexReplace(RegexReplace const &rr){
+            init_vars();
+            deepCopy(rr);
+        }
+
+        ///Overloaded Copy assignment operator.
+        ///Self-assignment is a no-op.
+        ///@param rr RegexReplace object reference
+        ///@return A reference to the calling RegexReplace object
+        RegexReplace& operator=(RegexReplace const &rr){
+            if(this == &rr) return *this;
+            deepCopy(rr);
+            return *this;
+        }
+
+        #if JPCRE2_USE_MINIMUM_CXX_11
+
+        ///@overload
+        ///...
+        ///Move constructor.
+        ///This constructor steals resources from the argument.
+        ///It leaves the argument in a valid but indeterminate state.
+        ///The indeterminate state can be returned to normal by calling reset() on that object.
+        ///@param rr rvalue reference to a RegexReplace object reference
+        RegexReplace(RegexReplace&& rr){
+            init_vars();
+            deepMove(rr);
+        }
+
+        ///@overload
+        ///...
+        ///Overloaded move assignment operator.
+        ///This operator steals resources from the argument.
+        ///It leaves the argument in a valid but indeterminate state.
+        ///The indeterminate state can be returned to normal by calling reset() on that object.
+        ///@param rr rvalue reference to a RegexReplace object reference
+        ///@return A reference to the calling RegexReplace object
+        RegexReplace& operator=(RegexReplace&& rr){
+            if(this == &rr) return *this;
+            deepMove(rr);
+            return *this;
+        }
+
+        #endif
+
+        virtual ~RegexReplace() {}
+
+        ///Reset all class variables to their default (initial) state including memory.
+        ///The internal strings are swapped with empty temporaries before init_vars() runs.
+        ///@return Reference to the calling RegexReplace object.
+        RegexReplace& reset() {
+            String().swap(r_subject);
+            String().swap(r_replw);
+            init_vars();
+            return *this;
+        }
+
+        ///Clear all class variables to their default (initial) state (some memory may be retained for further use).
+        ///@return Reference to the calling RegexReplace object.
+        RegexReplace& clear() {
+            r_subject.clear();
+            r_replw.clear();
+            init_vars();
+            return *this;
+        }
+
+        ///Reset replace related errors to zero.
+        ///@return Reference to the calling RegexReplace object
+        ///@see Regex::resetErrors()
+        ///@see RegexMatch::resetErrors()
+        RegexReplace& resetErrors(){
+            error_number = 0;
+            error_offset = 0;
+            return *this;
+        }
+
+        /// Returns the last error number
+        ///@return Last error number
+        int getErrorNumber() const {
+            return error_number;
+        }
+
+        /// Returns the last error offset.
+        /// The offset is stored as PCRE2_SIZE and converted to int for the return value.
+        ///@return Last error offset
+        int getErrorOffset() const {
+            return static_cast<int>(error_offset);
+        }
+
+        /// Returns the last error message.
+        /// The message is obtained from select::getErrorMessage() for the stored
+        /// error number and error offset.
+        ///@return Last error message
+        String getErrorMessage() const {
+            return select::getErrorMessage(error_number, error_offset);
+        }
+
+        /// Get replacement string
+        ///@return replacement string
+        String getReplaceWith() const {
+            return *r_replw_ptr;
+        }
+
+        /// Get pointer to replacement string
+        ///@return pointer to replacement string
+        String const * getReplaceWithPointer() const {
+            return r_replw_ptr;
+        }
+
+        /// Get subject string
+        ///@return subject string
+        ///@see RegexMatch::getSubject()
+        String getSubject() const {
+            return *r_subject_ptr;
+        }
+
+        /// Get pointer to subject string
+        ///@return Pointer to constant subject string
+        ///@see RegexMatch::getSubjectPointer()
+        String const * getSubjectPointer() const {
+            return r_subject_ptr;
+        }
+
+
+        /// Calculate modifier string from PCRE2 and JPCRE2 options and return it.
+        /// The custom modifier table is used when one is set, the default conversion otherwise.
+        ///
+        /// Do remember that modifiers (or PCRE2 and JPCRE2 options) do not change or get initialized
+        /// as long as you don't do that explicitly. Calling RegexReplace::setModifier() will re-set them.
+        ///
+        /// **Mixed or combined modifier**.
+        ///
+        /// Some modifiers may include other modifiers i.e. they have the same meaning as some modifiers
+        /// combined together. For example, the 'n' modifier includes the 'u' modifier and together they
+        /// are equivalent to `PCRE2_UTF | PCRE2_UCP`. When you set a modifier like this, both options
+        /// get set, and when you remove the 'n' modifier (with `RegexReplace::changeModifier()`), both will get removed.
+        /// @return Calculated modifier string (std::string)
+        ///@see RegexMatch::getModifier()
+        ///@see Regex::getModifier()
+        std::string getModifier() const {
+            return modtab ? modtab->fromReplaceOption(replace_opts, jpcre2_replace_opts)
+                          : MOD::fromReplaceOption(replace_opts, jpcre2_replace_opts);
+        }
+
+        ///Get the modifier table that is set.
+        ///Const-qualified so that it can also be queried on a const RegexReplace.
+        ///@return constant ModifierTable pointer (null when no table is set).
+        ModifierTable const* getModifierTable() const {
+            return modtab;
+        }
+
+        ///Get start offset.
+        ///@return the start offset where matching starts for replace operation
+        PCRE2_SIZE getStartOffset() const {
+            return _start_offset;
+        }
+
+        /// Get PCRE2 option
+        ///@return PCRE2 option for replace
+        ///@see Regex::getPcre2Option()
+        ///@see RegexMatch::getPcre2Option()
+        Uint getPcre2Option() const {
+            return replace_opts;
+        }
+
+        /// Get JPCRE2 option
+        ///@return JPCRE2 option for replace
+        ///@see Regex::getJpcre2Option()
+        ///@see RegexMatch::getJpcre2Option()
+        Uint getJpcre2Option() const {
+            return jpcre2_replace_opts;
+        }
+
+        ///Get a pointer to the associated Regex object.
+        ///If no actual Regex object is associated, null is returned
+        ///@return A pointer to the associated constant Regex object or null
+        Regex const * getRegexObject() const {
+            return re;
+        }
+
+        ///Return pointer to the match context that was previously set with setMatchContext().
+        ///Handling memory is the caller's responsibility.
+        ///@return pointer to the match context (default: null).
+        MatchContext* getMatchContext() const {
+            return mcontext;
+        }
+
+        ///Get the pointer to the match data block that was set previously with setMatchDataBlock().
+        ///Handling memory is the caller's responsibility.
+        ///This function is virtual; its signature is kept as it is so that existing overriders still match.
+        ///@return pointer to the match data (default: null).
+        virtual MatchData* getMatchDataBlock(){
+            return mdata;
+        }
+
+        ///Get the initial buffer size that is being used by internal function pcre2_substitute
+        ///@return buffer_size
+        PCRE2_SIZE getBufferSize() const {
+            return buffer_size;
+        }
+
+        ///Get the number of replacements in the last replace operation.
+        ///If you set an external counter with RegexReplace::setReplaceCounter(),
+        ///a call to this getter method will dereference the pointer to the external counter
+        ///and return the value.
+        ///@return Last replace count
+        SIZE_T getLastReplaceCount() const {
+            return *last_replace_counter;
+        }
+
+        ///Set an external counter variable to store the replacement count.
+        ///This counter will be updated after each replacement operation on this object.
+        ///A call to this method will reset the internal counter to 0, thus when you reset the counter
+        ///to internal counter (by giving null as param), the previous replace count won't be available.
+        ///@param counter Pointer to a counter variable. Null sets the counter to default internal counter.
+        ///@return Reference to the calling RegexReplace object.
+        RegexReplace& setReplaceCounter(SIZE_T* counter){
+            last_replace_count = 0;
+            if(counter) last_replace_counter = counter;
+            else last_replace_counter = &last_replace_count;
+            return *this;
+        }
+
+        ///Associate a Regex object with this RegexReplace object.
+        ///Only the pointer is stored; the Regex object is not modified.
+        ///@param r Pointer to a Regex object.
+        ///@return Reference to the calling RegexReplace object.
+        RegexReplace& setRegexObject(Regex const *r){
+            this->re = r;
+            return *this;
+        }
+
+        ///Set the subject string for replace.
+        ///The string is copied into internal storage. Pass a pointer instead when
+        ///no copy is desired or the text is big.
+        ///@param s Subject string
+        ///@return Reference to the calling RegexReplace object
+        ///@see RegexMatch::setSubject()
+        RegexReplace& setSubject(String const &s) {
+            r_subject = s;
+            //a previously set user pointer must not stay in effect
+            r_subject_ptr = &r_subject;
+            return *this;
+        }
+
+        ///@overload
+        ///...
+        ///Set pointer to the subject string for replace, null pointer unsets it.
+        ///The underlying data is not modified unless RegexReplace::preplace() method is used.
+        ///@param s Pointer to subject string
+        ///@return Reference to the calling RegexReplace object
+        ///@see RegexMatch::setSubject()
+        RegexReplace& setSubject(String *s) {
+            if(!s) {
+                //null: fall back to the (emptied) internal subject
+                r_subject.clear();
+                r_subject_ptr = &r_subject;
+                return *this;
+            }
+            r_subject_ptr = s;
+            return *this;
+        }
+
+        /// Set the replacement string.
+        ///`$` is a special character which implies captured group.
+        ///
+        ///1. A numbered substring can be referenced with `$n` or `${n}` where n is the group number.
+        ///2. A named substring can be referenced with `${name}`, where 'name' is the group name.
+        ///3. A literal `$` can be given as `$$`.
+        ///
+        ///**Note:** The string is copied into internal storage. Pass a pointer instead when
+        ///no copy is desired or the text is big.
+        ///
+        ///@param s String to replace with
+        ///@return Reference to the calling RegexReplace object
+        RegexReplace& setReplaceWith(String const &s) {
+            r_replw = s;
+            //a previously set user pointer must not stay in effect
+            r_replw_ptr = &r_replw;
+            return *this;
+        }
+
+        ///@overload
+        ///...
+        ///@param s Pointer to the string to replace with, null pointer unsets it.
+        ///@return Reference to the calling RegexReplace object
+        RegexReplace& setReplaceWith(String const *s) {
+            if(!s) {
+                //null: fall back to the (emptied) internal replacement string
+                r_replw.clear();
+                r_replw_ptr = &r_replw;
+                return *this;
+            }
+            r_replw_ptr = s;
+            return *this;
+        }
+
+        ///Set the modifier string.
+        ///Both option words are put back to their defaults first, then
+        ///RegexReplace::changeModifier() adds the options named by the modifier string.
+        ///@param s Modifier string.
+        ///@return Reference to the calling RegexReplace object
+        ///@see RegexMatch::setModifier()
+        ///@see Regex::setModifier()
+        RegexReplace& setModifier(Modifier const& s) {
+            jpcre2_replace_opts = 0;
+            replace_opts = PCRE2_SUBSTITUTE_OVERFLOW_LENGTH; /* must not be initialized to 0 */
+            return changeModifier(s, true);
+        }
+
+        ///Use a custom modifier table.
+        ///@param mdt pointer to ModifierTable object.
+        ///@return Reference to the calling RegexReplace object.
+        RegexReplace& setModifierTable(ModifierTable const * mdt){
+            this->modtab = mdt;
+            return *this;
+        }
+
+        ///Set the initial buffer size to be allocated for replaced string (used by PCRE2)
+        ///@param x Buffer size
+        ///@return Reference to the calling RegexReplace object
+        RegexReplace& setBufferSize(PCRE2_SIZE x) {
+            this->buffer_size = x;
+            return *this;
+        }
+
+        ///Set start offset.
+        ///Set the offset where matching starts for replace operation
+        ///@param start_offset The offset where matching starts for replace operation
+        ///@return Reference to the calling RegexReplace object
+        RegexReplace& setStartOffset(PCRE2_SIZE start_offset){
+            this->_start_offset = start_offset;
+            return *this;
+        }
+
+        ///Overwrite the JPCRE2 option for replace with the given value.
+        ///@param x Option value
+        ///@return Reference to the calling RegexReplace object
+        ///@see RegexMatch::setJpcre2Option()
+        ///@see Regex::setJpcre2Option()
+
+        RegexReplace& setJpcre2Option(Uint x) {
+            this->jpcre2_replace_opts = x;
+            return *this;
+        }
+
+        ///Overwrite the PCRE2 option for replace with the given value.
+        ///PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is always added to the value.
+        ///@param x Option value
+        ///@return Reference to the calling RegexReplace object
+        ///@see RegexMatch::setPcre2Option()
+        ///@see Regex::setPcre2Option()
+
+        RegexReplace& setPcre2Option(Uint x) {
+            this->replace_opts = x | PCRE2_SUBSTITUTE_OVERFLOW_LENGTH;
+            return *this;
+        }
+
+        ///Set the match context to be used.
+        ///The native PCRE2 API may be used to create the match context.
+        ///Only the pointer is stored here; the RegexReplace object never frees it,
+        ///so freeing the memory is the user's responsibility.
+        ///@param match_context Pointer to match context.
+        ///@return Reference to the calling RegexReplace object.
+        RegexReplace& setMatchContext(MatchContext * match_context){
+            this->mcontext = match_context;
+            return *this;
+        }
+
+        ///Set the match data block to be used.
+        ///The native PCRE2 API may be used to create the match data block.
+        ///Only the pointer is stored here; the RegexReplace object never frees it,
+        ///so creating and freeing the memory is the user's responsibility.
+        ///@param match_data Pointer to match data.
+        ///@return Reference to the calling RegexReplace object.
+        RegexReplace& setMatchDataBlock(MatchData *match_data){
+            this->mdata = match_data;
+            return *this;
+        }
+
+        /// After a call to this function PCRE2 and JPCRE2 options will be properly set.
+        /// This function does not initialize or re-initialize options.
+        /// To set options from scratch use RegexReplace::setModifier(), which resets them before calling this function.
+        ///
+        /// If invalid modifier is detected, then the error number for the RegexReplace
+        /// object will be jpcre2::ERROR::INVALID_MODIFIER and error offset will be the modifier character.
+        /// You can get the message with RegexReplace::getErrorMessage() function.
+        /// @param mod Modifier string.
+        /// @param x Whether to add or remove option
+        /// @return Reference to the RegexReplace object
+        /// @see Regex::changeModifier()
+        /// @see RegexMatch::changeModifier()
+        RegexReplace& changeModifier(Modifier const& mod, bool x){
+            if(modtab) {
+                //a custom modifier table is set: let it do the conversion
+                modtab->toReplaceOption(mod, x, &replace_opts, &jpcre2_replace_opts, &error_number, &error_offset);
+            } else {
+                MOD::toReplaceOption(mod, x, &replace_opts, &jpcre2_replace_opts, &error_number, &error_offset);
+            }
+            return *this;
+        }
+
+        /// Add or remove a JPCRE2 option
+        /// @param opt JPCRE2 option value
+        /// @param x Add the option if it's true, remove otherwise.
+        /// @return Reference to the calling RegexReplace object
+        /// @see RegexMatch::changeJpcre2Option()
+        /// @see Regex::changeJpcre2Option()
+        RegexReplace& changeJpcre2Option(Uint opt, bool x) {
+            if(x) jpcre2_replace_opts = jpcre2_replace_opts | opt;
+            else jpcre2_replace_opts = jpcre2_replace_opts & ~opt;
+            return *this;
+        }
+
+        /// Add or remove a PCRE2 option.
+        /// PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is not forced back on here, so the user may remove it.
+        /// @param opt PCRE2 option value
+        /// @param x Add the option if it's true, remove otherwise.
+        /// @return Reference to the calling RegexReplace object
+        /// @see RegexMatch::changePcre2Option()
+        /// @see Regex::changePcre2Option()
+        RegexReplace& changePcre2Option(Uint opt, bool x) {
+            if(x) replace_opts = replace_opts | opt;
+            else replace_opts = replace_opts & ~opt;
+            //replace_opts |= PCRE2_SUBSTITUTE_OVERFLOW_LENGTH; /* It's important, but let user override it. */
+            return *this;
+        }
+
+        /// Parse modifier string and add equivalent PCRE2 and JPCRE2 options.
+        /// Convenience wrapper that calls RegexReplace::changeModifier() with `true`.
+        /// @param mod Modifier string.
+        /// @return Reference to the calling RegexReplace object
+        /// @see RegexMatch::addModifier()
+        /// @see Regex::addModifier()
+        RegexReplace& addModifier(Modifier const& mod){
+            return this->changeModifier(mod, true);
+        }
+
+        /// Add the given JPCRE2 option to the existing options for replace.
+        ///@param x Option value
+        ///@return Reference to the calling RegexReplace object
+        ///@see RegexMatch::addJpcre2Option()
+        ///@see Regex::addJpcre2Option()
+        RegexReplace& addJpcre2Option(Uint x) {
+            jpcre2_replace_opts = jpcre2_replace_opts | x;
+            return *this;
+        }
+
+        /// Add the given PCRE2 option to the existing options for replace.
+        ///@param x Option value
+        ///@return Reference to the calling RegexReplace object
+        ///@see RegexMatch::addPcre2Option()
+        ///@see Regex::addPcre2Option()
+        RegexReplace& addPcre2Option(Uint x) {
+            replace_opts = replace_opts | x;
+            return *this;
+        }
+
+        /// Perform regex replace by retrieving subject string, replacement string, modifier and other options from class variables.
+        /// In the replacement string (see RegexReplace::setReplaceWith()) `$` is a special character which implies captured group.
+        /// 1. A numbered substring can be referenced with `$n` or `${n}` where n is the group number.
+        /// 2. A named substring can be referenced with `${name}`, where 'name' is the group name.
+        /// 3. A literal `$` can be given as `$$`.
+        /// 4. Bash like features: `${<n>:-<string>}` and `${<n>:+<string1>:<string2>}`, where `<n>` is a group number or name.
+        ///
+        ///All options supported by pcre2_substitute are available.
+        ///
+        /// Note: This function calls pcre2_substitute() to do the replacement.
+        ///@return Replaced string
+        String replace(void);
+
+        /// Perl compatible replace method.
+        /// Modifies subject string in-place and returns replace count.
+        ///
+        /// The replacement is performed with `RegexReplace::replace()` which uses `pcre2_substitute()`.
+        /// @return replace count
+        SIZE_T preplace(){
+            //the result of replace() overwrites the subject
+            *r_subject_ptr = replace();
+            SIZE_T const replaced = *last_replace_counter;
+            return replaced;
+        }
+
+        /// Perl compatible replace method with match evaluator.
+        /// Modifies subject string in-place and returns replace count.
+        /// MatchEvaluator class does not have an implementation of this replace method, thus it is not possible
+        /// to re-use match data with preplace() method.
+        /// Re-using match data with preplace doesn't actually make any sense, because a new subject will
+        /// always require new match data.
+        ///
+        /// The MatchEvaluator copy is configured from this object (Regex object, subject, find-all flag,
+        /// match context, match data block, buffer size, start offset) and its
+        /// `MatchEvaluator::replace()` produces the new subject.
+        /// @param me MatchEvaluator object.
+        /// @return replace count
+        SIZE_T preplace(MatchEvaluator me){
+            bool const global = (getPcre2Option() & PCRE2_SUBSTITUTE_GLOBAL)!=0;
+            me.setRegexObject(getRegexObject());
+            me.setSubject(r_subject_ptr); //do not use method
+            me.setFindAll(global);
+            me.setMatchContext(getMatchContext());
+            me.setMatchDataBlock(getMatchDataBlock());
+            me.setBufferSize(getBufferSize());
+            me.setStartOffset(getStartOffset());
+            *r_subject_ptr = me.replace(true, getPcre2Option(), last_replace_counter);
+            return *last_replace_counter;
+        }
+
+        ///JPCRE2 native replace function.
+        ///A different name is adopted to
+        ///distinguish itself from the regular replace() function which
+        ///uses pcre2_substitute() to do the replacement; contrary to that,
+        ///it will provide a JPCRE2 native way of replacement operation.
+        ///It takes a MatchEvaluator object which provides a callback function that is used
+        ///to generate replacement string on the fly. Any replacement string set with
+        ///`RegexReplace::setReplaceWith()` function will have no effect.
+        ///The string returned by the callback function will be treated as literal and will
+        ///not go through any further processing.
+        ///
+        ///This function works on a copy of the MatchEvaluator, and thus makes no changes
+        ///to the original. The copy is modified as below:
+        ///
+        ///1. Global replacement will set FIND_ALL for match, unset otherwise.
+        ///2. Bad matching options such as `PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT` will be removed.
+        ///3. subject, start_offset and Regex object will change according to the RegexReplace object.
+        ///4. match context, and match data block will be changed according to the RegexReplace object.
+        ///
+        ///It calls MatchEvaluator::nreplace() on the MatchEvaluator object to perform the replacement.
+        ///
+        ///It always performs a new match.
+        ///@param me A MatchEvaluator object.
+        ///@return The resultant string after replacement.
+        ///@see MatchEvaluator::nreplace()
+        ///@see MatchEvaluator
+        ///@see MatchEvaluatorCallback
+        String nreplace(MatchEvaluator me){
+            bool const global = (getPcre2Option() & PCRE2_SUBSTITUTE_GLOBAL)!=0;
+            me.setRegexObject(getRegexObject());
+            me.setSubject(getSubjectPointer());
+            me.setFindAll(global);
+            me.setMatchContext(getMatchContext());
+            me.setMatchDataBlock(getMatchDataBlock());
+            me.setStartOffset(getStartOffset());
+            return me.nreplace(true, getJpcre2Option(), last_replace_counter);
+        }
+
+        ///PCRE2 compatible replace function that takes a MatchEvaluator.
+        ///String returned by callback function is processed by pcre2_substitute,
+        ///thus all PCRE2 substitute options are supported by this replace function.
+        ///
+        ///It always performs a new match.
+        ///@param me MatchEvaluator instance, (copied and modified according to this object).
+        ///@return resultant string.
+        ///@see replace()
+        String replace(MatchEvaluator me){
+            bool const global = (getPcre2Option() & PCRE2_SUBSTITUTE_GLOBAL)!=0;
+            me.setRegexObject(getRegexObject());
+            me.setSubject(getSubjectPointer());
+            me.setFindAll(global);
+            me.setMatchContext(getMatchContext());
+            me.setMatchDataBlock(getMatchDataBlock());
+            me.setBufferSize(getBufferSize());
+            me.setStartOffset(getStartOffset());
+            return me.replace(true, getPcre2Option(), last_replace_counter);
+        }
+    };
+
+
+    /** Provides public constructors to create Regex object.
+     * Each regex pattern needs an object of this class and each pattern needs to be compiled.
+     * Pattern compilation can be done using one of its overloaded constructors or the `Regex::compile()`
+     * member function.
+     *
+     * Examples:
+     *
+     * ```cpp
+     * jp::Regex re; //does not perform a compile
+     * re.compile("pattern", "modifier");
+     * jp::Regex re2("pattern", "modifier"); //performs a compile
+     * ```
+     *
+     */
+    class Regex {
+
+    private:
+
+        friend class RegexMatch;
+        friend class RegexReplace;
+        friend class MatchEvaluator;
+
+        String pat_str; //internal pattern storage
+        String const *pat_str_ptr; //points to pat_str or to a pattern supplied by the user
+        Pcre2Code *code; //compiled pattern, null when nothing is compiled
+        Uint compile_opts;
+        Uint jpcre2_compile_opts;
+        ModifierTable const * modtab;
+
+        CompileContext *ccontext;
+        //NOTE(review): no element type is visible here; the template argument looks lost in extraction - confirm against the upstream header.
+        std::vector tabv;
+
+
+        ///Put every scalar and pointer member into its default state.
+        void init_vars() {
+            pat_str_ptr = &pat_str;
+            code = 0;
+            ccontext = 0;
+            modtab = 0;
+            compile_opts = 0;
+            jpcre2_compile_opts = 0;
+            error_number = 0;
+            error_offset = 0;
+        }
+
+        ///Free the compiled code and null the pointer.
+        void freeRegexMemory() {
+            Pcre2Func::code_free(code);
+            code = 0; //we may use it again
+        }
+
+        ///Free the compile context and null the pointer.
+        void freeCompileContext(){
+            Pcre2Func::compile_context_free(ccontext);
+            ccontext = 0;
+        }
+
+        ///Copy options, error state and modifier table, and fix up the pattern pointer.
+        ///The pattern string itself, the tables, the compile context and the compiled
+        ///code are not touched here.
+        ///@param r object to copy from
+        void onlyCopy(Regex const &r){
+            //r.pat_str_ptr may point to other user data
+            if(r.pat_str_ptr == &r.pat_str) pat_str_ptr = &pat_str; //not r.pat_str
+            else pat_str_ptr = r.pat_str_ptr; //other user data
+
+            compile_opts = r.compile_opts;
+            jpcre2_compile_opts = r.jpcre2_compile_opts;
+            error_number = r.error_number;
+            error_offset = r.error_offset;
+            modtab = r.modtab;
+        }
+
+        ///Make this object an independent copy of `r`.
+        ///The compiled code is not copied: when `r` holds compiled code a fresh
+        ///compile() is run on the copied state, otherwise any code held here is freed.
+        ///@param r object to copy from
+        void deepCopy(Regex const &r) {
+            pat_str = r.pat_str; //must not use setPattern() here
+
+            onlyCopy(r);
+
+            //copy tables
+            tabv = r.tabv;
+
+            //freeCompileContext() leaves ccontext null, so it only needs to be set
+            //when the source actually has a context to copy
+            freeCompileContext();
+            if(r.ccontext) ccontext = Pcre2Func::compile_context_copy(r.ccontext);
+
+            //if tabv is not empty and ccontext is ok (not null) set the table pointer to ccontext
+            if(ccontext && !tabv.empty()) {
+                Pcre2Func::set_character_tables(ccontext, &tabv[0]);
+            }
+
+            //table pointer must be updated in the compiled code itself, jit memory copy is not available.
+            //copy is not going to work, we need a recompile.
+            //as all vars are already copied, we can just call compile()
+            if(r.code) compile(); //compile frees previous memory.
+            else freeRegexMemory();
+        }
+
+        #if JPCRE2_USE_MINIMUM_CXX_11
+
+        ///Take over the pattern, tables, compile context and compiled code of `r`.
+        ///The context and code pointers of `r` are nulled after they are taken.
+        ///@param r object to steal from
+        void deepMove(Regex& r) {
+            pat_str = std::move_if_noexcept(r.pat_str);
+
+            onlyCopy(r);
+
+            //steal tables
+            tabv = std::move_if_noexcept(r.tabv);
+
+            //steal ccontext
+            freeCompileContext();
+            ccontext = r.ccontext;
+            r.ccontext = 0; //must set this to 0
+            if(ccontext && !tabv.empty()) {
+                Pcre2Func::set_character_tables(ccontext, &tabv[0]);
+            }
+
+            //steal the code
+            freeRegexMemory();
+            code = r.code;
+            r.code = 0; //must set this to 0
+        }
+
+        #endif
+
+    protected:
+
+        int error_number;
+        PCRE2_SIZE error_offset;
+
+    public:
+
+        /// Default Constructor.
+        /// Sets all class variables to their defaults; no pattern is compiled.
+        Regex() {
+            this->init_vars();
+        }
+
+        ///Initialize the object and compile the given pattern.
+        /// @param re Pattern string
+        Regex(String const &re) {
+            this->init_vars();
+            this->compile(re);
+        }
+
+        /// @overload
+        /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
+        Regex(String const *re) {
+            init_vars();
+            compile(re);
+        }
+
+        ///@overload
+        /// @param re Pattern string .
+        /// @param mod Modifier string.
+        Regex(String const &re, Modifier const& mod) {
+            init_vars();
+            compile(re, mod);
+        }
+
+        ///@overload
+        /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
+        /// @param mod Modifier string.
+        Regex(String const *re, Modifier const& mod) {
+            init_vars();
+            compile(re, mod);
+        }
+
+        ///@overload
+        /// @param re Pattern string .
+        /// @param po PCRE2 option value
+        Regex(String const &re, Uint po) {
+            init_vars();
+            compile(re, po);
+        }
+
+        ///@overload
+        /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
+        /// @param po PCRE2 option value
+        Regex(String const *re, Uint po) {
+            init_vars();
+            compile(re, po);
+        }
+
+        ///@overload
+        /// @param re Pattern string .
+        /// @param po PCRE2 option value
+        /// @param jo JPCRE2 option value
+        Regex(String const &re, Uint po, Uint jo) {
+            init_vars();
+            compile(re, po, jo);
+        }
+
+        ///@overload
+        /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
+        /// @param po PCRE2 option value
+        /// @param jo JPCRE2 option value
+        Regex(String const *re, Uint po, Uint jo) {
+            init_vars();
+            compile(re, po, jo);
+        }
+
+        /// @overload
+        ///...
+        /// Copy constructor.
+        /// A separate and new compile is performed from the copied options.
+        ///
+        /// @param r Constant Regex object reference.
+        Regex(Regex const &r) {
+            init_vars(); //members must be in a defined state before deepCopy() frees/overwrites them
+            deepCopy(r);
+        }
+
+        /// Overloaded assignment operator.
+        /// Self-assignment is a no-op; otherwise the pattern of `r` is recompiled for this object.
+        /// @param r Regex const &
+        /// @return *this
+        Regex& operator=(Regex const &r) {
+            if (this == &r) return *this;
+            deepCopy(r);
+            return *this;
+        }
+
+
+        #if JPCRE2_USE_MINIMUM_CXX_11
+
+
+        /// @overload
+        ///...
+        /// Move constructor.
+        ///This constructor steals resources from the argument.
+        ///It leaves the argument in a valid but indeterminate state.
+        ///The indeterminate state can be returned to normal by calling reset() on that object.
+        /// @param r rvalue reference to a Regex object.
+        Regex(Regex&& r) {
+            init_vars(); //members must be in a defined state before deepMove() frees/overwrites them
+            deepMove(r);
+        }
+
+        ///@overload
+        ///...
+        /// Overloaded move-assignment operator.
+        ///This operator steals resources from the argument.
+        ///It leaves the argument in a valid but indeterminate state.
+        ///The indeterminate state can be returned to normal by calling reset() on that object.
+        /// @param r Regex&&
+        /// @return *this
+        Regex& operator=(Regex&& r) {
+            if (this == &r) return *this;
+            deepMove(r);
+            return *this;
+        }
+
+        /// Provides boolean check for the status of the object.
+        /// This overloaded boolean operator needs to be declared
+        /// explicit to prevent implicit conversion and overloading issues.
+        ///
+        /// We will only enable it if >=C++11 is being used, as the explicit keyword
+        /// for a function other than constructor is not supported in older compilers.
+        ///
+        /// If you are dealing with legacy code/compilers use the Double bang trick mentioned
+        /// in Regex::operator!().
+        ///
+        /// This helps us to check the status of the compiled regex like this:
+        ///
+        /// ```
+        /// jpcre2::select::Regex re("pat", "mod");
+        /// if(re) {
+        ///     std::cout<<"Compile success";
+        /// } else {
+        ///     std::cout<<"Compile failed";
+        /// }
+        /// ```
+        ///@return true if regex compiled successfully, false otherwise.
+        ///
+        explicit operator bool() const {
+            return (code != 0);
+        }
+        #endif
+
+        /// Provides boolean check for the status of the object.
+        /// This is a safe boolean approach (no implicit conversion or overloading).
+        /// We don't need the explicit keyword here and thus it's the preferable method
+        /// to check for object status that will work well with older compilers.
+        /// e.g:
+        ///
+        /// ```
+        /// jpcre2::select::Regex re("pat","mod");
+        /// if(!re) {
+        ///     std::cout<<"Compile failed";
+        /// } else {
+        ///     std::cout<<"Compiled successfully";
+        /// }
+        /// ```
+        /// Double bang trick:
+        ///
+        /// ```
+        /// jpcre2::select::Regex re("pat","mod");
+        /// if(!!re) {
+        ///     std::cout<<"Compiled successfully";
+        /// } else {
+        ///     std::cout<<"Compile failed";
+        /// }
+        /// ```
+        /// @return true if regex compile failed, false otherwise.
+        bool operator!() const {
+            return (code == 0);
+        }
+
+        /// Releases the compiled pattern and the compile context.
+        virtual ~Regex() {
+            freeRegexMemory();
+            freeCompileContext();
+        }
+
+        ///Reset all class variables to its default (initial) state including memory.
+        ///@return Reference to the calling Regex object.
+        Regex& reset() {
+            freeRegexMemory();
+            freeCompileContext();
+            String().swap(pat_str); //swapping with a temporary also gives up the string's capacity
+            init_vars();
+            return *this;
+        }
+
+        ///Clear all class variables to its default (initial) state (some memory may retain for further use).
+        ///@return Reference to the calling Regex object.
+        Regex& clear() {
+            freeRegexMemory();
+            freeCompileContext();
+            pat_str.clear();
+            init_vars();
+            return *this;
+        }
+
+        ///Reset regex compile related errors to zero.
+        ///@return A reference to the Regex object
+        ///@see RegexReplace::resetErrors()
+        ///@see RegexMatch::resetErrors()
+        Regex& resetErrors() {
+            error_number = 0;
+            error_offset = 0;
+            return *this;
+        }
+
+        /// Recreate character tables used by PCRE2.
+        /// You should call this function after changing the locale to remake the
+        /// character tables according to the new locale.
+        /// These character tables are used to compile the regex and used by match
+        /// and replace operation. A separate call to compile() will be required
+        /// to apply the new character tables.
+        ///
+        /// If the tables or the compile context cannot be allocated, the error number is
+        /// set to `PCRE2_ERROR_NOMEMORY` and the compile context is left without new tables.
+        /// @return Reference to the calling Regex object.
+        Regex& resetCharacterTables() {
+            //must pass 0, we are using free() to free the tables.
+            const unsigned char* tables = Pcre2Func::maketables(0);
+            if(!tables) {
+                //allocation failed: nothing to copy, keep the current tables
+                error_number = PCRE2_ERROR_NOMEMORY;
+                return *this;
+            }
+            //keep a private copy; 1088 is the number of bytes copied from the block returned by maketables()
+            tabv.assign(tables, tables+1088);
+            ::free((void*)tables); //must free memory
+            if(!ccontext)
+                ccontext = Pcre2Func::compile_context_create(0);
+            if(!ccontext) {
+                //no context to attach the tables to
+                error_number = PCRE2_ERROR_NOMEMORY;
+                return *this;
+            }
+            Pcre2Func::set_character_tables(ccontext, &tabv[0]);
+            return *this;
+        }
+
+        ///Get Pcre2 raw compiled code pointer.
+        ///@return pointer to constant pcre2_code or null.
+        Pcre2Code const* getPcre2Code() const{
+            return code;
+        }
+
+        /// Get pattern string
+        ///@return pattern string of type jpcre2::select::String
+        String getPattern() const {
+            return *pat_str_ptr;
+        }
+
+        /// Get pointer to pattern string
+        ///@return Pointer to constant pattern string
+        String const * getPatternPointer() const {
+            return pat_str_ptr;
+        }
+
+
+        /// Calculate modifier string from PCRE2 and JPCRE2 options and return it.
+        ///
+        /// **Mixed or combined modifier**.
+        ///
+        /// Some modifier may include other modifiers i.e they have the same meaning of some modifiers
+        /// combined together. For example, the 'n' modifier includes the 'u' modifier and together they
+        /// are equivalent to `PCRE2_UTF | PCRE2_UCP`. When you set a modifier like this, both options
+        /// get set, and when you remove the 'n' modifier (with `Regex::changeModifier()`), both will get removed.
+        ///@return Calculated modifier string (std::string)
+        ///@see RegexMatch::getModifier()
+        ///@see RegexReplace::getModifier()
+        std::string getModifier() const {
+            //a custom modifier table, when set, takes precedence over the default one
+            return modtab ? modtab->fromCompileOption(compile_opts, jpcre2_compile_opts)
+                          : MOD::fromCompileOption(compile_opts, jpcre2_compile_opts);
+        }
+
+        /// Get PCRE2 option
+        /// @return Compile time PCRE2 option value
+        ///@see RegexReplace::getPcre2Option()
+        ///@see RegexMatch::getPcre2Option()
+        Uint getPcre2Option() const {
+            return compile_opts;
+        }
+
+        /// Get JPCRE2 option
+        /// @return Compile time JPCRE2 option value
+        ///@see RegexReplace::getJpcre2Option()
+        ///@see RegexMatch::getJpcre2Option()
+        Uint getJpcre2Option() const {
+            return jpcre2_compile_opts;
+        }
+
+        /// Returns the last error number
+        ///@return Last error number
+        int getErrorNumber() const {
+            return error_number;
+        }
+
+        /// Returns the last error offset
+        ///@return Last error offset
+        int getErrorOffset() const {
+            return (int)error_offset;
+        }
+
+        /// Returns the last error message
+        ///@return Last error message
+        String getErrorMessage() const {
+            //qualified on purpose: the unqualified name would find this member function itself
+            return select::getErrorMessage(error_number, error_offset);
+        }
+
+        ///Get new line convention from compiled code.
+        ///If the query fails, the error number of this object is set to the PCRE2 error code.
+        ///@return New line option value or 0.
+        ///```
+        ///PCRE2_NEWLINE_CR        Carriage return only
+        ///PCRE2_NEWLINE_LF        Linefeed only
+        ///PCRE2_NEWLINE_CRLF      CR followed by LF only
+        ///PCRE2_NEWLINE_ANYCRLF   Any of the above
+        ///PCRE2_NEWLINE_ANY       Any Unicode newline sequence
+        ///```
+        Uint getNewLine() {
+            if(!code) return 0;
+            Uint newline = 0;
+            int ret = Pcre2Func::pattern_info(code, PCRE2_INFO_NEWLINE, &newline);
+            if(ret < 0) error_number = ret;
+            return newline;
+        }
+
+        ///Get the modifier table that is set,
+        ///@return constant ModifierTable pointer.
+        ModifierTable const* getModifierTable() const {
+            return modtab;
+        }
+
+
+        ///Set new line convention.
+        ///@param value New line option value.
+        ///```
+        ///PCRE2_NEWLINE_CR        Carriage return only
+        ///PCRE2_NEWLINE_LF        Linefeed only
+        ///PCRE2_NEWLINE_CRLF      CR followed by LF only
+        ///PCRE2_NEWLINE_ANYCRLF   Any of the above
+        ///PCRE2_NEWLINE_ANY       Any Unicode newline sequence
+        ///```
+        ///A compile context is created on first use. If it cannot be created, or the value is
+        ///rejected, the error number of this object is set and the convention is left unchanged.
+        ///@return Reference to the calling Regex object
+        Regex& setNewLine(Uint value){
+            if(!ccontext)
+                ccontext = Pcre2Func::compile_context_create(0);
+            if(!ccontext) {
+                //context creation failed: there is nothing to store the convention in
+                error_number = PCRE2_ERROR_NOMEMORY;
+                return *this;
+            }
+            int ret = Pcre2Func::set_newline(ccontext, value);
+            if(ret < 0) error_number = ret;
+            return *this;
+        }
+
+        /// Set the pattern string to compile
+        /// The string is copied into this object.
+        /// @param re Pattern string
+        /// @return Reference to the calling Regex object.
+        Regex& setPattern(String const &re) {
+            pat_str = re;
+            pat_str_ptr = &pat_str; //must overwrite
+            return *this;
+        }
+
+        /// @overload
+        /// The string is not copied: only the pointer is stored, so the pointed-to string
+        /// must stay alive for as long as this object uses it.
+        /// @param re Pattern string pointer, null pointer will unset it.
+        /// @return Reference to the calling Regex object.
+        Regex& setPattern(String const *re) {
+            if(re) pat_str_ptr = re;
+            else {
+                pat_str.clear();
+                pat_str_ptr = &pat_str;
+            }
+            return *this;
+        }
+
+        /// set the modifier (resets all JPCRE2 and PCRE2 options) by calling Regex::changeModifier().
+        /// Re-initializes the option bits for PCRE2 and JPCRE2 options, then parses the modifier and sets
+        /// equivalent PCRE2 and JPCRE2 options.
+        /// @param x Modifier string.
+        /// @return Reference to the calling Regex object.
+        /// @see RegexMatch::setModifier()
+        /// @see RegexReplace::setModifier()
+        Regex& setModifier(Modifier const& x) {
+            compile_opts = 0;
+            jpcre2_compile_opts = 0;
+            return changeModifier(x, true);
+        }
+
+        ///Set a custom modifier table to be used.
+        ///Only the pointer is stored.
+        ///@param mdt pointer to ModifierTable object.
+        /// @return Reference to the calling Regex object.
+        Regex& setModifierTable(ModifierTable const * mdt){
+            modtab = mdt;
+            return *this;
+        }
+
+        /// Set JPCRE2 option for compile (overwrites existing option)
+        /// @param x Option value
+        /// @return Reference to the calling Regex object.
+        /// @see RegexMatch::setJpcre2Option()
+        /// @see RegexReplace::setJpcre2Option()
+        Regex& setJpcre2Option(Uint x) {
+            jpcre2_compile_opts = x;
+            return *this;
+        }
+
+        /// Set PCRE2 option for compile (overwrites existing option)
+        /// @param x Option value
+        /// @return Reference to the calling Regex object.
+        /// @see RegexMatch::setPcre2Option()
+        /// @see RegexReplace::setPcre2Option()
+        Regex& setPcre2Option(Uint x) {
+            compile_opts = x;
+            return *this;
+        }
+
+        /// Parse modifier and add/remove equivalent PCRE2 and JPCRE2 options.
+        /// This function does not initialize or re-initialize options.
+        /// If you want to set options from scratch, initialize them to 0 before calling this function.
+        ///
+        /// If invalid modifier is detected, then the error number for the Regex
+        /// object will be jpcre2::ERROR::INVALID_MODIFIER and error offset will be the modifier character.
+        /// You can get the message with Regex::getErrorMessage() function.
+        /// @param mod Modifier string.
+        /// @param x Whether to add or remove option
+        /// @return Reference to the calling Regex object
+        /// @see RegexMatch::changeModifier()
+        /// @see RegexReplace::changeModifier()
+        Regex& changeModifier(Modifier const& mod, bool x){
+            //a custom modifier table, when set, takes precedence over the default one
+            modtab ? modtab->toCompileOption(mod, x, &compile_opts, &jpcre2_compile_opts, &error_number, &error_offset)
+                   : MOD::toCompileOption(mod, x, &compile_opts, &jpcre2_compile_opts, &error_number, &error_offset);
+            return *this;
+        }
+
+        /// Add or remove a JPCRE2 option
+        /// @param opt JPCRE2 option value
+        /// @param x Add the option if it's true, remove otherwise.
+        /// @return Reference to the calling Regex object
+        /// @see RegexMatch::changeJpcre2Option()
+        /// @see RegexReplace::changeJpcre2Option()
+        Regex& changeJpcre2Option(Uint opt, bool x) {
+            jpcre2_compile_opts = x ? jpcre2_compile_opts | opt : jpcre2_compile_opts & ~opt;
+            return *this;
+        }
+
+        /// Add or remove a PCRE2 option
+        /// @param opt PCRE2 option value
+        /// @param x Add the option if it's true, remove otherwise.
+        /// @return Reference to the calling Regex object
+        /// @see RegexMatch::changePcre2Option()
+        /// @see RegexReplace::changePcre2Option()
+        Regex& changePcre2Option(Uint opt, bool x) {
+            compile_opts = x ? compile_opts | opt : compile_opts & ~opt;
+            return *this;
+        }
+
+        /// Parse modifier string and add equivalent PCRE2 and JPCRE2 options.
+        /// This is just a wrapper of the original function Regex::changeModifier()
+        /// provided for convenience.
+        /// @param mod Modifier string.
+        /// @return Reference to the calling Regex object
+        /// @see RegexMatch::addModifier()
+        /// @see RegexReplace::addModifier()
+        Regex& addModifier(Modifier const& mod){
+            return changeModifier(mod, true);
+        }
+
+        /// Add option to existing JPCRE2 options for compile
+        /// @param x Option value
+        /// @return Reference to the calling Regex object
+        /// @see RegexMatch::addJpcre2Option()
+        /// @see RegexReplace::addJpcre2Option()
+        Regex& addJpcre2Option(Uint x) {
+            jpcre2_compile_opts |= x;
+            return *this;
+        }
+
+        /// Add option to existing PCRE2 options for compile
+        /// @param x Option value
+        /// @return Reference to the calling Regex object
+        /// @see RegexMatch::addPcre2Option()
+        /// @see RegexReplace::addPcre2Option()
+        Regex& addPcre2Option(Uint x) {
+            compile_opts |= x;
+            return *this;
+        }
+
+        ///Compile pattern using info from class variables.
+        ///@see Regex::compile(String const &re, Uint po, Uint jo)
+        ///@see Regex::compile(String const &re, Uint po)
+        ///@see Regex::compile(String const &re, Modifier mod)
+        ///@see Regex::compile(String const &re)
+        void compile(void);
+
+        ///@overload
+        ///...
+        /// Set the specified parameters, then compile the pattern using information from class variables.
+        /// @param re Pattern string
+        /// @param po PCRE2 option
+        /// @param jo JPCRE2 option
+        void compile(String const &re, Uint po, Uint jo) {
+            setPattern(re).setPcre2Option(po).setJpcre2Option(jo);
+            compile();
+        }
+
+
+        ///@overload
+        /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
+        /// @param po PCRE2 option
+        /// @param jo JPCRE2 option
+        void compile(String const *re, Uint po, Uint jo) {
+            setPattern(re).setPcre2Option(po).setJpcre2Option(jo);
+            compile();
+        }
+
+        ///@overload
+        /// The JPCRE2 options currently set on the object are kept.
+        /// @param re Pattern string
+        /// @param po PCRE2 option
+        void compile(String const &re, Uint po) {
+            setPattern(re).setPcre2Option(po);
+            compile();
+        }
+
+        ///@overload
+        /// The JPCRE2 options currently set on the object are kept.
+        /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
+        /// @param po PCRE2 option
+        void compile(String const *re, Uint po) {
+            setPattern(re).setPcre2Option(po);
+            compile();
+        }
+
+        /// @overload
+        /// Both PCRE2 and JPCRE2 options are reset and then derived from the modifier string.
+        /// @param re Pattern string
+        /// @param mod Modifier string.
+        void compile(String const &re, Modifier const& mod) {
+            setPattern(re).setModifier(mod);
+            compile();
+        }
+
+        ///@overload
+        /// Both PCRE2 and JPCRE2 options are reset and then derived from the modifier string.
+        /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
+        /// @param mod Modifier string.
+        void compile(String const *re, Modifier const& mod) {
+            setPattern(re).setModifier(mod);
+            compile();
+        }
+
+        ///@overload
+        /// The options currently set on the object are kept.
+        /// @param re Pattern string .
+        void compile(String const &re) {
+            setPattern(re);
+            compile();
+        }
+
+        ///@overload
+        /// The options currently set on the object are kept.
+        /// @param re Pointer to pattern string. A null pointer will unset the pattern and perform a compile with empty pattern.
+        void compile(String const *re) {
+            setPattern(re);
+            compile();
+        }
+
+        ///Returns a default constructed RegexMatch object by value.
+        ///This object is initialized with the same modifier table
+        ///as this Regex object.
+        ///@return RegexMatch object.
+        RegexMatch initMatch(){
+            RegexMatch rm(this); //the match object is constructed from a pointer to this Regex
+            rm.setModifierTable(modtab);
+            return rm;
+        }
+
+        ///Synonym for initMatch()
+        ///@return RegexMatch object by value.
+        RegexMatch getMatchObject(){
+            return initMatch();
+        }
+
+        /// Perform regex match and return match count using a temporary match object.
+        /// This temporary match object will get available options from this Regex object,
+        /// that includes modifier table.
+        /// @param s Subject string .
+        /// @param mod Modifier string.
+        /// @param start_offset Offset from where matching will start in the subject string.
+        /// @return Match count
+        /// @see RegexMatch::match()
+        SIZE_T match(String const &s, Modifier const& mod, PCRE2_SIZE start_offset=0) {
+            return initMatch().setStartOffset(start_offset).setSubject(s).setModifier(mod).match();
+        }
+
+        ///@overload
+        ///...
+        ///@param s Pointer to subject string. A null pointer will unset the subject and perform a match with empty subject.
+        ///@param mod Modifier string.
+        ///@param start_offset Offset from where matching will start in the subject string.
+        ///@return Match count
+        SIZE_T match(String const *s, Modifier const& mod, PCRE2_SIZE start_offset=0) {
+            return initMatch().setStartOffset(start_offset).setSubject(s).setModifier(mod).match();
+        }
+
+        ///@overload
+        ///...
+        /// @param s Subject string .
+        /// @param start_offset Offset from where matching will start in the subject string.
+        /// @return Match count
+        /// @see RegexMatch::match()
+        SIZE_T match(String const &s, PCRE2_SIZE start_offset=0) {
+            return initMatch().setStartOffset(start_offset).setSubject(s).match();
+        }
+
+        ///@overload
+        ///...
+        /// @param s Pointer to subject string. A null pointer will unset the subject and perform a match with empty subject.
+        /// @param start_offset Offset from where matching will start in the subject string.
+        /// @return Match count
+        /// @see RegexMatch::match()
+        SIZE_T match(String const *s, PCRE2_SIZE start_offset=0) {
+            return initMatch().setStartOffset(start_offset).setSubject(s).match();
+        }
+
+        ///Returns a default constructed RegexReplace object by value.
+        ///This object is initialized with the same modifier table as this Regex object.
+        ///@return RegexReplace object.
+        RegexReplace initReplace(){
+            RegexReplace rr(this); //the replace object is constructed from a pointer to this Regex
+            rr.setModifierTable(modtab);
+            return rr;
+        }
+
+        ///Synonym for initReplace()
+        ///@return RegexReplace object.
+        RegexReplace getReplaceObject(){
+            return initReplace();
+        }
+
+        /// Perform regex replace and return the replaced string using a temporary replace object.
+        /// This temporary replace object will get available options from this Regex object,
+        /// that includes modifier table.
+        /// @param mains Subject string.
+        /// @param repl String to replace with
+        /// @param mod Modifier string.
+        ///@param counter Pointer to a counter to store the number of replacement done.
+        /// @return Resultant string after regex replace
+        /// @see RegexReplace::replace()
+        String replace(String const &mains, String const &repl, Modifier const& mod="", SIZE_T* counter=0) {
+            return initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(counter).replace();
+        }
+
+        ///@overload
+        /// @param mains Pointer to subject string
+        /// @param repl String to replace with
+        /// @param mod Modifier string.
+        ///@param counter Pointer to a counter to store the number of replacement done.
+        /// @return Resultant string after regex replace
+        /// @see RegexReplace::replace()
+        String replace(String *mains, String const &repl, Modifier const& mod="", SIZE_T* counter=0) {
+            return initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(counter).replace();
+        }
+
+        ///@overload
+        ///...
+        /// @param mains Subject string
+        /// @param repl Pointer to string to replace with
+        /// @param mod Modifier string.
+        ///@param counter Pointer to a counter to store the number of replacement done.
+        /// @return Resultant string after regex replace
+        /// @see RegexReplace::replace()
+        String replace(String const &mains, String const *repl, Modifier const& mod="", SIZE_T* counter=0) {
+            return initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(counter).replace();
+        }
+
+        ///@overload
+        ///...
+        /// @param mains Pointer to subject string
+        /// @param repl Pointer to string to replace with
+        /// @param mod Modifier string.
+        ///@param counter Pointer to a counter to store the number of replacement done.
+        /// @return Resultant string after regex replace
+        /// @see RegexReplace::replace()
+        String replace(String *mains, String const *repl, Modifier const& mod="", SIZE_T* counter=0) {
+            return initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(counter).replace();
+        }
+
+        /// Perl compatible replace method.
+        /// Modifies subject string in-place and returns replace count.
+        /// When `mains` is null nothing is done and 0 is returned.
+        ///
+        /// It's a shorthand method to `RegexReplace::preplace()`.
+        /// @param mains Pointer to subject string.
+        /// @param repl Replacement string (string to replace with).
+        /// @param mod Modifier string.
+        /// @return replace count.
+        SIZE_T preplace(String * mains, String const& repl, Modifier const& mod=""){
+            SIZE_T counter = 0;
+            if(mains) *mains = initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(&counter).replace();
+            return counter;
+        }
+
+        /// @overload
+        ///
+        /// Perl compatible replace method.
+        /// Modifies subject string in-place and returns replace count.
+        ///
+        /// It's a shorthand method to `RegexReplace::preplace()`.
+        /// @param mains Pointer to subject string.
+        /// @param repl Pointer to replacement string (string to replace with).
+        /// @param mod Modifier string.
+        /// @return replace count.
+        SIZE_T preplace(String * mains, String const* repl, Modifier const& mod=""){
+            SIZE_T counter = 0;
+            //a null subject pointer means there is nothing to modify: the count stays 0
+            if(mains) *mains = initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(&counter).replace();
+            return counter;
+        }
+
+        /// @overload
+        ///
+        /// Perl compatible replace method.
+        /// Returns replace count and discards subject string.
+        ///
+        /// It's a shorthand method to `RegexReplace::preplace()`.
+        /// @param mains Subject string.
+        /// @param repl Replacement string (string to replace with).
+        /// @param mod Modifier string.
+        /// @return replace count.
+        SIZE_T preplace(String const& mains, String const& repl, Modifier const& mod=""){
+            SIZE_T counter = 0;
+            //the replaced string is thrown away, only the counter is of interest
+            initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(&counter).replace();
+            return counter;
+        }
+
+        /// @overload
+        ///
+        /// Perl compatible replace method.
+        /// Returns replace count and discards subject string.
+        ///
+        /// It's a shorthand method to `RegexReplace::preplace()`.
+        /// @param mains Subject string.
+        /// @param repl Pointer to replacement string (string to replace with).
+        /// @param mod Modifier string.
+        /// @return replace count.
+ SIZE_T preplace(String const& mains, String const* repl, Modifier const& mod=""){ + SIZE_T counter = 0; + initReplace().setSubject(mains).setReplaceWith(repl).setModifier(mod).setReplaceCounter(&counter).replace(); + return counter; + } + }; + + private: + //prevent object instantiation of select class + select(); + select(select const &); + #if JPCRE2_USE_MINIMUM_CXX_11 + select(select&&); + #endif + ~select(); +};//struct select +}//jpcre2 namespace + + +inline void jpcre2::ModifierTable::parseModifierTable(std::string& tabjs, VecOpt& tabjv, + std::string& tab_s, VecOpt& tab_v, + std::string const& tabs, VecOpt const& tabv){ + SIZE_T n = tabs.length(); + JPCRE2_ASSERT(n == tabv.size(), ("ValueError: Could not set Modifier table.\ + Modifier character and value tables are not of the same size (" + _tostdstring(n) + " == " + _tostdstring(tabv.size()) + ").").c_str()); + tabjs.clear(); + tab_s.clear(); tab_s.reserve(n); + tabjv.clear(); + tab_v.clear(); tab_v.reserve(n); + for(SIZE_T i=0;i class Map> +void jpcre2::select::Regex::compile() { +#else +template +void jpcre2::select::Regex::compile() { +#endif + //Get c_str of pattern + Pcre2Sptr c_pattern = (Pcre2Sptr) pat_str_ptr->c_str(); + int err_number = 0; + PCRE2_SIZE err_offset = 0; + + /************************************************************************** + * Compile the regular expression pattern, and handle + * any errors that are detected. 
+ *************************************************************************/ + + //first release any previous memory + freeRegexMemory(); + code = Pcre2Func::compile( c_pattern, /* the pattern */ + PCRE2_ZERO_TERMINATED, /* indicates pattern is zero-terminated */ + compile_opts, /* default options */ + &err_number, /* for error number */ + &err_offset, /* for error offset */ + ccontext); /* use compile context */ + + if (code == 0) { + /* Compilation failed */ + //must not free regex memory, the only function has that right is the destructor + error_number = err_number; + error_offset = err_offset; + return; + } else if ((jpcre2_compile_opts & JIT_COMPILE) != 0) { + ///perform JIT compilation it it's enabled + int jit_ret = Pcre2Func::jit_compile(code, PCRE2_JIT_COMPLETE); + if(jit_ret < 0) error_number = jit_ret; + } + //everything's OK +} + + +#if JPCRE2_USE_MINIMUM_CXX_11 +template class Map> +typename jpcre2::select::String jpcre2::select::MatchEvaluator::replace(bool do_match, Uint replace_opts, SIZE_T * counter) { +#else +template +typename jpcre2::select::String jpcre2::select::MatchEvaluator::replace(bool do_match, Uint replace_opts, SIZE_T * counter) { +#endif + if(counter) *counter = 0; + + replace_opts |= PCRE2_SUBSTITUTE_OVERFLOW_LENGTH; + replace_opts &= ~PCRE2_SUBSTITUTE_GLOBAL; + Regex const * re = RegexMatch::getRegexObject(); + // If re or re->code is null, return the subject string unmodified. + if (!re || re->code == 0) + return RegexMatch::getSubject(); + + Pcre2Sptr r_subject_ptr = (Pcre2Sptr) RegexMatch::getSubjectPointer()->c_str(); + //~ SIZE_T totlen = RegexMatch::getSubjectPointer()->length(); + + if(do_match) match(); + SIZE_T mcount = vec_soff.size(); + // if mcount is 0, return the subject string. (there's no need to worry about re) + if(!mcount) return RegexMatch::getSubject(); + SIZE_T current_offset = 0; //needs to be zero, not start_offset, because it's from where unmatched parts will be copied. 
+ String res, tmp; + + //A check, this check is not fullproof. + SIZE_T last = vec_eoff.size(); + last = (last>0)?last-1:0; + JPCRE2_ASSERT(vec_eoff[last] <= RegexMatch::getSubject().size(), "ValueError: subject string is not of the required size, may be it's changed!!!\ + If you are using esisting match data, try a new match."); + + //loop through the matches + for(SIZE_T i=0;ic_str(); + //substr(vec_soff[i], vec_eoff[i] - vec_soff[i]).c_str();//->substr(vec_soff[i], vec_eoff[i]-vec_soff[i]); + Pcre2Sptr subject = r_subject_ptr + vec_soff[i]; + PCRE2_SIZE subject_length = vec_eoff[i] - vec_soff[i]; + + ///the string returned from the callback is the replacement string. + Pcre2Sptr replace = (Pcre2Sptr) tmp.c_str(); + PCRE2_SIZE replace_length = tmp.length(); + bool retry = true; + int ret = 0; + PCRE2_SIZE outlengthptr = 0; + Pcre2Uchar* output_buffer = new Pcre2Uchar[outlengthptr + 1](); + + while (true) { + ret = Pcre2Func::substitute( + re->code, /*Points to the compiled pattern*/ + subject, /*Points to the subject string*/ + subject_length, /*Length of the subject string*/ + 0, /*Offset in the subject at which to start matching*/ //must be zero + replace_opts, /*Option bits*/ + RegexMatch::mdata, /*Points to a match data block, or is NULL*/ + RegexMatch::mcontext, /*Points to a match context, or is NULL*/ + replace, /*Points to the replacement string*/ + replace_length, /*Length of the replacement string*/ + output_buffer, /*Points to the output buffer*/ + &outlengthptr /*Points to the length of the output buffer*/ + ); + + if (ret < 0) { + //Handle errors + if ((replace_opts & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0 + && ret == (int) PCRE2_ERROR_NOMEMORY && retry) { + retry = false; + /// If initial #buffer_size wasn't big enough for resultant string, + /// we will try once more with a new buffer size adjusted to the length of the resultant string. 
+ delete[] output_buffer; + output_buffer = new Pcre2Uchar[outlengthptr + 1](); + // Go and try to perform the substitute again + continue; + } else { + RegexMatch::error_number = ret; + delete[] output_buffer; + return RegexMatch::getSubject(); + } + } + //If everything's ok exit the loop + break; + } + res += String((Char*) output_buffer,(Char*) (output_buffer + outlengthptr) ); + delete[] output_buffer; + if(counter) *counter += ret; + //if FIND_ALL is not set, single match will be performed + if((RegexMatch::getJpcre2Option() & FIND_ALL) == 0) break; + } + //All matched parts have been dealt with. + //now copy rest of the string from current_offset + res += RegexMatch::getSubject().substr(current_offset, String::npos); + return res; +} + + +#if JPCRE2_USE_MINIMUM_CXX_11 +template class Map> +typename jpcre2::select::String jpcre2::select::MatchEvaluator::nreplace(bool do_match, Uint jo, SIZE_T* counter){ +#else +template +typename jpcre2::select::String jpcre2::select::MatchEvaluator::nreplace(bool do_match, Uint jo, SIZE_T* counter){ +#endif + if(counter) *counter = 0; + if(do_match) match(); + SIZE_T mcount = vec_soff.size(); + // if mcount is 0, return the subject string. (there's no need to worry about re) + if(!mcount) return RegexMatch::getSubject(); + SIZE_T current_offset = 0; //no need for worrying about start offset, it's handled by match and we get valid offsets out of it. 
+ String res; + + //A check, this check is not fullproof + SIZE_T last = vec_eoff.size(); + last = (last>0)?last-1:0; + JPCRE2_ASSERT(vec_eoff[last] <= RegexMatch::getSubject().size(), "ValueError: subject string is not of the required size, may be it's changed!!!\ + If you are using esisting match data, try a new match."); + + //loop through the matches + for(SIZE_T i=0;i class Map> +typename jpcre2::select::String jpcre2::select::RegexReplace::replace() { +#else +template +typename jpcre2::select::String jpcre2::select::RegexReplace::replace() { +#endif + *last_replace_counter = 0; + + // If re or re->code is null, return the subject string unmodified. + if (!re || re->code == 0) + return *r_subject_ptr; + + Pcre2Sptr subject = (Pcre2Sptr) r_subject_ptr->c_str(); + PCRE2_SIZE subject_length = r_subject_ptr->length(); + Pcre2Sptr replace = (Pcre2Sptr) r_replw_ptr->c_str(); + PCRE2_SIZE replace_length = r_replw_ptr->length(); + PCRE2_SIZE outlengthptr = (PCRE2_SIZE) buffer_size; + bool retry = true; + int ret = 0; + Pcre2Uchar* output_buffer = new Pcre2Uchar[outlengthptr + 1](); + + while (true) { + ret = Pcre2Func::substitute( + re->code, /*Points to the compiled pattern*/ + subject, /*Points to the subject string*/ + subject_length, /*Length of the subject string*/ + _start_offset, /*Offset in the subject at which to start matching*/ + replace_opts, /*Option bits*/ + mdata, /*Points to a match data block, or is NULL*/ + mcontext, /*Points to a match context, or is NULL*/ + replace, /*Points to the replacement string*/ + replace_length, /*Length of the replacement string*/ + output_buffer, /*Points to the output buffer*/ + &outlengthptr /*Points to the length of the output buffer*/ + ); + + if (ret < 0) { + //Handle errors + if ((replace_opts & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0 + && ret == (int) PCRE2_ERROR_NOMEMORY && retry) { + retry = false; + /// If initial #buffer_size wasn't big enough for resultant string, + /// we will try once more with a new buffer 
size adjusted to the length of the resultant string. + delete[] output_buffer; + output_buffer = new Pcre2Uchar[outlengthptr + 1](); + // Go and try to perform the substitute again + continue; + } else { + error_number = ret; + delete[] output_buffer; + return *r_subject_ptr; + } + } + //If everything's ok exit the loop + break; + } + *last_replace_counter += ret; + String result = String((Char*) output_buffer,(Char*) (output_buffer + outlengthptr) ); + delete[] output_buffer; + return result; +} + + +#if JPCRE2_USE_MINIMUM_CXX_11 +template class Map> +bool jpcre2::select::RegexMatch::getNumberedSubstrings(int rc, Pcre2Sptr subject, PCRE2_SIZE* ovector, uint32_t ovector_count) { +#else +template +bool jpcre2::select::RegexMatch::getNumberedSubstrings(int rc, Pcre2Sptr subject, PCRE2_SIZE* ovector, uint32_t ovector_count) { +#endif + NumSub num_sub; + uint32_t rcu = rc; + num_sub.reserve(rcu); //we know exactly how many elements it will have. + uint32_t i; + for (i = 0u; i < rcu; i++) + num_sub.push_back(String((Char*)(subject + ovector[2*i]), ovector[2*i+1] - ovector[2*i])); + for (uint32_t j = i; j < ovector_count; j++) + num_sub.push_back(String()); + vec_num->push_back(num_sub); //this function shouldn't be called if this vector is null + return true; +} + + +#if JPCRE2_USE_MINIMUM_CXX_11 +template class Map> +bool jpcre2::select::RegexMatch::getNamedSubstrings(int namecount, int name_entry_size, + Pcre2Sptr name_table, + Pcre2Sptr subject, PCRE2_SIZE* ovector ) { +#else +template +bool jpcre2::select::RegexMatch::getNamedSubstrings(int namecount, int name_entry_size, + Pcre2Sptr name_table, + Pcre2Sptr subject, PCRE2_SIZE* ovector ) { +#endif + Pcre2Sptr tabptr = name_table; + String key; + MapNas map_nas; + MapNtN map_ntn; + for (int i = 0; i < namecount; i++) { + int n; + if(sizeof( Char_T ) * CHAR_BIT == 8){ + n = (int)((tabptr[0] << 8) | tabptr[1]); + key = toString((Char*) (tabptr + 2)); + } + else{ + n = (int)tabptr[0]; + key = toString((Char*) (tabptr + 
1)); + } + //Use of tabptr is finished for this iteration, let's increment it now. + tabptr += name_entry_size; + String value((Char*)(subject + ovector[2*n]), ovector[2*n+1] - ovector[2*n]); //n, not i. + if(vec_nas) map_nas[key] = value; + if(vec_ntn) map_ntn[key] = n; + } + //push the maps into vectors: + if(vec_nas) vec_nas->push_back(map_nas); + if(vec_ntn) vec_ntn->push_back(map_ntn); + return true; +} + + +#if JPCRE2_USE_MINIMUM_CXX_11 +template class Map> +jpcre2::SIZE_T jpcre2::select::RegexMatch::match() { +#else +template +jpcre2::SIZE_T jpcre2::select::RegexMatch::match() { +#endif + + // If re or re->code is null, return 0 as the match count + if (!re || re->code == 0) + return 0; + + Pcre2Sptr subject = (Pcre2Sptr) m_subject_ptr->c_str(); + Pcre2Sptr name_table = 0; + int crlf_is_newline = 0; + int namecount = 0; + int name_entry_size = 0; + int rc = 0; + uint32_t ovector_count = 0; + int utf = 0; + SIZE_T count = 0; + Uint option_bits; + Uint newline = 0; + PCRE2_SIZE *ovector = 0; + SIZE_T subject_length = 0; + MatchData *match_data = 0; + subject_length = m_subject_ptr->length(); + bool mdc = false; //mdata created. + + + if (vec_num) vec_num->clear(); + if (vec_nas) vec_nas->clear(); + if (vec_ntn) vec_ntn->clear(); + if(vec_soff) vec_soff->clear(); + if(vec_eoff) vec_eoff->clear(); + + + /* Using this function ensures that the block is exactly the right size for + the number of capturing parentheses in the pattern. 
*/ + if(mdata) match_data = mdata; + else { + match_data = Pcre2Func::match_data_create_from_pattern(re->code, 0); + mdc = true; + } + + rc = Pcre2Func::match( re->code, /* the compiled pattern */ + subject, /* the subject string */ + subject_length, /* the length of the subject */ + _start_offset, /* start at offset 'start_offset' in the subject */ + match_opts, /* default options */ + match_data, /* block for storing the result */ + mcontext); /* use default match context */ + + /* Matching failed: handle error cases */ + + if (rc < 0) { + if(mdc) + Pcre2Func::match_data_free(match_data); /* Release memory used for the match */ + //must not free code. This function has no right to modify regex + switch (rc) { + case PCRE2_ERROR_NOMATCH: + return count; + /* + Handle other special cases if you like + */ + default:; + } + error_number = rc; + return count; + } + + ++count; //Increment the counter + /* Match succeded. Get a pointer to the output vector, where string offsets are + stored. */ + ovector = Pcre2Func::get_ovector_pointer(match_data); + ovector_count = Pcre2Func::get_ovector_count(match_data); + + /************************************************************************//* + * We have found the first match within the subject string. If the output * + * vector wasn't big enough, say so. Then output any substrings that were * + * captured. * + *************************************************************************/ + + /* The output vector wasn't big enough. This should not happen, because we used + pcre2_match_data_create_from_pattern() above. */ + + if (rc == 0) { + //ovector was not big enough for all the captured substrings; + error_number = (int)ERROR::INSUFFICIENT_OVECTOR; + rc = ovector_count; + // TODO: We may throw exception at this point. 
+ } + //match succeeded at offset ovector[0] + if(vec_soff) vec_soff->push_back(ovector[0]); + if(vec_eoff) vec_eoff->push_back(ovector[1]); + + // Get numbered substrings if vec_num isn't null + if (vec_num) { //must do null check + if(!getNumberedSubstrings(rc, subject, ovector, ovector_count)) + return count; + } + + //get named substrings if either vec_nas or vec_ntn is given. + if (vec_nas || vec_ntn) { + /* See if there are any named substrings, and if so, show them by name. First + we have to extract the count of named parentheses from the pattern. */ + + (void) Pcre2Func::pattern_info( re->code, /* the compiled pattern */ + PCRE2_INFO_NAMECOUNT, /* get the number of named substrings */ + &namecount); /* where to put the answer */ + + if (namecount <= 0); /*No named substrings*/ + + else { + /* Before we can access the substrings, we must extract the table for + translating names to numbers, and the size of each entry in the table. */ + + (void) Pcre2Func::pattern_info( re->code, /* the compiled pattern */ + PCRE2_INFO_NAMETABLE, /* address of the table */ + &name_table); /* where to put the answer */ + + (void) Pcre2Func::pattern_info( re->code, /* the compiled pattern */ + PCRE2_INFO_NAMEENTRYSIZE, /* size of each entry in the table */ + &name_entry_size); /* where to put the answer */ + + /* Now we can scan the table and, for each entry, print the number, the name, + and the substring itself. In the 8-bit library the number is held in two + bytes, most significant first. */ + + + // Get named substrings if vec_nas isn't null. + // Get name to number map if vec_ntn isn't null. 
+ } + //the following must be outside the above if-else + if(!getNamedSubstrings(namecount, name_entry_size, name_table, subject, ovector)) + return count; + } + + /***********************************************************************//* + * If the "g" modifier was given, we want to continue * + * to search for additional matches in the subject string, in a similar * + * way to the /g option in Perl. This turns out to be trickier than you * + * might think because of the possibility of matching an empty string. * + * What happens is as follows: * + * * + * If the previous match was NOT for an empty string, we can just start * + * the next match at the end of the previous one. * + * * + * If the previous match WAS for an empty string, we can't do that, as it * + * would lead to an infinite loop. Instead, a call of pcre2_match() is * + * made with the PCRE2_NOTEMPTY_ATSTART and PCRE2_ANCHORED flags set. The * + * first of these tells PCRE2 that an empty string at the start of the * + * subject is not a valid match; other possibilities must be tried. The * + * second flag restricts PCRE2 to one match attempt at the initial string * + * position. If this match succeeds, an alternative to the empty string * + * match has been found, and we can print it and proceed round the loop, * + * advancing by the length of whatever was found. If this match does not * + * succeed, we still stay in the loop, advancing by just one character. * + * In UTF-8 mode, which can be set by (*UTF) in the pattern, this may be * + * more than one byte. * + * * + * However, there is a complication concerned with newlines. When the * + * newline convention is such that CRLF is a valid newline, we must * + * advance by two characters rather than one. The newline convention can * + * be set in the regex by (*CR), etc.; if not, we must find the default. 
* + *************************************************************************/ + + if ((jpcre2_match_opts & FIND_ALL) == 0) { + if(mdc) + Pcre2Func::match_data_free(match_data); /* Release the memory that was used */ + // Must not free code. This function has no right to modify regex. + return count; /* Exit the program. */ + } + + /* Before running the loop, check for UTF-8 and whether CRLF is a valid newline + sequence. First, find the options with which the regex was compiled and extract + the UTF state. */ + + (void) Pcre2Func::pattern_info(re->code, PCRE2_INFO_ALLOPTIONS, &option_bits); + utf = ((option_bits & PCRE2_UTF) != 0); + + /* Now find the newline convention and see whether CRLF is a valid newline + sequence. */ + + (void) Pcre2Func::pattern_info(re->code, PCRE2_INFO_NEWLINE, &newline); + crlf_is_newline = newline == PCRE2_NEWLINE_ANY + || newline == PCRE2_NEWLINE_CRLF + || newline == PCRE2_NEWLINE_ANYCRLF; + + /** We got the first match. Now loop for second and subsequent matches. */ + + for (;;) { + + Uint options = match_opts; /* Normally no options */ + PCRE2_SIZE start_offset = ovector[1]; /* Start at end of previous match */ + + /* If the previous match was for an empty string, we are finished if we are + at the end of the subject. Otherwise, arrange to run another match at the + same point to see if a non-empty match can be found. */ + + if (ovector[0] == ovector[1]) { + if (ovector[0] == subject_length) + break; + options |= PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED; + } + + /// Run the next matching operation */ + + rc = Pcre2Func::match( re->code, /* the compiled pattern */ + subject, /* the subject string */ + subject_length, /* the length of the subject */ + start_offset, /* starting offset in the subject */ + options, /* options */ + match_data, /* block for storing the result */ + mcontext); /* use match context */ + + /* This time, a result of NOMATCH isn't an error. 
If the value in "options" + is zero, it just means we have found all possible matches, so the loop ends. + Otherwise, it means we have failed to find a non-empty-string match at a + point where there was a previous empty-string match. In this case, we do what + Perl does: advance the matching position by one character, and continue. We + do this by setting the "end of previous match" offset, because that is picked + up at the top of the loop as the point at which to start again. + + There are two complications: (a) When CRLF is a valid newline sequence, and + the current position is just before it, advance by an extra byte. (b) + Otherwise we must ensure that we skip an entire UTF character if we are in + UTF mode. */ + + if (rc == PCRE2_ERROR_NOMATCH) { + if (options == 0) + break; /* All matches found */ + ovector[1] = start_offset + 1; /* Advance one code unit */ + if (crlf_is_newline && /* If CRLF is newline & */ + start_offset < subject_length - 1 && /* we are at CRLF, */ + subject[start_offset] == '\r' && subject[start_offset + 1] == '\n') + ovector[1] += 1; /* Advance by one more. */ + else if (utf) { /* advance a whole UTF (8 or 16), for UTF-32, it's not needed */ + while (ovector[1] < subject_length) { + if(sizeof( Char_T ) * CHAR_BIT == 8 && (subject[ovector[1]] & 0xc0) != 0x80) break; + else if(sizeof( Char_T ) * CHAR_BIT == 16 && (subject[ovector[1]] & 0xfc00) != 0xdc00) break; + else if(sizeof( Char_T ) * CHAR_BIT == 32) break; //must be else if + ovector[1] += 1; + } + } + continue; /* Go round the loop again */ + } + + /* Other matching errors are not recoverable. */ + + if (rc < 0) { + if(mdc) + Pcre2Func::match_data_free(match_data); + // Must not free code. This function has no right to modify regex. + error_number = rc; + return count; + } + + /* match succeeded */ + ++count; //Increment the counter + + if (rc == 0) { + /* The match succeeded, but the output vector wasn't big enough. This + should not happen. 
*/ + error_number = (int)ERROR::INSUFFICIENT_OVECTOR; + rc = ovector_count; + // TODO: We may throw exception at this point. + } + + //match succeded at ovector[0] + if(vec_soff) vec_soff->push_back(ovector[0]); + if(vec_eoff) vec_eoff->push_back(ovector[1]); + + /* As before, get substrings stored in the output vector by number, and then + also any named substrings. */ + + // Get numbered substrings if vec_num isn't null + if (vec_num) { //must do null check + if(!getNumberedSubstrings(rc, subject, ovector, ovector_count)) + return count; + } + + if (vec_nas || vec_ntn) { + //must call this whether we have named substrings or not: + if(!getNamedSubstrings(namecount, name_entry_size, name_table, subject, ovector)) + return count; + } + } /* End of loop to find second and subsequent matches */ + + if(mdc) + Pcre2Func::match_data_free(match_data); + // Must not free code. This function has no right to modify regex. + return count; +} + +#undef JPCRE2_VECTOR_DATA_ASSERT +#undef JPCRE2_UNUSED +#undef JPCRE2_USE_MINIMUM_CXX_11 + +//some macro documentation for doxygen + +#ifdef __DOXYGEN__ + + +#ifndef JPCRE2_USE_FUNCTION_POINTER_CALLBACK +#define JPCRE2_USE_FUNCTION_POINTER_CALLBACK +#endif + +#ifndef JPCRE2_NDEBUG +#define JPCRE2_NDEBUG +#endif + + +///@def JPCRE2_USE_FUNCTION_POINTER_CALLBACK +///Use function pointer in all cases for MatchEvaluatorCallback function. +///By default function pointer is used for callback in MatchEvaluator when using =C++11` compiler `std::function` instead of function pointer is used. +///If this macro is defined before including jpcre2.hpp, function pointer will be used in all cases. +///It you are using lambda function with captures, stick with `std::function`, on the other hand, if +///you are using older compilers, you might want to use function pointer instead. +/// +///For example, with gcc-4.7, `std::function` will give compile error in C++11 mode, in such cases where full C++11 +///support is not available, use function pointer. 
+ + +///@def JPCRE2_ASSERT(cond, msg) +///Macro to call `jpcre2::jassert()` with file path and line number. +///When `NDEBUG` or `JPCRE2_NDEBUG` is defined before including this header, this macro will +///be defined as `((void)0)` thus eliminating this assertion. +///@param cond condtion (boolean) +///@param msg message + + +///@def JPCRE2_NDEBUG +///Macro to remove debug codes. +///Using this macro is discouraged even in production mode but provided for completeness. +///You should not use this macro to bypass any error in your program. +///Define this macro before including this header if you want to remove debug codes included in this library. +/// +///Using the standard `NDEBUG` macro will have the same effect, +///but it is recommended that you use `JPCRE2_NDEBUG` to strip out debug codes specifically for this library. + +#endif + + +#endif diff --git a/src/lintmanager.cpp b/src/lintmanager.cpp index 1f1714e..4ad0c6e 100644 --- a/src/lintmanager.cpp +++ b/src/lintmanager.cpp @@ -193,8 +193,7 @@ bool LintManager::isMatchFile(RuleBase *const rule) FOR_EACH (std::set::const_iterator, it, masks) { // printf("check regexp: %s\n", (*it).c_str()); - std::regex exp(*it); - if (std::regex_match (mFileName, exp)) + if (isMatch(mFileName, *it)) { // printf("matched\n"); return true; diff --git a/src/rules/baseclass.cpp b/src/rules/baseclass.cpp index c6c06bf..edb339e 100644 --- a/src/rules/baseclass.cpp +++ b/src/rules/baseclass.cpp @@ -20,7 +20,7 @@ #include "template.hpp" -registerRuleExt(baseclass, "006", "(.+)[.](cpp|h)") +registerRuleExt(baseclass, "006", R"( \.(cpp|h)$ )") startRule(baseclass) { @@ -32,11 +32,8 @@ endRule(baseclass) parseLineRule(baseclass) { - std::smatch m; if (data.find("friend ") == std::string::npos && - isMatch(data, "(.*)(class|struct) (([a-zA-Z_0123456789]+)|" - "([a-zA-Z_0123456789 ]+)([a-zA-Z_0123456789]+))[:](.*)", - m)) + isMatch(data, R"( (class|struct) \s+? [\w\s]+? 
[^\s] : )")) { print("Wrong align after class name and before ':'."); } diff --git a/src/rules/brackets.cpp b/src/rules/brackets.cpp index a323249..2bb025f 100644 --- a/src/rules/brackets.cpp +++ b/src/rules/brackets.cpp @@ -20,7 +20,7 @@ #include "template.hpp" -registerRuleExt(brackets, "015", "(.+)[.](cpp|h)") +registerRuleExt(brackets, "015", R"( \.(cpp|h)$ )") startRule(brackets) { diff --git a/src/rules/constructor.cpp b/src/rules/constructor.cpp index e1795cb..c8bc8ef 100644 --- a/src/rules/constructor.cpp +++ b/src/rules/constructor.cpp @@ -20,7 +20,7 @@ #include "template.hpp" -registerRuleExt(constructor, "006", "(.+)[.](cpp|h)") +registerRuleExt(constructor, "006", R"( \.(cpp|h)$ )") bool foundConstructor(false); int align = 0; diff --git a/src/rules/constructorbrackets.cpp b/src/rules/constructorbrackets.cpp index 73dd9f3..17ae235 100644 --- a/src/rules/constructorbrackets.cpp +++ b/src/rules/constructorbrackets.cpp @@ -20,7 +20,7 @@ #include "template.hpp" -registerRuleExt(constructorBrackets, "017", "(.+)[.](cpp|h)") +registerRuleExt(constructorBrackets, "017", R"( \.(cpp|h)$ )") startRule(constructorBrackets) { @@ -34,8 +34,7 @@ parseLineRule(constructorBrackets) { if (data.find("new ") != std::string::npos) { - std::smatch m; - if (isMatch(data, "(.+)[ ]new[ ]([a-zA-Z_0123456789]+)[(][)](.+)", m)) + if (isMatch(data, R"( \b new \s+? \w+? \s*? \( \s*? 
\) )")) { print("Remove () after new CLASS()."); } diff --git a/src/rules/copyconstructor.cpp b/src/rules/copyconstructor.cpp index 3aeeccb..e96234e 100644 --- a/src/rules/copyconstructor.cpp +++ b/src/rules/copyconstructor.cpp @@ -20,7 +20,7 @@ #include "template.hpp" -registerRuleExt(copyConstructor, "016", "(.+)[.](cpp|h)") +registerRuleExt(copyConstructor, "016", R"( \.(cpp|h)$ )") namespace { @@ -51,20 +51,17 @@ endRule(copyConstructor) parseLineRule(copyConstructor) { - std::smatch m; - if (isMatch(data, "(.*)(class|struct) ([a-zA-Z_0123456789]+)" - " (|not)final(.*)", - m)) + jp::VecNum m; + if (isMatch(data, R"( (class|struct) \s+? (\w+?) \s+? (not)?final )", m)) { - const std::string str = m.str(3); + const std::string str = m[0][2]; mClasses.insert(str); mLines[str] = line; } - else if (isMatch(data, "([ ]*)(A_DELETE_COPY|A_DEFAULT_COPY)[(]" - "([a-zA-Z_0123456789]+)[)](.*)", - m)) + else if (isMatch(data, R"( ^ \s*? (A_DELETE_COPY|A_DEFAULT_COPY) )" + R"( \s*? \( \s*? (\w+?) \s*? \) )", m)) { - const std::string str = m.str(3); + const std::string str = m[0][2]; if (mClasses.find(str) != mClasses.end()) { mClasses.erase(str); diff --git a/src/rules/debug.cpp b/src/rules/debug.cpp index 82e16fe..f2cbb96 100644 --- a/src/rules/debug.cpp +++ b/src/rules/debug.cpp @@ -20,7 +20,7 @@ #include "template.hpp" -registerRuleExt(debugH, "003", "(.+)[.]h") +registerRuleExt(debugH, "003", R"( \.h$ )") startRule(debugH) { @@ -47,7 +47,7 @@ registerRuleExt(debugCpp, "004", "(.+)[.]cpp") startRule(debugCpp) { - if (isMatch(file, "(.*)[/]debug[/]([^/]*)[.](cpp|h)")) + if (isMatch(file, R"( /debug/ .*? 
\.(cpp|h) $ )")) terminateRule(); foundDebug = false; } diff --git a/src/rules/dump.cpp b/src/rules/dump.cpp index a01aefe..0e909ac 100644 --- a/src/rules/dump.cpp +++ b/src/rules/dump.cpp @@ -21,7 +21,7 @@ #include "template.hpp" /* -registerRuleExt(dumpCpp, "001", "(.+)[.]cpp") +registerRuleExt(dumpCpp, "001", R"( \.cpp$ )") startRule(dumpCpp) { @@ -39,7 +39,7 @@ parseLineRule(dumpCpp) } -registerRuleExt(dumpH, "002", "(.+)[.]h") +registerRuleExt(dumpH, "002", R"( \.h$ )") startRule(dumpH) { diff --git a/src/rules/final.cpp b/src/rules/final.cpp index bc2acb7..a968c54 100644 --- a/src/rules/final.cpp +++ b/src/rules/final.cpp @@ -20,7 +20,7 @@ #include "template.hpp" -registerRuleExt(finalCheck, "007", "(.+)[.](cpp|h)") +registerRuleExt(finalCheck, "007", R"( \.(cpp|h)$ )") startRule(finalCheck) { @@ -38,8 +38,8 @@ endRule(finalCheck) parseLineRule(finalCheck) { - if (isMatch(data, "([ ]*)(static |)(class|struct) ([a-zA-Z_0123456789]+)" - "($|( [:])([^;]+)(.*))")) + if (isMatch(data, R"( ^ \s*? (static \s+?)? (class|struct) )" + R"( \s+? \w+? \s*? 
($ | :) )")) { print("Need add final or notfinal into class declaration"); } diff --git a/src/rules/formatting.cpp b/src/rules/formatting.cpp index 1a49848..175be96 100644 --- a/src/rules/formatting.cpp +++ b/src/rules/formatting.cpp @@ -20,7 +20,9 @@ #include "template.hpp" -registerRuleExt(formatting, "011", "(.+)[.](cpp|h)") +#include + +registerRuleExt(formatting, "011", R"( \.(cpp|h)$ )") namespace { diff --git a/src/rules/include.cpp b/src/rules/include.cpp index 347bf58..c6e4246 100644 --- a/src/rules/include.cpp +++ b/src/rules/include.cpp @@ -20,7 +20,7 @@ #include "template.hpp" -registerRuleExt(include, "008", "(.+)[.](cpp|h)") +registerRuleExt(include, "008", R"( \.(cpp|h)$ )") startRule(include) { @@ -61,4 +61,3 @@ parseLineRule(include) } } } - diff --git a/src/rules/license.cpp b/src/rules/license.cpp index 0778831..9bd8fd9 100644 --- a/src/rules/license.cpp +++ b/src/rules/license.cpp @@ -25,12 +25,12 @@ bool foundManaPlus(false); bool checkText(false); bool foundLicenseNumber(false); -registerRuleExt(license, "005", "(.+)[.](h|cpp|cc|inc)") +registerRuleExt(license, "005", R"( \.(h|cpp|cc|inc)$ )") startRule(license) { - if (isMatch(file, "(.*)[/]((debug|sdl2gfx)[/]([^/]*)|mumblemanager" - "|SDLMain|utils/physfsrwops|utils/base64)[.](cpp|h)")) + if (isMatch(file, R"( / ( (debug|sdl2gfx)/.*? | mumblemanager | )" + R"( SDLMain | utils/physfsrwops | utils/base64 ) \.(cpp|h) $ )")) { terminateRule(); } diff --git a/src/rules/packet.cpp b/src/rules/packet.cpp index 003f28b..68362c2 100644 --- a/src/rules/packet.cpp +++ b/src/rules/packet.cpp @@ -20,7 +20,7 @@ #include "template.hpp" -registerRuleExt(packet, "012", "(.+)[.](cpp)") +registerRuleExt(packet, "012", R"( \.cpp$ )") startRule(packet) { @@ -35,4 +35,3 @@ parseLineRule(packet) if (data.find("createOutPacket(SMSG") != std::string::npos) print("Wrong output packet creation. 
Must be 'createOutPacket(CMSG...'"); } - diff --git a/src/rules/po.cpp b/src/rules/po.cpp index ab40041..98d0698 100644 --- a/src/rules/po.cpp +++ b/src/rules/po.cpp @@ -20,7 +20,7 @@ #include "template.hpp" -registerRuleExt(po, "010", "(.+)[.](po)") +registerRuleExt(po, "010", R"( \.po$ )") bool readId(false); bool readStr(false); @@ -244,4 +244,3 @@ parseLineRule(po) msgStr = std::string(); } } - diff --git a/src/rules/translation.cpp b/src/rules/translation.cpp index e8a2fdd..c951483 100644 --- a/src/rules/translation.cpp +++ b/src/rules/translation.cpp @@ -20,7 +20,7 @@ #include "template.hpp" -registerRuleExt(Translation, "014", "(.+)[.](cpp|h|hpp|cc|inc)") +registerRuleExt(Translation, "014", R"( \.(cpp|h|hpp|cc|inc)$ )") startRule(Translation) { diff --git a/src/rules/virtual.cpp b/src/rules/virtual.cpp index a52a369..4772d44 100644 --- a/src/rules/virtual.cpp +++ b/src/rules/virtual.cpp @@ -20,7 +20,7 @@ #include "template.hpp" -registerRuleExt(virtualCheck, "013", "(.+)[.](cpp|h)") +registerRuleExt(virtualCheck, "013", R"( \.(cpp|h)$ )") startRule(virtualCheck) { @@ -32,7 +32,7 @@ endRule(virtualCheck) parseLineRule(virtualCheck) { - if (isMatch(data, "(.+)virtual[ ](.+)[ ](override|final)(.*)") && + if (isMatch(data, R"( virtual \s+? .+? \s+? (override|final) )") && data.find("finalize") == std::string::npos) { print("Keywords virtual is useless if used with override or final"); diff --git a/src/rules/xml.cpp b/src/rules/xml.cpp index 1736a4f..d7bae95 100644 --- a/src/rules/xml.cpp +++ b/src/rules/xml.cpp @@ -20,7 +20,7 @@ #include "template.hpp" -registerRuleExt(xml, "009", "(.+)[.](xml)") +registerRuleExt(xml, "009", R"( \.xml$ )") startRule(xml) { @@ -39,4 +39,3 @@ parseLineRule(xml) print("Wrong xml header. 
Must be '" + str + "'"); } } - diff --git a/src/stringutils.cpp b/src/stringutils.cpp index fc80017..4f6ff6a 100644 --- a/src/stringutils.cpp +++ b/src/stringutils.cpp @@ -709,16 +709,33 @@ void secureChatCommand(std::string &str) bool isMatch(const std::string &str, const std::string &exp) { - std::regex regExp(exp); - return std::regex_match(str, regExp); + jp::Regex re(exp, "x"); + if (!re) + { + printf("Invalid regular expression '%s': %s at offset %d\n", exp.c_str(), + re.getErrorMessage().c_str(), re.getErrorOffset()); + exit(1); + } + return re.match(str); } bool isMatch(const std::string &str, const std::string &exp, - std::smatch &m) + jp::VecNum &m) { - std::regex regExp(exp); - return std::regex_match(str, m, regExp); + jp::Regex re(exp, "x"); + if (!re) + { + printf("Invalid regular expression '%s': %s at offset %d\n", exp.c_str(), + re.getErrorMessage().c_str(), re.getErrorOffset()); + exit(1); + } + jp::RegexMatch rm; + size_t count = rm.setRegexObject(&re) + .setSubject(&str) + .setNumberedSubstringVector(&m) + .match(); + return count > 0; } bool fileExists(const std::string &name) diff --git a/src/stringutils.h b/src/stringutils.h index 91c9ea8..ed4265e 100644 --- a/src/stringutils.h +++ b/src/stringutils.h @@ -30,10 +30,13 @@ #include #include #include -#include + +#include "jpcre2.hpp" #include "localconsts.h" +typedef jpcre2::select jp; + /** * Trims spaces off the end and the beginning of the given string. * @@ -245,7 +248,7 @@ bool isMatch(const std::string &str, bool isMatch(const std::string &str, const std::string &exp, - std::smatch &m); + jp::VecNum &m); bool fileExists(const std::string &name); diff --git a/src/template.hpp b/src/template.hpp index cf67ebe..ae514ae 100644 --- a/src/template.hpp +++ b/src/template.hpp @@ -22,7 +22,6 @@ #include "stringutils.h" #include -#include #include "localconsts.h" -- cgit v1.2.3-70-g09d2