Review notes (text before the first "diff --git" line is ignored by git apply):
- The input was flattened and the header names inside <...> were missing.
  <pcre2.h> and <regex.h> follow from the code; <stdbool.h>, <ksr/regex.h>,
  <stdlib.h> and <string.h> are assumptions - TODO confirm against the tree.
- Blank lines were restored from the hunk line counts; indentation width is assumed.
- Added lines in src/regex.c and tests/ksregex.c were corrected (see the comments
  in the hunks); the "index" lines of those two files were dropped because their
  post-image blob ids no longer apply.

diff --git a/include/ksr/regex.h b/include/ksr/regex.h
index 4665252..3565068 100644
--- a/include/ksr/regex.h
+++ b/include/ksr/regex.h
@@ -1,13 +1,14 @@
 #pragma once
 
 #include <stdbool.h>
-#include <regex.h>
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
 
 /**
  * Regex structure.
  */
 typedef struct {
-    regex_t expr;
+    pcre2_code *expr;
     unsigned nmatches;
     char **matches;
 } ksregex;
@@ -16,17 +17,17 @@ typedef struct {
  * Create a ksregex from an expression.
  * @param expression - the expression to compile.
  * @param nmatches - the number of matches groups to get, 0 if you want none.
- * @param flags - regex flags.
+ * @param options - PCRE2 primary option bits.
  * @return - the created regex, NULL if an error happen.
  */
-ksregex* ksregex_new(const char *expression, unsigned nmatches, int flags);
+ksregex *ksregex_new(PCRE2_SPTR expression, unsigned nmatches, int options);
 /**
  * Create a ksregex from an expression with 0 match group.
  * @param expression - the expression to compile.
- * @param flags - regex flags.
+ * @param options - PCRE2 primary option bits.
  */
-static inline ksregex* ksregex_new_nogroup(const char *expression, int flags)
-{ return ksregex_new(expression, 0, flags); }
+static inline ksregex *ksregex_new_nogroup(PCRE2_SPTR expression, int options)
+{ return ksregex_new(expression, 0, options); }
 
 /**
  * Try to match the regex with a tested string.
@@ -59,5 +60,8 @@ static inline void ksregex_free(ksregex *regex)
     // free matches groups.
     _ksregex_free_matches(regex);
 
+    // free pcre expr.
+    pcre2_code_free(regex->expr);
+
     free(regex); // free regex data.
 }
diff --git a/meson.build b/meson.build
index 6f9730b..43f7f39 100644
--- a/meson.build
+++ b/meson.build
@@ -5,6 +5,8 @@ c = meson.get_compiler('c')
 deps = [ dependency('jemalloc') ]
 # add pthread dependency
 deps += c.find_library('pthread')
+# add pcre2 dependency
+deps += c.find_library('pcre2-8')
 
 # includes
 include_dirs = include_directories([ 'include' ])
diff --git a/src/regex.c b/src/regex.c
--- a/src/regex.c
+++ b/src/regex.c
@@ -1,35 +1,58 @@
 #include <ksr/regex.h>
-#include <stdlib.h>
 #include <string.h>
 
-ksregex* ksregex_new(const char *expression, unsigned nmatches, int flags)
+ksregex *ksregex_new(PCRE2_SPTR expression, unsigned nmatches, int options)
 {
     ksregex *regex = malloc(sizeof(ksregex));
+    if (regex == NULL)
+        return NULL; // out of memory.
+
     regex->nmatches = nmatches;
     regex->matches = NULL;
 
-    if (regcomp(&regex->expr, expression, // compile expression in regex.
-        flags | REG_EXTENDED | (nmatches == 0 ? REG_NOSUB : 0)) != 0)
+    int error_code;
+    PCRE2_SIZE error_offset;
+    // compile expression in regex.
+    regex->expr = pcre2_compile(expression, PCRE2_ZERO_TERMINATED, (uint32_t)options,
+                                &error_code, &error_offset, NULL);
+
+    // pcre2_compile() signals failure by returning NULL (compile error codes are positive).
+    if (regex->expr == NULL)
+    {
+        free(regex); // do not leak the wrapper.
         return NULL; // an error happened, returning NULL.
+    }
 
     return regex; // return created regex.
 }
 
-void ksregex_save_string_matches(ksregex *regex, regmatch_t *raw_matches, const char *source)
+void ksregex_save_string_matches(ksregex *regex, pcre2_match_data *match_data)
 {
     // allocate string matches array.
     regex->matches = malloc(sizeof(char *) * regex->nmatches);
+    if (regex->matches == NULL)
+        return; // out of memory, nothing is saved.
 
-    // save full string match.
-    regex->matches[0] = strdup(source);
-
-    for(unsigned i = 1; i < regex->nmatches; i++)
+    for(unsigned i = 0; i < regex->nmatches; i++)
     { // for each group match, converting it to an independent string.
-        regoff_t current_match_length = raw_matches[i].rm_eo - raw_matches[i].rm_so;
-        regex->matches[i] = malloc(current_match_length + 1); // allocate the current group match string.
-        // copying the string part from source to the current match string.
-        strncpy(regex->matches[i], &source[raw_matches[i].rm_so], current_match_length);
-        regex->matches[i][current_match_length] = 0; // set end of string.
+        // get buffer of current pcre2 match.
+        PCRE2_UCHAR *pcre2_buffer = NULL;
+        PCRE2_SIZE pcre2_buffer_len = 0;
+        int status = pcre2_substring_get_bynumber(match_data, i, &pcre2_buffer, &pcre2_buffer_len);
+        if (status != 0)
+            pcre2_buffer_len = 0; // unset or missing group: store an empty string.
+
+        regex->matches[i] = malloc(pcre2_buffer_len + 1); // allocate the current group match string.
+        if (regex->matches[i] != NULL)
+        {
+            // copying the current buffer to the current match string.
+            if (pcre2_buffer_len > 0)
+                memcpy(regex->matches[i], pcre2_buffer, pcre2_buffer_len);
+            regex->matches[i][pcre2_buffer_len] = 0; // set end of string.
+        }
+
+        if (status == 0)
+            pcre2_substring_free(pcre2_buffer); // free pcre2 buffer.
     }
 }
 
@@ -38,21 +61,25 @@ bool ksregex_matches(ksregex *regex, const char *tested)
     // if there was matches, free them.
     _ksregex_free_matches(regex);
 
-    // allocate raw matches array.
-    regmatch_t *raw_matches = malloc(sizeof(regmatch_t) * regex->nmatches);
+    // create match data.
+    pcre2_match_data *match_data = pcre2_match_data_create(regex->nmatches, NULL);
+    if (match_data == NULL)
+        return false; // out of memory.
 
-    if (regexec(&regex->expr, tested, regex->nmatches, raw_matches, 0) != 0)
-    { // an error happened, freeing results then returning NULL.
-        free(raw_matches); // free raw groups matches.
+    // a result of 0 is still a match (the pattern has more groups than the match
+    // data can hold); only negative values mean "no match" or an error.
+    if (pcre2_match(regex->expr, (PCRE2_SPTR)tested, strlen(tested), 0, 0, match_data, NULL) < 0)
+    { // an error happened, freeing results then returning false.
+        pcre2_match_data_free(match_data); // free match data.
         return false;
     }
 
     if (regex->nmatches > 0)
     { // there are matches, getting them.
-        ksregex_save_string_matches(regex, raw_matches, tested);
+        ksregex_save_string_matches(regex, match_data);
     }
 
-    free(raw_matches); // free raw groups matches.
+    pcre2_match_data_free(match_data); // free match data.
 
     return true; // no error, return true.
 }
diff --git a/tests/ksregex.c b/tests/ksregex.c
--- a/tests/ksregex.c
+++ b/tests/ksregex.c
@@ -10,7 +10,7 @@ int main(void)
     assert(reg1->matches == NULL);
 
     // try a more complex regex with matches groups.
-    ksregex *reg2 = ksregex_new("^([0-9])([a-z])$", 3, REG_ICASE);
+    ksregex *reg2 = ksregex_new((PCRE2_SPTR)"^([0-9])([a-z])$", 3, PCRE2_CASELESS);
     assert(ksregex_matches(reg2, "1a"));
     assert(!ksregex_matches(reg2, "H8"));
     assert(!ksregex_matches(reg2, "4MM"));