Change regex library to pcre2 for cross-platform compilation and better regex support.

This commit is contained in:
Madeorsk 2024-08-22 10:55:50 +02:00
parent 2552fa9db9
commit 35d761794e
4 changed files with 41 additions and 29 deletions

View File

@ -1,13 +1,14 @@
#pragma once #pragma once
#include <jemalloc/jemalloc.h> #include <jemalloc/jemalloc.h>
#include <regex.h> #define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h>
/** /**
* Regex structure. * Regex structure.
*/ */
typedef struct { typedef struct {
regex_t expr; pcre2_code *expr;
unsigned nmatches; unsigned nmatches;
char **matches; char **matches;
} ksregex; } ksregex;
@ -16,17 +17,17 @@ typedef struct {
* Create a ksregex from an expression. * Create a ksregex from an expression.
* @param expression - the expression to compile. * @param expression - the expression to compile.
* @param nmatches - the number of matches groups to get, 0 if you want none. * @param nmatches - the number of matches groups to get, 0 if you want none.
* @param flags - regex flags. * @param options - PCRE2 primary option bits.
* @return - the created regex, NULL if an error happened. * @return - the created regex, NULL if an error happened.
*/ */
ksregex* ksregex_new(const char *expression, unsigned nmatches, int flags); ksregex *ksregex_new(PCRE2_SPTR expression, unsigned nmatches, int options);
/** /**
* Create a ksregex from an expression with 0 match group. * Create a ksregex from an expression with 0 match group.
* @param expression - the expression to compile. * @param expression - the expression to compile.
* @param flags - regex flags. * @param options - PCRE2 primary option bits.
*/ */
static inline ksregex* ksregex_new_nogroup(const char *expression, int flags) static inline ksregex *ksregex_new_nogroup(PCRE2_SPTR expression, int options)
{ return ksregex_new(expression, 0, flags); } { return ksregex_new(expression, 0, options); }
/** /**
* Try to match the regex with a tested string. * Try to match the regex with a tested string.
@ -59,5 +60,8 @@ static inline void ksregex_free(ksregex *regex)
// free matches groups. // free matches groups.
_ksregex_free_matches(regex); _ksregex_free_matches(regex);
// free pcre expr.
pcre2_code_free(regex->expr);
free(regex); // free regex data. free(regex); // free regex data.
} }

View File

@ -5,6 +5,8 @@ c = meson.get_compiler('c')
deps = [ dependency('jemalloc') ]
# add pthread dependency
deps += c.find_library('pthread')
# add pcre2 dependency (8-bit code unit library).
# dependency() also provides the include directories and compile flags,
# which a bare find_library() does not.
deps += dependency('libpcre2-8')
# includes
include_dirs = include_directories([ 'include' ])

View File

@ -1,35 +1,41 @@
#include <ksr/regex.h> #include <ksr/regex.h>
#include <regex.h>
#include <string.h> #include <string.h>
/**
 * Create a ksregex from an expression.
 * @param expression - the zero-terminated expression to compile.
 * @param nmatches - the number of match groups to get, 0 if you want none.
 * @param options - PCRE2 primary option bits.
 * @return - the created regex, NULL if the allocation or the compilation failed.
 */
ksregex *ksregex_new(PCRE2_SPTR expression, unsigned nmatches, int options)
{
	ksregex *regex = malloc(sizeof *regex);
	if (regex == NULL)
		return NULL; // allocation failed.

	regex->nmatches = nmatches;
	regex->matches = NULL;

	int error_code;
	PCRE2_SIZE error_offset;
	// compile expression in regex; pcre2 computes the length of the zero-terminated pattern itself.
	regex->expr = pcre2_compile(expression, PCRE2_ZERO_TERMINATED, options, &error_code, &error_offset, NULL);
	if (regex->expr == NULL)
	{ // pcre2_compile reports a failure with a NULL result, its compile error codes are positive.
		free(regex); // do not leak the regex data.
		return NULL; // an error happened, returning NULL.
	}

	return regex; // return created regex.
}
void ksregex_save_string_matches(ksregex *regex, regmatch_t *raw_matches, const char *source) void ksregex_save_string_matches(ksregex *regex, pcre2_match_data *match_data)
{ {
// allocate string matches array. // allocate string matches array.
regex->matches = malloc(sizeof(char *) * regex->nmatches); regex->matches = malloc(sizeof(char *) * regex->nmatches);
// save full string match. for(unsigned i = 0; i < regex->nmatches; i++)
regex->matches[0] = strdup(source);
for(unsigned i = 1; i < regex->nmatches; i++)
{ // for each group match, converting it to an independent string. { // for each group match, converting it to an independent string.
regoff_t current_match_length = raw_matches[i].rm_eo - raw_matches[i].rm_so; // get buffer of current pcre2 match.
regex->matches[i] = malloc(current_match_length + 1); // allocate the current group match string. PCRE2_UCHAR* pcre2_buffer;
// copying the string part from source to the current match string. PCRE2_SIZE pcre2_buffer_len;
strncpy(regex->matches[i], &source[raw_matches[i].rm_so], current_match_length); pcre2_substring_get_bynumber(match_data, i, &pcre2_buffer, &pcre2_buffer_len);
regex->matches[i][current_match_length] = 0; // set end of string.
regex->matches[i] = malloc(pcre2_buffer_len + 1); // allocate the current group match string.
// copying the current buffer to the current match string.
strncpy(regex->matches[i], pcre2_buffer, pcre2_buffer_len);
regex->matches[i][pcre2_buffer_len] = 0; // set end of string.
pcre2_substring_free(pcre2_buffer); // free pcre2 buffer.
} }
} }
@ -38,21 +44,21 @@ bool ksregex_matches(ksregex *regex, const char *tested)
// if there was matches, free them. // if there was matches, free them.
_ksregex_free_matches(regex); _ksregex_free_matches(regex);
// allocate raw matches array. // create match data.
regmatch_t *raw_matches = malloc(sizeof(regmatch_t) * regex->nmatches); pcre2_match_data *match_data = pcre2_match_data_create(regex->nmatches, NULL);
if (regexec(&regex->expr, tested, regex->nmatches, raw_matches, 0) != 0) if (pcre2_match(regex->expr, tested, strlen(tested), 0, 0, match_data, NULL) <= 0)
{ // an error happened, freeing results then returning NULL. { // an error happened, freeing results then returning false.
free(raw_matches); // free raw groups matches. pcre2_match_data_free(match_data); // free match data.
return false; return false;
} }
if (regex->nmatches > 0) if (regex->nmatches > 0)
{ // there are matches, getting them. { // there are matches, getting them.
ksregex_save_string_matches(regex, raw_matches, tested); ksregex_save_string_matches(regex, match_data);
} }
free(raw_matches); // free raw groups matches. pcre2_match_data_free(match_data); // free match data.
return true; // no error, return true. return true; // no error, return true.
} }

View File

@ -10,7 +10,7 @@ int main(void)
assert(reg1->matches == NULL); assert(reg1->matches == NULL);
// try a more complex regex with matches groups. // try a more complex regex with matches groups.
ksregex *reg2 = ksregex_new("^([0-9])([a-z])$", 3, REG_ICASE); ksregex *reg2 = ksregex_new("^([0-9])([a-z])$", 3, PCRE2_CASELESS);
assert(ksregex_matches(reg2, "1a")); assert(ksregex_matches(reg2, "1a"));
assert(!ksregex_matches(reg2, "H8")); assert(!ksregex_matches(reg2, "H8"));
assert(!ksregex_matches(reg2, "4MM")); assert(!ksregex_matches(reg2, "4MM"));