Change regex library to pcre2 for cross-platform compilation and better regex support.

This commit is contained in:
Madeorsk 2024-08-22 10:55:50 +02:00
parent 2552fa9db9
commit 35d761794e
4 changed files with 41 additions and 29 deletions

View File

@ -1,13 +1,14 @@
#pragma once
#include <jemalloc/jemalloc.h>
#include <regex.h>
#define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h>
/**
 * Regex structure.
 * Holds a compiled PCRE2 pattern together with the strings captured by the last match.
 */
typedef struct {
	pcre2_code *expr; // compiled PCRE2 pattern.
	unsigned nmatches; // number of match strings to keep, 0 for none.
	char **matches; // match strings of the last successful match, NULL when none are stored.
} ksregex;
@ -16,17 +17,17 @@ typedef struct {
* Create a ksregex from an expression.
* @param expression - the expression to compile.
* @param nmatches - the number of match groups to get, 0 if you want none.
* @param flags - regex flags.
* @param options - PCRE2 primary option bits.
* @return - the created regex, NULL if an error happens.
*/
// the expression is a PCRE2_SPTR because it is handed to pcre2_compile.
ksregex *ksregex_new(PCRE2_SPTR expression, unsigned nmatches, int options);
/**
 * Create a ksregex from an expression with 0 match group.
 * @param expression - the expression to compile.
 * @param options - PCRE2 primary option bits.
 * @return - the result of ksregex_new(expression, 0, options).
 */
static inline ksregex *ksregex_new_nogroup(PCRE2_SPTR expression, int options)
{ return ksregex_new(expression, 0, options); }
/**
* Try to match the regex with a tested string.
@ -59,5 +60,8 @@ static inline void ksregex_free(ksregex *regex)
// free matches groups.
_ksregex_free_matches(regex);
// free pcre expr.
pcre2_code_free(regex->expr);
free(regex); // free regex data.
}

View File

@ -5,6 +5,8 @@ c = meson.get_compiler('c')
# dependency list, starting with jemalloc resolved through dependency().
deps = [ dependency('jemalloc') ]
# add pthread dependency, located through the C compiler object.
deps += c.find_library('pthread')
# add pcre2 dependency: the 'pcre2-8' library, also located through the C compiler object.
deps += c.find_library('pcre2-8')
# header search path: the 'include' directory.
include_dirs = include_directories([ 'include' ])

View File

@ -1,35 +1,41 @@
#include <ksr/regex.h>
#include <regex.h>
#include <string.h>
/**
 * Create a ksregex from an expression.
 * @param expression - the NUL-terminated expression to compile.
 * @param nmatches - the number of match strings to keep, 0 if you want none.
 * @param options - PCRE2 primary option bits, passed unchanged to pcre2_compile.
 * @return - the created regex, NULL if the allocation or the compilation failed.
 */
ksregex *ksregex_new(PCRE2_SPTR expression, unsigned nmatches, int options)
{
	ksregex *regex = malloc(sizeof *regex);
	if (regex == NULL)
		return NULL; // out of memory.

	regex->nmatches = nmatches;
	regex->matches = NULL;

	int error_code = 0;
	PCRE2_SIZE error_offset = 0;
	// compile expression in regex.
	// PCRE2_SPTR points to unsigned code units, hence the cast for strlen.
	regex->expr = pcre2_compile(expression, strlen((const char *) expression), options,
	                            &error_code, &error_offset, NULL);
	if (regex->expr == NULL)
	{ // pcre2_compile reports a failure by returning NULL, not through a negative error code.
		free(regex); // do not leak the half-built regex.
		return NULL; // an error happened, returning NULL.
	}

	return regex; // return created regex.
}
void ksregex_save_string_matches(ksregex *regex, regmatch_t *raw_matches, const char *source)
void ksregex_save_string_matches(ksregex *regex, pcre2_match_data *match_data)
{
// allocate string matches array.
regex->matches = malloc(sizeof(char *) * regex->nmatches);
// save full string match.
regex->matches[0] = strdup(source);
for(unsigned i = 1; i < regex->nmatches; i++)
for(unsigned i = 0; i < regex->nmatches; i++)
{ // for each group match, converting it to an independent string.
regoff_t current_match_length = raw_matches[i].rm_eo - raw_matches[i].rm_so;
regex->matches[i] = malloc(current_match_length + 1); // allocate the current group match string.
// copying the string part from source to the current match string.
strncpy(regex->matches[i], &source[raw_matches[i].rm_so], current_match_length);
regex->matches[i][current_match_length] = 0; // set end of string.
// get buffer of current pcre2 match.
PCRE2_UCHAR* pcre2_buffer;
PCRE2_SIZE pcre2_buffer_len;
pcre2_substring_get_bynumber(match_data, i, &pcre2_buffer, &pcre2_buffer_len);
regex->matches[i] = malloc(pcre2_buffer_len + 1); // allocate the current group match string.
// copying the current buffer to the current match string.
strncpy(regex->matches[i], pcre2_buffer, pcre2_buffer_len);
regex->matches[i][pcre2_buffer_len] = 0; // set end of string.
pcre2_substring_free(pcre2_buffer); // free pcre2 buffer.
}
}
@ -38,21 +44,21 @@ bool ksregex_matches(ksregex *regex, const char *tested)
// if there were matches, free them.
_ksregex_free_matches(regex);
// allocate raw matches array.
regmatch_t *raw_matches = malloc(sizeof(regmatch_t) * regex->nmatches);
// create match data.
pcre2_match_data *match_data = pcre2_match_data_create(regex->nmatches, NULL);
if (regexec(&regex->expr, tested, regex->nmatches, raw_matches, 0) != 0)
{ // an error happened, freeing results then returning NULL.
free(raw_matches); // free raw groups matches.
if (pcre2_match(regex->expr, tested, strlen(tested), 0, 0, match_data, NULL) <= 0)
{ // an error happened, freeing results then returning false.
pcre2_match_data_free(match_data); // free match data.
return false;
}
if (regex->nmatches > 0)
{ // there are matches, getting them.
ksregex_save_string_matches(regex, raw_matches, tested);
ksregex_save_string_matches(regex, match_data);
}
free(raw_matches); // free raw groups matches.
pcre2_match_data_free(match_data); // free match data.
return true; // no error, return true.
}

View File

@ -10,7 +10,7 @@ int main(void)
assert(reg1->matches == NULL);
// try a more complex regex with matches groups.
ksregex *reg2 = ksregex_new("^([0-9])([a-z])$", 3, REG_ICASE);
ksregex *reg2 = ksregex_new("^([0-9])([a-z])$", 3, PCRE2_CASELESS);
assert(ksregex_matches(reg2, "1a"));
assert(!ksregex_matches(reg2, "H8"));
assert(!ksregex_matches(reg2, "4MM"));