Change regex library to pcre2 for cross-platform compilation and better regex support.
This commit is contained in:
parent
2552fa9db9
commit
35d761794e
@ -1,13 +1,14 @@
|
||||
#pragma once
|
||||
|
||||
#include <jemalloc/jemalloc.h>
|
||||
#include <regex.h>
|
||||
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||
#include <pcre2.h>
|
||||
|
||||
/**
|
||||
* Regex structure.
|
||||
*/
|
||||
typedef struct {
|
||||
regex_t expr;
|
||||
pcre2_code *expr;
|
||||
unsigned nmatches;
|
||||
char **matches;
|
||||
} ksregex;
|
||||
@ -16,17 +17,17 @@ typedef struct {
|
||||
* Create a ksregex from an expression.
|
||||
* @param expression - the expression to compile.
|
||||
* @param nmatches - the number of match groups to get, 0 if you want none.
|
||||
* @param flags - regex flags.
|
||||
* @param options - PCRE2 primary option bits.
|
||||
* @return - the created regex, NULL if an error happens.
|
||||
*/
|
||||
ksregex* ksregex_new(const char *expression, unsigned nmatches, int flags);
|
||||
ksregex *ksregex_new(PCRE2_SPTR expression, unsigned nmatches, int options);
|
||||
/**
|
||||
* Create a ksregex from an expression with 0 match group.
|
||||
* @param expression - the expression to compile.
|
||||
* @param flags - regex flags.
|
||||
* @param options - PCRE2 primary option bits.
|
||||
*/
|
||||
static inline ksregex* ksregex_new_nogroup(const char *expression, int flags)
|
||||
{ return ksregex_new(expression, 0, flags); }
|
||||
static inline ksregex *ksregex_new_nogroup(PCRE2_SPTR expression, int options)
|
||||
{ return ksregex_new(expression, 0, options); }
|
||||
|
||||
/**
|
||||
* Try to match the regex with a tested string.
|
||||
@ -59,5 +60,8 @@ static inline void ksregex_free(ksregex *regex)
|
||||
// free matches groups.
|
||||
_ksregex_free_matches(regex);
|
||||
|
||||
// free pcre expr.
|
||||
pcre2_code_free(regex->expr);
|
||||
|
||||
free(regex); // free regex data.
|
||||
}
|
||||
|
@ -5,6 +5,8 @@ c = meson.get_compiler('c')
|
||||
deps = [ dependency('jemalloc') ]
# add thread support through Meson's built-in 'threads' dependency, which
# selects the right flags per platform instead of requiring a libpthread file.
deps += dependency('threads')
# add pcre2 dependency (8-bit code unit library), resolved like jemalloc
# through Meson's dependency lookup so include paths and flags are picked up.
deps += dependency('libpcre2-8')
|
||||
|
||||
# includes
|
||||
include_dirs = include_directories([ 'include' ])
|
||||
|
48
src/regex.c
48
src/regex.c
@ -1,35 +1,41 @@
|
||||
#include <ksr/regex.h>
|
||||
#include <regex.h>
|
||||
#include <string.h>
|
||||
|
||||
ksregex* ksregex_new(const char *expression, unsigned nmatches, int flags)
|
||||
ksregex *ksregex_new(PCRE2_SPTR expression, unsigned nmatches, int options)
|
||||
{
|
||||
ksregex *regex = malloc(sizeof(ksregex));
|
||||
regex->nmatches = nmatches;
|
||||
regex->matches = NULL;
|
||||
|
||||
if (regcomp(®ex->expr, expression, // compile expression in regex.
|
||||
flags | REG_EXTENDED | (nmatches == 0 ? REG_NOSUB : 0)) != 0)
|
||||
int error_code;
|
||||
PCRE2_SIZE error_offset;
|
||||
// compile expression in regex.
|
||||
regex->expr = pcre2_compile(expression, strlen(expression), options, &error_code, &error_offset, NULL);
|
||||
|
||||
if (error_code < 0)
|
||||
return NULL; // an error happened, returning NULL.
|
||||
|
||||
return regex; // return created regex.
|
||||
}
|
||||
|
||||
void ksregex_save_string_matches(ksregex *regex, regmatch_t *raw_matches, const char *source)
|
||||
void ksregex_save_string_matches(ksregex *regex, pcre2_match_data *match_data)
|
||||
{
|
||||
// allocate string matches array.
|
||||
regex->matches = malloc(sizeof(char *) * regex->nmatches);
|
||||
|
||||
// save full string match.
|
||||
regex->matches[0] = strdup(source);
|
||||
|
||||
for(unsigned i = 1; i < regex->nmatches; i++)
|
||||
for(unsigned i = 0; i < regex->nmatches; i++)
|
||||
{ // for each group match, converting it to an independent string.
|
||||
regoff_t current_match_length = raw_matches[i].rm_eo - raw_matches[i].rm_so;
|
||||
regex->matches[i] = malloc(current_match_length + 1); // allocate the current group match string.
|
||||
// copying the string part from source to the current match string.
|
||||
strncpy(regex->matches[i], &source[raw_matches[i].rm_so], current_match_length);
|
||||
regex->matches[i][current_match_length] = 0; // set end of string.
|
||||
// get buffer of current pcre2 match.
|
||||
PCRE2_UCHAR* pcre2_buffer;
|
||||
PCRE2_SIZE pcre2_buffer_len;
|
||||
pcre2_substring_get_bynumber(match_data, i, &pcre2_buffer, &pcre2_buffer_len);
|
||||
|
||||
regex->matches[i] = malloc(pcre2_buffer_len + 1); // allocate the current group match string.
|
||||
// copying the current buffer to the current match string.
|
||||
strncpy(regex->matches[i], pcre2_buffer, pcre2_buffer_len);
|
||||
regex->matches[i][pcre2_buffer_len] = 0; // set end of string.
|
||||
|
||||
pcre2_substring_free(pcre2_buffer); // free pcre2 buffer.
|
||||
}
|
||||
}
|
||||
|
||||
@ -38,21 +44,21 @@ bool ksregex_matches(ksregex *regex, const char *tested)
|
||||
// if there was matches, free them.
|
||||
_ksregex_free_matches(regex);
|
||||
|
||||
// allocate raw matches array.
|
||||
regmatch_t *raw_matches = malloc(sizeof(regmatch_t) * regex->nmatches);
|
||||
// create match data.
|
||||
pcre2_match_data *match_data = pcre2_match_data_create(regex->nmatches, NULL);
|
||||
|
||||
if (regexec(®ex->expr, tested, regex->nmatches, raw_matches, 0) != 0)
|
||||
{ // an error happened, freeing results then returning NULL.
|
||||
free(raw_matches); // free raw groups matches.
|
||||
if (pcre2_match(regex->expr, tested, strlen(tested), 0, 0, match_data, NULL) <= 0)
|
||||
{ // an error happened, freeing results then returning false.
|
||||
pcre2_match_data_free(match_data); // free match data.
|
||||
return false;
|
||||
}
|
||||
|
||||
if (regex->nmatches > 0)
|
||||
{ // there are matches, getting them.
|
||||
ksregex_save_string_matches(regex, raw_matches, tested);
|
||||
ksregex_save_string_matches(regex, match_data);
|
||||
}
|
||||
|
||||
free(raw_matches); // free raw groups matches.
|
||||
pcre2_match_data_free(match_data); // free match data.
|
||||
|
||||
return true; // no error, return true.
|
||||
}
|
||||
|
@ -10,7 +10,7 @@ int main(void)
|
||||
assert(reg1->matches == NULL);
|
||||
|
||||
// try a more complex regex with matches groups.
|
||||
ksregex *reg2 = ksregex_new("^([0-9])([a-z])$", 3, REG_ICASE);
|
||||
ksregex *reg2 = ksregex_new("^([0-9])([a-z])$", 3, PCRE2_CASELESS);
|
||||
assert(ksregex_matches(reg2, "1a"));
|
||||
assert(!ksregex_matches(reg2, "H8"));
|
||||
assert(!ksregex_matches(reg2, "4MM"));
|
||||
|
Loading…
Reference in New Issue
Block a user