Change regex library to pcre2 for cross-platform compilation and better regex support.
This commit is contained in:
parent
2552fa9db9
commit
35d761794e
@ -1,13 +1,14 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <jemalloc/jemalloc.h>
|
#include <jemalloc/jemalloc.h>
|
||||||
#include <regex.h>
|
#define PCRE2_CODE_UNIT_WIDTH 8
|
||||||
|
#include <pcre2.h>
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Regex structure.
|
* Regex structure.
|
||||||
*/
|
*/
|
||||||
typedef struct {
|
typedef struct {
|
||||||
regex_t expr;
|
pcre2_code *expr;
|
||||||
unsigned nmatches;
|
unsigned nmatches;
|
||||||
char **matches;
|
char **matches;
|
||||||
} ksregex;
|
} ksregex;
|
||||||
@ -16,17 +17,17 @@ typedef struct {
|
|||||||
* Create a ksregex from an expression.
|
* Create a ksregex from an expression.
|
||||||
* @param expression - the expression to compile.
|
* @param expression - the expression to compile.
|
||||||
* @param nmatches - the number of matches groups to get, 0 if you want none.
|
* @param nmatches - the number of matches groups to get, 0 if you want none.
|
||||||
* @param flags - regex flags.
|
* @param options - PCRE2 primary option bits.
|
||||||
* @return - the created regex, NULL if an error happen.
|
* @return - the created regex, NULL if an error happen.
|
||||||
*/
|
*/
|
||||||
ksregex* ksregex_new(const char *expression, unsigned nmatches, int flags);
|
ksregex *ksregex_new(PCRE2_SPTR expression, unsigned nmatches, int options);
|
||||||
/**
|
/**
|
||||||
* Create a ksregex from an expression with 0 match group.
|
* Create a ksregex from an expression with 0 match group.
|
||||||
* @param expression - the expression to compile.
|
* @param expression - the expression to compile.
|
||||||
* @param flags - regex flags.
|
* @param options - PCRE2 primary option bits.
|
||||||
*/
|
*/
|
||||||
static inline ksregex* ksregex_new_nogroup(const char *expression, int flags)
|
static inline ksregex *ksregex_new_nogroup(PCRE2_SPTR expression, int options)
|
||||||
{ return ksregex_new(expression, 0, flags); }
|
{ return ksregex_new(expression, 0, options); }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Try to match the regex with a tested string.
|
* Try to match the regex with a tested string.
|
||||||
@ -59,5 +60,8 @@ static inline void ksregex_free(ksregex *regex)
|
|||||||
// free matches groups.
|
// free matches groups.
|
||||||
_ksregex_free_matches(regex);
|
_ksregex_free_matches(regex);
|
||||||
|
|
||||||
|
// free pcre expr.
|
||||||
|
pcre2_code_free(regex->expr);
|
||||||
|
|
||||||
free(regex); // free regex data.
|
free(regex); // free regex data.
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,8 @@ c = meson.get_compiler('c')
|
|||||||
deps = [ dependency('jemalloc') ]
|
deps = [ dependency('jemalloc') ]
|
||||||
# add pthread dependency
|
# add pthread dependency
|
||||||
deps += c.find_library('pthread')
|
deps += c.find_library('pthread')
|
||||||
|
# add pcre2 dependency
|
||||||
|
deps += c.find_library('pcre2-8')
|
||||||
|
|
||||||
# includes
|
# includes
|
||||||
include_dirs = include_directories([ 'include' ])
|
include_dirs = include_directories([ 'include' ])
|
||||||
|
48
src/regex.c
48
src/regex.c
@ -1,35 +1,41 @@
|
|||||||
#include <ksr/regex.h>
|
#include <ksr/regex.h>
|
||||||
#include <regex.h>
|
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
ksregex* ksregex_new(const char *expression, unsigned nmatches, int flags)
|
ksregex *ksregex_new(PCRE2_SPTR expression, unsigned nmatches, int options)
|
||||||
{
|
{
|
||||||
ksregex *regex = malloc(sizeof(ksregex));
|
ksregex *regex = malloc(sizeof(ksregex));
|
||||||
regex->nmatches = nmatches;
|
regex->nmatches = nmatches;
|
||||||
regex->matches = NULL;
|
regex->matches = NULL;
|
||||||
|
|
||||||
if (regcomp(®ex->expr, expression, // compile expression in regex.
|
int error_code;
|
||||||
flags | REG_EXTENDED | (nmatches == 0 ? REG_NOSUB : 0)) != 0)
|
PCRE2_SIZE error_offset;
|
||||||
|
// compile expression in regex.
|
||||||
|
regex->expr = pcre2_compile(expression, strlen(expression), options, &error_code, &error_offset, NULL);
|
||||||
|
|
||||||
|
if (error_code < 0)
|
||||||
return NULL; // an error happened, returning NULL.
|
return NULL; // an error happened, returning NULL.
|
||||||
|
|
||||||
return regex; // return created regex.
|
return regex; // return created regex.
|
||||||
}
|
}
|
||||||
|
|
||||||
void ksregex_save_string_matches(ksregex *regex, regmatch_t *raw_matches, const char *source)
|
void ksregex_save_string_matches(ksregex *regex, pcre2_match_data *match_data)
|
||||||
{
|
{
|
||||||
// allocate string matches array.
|
// allocate string matches array.
|
||||||
regex->matches = malloc(sizeof(char *) * regex->nmatches);
|
regex->matches = malloc(sizeof(char *) * regex->nmatches);
|
||||||
|
|
||||||
// save full string match.
|
for(unsigned i = 0; i < regex->nmatches; i++)
|
||||||
regex->matches[0] = strdup(source);
|
|
||||||
|
|
||||||
for(unsigned i = 1; i < regex->nmatches; i++)
|
|
||||||
{ // for each group match, converting it to an independent string.
|
{ // for each group match, converting it to an independent string.
|
||||||
regoff_t current_match_length = raw_matches[i].rm_eo - raw_matches[i].rm_so;
|
// get buffer of current pcre2 match.
|
||||||
regex->matches[i] = malloc(current_match_length + 1); // allocate the current group match string.
|
PCRE2_UCHAR* pcre2_buffer;
|
||||||
// copying the string part from source to the current match string.
|
PCRE2_SIZE pcre2_buffer_len;
|
||||||
strncpy(regex->matches[i], &source[raw_matches[i].rm_so], current_match_length);
|
pcre2_substring_get_bynumber(match_data, i, &pcre2_buffer, &pcre2_buffer_len);
|
||||||
regex->matches[i][current_match_length] = 0; // set end of string.
|
|
||||||
|
regex->matches[i] = malloc(pcre2_buffer_len + 1); // allocate the current group match string.
|
||||||
|
// copying the current buffer to the current match string.
|
||||||
|
strncpy(regex->matches[i], pcre2_buffer, pcre2_buffer_len);
|
||||||
|
regex->matches[i][pcre2_buffer_len] = 0; // set end of string.
|
||||||
|
|
||||||
|
pcre2_substring_free(pcre2_buffer); // free pcre2 buffer.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -38,21 +44,21 @@ bool ksregex_matches(ksregex *regex, const char *tested)
|
|||||||
// if there was matches, free them.
|
// if there was matches, free them.
|
||||||
_ksregex_free_matches(regex);
|
_ksregex_free_matches(regex);
|
||||||
|
|
||||||
// allocate raw matches array.
|
// create match data.
|
||||||
regmatch_t *raw_matches = malloc(sizeof(regmatch_t) * regex->nmatches);
|
pcre2_match_data *match_data = pcre2_match_data_create(regex->nmatches, NULL);
|
||||||
|
|
||||||
if (regexec(®ex->expr, tested, regex->nmatches, raw_matches, 0) != 0)
|
if (pcre2_match(regex->expr, tested, strlen(tested), 0, 0, match_data, NULL) <= 0)
|
||||||
{ // an error happened, freeing results then returning NULL.
|
{ // an error happened, freeing results then returning false.
|
||||||
free(raw_matches); // free raw groups matches.
|
pcre2_match_data_free(match_data); // free match data.
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (regex->nmatches > 0)
|
if (regex->nmatches > 0)
|
||||||
{ // there are matches, getting them.
|
{ // there are matches, getting them.
|
||||||
ksregex_save_string_matches(regex, raw_matches, tested);
|
ksregex_save_string_matches(regex, match_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
free(raw_matches); // free raw groups matches.
|
pcre2_match_data_free(match_data); // free match data.
|
||||||
|
|
||||||
return true; // no error, return true.
|
return true; // no error, return true.
|
||||||
}
|
}
|
||||||
|
@ -10,7 +10,7 @@ int main(void)
|
|||||||
assert(reg1->matches == NULL);
|
assert(reg1->matches == NULL);
|
||||||
|
|
||||||
// try a more complex regex with matches groups.
|
// try a more complex regex with matches groups.
|
||||||
ksregex *reg2 = ksregex_new("^([0-9])([a-z])$", 3, REG_ICASE);
|
ksregex *reg2 = ksregex_new("^([0-9])([a-z])$", 3, PCRE2_CASELESS);
|
||||||
assert(ksregex_matches(reg2, "1a"));
|
assert(ksregex_matches(reg2, "1a"));
|
||||||
assert(!ksregex_matches(reg2, "H8"));
|
assert(!ksregex_matches(reg2, "H8"));
|
||||||
assert(!ksregex_matches(reg2, "4MM"));
|
assert(!ksregex_matches(reg2, "4MM"));
|
||||||
|
Loading…
Reference in New Issue
Block a user