From: glyn Date: Fri, 25 Sep 2015 15:53:06 +0000 (+0100) Subject: Initial Commit X-Git-Url: https://git.8kb.co.uk/?p=dataflex%2Fdfregex;a=commitdiff_plain;h=524416dc7971a1fe7ef1c7c1817022be6a7f274f Initial Commit --- 524416dc7971a1fe7ef1c7c1817022be6a7f274f diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..fafff2e --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.DS_Store +Thumbs.db diff --git a/README.md b/README.md new file mode 100644 index 0000000..25d0770 --- /dev/null +++ b/README.md @@ -0,0 +1,16 @@ +POSIX Regex functions for DataFlex 3.2 +-------------------------------------- + +Requires: + DataFlex 3.1c onwards + GCC or compatible compiler like MinGW + GNU regex libraries for windows + http://sourceforge.net/projects/mingw/files/Other/UserContributed/regex + +I created this because despite VDF (Visual Dataflex) users being able to leverage +vbscript.dll in Windows via COM to do regex operations, it seems that Console Mode +users are out of luck. + +Despite being GNU libraries, this is intended to be used on Microsoft Windows +using the DataFlex external_function import. No doubt if you're using DataFlex +on Unix you can get these to work with minimal modifications. 
diff --git a/bin/dfregex.dll b/bin/dfregex.dll new file mode 100644 index 0000000..dd1766f Binary files /dev/null and b/bin/dfregex.dll differ diff --git a/bin/libgnurx-0.dll b/bin/libgnurx-0.dll new file mode 100644 index 0000000..e741c92 Binary files /dev/null and b/bin/libgnurx-0.dll differ diff --git a/src/c/Makefile b/src/c/Makefile new file mode 100644 index 0000000..2f4b2ef --- /dev/null +++ b/src/c/Makefile @@ -0,0 +1,49 @@ +##------------------------------------------------------------------------- +## posix regex extensions +## +## Copyright (c) 2015, glyn@8kb.co.uk +## Author: Glyn Astill +## +##------------------------------------------------------------------------- +## + +CPP = g++.exe +CC = gcc.exe +WINDRES = windres.exe +RES = dfregex_private.res +OBJ = dfregex.o memman.o gnuregex.o $(RES) +LINKOBJ = dfregex.o memman.o gnuregex.o $(RES) +LIBS = --no-export-all-symbols --add-stdcall-alias -lgnurx +INCS = +CXXINCS = +BIN = dfregex.dll +CXXFLAGS = $(CXXINCS) -DBUILDING_DLL=1 +CFLAGS = $(INCS) -DBUILDING_DLL=1 +RM = del -f + +.PHONY: all all-before all-after clean clean-custom + +all: all-before dfregex.dll all-after + + +clean: clean-custom + ${RM} $(OBJ) $(BIN) + +DLLWRAP=dllwrap.exe +DEFFILE=libdfregex.def +STATICLIB=libdfregex.a + +$(BIN): $(LINKOBJ) + $(DLLWRAP) --output-def $(DEFFILE) --implib $(STATICLIB) $(LINKOBJ) $(LIBS) -o $(BIN) + +dfregex.o: dfregex.c + $(CC) -c dfregex.c -o dfregex.o $(CFLAGS) + +memman.o: memman.c + $(CC) -c memman.c -o memman.o $(CFLAGS) + +gnuregex.o: gnuregex.c + $(CC) -c gnuregex.c -o gnuregex.o $(CFLAGS) + +dfregex_private.res: dfregex_private.rc + $(WINDRES) -i dfregex_private.rc --input-format=rc -o dfregex_private.res -O coff diff --git a/src/c/dfregex.c b/src/c/dfregex.c new file mode 100644 index 0000000..a742f54 --- /dev/null +++ b/src/c/dfregex.c @@ -0,0 +1,94 @@ +/*------------------------------------------------------------------------- + * posix regex extensions + * + * Copyright (c) 2015, glyn@8kb.co.uk + 
* Author: Glyn Astill
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include
+#include
+#include
+#include "gnuregex.h"
+#include "dfregex.h"
+
+DLLIMPORT int RegexpMatch (const char *str, const char *pattern, const char *flags, int errors)
+{
+    return regexp_match(str, pattern, flags, errors); /* 1 = match, 0 = no match or pattern failed to compile */
+}
+
+DLLIMPORT int RegexpMatches(const char *str, const char *pattern, const char *flags, char *output, int output_len, int errors)
+{
+    char *matches = regexp_matches(str, pattern, flags, errors); /* NULL when the pattern fails to compile */
+    int matches_len;
+    int result = 0; /* 0 = ok, -1 = output buffer too small, -2 = regex failure */
+
+    if (matches != NULL)
+    {
+        matches_len = strlen(matches);
+        if (matches_len <= output_len)
+        {
+            strncpy(output, matches, output_len); /* n = output_len: strncpy NUL-pads the tail, so output is terminated whenever there is room */
+            result = 0;
+        }
+        else
+            result = -1;
+
+        wfree(matches);
+    }
+    else
+        result = -2;
+
+    return result;
+}
+
+DLLIMPORT int RegexpReplace(const char *str, const char *pattern, const char *replacement, const char *flags, char *output, int output_len, int errors)
+{
+    char *replaced = regexp_replace(str, pattern, replacement, flags, errors); /* NULL when the pattern fails to compile */
+    int replaced_len;
+    int result = 0; /* 0 = ok, -1 = output buffer too small, -2 = regex failure */
+
+    if (replaced != NULL)
+    {
+        replaced_len = strlen(replaced);
+
+        if (replaced_len <= output_len)
+        {
+            strncpy(output, replaced, output_len); /* n = output_len: strncpy NUL-pads the tail, so output is terminated whenever there is room */
+            result = 0;
+        }
+        else
+            result = -1;
+
+        wfree(replaced);
+    }
+    else
+        result = -2;
+
+
+    return result;
+}
+
+BOOL APIENTRY DllMain (HINSTANCE hInst /* Library instance handle. */ ,
+                       DWORD reason /* Reason this function is being called. */ ,
+                       LPVOID reserved /* Not used. */ )
+{
+    switch (reason)
+    {
+      case DLL_PROCESS_ATTACH:
+        break;
+
+      case DLL_PROCESS_DETACH:
+        break;
+
+      case DLL_THREAD_ATTACH:
+        break;
+
+      case DLL_THREAD_DETACH:
+        break;
+    }
+
+    /* Returns TRUE on success, FALSE on failure */
+    return TRUE;
+}
diff --git a/src/c/dfregex.h b/src/c/dfregex.h
new file mode 100644
index 0000000..594d881
--- /dev/null
+++ b/src/c/dfregex.h
@@ -0,0 +1,24 @@
+/*-------------------------------------------------------------------------
+ * posix regex extensions
+ *
+ * Copyright (c) 2015, glyn@8kb.co.uk
+ * Author: Glyn Astill
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef _DFREGEX_H_
+#define _DFREGEX_H_
+
+#if BUILDING_DLL
+# define DLLIMPORT __declspec (dllexport)
+#else /* Not BUILDING_DLL */
+# define DLLIMPORT __declspec (dllimport)
+#endif /* Not BUILDING_DLL */
+
+
+DLLIMPORT int RegexpMatch(const char *str, const char *pattern, const char *flags, int errors);
+DLLIMPORT int RegexpMatches(const char *str, const char *pattern, const char *flags, char *output, int output_len, int errors);
+DLLIMPORT int RegexpReplace(const char *str, const char *pattern, const char *replacement, const char *flags, char *output, int output_len, int errors);
+
+#endif /* _DFREGEX_H_ */
diff --git a/src/c/dfregex_private.h b/src/c/dfregex_private.h
new file mode 100644
index 0000000..d0006f2
--- /dev/null
+++ b/src/c/dfregex_private.h
@@ -0,0 +1,29 @@
+/*-------------------------------------------------------------------------
+ * posix regex extensions
+ *
+ * Copyright (c) 2015, glyn@8kb.co.uk
+ * Author: Glyn Astill
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef DFREGEX_PRIVATE_H
+#define DFREGEX_PRIVATE_H
+
+/* VERSION DEFINITIONS */
+#define VER_STRING "0.1.1.1"
+#define VER_MAJOR 0
+#define VER_MINOR 1
+#define VER_RELEASE 1
+#define VER_BUILD 1
+#define COMPANY_NAME "8kb.co.uk"
+#define FILE_VERSION "0.1.1.1"
+#define FILE_DESCRIPTION
"Regex for DataFlex 3.2" +#define INTERNAL_NAME "" +#define LEGAL_COPYRIGHT "Glyn Astill" +#define LEGAL_TRADEMARKS "" +#define ORIGINAL_FILENAME "" +#define PRODUCT_NAME "" +#define PRODUCT_VERSION "" + +#endif /*DFREGEX_PRIVATE_H*/ diff --git a/src/c/dfregex_private.rc b/src/c/dfregex_private.rc new file mode 100644 index 0000000..cb42ebf --- /dev/null +++ b/src/c/dfregex_private.rc @@ -0,0 +1,37 @@ +/*------------------------------------------------------------------------- + * posix regex extensions + * + * Copyright (c) 2015, glyn@8kb.co.uk + * Author: Glyn Astill + * + *------------------------------------------------------------------------- + */ + +#include + +1 VERSIONINFO +FILEVERSION 0,1,1,1 +PRODUCTVERSION 0,1,1,1 +FILETYPE VFT_DLL +{ + BLOCK "StringFileInfo" + { + BLOCK "080904E4" + { + VALUE "CompanyName", "8kb.co.uk" + VALUE "FileVersion", "0.1.1.1" + VALUE "FileDescription", "Regex for DataFlex 3.2" + VALUE "InternalName", "" + VALUE "LegalCopyright", "Glyn Astill" + VALUE "LegalTrademarks", "" + VALUE "OriginalFilename", "" + VALUE "ProductName", "" + VALUE "ProductVersion", "" + } + } + BLOCK "VarFileInfo" + { + VALUE "Translation", 0x0809, 1252 + } +} + diff --git a/src/c/gnuregex.c b/src/c/gnuregex.c new file mode 100644 index 0000000..611c990 --- /dev/null +++ b/src/c/gnuregex.c @@ -0,0 +1,464 @@ +/*------------------------------------------------------------------------- + * posix regex extensions + * + * Copyright (c) 2015, glyn@8kb.co.uk + * Author: Glyn Astill + * + *------------------------------------------------------------------------- + */ + +#include +#include +#include +#include +#include "memman.h" + +#define MAX_ERROR_MSG 0x1000 + +/* + * Return a properly escaped / quoted string + */ +static char * quote_output(char *str) { + char *result; + char *result_return; + int len; + int do_quote = 0; + char *ptr; + + len = strlen(str); + + /* Check for characters that need quoting */ + for (ptr = str; *ptr; ptr++) { + char ch = *ptr; + if 
(ch == '\"' || ch =='\\' || ch == '\{' || ch == ',') { + do_quote = 1; + break; + } + } + + /* If we find no characters that need quoting just return the input */ + if (do_quote != 1) + return str; + + /* Do the quoting, here the allocation is wasteful */ + result = (char *) wmalloc((len * 2 + 3) * sizeof(char)); + result_return = result; + + /* + * Starting address of result is incremented as we modify it's contents here + * with result_return keeping the starting address + */ + *result++ = '"'; + while (len-- > 0) { + /* Escape double quotes and backslash with backslash */ + if (*str == '"') { + *result++ = '\\'; + } + if (*str == '\\') { + *result++ = '\\'; + } + *result++ = *str++; + } + *result++ = '"'; + *result++ = '\0'; + + return result_return; +} + +/* + * Count open parenthesis to evaluate the number of subexpressions in the regex + */ +static int count_subexpressions(const char *str){ + int result = 0; + int last_was_backslash = 0; + const char *ptr; + + for(ptr = str; *ptr; ptr++){ + if (*ptr == '\\' && !last_was_backslash){ + last_was_backslash = 1; + continue; + } + if (*ptr == ')' && !last_was_backslash) + result++; + last_was_backslash = 0; + } + return result; +} + +/* + * Check to see if string contains any escape chars + * these could of course just be escaped backslashes + * themselvs. 
+ */
+static int has_escapes(const char *str){
+    const char *ptr;
+
+    for(ptr=str; *ptr; ptr++){
+        if (*ptr == '\\')
+            return 1;
+    }
+    return 0;
+}
+
+/*
+ * Compile the regex pattern as REG_EXTENDED; flag 'i' adds REG_ICASE, 'n' adds REG_NEWLINE. Returns REG_NOERROR on success, 1 on failure (message on stderr when errors == 1)
+ */
+static int compile_regex(regex_t *re, const char *pattern, const char *flags, int errors)
+{
+    int status;
+    int cflags = REG_EXTENDED;
+
+    if (strchr(flags, 'i')) {
+        cflags = cflags|REG_ICASE;
+    }
+    if (strchr(flags, 'n')) {
+        cflags = cflags|REG_NEWLINE;
+    }
+
+    status = regcomp(re, pattern, cflags);
+    if (status != REG_NOERROR) {
+        if (errors == 1) {
+            char error_message[MAX_ERROR_MSG]; /* regerror() writes the text here, so it needs real storage of the size we pass */
+            regerror (status, re, error_message, MAX_ERROR_MSG);
+            fprintf (stderr, "Regex error compiling '%s': %s\n", pattern, error_message);
+        }
+        return 1;
+    }
+    return status;
+}
+
+/*
+ * Returns the number of entries stored and passes back, through result, a malloced array of
+ * regmatch_t containing match offsets in the input string. (As opposed to offsets from each match)
+ *
+ * The regmatch struct info:
+ * regmatch_t.rm_so (regoff_t) = byte offset from start of string to start of substring
+ * regmatch_t.rm_eo (regoff_t) = byte offset from start of string to first character after the end of substring
+ */
+static int find_regex_matches(regex_t *re, const char *str, const int nsub, const char *flags, regmatch_t **result)
+{
+    /* Each individual match and its subexpression matches stored in m */
+    regmatch_t m[nsub+1];
+
+    /* A pointer into the string at the end of the previous match */
+    const char *prev_match_eo = str;
+
+    /*
+     * We return a count of matches and pass back an array of regmatch_t in
+     * matches containing match offsets in the original string
+     */
+    int array_len = strchr(flags, 'g') ? 256 : 32;
+    int match_count = 0;
+    regmatch_t *matches;
+
+    matches = (regmatch_t *) wmalloc(sizeof(regmatch_t) * array_len);
+
+    while (!regexec(re, prev_match_eo, nsub+1, m, 0)) {
+        int i = 0;
+
+        /* resize the matches array; when more space is required double current size (the +1 covers the single entry written when nsub == 0) */
+        while (match_count + (nsub * 2) + 1 > array_len) {
+            array_len *= 2;
+            matches = (regmatch_t *) wrealloc(matches, sizeof(regmatch_t) * array_len);
+        }
+
+        /* when we have subexpressions, we're only interested in their match offsets */
+        if (nsub > 0) {
+            for (i = 1; i <= nsub; i++) {
+                if (m[i].rm_so < 0 || m[i].rm_eo < 0) {
+                    matches[match_count].rm_so = -1;
+                    matches[match_count++].rm_eo = -1;
+                }
+                else {
+                    matches[match_count].rm_so = (prev_match_eo - str) + m[i].rm_so;
+                    matches[match_count++].rm_eo = (prev_match_eo - str) + m[i].rm_eo;
+                }
+            }
+        }
+        /* else we want the original match offsets*/
+        else {
+            matches[match_count].rm_so = (prev_match_eo - str) + m[0].rm_so;
+            matches[match_count++].rm_eo = (prev_match_eo - str) + m[0].rm_eo;
+        }
+
+        /*
+         * If we have matched on a blank expression or we were
+         * not flagged to do greedy matching then break
+         */
+        if (!m[0].rm_eo || !strchr(flags, 'g'))
+            break;
+
+        /*
+         * Advance the search position to the end of the current match
+         * If the match happens to be zero length, advance search position
+         * by one?
+         */
+        if (m[0].rm_eo == m[0].rm_so)
+            prev_match_eo++;
+        else
+            prev_match_eo += m[0].rm_eo;
+    }
+    *result = matches;
+
+    return match_count;
+}
+
+/*
+ * Takes regmatch_t array returned by find_regex_matches and returns a malloced
+ * string representing the captured substrings.
+ */ +static char * regex_matches_to_string(const char *str, int nsub, int match_count, regmatch_t *matches) { + int j; + int i; + char *unquoted = NULL; + char *quoted = NULL; + int quoted_len; + char *result; + + int str_len = strlen(str); + int allocated_sz = str_len+1; + result = wmalloc(allocated_sz * sizeof(char)); + int result_sz = 0; + + j = 0; + while (j < match_count) { + + if (j > 0) { + result_sz += 2; + result = reallocate_block(result, &allocated_sz, result_sz * sizeof(char), str_len); + result[result_sz-2] = ','; + result[result_sz-1] = '{'; + } + else { + result_sz++; + result = reallocate_block(result, &allocated_sz, result_sz * sizeof(char), str_len); + result[result_sz-1] = '{'; + } + + for (i = 0; i <= nsub; i++) { + if ((nsub > 0) && (i == 0)) + continue; + + if (i > 1) { + result_sz++; + result = reallocate_block(result, &allocated_sz, result_sz * sizeof(char), str_len); + result[result_sz-1] = ','; + } + + int so = matches[j].rm_so; + int eo = matches[j].rm_eo; + + if (so == -1 || eo == -1) { + result = reallocate_block(result, &allocated_sz, (result_sz+4) * sizeof(char), str_len); + strncpy(result+result_sz, "NULL", 4); + result_sz += 4; + } + else { + unquoted = wmalloc((eo-so)+1 * sizeof(char)); + strncpy(unquoted, str+so, eo-so); + unquoted[eo-so] = '\0'; + quoted = quote_output(unquoted); + quoted_len = strlen(quoted); + + result = reallocate_block(result, &allocated_sz, (result_sz+quoted_len) * sizeof(char), str_len); + strncpy(result+result_sz, quoted, quoted_len); + result_sz += quoted_len; + + if (quoted != unquoted) + wfree(unquoted); + wfree(quoted); + } + j++; + } + + result_sz++; + result = reallocate_block(result, &allocated_sz, result_sz * sizeof(char), str_len); + result[result_sz-1] = '}'; + } + + result_sz++; + result = reallocate_block(result, &allocated_sz, result_sz * sizeof(char), str_len); + result[result_sz-1] = '\0'; + + return result; +} + +/* + * Purely check for a match in the regex + */ +int regexp_match(const 
char *str, const char *pattern, const char *flags, int errors) +{ + regex_t re; + int result; + int status; + + status = compile_regex(&re, pattern, flags, errors); + if (status == REG_NOERROR) { + result = regexec(&re, str, (size_t) 0, NULL, 0); + regfree(&re); + + if (!result) /* match */ + return 1; + else + return 0; + } + else /* no match */ + return 0; +} + +/* + * Return all matches in the regex as a string by first calling find_regex_matches + * and then regex_matches_to_string. Arguably this could all be one function + * however separation will make future multiple output formats easier. + */ +char * regexp_matches(const char *str, const char *pattern, const char *flags, int errors) +{ + regex_t re; + regmatch_t *matches_p = NULL; + int nsub; + int match_count; + int status; + char *result = NULL; + + /* Compile the regex */ + status = compile_regex(&re, pattern, flags, errors); + if (status == REG_NOERROR) { + /* Count our subexpressions to size our regmatch_t array */ + nsub = count_subexpressions(pattern); + /* Find all the matches relative to the input string */ + match_count = find_regex_matches(&re, str, nsub, flags, &matches_p); + /* Turn the matches into an output string */ + result = regex_matches_to_string(str, nsub, match_count, matches_p); + /* Free up the regmatch_t malloced by find_regex_matches */ + wfree(matches_p); + regfree(&re); + } + + return result; +} + +/* + * Substitutes matches with the regex pattern in the string with the replacement + * pattern/string. 
+ */
+char * regexp_replace(const char *str, const char *pattern, const char *replacement, const char *flags, int errors)
+{
+    regex_t re;
+    int nsub;
+    char *result = NULL; /* stays NULL when the pattern fails to compile; otherwise malloced, caller frees */
+    int status;
+    const char *prev_match_eo = str;
+    int str_len = strlen(str);
+    int replacement_len = strlen(replacement);
+    int allocated_sz = str_len+1;
+    int result_sz = 0;
+
+    status = compile_regex(&re, pattern, flags, errors);
+    if (status == REG_NOERROR) {
+
+        result = wmalloc(allocated_sz * sizeof(char));
+
+        /* Count our subexpressions to size our regmatch_t array */
+        nsub = count_subexpressions(pattern);
+        regmatch_t m[nsub+1];
+
+        while (!regexec(&re, prev_match_eo, nsub+1, m, 0)) {
+
+            /* Copy everything to the left of the first match */
+            if (m[0].rm_so > 0) {
+                result = reallocate_block(result, &allocated_sz, (result_sz+m[0].rm_so) * sizeof(char), str_len);
+                strncpy(result+result_sz, prev_match_eo, m[0].rm_so);
+                result_sz += m[0].rm_so;
+            }
+
+            /* If there are no backreferences in the replacement, copy in the replacement */
+            if (!has_escapes(replacement)) {
+                result = reallocate_block(result, &allocated_sz, (result_sz+replacement_len) * sizeof(char), str_len);
+                strncpy(result+result_sz, replacement, replacement_len);
+                result_sz += replacement_len;
+            }
+            /* Otherwise process the backreferences and copy in subcaptures */
+            else {
+                /* find the next escape char */
+                const char *start = replacement;
+                const char *ptr;
+
+                for(ptr = replacement; *ptr; ptr++) {
+                    if (*ptr != '\\')
+                        continue;
+
+                    /* append everything to the left of the current escape */
+                    result = reallocate_block(result, &allocated_sz, (result_sz+(ptr-start)) * sizeof(char), str_len);
+                    strncpy(result+result_sz, start, (ptr-start));
+                    result_sz += (ptr-start);
+
+                    ptr++;
+
+                    if ((*ptr >= '1' && *ptr <= '9') || (*ptr == '&'))
+                    {
+                        /* Use the back reference of regexp. */
+                        int sub;
+                        if (*ptr == '&')
+                            sub = 0;
+                        else
+                            sub = *ptr - '0';
+
+                        if (sub <= nsub && m[sub].rm_so != -1 && m[sub].rm_eo != -1) { /* range-check sub before it indexes m[] */
+                            result = reallocate_block(result, &allocated_sz, (result_sz+(m[sub].rm_eo-m[sub].rm_so)) * sizeof(char), str_len);
+                            strncpy(result+result_sz, prev_match_eo+m[sub].rm_so, (m[sub].rm_eo-m[sub].rm_so));
+                            result_sz += (m[sub].rm_eo-m[sub].rm_so);
+                        }
+                        ptr++;
+                    }
+                    else if (*ptr == '\\')
+                    {
+                        /* append backslash */
+                        result_sz++;
+                        result = reallocate_block(result, &allocated_sz, result_sz * sizeof(char), str_len);
+                        result[result_sz-1] = '\\';
+                        ptr++;
+                    }
+                    else {
+                        /* append backslash */
+                        result_sz++;
+                        result = reallocate_block(result, &allocated_sz, result_sz * sizeof(char), str_len);
+                        result[result_sz-1] = '\\';
+                    }
+                    start = ptr;
+                    ptr--; /* step back so the for-increment lands on start: a following escape is seen and a terminating NUL is never skipped */
+                }
+                /*
+                 * Append right trailing replacement, except in the instance
+                 * when it starts with character zero, which can happen when the
+                 * last part of the replace string is escaped.
+                 */
+                if (*start) {
+                    result = reallocate_block(result, &allocated_sz, (result_sz+(ptr-start)) * sizeof(char), str_len);
+                    strncpy(result+result_sz, start, (ptr-start));
+                    result_sz += (ptr-start);
+                }
+
+            }
+            prev_match_eo += m[0].rm_eo;
+
+            /*
+             * If we have matched on a blank expression or we were
+             * not flagged to do greedy matching then break
+             */
+            if (!m[0].rm_eo || !strchr(flags, 'g'))
+                break;
+        }
+
+        /* Copy everything to the right of the last match */
+        result = reallocate_block(result, &allocated_sz, (result_sz+(str_len-(prev_match_eo-str))) * sizeof(char), str_len);
+        strncpy(result+result_sz, prev_match_eo, str_len-(prev_match_eo-str));
+        result_sz += str_len-(prev_match_eo-str);
+
+        regfree(&re);
+
+        result_sz++;
+        result = reallocate_block(result, &allocated_sz, result_sz * sizeof(char), str_len);
+        result[result_sz-1] = '\0';
+    }
+    return result;
+}
diff --git a/src/c/gnuregex.h b/src/c/gnuregex.h
new file mode 100644
index 0000000..8929e21
--- /dev/null
+++ b/src/c/gnuregex.h
@@ -0,0 +1,17 @@
+/*-------------------------------------------------------------------------
+ * posix regex extensions
+ *
+ * Copyright (c) 2015, glyn@8kb.co.uk
+ * Author: Glyn Astill
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef __GNUREGEX_H__
+#define __GNUREGEX_H__
+
+extern int regexp_match(const char *str, const char *pattern, const char *flags, int errors);
+extern char * regexp_matches(const char *str, const char *pattern, const char *flags, int errors);
+extern char * regexp_replace(const char *str, const char *pattern, const char *replacement, const char *flags, int errors);
+
+#endif
diff --git a/src/c/libdfregex.a b/src/c/libdfregex.a
new file mode 100644
index 0000000..575172c
Binary files /dev/null and b/src/c/libdfregex.a differ
diff --git a/src/c/memman.c b/src/c/memman.c
new file mode 100644
index 0000000..57e7465
--- /dev/null
+++ b/src/c/memman.c
@@ -0,0 +1,64 @@
+/*-------------------------------------------------------------------------
+ * posix regex extensions
+ *
+ * Copyright (c) 2015, glyn@8kb.co.uk
+ * Author: Glyn Astill
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+/*
+ * Wrappers around malloc/realloc/free; allocation failure prints to stderr and exits with status 1
+ */
+void * wmalloc(unsigned int size) {
+    char *result;
+
+    if ((result = malloc(size)) == NULL) {
+        fprintf(stderr, "Failed to malloc %u bytes\n", size);
+        exit(1);
+    }
+    return result;
+}
+
+void * wrealloc(void *iptr, unsigned int size) {
+    char *result;
+
+    assert(iptr != NULL);
+
+    if ((result = realloc(iptr, size)) == NULL) {
+        fprintf(stderr, "Failed to realloc %u bytes\n", size);
+        exit(1);
+    }
+    return result;
+}
+
+void wfree(void *iptr){
+    assert(iptr != NULL);
+
+    if (iptr) {
+        free(iptr);
+    }
+}
+
+/*
+ * Reallocate memory block pointed to by iptr in chunks of chunk_size when
+ * required_size is greater than value pointed to by allocated_size.
+ * Sets value of allocated_size to current allocation.
+ */
+void * reallocate_block(void *iptr, int *allocated_size, int required_size, int chunk_size) {
+    void *result;
+
+    if (*allocated_size >= required_size)
+        return iptr;
+
+    if (chunk_size < 1) chunk_size = 1; /* callers pass strlen(input) as the chunk: an empty input must not divide by zero */
+    *allocated_size += (((required_size-*allocated_size)/chunk_size)+1)*chunk_size;
+
+    result = wrealloc(iptr, *allocated_size);
+
+    return result;
+}
diff --git a/src/c/memman.h b/src/c/memman.h
new file mode 100644
index 0000000..10ae667
--- /dev/null
+++ b/src/c/memman.h
@@ -0,0 +1,18 @@
+/*-------------------------------------------------------------------------
+ * posix regex extensions
+ *
+ * Copyright (c) 2015, glyn@8kb.co.uk
+ * Author: Glyn Astill
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef __MEMMAN_H__
+#define __MEMMAN_H__
+
+extern void * wmalloc(unsigned int size);
+extern void * wrealloc(void *iptr, unsigned int size);
+extern void wfree(void *iptr);
+extern void * reallocate_block(void *iptr, int *allocated_size, int required_size, int chunk_size);
+
+#endif
diff --git a/src/df32/dataflex.pkg b/src/df32/dataflex.pkg
new file mode 100644
index 0000000..6ef6a02
--- /dev/null
+++ b/src/df32/dataflex.pkg
@@ -0,0 +1,95 @@
+//-------------------------------------------------------------------------
+// posix regex extensions
+//
+// Copyright (c) 2015, glyn@8kb.co.uk
+// Author: Glyn Astill
+//
+//-------------------------------------------------------------------------
+//
+
+use dll
+
+Define max_dfregex_buffer for 16384
+Define errors_to_stderr for 0
+
+external_function RegexpMatch "RegexpMatch" dfregex.dll pointer str pointer pattern pointer flags integer errors returns integer
+external_function RegexpMatches "RegexpMatches" dfregex.dll pointer str pointer pattern pointer flags pointer out pointer out_len integer errors returns integer
+external_function RegexpReplace "RegexpReplace" dfregex.dll pointer str pointer pattern pointer replacement pointer flags pointer out
pointer out_len integer errors returns integer + +//Purely check if a regex expression produces match in the input string +// Returns 1 on match, 0 on no match +// E.g +// move (regexp_match('the quick brown fox jumps over the lazy dog.', 'fox', 'g')) +function regexp_match global string str string pattern string flags returns integer + local integer l_iReturn + local pointer l_pStr l_pPattern l_pFlags + + getaddress of str to l_pStr + getaddress of pattern to l_pPattern + getaddress of flags to l_pFlags + + move (RegexpMatch(l_pStr, l_pPattern, l_pFlags, errors_to_stderr)) to l_iReturn + + function_return l_iReturn +end_function + +//Return a string containing all regex matches in the input string +// E.g +// move (regexp_matches('the quick brown fox jumps over the la\{zy d"og.', 'fox|(the)|brown|(la\\\{zy)|(d"og)', 'g')) to myString +function regexp_matches global string str string pattern string flags returns string + local integer l_iReturn + local pointer l_pStr l_pPattern l_pFlags l_pOut + local string l_sOut l_sReturn + + move "" to l_sReturn + getaddress of str to l_pStr + getaddress of pattern to l_pPattern + getaddress of flags to l_pFlags + zerostring max_dfregex_buffer to l_sOut + getaddress of l_sOut to l_pOut + + move (RegexpMatches(l_pStr, l_pPattern, l_pFlags, l_pOut, max_dfregex_buffer, errors_to_stderr)) to l_iReturn + + if (l_iReturn = 0); + move (cstring(l_sOut)) To l_sReturn + else begin + if (l_iReturn = -1); + error 999997 "Regex output buffer too small" + if (l_iReturn = -2); + error 999998 "Regex compilation failure" + move "" to l_sReturn + end + + function_return l_sReturn +end_function + +//Perform a replacement on the input string all matches with the given pattern +// E.g. 
+// move (regexp_replace('22 quick brown foxes jump over the 44 lazy dogs.', '([0-9]*).* (foxes) .* ([0-9]*) .* (dogs).*', 'SELECT build_data(\1,\2), build_data(\3,\4);', 'g')) to myString +function regexp_replace global string str string pattern string replacement string flags returns string + local integer l_iReturn + local pointer l_pStr l_pPattern l_pFlags l_pReplacement l_pOut + local string l_sOut l_sReturn + + move "" to l_sReturn + getaddress of str to l_pStr + getaddress of pattern to l_pPattern + getaddress of flags to l_pFlags + getaddress of replacement to l_pReplacement + zerostring max_dfregex_buffer to l_sOut + getaddress of l_sOut to l_pOut + + move (RegexpReplace(l_pStr, l_pPattern, l_pReplacement, l_pFlags, l_pOut, max_dfregex_buffer, errors_to_stderr)) to l_iReturn + + if (l_iReturn = 0); + move (cstring(l_sOut)) To l_sReturn + else begin + if (l_iReturn = -1); + error 999997 "Regex output buffer too small" + if (l_iReturn = -2); + error 999998 "Regex compilation failure" + move "" to l_sReturn + end + + function_return l_sReturn +end_function \ No newline at end of file