Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*************************************************
- * PCRE DEMONSTRATION PROGRAM *
- *************************************************/
- /* This is a demonstration program to illustrate the most straightforward ways
- of calling the PCRE regular expression library from a C program. See the
- pcresample documentation for a short discussion ("man pcresample" if you have
- the PCRE man pages installed).
- In Unix-like environments, if PCRE is installed in your standard system
- libraries, you should be able to compile this program using this command:
- gcc -Wall pcredemo.c -lpcre -o pcredemo
- If PCRE is not installed in a standard place, it is likely to be installed with
- support for the pkg-config mechanism. If you have pkg-config, you can compile
- this program using this command:
- gcc -Wall pcredemo.c `pkg-config --cflags --libs libpcre` -o pcredemo
- If you do not have pkg-config, you may have to use this:
- gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
- -R/usr/local/lib -lpcre -o pcredemo
- Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
- library files for PCRE are installed on your system. Only some operating
- systems (e.g. Solaris) use the -R option.
- Building under Windows:
- If you want to statically link this program against a non-dll .a file, you must
- define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
- pcre_free() exported functions will be declared __declspec(dllimport), with
- unwanted results. So in this environment, uncomment the following line. */
- #define PCRE_STATIC
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <pcre.h>
/* Number of ints in the pcre_exec() output vector; should be a multiple of 3. */
#define OVECCOUNT 30

/* Streams used by odczyt_pliku_tekstowego(): the file read and the file written. */
FILE *inputFile;
FILE *outputFile;

/* Path of the input file that gets processed. */
char PLIK_TXT[] = "/home/rocket/CLionProjects/clear/sample2.tex";

/*
 * Regex with three capture groups:
 *   1: everything before a backslash followed by one of the listed names,
 *   2: the text between the braces that follow (after at most one extra character),
 *   3: everything after the closing brace.
 */
char patternPreserveTextInsideBrackets[] =
    "(.*)"
    "\\\\(?>title|author|date|enumsentence|section|subsection|emph)"
    ".?\\{(.*)\\}"
    "(.*)";

/* Empty pattern; the only reference in this file is a commented-out call. */
char patternDeleteAllOccurences[] = "";

/*
 * Regex for a whole line that starts (after optional whitespace) with a
 * backslash followed by one of the listed names. The only reference in this
 * file is a commented-out call.
 */
char paternDeletWholeLine[] =
    "^(?>\\s*)"
    "\\\\(?>documentclass|usepackage|begin|maketitle|begin|end|includegraphics|nodeconnect)"
    ".*$";

/* Shared buffer that receives the processed line; allocated and freed in main(). */
char *returnLine;

/* Forward declarations of the functions defined later in this file. */
void odczyt_pliku_tekstowego(void);
char *checkLineWithRegex(char *pattern, char *subject);
- int main() {
- returnLine = malloc(sizeof(char) * 255);
- odczyt_pliku_tekstowego();
- free(returnLine);
- return 0;
- }
- char *checkLineWithRegex(char *pattern, char *subject) {
- pcre *re;
- const char *error;
- int erroffset;
- int ovector[OVECCOUNT];
- int subject_length;
- int rc, i;
- subject_length = (int) strlen(subject);
- /*************************************************************************
- * Now we are going to compile the regular expression pattern, and handle *
- * and errors that are detected. *
- *************************************************************************/
- re = pcre_compile(
- pattern, /* the pattern */
- 0, /* default options */
- &error, /* for error message */
- &erroffset, /* for error offset */
- NULL); /* use default character tables */
- /* Compilation failed: print the error message and exit */
- if (re == NULL) {
- printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
- return "";
- }
- /*************************************************************************
- * If the compilation succeeded, we call PCRE again, in order to do a *
- * pattern match against the subject string. This does just ONE match. If *
- * further matching is needed, it will be done below. *
- *************************************************************************/
- rc = pcre_exec(
- re, /* the compiled pattern */
- NULL, /* no extra data - we didn't study the pattern */
- subject, /* the subject string */
- subject_length, /* the length of the subject */
- 0, /* start at offset 0 in the subject */
- 0, /* default options */
- ovector, /* output vector for substring information */
- OVECCOUNT); /* number of elements in the output vector */
- /* Matching failed: handle error cases */
- if (rc < 0) {
- switch (rc) {
- case PCRE_ERROR_NOMATCH:
- printf("No match\n");
- return strcpy(returnLine, subject);
- // break;
- /*
- Handle other special cases if you like
- */
- default:
- printf("Matching error %d\n", rc);
- break;
- }
- pcre_free(re); /* Release memory used for the compiled pattern */
- return "";
- }
- /* Match succeded */
- // printf("\nMatch succeeded at offset %d\n", ovector[0]);
- /*************************************************************************
- * We have found the first match within the subject string. If the output *
- * vector wasn't big enough, say so. Then output any substrings that were *
- * captured. *
- *************************************************************************/
- /* The output vector wasn't big enough */
- if (rc == 0) {
- rc = OVECCOUNT / 3;
- printf("ovector only has room for %d captured substrings\n", rc - 1);
- }
- /* Show substrings stored in the output vector by number. Obviously, in a real
- application you might want to do things other than print them. */
- // if (rc >= 2) {
- for (i = 0; i < rc; i++) {
- char *substring_start = subject + ovector[2 * i];
- int substring_length = ovector[2 * i + 1] - ovector[2 * i];
- printf("%2d: %.*s\n", i, substring_length, substring_start);
- }
- // }
- if (rc >= 2) {
- char *substring_start = subject + ovector[2 * 2];
- return strcpy(returnLine, substring_start);
- }
- // wyciety kod
- unsigned int option_bits;
- int crlf_is_newline;
- int utf8;
- /* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
- sequence. First, find the options with which the regex was compiled; extract
- the UTF-8 state, and mask off all but the newline options. */
- (void)
- pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &option_bits
- );
- utf8 = option_bits & PCRE_UTF8;
- option_bits &= PCRE_NEWLINE_CR | PCRE_NEWLINE_LF | PCRE_NEWLINE_CRLF |
- PCRE_NEWLINE_ANY |
- PCRE_NEWLINE_ANYCRLF;
- crlf_is_newline =
- option_bits == PCRE_NEWLINE_ANY ||
- option_bits == PCRE_NEWLINE_CRLF ||
- option_bits == PCRE_NEWLINE_ANYCRLF;
- //kilka loopow
- for (;;) {
- int options = 0; /* Normally no options */
- int start_offset = ovector[1]; /* Start at end of previous match */
- /* If the previous match was for an empty string, we are finished if we are
- at the end of the subject. Otherwise, arrange to run another match at the
- same point to see if a non-empty match can be found. */
- if (ovector[0] == ovector[1]) {
- if (ovector[0] == subject_length) break;
- options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
- }
- /* Run the next matching operation */
- rc = pcre_exec(
- re, /* the compiled pattern */
- NULL, /* no extra data - we didn't study the pattern */
- subject, /* the subject string */
- subject_length, /* the length of the subject */
- start_offset, /* starting offset in the subject */
- options, /* options */
- ovector, /* output vector for substring information */
- OVECCOUNT); /* number of elements in the output vector */
- /* This time, a result of NOMATCH isn't an error. If the value in "options"
- is zero, it just means we have found all possible matches, so the loop ends.
- Otherwise, it means we have failed to find a non-empty-string match at a
- point where there was a previous empty-string match. In this case, we do what
- Perl does: advance the matching position by one character, and continue. We
- do this by setting the "end of previous match" offset, because that is picked
- up at the top of the loop as the point at which to start again.
- There are two complications: (a) When CRLF is a valid newline sequence, and
- the current position is just before it, advance by an extra byte. (b)
- Otherwise we must ensure that we skip an entire UTF-8 character if we are in
- UTF-8 mode. */
- if (rc == PCRE_ERROR_NOMATCH) {
- if (options == 0) break; /* All matches found */
- ovector[1] = start_offset + 1; /* Advance one byte */
- if (
- crlf_is_newline && /* If CRLF is newline & */
- start_offset < subject_length
- - 1 && /* we are at CRLF, */
- subject[start_offset] == '\r' &&
- subject[start_offset + 1] == '\n')
- ovector[1] += 1; /* Advance by one more. */
- else if (utf8) /* Otherwise, ensure we */
- { /* advance a whole UTF-8 */
- while (ovector[1] < subject_length) /* character. */
- {
- if ((subject[ovector[1]] & 0xc0) != 0x80) break;
- ovector[1] += 1;
- }
- }
- continue; /* Go round the loop again */
- }
- /* Other matching errors are not recoverable. */
- if (rc < 0) {
- printf("Matching error %d\n", rc);
- pcre_free(re); /* Release memory used for the compiled pattern */
- return "";
- }
- /* Match succeded */
- printf("\nMatch succeeded again at offset %d\n", ovector[0]);
- /* The match succeeded, but the output vector wasn't big enough. */
- if (rc == 0) {
- rc = OVECCOUNT / 3;
- printf("ovector only has room for %d captured substrings\n", rc - 1);
- }
- /* As before, show substrings stored in the output vector by number, and then
- also any named substrings. */
- for (i = 0; i < rc; i++) {
- char *substring_start = subject + ovector[2 * i];
- int substring_length = ovector[2 * i + 1] - ovector[2 * i];
- printf("%2d: %.*s\n", i, substring_length, substring_start);
- }
- printf("\n");
- pcre_free(re); /* Release memory used for the compiled pattern */
- return 0;
- }
- }
- void odczyt_pliku_tekstowego(void) {
- char buffer[255];
- char *temp = malloc(sizeof(char) * 255);
- char outName[60];
- if ((inputFile = fopen(PLIK_TXT, "r")) !=
- NULL) // Probuje otworzyc plik w trybie tekstowym do odczytu - ta operacja moze sie nie udac. Kiedy?
- {
- fprintf(stdout, "Otwarto plik %s w trybie odczytu tekstowego.\n",
- PLIK_TXT); // W taki sposob rowniez mozna wyswietlac informacje w konsoli
- strcpy(outName, PLIK_TXT);
- outputFile = fopen(strcat(outName, "_cleaned.txt"), "w");
- printf("Utworzono plik tekstowy %s\n", outName);
- while (fgets(buffer, 255, (FILE *) inputFile)) {
- strcpy(temp, buffer);
- // checkLineWithRegex(paternDeletWholeLine, temp);
- strcpy(returnLine, checkLineWithRegex(patternPreserveTextInsideBrackets, temp));
- // strcpy(returnLine, checkLineWithRegex(patternDeleteAllOccurences, returnLine));
- fputs(returnLine, outputFile);
- printf("%s\n", returnLine);
- }
- fclose(outputFile);
- free(temp);
- fclose(inputFile);
- } else {
- printf("Nie moge odczytac pliku %s!\n", PLIK_TXT);
- }
- }
Add Comment
Please, Sign In to add comment