Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #define PCRE2_CODE_UNIT_WIDTH 8
- #include <pcre2.h>
/* ANSI SGR escape sequences for coloured terminal output; each one switches the
   foreground colour, and ANSI_COLOR_RESET switches back to the default attributes. */
- #define ANSI_COLOR_RED "\x1b[31m"
- #define ANSI_COLOR_GREEN "\x1b[32m"
- #define ANSI_COLOR_YELLOW "\x1b[33m"
- #define ANSI_COLOR_BLUE "\x1b[34m"
- #define ANSI_COLOR_MAGENTA "\x1b[35m"
- #define ANSI_COLOR_CYAN "\x1b[36m"
- #define ANSI_COLOR_RESET "\x1b[0m"
/* Forward declarations for the functions defined below main(). */
/* Prompts for a .tex path and writes the filtered lines to a temporary file. */
- void cleanFile(void);
/* Applies one regex/replacement pair to subject and stores the outcome in the global returnLine. */
- char *checkLineWithRegex(char *pattern, char *subject, char *replacement);
/* Copies the temporary file to the final result file, dropping every '{', '}' and backslash. */
- void finalizeCleaning(void);
/* Runs the whole pattern sequence over one input line; the substitution passes repeat runCount times. */
- void filterLine(char *line, char *result, int runCount);
/* Streams shared by the cleaning steps: the source .tex file, the temporary
   file (written by cleanFile, read back by finalizeCleaning), and the final result file. */
- FILE *inputFile;
- FILE *outputFile;
- FILE *resultFile;
/* Scratch buffer that checkLineWithRegex writes its result into; allocated in
   main() and released in finalizeCleaning(). */
- PCRE2_UCHAR *returnLine;
/* Input path typed by the user; 70 bytes including the terminating NUL. The
   temporary and result file names are built by appending a suffix to it. */
- char PLIK_TXT[70];
/* Matches a whole line that, after optional leading whitespace, begins with a
   backslash followed by one of the listed command names. The pattern contains
   no capturing groups (only atomic (?>...) groups); filterLine applies it first,
   with an empty replacement. */
- char paternDeletWholeLine[] =
- "^(?>\\s*)\\\\(?>hline|centerline|medskip|noindent|documentclass|usepackage|begin|maketitle|begin|end|includegraphics|nodeconnect|hspace|setlength|frac|(?>re)?newcommand|epsfxsize).*$";
/* Meant for a command nested inside another command's braces.
   NOTE(review): the "\\(" in the middle of this literal reaches the regex engine
   as an escaped literal parenthesis, which appears to leave the following ")"
   unbalanced, so pcre2_compile would presumably reject the pattern - confirm
   whether "\\\\(" (backslash, then a capturing group) was intended. */
- char patternEmbeddedCommands[] = "(.*?)\\\\(.*?)\\{(.*?)\\(.*?)\\}(.*?)\\}(.*)";
/* Targets commands whose brace argument text should be kept: the command name
   sits in an atomic group, while groups 1-3 are positioned before the command,
   inside the optional braces, and after them. */
- char patternPreserveTextInsideBrackets[] =
- "(.*?)\\\\(?>title|author|date|enumsentence|section|subsection|emph|href|url|item|subfigure|leftline|epsffile|caption|label|footnote|text).?\\{?(.*?)\\}?(.*?)?";
/* Targets commands written without a brace argument (font switches, spacing,
   math operators and similar); group 1 precedes the command and group 2 follows it. */
- char patternNoBrackets[] = "(.*?)\\\\(?:\\btt\\b|left|item|noindent|right|medskip|qquad|over|partial|right|small|tiny|smallskip|large|bigskip|mathop|longrightarrow|propto|\\bem\\b|sum|vec|cdot|\\bverb\\b|hfill|bigr|sinh)(.*?)";
/* Targets commands that are removed together with their brace argument: the
   argument is matched by a non-capturing group, so only the text before
   (group 1) and after (group 2) is captured. */
- char patternDeleteInlineEntirely[] = "(.*)\\\\(?>end|bibitem|mbox|vspace|ref|scalebox|includegraphics|limits|log|textbf|textsl|texttt|textit|bigl|eqref|mathbf|nabla|verbatiminput).?\\{(?:.*?)\\}?(.*)?";
/* Matches a backslash-color command up to the next closing brace; groups 1 and
   2 capture the text on either side. */
- char patternCustomColor[] = "(.*)\\\\color.*?\\}(.*)";
/* Matches two consecutive whitespace characters; filterLine replaces each such pair with one space. */
- char patternSpaces[] = "(\\s\\s)";
/* Matches a backslash followed by a Greek letter name (first letter in either
   case); group 2 captures the name, groups 1 and 3 the surrounding text. */
- char patternGreekLetters[] = "(.*?)\\\\([Aa]lpha|[Dd]igamma|[Kk]appa|[Oo]micron|[Uu]psilon|[Bb]eta|[Zz]eta|[Ll]ambda|[Pp]i|[Pp]hi|[Gg]amma|[Ee]ta|[Mm]u|[Rr]ho|[Cc]hi|[Dd]elta|[Tt]heta|[Nn]u|[Ss]igma|[Pp]si|[Ee]psilon|[Ii]ota|[Xx]i|[Tt]au|[Oo]mega)(.*)";
- int main(int argc, char **argv) {
- int isEnd = 0;
- int option = 0;
- while (!isEnd) {
- printf(ANSI_COLOR_GREEN"Witaj w Czyścicielu!"ANSI_COLOR_RESET"\n");
- printf("[1] Wyczyść plik .tex\n");
- printf(ANSI_COLOR_MAGENTA"[0] Wyjdź z aplikacji"ANSI_COLOR_RESET"\n\n");
- printf("Wybierz opcję: \n");
- scanf("%d", &option);
- while (getchar() != '\n');
- switch (option) {
- case 1:
- returnLine = malloc(sizeof(PCRE2_UCHAR) * 1024);
- cleanFile();
- finalizeCleaning();
- break;
- case 0:
- default:
- isEnd = 1;
- break;
- }
- }
- return 0;
- }
- void cleanFile(void) {
- char buffer[255];
- char *temp = malloc(sizeof(char) * 255);
- char outName[100];
- printf("Podaj pełną ścieżkę do pliku .tex\n");
- scanf("%70s", PLIK_TXT);
- if ((inputFile = fopen(PLIK_TXT, "r")) != NULL) {
- fprintf(stdout, "Otwarto plik %s w trybie odczytu tekstowego.\n", PLIK_TXT);
- strcpy(outName, PLIK_TXT);
- outputFile = fopen(strcat(outName, "_cleaned.txt.tmp"), "w");
- printf("Utworzono plik tymczasowy %s\n", outName);
- while (fgets(buffer, 255, (FILE *) inputFile)) {
- strcpy(temp, buffer);
- if (strcmp(temp, "\n") == 0) {
- continue;
- }
- filterLine(temp, returnLine, 3);
- fputs(returnLine, outputFile);
- }
- fclose(outputFile);
- free(temp);
- fclose(inputFile);
- } else {
- printf("Nie moge odczytac pliku %s!\n", PLIK_TXT);
- }
- }
- void filterLine(char *line, char *result, int runCount) {
- checkLineWithRegex(paternDeletWholeLine, line, "");
- for (int i = 0; i < runCount; i++) {
- checkLineWithRegex(patternEmbeddedCommands, result, "\\${4} ${5}");
- checkLineWithRegex(patternEmbeddedCommands, result, "\\${4} ${5}");
- checkLineWithRegex(patternEmbeddedCommands, result, "\\${4} ${5}");
- checkLineWithRegex(patternGreekLetters, result, "${1} ${2} ${3}");
- checkLineWithRegex(patternGreekLetters, result, "${1} ${2} ${3}");
- checkLineWithRegex(patternGreekLetters, result, "${1} ${2} ${3}");
- checkLineWithRegex(patternGreekLetters, result, "${1} ${2} ${3}");
- checkLineWithRegex(patternGreekLetters, result, "${1} ${2} ${3}");
- checkLineWithRegex(patternPreserveTextInsideBrackets, result, "${1} ${2} ${3}");
- checkLineWithRegex(patternPreserveTextInsideBrackets, result, "${1} ${2} ${3}");
- checkLineWithRegex(patternDeleteInlineEntirely, result, "${1} ${2}");
- checkLineWithRegex(patternDeleteInlineEntirely, result, "${1} ${2}");
- checkLineWithRegex(patternDeleteInlineEntirely, result, "${1} ${2}");
- checkLineWithRegex(patternDeleteInlineEntirely, result, "${1} ${2}");
- checkLineWithRegex(patternNoBrackets, result, "${1} ${2}");
- checkLineWithRegex(patternNoBrackets, result, "${1} ${2}");
- checkLineWithRegex(patternNoBrackets, result, "${1} ${2}");
- checkLineWithRegex(patternNoBrackets, result, "${1} ${2}");
- checkLineWithRegex(patternNoBrackets, result, "${1} ${2}");
- checkLineWithRegex(patternCustomColor, result, "${1} ${2}");
- checkLineWithRegex(patternCustomColor, result, "${1} ${2}");
- checkLineWithRegex(patternCustomColor, result, "${1} ${2}");
- checkLineWithRegex(patternSpaces, result, " ");
- checkLineWithRegex(patternSpaces, result, " ");
- checkLineWithRegex(patternSpaces, result, " ");
- checkLineWithRegex(patternSpaces, result, " ");
- }
- }
- void finalizeCleaning() {
- char resultName[100];
- char tmpName[100];
- strcat(strcpy(tmpName, PLIK_TXT), "_cleaned.txt.tmp");
- strcat(strcpy(resultName, PLIK_TXT), "_cleaned.txt");
- if ((outputFile = fopen(tmpName, "r")) != NULL) {
- resultFile = fopen(resultName, "w");
- printf("Utworzono plik wynikowy %s\n", resultName);
- int curr;
- while ((curr = fgetc(outputFile)) != EOF) {
- if (curr != '{' && curr != '}' && curr != '\\') {
- fputc(curr, resultFile);
- }
- }
- fclose(outputFile);
- fclose(resultFile);
- remove(tmpName);
- printf("Usunięto plik tymczasowy %s\n", tmpName);
- } else {
- printf("Nie moge odczytac pliku %s!\n", tmpName);
- }
- printf(ANSI_COLOR_GREEN"Gotowe!"ANSI_COLOR_RESET"\n");
- free(returnLine);
- }
- char *checkLineWithRegex(char *patternIn, char *subjectIn, char *replacementIn) {
- int error;
- PCRE2_SIZE erroffset;
- const PCRE2_SPTR pattern = (PCRE2_SPTR) patternIn;
- const PCRE2_SPTR subject = (PCRE2_SPTR) subjectIn;
- const PCRE2_SPTR replacement = (PCRE2_SPTR) replacementIn;
- int subject_length = (PCRE2_SIZE) strlen((char *) subject);
- pcre2_code *re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, 0, &error, &erroffset, 0);
- if (re == 0) {
- return "";
- }
- pcre2_jit_compile(re, PCRE2_JIT_COMPLETE);
- PCRE2_UCHAR output[1024] = "";
- PCRE2_SIZE outlen = sizeof(output) / sizeof(PCRE2_UCHAR);
- pcre2_match_data *match_data = pcre2_match_data_create_from_pattern(re, NULL);
- int matched = pcre2_match(
- re, /* the compiled pattern */
- subject, /* the subject string */
- subject_length, /* the length of the subject */
- 0, /* start at offset 0 in the subject */
- 0, /* default options */
- match_data, /* block for storing the result */
- NULL);
- if (matched == 1) {
- pcre2_code_free(re);
- return strcpy((char *) returnLine, (char *) output);
- }
- int rc = pcre2_substitute(re, subject, PCRE2_ZERO_TERMINATED, 0,
- PCRE2_SUBSTITUTE_GLOBAL | PCRE2_SUBSTITUTE_EXTENDED, 0, 0, replacement,
- PCRE2_ZERO_TERMINATED, output, &outlen);
- pcre2_code_free(re);
- if (rc > 0) {
- return strcpy((char *) returnLine, (char *) output);
- } else {
- return strcpy((char *) returnLine, subjectIn);
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement