Advertisement
Taraxacum

sensitive words filter

Nov 21st, 2018
328
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 4.07 KB | None | 0 0
  #include <cstdlib>
  #include <cstring>
  #include <iostream>
  3.  
  4. using namespace std;
  5.  
  // ********** Declarations **********
  // Gives every character of `pattern` its own class code, counting up from
  // `type`, and returns the first code that was not handed out.
  char init_(const char* pattern, char type);
  // Fills the classification table and stores the sensitive words in encoded form.
  void init(const char* puncuation, const char* space, const char* noise, const char** sensitive_words);

  // ********** Global States **********
  // Class code for each possible byte value; 0 means "not an accepted character".
  char CharClassify[256] = {};
  // Sensitive words, stored as zero-terminated sequences of class codes.
  char** SensitiveWords = nullptr;

  // Number of entries in SensitiveWords.
  int swCount = 0;

  // True while a candidate sensitive word is being matched.
  bool swMode = false;
  // Class codes of the candidate collected so far.
  char swBuf[80] = {};
  // Number of codes currently held in swBuf.
  int swBufPos = 0;
  // Position in `result` where the current candidate begins.
  int swIdx = 0;

  // Output buffer of the current filtering pass.
  char* result = nullptr;
  // Next write position in `result`.
  int rdIdx = 0;

  // Set when a word was removed, so the caller runs another pass.
  bool ReTry = true;
  25.  
  26. // ********** State Machine **********
  27. void gotInfoChar(const char& ch)
  28. {
  29.     result[rdIdx++] = ch;
  30.  
  31.     if (swMode) {
  32.         // in sensitive word matching mode
  33.         swBuf[swBufPos++] = CharClassify[ch];
  34.  
  35.         int i;
  36.         for (i = 0; i < swCount; i++) {
  37.             if (strncmp(swBuf, SensitiveWords[i], swBufPos) == 0) {
  38.                 break;
  39.             }
  40.         }
  41.  
  42.         if (i == swCount) {
  43.             // matching failed
  44.             swMode = false;
  45.             swBufPos = 0;
  46.         } else if (SensitiveWords[i][swBufPos] == 0) {
  47.             // whether the matching finishes
  48.             rdIdx = swIdx;
  49.             swMode = false;
  50.             swBufPos = 0;
  51.             ReTry = true;
  52.  
  53.             return;
  54.         }
  55.     }
  56.  
  57.     // matching failed or not in matching mode
  58.     // is it the header of any sensitive word?
  59.     for (int i = 0; i < swCount; i++) {
  60.         if (CharClassify[ch] == SensitiveWords[i][0]) {
  61.             swBuf[swBufPos++] = CharClassify[ch];
  62.             swMode = true;
  63.             swIdx = rdIdx - 1;
  64.             break;
  65.         }
  66.     }
  67. }
  68.  
  69. void gotPunc(const char& ch)
  70. {
  71.     // if we read a punctuation, the sensitive word is interruptted if exists
  72.     result[rdIdx++] = ch;
  73.     swMode = false;
  74.     swBufPos = 0;
  75. }
  76.  
  77. void gotSpaceOrNoise(const char& ch)
  78. {
  79.     // do nothing when got the useless or space character
  80.     result[rdIdx++] = ch;
  81. }
  82.  
  83. void doit(const char* buffer)
  84. {
  85.     rdIdx = 0;
  86.     result = new char[strlen(buffer) + 1];
  87.  
  88.     for (int idx = 0; buffer[idx] != '\0'; idx++) {
  89.         char type = CharClassify[buffer[idx]];
  90.  
  91.         if (type == 0) {
  92.             cout << "锟斤拷锟诫不锟斤拷锟斤拷要锟斤拷" << endl;
  93.             exit(EXIT_FAILURE);
  94.         } else {
  95.             switch (type & 0xf0) {
  96.             case 0x10:
  97.             case 0x20:
  98.             case 0x30:
  99.                 gotInfoChar(buffer[idx]);
  100.                 break;
  101.             case 0x40:
  102.                 gotPunc(buffer[idx]);
  103.                 break;
  104.             default:
  105.                 gotSpaceOrNoise(buffer[idx]);
  106.             }
  107.         }
  108.     }
  109.  
  110.     result[rdIdx] = '\0';
  111. }
  112.  
  113. // ********** Main Program **********
  114. int main()
  115. {
  116.     swCount = 3;
  117.     SensitiveWords = new char*[swCount];
  118.  
  119.     const char* sw[swCount] = { "L4", "Fd", "D26" };
  120.     init(".,\"", " \n", "@#$", sw);
  121.  
  122.     char buffer[64];
  123.     cout << "Give no more than 20 characters";
  124.     cin.getline(buffer, 20);
  125.  
  126.     doit(buffer);
  127.  
  128.     while (ReTry) {
  129.         ReTry = false;
  130.         strcpy(buffer, result);
  131.         delete[] result;
  132.         result = nullptr;
  133.         doit(buffer);
  134.     }
  135.  
  136.     cout << result << endl;
  137.  
  138.     delete[] result;
  139.     for (int i = 0; i < swCount; i++) {
  140.         delete[] SensitiveWords[i];
  141.     }
  142.     delete[] SensitiveWords;
  143.  
  144.     return 0;
  145. }
  146.  
  147. char init_(const char* pattern, char type)
  148. {
  149.     for (int idx = 0; pattern[idx] != '\0'; idx++) {
  150.         CharClassify[pattern[idx]] = type++;
  151.     }
  152.  
  153.     return type;
  154. }
  155.  
  156. void init(const char* puncuation, const char* space, const char* noise, const char** sensitive_words)
  157. {
  158.     init_("ABCDEFGHIJKLMNOPQRSTUVWXYZ", 0x10);
  159.     init_("abcdefghijklmnopqrstuvwxyz", 0x10);
  160.     init_("0123456789", 0x30);
  161.     init_(puncuation, 0x40);
  162.     init_(space, init_(noise, 0x80));
  163.  
  164.     for (int i = 0; i < swCount; i++) {
  165.         SensitiveWords[i] = new char[strlen(sensitive_words[i]) + 1]{ 0 };
  166.  
  167.         for (int idx = 0; sensitive_words[i][idx] != '\0'; idx++) {
  168.             SensitiveWords[i][idx] = CharClassify[sensitive_words[i][idx]];
  169.         }
  170.     }
  171. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement