jasonaudy36

RegExp.h

Mar 18th, 2021 (edited)
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.59 KB | None | 0 0
  1.  
  2. #ifndef REGEXP_H
  3. #define REGEXP_H
  4.  
  5. #include <iostream>
  6. #include <string>
  7. #include <vector>
  8. using namespace std;
  9.  
  10. #pragma warning(push)
  11. #pragma warning(disable: 6385 6011 4127)
  12. #include "atlrx.h"
  13. #pragma warning(pop)
  14.  
  15. #include <algorithm>
  16. #include <cassert>
  17. #include <cstring>
  18. #include <string>
  19.  
  20. /*
  21. * Parameters
  22. * [in] regExp: Value of type string which is the input regular expression.
  23. * [in] caseSensitive: Value of type bool which indicate whether the parse is case sensitive.
  24. * [in] groupCount: Value of type int which is the group count of the regular expression.
  25. * [in] source: Value of type string reference which is the source to parse.
  26. * [out] result: Value of type vecotr of strings which is the output of the parse.
  27. * [in] allowDuplicate: Value of type bool which indicates whether duplicate items are added to the output result.
  28. *
  29. * Return Value
  30. * Returns true if the function succeeds, or false otherwise.
  31. *
  32. * Remarks
  33. * The output result is devided into groups. User should get the groups according to the group count. For example:
  34. * 1. RegExp = L"{ab}", source = L"abcabe", then result = L"ab", L"ab".
  35. * 2. RegExp = L"{ab}{cd}", source = L"abcdeabecd", then result = L"ab", L"cd", L"ab", L"cd".
  36. */
  37. inline bool ParseRegExp(const wstring& regExp, bool caseSensitive, int groupCount, const wstring& source, vector<wstring>& result, bool allowDuplicate = false)
  38. {
  39. result.clear();
  40. if (regExp.size() <= 0)
  41. {
  42. return false;
  43. }
  44. if (groupCount <= 0)
  45. {
  46. return false;
  47. }
  48. if (source.size() <= 0)
  49. {
  50. return false;
  51. }
  52. CAtlRegExp<> re;
  53. REParseError error = re.Parse((char*)regExp.c_str(), caseSensitive);
  54. if (error != REPARSE_ERROR_OK)
  55. {
  56. return false;
  57. }
  58. wchar_t* pSource = new wchar_t[source.size() + 1];
  59. wchar_t* pSourceEnd = pSource + source.size();
  60. if (pSource == NULL)
  61. {
  62. return false;
  63. }
  64. wcscpy_s(pSource, source.size() + 1, source.c_str());
  65. BOOL bSucceed = TRUE;
  66. CAtlREMatchContext<> mc;
  67. const wchar_t* pFrom = pSource;
  68. const wchar_t* pTo = NULL;
  69. while (bSucceed)
  70. {
  71. bSucceed = re.Match((const char*)pFrom, &mc, (const char**)&pTo);
  72. if (bSucceed)
  73. {
  74. const wchar_t* pStart = NULL;
  75. const wchar_t* pEnd = NULL;
  76. vector<wstring> tempMatch;
  77. for (int i = 0; i < groupCount; i++)
  78. {
  79. mc.GetMatch(i, (const char**)&pStart, (const char**)&pEnd);
  80. if (pStart != NULL && pEnd != NULL)
  81. {
  82. wstring match(pStart, pEnd - pStart);
  83. tempMatch.push_back(match);
  84. }
  85. else
  86. {
  87. break;
  88. }
  89. }
  90. bool bAdd = true;
  91. if (!allowDuplicate)
  92. {
  93. // Check whether this match already exists in the vector.
  94. for (vector<wstring>::iterator it = result.begin(); it != result.end();)
  95. {
  96. bool bEqual = true;
  97. for (vector<wstring>::iterator tempMatchIt = tempMatch.begin(); tempMatchIt != tempMatch.end(); tempMatchIt++, it++)
  98. {
  99. bool bGroupEqual = true;
  100. if (caseSensitive)
  101. {
  102. bGroupEqual = (wcscmp(it->c_str(), tempMatchIt->c_str()) == 0);
  103. }
  104. else
  105. {
  106. bGroupEqual = (_wcsicmp(it->c_str(), tempMatchIt->c_str()) == 0);
  107. }
  108. if (!bGroupEqual)
  109. {
  110. bEqual = false;
  111. }
  112. }
  113. if (bEqual)
  114. {
  115. bAdd = false;
  116. break;
  117. }
  118. }
  119. }
  120. if (bAdd)
  121. {
  122. for (vector<wstring>::iterator tempMatchIt = tempMatch.begin(); tempMatchIt != tempMatch.end(); tempMatchIt++)
  123. {
  124. result.push_back(*tempMatchIt);
  125. }
  126. }
  127. if (pTo < pSourceEnd)
  128. {
  129. pFrom = pTo;
  130. }
  131. else
  132. {
  133. break;
  134. }
  135. }
  136. else
  137. {
  138. break;
  139. }
  140. }
  141.  
  142. delete[] pSource;
  143.  
  144. return true;
  145. }
  146.  
  147. #endif // REGEXP_H
  148.  
Add Comment
Please, Sign In to add comment