Guest User

Untitled

a guest
Jun 22nd, 2018
96
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.07 KB | None | 0 0
  1. diff -prU12 pcre2-10.30-release/src/pcre2_internal.h pcre2-10.30/src/pcre2_internal.h
  2. --- pcre2-10.30-release/src/pcre2_internal.h 2017-07-19 12:00:20.000000000 -0400
  3. +++ pcre2-10.30/src/pcre2_internal.h 2018-06-20 17:03:09.000000000 -0400
  4. @@ -271,103 +271,157 @@ is not supported. */
  5.  
  6. /* The following macros were originally written in the form of loops that used
  7. data from the tables whose names start with PRIV(utf8_table). They were
  8. rewritten by a user so as not to use loops, because in some environments this
  9. gives a significant performance advantage, and it seems never to do any harm.
  10. */
  11.  
  12. /* Base macro to pick up the remaining bytes of a UTF-8 character, not
  13. advancing the pointer. */
  14.  
  15. #define GETUTF8(c, eptr) \
  16. { \
  17. - if ((c & 0x20u) == 0) \
  18. + if ((eptr[1] & 0xc0) != 0x80) \
  19. + c = 0xFFFD; \
  20. + else if ((c & 0x20u) == 0) \
  21. c = ((c & 0x1fu) << 6) | (eptr[1] & 0x3fu); \
  22. + else if ((eptr[2] & 0xc0) != 0x80) \
  23. + c = 0xFFFD; \
  24. else if ((c & 0x10u) == 0) \
  25. c = ((c & 0x0fu) << 12) | ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \
  26. + else if ((eptr[3] & 0xc0) != 0x80) \
  27. + c = 0xFFFD; \
  28. else if ((c & 0x08u) == 0) \
  29. c = ((c & 0x07u) << 18) | ((eptr[1] & 0x3fu) << 12) | \
  30. ((eptr[2] & 0x3fu) << 6) | (eptr[3] & 0x3fu); \
  31. + else if ((eptr[4] & 0xc0) != 0x80) \
  32. + c = 0xFFFD; \
  33. else if ((c & 0x04u) == 0) \
  34. c = ((c & 0x03u) << 24) | ((eptr[1] & 0x3fu) << 18) | \
  35. ((eptr[2] & 0x3fu) << 12) | ((eptr[3] & 0x3fu) << 6) | \
  36. (eptr[4] & 0x3fu); \
  37. + else if ((eptr[5] & 0xc0) != 0x80) \
  38. + c = 0xFFFD; \
  39. else \
  40. c = ((c & 0x01u) << 30) | ((eptr[1] & 0x3fu) << 24) | \
  41. ((eptr[2] & 0x3fu) << 18) | ((eptr[3] & 0x3fu) << 12) | \
  42. ((eptr[4] & 0x3fu) << 6) | (eptr[5] & 0x3fu); \
  43. }
  44.  
  45. /* Base macro to pick up the remaining bytes of a UTF-8 character, advancing
  46. the pointer. */
  47.  
  48. #define GETUTF8INC(c, eptr) \
  49. { \
  50. - if ((c & 0x20u) == 0) \
  51. + if ((*eptr & 0xc0) != 0x80) \
  52. + c = 0xFFFD; \
  53. + else if ((c & 0x20u) == 0) \
  54. c = ((c & 0x1fu) << 6) | (*eptr++ & 0x3fu); \
  55. + else if ((eptr[1] & 0xc0) != 0x80) \
  56. + { \
  57. + c = 0xFFFD; \
  58. + eptr += 1; \
  59. + } \
  60. else if ((c & 0x10u) == 0) \
  61. { \
  62. c = ((c & 0x0fu) << 12) | ((*eptr & 0x3fu) << 6) | (eptr[1] & 0x3fu); \
  63. eptr += 2; \
  64. } \
  65. + else if ((eptr[2] & 0xc0) != 0x80) \
  66. + { \
  67. + c = 0xFFFD; \
  68. + eptr += 2; \
  69. + } \
  70. else if ((c & 0x08u) == 0) \
  71. { \
  72. c = ((c & 0x07u) << 18) | ((*eptr & 0x3fu) << 12) | \
  73. ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \
  74. eptr += 3; \
  75. } \
  76. + else if ((eptr[3] & 0xc0) != 0x80) \
  77. + { \
  78. + c = 0xFFFD; \
  79. + eptr += 3; \
  80. + } \
  81. else if ((c & 0x04u) == 0) \
  82. { \
  83. c = ((c & 0x03u) << 24) | ((*eptr & 0x3fu) << 18) | \
  84. ((eptr[1] & 0x3fu) << 12) | ((eptr[2] & 0x3fu) << 6) | \
  85. (eptr[3] & 0x3fu); \
  86. eptr += 4; \
  87. } \
  88. + else if ((eptr[4] & 0xc0) != 0x80) \
  89. + { \
  90. + c = 0xFFFD; \
  91. + eptr += 4; \
  92. + } \
  93. else \
  94. { \
  95. c = ((c & 0x01u) << 30) | ((*eptr & 0x3fu) << 24) | \
  96. ((eptr[1] & 0x3fu) << 18) | ((eptr[2] & 0x3fu) << 12) | \
  97. ((eptr[3] & 0x3fu) << 6) | (eptr[4] & 0x3fu); \
  98. eptr += 5; \
  99. } \
  100. }
  101.  
  102. /* Base macro to pick up the remaining bytes of a UTF-8 character, not
  103. advancing the pointer, incrementing the length. */
  104.  
  105. #define GETUTF8LEN(c, eptr, len) \
  106. { \
  107. - if ((c & 0x20u) == 0) \
  108. + if ((eptr[1] & 0xc0) != 0x80) \
  109. + c = 0xFFFD; \
  110. + else if ((c & 0x20u) == 0) \
  111. { \
  112. c = ((c & 0x1fu) << 6) | (eptr[1] & 0x3fu); \
  113. len++; \
  114. } \
  115. + else if ((eptr[2] & 0xc0) != 0x80) \
  116. + { \
  117. + c = 0xFFFD; \
  118. + len++; \
  119. + } \
  120. else if ((c & 0x10u) == 0) \
  121. { \
  122. c = ((c & 0x0fu) << 12) | ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \
  123. len += 2; \
  124. } \
  125. + else if ((eptr[3] & 0xc0) != 0x80) \
  126. + { \
  127. + c = 0xFFFD; \
  128. + len += 2; \
  129. + } \
  130. else if ((c & 0x08u) == 0) \
  131. {\
  132. c = ((c & 0x07u) << 18) | ((eptr[1] & 0x3fu) << 12) | \
  133. ((eptr[2] & 0x3fu) << 6) | (eptr[3] & 0x3fu); \
  134. len += 3; \
  135. } \
  136. + else if ((eptr[4] & 0xc0) != 0x80) \
  137. + { \
  138. + c = 0xFFFD; \
  139. + len += 3; \
  140. + } \
  141. else if ((c & 0x04u) == 0) \
  142. { \
  143. c = ((c & 0x03u) << 24) | ((eptr[1] & 0x3fu) << 18) | \
  144. ((eptr[2] & 0x3fu) << 12) | ((eptr[3] & 0x3fu) << 6) | \
  145. (eptr[4] & 0x3fu); \
  146. len += 4; \
  147. } \
  148. + else if ((eptr[5] & 0xc0) != 0x80) \
  149. + { \
  150. + c = 0xFFFD; \
  151. + len += 4; \
  152. + } \
  153. else \
  154. {\
  155. c = ((c & 0x01u) << 30) | ((eptr[1] & 0x3fu) << 24) | \
  156. ((eptr[2] & 0x3fu) << 18) | ((eptr[3] & 0x3fu) << 12) | \
  157. ((eptr[4] & 0x3fu) << 6) | (eptr[5] & 0x3fu); \
  158. len += 5; \
  159. } \
  160. }
  161.  
  162. /* --------------- Whitespace macros ---------------- */
  163.  
  164. /* Tests for Unicode horizontal and vertical whitespace characters must check a
Add Comment
Please, Sign In to add comment