Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- diff -prU12 pcre2-10.30-release/src/pcre2_internal.h pcre2-10.30/src/pcre2_internal.h
- --- pcre2-10.30-release/src/pcre2_internal.h 2017-07-19 12:00:20.000000000 -0400
- +++ pcre2-10.30/src/pcre2_internal.h 2018-06-20 17:03:09.000000000 -0400
- @@ -271,103 +271,157 @@ is not supported. */
- /* The following macros were originally written in the form of loops that used
- data from the tables whose names start with PRIV(utf8_table). They were
- rewritten by a user so as not to use loops, because in some environments this
- gives a significant performance advantage, and it seems never to do any harm.
- */
- /* Base macro to pick up the remaining bytes of a UTF-8 character, not
- advancing the pointer. */
- #define GETUTF8(c, eptr) \
- { \
- - if ((c & 0x20u) == 0) \
- + if ((eptr[1] & 0xc0) != 0x80) \
- + c = 0xFFFD; \
- + else if ((c & 0x20u) == 0) \
- c = ((c & 0x1fu) << 6) | (eptr[1] & 0x3fu); \
- + else if ((eptr[2] & 0xc0) != 0x80) \
- + c = 0xFFFD; \
- else if ((c & 0x10u) == 0) \
- c = ((c & 0x0fu) << 12) | ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \
- + else if ((eptr[3] & 0xc0) != 0x80) \
- + c = 0xFFFD; \
- else if ((c & 0x08u) == 0) \
- c = ((c & 0x07u) << 18) | ((eptr[1] & 0x3fu) << 12) | \
- ((eptr[2] & 0x3fu) << 6) | (eptr[3] & 0x3fu); \
- + else if ((eptr[4] & 0xc0) != 0x80) \
- + c = 0xFFFD; \
- else if ((c & 0x04u) == 0) \
- c = ((c & 0x03u) << 24) | ((eptr[1] & 0x3fu) << 18) | \
- ((eptr[2] & 0x3fu) << 12) | ((eptr[3] & 0x3fu) << 6) | \
- (eptr[4] & 0x3fu); \
- + else if ((eptr[5] & 0xc0) != 0x80) \
- + c = 0xFFFD; \
- else \
- c = ((c & 0x01u) << 30) | ((eptr[1] & 0x3fu) << 24) | \
- ((eptr[2] & 0x3fu) << 18) | ((eptr[3] & 0x3fu) << 12) | \
- ((eptr[4] & 0x3fu) << 6) | (eptr[5] & 0x3fu); \
- }
- /* Base macro to pick up the remaining bytes of a UTF-8 character, advancing
- the pointer. */
- #define GETUTF8INC(c, eptr) \
- { \
- - if ((c & 0x20u) == 0) \
- + if ((*eptr & 0xc0) != 0x80) \
- + c = 0xFFFD; \
- + else if ((c & 0x20u) == 0) \
- c = ((c & 0x1fu) << 6) | (*eptr++ & 0x3fu); \
- + else if ((eptr[1] & 0xc0) != 0x80) \
- + { \
- + c = 0xFFFD; \
- + eptr += 1; \
- + } \
- else if ((c & 0x10u) == 0) \
- { \
- c = ((c & 0x0fu) << 12) | ((*eptr & 0x3fu) << 6) | (eptr[1] & 0x3fu); \
- eptr += 2; \
- } \
- + else if ((eptr[2] & 0xc0) != 0x80) \
- + { \
- + c = 0xFFFD; \
- + eptr += 2; \
- + } \
- else if ((c & 0x08u) == 0) \
- { \
- c = ((c & 0x07u) << 18) | ((*eptr & 0x3fu) << 12) | \
- ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \
- eptr += 3; \
- } \
- + else if ((eptr[3] & 0xc0) != 0x80) \
- + { \
- + c = 0xFFFD; \
- + eptr += 3; \
- + } \
- else if ((c & 0x04u) == 0) \
- { \
- c = ((c & 0x03u) << 24) | ((*eptr & 0x3fu) << 18) | \
- ((eptr[1] & 0x3fu) << 12) | ((eptr[2] & 0x3fu) << 6) | \
- (eptr[3] & 0x3fu); \
- eptr += 4; \
- } \
- + else if ((eptr[4] & 0xc0) != 0x80) \
- + { \
- + c = 0xFFFD; \
- + eptr += 4; \
- + } \
- else \
- { \
- c = ((c & 0x01u) << 30) | ((*eptr & 0x3fu) << 24) | \
- ((eptr[1] & 0x3fu) << 18) | ((eptr[2] & 0x3fu) << 12) | \
- ((eptr[3] & 0x3fu) << 6) | (eptr[4] & 0x3fu); \
- eptr += 5; \
- } \
- }
- /* Base macro to pick up the remaining bytes of a UTF-8 character, not
- advancing the pointer, incrementing the length. */
- #define GETUTF8LEN(c, eptr, len) \
- { \
- - if ((c & 0x20u) == 0) \
- + if ((eptr[1] & 0xc0) != 0x80) \
- + c = 0xFFFD; \
- + else if ((c & 0x20u) == 0) \
- { \
- c = ((c & 0x1fu) << 6) | (eptr[1] & 0x3fu); \
- len++; \
- } \
- + else if ((eptr[2] & 0xc0) != 0x80) \
- + { \
- + c = 0xFFFD; \
- + len++; \
- + } \
- else if ((c & 0x10u) == 0) \
- { \
- c = ((c & 0x0fu) << 12) | ((eptr[1] & 0x3fu) << 6) | (eptr[2] & 0x3fu); \
- len += 2; \
- } \
- + else if ((eptr[3] & 0xc0) != 0x80) \
- + { \
- + c = 0xFFFD; \
- + len += 2; \
- + } \
- else if ((c & 0x08u) == 0) \
- {\
- c = ((c & 0x07u) << 18) | ((eptr[1] & 0x3fu) << 12) | \
- ((eptr[2] & 0x3fu) << 6) | (eptr[3] & 0x3fu); \
- len += 3; \
- } \
- + else if ((eptr[4] & 0xc0) != 0x80) \
- + { \
- + c = 0xFFFD; \
- + len += 3; \
- + } \
- else if ((c & 0x04u) == 0) \
- { \
- c = ((c & 0x03u) << 24) | ((eptr[1] & 0x3fu) << 18) | \
- ((eptr[2] & 0x3fu) << 12) | ((eptr[3] & 0x3fu) << 6) | \
- (eptr[4] & 0x3fu); \
- len += 4; \
- } \
- + else if ((eptr[5] & 0xc0) != 0x80) \
- + { \
- + c = 0xFFFD; \
- + len += 4; \
- + } \
- else \
- {\
- c = ((c & 0x01u) << 30) | ((eptr[1] & 0x3fu) << 24) | \
- ((eptr[2] & 0x3fu) << 18) | ((eptr[3] & 0x3fu) << 12) | \
- ((eptr[4] & 0x3fu) << 6) | (eptr[5] & 0x3fu); \
- len += 5; \
- } \
- }
- /* --------------- Whitespace macros ---------------- */
- /* Tests for Unicode horizontal and vertical whitespace characters must check a
Add Comment
Please, Sign In to add comment