Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <cstdio>
- #include <cassert>
- #include <cstring>
- #include <cstdlib>
- #include <cctype>
- #include <cstdint>
- #include "immintrin.h"
- using namespace std;
- const int N = (int)3e6 + 10;
- const int LEN = (int)1e5 + 10;
- #ifdef _MSC_VER
- #define ALIGN(x) __declspec(align(x))
- #define ATTR
- #else
- #define ALIGN(x) __attribute__((aligned(x)))
- #define ATTR __attribute__((target("avx")))
- #endif
- ALIGN(16) char str[N];
- ALIGN(16) char rStr[LEN];
- ALIGN(16) char result[N];
- ATTR int is_equal(const char* src_1, const char* src_2, size_t size)
- {
- const char* head_1 = (const char*)src_1;
- const char* head_2 = (const char*)src_2;
- size_t tail_n = 0;
- while (((uintptr_t)head_1 % 16) != 0 && tail_n < size)
- {
- if (*head_1 != *head_2)
- return 0;
- head_1++, head_2++, tail_n++;
- }
- const __m128i* src1 = (const __m128i*)head_1;
- const __m128i* src2 = (const __m128i*)head_2;
- const size_t n = (size - tail_n) / 32;
- for (size_t i = 0; i < n; ++i, src1 += 2, src2 += 2)
- {
- __m128i mm11 = _mm_load_si128(src1);
- __m128i mm12 = _mm_load_si128(src1 + 1);
- __m128i mm21 = _mm_load_si128(src2);
- __m128i mm22 = _mm_load_si128(src2 + 1);
- __m128i mm1 = _mm_xor_si128(mm11, mm21);
- __m128i mm2 = _mm_xor_si128(mm12, mm22);
- __m128i mm = _mm_or_si128(mm1, mm2);
- if (!_mm_testz_si128(mm, mm))
- return 0;
- }
- const size_t rem = (size - tail_n) % 32;
- const char* tail_1 = (const char*)head_1 + n * 32;
- const char* tail_2 = (const char*)head_2 + n * 32;
- for (size_t i = 0; i < rem; i++, tail_1++, tail_2++)
- {
- if (*tail_1 != *tail_2)
- return 0;
- }
- return 1;
- }
- #ifdef _MSC_VER
- #define ALIGN(x) __declspec(align(x))
- #define ATTR
- #else
- #define ALIGN(x) __attribute__((aligned(x)))
- #define ATTR __attribute__((target("avx")))
- #endif
- typedef __m128i Vec;
- ATTR int memcmp_aligned(const char* src_1, const char* src_2, int len)
- {
- // Intrisincs òðåáóþò êîíñòàíòíûå óêàçàòåëè
- const Vec* ptr1 = (const Vec*)src_1;
- const Vec* ptr2 = (const Vec*)src_2;
- for (int i = 0; i < len; i += 16, ptr1++, ptr2++)
- {
- // Çàãðóæàåì â XMM 128-áèòíûé ðåãèñòð äàííûìè, íà÷èíàþùèìèñÿ ñ óêàçàòåëÿ ptr1
- Vec M1 = _mm_load_si128(ptr1);
- Vec M2 = _mm_load_si128(ptr2);
- // Ñ÷èòàåì ïîáèòîâûé xor äâóõ âåêòîðîâ
- Vec MX = _mm_xor_si128(M1, M2);
- // _mm_testz_si128(A, B) âîçâðàùàåò 1, åñëè (A & B) == 0
- if (!_mm_testz_si128(MX, MX))
- {
- /* Ýòà ïðîâåðêà âûïîëíÿåòñÿ îäèí ðàç, ìîæíî íå çàìîðà÷èâàòüñÿ c SIMD
- Õîòÿ ìîæíî ñäåëàòü ýòî èñïîëüçóÿ ôóíêöèè _mm_cmplt_epi16 è _mm_movemask_epi8
- */
- for (int a = 0; a < 16; a++)
- if (src_1[i + a] != src_2[i + a])
- return src_1[i + a] < src_2[i + a] ? -1 : 1;
- }
- }
- return 0;
- }
- typedef __m128i Vec;
- ATTR int memcmp_unaligned(const char* src_1, const char* src_2, int len)
- {
- // Intrisincs òðåáóþò êîíñòàíòíûå óêàçàòåëè
- const Vec* ptr1 = (const Vec*)src_1;
- const Vec* ptr2 = (const Vec*)src_2;
- for (int i = 0; i < len; i += 16, ptr1++, ptr2++)
- {
- // Çàãðóæàåì â XMM 128-áèòíûé ðåãèñòð äàííûìè, íà÷èíàþùèìèñÿ ñ óêàçàòåëÿ ptr1
- // Óêàçàòåëè íå âûðîâíåíû - ïîýòîìó èñïîëüçóåì ñïåöèàëüíóþ ôóíêöèþ _mm_loadu - load unaligned
- Vec M1 = _mm_loadu_si128(ptr1);
- Vec M2 = _mm_loadu_si128(ptr2);
- // Ñ÷èòàåì ïîáèòîâûé xor äâóõ âåêòîðîâ
- Vec MX = _mm_xor_si128(M1, M2);
- // _mm_testz_si128(A, B) âîçâðàùàåò 1, åñëè (A & B) == 0
- if (!_mm_testz_si128(MX, MX))
- {
- /* Ýòà ïðîâåðêà âûïîëíÿåòñÿ îäèí ðàç, ìîæíî íå çàìîðà÷èâàòüñÿ c SIMD
- Õîòÿ ìîæíî ñäåëàòü ýòî èñïîëüçóÿ ôóíêöèè _mm_cmplt_epi16 è _mm_movemask_epi8
- */
- for (int a = 0; a < 16; a++)
- if (src_1[i + a] != src_2[i + a])
- return src_1[i + a] < src_2[i + a] ? -1 : 1;
- }
- }
- return 0;
- }
- typedef __m128i Vec;
- ATTR int memcmp_unaligned_unrolled(const char* src_1, const char* src_2, int len)
- {
- // Intrisincs òðåáóþò êîíñòàíòíûå óêàçàòåëè
- const Vec* ptr1 = (const Vec*)src_1;
- const Vec* ptr2 = (const Vec*)src_2;
- for (int i = 0; i < len; i += 32, ptr1 += 2, ptr2 += 2)
- {
- // Çàãðóæàåì â XMM 128-áèòíûé ðåãèñòð äàííûìè, íà÷èíàþùèìèñÿ ñ óêàçàòåëÿ ptr1
- // Óêàçàòåëè íå âûðîâíåíû - ïîýòîìó èñïîëüçóåì ñïåöèàëüíóþ ôóíêöèþ _mm_loadu - load unaligned
- Vec M11 = _mm_loadu_si128(ptr1);
- Vec M12 = _mm_loadu_si128(ptr2);
- Vec M21 = _mm_loadu_si128(ptr1 + 1);
- Vec M22 = _mm_loadu_si128(ptr2 + 1);
- // Ñ÷èòàåì ïîáèòîâûé xor äâóõ âåêòîðîâ
- Vec MX1 = _mm_xor_si128(M11, M12);
- Vec MX2 = _mm_xor_si128(M21, M22);
- MX1 = _mm_or_si128(MX1, MX2);
- // _mm_testz_si128(A, B) âîçâðàùàåò 1, åñëè (A & B) == 0
- if (!_mm_testz_si128(MX1, MX1))
- {
- /* Ýòà ïðîâåðêà âûïîëíÿåòñÿ îäèí ðàç, ìîæíî íå çàìîðà÷èâàòüñÿ c SIMD
- Õîòÿ ìîæíî ñäåëàòü ýòî èñïîëüçóÿ ôóíêöèè _mm_cmplt_epi16 è _mm_movemask_epi8
- */
- for (int a = 0; a < 32; a++)
- if (src_1[i + a] != src_2[i + a])
- return src_1[i + a] < src_2[i + a] ? -1 : 1;
- }
- }
- return 0;
- }
- int memcmp_fast(const char* src_1, const char* src_2, int len)
- {
- int middle_len = len & (~31);
- int result = memcmp_unaligned_unrolled(src_1, src_2, middle_len);
- if (result != 0)
- return result;
- // Ñðàâíèâàåì õâîñò
- src_1 += middle_len, src_2 += middle_len;
- for (int i = 0; i < len - middle_len; i++, src_1++, src_2++)
- {
- if (*src_1 != *src_2)
- return *src_1 < *src_2 ? -1 : 1;
- }
- return 0;
- }
- typedef __m128i Vec;
- // Íóæíûå íàì êóñêè ñòðîêè íà÷èíàåòñÿ ñ àäðåñîâ [src_1] è [src_2 + offset_2]
- template <int offset_2>
- ATTR int memcmp_alignr(const char* src_1, const char* src_2, int len)
- {
- if (len <= 0)
- return 0;
- const Vec* ptr1 = (const Vec*)src_1;
- const Vec* ptr2 = (const Vec*)src_2;
- Vec A = _mm_load_si128(ptr2);
- for (int i = 0; i < len; i += 32, ptr1 += 2, ptr2 += 2)
- {
- Vec M11 = _mm_load_si128(ptr1);
- Vec M21 = _mm_load_si128(ptr1 + 1);
- Vec B = _mm_load_si128(ptr2 + 1);
- Vec C = _mm_load_si128(ptr2 + 2);
- // alignr âû÷èñëÿåò çíà÷åíèå ((B << 128) | A) >> (8 * offset_2) è çàïèñûâàåò ìëàäøèå 128 áèò â ðåãèñòð
- Vec M12 = _mm_alignr_epi8(B, A, offset_2);
- Vec M22 = _mm_alignr_epi8(C, B, offset_2);
- A = C;
- Vec MX1 = _mm_xor_si128(M11, M12);
- Vec MX2 = _mm_xor_si128(M21, M22);
- MX1 = _mm_or_si128(MX1, MX2);
- if (!_mm_testz_si128(MX1, MX1))
- {
- for (int a = 0; a < 32; a++)
- if (src_1[i + a] != src_2[i + offset_2 + a])
- return src_1[i + a] < src_2[i + offset_2 + a] ? -1 : 1;
- }
- }
- return 0;
- }
- int memcmp_fast_alignr(const char* src_1, const char* src_2, int len)
- {
- // Âûðàâíèâàåì ïåðâûé óêàçàòåëü
- while (((uintptr_t)src_1 % 16) != 0 && len > 0)
- {
- if (*src_1 != *src_2)
- return *src_1 < *src_2 ? -1 : 1;
- src_1++, src_2++, len--;
- }
- uintptr_t ptr_value = (uintptr_t)src_2;
- int offset = (ptr_value & 15);
- /* Ñðàâíèâàåì áîëüøóþ ÷àñòü äëèíû ñ ïîìîùüþ âûðîâíåííîé âåðñèè,
- èñïîëüçóþùåé SSE/AVX */
- int result = 0;
- int middle_len = (len - 32 < 0 ? 0 : len - 32) & (~31);
- switch (offset)
- {
- case 0:
- result = memcmp_alignr<0>(src_1, (char*)(ptr_value & (~15)), middle_len); break;
- case 1:
- result = memcmp_alignr<1>(src_1, (char*)(ptr_value & (~15)), middle_len); break;
- case 2:
- result = memcmp_alignr<2>(src_1, (char*)(ptr_value & (~15)), middle_len); break;
- case 3:
- result = memcmp_alignr<3>(src_1, (char*)(ptr_value & (~15)), middle_len); break;
- case 4:
- result = memcmp_alignr<4>(src_1, (char*)(ptr_value & (~15)), middle_len); break;
- case 5:
- result = memcmp_alignr<5>(src_1, (char*)(ptr_value & (~15)), middle_len); break;
- case 6:
- result = memcmp_alignr<6>(src_1, (char*)(ptr_value & (~15)), middle_len); break;
- case 7:
- result = memcmp_alignr<7>(src_1, (char*)(ptr_value & (~15)), middle_len); break;
- case 8:
- result = memcmp_alignr<8>(src_1, (char*)(ptr_value & (~15)), middle_len); break;
- case 9:
- result = memcmp_alignr<9>(src_1, (char*)(ptr_value & (~15)), middle_len); break;
- case 10:
- result = memcmp_alignr<10>(src_1, (char*)(ptr_value & (~15)), middle_len); break;
- case 11:
- result = memcmp_alignr<11>(src_1, (char*)(ptr_value & (~15)), middle_len); break;
- case 12:
- result = memcmp_alignr<12>(src_1, (char*)(ptr_value & (~15)), middle_len); break;
- case 13:
- result = memcmp_alignr<13>(src_1, (char*)(ptr_value & (~15)), middle_len); break;
- case 14:
- result = memcmp_alignr<14>(src_1, (char*)(ptr_value & (~15)), middle_len); break;
- case 15:
- result = memcmp_alignr<15>(src_1, (char*)(ptr_value & (~15)), middle_len); break;
- }
- if (result != 0)
- return result;
- // Ñðàâíèâàåì õâîñò
- src_1 += middle_len, src_2 += middle_len;
- for (int i = 0; i < len - middle_len; i++, src_1++, src_2++)
- {
- if (*src_1 != *src_2)
- return *src_1 < *src_2 ? -1 : 1;
- }
- return 0;
- }
- int readInt(int &pos)
- {
- int value = 0;
- while (str[pos] > 32 && str[pos] < 64)
- {
- value *= 10;
- value += str[pos] - '0';
- pos++;
- }
- return value;
- }
- int main()
- {
- int shift = 0;
- while (gets(str + shift))
- shift += strlen(str + shift);
- int len = 0;
- while (!isdigit(str[len])) len++;
- for (int i = 0; i < len; i++)
- rStr[len - i - 1] = str[i];
- int pos = len;
- int q = readInt(pos);
- int rIt = 0;
- for (int i = 0; i < q; i++)
- {
- if (str[pos] == 'p')
- {
- pos += 12;
- int l = readInt(pos);
- pos++;
- int r = readInt(pos);
- l--, r--;
- if (memcmp_fast(str + l, rStr + (len - r - 1), (r - l + 1) / 2) == 0)
- {
- strcpy(result + rIt, "Yes\n");
- rIt += 4;
- }
- else
- {
- strcpy(result + rIt, "No\n");
- rIt += 3;
- }
- }
- else
- {
- pos += 7;
- int x = readInt(pos);
- pos++;
- char c = str[pos++];
- x--;
- str[x] = c;
- rStr[len - x - 1] = c;
- }
- }
- puts(result);
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement