Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #pragma GCC optimize("Ofast")
- #pragma GCC target("avx")
- #include <iostream>
- #include <iomanip>
- #include <algorithm>
- #include <assert.h>
- #include <immintrin.h>
- namespace Solution {
- const int NMAX = 100000;
- const int blockSize = 512;
- const int MaxNBlocks = (NMAX + blockSize - 1) / blockSize;
- float* cntTillBorder[MaxNBlocks];
- float* zeroBuffer;
- float* arr;
- int n, nBlocks;
- void alloc(int n_) {
- n = n_;
- nBlocks = (n + blockSize - 1) / blockSize;
- arr = (float*)_mm_malloc(n * sizeof(float), 32);
- zeroBuffer = (float*)_mm_malloc(n * sizeof(float), 32);
- std::fill(zeroBuffer, zeroBuffer+n, 0);
- for (int i = 0; i < nBlocks; ++i) {
- cntTillBorder[i] = (float*)_mm_malloc(n * sizeof(float), 32);
- std::fill(cntTillBorder[i], cntTillBorder[i]+n, 0);
- }
- }
- void free() {
- _mm_free(arr);
- for (int i = 0; i < nBlocks; ++i) {
- _mm_free(cntTillBorder[i]);
- }
- }
- void precalc() {
- for (int i = 0; i < n; ++i) {
- int value = (int)arr[i];
- for (int b = i / blockSize; b < nBlocks; ++b) {
- cntTillBorder[b][value]++;
- }
- }
- }
- void naiveUpdateItems(int begin, int after, float x, float y) {
- int cnt = 0;
- for (int i = begin; i < after; ++i) {
- cnt += (arr[i] == x);
- arr[i] = (arr[i] == x) ? y : arr[i];
- }
- for (int b = begin / blockSize; b < nBlocks; ++b) {
- cntTillBorder[b][int(x)] -= (float)cnt;
- cntTillBorder[b][int(y)] += (float)cnt;
- }
- }
- void modify(int lt, int rt, float x, float y) {
- int bl = lt / blockSize;
- int br = rt / blockSize;
- if (bl == br) {
- naiveUpdateItems(lt, rt+1, x, y);
- return;
- }
- naiveUpdateItems(lt, (bl+1) * blockSize, x, y);
- naiveUpdateItems(br * blockSize, rt+1, x, y);
- __m256 vx = _mm256_set1_ps(x);
- __m256 vy = _mm256_set1_ps(y);
- int changes = 0;
- for (int b = bl+1; b < br; ++b) {
- cntTillBorder[b][int(x)] -= (float)changes;
- cntTillBorder[b][int(y)] += (float)changes;
- float *blockBegin = arr + b * blockSize;
- for (int i = 0; i < blockSize; i += 32) {
- uint32_t bitmask = 0;
- for (int j = 0; j < 32; j += 8) {
- __m256 va = _mm256_load_ps(blockBegin + i + j);
- __m256 rs = _mm256_cmp_ps(vx, va, _CMP_EQ_OQ);
- bitmask = (bitmask << 8) | _mm256_movemask_ps(rs);
- _mm256_maskstore_ps(blockBegin + i + j, _mm256_cvtps_epi32(rs), vy);
- }
- changes += __builtin_popcountll(bitmask);
- }
- }
- }
- int nth_element(int lt, int rt, int k) {
- int bl = lt / blockSize;
- int br = rt / blockSize;
- float* arrLT = (bl == 0) ? zeroBuffer : cntTillBorder[bl-1];
- float* arrRT = cntTillBorder[br];
- for (int i = rt+1; i < std::min((br+1) * blockSize, n); ++i) { arrRT[(int)arr[i]]--; }
- for (int i = bl * blockSize; i < lt; ++i) { arrLT[(int)arr[i]]++; }
- int last = -1;
- for (int i = 0; i + 31 < n; i += 32) {
- __m256 sum = _mm256_setzero_ps(), vr, vl;
- for (int j = 0; j < 32; j += 8) {
- vr = _mm256_load_ps(arrRT+i+j);
- vl = _mm256_load_ps(arrLT+i+j);
- sum = _mm256_add_ps(sum, _mm256_sub_ps(vr,vl));
- }
- alignas(32) static float tmp[8];
- _mm256_store_ps(tmp, sum);
- int cnt = 0;
- for (int j = 0; j < 8; ++j) { cnt += (int)tmp[j]; }
- if (cnt >= k) {
- last = i-1;
- break;
- }
- k -= cnt;
- last = i + 31;
- }
- int cnt = 0;
- while (cnt < k) { last++; cnt += int(arrRT[last] - arrLT[last]); }
- for (int i = bl * blockSize; i < lt; ++i) { arrLT[(int)arr[i]]--; }
- for (int i = rt+1; i < std::min((br+1) * blockSize, n); ++i) { arrRT[(int)arr[i]]++; }
- return last;
- }
- }
- char getChar() {
- static const int SIZE = 1 << 16;
- static char buffer[SIZE];
- static int pos = 0;
- static int size = 0;
- if (pos == size) {
- size = (int)fread(buffer, 1, SIZE, stdin),
- pos = 0;
- }
- if (pos == size) {
- return EOF;
- }
- return buffer[pos++];
- }
- template<typename T>
- T getInt() {
- char c = '?';
- while (!(c == '-' || ('0' <= c && c <= '9'))) { c = getChar(); }
- bool pos = true;
- if (c == '-') { pos = false; c = getChar(); }
- T ret = 0;
- while ('0' <= c && c <= '9') { (ret *= 10) += (c - '0'); c = getChar(); }
- return pos ? ret : -ret;
- }
- void putChar(char c) {
- static const int SIZE = 1 << 16;
- static char buffer[SIZE];
- static int size = 0;
- if (size == SIZE || c == EOF) {
- fwrite(buffer, 1, size, stdout),
- size = 0;
- }
- if (c != EOF) { buffer[size++] = c; }
- }
- template<typename T>
- void putInt(T value) {
- bool pos = true;
- if (value < 0) { pos = false; value = -value; }
- static char buf[24];
- int size = 0;
- do { buf[size++] = char(value % 10 + '0'); value /= 10; } while (value > 0);
- if (!pos) { buf[size++] = '-'; }
- while (size--) { putChar(buf[size]); }
- }
- int main() {
- int n = getInt<int>(), q = getInt<int>();
- Solution::alloc(n);
- for (int i = 0; i < n; ++i) {
- Solution::arr[i] = (float)(getInt<int>()-1);
- }
- Solution::precalc();
- while (q--) {
- int t = getInt<int>();
- if (t == 1) {
- int lt = getInt<int>()-1, rt = getInt<int>()-1, x = getInt<int>()-1, y = getInt<int>()-1;
- Solution::modify(lt, rt, (float)x, (float)y);
- } else {
- int lt = getInt<int>()-1, rt = getInt<int>()-1, k = getInt<int>();
- putInt(Solution::nth_element(lt, rt, k)+1);
- putChar('\n');
- assert(t == 2);
- }
- }
- putChar(EOF);
- Solution::free();
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement