Advertisement
Guest User

Untitled

a guest
Nov 17th, 2019
73
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 6.56 KB | None | 0 0
  1. #include <inttypes.h>
  2. #include <math.h>
  3. #include <stdbool.h>
  4. #include <stdio.h>
  5. #include <stdlib.h>
  6. #include <smmintrin.h>
  7. #include <xmmintrin.h>
  8. #include <immintrin.h>
  9.  
  10.  
  /* Number of leading bytes consumed by read_information(): a 10-byte
   * prefix, the 4-byte pixel-data offset, 4 skipped bytes, and the 4-byte
   * width and height fields. */
  const uint8_t INF_BYTES = 10 + 4 + 4 + 4 + 4;
  12.  
  /**
   * Transfer `n` bytes from `f_in` to `f_out`.
   *
   * The bytes are consumed from `f_in` in bounded chunks, so no heap
   * allocation proportional to `n` is needed. Whatever was read is echoed to
   * `f_out` when it is not NULL; pass NULL to simply skip the bytes.
   *
   * @param n     number of bytes to transfer; 0 succeeds without touching the
   *              streams, a negative count is rejected
   * @param f_in  stream to read from
   * @param f_out stream to echo the bytes to, or NULL to discard them
   * @return true only if exactly `n` bytes were read (and, when echoing,
   *         written); false on a short read, a write error, a negative `n`
   *         or a NULL `f_in`. Bytes read before a short read are still
   *         echoed, so a trailing partial chunk is not lost.
   */
  bool read_pack(int n, FILE* f_in, FILE* f_out)
  {
      unsigned char chunk[4096];
      size_t remaining;

      if (n < 0 || f_in == NULL) {
          return false;
      }

      remaining = (size_t)n;
      while (remaining > 0) {
          const size_t want = remaining < sizeof chunk ? remaining : sizeof chunk;
          const size_t got = fread(chunk, 1, want, f_in);

          if (got > 0 && f_out != NULL && fwrite(chunk, 1, got, f_out) != got) {
              return false;
          }
          if (got < want) {
              /* End of file or read error before all n bytes arrived. */
              return false;
          }
          remaining -= got;
      }
      return true;
  }
  23.  
  24. uint32_t read_information(FILE* fin,
  25.     FILE* fout,
  26.     int32_t* width,
  27.     int32_t* height)
  28. {
  29.     read_pack(10, fin, fout);
  30.     uint32_t bOffBits;
  31.     fread(&bOffBits, sizeof(uint32_t), 1, fin);
  32.  
  33.     if (fout != NULL) {
  34.         fwrite(&bOffBits, sizeof(bOffBits), 1, fout);
  35.     }
  36.     read_pack(4, fin, fout);
  37.    
  38.        
  39.     fread(width, sizeof(*width), 1, fin);
  40.     fread(height, sizeof(*height), 1, fin);
  41.     if (fout != NULL) {
  42.         fwrite(width, sizeof(*width), 1, fout);
  43.         fwrite(height, sizeof(*height), 1, fout);
  44.     }
  45.     return bOffBits;
  46. }
  47.  
  48. void read_from_file(
  49.     FILE* fin,
  50.     FILE* fout,
  51.     uint8_t*** color,
  52.     int32_t* width,
  53.     int32_t* height)
  54. {
  55.     uint32_t bOffBits = read_information(fin, fout, width, height);
  56.  
  57.     read_pack(bOffBits - INF_BYTES, fin, fout);
  58.  
  59.     (*color)[0] = (uint8_t*)_mm_malloc((*height) * (*width) * sizeof(uint8_t), sizeof(uint8_t));
  60.     (*color)[1] = (uint8_t*)_mm_malloc((*height) * (*width) * sizeof(uint8_t), sizeof(uint8_t));
  61.     (*color)[2] = (uint8_t*)_mm_malloc((*height) * (*width) * sizeof(uint8_t), sizeof(uint8_t));
  62.     (*color)[3] = (uint8_t*)_mm_malloc((*height) * (*width) * sizeof(uint8_t), sizeof(uint8_t));
  63.    
  64.     for (int32_t i = 0; i < (*height) * (*width); ++i) {
  65.         fread((*color)[0] + i, sizeof(uint8_t), 1, fin);
  66.         fread((*color)[1] + i, sizeof(uint8_t), 1, fin);
  67.         fread((*color)[2] + i, sizeof(uint8_t), 1, fin);
  68.         fread((*color)[3] + i, sizeof(uint8_t), 1, fin);
  69.     }
  70.  
  71. }
  72.  
  /**
   * Blend image 2 over image 1 in place.
   *
   * Plane 0 of `color2` is used as the per-pixel blend weight `a` (0..255).
   * For each of planes 1, 2 and 3 the result stored into `color1` is
   *
   *     (color1 * (255 - a) + color2 * a) / 255      (truncated)
   *
   * and plane 0 of `color1` is set to 255. Every one of the
   * `width * height` pixels is processed, including the last columns of a
   * row when `width` is not a multiple of four.
   *
   * Integer arithmetic is used on purpose: the numerator never exceeds
   * 255 * 255, so the integer quotient is exactly the truncated result of the
   * same formula evaluated in floating point, and no scratch buffer or
   * special instruction-set support is required.
   *
   * @param color1 table of 4 planes of the destination image (modified)
   * @param width  width of both images in pixels
   * @param height height of both images in pixels
   * @param color2 table of 4 planes of the overlay image (only read); it must
   *               hold at least `width * height` bytes per plane
   *
   * Nothing is touched when `width` or `height` is not positive.
   */
  void recalculate(uint8_t** color1, int32_t width, int32_t height, uint8_t** color2)
  {
      if (width <= 0 || height <= 0) {
          return;
      }

      const size_t count = (size_t)width * (size_t)height;

      for (size_t i = 0; i < count; ++i) {
          const uint32_t a = color2[0][i];
          const uint32_t inv = 255u - a;

          for (int c = 1; c < 4; ++c) {
              const uint32_t mixed = color1[c][i] * inv + color2[c][i] * a;
              color1[c][i] = (uint8_t)(mixed / 255u);
          }
          color1[0][i] = 255;
      }
  }
  132.  
  /**
   * Write four byte planes back to `file` as interleaved 4-byte pixels.
   *
   * For every pixel index i in [0, height * width) the bytes
   * color1[0][i], color1[1][i], color1[2][i], color1[3][i] are emitted in
   * that order.
   *
   * @param file   output stream
   * @param height image height in pixels
   * @param width  image width in pixels
   * @param color1 table of 4 planes, each holding height * width bytes
   *
   * The pixel count is computed in 64 bits so the product cannot wrap. On a
   * write error the function reports it on stderr and stops early.
   */
  void write_to_file(FILE* file, uint32_t height, uint32_t width, uint8_t** color1)
  {
      const uint64_t count = (uint64_t)height * (uint64_t)width;

      for (uint64_t i = 0; i < count; ++i) {
          const uint8_t px[4] = {
              color1[0][i], color1[1][i], color1[2][i], color1[3][i]
          };

          if (fwrite(px, sizeof px[0], 4, file) != 4) {
              fprintf(stderr, "write_to_file: write error, output is incomplete\n");
              return;
          }
      }
  }
  142.  
  /**
   * Release a table of four planes together with the table itself.
   *
   * Both the planes and the table must have been obtained from _mm_malloc.
   * Plane entries that are NULL are skipped. After the call `*col` is NULL,
   * so calling the function again on the same variable is harmless.
   *
   * @param col address of the table pointer; NULL or a NULL table is a no-op
   */
  void free_arr(uint8_t*** col)
  {
      if (col == NULL || *col == NULL) {
          return;
      }

      uint8_t** planes = *col;
      for (int c = 0; c < 4; ++c) {
          if (planes[c] != NULL) {
              _mm_free(planes[c]);
              planes[c] = NULL;
          }
      }
      _mm_free(planes);
      *col = NULL;   /* do not leave a dangling pointer with the caller */
  }
  151.  
  /**
   * Blend the image in argv[2] over the image in argv[1] and write the
   * result to argv[3].
   *
   * The header of the first image is copied to the output while it is read,
   * the blended pixels follow, and finally whatever remains in the first
   * file after its pixel data is appended unchanged.
   *
   * @return 0 on success, EXIT_FAILURE on bad usage, an unreadable or
   *         unwritable file, an unloadable image, or when the two images do
   *         not have identical dimensions (blending indexes both images with
   *         the same pixel index, so a size mismatch would read out of
   *         bounds).
   */
  int main(int argc, char* argv[])
  {
      int status = EXIT_FAILURE;
      FILE* f1 = NULL;
      FILE* f2 = NULL;
      FILE* fout = NULL;
      uint8_t** color1 = NULL;
      uint8_t** color2 = NULL;
      int32_t width1 = 0;
      int32_t height1 = 0;
      int32_t width2 = 0;
      int32_t height2 = 0;

      if (argc < 4) {
          fprintf(stderr, "usage: %s <image1> <image2> <output>\n",
              argc > 0 ? argv[0] : "blend");
          return EXIT_FAILURE;
      }

      f1 = fopen(argv[1], "rb");
      f2 = fopen(argv[2], "rb");
      if (f1 == NULL || f2 == NULL) {
          fprintf(stderr, "cannot open input file %s\n", f1 == NULL ? argv[1] : argv[2]);
          goto cleanup;
      }
      fout = fopen(argv[3], "wb");
      if (fout == NULL) {
          fprintf(stderr, "cannot open output file %s\n", argv[3]);
          goto cleanup;
      }

      color1 = (uint8_t**)_mm_malloc(4 * sizeof(uint8_t*), sizeof(uint8_t*));
      color2 = (uint8_t**)_mm_malloc(4 * sizeof(uint8_t*), sizeof(uint8_t*));
      if (color1 == NULL || color2 == NULL) {
          fprintf(stderr, "out of memory\n");
          goto cleanup;
      }
      /* Start from NULL planes so cleanup never frees indeterminate pointers. */
      for (int c = 0; c < 4; ++c) {
          color1[c] = NULL;
          color2[c] = NULL;
      }

      read_from_file(f1, fout, &color1, &width1, &height1);
      read_from_file(f2, NULL, &color2, &width2, &height2);

      if (width1 <= 0 || height1 <= 0 || width2 <= 0 || height2 <= 0) {
          fprintf(stderr, "cannot load input images\n");
          goto cleanup;
      }
      for (int c = 0; c < 4; ++c) {
          if (color1[c] == NULL || color2[c] == NULL) {
              fprintf(stderr, "cannot load input images\n");
              goto cleanup;
          }
      }
      if (width1 != width2 || height1 != height2) {
          fprintf(stderr, "image sizes differ: %" PRId32 "x%" PRId32 " vs %" PRId32 "x%" PRId32 "\n",
              width1, height1, width2, height2);
          goto cleanup;
      }

      recalculate(color1, width1, height1, color2);

      write_to_file(fout, (uint32_t)height1, (uint32_t)width1, color1);

      /* Copy whatever follows the pixel data in the first file. */
      while (read_pack(1000, f1, fout)) {
          continue;
      }

      if (!ferror(f1) && !ferror(fout)) {
          status = 0;
      } else {
          fprintf(stderr, "I/O error while writing %s\n", argv[3]);
      }

  cleanup:
      if (color1 != NULL) {
          free_arr(&color1);
      }
      if (color2 != NULL) {
          free_arr(&color2);
      }
      if (fout != NULL && fclose(fout) != 0) {
          /* fclose flushes buffered output; a failure means data was lost. */
          fprintf(stderr, "cannot finish writing %s\n", argv[3]);
          status = EXIT_FAILURE;
      }
      if (f2 != NULL) {
          fclose(f2);
      }
      if (f1 != NULL) {
          fclose(f1);
      }
      return status;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement