Advertisement
Guest User

Untitled

a guest
Sep 11th, 2019
150
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.72 KB | None | 0 0
  1. // 256-bit version
  2. __m256i k0123 = ...;
  3. __m256i k4567 = ...;
  4.  
  5. __m256i acc32 = _mm256_setzero_si256();
  6.  
  7. // 2x VPDPBUSD
  8. // Port breakdown: 2p01
  9. // Best case execution: 1p0 1p1
  10. acc32 = _mm256_dpbusd_epi32(acc32, k0123, _mm256_load_si256(...));
  11. acc32 = _mm256_dpbusd_epi32(acc32, k4567, _mm256_load_si256(...));
  12.  
  13. _mm256_store_si256(..., acc32);
  14.  
  15.  
  16. // 512-bit version
  17. __m512i k0123 = ...;
  18. __m512i k4567 = ...;
  19.  
  20. __m512i acc32 = _mm512_setzero_si512();
  21.  
  22. // 2x VPDPBUSD
  23. // Port breakdown
  24. // 1FMA: 2p0
  25. // 2FMA: 2p05
  26. // Best execution
  27. // 1FMA: 2p0
  28. // 2FMA: 1p0 1p5
  29. acc32 = _mm512_dpbusd_epi32(acc32, k0123, _mm512_load_si512(...));
  30. acc32 = _mm512_dpbusd_epi32(acc32, k4567, _mm512_load_si512(...));
  31.  
  32. _mm512_store_si512(..., acc32);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement