Advertisement
Guest User

Untitled

a guest
Nov 26th, 2014
169
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.26 KB | None | 0 0
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include <xmmintrin.h> // SSE1 support
#include <emmintrin.h> // SSE2 support
#include <pmmintrin.h> // SSE3 support
  6.  
  7.  
  8. //Gibt einen Zufallsfloat im Intervall [0.0; max) aus
  9. float getRand(int max) {
  10. return (rand() % (max * 10)) / 10.0;
  11. }
  12.  
  13.  
  14. int main()
  15. {
  16. srand(time(NULL));
  17. int i, j, k, m;
  18.  
  19. // Dimension der Matrix
  20. int n;
  21. printf("n=");
  22. scanf("%d", &n);
  23. int numberOfEntries = n*n;
  24.  
  25. // Speicher der Grüße n*n holen;
  26. float *A = (float*)calloc(numberOfEntries, sizeof(float));
  27. float *B = (float*)calloc(numberOfEntries, sizeof(float));
  28. float *C = (float*)calloc(numberOfEntries, sizeof(float));
  29.  
  30. float temp[4];
  31.  
  32. //Matrizen mit Zufallszahlen füllen
  33. for (i=0; i < numberOfEntries; i++) {
  34. A[i] = getRand(10);
  35. B[i] = getRand(20);
  36. }
  37.  
  38. //Timer starten
  39. int time = (int)clock();
  40.  
  41. // Jeweils zwei "SSE-Register" erstellen
  42. __m128 SSE_A;
  43. __m128 SSE_B;
  44. __m128 SSE_temp;
  45. float tempSum;
  46. float sum = 0;
  47.  
  48. //Matrizen ausgeben
  49.  
  50. /*
  51. printf("A: ");
  52. for (int i = 0; i < numberOfEntries; i++) {
  53. printf("%f, ", A[i]);
  54. }
  55. printf("\n B: ");
  56. for (int i = 0; i < numberOfEntries; i++) {
  57. printf("%f, ", B[i]);
  58. }
  59. */
  60.  
  61. for (i=0; i < (n*n); i += n) {
  62. for (j=0; j < n; j++) {
  63. for (k=0; k < n; k += 4) {
  64. // Die 4 aktuellen Werte der Matrix in SSE_A speichern
  65. SSE_A = (__m128) _mm_loadu_ps(&A[i + k]);
  66.  
  67. // Die 4 Werte aus Matrix B in ein temporäres Array speichern
  68. for (m=0; m < 4; m++) {
  69. temp[m] = B[(n*k) + (m*n) + j];
  70. }
  71.  
  72. // Werte aus Array in SSE_B Register schreiben
  73. SSE_B = (__m128) _mm_loadu_ps(temp);
  74.  
  75. // Parallele Multiplikation durchführen
  76. SSE_temp = _mm_mul_ps(SSE_A, SSE_B);
  77.  
  78. // 4 Werte aufaddieren
  79. SSE_temp = _mm_hadd_ps(SSE_temp, SSE_temp);
  80. SSE_temp = _mm_hadd_ps(SSE_temp, SSE_temp);
  81.  
  82. // Summe in sum speichern
  83. _mm_store_ss(&tempSum, SSE_temp);
  84. sum += tempSum;
  85. }
  86. // Ergebnis in Matrix C speichern
  87. C[i+j] = sum;
  88. sum = 0;
  89. }
  90. }
  91.  
  92. // Timer stoppen
  93. time = -(time - (int)clock())/CLOCKS_PER_SEC;
  94. /*
  95. printf("\n \n C: ");
  96. for (int i = 0; i < numberOfEntries; i++) {
  97. printf("%f, ", C[i]);
  98. }
  99. */
  100.  
  101. // Zeit ausgeben
  102. printf("Ende!, Zeit: %d Sekunden \n", time);
  103. return 0;
  104.  
  105. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement