Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include <xmmintrin.h> // SSE1 support
#include <emmintrin.h> // SSE2 support
#include <pmmintrin.h> // SSE3 support
/*
 * Returns a pseudo-random float in the interval [0.0, max), drawn from
 * rand() as a multiple of 0.1.
 *
 * max: exclusive upper bound of the interval. For max <= 0 the interval is
 *      empty, so 0.0f is returned instead of performing a modulo by zero
 *      or by a negative range.
 */
float getRand(int max) {
    if (max <= 0) {
        return 0.0f;
    }
    // Widen before multiplying so that a large max cannot overflow int.
    const long long range = (long long)max * 10;
    return (float)((rand() % range) / 10.0);
}
/*
 * Computes C = A * B for square, row-major n x n float matrices using SSE.
 *
 * A, B: input matrices, each holding n * n floats.
 * C:    output matrix, n * n floats; every element is overwritten.
 * n:    matrix dimension.
 *
 * The inner product is processed four elements at a time; when n is not a
 * multiple of four, the remaining elements are handled by a scalar loop so
 * that no element outside the current row/column is ever read.
 */
static void multiplySSE(const float *A, const float *B, float *C, size_t n)
{
    // First index that can no longer be covered by a full 4-wide step.
    const size_t vecEnd = n - (n % 4);

    for (size_t row = 0; row < n; row++) {
        const float *aRow = &A[row * n];

        for (size_t col = 0; col < n; col++) {
            // Four partial sums, reduced once after the vector loop.
            __m128 acc = _mm_setzero_ps();
            size_t k = 0;

            for (; k < vecEnd; k += 4) {
                // Four consecutive values of the current row of A.
                const __m128 a = _mm_loadu_ps(&aRow[k]);
                // Four values of the current column of B; they are n floats
                // apart in memory, so they have to be gathered one by one.
                const __m128 b = _mm_setr_ps(B[(k + 0) * n + col],
                                             B[(k + 1) * n + col],
                                             B[(k + 2) * n + col],
                                             B[(k + 3) * n + col]);
                acc = _mm_add_ps(acc, _mm_mul_ps(a, b));
            }

            // Two horizontal adds leave the sum of all four lanes in lane 0.
            acc = _mm_hadd_ps(acc, acc);
            acc = _mm_hadd_ps(acc, acc);
            float sum = _mm_cvtss_f32(acc);

            // Scalar tail for n % 4 leftover elements.
            for (; k < n; k++) {
                sum += aRow[k] * B[k * n + col];
            }

            C[row * n + col] = sum;
        }
    }
}

/*
 * Reads the matrix dimension n from stdin, fills two n x n matrices with
 * random values, multiplies them with SSE and prints the elapsed CPU time
 * in whole seconds.
 *
 * Returns 0 on success and EXIT_FAILURE on invalid input or when the
 * matrices cannot be allocated.
 */
int main(void)
{
    srand((unsigned)time(NULL));

    // Matrix dimension
    int n;
    printf("n=");
    if (scanf("%d", &n) != 1 || n <= 0) {
        fprintf(stderr, "Invalid matrix dimension\n");
        return EXIT_FAILURE;
    }

    const size_t dim = (size_t)n;
    // Reject dimensions whose element count does not fit into size_t.
    if (dim > (size_t)-1 / dim) {
        fprintf(stderr, "Matrix dimension too large\n");
        return EXIT_FAILURE;
    }
    const size_t numberOfEntries = dim * dim;

    // Allocate storage for the three n x n matrices.
    float *A = calloc(numberOfEntries, sizeof *A);
    float *B = calloc(numberOfEntries, sizeof *B);
    float *C = calloc(numberOfEntries, sizeof *C);
    if (A == NULL || B == NULL || C == NULL) {
        fprintf(stderr, "Out of memory\n");
        free(A);
        free(B);
        free(C);
        return EXIT_FAILURE;
    }

    // Fill the input matrices with random numbers.
    for (size_t i = 0; i < numberOfEntries; i++) {
        A[i] = getRand(10);
        B[i] = getRand(20);
    }

    // Start timer; keep clock_t so long runs are not truncated by an int cast.
    const clock_t start = clock();

    multiplySSE(A, B, C, dim);

    // Stop timer
    const int seconds = (int)((clock() - start) / CLOCKS_PER_SEC);

    // Print elapsed time
    printf("Ende!, Zeit: %d Sekunden \n", seconds);

    free(A);
    free(B);
    free(C);
    return 0;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement