Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/* glibc only declares cpu_set_t and the CPU_* macros, pthread_setaffinity_np()
 * and RUSAGE_THREAD when _GNU_SOURCE is defined before the first include. */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <malloc.h>
#include <pthread.h>
#include <sched.h>
#include <sys/resource.h>
#include <sys/time.h>

#include <immintrin.h>
/* Number of full passes each benchmark makes over its buffer. */
#define ITERATION_COUNT 200
/* Size in bytes of each benchmark buffer (128 MiB). Parenthesized so the
 * macro behaves as a single value inside any larger expression. */
#define BUFFER_SIZE (128 * 1024 * 1024)
/* Chunk size in bytes passed to each memset()/memcpy() call. */
#define MEMCPY_BUFFER_SIZE 8192
/* Uncomment to make test_write/test_read/test_copy use the SSE streaming
 * (non-temporal) intrinsics instead of plain loads, memset() and memcpy(). */
//#define NO_CACHE

/* Per-thread benchmark arguments. A real typedef is used instead of a
 * macro alias so the name obeys normal scoping and shows up in debuggers. */
typedef struct _thread_data thread_data;

/* Number of worker threads for the multi-threaded run; assigned in main(). */
int THREAD_COUNT = 0;

struct _thread_data {
    /* Output: five throughput figures in MB/s, in the order read, write,
     * copy, copy with streaming loads, copy with streaming stores. */
    double* speeds;
    /* CPU index the thread is pinned to; a negative value means "do not pin". */
    int id;
};
- void test_write(__m128i* array) {
- register __m128i tmp;
- #ifdef NO_CACHE
- for(int it = 0; it < ITERATION_COUNT; it++) {
- for(int i = 0; i < BUFFER_SIZE / sizeof(__m128i); i++) {
- _mm_stream_si128(&(array[i]), tmp);
- }
- }
- #else
- for(int it = 0; it < ITERATION_COUNT; it++) {
- for(int i = 0; i < BUFFER_SIZE / sizeof(__m128i); i += MEMCPY_BUFFER_SIZE / sizeof(__m128i)) {
- memset(&(array[i]), 0, MEMCPY_BUFFER_SIZE);
- }
- }
- #endif
- }
- void test_read(__m128i* array) {
- register __m128i tmp;
- for(int it = 0; it < ITERATION_COUNT; it++) {
- for(int i = 0; i < BUFFER_SIZE / sizeof(__m128i); i++) {
- #ifdef NO_CACHE
- tmp += _mm_stream_load_si128(&(array[i]));
- #else
- tmp += array[i];
- #endif
- }
- }
- printf("%d ", ((int*)&tmp)[0]);
- }
- void test_copy(__m128i* source, __m128i* dest) {
- #ifdef NO_CACHE
- for(int it = 0; it < ITERATION_COUNT; it++) {
- for(int i = 0; i < BUFFER_SIZE / sizeof(__m128i); i++) {
- _mm_stream_si128(&(dest[i]), _mm_stream_load_si128(&(source[i])));
- }
- }
- #else
- for(int it = 0; it < ITERATION_COUNT; it++) {
- for(int i = 0; i < BUFFER_SIZE / sizeof(__m128i); i += MEMCPY_BUFFER_SIZE / sizeof(__m128i)) {
- //dest[i] = source[i];
- memcpy(&(dest[i]), &(source[i]), MEMCPY_BUFFER_SIZE);
- }
- }
- #endif
- }
- void test_copy_no_cache_read(__m128i* source, __m128i* dest) {
- for(int it = 0; it < ITERATION_COUNT; it++) {
- for(int i = 0; i < BUFFER_SIZE / sizeof(__m128i); i++) {
- dest[i] = _mm_stream_load_si128(&(source[i]));
- }
- }
- }
- void test_copy_no_cache_write(__m128i* source, __m128i* dest) {
- for(int it = 0; it < ITERATION_COUNT; it++) {
- for(int i = 0; i < BUFFER_SIZE / sizeof(__m128i); i++) {
- _mm_stream_si128(&(dest[i]), source[i]);
- }
- }
- }
/*
 * Pins the calling thread to a single CPU.
 *
 * id: zero-based CPU index; must be in [0, CPU_SETSIZE).
 *
 * Failures are reported on stderr and otherwise ignored, so the caller keeps
 * running with whatever affinity it already had.
 */
void bind_thread(int id) {
    if (id < 0 || id >= CPU_SETSIZE) {
        fprintf(stderr, "bind_thread: CPU index %d is out of range\n", id);
        return;
    }

    cpu_set_t cpuset;
    /* The set must be cleared first: CPU_SET only turns one bit on, so an
     * uninitialized set would carry arbitrary extra CPUs. */
    CPU_ZERO(&cpuset);
    CPU_SET(id, &cpuset);

    /* pthread_setaffinity_np returns an error number rather than setting errno. */
    int err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
    if (err != 0) {
        fprintf(stderr, "bind_thread: cannot pin thread to CPU %d (error %d)\n", id, err);
    }
}
/*
 * Returns the user-mode CPU time, in seconds, elapsed between two
 * getrusage() snapshots.
 *
 * Only ru_utime is consulted; system time (ru_stime) is not included.
 *
 * start: snapshot taken before the measured work.
 * end:   snapshot taken after the measured work.
 */
double get_seconds(const struct rusage* start, const struct rusage* end) {
    const double whole_seconds = (double)(end->ru_utime.tv_sec - start->ru_utime.tv_sec);
    /* May be negative when the microsecond field wrapped; the sum is still correct. */
    const double microseconds = (double)(end->ru_utime.tv_usec - start->ru_utime.tv_usec);
    return whole_seconds + microseconds * 0.000001;
}
- void* test_single_thread(void* ptr) {
- thread_data* data = (thread_data*)ptr;
- if(data->id >= 0) {
- bind_thread(data->id);
- }
- double* speeds = data->speeds;
- __m128i* source = (__m128i*)malloc(BUFFER_SIZE);
- __m128i* dest = (__m128i*)malloc(BUFFER_SIZE);
- struct rusage start, end;
- //read
- getrusage(RUSAGE_THREAD, &start);
- test_read(source);
- getrusage(RUSAGE_THREAD, &end);
- speeds[0] = BUFFER_SIZE / (get_seconds(&start, &end) / ITERATION_COUNT) / 1024 / 1024;
- //write
- getrusage(RUSAGE_THREAD, &start);
- test_write(dest);
- getrusage(RUSAGE_THREAD, &end);
- speeds[1] = BUFFER_SIZE / (get_seconds(&start, &end) / ITERATION_COUNT) / 1024 / 1024;
- //copy
- getrusage(RUSAGE_THREAD, &start);
- test_copy(source, dest);
- getrusage(RUSAGE_THREAD, &end);
- speeds[2] = BUFFER_SIZE / (get_seconds(&start, &end) / ITERATION_COUNT) / 1024 / 1024;
- //copy no cache read
- getrusage(RUSAGE_THREAD, &start);
- test_copy_no_cache_read(source, dest);
- getrusage(RUSAGE_THREAD, &end);
- speeds[3] = BUFFER_SIZE / (get_seconds(&start, &end) / ITERATION_COUNT) / 1024 / 1024;
- //copy no cache write
- getrusage(RUSAGE_THREAD, &start);
- test_copy_no_cache_write(source, dest);
- getrusage(RUSAGE_THREAD, &end);
- speeds[4] = BUFFER_SIZE / (get_seconds(&start, &end) / ITERATION_COUNT) / 1024 / 1024;
- free(source);
- free(dest);
- return NULL;
- }
- void test_multi_thread(double* speeds) {
- pthread_t threads[THREAD_COUNT];
- thread_data datas[THREAD_COUNT];
- for(int i = 0; i < THREAD_COUNT; i++) {
- datas[i].speeds = &(speeds[i * 5]);
- datas[i].id = i;
- pthread_create(&(threads[i]), NULL, test_single_thread, &(datas[i]));
- }
- for(int i = 0; i < THREAD_COUNT; i++) {
- pthread_join(threads[i], NULL);
- }
- double speeds_overall[5];
- for(int i = 0; i < 5; i++) {
- double speed = 0;
- for(int it = 0; it < THREAD_COUNT; it++) {
- speed += speeds[it * 5 + i];
- }
- speeds_overall[i] = speed;
- }
- printf("multithread read speed: %lf MB/S\n", speeds_overall[0]);
- printf("multithread write speed: %lf MB/S\n", speeds_overall[1]);
- printf("multithread copy speed: %lf MB/S\n", speeds_overall[2]);
- printf("multithread copy no cache read speed: %lf MB/S\n", speeds_overall[3]);
- printf("multithread copy no cache write speed: %lf MB/S\n", speeds_overall[4]);
- }
- int main(int argc, char** argv) {
- THREAD_COUNT = atoi(argv[1]);
- double speeds_single[5];
- thread_data data;
- data.speeds = speeds_single;
- data.id = -1;
- test_single_thread(&data);
- printf("read speed: %lf MB/S\n", speeds_single[0]);
- printf("write speed: %lf MB/S\n", speeds_single[1]);
- printf("copy speed: %lf MB/S\n", speeds_single[2]);
- printf("copy no cache read speed: %lf MB/S\n", speeds_single[3]);
- printf("copy no cache write speed: %lf MB/S\n", speeds_single[4]);
- double* speeds_multi = (double*)malloc(THREAD_COUNT * 5 * sizeof(double));
- test_multi_thread(speeds_multi);
- return EXIT_SUCCESS;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement