Advertisement
Guest User

Untitled

a guest
Jan 1st, 2023
145
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.91 KB | None | 0 0
  1. #include <mpi.h>
  2. #include <stdio.h>
  3. #include <windows.h>
  4. #include <intrin.h>
  5. #include <processthreadsapi.h>
  6. #include <barrier>
  7. #include <memory>
  8.  
  9. #define MSG_ROWS 8
  10. #define MSG_COLS 158
  11. #define DISP_ROWS 8
  12. #define DISP_COLS 34
  13. #define NUM_PROCS DISP_ROWS * DISP_COLS
  14. #define MILLISECONDS_PER_TICK 5000
  15.  
  16. // 8 rows x 158 columns
  17. // each hex value is an 8-pixel column
  18.  
  19. const int message[MSG_COLS] = {
  20. 0x00, 0x00, 0xe7, 0xe7, 0xe7, 0x00, 0x00, 0xff, 0xfb, 0x1b, 0x0b, 0x6b, 0x03, 0x03, 0xff, 0x00,
  21. 0x00, 0xde, 0xcc, 0xc0, 0xe1, 0xff, 0x00, 0x00, 0xde, 0xcc, 0xc0, 0xe1, 0xff, 0xfc, 0x30, 0x03,
  22. 0x83, 0xe0, 0xfc, 0xff, 0xff, 0xff, 0xfe, 0xfe, 0x00, 0x00, 0xf0, 0xc3, 0x0f, 0x00, 0x00, 0xff,
  23. 0x87, 0x03, 0x0b, 0x4b, 0x43, 0xe7, 0xff, 0xf3, 0x83, 0x1f, 0x07, 0xe3, 0x03, 0x1f, 0x83, 0xf3,
  24. 0xff, 0xff, 0xff, 0xfe, 0xf8, 0x01, 0x07, 0xf1, 0xf8, 0xfe, 0x87, 0x03, 0x0b, 0x4b, 0x43, 0xe7,
  25. 0xff, 0xfb, 0x1b, 0x0b, 0x6b, 0x03, 0x03, 0xff, 0x03, 0x03, 0xe3, 0xf3, 0xff, 0xff, 0xff, 0xff,
  26. 0x03, 0x03, 0xe3, 0xf3, 0x3f, 0x1f, 0x8f, 0xc7, 0xe3, 0xf3, 0xff, 0x00, 0x00, 0xf7, 0xe7, 0x07,
  27. 0x0f, 0xff, 0x87, 0x03, 0x7b, 0x33, 0x03, 0x87, 0xff, 0x01, 0x01, 0xfd, 0xf9, 0x01, 0x01, 0xfd,
  28. 0x01, 0x01, 0xff, 0x87, 0x03, 0x0b, 0x4b, 0x43, 0xe7, 0xff, 0x00, 0x00, 0xff, 0xfb, 0x1b, 0x0b,
  29. 0x6b, 0x03, 0x03, 0xff, 0x00, 0x00, 0x33, 0x7b, 0x33, 0x03, 0xcf, 0xff, 0x20, 0x20 };
  30.  
  31. typedef struct ThreadArgs {
  32. int thread_rank;
  33. std::shared_ptr<std::barrier<std::_No_completion_function>> barrier;
  34. } MYDATA, *PMYDATA;
  35.  
  36. DWORD WINAPI run_thread(LPVOID arg_ptr);
  37.  
  38. int main(int argc, char** argv) {
  39.  
  40. PMYDATA pDataArray[NUM_PROCS];
  41. DWORD dwThreadIdArray[NUM_PROCS - 1];
  42. HANDLE hThreadArray[NUM_PROCS - 1];
  43.  
  44. // Set own affinity to 0
  45. auto newAffinity = new _GROUP_AFFINITY;
  46. newAffinity->Group = 0;
  47. newAffinity->Mask = static_cast<KAFFINITY>(1);
  48. newAffinity->Reserved[0] = 0;
  49. newAffinity->Reserved[1] = 0;
  50. newAffinity->Reserved[2] = 0;
  51. SetThreadGroupAffinity(GetCurrentThread(), newAffinity, NULL);
  52.  
  53. // Create barrier
  54. auto barrier = std::make_shared<std::barrier<std::_No_completion_function>>(NUM_PROCS);
  55.  
  56. // Populate own data
  57. pDataArray[0] = (PMYDATA)HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(MYDATA));
  58. pDataArray[0]->barrier = std::shared_ptr(barrier);
  59. pDataArray[0]->thread_rank = 0;
  60.  
  61. // Create MAX_THREADS - 1 worker threads.
  62.  
  63. for (int i = 0; i < NUM_PROCS - 1; i++)
  64. {
  65. // Create the thread to begin execution on its own.
  66. pDataArray[i + 1] = (PMYDATA)HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(MYDATA));
  67. pDataArray[i + 1]->barrier = std::shared_ptr(barrier);
  68. pDataArray[i + 1]->thread_rank = i + 1;
  69. hThreadArray[i] = CreateThread(
  70. NULL, // default security attributes
  71. 0, // use default stack size
  72. run_thread, // thread function name
  73. pDataArray[i + 1], // argument to thread function
  74. 0, // use default creation flags
  75. &dwThreadIdArray[i]); // returns the thread identifier
  76.  
  77. // Set affinity.
  78. auto newAffinity = new _GROUP_AFFINITY;
  79. newAffinity->Group = (i+1) / 64;
  80. newAffinity->Mask = static_cast<KAFFINITY>(1) << ((i+1) % 64);
  81. newAffinity->Reserved[0] = 0;
  82. newAffinity->Reserved[1] = 0;
  83. newAffinity->Reserved[2] = 0;
  84. SetThreadGroupAffinity(hThreadArray[i], newAffinity, NULL);
  85. }
  86.  
  87. run_thread(pDataArray[0]);
  88.  
  89. // Wait until all threads have terminated.
  90.  
  91. WaitForMultipleObjects(NUM_PROCS - 1, hThreadArray, TRUE, INFINITE);
  92.  
  93. // Close all thread handles.
  94.  
  95. for (int i = 0; i < NUM_PROCS - 1; i++)
  96. {
  97. CloseHandle(hThreadArray[i]);
  98. }
  99. }
  100.  
  101. DWORD WINAPI run_thread(LPVOID arg_ptr) {
  102. PMYDATA args = (PMYDATA)arg_ptr;
  103. int rank = args->thread_rank;
  104. args->barrier->arrive_and_wait();
  105. auto procnum = new PROCESSOR_NUMBER;
  106. GetCurrentProcessorNumberEx(procnum);
  107.  
  108. int my_row = rank / DISP_COLS;
  109. int my_col = rank % DISP_COLS;
  110.  
  111. // count cols from -disp_cols to msg_cols + disp_cols
  112. // add my_col. If count < 0 or count > msg_cols - 1, sleep/barrier
  113. // otherwise, look up in array and bit-shift to find out whether to spin
  114. for (int i = -DISP_COLS; i <= MSG_COLS + DISP_COLS; i++)
  115. {
  116. int cur_pixel_val;
  117. if (i + my_col < 0 || i + my_col > MSG_COLS - 1) {
  118. cur_pixel_val = 1; }
  119. else {
  120. cur_pixel_val = message[i + my_col] & (1 << my_row);
  121. }
  122. if (cur_pixel_val) {
  123. args->barrier->arrive_and_wait();
  124. }
  125. else {
  126. LARGE_INTEGER frequency;
  127. LARGE_INTEGER ticks;
  128. QueryPerformanceFrequency(&frequency);
  129. QueryPerformanceCounter(&ticks);
  130. int64_t start_ticks = ticks.QuadPart;
  131. int64_t cur_ticks = start_ticks;
  132. while (cur_ticks - start_ticks < frequency.QuadPart * MILLISECONDS_PER_TICK / 1000.0) {
  133. QueryPerformanceCounter(&ticks);
  134. cur_ticks = ticks.QuadPart;
  135. for (int j = 0; j < 10000; j++) {
  136. __nop();
  137. }
  138. }
  139. args->barrier->arrive_and_wait();
  140. }
  141. }
  142.  
  143. return 0;
  144. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement