Advertisement
Guest User

Untitled

a guest
Feb 2nd, 2015
138
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. /* Copyright 2014 HSA Foundation Inc. All Rights Reserved.
  2. *
  3. * HSAF is granting you permission to use this software and documentation (if
  4. * any) (collectively, the "Materials") pursuant to the terms and conditions
  5. * of the Software License Agreement included with the Materials. If you do
  6. * not have a copy of the Software License Agreement, contact the HSA Foundation for a copy.
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions
  9. * are met:
  10. * 1. Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution
  15. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
  17. * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  18. * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
  21. */
  22.  
  23. #include <stdio.h>
  24. #include <stdint.h>
  25. #include <stdlib.h>
  26. #include <string.h>
  27. #include <sys/time.h>
  28. #include "hsa.h"
  29. #include "hsa_ext_finalize.h"
  30. #include "elf_utils.h"
  31.  
  32. #define check(msg, status) \
  33. if (status != HSA_STATUS_SUCCESS) { \
  34. printf("%s failed.\n", #msg); \
  35. exit(1); \
  36. } else { \
  37. printf("%s succeeded.\n", #msg); \
  38. }
  39.  
  40. #define check_build(msg, status) \
  41. if (status != STATUS_SUCCESS) { \
  42. printf("%s failed.\n", #msg); \
  43. exit(1); \
  44. } else { \
  45. printf("%s succeeded.\n", #msg); \
  46. }
  47.  
  48.  
  49. long time_difference(struct timeval tv1, struct timeval tv2)
  50. {
  51. return ((tv2.tv_sec - tv1.tv_sec)*1000000L +tv2.tv_usec) - tv1.tv_usec;
  52. }
  53.  
  54.  
  55. /*
  56. * Define required BRIG data structures.
  57. */
  58.  
  59. typedef uint32_t BrigCodeOffset32_t;
  60.  
  61. typedef uint32_t BrigDataOffset32_t;
  62.  
  63. typedef uint16_t BrigKinds16_t;
  64.  
  65. typedef uint8_t BrigLinkage8_t;
  66.  
  67. typedef uint8_t BrigExecutableModifier8_t;
  68.  
  69. typedef BrigDataOffset32_t BrigDataOffsetString32_t;
  70.  
  71. enum BrigKinds {
  72. BRIG_KIND_NONE = 0x0000,
  73. BRIG_KIND_DIRECTIVE_BEGIN = 0x1000,
  74. BRIG_KIND_DIRECTIVE_KERNEL = 0x1008,
  75. };
  76.  
  77. typedef struct BrigBase BrigBase;
  78. struct BrigBase {
  79. uint16_t byteCount;
  80. BrigKinds16_t kind;
  81. };
  82.  
  83. typedef struct BrigExecutableModifier BrigExecutableModifier;
  84. struct BrigExecutableModifier {
  85. BrigExecutableModifier8_t allBits;
  86. };
  87.  
  88. typedef struct BrigDirectiveExecutable BrigDirectiveExecutable;
  89. struct BrigDirectiveExecutable {
  90. uint16_t byteCount;
  91. BrigKinds16_t kind;
  92. BrigDataOffsetString32_t name;
  93. uint16_t outArgCount;
  94. uint16_t inArgCount;
  95. BrigCodeOffset32_t firstInArg;
  96. BrigCodeOffset32_t firstCodeBlockEntry;
  97. BrigCodeOffset32_t nextModuleEntry;
  98. uint32_t codeBlockEntryCount;
  99. BrigExecutableModifier modifier;
  100. BrigLinkage8_t linkage;
  101. uint16_t reserved;
  102. };
  103.  
  104. typedef struct BrigData BrigData;
  105. struct BrigData {
  106. uint32_t byteCount;
  107. uint8_t bytes[1];
  108. };
  109.  
  110. /*
  111. * Determines if the given agent is of type HSA_DEVICE_TYPE_GPU
  112. * and sets the value of data to the agent handle if it is.
  113. */
  114. static hsa_status_t find_gpu(hsa_agent_t agent, void *data) {
  115. if (data == NULL) {
  116. return HSA_STATUS_ERROR_INVALID_ARGUMENT;
  117. }
  118. hsa_device_type_t device_type;
  119. hsa_status_t stat =
  120. hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
  121. if (stat != HSA_STATUS_SUCCESS) {
  122. return stat;
  123. }
  124. if (device_type == HSA_DEVICE_TYPE_GPU) {
  125. *((hsa_agent_t *)data) = agent;
  126. }
  127. return HSA_STATUS_SUCCESS;
  128. }
  129.  
  130. /*
  131. * Determines if a memory region can be used for kernarg
  132. * allocations.
  133. */
  134. static hsa_status_t get_kernarg(hsa_region_t region, void* data) {
  135. hsa_region_flag_t flags;
  136. hsa_region_get_info(region, HSA_REGION_INFO_FLAGS, &flags);
  137. if (flags & HSA_REGION_FLAG_KERNARG) {
  138. hsa_region_t* ret = (hsa_region_t*) data;
  139. *ret = region;
  140. }
  141. return HSA_STATUS_SUCCESS;
  142. }
  143.  
  144. /*
  145. * Finds the specified symbols offset in the specified brig_module.
  146. * If the symbol is found the function returns HSA_STATUS_SUCCESS,
  147. * otherwise it returns HSA_STATUS_ERROR.
  148. */
  149. hsa_status_t find_symbol_offset(hsa_ext_brig_module_t* brig_module,
  150. char* symbol_name,
  151. hsa_ext_brig_code_section_offset32_t* offset) {
  152.  
  153. /*
  154. * Get the data section
  155. */
  156. hsa_ext_brig_section_header_t* data_section_header =
  157. brig_module->section[HSA_EXT_BRIG_SECTION_DATA];
  158. /*
  159. * Get the code section
  160. */
  161. hsa_ext_brig_section_header_t* code_section_header =
  162. brig_module->section[HSA_EXT_BRIG_SECTION_CODE];
  163.  
  164. /*
  165. * First entry into the BRIG code section
  166. */
  167. BrigCodeOffset32_t code_offset = code_section_header->header_byte_count;
  168. BrigBase* code_entry = (BrigBase*) ((char*)code_section_header + code_offset);
  169. while (code_offset != code_section_header->byte_count) {
  170. if (code_entry->kind == BRIG_KIND_DIRECTIVE_KERNEL) {
  171. /*
  172. * Now find the data in the data section
  173. */
  174. BrigDirectiveExecutable* directive_kernel = (BrigDirectiveExecutable*) (code_entry);
  175. BrigDataOffsetString32_t data_name_offset = directive_kernel->name;
  176. BrigData* data_entry = (BrigData*)((char*) data_section_header + data_name_offset);
  177. if (!strncmp(symbol_name, (char*) data_entry->bytes, strlen(symbol_name))) {
  178. *offset = code_offset;
  179. return HSA_STATUS_SUCCESS;
  180. }
  181. }
  182. code_offset += code_entry->byteCount;
  183. code_entry = (BrigBase*) ((char*)code_section_header + code_offset);
  184. }
  185. return HSA_STATUS_ERROR;
  186. }
  187.  
  188.  
  189. void submit_packet( hsa_ext_code_descriptor_t *hsaCodeDescriptor, hsa_queue_t* commandQueue, void* kernel_arg_buffer, hsa_signal_t signal)
  190. {
  191. hsa_status_t err;
  192.  
  193. /*
  194. * Initialize the dispatch packet.
  195. */
  196. hsa_dispatch_packet_t aql;
  197. memset(&aql, 0, sizeof(aql));
  198.  
  199. /*
  200. * Setup the dispatch information.
  201. */
  202. aql.completion_signal=signal;
  203. aql.dimensions=1;
  204. aql.workgroup_size_x=256;
  205. aql.workgroup_size_y=1;
  206. aql.workgroup_size_z=1;
  207. aql.grid_size_x=1024*1024;
  208. aql.grid_size_y=1;
  209. aql.grid_size_z=1;
  210. aql.header.type=HSA_PACKET_TYPE_DISPATCH;
  211. aql.header.acquire_fence_scope=2;
  212. aql.header.release_fence_scope=2;
  213. aql.header.barrier=1;
  214. aql.group_segment_size=0;
  215. aql.private_segment_size=0;
  216.  
  217.  
  218. /*
  219. * Bind kernel code and the kernel argument buffer to the
  220. * aql packet.
  221. */
  222. aql.kernel_object_address=hsaCodeDescriptor->code.handle;
  223. aql.kernarg_address=(uint64_t)kernel_arg_buffer;
  224.  
  225.  
  226. /*
  227. * Obtain the current queue write index.
  228. */
  229. uint64_t index = hsa_queue_load_write_index_relaxed(commandQueue);
  230.  
  231. /*
  232. * Write the aql packet at the calculated queue index address.
  233. */
  234. const uint32_t queueMask = commandQueue->size - 1;
  235. ((hsa_dispatch_packet_t*)(commandQueue->base_address))[index&queueMask]=aql;
  236.  
  237. /*
  238. * Increment the write index and ring the doorbell to dispatch the kernel.
  239. */
  240. hsa_queue_store_write_index_relaxed(commandQueue, index+1);
  241. hsa_signal_store_relaxed(commandQueue->doorbell_signal, index);
  242. }
  243.  
  244. int main(int argc, char **argv) {
  245. hsa_status_t err;
  246. status_t build_err;
  247.  
  248. err = hsa_init();
  249. check(Initializing the hsa runtime, err);
  250.  
  251. /*
  252. * Iterate over the agents and pick the gpu agent using
  253. * the find_gpu callback.
  254. */
  255. hsa_agent_t device = 0;
  256. err = hsa_iterate_agents(find_gpu, &device);
  257. check(Calling hsa_iterate_agents, err);
  258.  
  259. err = (device == 0) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS;
  260. check(Checking if the GPU device is non-zero, err);
  261.  
  262. /*
  263. * Query the name of the device.
  264. */
  265. char name[64] = { 0 };
  266. err = hsa_agent_get_info(device, HSA_AGENT_INFO_NAME, name);
  267. check(Querying the device name, err);
  268. printf("The device name is %s.\n", name);
  269.  
  270. /*
  271. * Query the maximum size of the queue.
  272. */
  273. uint32_t queue_size = 0;
  274. err = hsa_agent_get_info(device, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size);
  275. check(Querying the device maximum queue size, err);
  276. printf("The maximum queue size is %u.\n", (unsigned int) queue_size);
  277.  
  278. /*
  279. * Create a queue using the maximum size.
  280. */
  281. hsa_queue_t* commandQueue;
  282. err = hsa_queue_create(device, queue_size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, &commandQueue);
  283. check(Creating the queue, err);
  284.  
  285. /*
  286. * Load BRIG, encapsulated in an ELF container, into a BRIG module.
  287. */
  288. hsa_ext_brig_module_t* brigModule;
  289. char file_name[128] = "vector_copy.brig";
  290. build_err = create_brig_module_from_brig_file(file_name, &brigModule);
  291. check_build(Creating the brig module from vector_copy.brig, build_err);
  292.  
  293. /*
  294. * Create hsa program.
  295. */
  296. hsa_ext_program_handle_t hsaProgram;
  297. err = hsa_ext_program_create(&device, 1, HSA_EXT_BRIG_MACHINE_LARGE, HSA_EXT_BRIG_PROFILE_FULL, &hsaProgram);
  298. check(Creating the hsa program, err);
  299.  
  300. /*
  301. * Add the BRIG module to hsa program.
  302. */
  303. hsa_ext_brig_module_handle_t module;
  304. err = hsa_ext_add_module(hsaProgram, brigModule, &module);
  305. check(Adding the brig module to the program, err);
  306.  
  307. /*
  308. * Construct finalization request list.
  309. */
  310. hsa_ext_finalization_request_t finalization_request_list;
  311. finalization_request_list.module = module;
  312. finalization_request_list.program_call_convention = 0;
  313. char kernel_name[128] = "&__vector_copy_kernel";
  314. err = find_symbol_offset(brigModule, kernel_name, &finalization_request_list.symbol);
  315. check(Finding the symbol offset for the kernel, err);
  316.  
  317. /*
  318. * Finalize the hsa program.
  319. */
  320. err = hsa_ext_finalize_program(hsaProgram, device, 1, &finalization_request_list, NULL, NULL, 0, NULL, 0);
  321. check(Finalizing the program, err);
  322.  
  323. /*
  324. * Destroy the brig module. The program was successfully created the kernel
  325. * symbol was found and the program was finalized, so it is no longer needed.
  326. */
  327. destroy_brig_module(brigModule);
  328.  
  329. /*
  330. * Get the hsa code descriptor address.
  331. */
  332. hsa_ext_code_descriptor_t *hsaCodeDescriptor;
  333. err = hsa_ext_query_kernel_descriptor_address(hsaProgram, module, finalization_request_list.symbol, &hsaCodeDescriptor);
  334. check(Querying the kernel descriptor address, err);
  335.  
  336. /*
  337. * Allocate and initialize the kernel arguments.
  338. */
  339. char* in=(char*)malloc(1024*1024*4);
  340. memset(in, 1, 1024*1024*4);
  341. err=hsa_memory_register(in, 1024*1024*4);
  342. check(Registering argument memory for input parameter, err);
  343.  
  344. char* out=(char*)malloc(1024*1024*4);
  345. memset(out, 0, 1024*1024*4);
  346. err=hsa_memory_register(out, 1024*1024*4);
  347. check(Registering argument memory for output parameter, err);
  348.  
  349. struct __attribute__ ((aligned(HSA_ARGUMENT_ALIGN_BYTES))) args_t {
  350. void* arg0;
  351. void* arg1;
  352. } args;
  353.  
  354. args.arg0=out;
  355. args.arg1=in;
  356.  
  357. /*
  358. * Find a memory region that supports kernel arguments.
  359. */
  360. hsa_region_t kernarg_region = 0;
  361. hsa_agent_iterate_regions(device, get_kernarg, &kernarg_region);
  362. err = (kernarg_region == 0) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS;
  363. check(Finding a kernarg memory region, err);
  364. void* kernel_arg_buffer = NULL;
  365.  
  366. size_t kernel_arg_buffer_size = hsaCodeDescriptor->kernarg_segment_byte_size;
  367.  
  368. /*
  369. * Allocate the kernel argument buffer from the correct region.
  370. */
  371. err = hsa_memory_allocate(kernarg_region, kernel_arg_buffer_size,
  372. &kernel_arg_buffer);
  373. check(Allocating kernel argument memory buffer, err);
  374. memcpy(kernel_arg_buffer, &args, sizeof(args));
  375. /*
  376. * Register the memory region for the argument buffer.
  377. */
  378. err = hsa_memory_register(&args, sizeof(struct args_t));
  379. check(Registering the argument buffer, err);
  380.  
  381. #define ITERATIONS 1000
  382. struct timeval tv1;
  383. struct timeval tv2;
  384. gettimeofday(&tv1, 0);
  385.  
  386. for (int i = 0 ;i < ITERATIONS ; ++i)
  387. {
  388. hsa_signal_t signal;
  389. hsa_signal_create(1, 0, NULL, &signal);
  390.  
  391. submit_packet(hsaCodeDescriptor, commandQueue, kernel_arg_buffer, signal);
  392. /*
  393. * Wait on the dispatch signal until the kernel is finished.
  394. */
  395. hsa_signal_value_t value = hsa_signal_wait_acquire(signal, HSA_LT, 1, (uint64_t) -1, HSA_WAIT_EXPECTANCY_UNKNOWN);
  396.  
  397. /*
  398. * Cleanup all allocated resources.
  399. */
  400. hsa_signal_destroy(signal);
  401. }
  402. gettimeofday(&tv2, 0);
  403.  
  404. printf("!!!!! Elapsed submit->wait->repeat %ld\n", time_difference(tv1, tv2));
  405.  
  406.  
  407. {
  408. hsa_signal_t signal;
  409. err=hsa_signal_create(1, 0, NULL, &signal);
  410. check(Creating a HSA signal, err);
  411. gettimeofday(&tv1, 0);
  412. for (int i = 0 ;i < ITERATIONS ; ++i)
  413. {
  414. submit_packet(hsaCodeDescriptor, commandQueue, kernel_arg_buffer, i == ITERATIONS -1 ? signal : 0);
  415. }
  416. /*
  417. * Wait on the dispatch signal until the kernel is finished.
  418. */
  419. hsa_signal_value_t value = hsa_signal_wait_acquire(signal, HSA_LT, 1, (uint64_t) -1, HSA_WAIT_EXPECTANCY_UNKNOWN);
  420. gettimeofday(&tv2, 0);
  421.  
  422. /*
  423. * Cleanup all allocated resources.
  424. */
  425. err=hsa_signal_destroy(signal);
  426. check(Destroying the signal, err);
  427. printf("!!!!! Elapsed submit->repeat->wait %ld\n", time_difference(tv1, tv2));
  428.  
  429. }
  430.  
  431. /*
  432. * Validate the data in the output buffer.
  433. */
  434. int valid=1;
  435. int failIndex=0;
  436. for(int i=0; i<1024*1024; i++) {
  437. if(out[i]!=in[i]) {
  438. failIndex=i;
  439. valid=0;
  440. break;
  441. }
  442. }
  443.  
  444. if(valid) {
  445. printf("Passed validation.\n");
  446. } else {
  447. printf("VALIDATION FAILED!\nBad index: %d\n", failIndex);
  448. }
  449.  
  450.  
  451.  
  452. err=hsa_ext_program_destroy(hsaProgram);
  453. check(Destroying the program, err);
  454.  
  455. err=hsa_queue_destroy(commandQueue);
  456. check(Destroying the queue, err);
  457.  
  458. err=hsa_shut_down();
  459. check(Shutting down the runtime, err);
  460.  
  461. free(in);
  462. free(out);
  463.  
  464. return 0;
  465. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement