View difference between Paste ID: HVa5q2Nc and L70a4dVN
SHOW: | | - or go back to the newest paste.
1
/* Copyright 2014 HSA Foundation Inc.  All Rights Reserved.
2
 *
3
 * HSAF is granting you permission to use this software and documentation (if
4
 * any) (collectively, the "Materials") pursuant to the terms and conditions
5
 * of the Software License Agreement included with the Materials.  If you do
6
 * not have a copy of the Software License Agreement, contact the  HSA Foundation for a copy.
7
 * Redistribution and use in source and binary forms, with or without
8
 * modification, are permitted provided that the following conditions
9
 * are met:
10
 * 1. Redistributions of source code must retain the above copyright
11
 *    notice, this list of conditions and the following disclaimer.
12
 * 2. Redistributions in binary form must reproduce the above copyright
13
 *    notice, this list of conditions and the following disclaimer in the
14
 *    documentation and/or other materials provided with the distribution
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
 * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
18
 * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE SOFTWARE.
21
 */
22
23
#include <stdio.h>
24
#include <stdint.h>
25
#include <stdlib.h>
26
#include <string.h>
27
#include <sys/time.h>
28
#include "hsa.h"
29
#include "hsa_ext_finalize.h"
30
#include "elf_utils.h"
31
32
#define check(msg, status) \
33
if (status != HSA_STATUS_SUCCESS) { \
34
    printf("%s failed.\n", #msg); \
35
    exit(1); \
36
} else { \
37
   printf("%s succeeded.\n", #msg); \
38
}
39
40
#define check_build(msg, status) \
41
if (status != STATUS_SUCCESS) { \
42
    printf("%s failed.\n", #msg); \
43
    exit(1); \
44
} else { \
45
   printf("%s succeeded.\n", #msg); \
46
}
47
48
49
long time_difference(struct timeval tv1, struct timeval tv2)
50
{
51
	return ((tv2.tv_sec - tv1.tv_sec)*1000000L    +tv2.tv_usec) - tv1.tv_usec;
52
}
53
54
55
/*
56
 * Define required BRIG data structures.
57
 */
58
59
typedef uint32_t BrigCodeOffset32_t;
60
61
typedef uint32_t BrigDataOffset32_t;
62
63
typedef uint16_t BrigKinds16_t;
64
65
typedef uint8_t BrigLinkage8_t;
66
67
typedef uint8_t BrigExecutableModifier8_t;
68
69
typedef BrigDataOffset32_t BrigDataOffsetString32_t;
70
71
enum BrigKinds {
72
    BRIG_KIND_NONE = 0x0000,
73
    BRIG_KIND_DIRECTIVE_BEGIN = 0x1000,
74
    BRIG_KIND_DIRECTIVE_KERNEL = 0x1008,
75
};
76
77
typedef struct BrigBase BrigBase;
78
struct BrigBase {
79
    uint16_t byteCount;
80
    BrigKinds16_t kind;
81
};
82
83
typedef struct BrigExecutableModifier BrigExecutableModifier;
84
struct BrigExecutableModifier {
85
    BrigExecutableModifier8_t allBits;
86
};
87
88
typedef struct BrigDirectiveExecutable BrigDirectiveExecutable;
89
struct BrigDirectiveExecutable {
90
    uint16_t byteCount;
91
    BrigKinds16_t kind;
92
    BrigDataOffsetString32_t name;
93
    uint16_t outArgCount;
94
    uint16_t inArgCount;
95
    BrigCodeOffset32_t firstInArg;
96
    BrigCodeOffset32_t firstCodeBlockEntry;
97
    BrigCodeOffset32_t nextModuleEntry;
98
    uint32_t codeBlockEntryCount;
99
    BrigExecutableModifier modifier;
100
    BrigLinkage8_t linkage;
101
    uint16_t reserved;
102
};
103
104
typedef struct BrigData BrigData;
105
struct BrigData {
106
    uint32_t byteCount;
107
    uint8_t bytes[1];
108
};
109
110
/*
111
 * Determines if the given agent is of type HSA_DEVICE_TYPE_GPU
112
 * and sets the value of data to the agent handle if it is.
113
 */
114
static hsa_status_t find_gpu(hsa_agent_t agent, void *data) {
115
    if (data == NULL) {
116
        return HSA_STATUS_ERROR_INVALID_ARGUMENT;
117
    }
118
    hsa_device_type_t device_type;
119
    hsa_status_t stat =
120
    hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
121
    if (stat != HSA_STATUS_SUCCESS) {
122
        return stat;
123
    }
124
    if (device_type == HSA_DEVICE_TYPE_GPU) {
125
        *((hsa_agent_t *)data) = agent;
126
    }
127
    return HSA_STATUS_SUCCESS;
128
}
129
130
/*
131
 * Determines if a memory region can be used for kernarg
132
 * allocations.
133
 */
134
static hsa_status_t get_kernarg(hsa_region_t region, void* data) {
135
    hsa_region_flag_t flags;
136
    hsa_region_get_info(region, HSA_REGION_INFO_FLAGS, &flags);
137
    if (flags & HSA_REGION_FLAG_KERNARG) {
138
        hsa_region_t* ret = (hsa_region_t*) data;
139
        *ret = region;
140
    }
141
    return HSA_STATUS_SUCCESS;
142
}
143
144
/*
145
 * Finds the specified symbols offset in the specified brig_module.
146
 * If the symbol is found the function returns HSA_STATUS_SUCCESS, 
147
 * otherwise it returns HSA_STATUS_ERROR.
148
 */
149
hsa_status_t find_symbol_offset(hsa_ext_brig_module_t* brig_module, 
150
    char* symbol_name,
151
    hsa_ext_brig_code_section_offset32_t* offset) {
152
    
153
    /* 
154
     * Get the data section 
155
     */
156
    hsa_ext_brig_section_header_t* data_section_header = 
157
                brig_module->section[HSA_EXT_BRIG_SECTION_DATA];
158
    /* 
159
     * Get the code section
160
     */
161
    hsa_ext_brig_section_header_t* code_section_header =
162
             brig_module->section[HSA_EXT_BRIG_SECTION_CODE];
163
164
    /* 
165
     * First entry into the BRIG code section
166
     */
167
    BrigCodeOffset32_t code_offset = code_section_header->header_byte_count;
168
    BrigBase* code_entry = (BrigBase*) ((char*)code_section_header + code_offset);
169
    while (code_offset != code_section_header->byte_count) {
170
        if (code_entry->kind == BRIG_KIND_DIRECTIVE_KERNEL) {
171
            /* 
172
             * Now find the data in the data section
173
             */
174
            BrigDirectiveExecutable* directive_kernel = (BrigDirectiveExecutable*) (code_entry);
175
            BrigDataOffsetString32_t data_name_offset = directive_kernel->name;
176
            BrigData* data_entry = (BrigData*)((char*) data_section_header + data_name_offset);
177
            if (!strncmp(symbol_name, (char*) data_entry->bytes, strlen(symbol_name))) {
178
                *offset = code_offset;
179
                return HSA_STATUS_SUCCESS;
180
            }
181
        }
182
        code_offset += code_entry->byteCount;
183
        code_entry = (BrigBase*) ((char*)code_section_header + code_offset);
184
    }
185
    return HSA_STATUS_ERROR;
186
}
187
188
189
void submit_packet( hsa_ext_code_descriptor_t *hsaCodeDescriptor, hsa_queue_t* commandQueue, void* kernel_arg_buffer, hsa_signal_t signal) 
190
{ 
191
    hsa_status_t err;
192
193
    /*
194
     * Initialize the dispatch packet.
195
     */
196
    hsa_dispatch_packet_t aql;
197
    memset(&aql, 0, sizeof(aql));
198
199
    /*
200
     * Setup the dispatch information.
201
     */
202
    aql.completion_signal=signal;
203
    aql.dimensions=1;
204
    aql.workgroup_size_x=256;
205
    aql.workgroup_size_y=1;
206
    aql.workgroup_size_z=1;
207
    aql.grid_size_x=1024*1024;
208
    aql.grid_size_y=1;
209
    aql.grid_size_z=1;
210
    aql.header.type=HSA_PACKET_TYPE_DISPATCH;
211
    aql.header.acquire_fence_scope=2;
212
    aql.header.release_fence_scope=2;
213
    aql.header.barrier=1;
214
    aql.group_segment_size=0;
215
    aql.private_segment_size=0;
216
    
217
 
218
    /*
219
     * Bind kernel code and the kernel argument buffer to the
220
     * aql packet.
221
     */
222
    aql.kernel_object_address=hsaCodeDescriptor->code.handle;
223
    aql.kernarg_address=(uint64_t)kernel_arg_buffer;
224
225
226
    /*
227
     * Obtain the current queue write index.
228
     */
229
    uint64_t index = hsa_queue_load_write_index_relaxed(commandQueue);
230
231
    /*
232
     * Write the aql packet at the calculated queue index address.
233
     */
234
    const uint32_t queueMask = commandQueue->size - 1;
235
    ((hsa_dispatch_packet_t*)(commandQueue->base_address))[index&queueMask]=aql;
236
237
    /*
238
     * Increment the write index and ring the doorbell to dispatch the kernel.
239
     */
240
    hsa_queue_store_write_index_relaxed(commandQueue, index+1);
241
    hsa_signal_store_relaxed(commandQueue->doorbell_signal, index);
242
}
243
244
int main(int argc, char **argv) {
245
    hsa_status_t err;
246
    status_t build_err;
247
248
    err = hsa_init();
249
    check(Initializing the hsa runtime, err);
250
251
    /* 
252
     * Iterate over the agents and pick the gpu agent using 
253
     * the find_gpu callback.
254
     */
255
    hsa_agent_t device = 0;
256
    err = hsa_iterate_agents(find_gpu, &device);
257
    check(Calling hsa_iterate_agents, err);
258
259
    err = (device == 0) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS;
260
    check(Checking if the GPU device is non-zero, err);
261
262
    /*
263
     * Query the name of the device.
264
     */
265
    char name[64] = { 0 };
266
    err = hsa_agent_get_info(device, HSA_AGENT_INFO_NAME, name);
267
    check(Querying the device name, err);
268
    printf("The device name is %s.\n", name);
269
270
    /*
271
     * Query the maximum size of the queue.
272
     */
273
    uint32_t queue_size = 0;
274
    err = hsa_agent_get_info(device, HSA_AGENT_INFO_QUEUE_MAX_SIZE, &queue_size);
275
    check(Querying the device maximum queue size, err);
276
    printf("The maximum queue size is %u.\n", (unsigned int) queue_size);
277
278
    /*
279
     * Create a queue using the maximum size.
280
     */
281
    hsa_queue_t* commandQueue;
282
    err = hsa_queue_create(device, queue_size, HSA_QUEUE_TYPE_MULTI, NULL, NULL, &commandQueue);
283
    check(Creating the queue, err);
284
285
    /*
286
     * Load BRIG, encapsulated in an ELF container, into a BRIG module.
287
     */
288
    hsa_ext_brig_module_t* brigModule;
289
    char file_name[128] = "vector_copy.brig";
290
    build_err = create_brig_module_from_brig_file(file_name, &brigModule);
291
    check_build(Creating the brig module from vector_copy.brig, build_err);
292
293
    /*
294
     * Create hsa program.
295
     */
296
    hsa_ext_program_handle_t hsaProgram;
297
    err = hsa_ext_program_create(&device, 1, HSA_EXT_BRIG_MACHINE_LARGE, HSA_EXT_BRIG_PROFILE_FULL, &hsaProgram);
298
    check(Creating the hsa program, err);
299
300
    /*
301
     * Add the BRIG module to hsa program.
302
     */
303
    hsa_ext_brig_module_handle_t module;
304
    err = hsa_ext_add_module(hsaProgram, brigModule, &module);
305
    check(Adding the brig module to the program, err);
306
307
    /* 
308
     * Construct finalization request list.
309
     */
310
    hsa_ext_finalization_request_t finalization_request_list;
311
    finalization_request_list.module = module;
312
    finalization_request_list.program_call_convention = 0;
313
    char kernel_name[128] = "&__vector_copy_kernel";
314
    err = find_symbol_offset(brigModule, kernel_name, &finalization_request_list.symbol);
315
    check(Finding the symbol offset for the kernel, err);
316
317
    /*
318
     * Finalize the hsa program.
319
     */
320
    err = hsa_ext_finalize_program(hsaProgram, device, 1, &finalization_request_list, NULL, NULL, 0, NULL, 0);
321
    check(Finalizing the program, err);
322
323
    /*
324
     * Destroy the brig module. The program was successfully created the kernel
325
     * symbol was found and the program was finalized, so it is no longer needed.
326
     */
327
     destroy_brig_module(brigModule);
328
329
    /*
330
     * Get the hsa code descriptor address.
331
     */
332
    hsa_ext_code_descriptor_t *hsaCodeDescriptor;
333
    err = hsa_ext_query_kernel_descriptor_address(hsaProgram, module, finalization_request_list.symbol, &hsaCodeDescriptor);
334
    check(Querying the kernel descriptor address, err);
335
336
    /*
337
     * Allocate and initialize the kernel arguments.
338
     */
339
    char* in=(char*)malloc(1024*1024*4);
340
    memset(in, 1, 1024*1024*4);
341
    err=hsa_memory_register(in, 1024*1024*4);
342
    check(Registering argument memory for input parameter, err);
343
344
    char* out=(char*)malloc(1024*1024*4);
345
    memset(out, 0, 1024*1024*4);
346
    err=hsa_memory_register(out, 1024*1024*4);
347
    check(Registering argument memory for output parameter, err);
348
349
    struct __attribute__ ((aligned(HSA_ARGUMENT_ALIGN_BYTES))) args_t {
350
        void* arg0;
351
        void* arg1;
352
    } args;
353
354
    args.arg0=out;
355
    args.arg1=in;
356
357
    /*
358
     * Find a memory region that supports kernel arguments.
359
     */
360
    hsa_region_t kernarg_region = 0;
361
    hsa_agent_iterate_regions(device, get_kernarg, &kernarg_region);
362
    err = (kernarg_region == 0) ? HSA_STATUS_ERROR : HSA_STATUS_SUCCESS;
363
    check(Finding a kernarg memory region, err);
364
    void* kernel_arg_buffer = NULL;
365
   
366
    size_t kernel_arg_buffer_size = hsaCodeDescriptor->kernarg_segment_byte_size;
367
368
    /*
369
     * Allocate the kernel argument buffer from the correct region.
370
     */   
371
    err = hsa_memory_allocate(kernarg_region, kernel_arg_buffer_size, 
372
                        &kernel_arg_buffer);
373
    check(Allocating kernel argument memory buffer, err);
374
    memcpy(kernel_arg_buffer, &args, sizeof(args));
375
    /*
376
     * Register the memory region for the argument buffer.
377
     */
378
    err = hsa_memory_register(&args, sizeof(struct args_t));
379
    check(Registering the argument buffer, err);
380
381
#define ITERATIONS 1000
382
    struct timeval tv1;
383
    struct timeval tv2;
384
    gettimeofday(&tv1, 0);
385
386
    for (int i = 0 ;i < ITERATIONS ; ++i) 
387
    {
388
	    hsa_signal_t signal;
389
	    hsa_signal_create(1, 0, NULL, &signal);
390
391
	    submit_packet(hsaCodeDescriptor, commandQueue, kernel_arg_buffer, signal);
392
	    /*
393
	     * Wait on the dispatch signal until the kernel is finished.
394
	     */
395
	    hsa_signal_value_t value = hsa_signal_wait_acquire(signal, HSA_LT, 1, (uint64_t) -1, HSA_WAIT_EXPECTANCY_UNKNOWN);
396
397
	    /*
398
	     * Cleanup all allocated resources.
399
	     */
400
	    hsa_signal_destroy(signal);
401
    }
402
    gettimeofday(&tv2, 0);
403
     
404
    printf("!!!!! Elapsed submit->wait->repeat %ld\n", time_difference(tv1, tv2));
405
406
407
    {
408
	    hsa_signal_t signal;
409
	    err=hsa_signal_create(1, 0, NULL, &signal);
410
	    check(Creating a HSA signal, err);
411
	   gettimeofday(&tv1, 0);
412
	    for (int i = 0 ;i < ITERATIONS ; ++i) 
413
	    {
414
   	        submit_packet(hsaCodeDescriptor, commandQueue, kernel_arg_buffer, i == ITERATIONS -1 ? signal : 0);
415
	    }
416
	    /*
417
	     * Wait on the dispatch signal until the kernel is finished.
418
	     */
419
	    hsa_signal_value_t value = hsa_signal_wait_acquire(signal, HSA_LT, 1, (uint64_t) -1, HSA_WAIT_EXPECTANCY_UNKNOWN);
420
	 gettimeofday(&tv2, 0);
421
422
	    /*
423
	     * Cleanup all allocated resources.
424
	     */
425
	    err=hsa_signal_destroy(signal);
426
	    check(Destroying the signal, err);
427
            printf("!!!!! Elapsed submit->repeat->wait %ld\n", time_difference(tv1, tv2));
428
429
    }	
430
431
    /*
432
     * Validate the data in the output buffer.
433
     */
434
    int valid=1;
435
    int failIndex=0;
436
    for(int i=0; i<1024*1024; i++) {
437
        if(out[i]!=in[i]) {
438
            failIndex=i;
439
            valid=0;
440
            break;
441
        }
442
    }
443
444
    if(valid) {
445
        printf("Passed validation.\n");
446
    } else {
447
        printf("VALIDATION FAILED!\nBad index: %d\n", failIndex);
448
    }
449
450
   
451
452
    err=hsa_ext_program_destroy(hsaProgram);
453
    check(Destroying the program, err);
454
455
    err=hsa_queue_destroy(commandQueue);
456
    check(Destroying the queue, err);
457
    
458
    err=hsa_shut_down();
459
    check(Shutting down the runtime, err);
460
461
    free(in);
462
    free(out);
463
464
    return 0;
465
}