Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- const std = @import("std");
- const print = std.debug.print;
- const math = std.math;
- const fabs = std.zig.c_builtins.__builtin_fabsf;
- const tMilli = std.time.milliTimestamp;
- const rand = std.rand;
/// Side length of the square matrices; every matrix holds N x N elements.
const N = 1024;

/// Number of elements processed per vector operation (chosen for AVX2).
const simdWidth = 8;
/// Fills the first `size * size` elements of `matrix` (a row-major
/// `size` x `size` matrix) with values produced by `rng.random().float(f32)`.
///
/// Every element is assigned exactly once, so exactly `size * size` values are
/// drawn from the generator. `matrix.len` must be at least `size * size`;
/// elements beyond that range are left untouched.
fn initializeMatrix(matrix: []f32, size: usize, rng: *rand.DefaultPrng) void {
    const random = rng.random();
    for (matrix[0 .. size * size]) |*cell| {
        cell.* = random.float(f32);
    }
}
/// Computes `C = A * B` for row-major `size` x `size` matrices of `f32`.
///
/// Each output row is produced in chunks of `simdWidth` columns: for every `k`
/// the scalar `A[i][k]` is broadcast and multiplied with `simdWidth`
/// consecutive elements of row `k` of `B`, accumulating into a vector that is
/// then stored into `C`. Columns left over when `size` is not a multiple of
/// `simdWidth` are computed one element at a time, so no load or store reaches
/// past the end of a row.
///
/// `A`, `B` and `C` must each hold at least `size * size` elements.
fn matrixMultiplySimd(A: []const f32, B: []const f32, C: []f32, size: usize) void {
    const Vec = @Vector(simdWidth, f32);
    // Number of leading columns that can be covered by full vector chunks.
    const vector_cols = size - size % simdWidth;

    for (0..size) |i| {
        const a_row = A[i * size ..][0..size];
        const c_row = C[i * size ..][0..size];

        var j: usize = 0;
        while (j < vector_cols) : (j += simdWidth) {
            var acc: Vec = @splat(@as(f32, 0.0));
            for (a_row, 0..) |a_val, k| {
                const a_vec: Vec = @splat(a_val);
                // Slice-to-array access keeps the load bounds-checked and
                // places no alignment requirement beyond that of f32.
                const b_vec: Vec = B[k * size + j ..][0..simdWidth].*;
                acc += a_vec * b_vec;
            }
            c_row[j..][0..simdWidth].* = acc;
        }

        // Scalar tail: remaining columns when size is not a multiple of simdWidth.
        while (j < size) : (j += 1) {
            var sum: f32 = 0.0;
            for (a_row, 0..) |a_val, k| {
                sum += a_val * B[k * size + j];
            }
            c_row[j] = sum;
        }
    }
}
- // // Perform matrix multiplication without SIMD (for comparison)
- // fn matrixMultiplyScalar(A: []const f32, B: []const f32, C: []f32, size: usize) void {
- // for (0..size) |i| {
- // for (0..size) |j| {
- // var sum: f32 = 0.0;
- // for (0..size) |k| {
- // sum += A[i * size + k] * B[k * size + j];
- // }
- // C[i * size + j] = sum;
- // }
- // }
- // }
/// Benchmarks `matrixMultiplySimd` on two `N` x `N` matrices filled by
/// `initializeMatrix` and prints the elapsed time in milliseconds.
///
/// Returns an error if a matrix allocation fails or if no monotonic timer is
/// available on the host.
pub fn main() !void {
    // Allocate memory for matrices
    const allocator = std.heap.page_allocator;
    const element_count = N * N;

    const A = try allocator.alloc(f32, element_count);
    defer allocator.free(A);
    const B = try allocator.alloc(f32, element_count);
    defer allocator.free(B);
    const C_simd = try allocator.alloc(f32, element_count);
    defer allocator.free(C_simd);
    // const C_scalar = try allocator.alloc(f32, N * N);
    // defer allocator.free(C_scalar);

    // Initialize matrices with random values. The seed comes from the wall
    // clock; @bitCast accepts any i64, so a negative clock value cannot trap.
    var rng = rand.DefaultPrng.init(@bitCast(tMilli()));
    initializeMatrix(A, N, &rng);
    initializeMatrix(B, N, &rng);

    // Benchmark SIMD matrix multiplication with a monotonic timer so that
    // wall-clock adjustments cannot distort the measurement.
    var timer = try std.time.Timer.start();
    matrixMultiplySimd(A, B, C_simd, N);
    const elapsed_ns = timer.read();
    // Keep the product observable so an optimizing build cannot discard the work.
    std.mem.doNotOptimizeAway(C_simd.ptr);

    const simd_time = @as(f64, @floatFromInt(elapsed_ns)) / std.time.ns_per_ms;
    print("SIMD Matrix Multiplication Time: {d:.2} ms\n", .{simd_time});

    // Benchmark scalar matrix multiplication
    // const start_scalar = tMilli();
    // matrixMultiplyScalar(A, B, C_scalar, N);
    // const end_scalar = tMilli();
    // const scalar_time = @as(f32, @floatFromInt(end_scalar - start_scalar));
    // print("Scalar Matrix Multiplication Time: {d:.2} ms\n", .{scalar_time});
    //
    // // Verify correctness
    // for (0..N) |i| {
    // for (0..N) |j| {
    // if (fabs(C_simd[i * N + j] - C_scalar[i * N + j]) > 1e-5) {
    // print("Mismatch at ({d}, {d}): SIMD={d}, Scalar={d}\n", .{ i, j, C_simd[i * N + j], C_scalar[i * N + j] });
    // return;
    // }
    // }
    // }
    // print("Results match!\n", .{});
}
Advertisement
Add Comment
Please, Sign In to add comment