mirror of
https://github.com/AdaCore/cuda.git
synced 2026-02-12 13:05:54 -08:00
94 lines
2.2 KiB
Ada
94 lines
2.2 KiB
Ada
with System;
|
|
with Interfaces.C; use Interfaces.C;
|
|
|
|
with Ada.Numerics.Float_Random; use Ada.Numerics.Float_Random;
|
|
with Ada.Text_IO; use Ada.Text_IO;
|
|
|
|
with CUDA.Driver_Types; use CUDA.Driver_Types;
|
|
with CUDA.Runtime_Api; use CUDA.Runtime_Api;
|
|
with CUDA.Stddef;
|
|
with CUDA.Storage_Models; use CUDA.Storage_Models;
|
|
|
|
with Kernel; use Kernel;
|
|
|
|
with Ada.Unchecked_Deallocation;
|
|
with Ada.Unchecked_Conversion;
|
|
|
|
with Ada.Calendar; use Ada.Calendar;
|
|
with Ada.Command_Line; use Ada.Command_Line;
|
|
|
|
procedure Main is

   --  Runs Complex_Computation over two randomly filled input arrays on the
   --  host, then launches Device_Complex_Computation on copies of the same
   --  inputs, and prints the wall-clock time taken by each.
   --
   --  Command line: an optional first argument N selects a problem size of
   --  2 ** N elements.  Without it, 2 ** 8 elements are used.

   type Array_Host_Access is access all Float_Array;

   procedure Free is new
     Ada.Unchecked_Deallocation (Float_Array, Array_Host_Access);

   procedure Free is new Ada.Unchecked_Deallocation
     (Float_Array, Array_Device_Access);

   --  Default problem size; may be overridden in the statement part below.
   Num_Elements : Integer := 2 ** 8;

   H_A, H_B, H_C : Array_Host_Access;
   D_A, D_B, D_C : Array_Device_Access;

   Threads_Per_Block : constant Integer := 256;

   --  Deliberately NOT initialised here: declarations are elaborated before
   --  the command line is parsed, so a value computed at this point would
   --  always reflect the default Num_Elements rather than the requested one.
   Blocks_Per_Grid : Integer;

   Gen : Generator;
   Err : Error_T;

   T0     : Time;
   Lapsed : Duration;

begin
   if Ada.Command_Line.Argument_Count >= 1 then
      --  The argument is an exponent, not an element count.
      Num_Elements := 2 ** Integer'Value (Ada.Command_Line.Argument (1));
   end if;

   --  Size the launch from the final element count.  The formula is the
   --  original one; it yields one extra block when Num_Elements is an exact
   --  multiple of Threads_Per_Block.
   Blocks_Per_Grid := Num_Elements / Threads_Per_Block + 1;

   H_A := new Float_Array (1 .. Num_Elements);
   H_B := new Float_Array (1 .. Num_Elements);
   H_C := new Float_Array (1 .. Num_Elements);

   --  Gen is never Reset, so it starts from its default initial state.
   H_A.all := (others => Float (Random (Gen)));
   H_B.all := (others => Float (Random (Gen)));

   --  Host run: one call per index, timed as a whole.
   T0 := Clock;

   --  NOTE(review): the arrays are indexed 1 .. Num_Elements while I runs
   --  over 0 .. Num_Elements - 1.  Complex_Computation is declared outside
   --  this file; confirm that it treats I as a zero-based offset.
   for I in 0 .. Num_Elements - 1 loop
      Complex_Computation (H_A.all, H_B.all, H_C.all, I);
   end loop;

   Lapsed := Clock - T0;

   Put_Line ("Host processing took " & Lapsed'Img & " seconds");

   --  Device run: the timed region covers allocation, input copies, the
   --  kernel launch and the copy of the result back into H_C.
   T0 := Clock;

   --  INSERT HERE DEVICE CALL

   --  Array_Device_Access is declared outside this file; presumably these
   --  allocators place the objects in device memory -- TODO confirm.
   D_A := new Float_Array'(H_A.all);
   D_B := new Float_Array'(H_B.all);
   D_C := new Float_Array (H_C.all'Range);

   pragma CUDA_Execute
     (Device_Complex_Computation (D_A, D_B, D_C),
      Threads_Per_Block,
      Blocks_Per_Grid);

   --  NOTE(review): Err is recorded but never inspected, so a failed launch
   --  goes unreported here.  Consider checking it against the success
   --  value of Error_T (declared outside this file).
   Err := Get_Last_Error;

   H_C.all := D_C.all;

   Lapsed := Clock - T0;

   Put_Line ("Device processing took " & Lapsed'Img & " seconds");

   Free (D_A);
   Free (D_B);
   Free (D_C);

   Free (H_A);
   Free (H_B);
   Free (H_C);
end Main;
|