Let's create first program which operates with arrays using GPGPU.
Suppose, you have .NET runtime, F# development environment, and OpenCL-compatible device.
Now you can create new F# project and add Brahma.FSharp packet to it.
Let's check that Brahma.FSharp can detect at least one appropriate device.
1:
2:
3:
4:
|
open Brahma.FSharp
let device = ClDevice.GetFirstAppropriateDevice()
printfn "Device: %A" device.Name
|
As a result of execution you should get a name of detected device. For example, something like this:
Device: "Intel(R) UHD Graphics 620 [0x5917]"
If you have truobles with device detection, you can:
Let's create map2
-like kernel with following features.
- Kernel should be generic. We want to create a single function for all possible types of arrays.
- Kernel should be parameterized by operation, like a classic
map
.
It can be done with the following code.
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
|
open Brahma.FSharp
let arrayMap2 operation (clContext: ClContext) workGroupSize =
// The first argument of kernel function is a virtual grid. It is by contract.
let kernel =
<@
fun (range: Range1D) arrLength (array1: ClArray<_>) (array2: ClArray<_>) (result: ClArray<_>) ->
let i = range.GlobalID0
if i < arrLength then
result.[i] <- (%operation) array1.[i] array2.[i]
@>
let kernel = clContext.Compile kernel
// You can inspect generated OpenCL code.
printfn "Code: %A" kernel.Code
// Compilation can be done once.
// Compiled kernel can be executed many times on different queues with different arguments.
fun (commandQueue: MailboxProcessor<_>) (inputArray1: ClArray<_>) (inputArray2: ClArray<_>) ->
let ndRange = Range1D.CreateValid(inputArray1.Length, workGroupSize)
// We use default allocation mode, but it can be tuned for specific cases.
let outputArray = clContext.CreateClArray(inputArray1.Length, allocationMode = AllocationMode.Default)
// We should use new executable object for safety.
let kernel = kernel.GetKernel()
commandQueue.Post(
Msg.MsgSetArguments
(fun () -> kernel.KernelFunc ndRange inputArray1.Length inputArray1 inputArray2 outputArray)
)
commandQueue.Post(Msg.CreateRunMsg<_, _> kernel)
outputArray
|
Note, that our function operates over ClArray<_>
. It allows one to provide fine-grained memory management.
To execute or kernel we should do following steps.
- Create OpenCL-compatible device.
- Create execution context. Multiple contexts can be created on the top of single device. Moreover, one can use translators with different options in different contexts.
- Get at least one queue. Queue is a way to communicate with device. Moreover, it is a synchronization primitive: commands in queue will be executed in order.
- Allocate input data.
- Create instances of kernel.
- Run instances.
- Get result back to the host.
It can be done using the following code.
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
|
let main (argv: string[]) =
// Size of arrays
let n =
if argv.Length > 0 then
int argv.[0]
else
10
let device = ClDevice.GetFirstAppropriateDevice()
printfn "Device: %A" device.Name
let context = ClContext(device)
let mainQueue = context.QueueProvider.CreateQueue()
// Specific instances of kernel.
let intArraySum = arrayMap2 <@ (+) @> context 32
let boolArraySum = arrayMap2 <@ (&&) @> context 32
let arrayMask = arrayMap2 <@ fun x y -> if y then x else 0 @> context 32
// Helpers for random data generation.
let rnd = System.Random()
let randomIntArray () = Array.init n (fun _ -> rnd.Next() / 10000)
let randomBoolArray () = Array.init n (fun _ -> rnd.Next() % 2 = 1)
let intA1 = randomIntArray ()
let intA2 = randomIntArray ()
let boolA1 = randomBoolArray ()
let boolA2 = randomBoolArray ()
let boolA3 = randomBoolArray ()
// Allocation of input data.
// Data will be copied to device automatically.
let clIntA1 = context.CreateClArray<_>(intA1)
let clIntA2 = context.CreateClArray<_>(intA2)
let clBoolA1 = context.CreateClArray<_>(boolA1)
let clBoolA2 = context.CreateClArray<_>(boolA2)
let clBoolA3 = context.CreateClArray<_>(boolA3)
// Evaluation.
let intRes = intArraySum mainQueue clIntA1 clIntA2
// Result of first call will be passed as to the next call without copying.
let boolRes =
boolArraySum mainQueue clBoolA1 clBoolA2
|> boolArraySum mainQueue clBoolA3
let res = arrayMask mainQueue intRes boolRes
// Getting result as a F# array.
// We use PostAdReply for synchronization.
let resOnHost = Array.zeroCreate n
let res = mainQueue.PostAndReply(fun ch -> Msg.CreateToHostMsg(res, resOnHost, ch))
0
|
Finally, full code looks like follows.
1:
2:
3:
4:
5:
6:
7:
8:
9:
10:
11:
12:
13:
14:
15:
16:
17:
18:
19:
20:
21:
22:
23:
24:
25:
26:
27:
28:
29:
30:
31:
32:
33:
34:
35:
36:
37:
38:
39:
40:
41:
42:
43:
44:
45:
46:
47:
48:
49:
50:
51:
52:
53:
54:
55:
56:
57:
58:
59:
60:
61:
62:
63:
64:
65:
66:
67:
68:
69:
70:
71:
72:
73:
74:
75:
76:
77:
78:
79:
80:
81:
82:
83:
84:
85:
86:
|
module FinalVersion =
open Brahma.FSharp
open Brahma.FSharp.OpenCL.Translator
let arrayMap2 operation (clContext: ClContext) workGroupSize =
let kernel =
<@
fun (range: Range1D) arrLength (array1: ClArray<_>) (array2: ClArray<_>) (result: ClArray<_>) ->
let i = range.GlobalID0
if i < arrLength then
result.[i] <- (%operation) array1.[i] array2.[i]
@>
let kernel = clContext.Compile kernel
fun (commandQueue: MailboxProcessor<_>) (inputArray1: ClArray<_>) (inputArray2: ClArray<_>) ->
let ndRange = Range1D.CreateValid(inputArray1.Length, workGroupSize)
let outputArray = clContext.CreateClArray(inputArray1.Length, allocationMode = AllocationMode.Default)
let kernel = kernel.GetKernel()
commandQueue.Post(
Msg.MsgSetArguments
(fun () -> kernel.KernelFunc ndRange inputArray1.Length inputArray1 inputArray2 outputArray)
)
commandQueue.Post(Msg.CreateRunMsg<_, _> kernel)
outputArray
[<EntryPoint>]
let main argv =
let n =
if argv.Length > 0 then
int argv.[0]
else
10
let device = ClDevice.GetFirstAppropriateDevice()
let context = ClContext(device)
let mainQueue = context.QueueProvider.CreateQueue()
let intArraySum = arrayMap2 <@ (+) @> context 32
let boolArraySum = arrayMap2 <@ (&&) @> context 32
let arrayMask = arrayMap2 <@ fun x y -> if y then x else 0 @> context 32
let rnd = System.Random()
let randomIntArray () = Array.init n (fun _ -> rnd.Next() / 10000)
let randomBoolArray () = Array.init n (fun _ -> rnd.Next() % 2 = 1)
let intA1 = randomIntArray ()
let intA2 = randomIntArray ()
let boolA1 = randomBoolArray ()
let boolA2 = randomBoolArray ()
let boolA3 = randomBoolArray ()
let clIntA1 = context.CreateClArray<_>(intA1)
let clIntA2 = context.CreateClArray<_>(intA2)
let clBoolA1 = context.CreateClArray<_>(boolA1)
let clBoolA2 = context.CreateClArray<_>(boolA2)
let clBoolA3 = context.CreateClArray<_>(boolA3)
let intRes = intArraySum mainQueue clIntA1 clIntA2
let boolRes =
boolArraySum mainQueue clBoolA1 clBoolA2
|> boolArraySum mainQueue clBoolA3
let res = arrayMask mainQueue intRes boolRes
let resOnHost = Array.zeroCreate n
let res = mainQueue.PostAndReply(fun ch -> Msg.CreateToHostMsg(res, resOnHost, ch))
printfn "First int array: %A" intA1
printfn "Second int array: %A" intA2
printfn "First bool array: %A" boolA1
printfn "Second bool array: %A" boolA2
printfn "Third bool array: %A" boolA3
printfn "Result: %A" res
0
|
namespace Brahma
namespace Brahma.FSharp
val device : ClDevice
Multiple items
type ClDevice =
interface IDevice
new : device:Device -> ClDevice
override ToString : unit -> string
member Device : Device
member DeviceExtensions : string
member DeviceType : DeviceType
member MaxWorkGroupSize : int
member MaxWorkItemDimensions : int
member MaxWorkItemSizes : int []
member Name : string
...
--------------------
new : device:OpenCL.Net.Device -> ClDevice
static member ClDevice.GetFirstAppropriateDevice : ?platform:Platform * ?deviceType:DeviceType -> ClDevice
val printfn : format:Printf.TextWriterFormat<'T> -> 'T
property ClDevice.Name: string with get
val arrayMap2 : operation:Quotations.Expr<('a -> 'b -> 'c)> -> clContext:ClContext -> workGroupSize:int -> (MailboxProcessor<Msg> -> ClArray<'a> -> ClArray<'b> -> ClArray<'c>)
val operation : Quotations.Expr<('a -> 'b -> 'c)>
val clContext : ClContext
Multiple items
type ClContext =
new : clDevice:ClDevice * ?translator:FSQuotationToOpenCLTranslator * ?compilerOptions:string -> ClContext
override ToString : unit -> string
member ClDevice : ClDevice
member CompilerOptions : string option
member Context : Context
member QueueProvider : CommandQueueProvider
member Translator : FSQuotationToOpenCLTranslator
--------------------
new : clDevice:ClDevice * ?translator:OpenCL.Translator.FSQuotationToOpenCLTranslator * ?compilerOptions:string -> ClContext
val workGroupSize : int
val kernel : Quotations.Expr<(Range1D -> int -> ClArray<'a> -> ClArray<'b> -> ClArray<'c> -> unit)>
val range : Range1D
Multiple items
type Range1D =
interface INDRange
new : globalWorkSize:int -> Range1D
new : globalWorkSize:int * localWorkSize:int -> Range1D
private new : globalWorkSize:int * localWorkSize:int * __:unit -> Range1D
member GlobalID0 : int
member GlobalWorkSize : int
member LocalID0 : int
member LocalWorkSize : int
static member CreateValid : neededSize:int * localWorkSize:int -> Range1D
--------------------
new : globalWorkSize:int -> Range1D
new : globalWorkSize:int * localWorkSize:int -> Range1D
val arrLength : int
val array1 : ClArray<'a>
Multiple items
module ClArray
from Brahma.FSharp
--------------------
type ClArray<'a> =
interface IBuffer<'a>
interface IClMem
interface IDisposable
private new : buffer:ClBuffer<'a> -> ClArray<'a>
member Dispose : unit -> unit
override ToString : unit -> string
member private Buffer : ClBuffer<'a>
member Item : idx:int -> 'a with get
member Length : int
member Item : idx:int -> 'a with set
val array2 : ClArray<'b>
val result : ClArray<'c>
val i : int
property Range1D.GlobalID0: int with get
val kernel : ClProgram<Range1D,(int -> ClArray<'a> -> ClArray<'b> -> ClArray<'c> -> unit)>
member ClContext.Compile : srcLambda:Quotations.Expr<('TRange -> 'a)> -> ClProgram<'TRange,'a> (requires 'TRange :> INDRange)
property ClProgram.Code: string with get
val commandQueue : MailboxProcessor<Msg>
Multiple items
type MailboxProcessor<'Msg> =
interface IDisposable
new : body:(MailboxProcessor<'Msg> -> Async<unit>) * ?cancellationToken:CancellationToken -> MailboxProcessor<'Msg>
member Post : message:'Msg -> unit
member PostAndAsyncReply : buildMessage:(AsyncReplyChannel<'Reply> -> 'Msg) * ?timeout:int -> Async<'Reply>
member PostAndReply : buildMessage:(AsyncReplyChannel<'Reply> -> 'Msg) * ?timeout:int -> 'Reply
member PostAndTryAsyncReply : buildMessage:(AsyncReplyChannel<'Reply> -> 'Msg) * ?timeout:int -> Async<'Reply option>
member Receive : ?timeout:int -> Async<'Msg>
member Scan : scanner:('Msg -> Async<'T> option) * ?timeout:int -> Async<'T>
member Start : unit -> unit
member TryPostAndReply : buildMessage:(AsyncReplyChannel<'Reply> -> 'Msg) * ?timeout:int -> 'Reply option
...
--------------------
new : body:(MailboxProcessor<'Msg> -> Async<unit>) * ?cancellationToken:System.Threading.CancellationToken -> MailboxProcessor<'Msg>
val inputArray1 : ClArray<'a>
val inputArray2 : ClArray<'b>
val ndRange : Range1D
static member Range1D.CreateValid : neededSize:int * localWorkSize:int -> Range1D
property ClArray.Length: int with get
val outputArray : ClArray<'c>
member ClContext.CreateClArray : data:'a [] * ?hostAccessMode:HostAccessMode * ?deviceAccessMode:DeviceAccessMode * ?allocationMode:AllocationMode -> ClArray<'a>
member ClContext.CreateClArray : size:int * ?hostAccessMode:HostAccessMode * ?deviceAccessMode:DeviceAccessMode * ?allocationMode:AllocationMode -> ClArray<'a>
type AllocationMode =
| UseHostPtr
| AllocHostPtr
| CopyHostPtr
| AllocAndCopyHostPtr
| Default
union case AllocationMode.Default: AllocationMode
val kernel : ClKernel<Range1D,(int -> ClArray<'a> -> ClArray<'b> -> ClArray<'c> -> unit)>
member MailboxProcessor.Post : message:'Msg -> unit
type Msg =
| MsgToHost of IToHostCrate
| MsgToGPU of IToGPUCrate
| MsgRun of IRunCrate
| MsgFree of IFreeCrate
| MsgSetArguments of (unit -> unit)
| MsgNotifyMe of AsyncReplyChannel<unit>
| MsgBarrier of SyncObject
static member CreateBarrierMessages : numOfQueuesOnBarrier:int -> Msg []
static member CreateFreeMsg : src:IDisposable -> Msg
static member CreateRunMsg : kernel:IKernel -> Msg
static member CreateToGPUMsg : src:'a [] * dst:IBuffer<'a> -> Msg
static member CreateToHostMsg : src:IBuffer<'a> * dst:'a [] * ?ch:AsyncReplyChannel<'a []> -> Msg
union case Msg.MsgSetArguments: (unit -> unit) -> Msg
property ClKernel.KernelFunc: Range1D -> int -> ClArray<'a> -> ClArray<'b> -> ClArray<'c> -> unit with get
static member Msg.CreateRunMsg : kernel:IKernel -> Msg
val main : argv:string [] -> int
val argv : string []
Multiple items
val string : value:'T -> string
--------------------
type string = System.String
val n : int
property System.Array.Length: int with get
Multiple items
val int : value:'T -> int (requires member op_Explicit)
--------------------
type int = int32
--------------------
type int<'Measure> = int
val context : ClContext
val mainQueue : MailboxProcessor<Msg>
property ClContext.QueueProvider: CommandQueueProvider with get
member CommandQueueProvider.CreateQueue : unit -> MailboxProcessor<Msg>
val intArraySum : (MailboxProcessor<Msg> -> ClArray<int> -> ClArray<int> -> ClArray<int>)
val boolArraySum : (MailboxProcessor<Msg> -> ClArray<bool> -> ClArray<bool> -> ClArray<bool>)
val arrayMask : (MailboxProcessor<Msg> -> ClArray<int> -> ClArray<bool> -> ClArray<int>)
val x : int
val y : bool
val rnd : System.Random
namespace System
Multiple items
type Random =
new : unit -> Random + 1 overload
member Next : unit -> int + 2 overloads
member NextBytes : buffer:byte[] -> unit + 1 overload
member NextDouble : unit -> float
--------------------
System.Random() : System.Random
System.Random(Seed: int) : System.Random
val randomIntArray : (unit -> int [])
module Array
from Microsoft.FSharp.Collections
val init : count:int -> initializer:(int -> 'T) -> 'T []
System.Random.Next() : int
System.Random.Next(maxValue: int) : int
System.Random.Next(minValue: int, maxValue: int) : int
val randomBoolArray : (unit -> bool [])
val intA1 : int []
val intA2 : int []
val boolA1 : bool []
val boolA2 : bool []
val boolA3 : bool []
val clIntA1 : ClArray<int>
val clIntA2 : ClArray<int>
val clBoolA1 : ClArray<bool>
val clBoolA2 : ClArray<bool>
val clBoolA3 : ClArray<bool>
val intRes : ClArray<int>
val boolRes : ClArray<bool>
val res : ClArray<int>
val resOnHost : int []
val zeroCreate : count:int -> 'T []
val res : int []
member MailboxProcessor.PostAndReply : buildMessage:(AsyncReplyChannel<'Reply> -> 'Msg) * ?timeout:int -> 'Reply
val ch : AsyncReplyChannel<int []>
static member Msg.CreateToHostMsg : src:OpenCL.Shared.IBuffer<'a> * dst:'a [] * ?ch:AsyncReplyChannel<'a []> -> Msg
module FinalVersion
from Basic_Example
namespace Brahma.FSharp.OpenCL
namespace Brahma.FSharp.OpenCL.Translator
Multiple items
type ClContext =
new : clDevice:ClDevice * ?translator:FSQuotationToOpenCLTranslator * ?compilerOptions:string -> ClContext
override ToString : unit -> string
member ClDevice : ClDevice
member CompilerOptions : string option
member Context : Context
member QueueProvider : CommandQueueProvider
member Translator : FSQuotationToOpenCLTranslator
--------------------
new : clDevice:ClDevice * ?translator:FSQuotationToOpenCLTranslator * ?compilerOptions:string -> ClContext
Multiple items
type EntryPointAttribute =
inherit Attribute
new : unit -> EntryPointAttribute
--------------------
new : unit -> EntryPointAttribute