Brahma.FSharp


Brahma.FSharp

Documentation

Brahma.FSharp is a library for F# quotations to OpenCL translation.

The Brahma.FSharp library can be installed from NuGet:
PM> Install-Package Brahma.FSharp

If you want to use Brahma.FSharp on Linux/macOS, check OpenCL.Net.dll.config after installation and fix the path to opencl.dll if necessary.

Features of Brahma.FSharp:

  • We aim to translate native F# code to OpenCL while minimizing the need for wrappers and custom types.
  • We use OpenCL for communication with the GPU, so you can work not only with NVIDIA hardware but with any device that supports OpenCL (e.g. AMD devices).
  • We support tuples and structures.
  • We can use strongly typed kernels from OpenCL code in F#.

Example

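This example multiplies two square matrices both on the CPU (plain F#) and on an OpenCL device (a kernel translated from an F# quotation), then compares the results and the average time per iteration.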

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
49: 
50: 
51: 
52: 
53: 
54: 
55: 
56: 
57: 
58: 
59: 
60: 
61: 
62: 
63: 
64: 
65: 
66: 
67: 
68: 
69: 
70: 
71: 
72: 
73: 
74: 
75: 
76: 
77: 
78: 
79: 
80: 
81: 
82: 
83: 
84: 
85: 
86: 
87: 
88: 
89: 
90: 
91: 
92: 
93: 
94: 
95: 
96: 
97: 
module MatrixMultiply

open OpenCL.Net
open Brahma.OpenCL
open Brahma.FSharp.OpenCL.Core
open Microsoft.FSharp.Quotations
open Brahma.FSharp.OpenCL.Extensions

/// Pseudo-random generator shared by every matrix created in this module.
let random = System.Random()

/// Builds a matrix stored as a flat array of `rows * cols` float32 cells.
/// Each cell is an independent `random.NextDouble()` sample converted to float32.
let MakeMatrix rows cols =
    let cellCount = rows * cols
    // The cell index is irrelevant: every cell gets a fresh random sample.
    Array.init cellCount (fun _ -> random.NextDouble() |> float32)

/// Returns the (rows, columns) pair of the product of an `aRows x aCols`
/// matrix and a `bRows x bCols` matrix.
/// Fails with "Cannot multiply these two matrices" when `aCols` differs from `bRows`.
let GetOutputMatrixDimensions aRows aCols bRows bCols =
    match aCols = bRows with
    | true -> aRows, bCols
    | false -> failwith "Cannot multiply these two matrices"

/// CPU reference implementation of matrix multiplication on flat, row-major float32 arrays.
/// `a` is `aRows x aCols`, `b` is `bRows x bCols`; the product is ADDED to the
/// existing contents of `c` rather than overwriting them.
/// Fails (via GetOutputMatrixDimensions) when `aCols` differs from `bRows`.
let Multiply (a:array<_>) aRows aCols (b:array<_>) bRows bCols (c:array<_>) =
    let outRows, outCols = GetOutputMatrixDimensions aRows aCols bRows bCols
    for row = 0 to outRows - 1 do
        let aRowStart = row * aCols
        let cRowStart = row * outCols
        for col = 0 to outCols - 1 do
            // Dot product of row `row` of `a` with column `col` of `b`.
            let mutable dot = 0.0f
            for k = 0 to aCols - 1 do
                dot <- dot + a.[aRowStart + k] * b.[k * bCols + col]
            let target = cRowStart + col
            c.[target] <- c.[target] + dot
    
/// Benchmarks square matrix multiplication on the CPU against an OpenCL device.
///
/// `platformName` is passed to `ComputeProvider.Create` to select the OpenCL platform.
/// `mSize` is the row and column count of the two randomly filled input matrices.
///
/// Both the CPU routine and the kernel add the product into their output array, so
/// each is run `iterations` times and the accumulated results are then compared
/// element by element with a tolerance of 0.01. Only the first mismatch is printed.
/// Finally the average time per iteration of each implementation is printed.
///
/// Fails with the original error message if the compute provider cannot be created.
/// The command queue and the provider are released even when a later step throws.
let Main platformName mSize =

    let localWorkSize = 2
    let iterations = 10
    let deviceType = DeviceType.Default

    let aValues = MakeMatrix mSize mSize
    let bValues = MakeMatrix mSize mSize

    let provider =
        try  ComputeProvider.Create(platformName, deviceType)
        with
        | ex -> failwith ex.Message

    try
        let commandQueue = new CommandQueue(provider, provider.Devices |> Seq.head)
        try
            let cParallel = Array.zeroCreate(mSize * mSize)

            // Kernel: each work item (tx, ty) accumulates one output cell,
            // starting from the value already stored in c.
            let command =
                <@
                    fun (r:_2D) (a:array<_>) (b:array<_>) (c:array<_>) ->
                        let tx = r.GlobalID0
                        let ty = r.GlobalID1
                        let mutable buf = c.[ty * mSize + tx]
                        for k in 0 .. mSize - 1 do
                            buf <- buf + (a.[ty * mSize + k] * b.[k * mSize + tx])
                        c.[ty * mSize + tx] <- buf
                @>

            printfn "Multiplying two %Ax%A matrices %A times using .NET..." mSize mSize iterations
            let cNormal = Array.zeroCreate (mSize * mSize)
            // Stopwatch instead of DateTime.Now: the wall clock is coarse and may be adjusted mid-run.
            let cpuTimer = System.Diagnostics.Stopwatch.StartNew()
            for i in 0 .. iterations - 1 do
                Multiply aValues mSize mSize bValues mSize mSize cNormal
            cpuTimer.Stop()

            printfn "done."

            printfn "Multiplying two %Ax%A matrices %A times using OpenCL and selected platform/device : %A ..." mSize mSize iterations provider

            let _, kernelPrepare, kernelRun = provider.Compile command
            let d =(new _2D(mSize, mSize, localWorkSize, localWorkSize))
            kernelPrepare d aValues bValues cParallel

            let gpuTimer = System.Diagnostics.Stopwatch.StartNew()
            for i in 0 .. iterations - 1 do
                commandQueue.Add(kernelRun()).Finish() |> ignore
            gpuTimer.Stop()

            // Copy the device result back into cParallel before comparing.
            let _ = commandQueue.Add(cParallel.ToHost provider).Finish()

            printfn "Verifying results..."
            let mutable isSuccess = true
            for i in 0 .. mSize * mSize - 1 do
                if isSuccess && System.Math.Abs(float32 (cParallel.[i] - cNormal.[i])) > 0.01f
                then
                    isSuccess <- false
                    printfn "Expected: %A Actual: %A Error = %A" cNormal.[i] cParallel.[i] (System.Math.Abs(cParallel.[i] - cNormal.[i]))

            printfn "done."

            cpuTimer.Elapsed.TotalMilliseconds / float iterations |> printfn "Avg. time, F#: %A"
            gpuTimer.Elapsed.TotalMilliseconds / float iterations |> printfn "Avg. time, OpenCL: %A"
        finally
            // Same release order as before: queue first, then buffers, then the provider.
            commandQueue.Dispose()
    finally
        provider.CloseAllBuffers()
        provider.Dispose()
            
// Script entry point: run the benchmark with platform name "NVIDIA*" and 300 x 300 matrices.
do Main "NVIDIA*" 300

Note

Sometimes calculations can be interrupted by a GPU driver (OS) timeout (TDR). As a quick fix, you can set the TdrLevel registry key (KeyPath : HKEY_LOCAL_MACHINE\System\CurrentControlSet\Control\GraphicsDrivers) value to 0. If this key does not exist, you should create it. For more details, see "TDR Registry Keys (Windows Drivers)".

Samples & documentation

  • Tutorial contains a further explanation of this sample library.

  • API Reference contains automatically generated documentation for all types, modules and functions in the library. This includes additional brief samples on using most of the functions.

  • More examples are available here.

Contributing and copyright

The project is hosted on GitHub, where you can report issues, fork the project and submit pull requests. If you're adding a new public API, please also consider adding samples that can be turned into documentation. You might also want to read the library design notes to understand how it works.

The library is available under Eclipse Public License, which allows modification and redistribution for both commercial and non-commercial purposes. For more information see the License file in the GitHub repository.

Fork me on GitHub