From 26003ed9560f36aaa1572d60c789435f1e1d8365 Mon Sep 17 00:00:00 2001 From: xeals Date: Fri, 3 May 2019 01:00:44 +1000 Subject: [PATCH] Initial commit --- .gitignore | 2 ++ Cargo.lock | 21 +++++++++++++++++++++ Cargo.toml | 16 ++++++++++++++++ build.rs | 9 +++++++++ src/c/test.cu | 38 ++++++++++++++++++++++++++++++++++++++ src/rust/main.rs | 34 ++++++++++++++++++++++++++++++++++ 6 files changed, 120 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 build.rs create mode 100644 src/c/test.cu create mode 100644 src/rust/main.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f0e3bca --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +**/*.rs.bk \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..4b781ea --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,21 @@ +[[package]] +name = "cc" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "cuda-test" +version = "0.1.0" +dependencies = [ + "cc 1.0.36 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.53 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "libc" +version = "0.2.53" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[metadata] +"checksum cc 1.0.36 (registry+https://github.com/rust-lang/crates.io-index)" = "a0c56216487bb80eec9c4516337b2588a4f2a2290d72a1416d930e4dcdb0c90d" +"checksum libc 0.2.53 (registry+https://github.com/rust-lang/crates.io-index)" = "ec350a9417dfd244dc9a6c4a71e13895a4db6b92f0b106f07ebbc3f3bc580cee" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..bfc943e --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "cuda-test" +version = "0.1.0" +authors = ["xeals "] +build = "build.rs" +edition = "2018" + +[dependencies] +libc = "0.2.53" + +[build-dependencies] +cc = "1.0.36" + +[[bin]] +name = "cuda-test" +path = "src/rust/main.rs" diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..4e2b7e3 --- /dev/null +++ b/build.rs @@ -0,0 +1,9 @@ +fn main() { + cc::Build::new() + .cuda(true) + .flag("-cudart=shared") + .file("src/c/test.cu") + .compile("libtest.a"); + println!("cargo:rustc-link-search=native=/opt/cuda/lib64"); + println!("cargo:rustc-link-lib=cudart"); +} diff --git a/src/c/test.cu b/src/c/test.cu new file mode 100644 index 0000000..a4d4086 --- /dev/null +++ b/src/c/test.cu @@ -0,0 +1,38 @@ +#include +#include +#include "kernel.cu" + +#define CUDA_ERRCHK(fn) { __gpucheck((fn), __FILE__, __LINE__); } + +inline cudaError_t __gpucheck(cudaError_t code, const char *file, int line) { + if (code != cudaSuccess) { + fprintf(stderr, "CUDA runtime error [%s:%d]: %s\n", file, line, cudaGetErrorString(code)); + exit(-1); + } + return code; +} + +__global__ +void mulAll_kernel(int *out, int *in, int n, size_t size) { + for (int i = blockIdx.x * blockDim.x + threadIdx.x; + i < size; + i += blockDim.x + gridDim.x) { + out[i] = in[i] * n; + } +} + +extern "C" { +void mulAll(int *out, const int *input, int n, size_t size) { + int *out_d, *in_d, i_d, size_d; + + CUDA_ERRCHK(cudaMalloc((void **) &in_d, sizeof(int) * size)); + CUDA_ERRCHK(cudaMemcpy(in_d, input, sizeof(int) * size, cudaMemcpyHostToDevice)); + + CUDA_ERRCHK(cudaMalloc((void **) &out_d, sizeof(int) * size)); + mulAll_kernel << < 32, 32 >> > (out_d, in_d, n, size); + CUDA_ERRCHK(cudaMemcpy(out, out_d, sizeof(int) * size, cudaMemcpyDeviceToHost)); + + CUDA_ERRCHK(cudaFree(in_d)); + CUDA_ERRCHK(cudaFree(out_d)); +} +} diff --git a/src/rust/main.rs b/src/rust/main.rs new file mode 100644 index 0000000..b787812 --- /dev/null +++ b/src/rust/main.rs @@ -0,0 +1,34 @@ +mod cuda { + use std::os::raw::c_int; + + use libc::size_t; + + #[link(name = "test", kind = "static")] + extern { + fn mulAll(out: *const c_int, input: *const c_int, n: c_int, size: size_t); + } + + pub fn mul_by(src: &[i32], by: i32) -> Vec { + unsafe { + let len = src.len() as size_t; + let psrc = src.as_ptr(); + + let mut res = Vec::with_capacity(src.len()); + let pres = res.as_mut_ptr(); + + mulAll(pres, psrc, by as c_int, len); + + // Turns out converting to a raw pointer drops the length information. + res.set_len(src.len()); + res + } + } +} + +fn main() { + let v = (1..128).collect::>(); + let n = 3; + + let o = cuda::mul_by(&v, n); + assert_eq!(o, v.iter().map(|i| i * n).collect::>(), "output mangled somewhere"); +}