Merge pull request #3 from TimDettmers/cpuonly

Add a CPU-only build option
author: Tim Dettmers <TimDettmers@users.noreply.github.com> 2022-07-18 09:51:37 -0700
committer: GitHub <noreply@github.com> 2022-07-18 09:51:37 -0700
commit: 4cd7ea62b2f51c68aacde2f62e7141765e476111 (patch)
tree: 548b2e77c62acd152330e898a6e17ea949a156d1 /csrc/pythonInterface.c
parent: 3418cd390e952a7752fb6b2544c25e25af7c0371 (diff)
parent: fd750cd2370b3b12e216a9148b23aaae63a80989 (diff)
1 file changed, 8 insertions, 3 deletions
diff --git a/csrc/pythonInterface.c b/csrc/pythonInterface.c
index e0b0d59..c2fed6b 100644
--- a/csrc/pythonInterface.c
+++ b/csrc/pythonInterface.c
@@ -3,7 +3,10 @@
 // This source code is licensed under the MIT license found in the 
 // LICENSE file in the root directory of this source tree.
 
+#if BUILD_CUDA
 #include <ops.cuh>
+#endif
+#include <cpu_ops.h>
 
 // We cannot call templated code from C, so we wrap the template in a C compatible call here if necessary.
 // We use macro functions to expand all the different optimizers. Looks ugly, and is ugly, but its better than to 
@@ -12,6 +15,7 @@
 //                               UNMANGLED CALLS
 //===================================================================================
 
+#if BUILD_CUDA
 void estimateQuantiles_fp32(float *A, float *code, float offset, int n){ estimateQuantiles<float>(A, code, offset, n); }
 void estimateQuantiles_fp16(half *A, float *code, float offset, int n){ estimateQuantiles<half>(A, code, offset, n); }
 
@@ -78,9 +82,11 @@ void quantizeBlockwise_stochastic_fp32(float * code, float *A, float *absmax, un
 
 void dequantizeBlockwise_fp16(float *code, unsigned char *A, float *absmax, half *out, int blocksize, const int n){ dequantizeBlockwise<half>(code, A, absmax, out, blocksize, n); } \
 void dequantizeBlockwise_fp32(float *code, unsigned char *A, float *absmax, float *out, int blocksize, const int n){ dequantizeBlockwise<float>(code, A, absmax, out, blocksize, n); }
+#endif
 
 extern "C"
 {
+    #if BUILD_CUDA
 	void cestimate_quantiles_fp32(float *A, float *code, float offset, int n){ estimateQuantiles_fp32(A, code, offset, n); }
 	void cestimate_quantiles_fp16(half *A, float *code, float offset, int n){ estimateQuantiles_fp16(A, code, offset, n); }
 	void cquantize(float *code, float *A, unsigned char *out, int n){ quantize(code, A, out, n); }
@@ -147,11 +153,10 @@ extern "C"
 
 	void cpercentile_clipping_g32(float * g, float *gnorm_vec, int step, const int n){ percentileClipping_g32(g, gnorm_vec, step, n); }
 	void cpercentile_clipping_g16(half * g, float *gnorm_vec, int step, const int n){ percentileClipping_g16(g, gnorm_vec, step, n); }
+	void chistogram_scatter_add_2d(float* histogram, int *index1, int *index2, float *src, int maxidx1, int n){ histogramScatterAdd2D(histogram, index1, index2, src, maxidx1, n); }
 
+    #endif
 	void cquantize_blockwise_cpu_fp32(float *code, float *A, float *absmax, unsigned char *out, const int n){ quantize_cpu(code, A, absmax, out, n); }
 	void cdequantize_blockwise_cpu_fp32(float *code, unsigned char *A, float *absmax, float *out, const int n){ dequantize_cpu(code, A, absmax, out, n); }
-
-	void chistogram_scatter_add_2d(float* histogram, int *index1, int *index2, float *src, int maxidx1, int n){ histogramScatterAdd2D(histogram, index1, index2, src, maxidx1, n); }
 }
 
-
author	Tim Dettmers <TimDettmers@users.noreply.github.com>	2022-07-18 09:51:37 -0700
committer	GitHub <noreply@github.com>	2022-07-18 09:51:37 -0700
commit	4cd7ea62b2f51c68aacde2f62e7141765e476111 (patch)
tree	548b2e77c62acd152330e898a6e17ea949a156d1 /csrc/pythonInterface.c
parent	3418cd390e952a7752fb6b2544c25e25af7c0371 (diff)
parent	fd750cd2370b3b12e216a9148b23aaae63a80989 (diff)