diff options
author | Tim Dettmers <TimDettmers@users.noreply.github.com> | 2022-07-18 09:51:37 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-07-18 09:51:37 -0700 |
commit | 4cd7ea62b2f51c68aacde2f62e7141765e476111 (patch) | |
tree | 548b2e77c62acd152330e898a6e17ea949a156d1 /csrc/pythonInterface.c | |
parent | 3418cd390e952a7752fb6b2544c25e25af7c0371 (diff) | |
parent | fd750cd2370b3b12e216a9148b23aaae63a80989 (diff) |
Merge pull request #3 from TimDettmers/cpuonly
Add a CPU-only build option
Diffstat (limited to 'csrc/pythonInterface.c')
-rw-r--r-- | csrc/pythonInterface.c | 11 |
1 file changed, 8 insertions, 3 deletions
diff --git a/csrc/pythonInterface.c b/csrc/pythonInterface.c index e0b0d59..c2fed6b 100644 --- a/csrc/pythonInterface.c +++ b/csrc/pythonInterface.c @@ -3,7 +3,10 @@ // This source code is licensed under the MIT license found in the // LICENSE file in the root directory of this source tree. +#if BUILD_CUDA #include <ops.cuh> +#endif +#include <cpu_ops.h> // We cannot call templated code from C, so we wrap the template in a C compatible call here if necessary. // We use macro functions to expand all the different optimizers. Looks ugly, and is ugly, but its better than to @@ -12,6 +15,7 @@ // UNMANGLED CALLS //=================================================================================== +#if BUILD_CUDA void estimateQuantiles_fp32(float *A, float *code, float offset, int n){ estimateQuantiles<float>(A, code, offset, n); } void estimateQuantiles_fp16(half *A, float *code, float offset, int n){ estimateQuantiles<half>(A, code, offset, n); } @@ -78,9 +82,11 @@ void quantizeBlockwise_stochastic_fp32(float * code, float *A, float *absmax, un void dequantizeBlockwise_fp16(float *code, unsigned char *A, float *absmax, half *out, int blocksize, const int n){ dequantizeBlockwise<half>(code, A, absmax, out, blocksize, n); } \ void dequantizeBlockwise_fp32(float *code, unsigned char *A, float *absmax, float *out, int blocksize, const int n){ dequantizeBlockwise<float>(code, A, absmax, out, blocksize, n); } +#endif extern "C" { + #if BUILD_CUDA void cestimate_quantiles_fp32(float *A, float *code, float offset, int n){ estimateQuantiles_fp32(A, code, offset, n); } void cestimate_quantiles_fp16(half *A, float *code, float offset, int n){ estimateQuantiles_fp16(A, code, offset, n); } void cquantize(float *code, float *A, unsigned char *out, int n){ quantize(code, A, out, n); } @@ -147,11 +153,10 @@ extern "C" void cpercentile_clipping_g32(float * g, float *gnorm_vec, int step, const int n){ percentileClipping_g32(g, gnorm_vec, step, n); } void cpercentile_clipping_g16(half * 
g, float *gnorm_vec, int step, const int n){ percentileClipping_g16(g, gnorm_vec, step, n); } + void chistogram_scatter_add_2d(float* histogram, int *index1, int *index2, float *src, int maxidx1, int n){ histogramScatterAdd2D(histogram, index1, index2, src, maxidx1, n); } + #endif void cquantize_blockwise_cpu_fp32(float *code, float *A, float *absmax, unsigned char *out, const int n){ quantize_cpu(code, A, absmax, out, n); } void cdequantize_blockwise_cpu_fp32(float *code, unsigned char *A, float *absmax, float *out, const int n){ dequantize_cpu(code, A, absmax, out, n); } - - void chistogram_scatter_add_2d(float* histogram, int *index1, int *index2, float *src, int maxidx1, int n){ histogramScatterAdd2D(histogram, index1, index2, src, maxidx1, n); } } - |