Removed storage() from get_ptr; added boilerplate for bias dequant_mm.

author: Tim Dettmers <tim.dettmers@gmail.com> 2022-08-16 10:56:17 -0700
committer: Tim Dettmers <tim.dettmers@gmail.com> 2022-08-16 10:56:17 -0700
commit: 1ed2fa2f218d8dac401f3315420ffec92014c124 (patch)
tree: 57863d4d1024689100c1b43caccc1d8739c58d99 /csrc/kernels.cu
parent: 26efb154c8d77b4ede2cfc0dbd2381dd385f33e7 (diff)
1 files changed, 2 insertions, 2 deletions
diff --git a/csrc/kernels.cu b/csrc/kernels.cu
index d4eb56c..0a1bf79 100644
--- a/csrc/kernels.cu
+++ b/csrc/kernels.cu
@@ -1889,7 +1889,7 @@ template __global__ void kgetColRowStats<half, 64, 4, 16, 64*4, 1>(half * __rest
 
 #define MM_DEQUANT_CONST 6.200012e-05f //1.0f/(127.0f*127.0f)
 
-template <int ITEMS_PER_THREAD, int SUBTILE_ROWS, int THREADS>__global__ void kdequant_mm_int32_fp16(int *__restrict__ const A, float *__restrict__ const rowStats, float *__restrict__ const colStats, half *out, float* newRowStats, float* newcolStats, const int numRows, const int numCols, const int tileCols, const int n)
+template <int ITEMS_PER_THREAD, int SUBTILE_ROWS, int THREADS>__global__ void kdequant_mm_int32_fp16(int *__restrict__ const A, float *__restrict__ const rowStats, float *__restrict__ const colStats, half *out, float* newRowStats, float* newcolStats, half *__restrict__ const bias, const int numRows, const int numCols, const int tileCols, const int n)
 {
 
   // Strategy: To dequantize we need to load col/row statistics. This can be very expensive
@@ -2675,7 +2675,7 @@ template __global__ void kTransformRowToFormat<256, 8, 32, 32*8, 1, COL_TURING>(
 template __global__ void kTransformRowToFormat<256, 8, 32, 32*8, 0, COL_AMPERE>(char *__restrict__ const A, char *out, int rows, int cols, int tiledCols, int outRows, int outCols);
 template __global__ void kTransformRowToFormat<256, 8, 32, 32*8, 1, COL_AMPERE>(char *__restrict__ const A, char *out, int rows, int cols, int tiledCols, int outRows, int outCols);
 
-template __global__ void kdequant_mm_int32_fp16<4, 128, 512>(int *__restrict__ const A, float *__restrict__ const rowStats, float *__restrict__ const colStats, half *out, float* newRowStats, float* newcolStats, const int numRows, const int numCols, const int tileCols, const int n);
+template __global__ void kdequant_mm_int32_fp16<4, 128, 512>(int *__restrict__ const A, float *__restrict__ const rowStats, float *__restrict__ const colStats, half *out, float* newRowStats, float* newcolStats, half * __restrict__ const bias, const int numRows, const int numCols, const int tileCols, const int n);
 
 template __global__ void kDoubleRowColQuant<64, 4, 16, 64*4, 0>(half *__restrict__ const A, float *__restrict__ const rowStats, float * __restrict__ const colStats, char *out_col_normed, char *out_row_normed, int *rowidx, int *colidx, half *val, int * __restrict__ nnz_block_ptr, float threshold, int rows, int cols, int tiledCols);
 template __global__ void kDoubleRowColQuant<64, 4, 16, 64*4, 1>(half *__restrict__ const A, float *__restrict__ const rowStats, float * __restrict__ const colStats, char *out_col_normed, char *out_row_normed, int *rowidx, int *colidx, half *val, int * __restrict__ nnz_block_ptr, float threshold, int rows, int cols, int tiledCols);
author	Tim Dettmers <tim.dettmers@gmail.com>	2022-08-16 10:56:17 -0700
committer	Tim Dettmers <tim.dettmers@gmail.com>	2022-08-16 10:56:17 -0700
commit	1ed2fa2f218d8dac401f3315420ffec92014c124 (patch)
tree	57863d4d1024689100c1b43caccc1d8739c58d99 /csrc/kernels.cu
parent	26efb154c8d77b4ede2cfc0dbd2381dd385f33e7 (diff)