1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
|
"""
extract factors the build is dependent on:
[X] compute capability
[ ] TODO: Q - What if we have multiple GPUs of different makes?
- CUDA version
- Software:
- CPU-only: only CPU quantization functions (no optimizer, no matrix multiplication)
- CuBLAS-LT: full-build 8-bit optimizer
- no CuBLAS-LT: no 8-bit matrix multiplication (`nomatmul`)
package all binaries
evaluation:
- if paths faulty, return meaningful error
- else:
- determine CUDA version
- determine capabilities
- based on that set the default path
"""
import ctypes
import os
from pathlib import Path
from typing import Set, Union
from ..utils import print_err, warn_of_missing_prerequisite, execute_and_return
def check_cuda_result(cuda, result_val):
    """Raise if a CUDA driver API call returned a non-zero status code.

    Args:
        cuda: loaded ``libcuda`` handle (``ctypes.CDLL``), used to resolve
            the human-readable error string via ``cuGetErrorString``.
        result_val: integer ``CUresult`` status returned by a driver call.

    Raises:
        Exception: with the decoded CUDA error string when ``result_val != 0``.
    """
    # 3. Check for CUDA errors
    if result_val != 0:
        error_str = ctypes.c_char_p()
        cuda.cuGetErrorString(result_val, ctypes.byref(error_str))
        # Use .value and decode: formatting the c_char_p object itself would
        # print e.g. "c_char_p(140234...)" instead of the actual message.
        message = error_str.value.decode() if error_str.value else "Unknown CUDA error"
        raise Exception(f"CUDA exception! ERROR: {message}")
# taken from https://gist.github.com/f0k/63a664160d016a491b2cbea15913d549
def get_compute_capability():
    """Return the highest compute capability of the visible GPUs as "major.minor".

    Steps:
      1. find libcuda.so library (GPU driver) (/usr/lib)
      2. call extern C function to determine CC
         (https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__DEVICE__DEPRECATED.html)
      3. Check for CUDA errors
         (https://stackoverflow.com/questions/14038589/what-is-the-canonical-way-to-check-for-errors-using-the-cuda-runtime-api)

    Returns:
        The maximum compute capability string (e.g. "8.6"), or None when the
        driver reports zero devices — callers treat a falsy value as CPU-only.

    Raises:
        OSError: if the CUDA driver library cannot be loaded.
    """
    # 1. find libcuda.so library (GPU driver) (/usr/lib)
    libnames = ("libcuda.so",)
    for libname in libnames:
        try:
            cuda = ctypes.CDLL(libname)
        except OSError:
            continue
        else:
            break
    else:
        raise OSError("could not load any of: " + " ".join(libnames))

    nGpus = ctypes.c_int()
    cc_major = ctypes.c_int()
    cc_minor = ctypes.c_int()
    device = ctypes.c_int()

    check_cuda_result(cuda, cuda.cuInit(0))
    check_cuda_result(cuda, cuda.cuDeviceGetCount(ctypes.byref(nGpus)))
    ccs = []
    for i in range(nGpus.value):
        check_cuda_result(cuda, cuda.cuDeviceGet(ctypes.byref(device), i))
        # 2. call extern C function to determine CC
        check_cuda_result(
            cuda,
            cuda.cuDeviceComputeCapability(
                ctypes.byref(cc_major), ctypes.byref(cc_minor), device
            ),
        )
        ccs.append(f"{cc_major.value}.{cc_minor.value}")

    if not ccs:
        # No devices found; indexing ccs[-1] would raise. Return None so the
        # caller falls back to the CPU-only library.
        return None

    # TODO: handle different compute capabilities; for now, take the max.
    # Sort numerically: a plain string sort would rank "10.0" below "9.0".
    ccs.sort(key=lambda cc: tuple(int(part) for part in cc.split(".")))
    return ccs[-1]
# Filename of the CUDA runtime shared library this module searches for.
CUDA_RUNTIME_LIB: str = "libcudart.so"


def tokenize_paths(paths: str) -> Set[Path]:
    """Split a colon-separated search-path string into a set of Paths.

    Empty segments (produced by leading, trailing, or doubled colons) are
    dropped.
    """
    result: Set[Path] = set()
    for segment in paths.split(":"):
        if segment:
            result.add(Path(segment))
    return result
def resolve_env_variable(env_var):
    """Search a colon-separated path string for the CUDA runtime library (libcudart.so).

    Args:
        env_var: colon-separated list of directories, e.g. the value of
            ``LD_LIBRARY_PATH`` or a conda ``lib`` directory.

    Returns:
        Path to the found libcudart.so, or None when no listed directory
        contains it (not an error — other environments may contain CUDA).

    Raises:
        FileNotFoundError: when more than one distinct libcudart.so is found.
            NOTE(review): exception type kept for backward compatibility even
            though "duplicates found" is not a missing-file condition.
    """
    paths: Set[Path] = tokenize_paths(env_var)

    non_existent_directories: Set[Path] = {
        path for path in paths if not path.exists()
    }
    if non_existent_directories:
        print_err(
            "WARNING: The following directories listed in your path were found to "
            f"be non-existent: {non_existent_directories}"
        )

    # is_file() already excludes paths under missing directories, so the old
    # "- non_existent_directories" subtraction was a no-op (it subtracted
    # directory paths from a set of file paths) and has been dropped.
    cuda_runtime_libs: Set[Path] = {
        path / CUDA_RUNTIME_LIB
        for path in paths
        if (path / CUDA_RUNTIME_LIB).is_file()
    }

    if len(cuda_runtime_libs) > 1:
        err_msg = (
            f"Found duplicate {CUDA_RUNTIME_LIB} files: {cuda_runtime_libs}.."
        )
        raise FileNotFoundError(err_msg)
    if not cuda_runtime_libs:
        return None  # this is not an error, since other envs can contain CUDA
    return next(iter(cuda_runtime_libs))  # for now just return the first
def get_cuda_runtime_lib_path() -> Union[Path, None]:
    """Search the conda installation and environment variables for a CUDA runtime.

    ``CONDA_PREFIX/lib`` (the default location of a conda pytorch install)
    takes priority over every other environment variable; otherwise the first
    hit found while scanning the environment (in particular LD_LIBRARY_PATH)
    is returned.

    Returns:
        Path to the located libcudart.so.

    Raises:
        FileNotFoundError: when no searched location contains the library.
    """
    # CONDA_PREFIX/lib is the default location for a default conda
    # install of pytorch. This location takes priority over all
    # other defined variables.
    if "CONDA_PREFIX" in os.environ:
        lib_conda_path = f'{os.environ["CONDA_PREFIX"]}/lib/'
        conda_lib = resolve_env_variable(lib_conda_path)
        # resolve_env_variable returns None on a miss; only accept real hits,
        # otherwise None would be returned here as if it were a valid path.
        if conda_lib is not None:
            return conda_lib

    # If CONDA_PREFIX does not have the library, search the environment
    # (in particular LD_LIBRARY_PATH). Pass each variable's *value* — the
    # previous code iterated the variable *names*, which never contain paths.
    cuda_runtime_libs = []
    for value in os.environ.values():
        lib = resolve_env_variable(value)
        if lib is not None:
            cuda_runtime_libs.append(lib)

    if not cuda_runtime_libs:
        err_msg = (
            f"Did not find {CUDA_RUNTIME_LIB} files: {cuda_runtime_libs}.."
        )
        raise FileNotFoundError(err_msg)

    return cuda_runtime_libs[0]
def evaluate_cuda_setup():
    """Determine which precompiled bitsandbytes binary should be loaded.

    Falls back to the CPU-only library when no GPU is detected; otherwise the
    binary name is built from the installed CUDA runtime version and whether
    the GPU supports cuBLASLt.

    Returns:
        The shared-library filename to load (e.g. "libbitsandbytes_cuda112.so").
    """
    cuda_path = get_cuda_runtime_lib_path()
    print(f'CUDA SETUP: CUDA path found: {cuda_path}')
    cc = get_compute_capability()

    binary_name = "libbitsandbytes_cpu.so"
    if not (has_gpu := bool(cc)):
        print(
            "WARNING: No GPU detected! Check your CUDA paths. Proceeding to load CPU-only library..."
        )
        return binary_name

    # cuBLASLt (8-bit matmul) requires compute capability 7.5 or newer.
    # Compare numerically instead of against a hard-coded list so newer
    # architectures (8.9, 9.0, ...) are recognized as well.
    has_cublaslt = tuple(int(part) for part in cc.split(".")) >= (7, 5)

    # TODO:
    # (1) CUDA missing cases (no CUDA installed by CUDA driver (nvidia-smi accessible)
    # (2) Multiple CUDA versions installed

    # we use ls -l instead of nvcc to determine the cuda version
    # since most installations will have the libcudart.so installed, but not the compiler
    ls_output, err = execute_and_return(f"ls -l {cuda_path}")
    major, minor, revision = ls_output.split(' ')[-1].replace('libcudart.so.', '').split('.')
    cuda_version_string = f"{major}{minor}"

    binary_name = f'libbitsandbytes_cuda{cuda_version_string}{("" if has_cublaslt else "_nocublaslt")}.so'
    return binary_name
|