Spaces: Running on Zero
Update main.py
Browse files
main.py
CHANGED
@@ -26,6 +26,45 @@ print("可用的 CUDA 设备数量:", device_count)
|
|
26 |
for i in range(device_count):
|
27 |
print(f"设备 {i} 名称:", torch.cuda.get_device_name(i))
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
def run_main(
|
30 |
name="example_tmp",
|
31 |
name_2=None,
|
|
|
26 |
for i in range(device_count):
|
27 |
print(f"设备 {i} 名称:", torch.cuda.get_device_name(i))
|
28 |
|
29 |
+
|
30 |
+
import os
|
31 |
+
|
32 |
+
nvidia_devices = [dev for dev in os.listdir('/dev') if dev.startswith('nvidia')]
|
33 |
+
print("NVIDIA 设备文件:", nvidia_devices)
|
34 |
+
for dev in nvidia_devices:
|
35 |
+
path = os.path.join('/dev', dev)
|
36 |
+
print(f"设备 {path} 的权限:", oct(os.stat(path).st_mode))
|
37 |
+
import ctypes
|
38 |
+
|
39 |
+
try:
|
40 |
+
ctypes.CDLL('libcuda.so')
|
41 |
+
print("CUDA 库已加载。")
|
42 |
+
except Exception as e:
|
43 |
+
print("无法加载 CUDA 库,错误信息:", e)
|
44 |
+
|
45 |
+
import os
|
46 |
+
|
47 |
+
cuda_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES', '未设置')
|
48 |
+
print("CUDA_VISIBLE_DEVICES:", cuda_visible_devices)
|
49 |
+
|
50 |
+
try:
|
51 |
+
torch.cuda.init()
|
52 |
+
x = torch.tensor([1.0], device='cuda')
|
53 |
+
print("CUDA 张量创建成功:", x)
|
54 |
+
except Exception as e:
|
55 |
+
print("无法在 CUDA 上创建张量,错误信息:", e)
|
56 |
+
|
57 |
+
import subprocess
|
58 |
+
|
59 |
+
# 查看 GPU 信息
|
60 |
+
try:
|
61 |
+
result = subprocess.run(['lspci'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
62 |
+
gpu_info = [line for line in result.stdout.decode().split('\n') if 'NVIDIA' in line]
|
63 |
+
print("GPU 信息:", gpu_info)
|
64 |
+
except Exception as e:
|
65 |
+
print("无法获取 GPU 信息,错误信息:", e)
|
66 |
+
|
67 |
+
|
68 |
def run_main(
|
69 |
name="example_tmp",
|
70 |
name_2=None,
|