niulx committed on
Commit
9a3725e
1 Parent(s): 703ff84

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +39 -0
main.py CHANGED
@@ -26,6 +26,45 @@ print("可用的 CUDA 设备数量:", device_count)
26
  for i in range(device_count):
27
  print(f"设备 {i} 名称:", torch.cuda.get_device_name(i))
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def run_main(
30
  name="example_tmp",
31
  name_2=None,
 
26
  for i in range(device_count):
27
  print(f"设备 {i} 名称:", torch.cuda.get_device_name(i))
28
 
29
+
30
+ import os
31
+
32
+ nvidia_devices = [dev for dev in os.listdir('/dev') if dev.startswith('nvidia')]
33
+ print("NVIDIA 设备文件:", nvidia_devices)
34
+ for dev in nvidia_devices:
35
+ path = os.path.join('/dev', dev)
36
+ print(f"设备 {path} 的权限:", oct(os.stat(path).st_mode))
37
+ import ctypes
38
+
39
+ try:
40
+ ctypes.CDLL('libcuda.so')
41
+ print("CUDA 库已加载。")
42
+ except Exception as e:
43
+ print("无法加载 CUDA 库,错误信息:", e)
44
+
45
+ import os
46
+
47
+ cuda_visible_devices = os.environ.get('CUDA_VISIBLE_DEVICES', '未设置')
48
+ print("CUDA_VISIBLE_DEVICES:", cuda_visible_devices)
49
+
50
+ try:
51
+ torch.cuda.init()
52
+ x = torch.tensor([1.0], device='cuda')
53
+ print("CUDA 张量创建成功:", x)
54
+ except Exception as e:
55
+ print("无法在 CUDA 上创建张量,错误信息:", e)
56
+
57
+ import subprocess
58
+
59
+ # 查看 GPU 信息
60
+ try:
61
+ result = subprocess.run(['lspci'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
62
+ gpu_info = [line for line in result.stdout.decode().split('\n') if 'NVIDIA' in line]
63
+ print("GPU 信息:", gpu_info)
64
+ except Exception as e:
65
+ print("无法获取 GPU 信息,错误信息:", e)
66
+
67
+
68
  def run_main(
69
  name="example_tmp",
70
  name_2=None,