I have an LXD instance based on the following profile:
config:
environment.DISPLAY: :1
environment.PULSE_SERVER: unix:/var/pulse-native
nvidia.driver.capabilities: all
nvidia.runtime: true
security.nesting: true
cloud-init.user-data: |
#cloud-config
package_upgrade: true
runcmd:
- 'apt-get update'
- 'apt-get install -y x11-apps'
- 'apt-get install -y mesa-utils'
- 'apt-get install -y pulseaudio'
- 'apt-get install -y pulseaudio-utils'
- 'apt-get install -y dbus-x11'
- 'apt-get install -y vulkan-tools'
- 'sed -i "s/; enable-shm = yes/enable-shm = no/g" /etc/pulse/client.conf'
- 'echo export PULSE_SERVER=unix:/var/pulse-native | tee --append /home/ubuntu/.profile'
- 'apt-get install -y -f'
description: Steam LXD profile
devices:
PASocket:
bind: container
connect: unix:/run/user/1001/pulse/native
listen: unix:/var/pulse-native
security.gid: "1001"
security.uid: "1001"
uid: "1000"
gid: "1000"
mode: "0777"
type: proxy
X0Socket:
bind: container
connect: unix:/tmp/.X11-unix/X2
listen: unix:/tmp/.X11-unix/X1
security.gid: "1001"
security.uid: "1001"
uid: "1000"
gid: "1000"
mode: "0777"
type: proxy
mygpu:
type: gpu
gid: 44
name: steam
I have two GPUs: a built-in Intel GPU and an NVIDIA GPU. Running vulkaninfo on the host shows:
GPU0:
apiVersion = 4206830 (1.3.238)
driverVersion = 96468996 (0x5c00004)
vendorID = 0x8086
deviceID = 0xa7a0
deviceType = PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU
deviceName = Intel(R) Graphics (RPL-P)
driverID = DRIVER_ID_INTEL_OPEN_SOURCE_MESA
driverName = Intel open-source Mesa driver
driverInfo = Mesa 23.0.4-0ubuntu1~22.04.1
conformanceVersion = 1.3.0.0
deviceUUID = d05c91e4-0bd3-f728-1afe-79d1db4dec74
driverUUID = 49579592-3e5c-2e53-be19-7f6d726063a9
GPU1:
apiVersion = 4206834 (1.3.242)
driverVersion = 2246476096 (0x85e68140)
vendorID = 0x10de
deviceID = 0x28e0
deviceType = PHYSICAL_DEVICE_TYPE_DISCRETE_GPU
deviceName = NVIDIA GeForce RTX 4060 Laptop GPU
driverID = DRIVER_ID_NVIDIA_PROPRIETARY
driverName = NVIDIA
driverInfo = 535.154.05
conformanceVersion = 1.3.5.0
deviceUUID = e3f4e87b-d44a-d7cb-cbf1-e93f7a8eaab3
driverUUID = 02b61036-1a0b-5721-99e2-071d493de8ce
GPU2:
apiVersion = 4206830 (1.3.238)
driverVersion = 1 (0x0001)
vendorID = 0x10005
deviceID = 0x0000
deviceType = PHYSICAL_DEVICE_TYPE_CPU
deviceName = llvmpipe (LLVM 15.0.7, 256 bits)
driverID = DRIVER_ID_MESA_LLVMPIPE
driverName = llvmpipe
driverInfo = Mesa 23.0.4-0ubuntu1~22.04.1 (LLVM 15.0.7)
conformanceVersion = 1.3.1.1
deviceUUID = 6d657361-3233-2e30-2e34-2d3075627500
driverUUID = 6c6c766d-7069-7065-5555-494400000000
However, vulkaninfo inside the instance shows:
GPU0:
apiVersion = 4206830 (1.3.238)
driverVersion = 96468996 (0x5c00004)
vendorID = 0x8086
deviceID = 0xa7a0
deviceType = PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU
deviceName = Intel(R) Graphics (RPL-P)
driverID = DRIVER_ID_INTEL_OPEN_SOURCE_MESA
driverName = Intel open-source Mesa driver
driverInfo = Mesa 23.0.4-0ubuntu1~22.04.1
conformanceVersion = 1.3.0.0
deviceUUID = d05c91e4-0bd3-f728-1afe-79d1db4dec74
driverUUID = 49579592-3e5c-2e53-be19-7f6d726063a9
GPU1:
apiVersion = 4206830 (1.3.238)
driverVersion = 1 (0x0001)
vendorID = 0x10005
deviceID = 0x0000
deviceType = PHYSICAL_DEVICE_TYPE_CPU
deviceName = llvmpipe (LLVM 15.0.7, 256 bits)
driverID = DRIVER_ID_MESA_LLVMPIPE
driverName = llvmpipe
driverInfo = Mesa 23.0.4-0ubuntu1~22.04.1 (LLVM 15.0.7)
conformanceVersion = 1.3.1.1
deviceUUID = 6d657361-3233-2e30-2e34-2d3075627500
driverUUID = 6c6c766d-7069-7065-5555-494400000000
Also, running vkcube --gpu_number 0 works, but running vkcube --gpu_number 1 does not.
Is the GPU device entry in my profile failing to pass the GPUs through properly, or is this a driver issue? Installing the NVIDIA drivers inside the instance fails (I tried to install the same drivers as on the host). Any advice on how to debug this issue?
The nvidia-smi output inside the instance is:
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.154.05 Driver Version: 535.154.05 CUDA Version: 12.2 |
|-----------------------------------------+----------------------+----------------------+
| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+======================+======================|
| 0 NVIDIA GeForce RTX 4060 ... Off | 00000000:01:00.0 Off | N/A |
| N/A 30C P0 N/A / 60W | 14MiB / 8188MiB | 0% Default |
| | | N/A |
+-----------------------------------------+----------------------+----------------------+