lxd容器之GPU发现和加载

时间:2023-12-05 20:21:50

lxd gpu设备发现:

// nvidiaGpuCards describes a per-card NVIDIA device node, i.e. one of
// /dev/nvidia[0-9]+.
type nvidiaGpuCards struct {
	path  string // device node path, e.g. /dev/nvidia0
	major int    // device major number
	minor int    // device minor number
	id    string // minor number rendered as a string
}

// nvidiaGpuDevices describes an NVIDIA device node that is not tied to a
// single card: {/dev/nvidiactl, /dev/nvidia-uvm, ...}.
type nvidiaGpuDevices struct {
	path  string // device node path under /dev
	major int    // device major number
	minor int    // device minor number
}

// gpuDevice describes one DRM node, e.g. /dev/dri/card0. If we detect that
// vendor == nvidia, then nvidia will contain the corresponding nvidia card,
// e.g. {/dev/dri/card1 --> /dev/nvidia1}.
type gpuDevice struct {
	vendorid  string // PCI vendor ID with the "0x" prefix removed
	productid string // PCI device ID with the "0x" prefix removed
	id        string // card id e.g. 0

	// If related devices have the same PCI address as the GPU we should
	// mount them all. Meaning if we detect /dev/dri/card0,
	// /dev/dri/controlD64, and /dev/dri/renderD128 with the same PCI
	// address, then they should all be made available in the container.
	pci string

	// nvidia is only filled in for card nodes of NVIDIA GPUs whose
	// /dev/nvidia<minor> node could be stat'ed; its path is "" otherwise.
	nvidia nvidiaGpuCards

	path  string // DRM node path under /dev/dri
	major int    // major number read from the sysfs "dev" file
	minor int    // minor number read from the sysfs "dev" file
}

// isNvidiaGpu reports whether the PCI vendor ID is NVIDIA's (10de), compared
// case-insensitively.
func (g *gpuDevice) isNvidiaGpu() bool {
	return strings.EqualFold(g.vendorid, "10de")
}

// cardIds remembers which card id was found at which PCI address so that the
// id can be propagated to the other DRM nodes at that address.
type cardIds struct {
	id  string
	pci string
}

// statDeviceNumbers stats path and decodes the major and minor numbers from
// st_rdev using the Linux dev_t layout (see makedev(3)).
func statDeviceNumbers(path string) (int, int, error) {
	var st syscall.Stat_t
	if err := syscall.Stat(path, &st); err != nil {
		return 0, 0, err
	}

	// A plain "/ 256" and "% 256" split is only correct while the minor
	// number fits in 8 bits; the upper minor bits live above the major.
	rdev := uint64(st.Rdev)
	major := int(((rdev >> 8) & 0xfff) | ((rdev >> 32) & 0xfffff000))
	minor := int((rdev & 0xff) | ((rdev >> 12) & 0xffffff00))

	return major, minor, nil
}

// deviceLoadGpu enumerates the GPUs of the host.
//
// It walks /sys/bus/pci/devices, keeps every PCI device that has a "drm"
// sub-directory and returns one gpuDevice per DRM node found there. When at
// least one NVIDIA card node is seen, the second return value lists every
// /dev entry whose name matches `^nvidia[^0-9]+`.
//
// A missing /sys/bus/pci/devices directory is not an error: all three results
// are nil. PCI devices and DRM nodes whose sysfs files cannot be read or
// parsed are skipped. On error both slices are nil.
func deviceLoadGpu() ([]gpuDevice, []nvidiaGpuDevices, error) {
	const pciDevicesPath = "/sys/bus/pci/devices"

	var gpus []gpuDevice
	var nvidiaDevices []nvidiaGpuDevices
	var cards []cardIds

	ents, err := ioutil.ReadDir(pciDevicesPath)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, nil, nil
		}
		return nil, nil, err
	}

	// Compile once instead of once per DRM node.
	cardPattern, err := regexp.Compile("^card[0-9]+")
	if err != nil {
		return nil, nil, err
	}

	isNvidia := false
	for _, ent := range ents {
		// The pci address == the name of the directory. So let's use
		// this cheap way of retrieving it.
		pciAddr := ent.Name()

		// Make sure that we are dealing with a GPU by looking whether
		// the "drm" subfolder exists.
		drm := filepath.Join(pciDevicesPath, pciAddr, "drm")
		drmEnts, err := ioutil.ReadDir(drm)
		if err != nil {
			continue
		}

		// Retrieve vendor ID. Any read failure skips the device rather
		// than recording a GPU with an empty ID.
		vendorRaw, err := ioutil.ReadFile(filepath.Join(pciDevicesPath, pciAddr, "vendor"))
		if err != nil {
			continue
		}

		// Retrieve device ID.
		productRaw, err := ioutil.ReadFile(filepath.Join(pciDevicesPath, pciAddr, "device"))
		if err != nil {
			continue
		}

		vendor := strings.TrimPrefix(strings.TrimSpace(string(vendorRaw)), "0x")
		product := strings.TrimPrefix(strings.TrimSpace(string(productRaw)), "0x")

		// Store all associated subdevices, e.g. controlD64, renderD128.
		// The name of the directory == the last part of the
		// /dev/dri/controlD64 path. So drmEnt.Name() will give us
		// controlD64.
		for _, drmEnt := range drmEnts {
			tmpGpu := gpuDevice{
				pci:       pciAddr,
				vendorid:  vendor,
				productid: product,
				path:      filepath.Join("/dev/dri", drmEnt.Name()),
			}

			// The sysfs "dev" file holds "<major>:<minor>".
			majMinByte, err := ioutil.ReadFile(filepath.Join(drm, drmEnt.Name(), "dev"))
			if err != nil {
				continue
			}

			majMinSlice := strings.Split(strings.TrimSpace(string(majMinByte)), ":")
			if len(majMinSlice) != 2 {
				continue
			}

			majorInt, err := strconv.Atoi(majMinSlice[0])
			if err != nil {
				continue
			}

			minorInt, err := strconv.Atoi(majMinSlice[1])
			if err != nil {
				continue
			}

			tmpGpu.major = majorInt
			tmpGpu.minor = minorInt

			isCard := cardPattern.MatchString(drmEnt.Name())
			if isCard {
				// If it is a card its minor number will be its id.
				tmpGpu.id = strconv.Itoa(minorInt)
				cards = append(cards, cardIds{id: tmpGpu.id, pci: tmpGpu.pci})
			}

			// Find matching /dev/nvidia* entry for /dev/dri/card*
			if isCard && tmpGpu.isNvidiaGpu() {
				isNvidia = true

				nvidiaPath := "/dev/nvidia" + strconv.Itoa(tmpGpu.minor)
				nvMajor, nvMinor, err := statDeviceNumbers(nvidiaPath)
				if err != nil {
					// The card node is dropped when its NVIDIA
					// node cannot be stat'ed; its id was already
					// recorded in cards above.
					continue
				}

				tmpGpu.nvidia = nvidiaGpuCards{
					path:  nvidiaPath,
					major: nvMajor,
					minor: nvMinor,
					id:    strconv.Itoa(nvMinor),
				}
			}

			gpus = append(gpus, tmpGpu)
		}
	}

	// We detected a Nvidia card, so let's collect all other nvidia devices
	// that are not /dev/nvidia[0-9]+.
	if isNvidia {
		nvidiaEnts, err := ioutil.ReadDir("/dev")
		if err != nil {
			return nil, nil, err
		}

		validNvidia, err := regexp.Compile(`^nvidia[^0-9]+`)
		if err != nil {
			return nil, nil, err
		}

		for _, nvidiaEnt := range nvidiaEnts {
			if !validNvidia.MatchString(nvidiaEnt.Name()) {
				continue
			}

			nvidiaPath := filepath.Join("/dev", nvidiaEnt.Name())
			nvMajor, nvMinor, err := statDeviceNumbers(nvidiaPath)
			if err != nil {
				continue
			}

			nvidiaDevices = append(nvidiaDevices, nvidiaGpuDevices{
				path:  nvidiaPath,
				major: nvMajor,
				minor: nvMinor,
			})
		}
	}

	// Since we'll give users the ability to specify an id we need to group
	// devices on the same PCI that belong to the same card by id.
	for _, card := range cards {
		for i := range gpus {
			if gpus[i].pci == card.pci {
				gpus[i].id = card.id
			}
		}
	}

	return gpus, nvidiaDevices, nil
}

lxd gpu设备加载:由下可见

最终是否加载,取决于通过 REST 接口创建时 request body 中 config.devices 里设备的 type 是否为 gpu,以及所指定的属性(vendorid、pci、productid、id)是否与发现到的 GPU 一致。那么客户端又是如何知道 vendorid、pci 等信息的?实际中一般需要建立 GPU 资源池,GPU 元数据由上层管理,并通过一定的调度规则来指定。GPU 资源的发现可以通过类似上面的函数完成,也可以通过 lspci 命令完成。

else if m["type"] == "gpu" {
	// Run GPU discovery only if no GPU list has been loaded yet.
	if gpus == nil {
		gpus, nvidiaDevices, err = deviceLoadGpu()
		if err != nil {
			return "", err
		}
	}

	sawNvidia := false
	for _, gpu := range gpus {
		// Whether a GPU is ultimately loaded depends on whether the
		// device type in the request body of the REST call is "gpu" and
		// on whether every attribute that was specified matches what
		// discovery reported. An empty attribute matches any GPU.
		if (m["vendorid"] != "" && gpu.vendorid != m["vendorid"]) ||
			(m["pci"] != "" && gpu.pci != m["pci"]) ||
			(m["productid"] != "" && gpu.productid != m["productid"]) ||
			(m["id"] != "" && gpu.id != m["id"]) {
			continue
		}

		// Expose the DRM node itself.
		err := c.setupUnixDevice(k, m, gpu.major, gpu.minor, gpu.path, true)
		if err != nil {
			return "", err
		}

		// No per-card NVIDIA node was recorded for this entry.
		if gpu.nvidia.path == "" {
			continue
		}

		// Expose the matching per-card NVIDIA node as well.
		err = c.setupUnixDevice(k, m, gpu.nvidia.major, gpu.nvidia.minor, gpu.nvidia.path, true)
		if err != nil {
			return "", err
		}

		sawNvidia = true
	}

	// At least one NVIDIA card was set up, so also expose the NVIDIA
	// device nodes that are not tied to a single card.
	if sawNvidia {
		for _, gpu := range nvidiaDevices {
			err := c.setupUnixDevice(k, m, gpu.major, gpu.minor, gpu.path, true)
			if err != nil {
				return "", err
			}
		}
	}
}

  

// setupUnixDevice() creates the unix device and sets up the necessary low-level
// liblxc configuration items.
func (c *containerLXC) setupUnixDevice(devType string, dev types.Device, major int, minor int, path string, createMustSucceed bool) error {
if c.IsPrivileged() && !runningInUserns && cgDevicesController {
         //设置设备访问白名单
err := lxcSetConfigItem(c.c, "lxc.cgroup.devices.allow", fmt.Sprintf("c %d:%d rwm", major, minor))
if err != nil {
return err
}
} temp := types.Device{}
if err := shared.DeepCopy(&dev, &temp); err != nil {
return err
} temp["major"] = fmt.Sprintf("%d", major)
temp["minor"] = fmt.Sprintf("%d", minor)
temp["path"] = path paths, err := c.createUnixDevice(temp)
if err != nil {
shared.LogDebug("failed to create device", log.Ctx{"err": err, "device": devType})
if createMustSucceed {
return err
}
return nil
}
devPath := paths[0]
tgtPath := paths[1]
     //设置挂载对象
err = lxcSetConfigItem(c.c, "lxc.mount.entry", fmt.Sprintf("%s %s none bind,create=file", devPath, tgtPath))
if err != nil {
return err
}
return nil
}

  

// Unix devices handling
func (c *containerLXC) createUnixDevice(m types.Device) ([]string, error) {
var err error
var major, minor int // Our device paths
srcPath := m["path"]
tgtPath := strings.TrimPrefix(srcPath, "/")
devName := fmt.Sprintf("unix.%s", strings.Replace(tgtPath, "/", "-", -1))
devPath := filepath.Join(c.DevicesPath(), devName)//var/lib/lxd/devices/容器名称/xxxx // Extra checks for nesting
if runningInUserns {
for key, value := range m {
if shared.StringInSlice(key, []string{"major", "minor", "mode", "uid", "gid"}) && value != "" {
return nil, fmt.Errorf("The \"%s\" property may not be set when adding a device to a nested container", key)
}
}
} // Get the major/minor of the device we want to create
if m["major"] == "" && m["minor"] == "" {
// If no major and minor are set, use those from the device on the host
_, major, minor, err = deviceGetAttributes(srcPath)
if err != nil {
return nil, fmt.Errorf("Failed to get device attributes for %s: %s", m["path"], err)
}
} else if m["major"] == "" || m["minor"] == "" {
return nil, fmt.Errorf("Both major and minor must be supplied for device: %s", m["path"])
} else {
major, err = strconv.Atoi(m["major"])
if err != nil {
return nil, fmt.Errorf("Bad major %s in device %s", m["major"], m["path"])
} minor, err = strconv.Atoi(m["minor"])
if err != nil {
return nil, fmt.Errorf("Bad minor %s in device %s", m["minor"], m["path"])
}
} // Get the device mode
mode := os.FileMode(0660)
if m["mode"] != "" {
tmp, err := deviceModeOct(m["mode"])
if err != nil {
return nil, fmt.Errorf("Bad mode %s in device %s", m["mode"], m["path"])
}
mode = os.FileMode(tmp)
} if m["type"] == "unix-block" {
mode |= syscall.S_IFBLK
} else {
mode |= syscall.S_IFCHR
} // Get the device owner
uid := 0
gid := 0 if m["uid"] != "" {
uid, err = strconv.Atoi(m["uid"])
if err != nil {
return nil, fmt.Errorf("Invalid uid %s in device %s", m["uid"], m["path"])
}
} if m["gid"] != "" {
gid, err = strconv.Atoi(m["gid"])
if err != nil {
return nil, fmt.Errorf("Invalid gid %s in device %s", m["gid"], m["path"])
}
} // Create the devices directory if missing
if !shared.PathExists(c.DevicesPath()) {
os.Mkdir(c.DevicesPath(), 0711)
if err != nil {
return nil, fmt.Errorf("Failed to create devices path: %s", err)
}
} // Clean any existing entry
if shared.PathExists(devPath) {
if runningInUserns {
syscall.Unmount(devPath, syscall.MNT_DETACH)
} err = os.Remove(devPath)
if err != nil {
return nil, fmt.Errorf("Failed to remove existing entry: %s", err)
}
} // Create the new entry
if !runningInUserns {
if err := syscall.Mknod(devPath, uint32(mode), minor|(major<<8)); err != nil {
return nil, fmt.Errorf("Failed to create device %s for %s: %s", devPath, m["path"], err)
} if err := os.Chown(devPath, uid, gid); err != nil {
return nil, fmt.Errorf("Failed to chown device %s: %s", devPath, err)
} // Needed as mknod respects the umask
if err := os.Chmod(devPath, mode); err != nil {
return nil, fmt.Errorf("Failed to chmod device %s: %s", devPath, err)
} if c.idmapset != nil {
if err := c.idmapset.ShiftFile(devPath); err != nil {
// uidshift failing is weird, but not a big problem. Log and proceed
shared.LogDebugf("Failed to uidshift device %s: %s\n", m["path"], err)
}
}
} else {
f, err := os.Create(devPath)
if err != nil {
return nil, err
}
f.Close() err = deviceMountDisk(srcPath, devPath, false, false)
if err != nil {
return nil, err
}
} return []string{devPath, tgtPath}, nil
}

  

func deviceMountDisk(srcPath string, dstPath string, readonly bool, recursive bool) error {
var err error // Prepare the mount flags
flags := 0
if readonly {
flags |= syscall.MS_RDONLY
} // Detect the filesystem
fstype := "none"
if deviceIsBlockdev(srcPath) {
fstype, err = shared.BlockFsDetect(srcPath)
if err != nil {
return err
}
} else {
flags |= syscall.MS_BIND
if recursive {
flags |= syscall.MS_REC
}
} // Mount the filesystem
if err = syscall.Mount(srcPath, dstPath, fstype, uintptr(flags), ""); err != nil {
return fmt.Errorf("Unable to mount %s at %s: %s", srcPath, dstPath, err)
} flags = syscall.MS_REC | syscall.MS_SLAVE
if err = syscall.Mount("", dstPath, "", uintptr(flags), ""); err != nil {
return fmt.Errorf("unable to make mount %s private: %s", dstPath, err)
} return nil
}