@@ -165,9 +165,11 @@ Kubernetes provides to vendors a mechanism called device plugins to:
165
165
166
166
``` go
167
167
service DevicePlugin {
168
- // returns a stream of []Device
168
+ rpc GetDevicePluginOptions (Empty) returns (DevicePluginOptions) {}
169
169
rpc ListAndWatch (Empty) returns (stream ListAndWatchResponse ) {}
170
170
rpc Allocate (AllocateRequest) returns (AllocateResponse) {}
171
+ rpc GetPreferredAllocation (PreferredAllocationRequest) returns (PreferredAllocationResponse) {}
172
+ rpc PreStartContainer (PreStartContainerRequest) returns (PreStartContainerResponse) {}
171
173
}
172
174
```
173
175
@@ -306,27 +308,129 @@ service Registration {
306
308
rpc Register (RegisterRequest) returns (Empty) {}
307
309
}
308
310
311
+ message DevicePluginOptions {
312
+ // Indicates if PreStartContainer call is required before each container start
313
+ bool pre_start_required = 1 ;
314
+ // Indicates if GetPreferredAllocation is implemented and available for calling
315
+ bool get_preferred_allocation_available = 2 ;
316
+ }
317
+
318
+ message RegisterRequest {
319
+ // Version of the API the Device Plugin was built against
320
+ string version = 1 ;
321
+ // Name of the unix socket the device plugin is listening on
322
+ // PATH = path.Join(DevicePluginPath, endpoint)
323
+ string endpoint = 2 ;
324
+ // Schedulable resource name. As of now it's expected to be a DNS Label
325
+ string resource_name = 3 ;
326
+ // Options to be communicated with Device Manager
327
+ DevicePluginOptions options = 4 ;
328
+ }
329
+
330
+ message Empty {
331
+ }
332
+
309
333
// DevicePlugin is the service advertised by Device Plugins
310
334
service DevicePlugin {
335
+ // GetDevicePluginOptions returns options to be communicated with Device
336
+ // Manager
337
+ rpc GetDevicePluginOptions (Empty) returns (DevicePluginOptions) {}
338
+
311
339
// ListAndWatch returns a stream of List of Devices
312
340
// Whenever a Device state changes or a Device disappears, ListAndWatch
313
341
// returns the new list
314
342
rpc ListAndWatch (Empty) returns (stream ListAndWatchResponse ) {}
315
343
344
+ // GetPreferredAllocation returns a preferred set of devices to allocate
345
+ // from a list of available ones. The resulting preferred allocation is not
346
+ // guaranteed to be the allocation ultimately performed by the
347
+ // devicemanager. It is only designed to help the devicemanager make a more
348
+ // informed allocation decision when possible.
349
+ rpc GetPreferredAllocation (PreferredAllocationRequest) returns (PreferredAllocationResponse) {}
350
+
316
351
// Allocate is called during container creation so that the Device
317
352
// Plugin can run device specific operations and instruct Kubelet
318
353
// of the steps to make the Device available in the container
319
354
rpc Allocate (AllocateRequest) returns (AllocateResponse) {}
355
+
356
+ // PreStartContainer is called, if indicated by Device Plugin during registration phase,
357
+ // before each container start. Device plugin can run device specific operations
358
+ // such as resetting the device before making devices available to the container
359
+ rpc PreStartContainer (PreStartContainerRequest) returns (PreStartContainerResponse) {}
320
360
}
321
361
322
- message RegisterRequest {
323
- // Version of the API the Device Plugin was built against
324
- string version = 1 ;
325
- // Name of the unix socket the device plugin is listening on
326
- // PATH = path.Join(DevicePluginPath, endpoint)
327
- string endpoint = 2 ;
328
- // Schedulable resource name
329
- string resource_name = 3 ;
362
+ // ListAndWatch returns a stream of List of Devices
363
+ // Whenever a Device state changes or a Device disappears, ListAndWatch
364
+ // returns the new list
365
+ message ListAndWatchResponse {
366
+ repeated Device devices = 1 ;
367
+ }
368
+
369
+ message TopologyInfo {
370
+ repeated NUMANode nodes = 1 ;
371
+ }
372
+
373
+ message NUMANode {
374
+ int64 ID = 1 ;
375
+ }
376
+
377
+ /* E.g:
378
+ * struct Device {
379
+ * ID: "GPU-fef8089b-4820-abfc-e83e-94318197576e",
380
+ * Health: "Healthy",
381
+ * Topology:
382
+ * Node:
383
+ * ID: 1
384
+ *} */
385
+ message Device {
386
+ // A unique ID assigned by the device plugin used
387
+ // to identify devices during the communication
388
+ // Max length of this field is 63 characters
389
+ string ID = 1 ;
390
+ // Health of the device, can be healthy or unhealthy, see constants.go
391
+ string health = 2 ;
392
+ // Topology for device
393
+ TopologyInfo topology = 3 ;
394
+ }
395
+
396
+ // - PreStartContainer is expected to be called before each container start if indicated by plugin during registration phase.
397
+ // - PreStartContainer allows kubelet to pass reinitialized devices to containers.
398
+ // - PreStartContainer allows Device Plugin to run device specific operations on
399
+ // the Devices requested
400
+ message PreStartContainerRequest {
401
+ repeated string devices_ids = 1 [(gogoproto.customname ) = " DevicesIDs" ];
402
+ }
403
+
404
+ // PreStartContainerResponse will be sent by the plugin in response to PreStartContainerRequest
405
+ message PreStartContainerResponse {
406
+ }
407
+
408
+ // PreferredAllocationRequest is passed via a call to GetPreferredAllocation()
409
+ // at pod admission time. The device plugin should take the list of
410
+ // `available_deviceIDs` and calculate a preferred allocation of size
411
+ // 'allocation_size' from them, making sure to include the set of devices
412
+ // listed in 'must_include_deviceIDs'.
413
+ message PreferredAllocationRequest {
414
+ repeated ContainerPreferredAllocationRequest container_requests = 1 ;
415
+ }
416
+
417
+ message ContainerPreferredAllocationRequest {
418
+ // List of available deviceIDs from which to choose a preferred allocation
419
+ repeated string available_deviceIDs = 1 ;
420
+ // List of deviceIDs that must be included in the preferred allocation
421
+ repeated string must_include_deviceIDs = 2 ;
422
+ // Number of devices to include in the preferred allocation
423
+ int32 allocation_size = 3 ;
424
+ }
425
+
426
+ // PreferredAllocationResponse returns a preferred allocation,
427
+ // resulting from a PreferredAllocationRequest.
428
+ message PreferredAllocationResponse {
429
+ repeated ContainerPreferredAllocationResponse container_responses = 1 ;
430
+ }
431
+
432
+ message ContainerPreferredAllocationResponse {
433
+ repeated string deviceIDs = 1 ;
330
434
}
331
435
332
436
// - Allocate is expected to be called during pod creation since allocation
@@ -336,71 +440,61 @@ message RegisterRequest {
336
440
// - Allocate allows Device Plugin to run device specific operations on
337
441
// the Devices requested
338
442
message AllocateRequest {
339
- repeated string devicesIDs = 1 ;
443
+ repeated ContainerAllocateRequest container_requests = 1 ;
340
444
}
341
445
446
+ message ContainerAllocateRequest {
447
+ repeated string devices_ids = 1 [(gogoproto.customname ) = " DevicesIDs" ];
448
+ }
449
+
450
+ // AllocateResponse includes the artifacts that need to be injected into
451
+ // a container for accessing 'deviceIDs' that were mentioned as part of
452
+ // 'AllocateRequest'.
342
453
// Failure Handling:
343
454
// if Kubelet sends an allocation request for dev1 and dev2.
344
455
// Allocation on dev1 succeeds but allocation on dev2 fails.
345
456
// The Device plugin should send a ListAndWatch update and fail the
346
457
// Allocation request
347
458
message AllocateResponse {
348
- repeated DeviceRuntimeSpec spec = 1 ;
459
+ repeated ContainerAllocateResponse container_responses = 1 ;
349
460
}
350
461
351
- // ListAndWatch returns a stream of List of Devices
352
- // Whenever a Device state change or a Device disappears, ListAndWatch
353
- // returns the new list
354
- message ListAndWatchResponse {
355
- repeated Device devices = 1 ;
356
- }
357
-
358
- // The list to be added to the CRI spec
359
- message DeviceRuntimeSpec {
360
- string ID = 1 ;
361
-
362
- // List of environment variable to set in the container.
363
- map <string , string > envs = 2 ;
462
+ message ContainerAllocateResponse {
463
+ // List of environment variables to be set in the container to access one or more devices.
464
+ map <string , string > envs = 1 ;
364
465
// Mounts for the container.
365
- repeated Mount mounts = 3 ;
366
- // Devices for the container
367
- repeated DeviceSpec devices = 4 ;
368
- }
369
-
370
- // DeviceSpec specifies a host device to mount into a container.
371
- message DeviceSpec {
372
- // Path of the device within the container.
373
- string container_path = 1 ;
374
- // Path of the device on the host.
375
- string host_path = 2 ;
376
- // Cgroups permissions of the device, candidates are one or more of
377
- // * r - allows container to read from the specified device.
378
- // * w - allows container to write to the specified device.
379
- // * m - allows container to create device files that do not yet exist.
380
- string permissions = 3 ;
466
+ repeated Mount mounts = 2 ;
467
+ // Devices for the container.
468
+ repeated DeviceSpec devices = 3 ;
469
+ // Container annotations to pass to the container runtime
470
+ map <string , string > annotations = 4 ;
381
471
}
382
472
383
473
// Mount specifies a host volume to mount into a container.
384
474
// where device library or tools are installed on host and container
385
475
message Mount {
386
- // Path of the mount on the host.
387
- string host_path = 1 ;
388
476
// Path of the mount within the container.
389
- string mount_path = 2 ;
477
+ string container_path = 1 ;
478
+ // Path of the mount on the host.
479
+ string host_path = 2 ;
390
480
// If set, the mount is read-only.
391
481
bool read_only = 3 ;
392
482
}
393
483
394
- // E.g:
395
- // struct Device {
396
- // ID: "GPU-fef8089b-4820-abfc-e83e-94318197576e",
397
- // State: "Healthy",
398
- // }
399
- message Device {
400
- string ID = 2 ;
401
- string health = 3 ;
484
+ // DeviceSpec specifies a host device to mount into a container.
485
+ message DeviceSpec {
486
+ // Path of the device within the container.
487
+ string container_path = 1 ;
488
+ // Path of the device on the host.
489
+ string host_path = 2 ;
490
+ // Cgroups permissions of the device, candidates are one or more of
491
+ // * r - allows container to read from the specified device.
492
+ // * w - allows container to write to the specified device.
493
+ // * m - allows container to create device files that do not yet exist.
494
+ string permissions = 3 ;
402
495
}
403
496
```
497
+
404
498
### HealthCheck and Failure Recovery
405
499
406
500
We want Kubelet as well as the Device Plugins to recover from failures
@@ -575,17 +669,8 @@ protocol and are able to recover from a Kubelet crash.
575
669
Then, as long as the Device Plugin API does not change upgrading Kubelet can be done
576
670
seamlessly through a Kubelet restart.
577
671
578
- *Currently:*
579
- As mentioned in the Versioning section, we currently expect the Device Plugin's
580
- API version to match exactly the Kubelet's Device Plugin API version.
581
- Therefore if the Device Plugin API version change then you will have to change
582
- the Device Plugin too.
583
-
584
- *Future:*
585
- When the Device Plugin API becomes a stable feature, versioning should be
586
- backward compatible and even if Kubelet has a different Device Plugin API,
587
-
588
- it should not require a Device Plugin upgrade.
672
+ Upgrading Kubelet can be done seamlessly through a Kubelet restart and does not
673
+ require changes to workflow as the device plugin API is stable.
589
674
590
675
Refer to the versioning section for versioning scheme compatibility.
591
676
@@ -603,16 +688,10 @@ the Device Plugins.
603
688
604
689
### Version Skew Strategy
605
690
606
- Currently we require exact version match between Kubelet and Device Plugin.
607
- API version is expected to be increased only upon incompatible API changes.
608
-
609
- Follow protobuf guidelines on versioning :
610
- * Do not change ordering
611
- * Do not remove fields or change types
612
- * Add optional fields
613
- * Introducing new fields with proper default values
614
- * Freeze the package name to `apis/device-plugin/v1alpha1`
615
- * Have kubelet and the Device Plugin negotiate versions if we do break the API
691
+ Prior to v1.10, the versioning scheme required the Device Plugin's API version to
692
+ match exactly the Kubelet's version. With the graduation of this feature to Beta
693
+ and the move of the device plugin API to a stable API (version v1beta1), backward
694
+ compatibility is supported.
616
695
617
696
## Production Readiness Review Questionnaire
618
697
@@ -809,4 +888,4 @@ In Kubernetes v1.25, [Dynamic Resource Allocation](https://github.com/kubernetes
809
888
810
889
## Infrastructure Needed (Optional)
811
890
812
- Not Applicable.
891
+ Not Applicable.
0 commit comments