From 27e539626c47ea57451ec9fe88fc8a3da17874b6 Mon Sep 17 00:00:00 2001 From: Sergey Cheperis Date: Thu, 22 Aug 2024 14:56:14 +0000 Subject: [PATCH 1/9] JSON output mode --- app/src/app.c | 15 ++++-- lib/include/gddr6.h | 5 +- lib/src/gddr6.c | 123 ++++++++++++++++++++++++++++++-------------- 3 files changed, 99 insertions(+), 44 deletions(-) diff --git a/app/src/app.c b/app/src/app.c index 2292301..8c1f13d 100644 --- a/app/src/app.c +++ b/app/src/app.c @@ -3,6 +3,7 @@ #include #include #include +#include void register_signal_handlers(void) { @@ -24,12 +25,20 @@ int main(int argc, char **argv) if (num_devs == 0) { - printf("No compatible GPU found.\n"); + fprintf(stderr, "No compatible GPU found.\n"); return 1; } - gddr6_memory_map(); - gddr6_monitor_temperatures(); + if (argc >= 2 && !strcmp(argv[1], "-j")) + { + gddr6_memory_map(0); + gddr6_print_temperatures_json(); + } + else + { + gddr6_memory_map(1); + gddr6_monitor_temperatures(); + } return 0; } diff --git a/lib/include/gddr6.h b/lib/include/gddr6.h index 957d71b..25806fd 100644 --- a/lib/include/gddr6.h +++ b/lib/include/gddr6.h @@ -22,12 +22,15 @@ struct gddr6_ctx { struct device *devices; int num_devices; int fd; + uint32_t *temperatures; }; void gddr6_init(void); -void gddr6_memory_map(void); +void gddr6_memory_map(int verbose); void gddr6_cleanup(int signal); +void gddr6_get_temperatures(void); void gddr6_monitor_temperatures(void); +void gddr6_print_temperatures_json(void); int gddr6_detect_compatible_gpus(void); #endif // GDDR6_H diff --git a/lib/src/gddr6.c b/lib/src/gddr6.c index d6974f6..3d879ff 100644 --- a/lib/src/gddr6.c +++ b/lib/src/gddr6.c @@ -53,7 +53,8 @@ struct device dev_table[] = void gddr6_init(void) { ctx.fd = open("/dev/mem", O_RDONLY); - if (ctx.fd == -1) { + if (ctx.fd == -1) + { PRINT_ERROR(); } } @@ -73,37 +74,42 @@ int gddr6_detect_compatible_gpus(void) for (pci_dev = pacc->devices; pci_dev != NULL; pci_dev = pci_dev->next) { - pci_fill_info(pci_dev, PCI_FILL_IDENT | PCI_FILL_BASES | PCI_FILL_CLASS); - for (uint32_t i = 0; i < dev_table_size; ++i) - { - if (pci_dev->device_id == dev_table[i].dev_id) - { - struct device *new_devices = realloc(ctx.devices, (ctx.num_devices + 1) * sizeof(struct device)); - if (new_devices == NULL) - { - fprintf(stderr, "Memory allocation failed\n"); - pci_cleanup(pacc); - free(ctx.devices); - ctx.devices = NULL; - return 0; - } - ctx.devices = new_devices; - - ctx.devices[ctx.num_devices] = dev_table[i]; - ctx.devices[ctx.num_devices].bar0 = (pci_dev->base_addr[0] & 0xffffffff); - ctx.devices[ctx.num_devices].bus = pci_dev->bus; - ctx.devices[ctx.num_devices].dev = pci_dev->dev; - ctx.devices[ctx.num_devices].func = pci_dev->func; - ctx.num_devices++; - } - } - } + pci_fill_info(pci_dev, PCI_FILL_IDENT | PCI_FILL_BASES | PCI_FILL_CLASS); + for (uint32_t i = 0; i < dev_table_size; ++i) + { + if (pci_dev->device_id == dev_table[i].dev_id) + { + struct device *new_devices = realloc(ctx.devices, (ctx.num_devices + 1) * sizeof(struct device)); + if (new_devices == NULL) + { + fprintf(stderr, "Memory allocation failed\n"); + exit(EXIT_FAILURE); + } + ctx.devices = new_devices; + + ctx.devices[ctx.num_devices] = dev_table[i]; + ctx.devices[ctx.num_devices].bar0 = (pci_dev->base_addr[0] & 0xffffffff); + ctx.devices[ctx.num_devices].bus = pci_dev->bus; + ctx.devices[ctx.num_devices].dev = pci_dev->dev; + ctx.devices[ctx.num_devices].func = pci_dev->func; + ctx.num_devices++; + } + } + } pci_cleanup(pacc); + + ctx.temperatures = malloc(ctx.num_devices * sizeof(uint32_t)); + if (ctx.temperatures == NULL) + { + fprintf(stderr, "Memory allocation failed\n"); + exit(EXIT_FAILURE); + } + return ctx.num_devices; } -void gddr6_memory_map(void) +void gddr6_memory_map(int verbose) { for (uint32_t i = 0; i < ctx.num_devices; i++) { @@ -117,33 +123,70 @@ void gddr6_memory_map(void) fprintf(stderr, "Memory mapping failed for pci=%x:%x:%x\n", ctx.devices[i].bus, ctx.devices[i].dev, ctx.devices[i].func); fprintf(stderr, "Did you enable iomem=relaxed? Are you r00t?\n"); exit(EXIT_FAILURE); - } else { - printf("Device: %s %s (%s / 0x%04x) pci=%x:%x:%x\n", ctx.devices[i].name, ctx.devices[i].vram, + } + + if (verbose) + { + printf("Device: %s %s (%s / 0x%04x) pci=%02x:%02x:%02x\n", ctx.devices[i].name, ctx.devices[i].vram, ctx.devices[i].arch, ctx.devices[i].dev_id, ctx.devices[i].bus, ctx.devices[i].dev, ctx.devices[i].func); } } } -void gddr6_monitor_temperatures(void) +void gddr6_get_temperatures(void) { - while (1) { - printf("\rVRAM Temps: |"); - for (uint32_t i = 0; i < ctx.num_devices; i++) + for (uint32_t i = 0; i < ctx.num_devices; i++) + { + if (ctx.devices[i].mapped_addr == NULL || ctx.devices[i].mapped_addr == MAP_FAILED) + { + ctx.temperatures[i] = 0;; + } + else { - if (ctx.devices[i].mapped_addr == NULL || ctx.devices[i].mapped_addr == MAP_FAILED) - { - continue; - } - void *virt_addr = (uint8_t *) ctx.devices[i].mapped_addr + (ctx.devices[i].phys_addr - ctx.devices[i].base_offset); uint32_t read_result = *((uint32_t *)virt_addr); uint32_t temp = ((read_result & 0x00000fff) / 0x20); + ctx.temperatures[i] = temp; + } + } +} - printf(" %3u°C |", temp); +void gddr6_monitor_temperatures(void) +{ + while (1) + { + gddr6_get_temperatures(); + printf("\rVRAM Temps: |"); + for (uint32_t i = 0; i < ctx.num_devices; i++) + { + printf(" %3u°C |", ctx.temperatures[i]); } fflush(stdout); sleep(1); - } + } +} + +void gddr6_print_temperatures_json(void) +{ + if (ctx.num_devices == 0) + { + printf("[]\n"); + return; + } + + gddr6_get_temperatures(); + + printf("[\n"); + for (uint32_t i = 0; i < ctx.num_devices; i++) + { + char *delimiter = i < ctx.num_devices - 1 ? "," : ""; + printf( + " {\"name\: \"%s\", \"vram\": \"%s\", \"arch\": \"%s\", \"dev_id\": \"0x%04x\", \"pci_id\": \"%02x:%02x:%02x\", \"temp\": %d}%s\n", + ctx.devices[i].name, ctx.devices[i].vram, ctx.devices[i].arch, ctx.devices[i].dev_id, ctx.devices[i].bus, ctx.devices[i].dev, + ctx.devices[i].func, ctx.temperatures[i] + ); + } + printf("]\n"); } void gddr6_cleanup(int signal) From 0e797a1799d352064e72987e8098eb45f3c585c8 Mon Sep 17 00:00:00 2001 From: Sergey Cheperis Date: Thu, 22 Aug 2024 14:58:52 +0000 Subject: [PATCH 2/9] Fixes --- lib/src/gddr6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/src/gddr6.c b/lib/src/gddr6.c index 3d879ff..4110e55 100644 --- a/lib/src/gddr6.c +++ b/lib/src/gddr6.c @@ -181,9 +181,9 @@ void gddr6_print_temperatures_json(void) { char *delimiter = i < ctx.num_devices - 1 ? "," : ""; printf( - " {\"name\: \"%s\", \"vram\": \"%s\", \"arch\": \"%s\", \"dev_id\": \"0x%04x\", \"pci_id\": \"%02x:%02x:%02x\", \"temp\": %d}%s\n", + " {\"name\": \"%s\", \"vram\": \"%s\", \"arch\": \"%s\", \"dev_id\": \"0x%04x\", \"pci_id\": \"%02x:%02x:%02x\", \"temp\": %d}%s\n", ctx.devices[i].name, ctx.devices[i].vram, ctx.devices[i].arch, ctx.devices[i].dev_id, ctx.devices[i].bus, ctx.devices[i].dev, - ctx.devices[i].func, ctx.temperatures[i] + ctx.devices[i].func, ctx.temperatures[i], delimiter ); } printf("]\n"); From 90b1f4b3899b7d2e591fffaffc34531a053f794c Mon Sep 17 00:00:00 2001 From: Sergey Cheperis Date: Thu, 22 Aug 2024 15:01:31 +0000 Subject: [PATCH 3/9] Typo fix --- lib/src/gddr6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/src/gddr6.c b/lib/src/gddr6.c index 4110e55..a3206b7 100644 --- a/lib/src/gddr6.c +++ b/lib/src/gddr6.c @@ -139,7 +139,7 @@ void gddr6_get_temperatures(void) { if (ctx.devices[i].mapped_addr == NULL || ctx.devices[i].mapped_addr == MAP_FAILED) { - ctx.temperatures[i] = 0;; + ctx.temperatures[i] = 0; } else { From a5fa6c3d2367789962310602596cb1019fb47bc0 Mon Sep 17 00:00:00 2001 From: Sergey Cheperis Date: Thu, 22 Aug 2024 15:03:33 +0000 Subject: [PATCH 4/9] Update README --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 46e9543..1213c46 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,12 @@ cd gddr6 sudo gddr6 ``` +## Running +``` +sudo gddr6 # for human-readable monitoring +sudo gddr6 -j # for one-time JSON output +``` + ## Supported GPUs - RTX 4090 (AD102) - RTX 4080 Super (AD103) From 1c93d11ded156ec48e602d648bcb962c36055a85 Mon Sep 17 00:00:00 2001 From: Sergey Cheperis Date: Thu, 22 Aug 2024 15:26:38 +0000 Subject: [PATCH 5/9] Change device naming format to match nvidia-smi --- lib/src/gddr6.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/src/gddr6.c b/lib/src/gddr6.c index a3206b7..be13745 100644 --- a/lib/src/gddr6.c +++ b/lib/src/gddr6.c @@ -120,14 +120,14 @@ void gddr6_memory_map(int verbose) if (ctx.devices[i].mapped_addr == MAP_FAILED) { ctx.devices[i].mapped_addr = NULL; - fprintf(stderr, "Memory mapping failed for pci=%x:%x:%x\n", ctx.devices[i].bus, ctx.devices[i].dev, ctx.devices[i].func); + fprintf(stderr, "Memory mapping failed for pci=%02X:%02X.%X\n", ctx.devices[i].bus, ctx.devices[i].dev, ctx.devices[i].func); fprintf(stderr, "Did you enable iomem=relaxed? Are you r00t?\n"); exit(EXIT_FAILURE); } if (verbose) { - printf("Device: %s %s (%s / 0x%04x) pci=%02x:%02x:%02x\n", ctx.devices[i].name, ctx.devices[i].vram, + printf("Device: %s %s (%s / 0x%04x) pci=%02X:%02X.%X\n", ctx.devices[i].name, ctx.devices[i].vram, ctx.devices[i].arch, ctx.devices[i].dev_id, ctx.devices[i].bus, ctx.devices[i].dev, ctx.devices[i].func); } } @@ -181,7 +181,7 @@ void gddr6_print_temperatures_json(void) { char *delimiter = i < ctx.num_devices - 1 ? "," : ""; printf( - " {\"name\": \"%s\", \"vram\": \"%s\", \"arch\": \"%s\", \"dev_id\": \"0x%04x\", \"pci_id\": \"%02x:%02x:%02x\", \"temp\": %d}%s\n", + " {\"name\": \"%s\", \"vram\": \"%s\", \"arch\": \"%s\", \"dev_id\": \"0x%04x\", \"pci_id\": \"%02X:%02X:%X\", \"temp\": %d}%s\n", ctx.devices[i].name, ctx.devices[i].vram, ctx.devices[i].arch, ctx.devices[i].dev_id, ctx.devices[i].bus, ctx.devices[i].dev, ctx.devices[i].func, ctx.temperatures[i], delimiter ); From 9db1663087915f5907a41ece657ffeb92a4530f0 Mon Sep 17 00:00:00 2001 From: Sergey Cheperis Date: Thu, 22 Aug 2024 15:27:48 +0000 Subject: [PATCH 6/9] Fix --- lib/src/gddr6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/src/gddr6.c b/lib/src/gddr6.c index be13745..f999953 100644 --- a/lib/src/gddr6.c +++ b/lib/src/gddr6.c @@ -181,7 +181,7 @@ void gddr6_print_temperatures_json(void) { char *delimiter = i < ctx.num_devices - 1 ? "," : ""; printf( - " {\"name\": \"%s\", \"vram\": \"%s\", \"arch\": \"%s\", \"dev_id\": \"0x%04x\", \"pci_id\": \"%02X:%02X:%X\", \"temp\": %d}%s\n", + " {\"name\": \"%s\", \"vram\": \"%s\", \"arch\": \"%s\", \"dev_id\": \"0x%04x\", \"pci_id\": \"%02X:%02X.%X\", \"temp\": %d}%s\n", ctx.devices[i].name, ctx.devices[i].vram, ctx.devices[i].arch, ctx.devices[i].dev_id, ctx.devices[i].bus, ctx.devices[i].dev, ctx.devices[i].func, ctx.temperatures[i], delimiter ); From ea58bb132199715e1d00ef0073f9378ea57a31d0 Mon Sep 17 00:00:00 2001 From: Sergey Cheperis Date: Thu, 22 Aug 2024 16:22:47 +0000 Subject: [PATCH 7/9] Remove verbose arg from gddr6_memory_map --- app/src/app.c | 5 +++-- lib/include/gddr6.h | 3 ++- lib/src/gddr6.c | 17 ++++++++++++----- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/app/src/app.c b/app/src/app.c index 8c1f13d..43a346b 100644 --- a/app/src/app.c +++ b/app/src/app.c @@ -29,14 +29,15 @@ int main(int argc, char **argv) return 1; } + gddr6_memory_map(0); + if (argc >= 2 && !strcmp(argv[1], "-j")) { - gddr6_memory_map(0); gddr6_print_temperatures_json(); } else { - gddr6_memory_map(1); + gddr6_print_memory_map(); gddr6_monitor_temperatures(); } diff --git a/lib/include/gddr6.h b/lib/include/gddr6.h index 25806fd..270266e 100644 --- a/lib/include/gddr6.h +++ b/lib/include/gddr6.h @@ -26,7 +26,8 @@ struct gddr6_ctx { }; void gddr6_init(void); -void gddr6_memory_map(int verbose); +void gddr6_memory_map(); +void gddr6_print_memory_map(); void gddr6_cleanup(int signal); void gddr6_get_temperatures(void); void gddr6_monitor_temperatures(void); diff --git a/lib/src/gddr6.c b/lib/src/gddr6.c index f999953..4364d8f 100644 --- a/lib/src/gddr6.c +++ b/lib/src/gddr6.c @@ -109,7 +109,7 @@ int gddr6_detect_compatible_gpus(void) return ctx.num_devices; } -void gddr6_memory_map(int verbose) +void gddr6_memory_map() { for (uint32_t i = 0; i < ctx.num_devices; i++) { @@ -124,12 +124,19 @@ void gddr6_memory_map(int verbose) fprintf(stderr, "Did you enable iomem=relaxed? Are you r00t?\n"); exit(EXIT_FAILURE); } + } +} - if (verbose) - { - printf("Device: %s %s (%s / 0x%04x) pci=%02X:%02X.%X\n", ctx.devices[i].name, ctx.devices[i].vram, - ctx.devices[i].arch, ctx.devices[i].dev_id, ctx.devices[i].bus, ctx.devices[i].dev, ctx.devices[i].func); +void gddr6_print_memory_map() +{ + for (uint32_t i = 0; i < ctx.num_devices; i++) + { + if (ctx.devices[i].mapped_addr == NULL || ctx.devices[i].mapped_addr == MAP_FAILED) { + continue; } + + printf("Device: %s %s (%s / 0x%04x) pci=%02X:%02X.%X\n", ctx.devices[i].name, ctx.devices[i].vram, + ctx.devices[i].arch, ctx.devices[i].dev_id, ctx.devices[i].bus, ctx.devices[i].dev, ctx.devices[i].func); } } From 8d2fcda2f57e08e7a649ac5c2981af04407579e8 Mon Sep 17 00:00:00 2001 From: Sergey Cheperis Date: Thu, 22 Aug 2024 16:24:00 +0000 Subject: [PATCH 8/9] Fix --- app/src/app.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/src/app.c b/app/src/app.c index 43a346b..06d302e 100644 --- a/app/src/app.c +++ b/app/src/app.c @@ -29,7 +29,7 @@ int main(int argc, char **argv) return 1; } - gddr6_memory_map(0); + gddr6_memory_map(); if (argc >= 2 && !strcmp(argv[1], "-j")) { From cd29f74001cb500f17eb2bb9b661831f0f1b81ee Mon Sep 17 00:00:00 2001 From: Sergey Cheperis Date: Thu, 22 Aug 2024 16:26:16 +0000 Subject: [PATCH 9/9] Fixes --- app/src/app.c | 1 - lib/include/gddr6.h | 4 ++-- lib/src/gddr6.c | 6 ++++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/app/src/app.c b/app/src/app.c index 06d302e..5da76e7 100644 --- a/app/src/app.c +++ b/app/src/app.c @@ -37,7 +37,6 @@ int main(int argc, char **argv) } else { - gddr6_print_memory_map(); gddr6_monitor_temperatures(); } diff --git a/lib/include/gddr6.h b/lib/include/gddr6.h index 270266e..d93c6cc 100644 --- a/lib/include/gddr6.h +++ b/lib/include/gddr6.h @@ -26,8 +26,8 @@ struct gddr6_ctx { }; void gddr6_init(void); -void gddr6_memory_map(); -void gddr6_print_memory_map(); +void gddr6_memory_map(void); +void gddr6_print_memory_map(void); void gddr6_cleanup(int signal); void gddr6_get_temperatures(void); void gddr6_monitor_temperatures(void); diff --git a/lib/src/gddr6.c b/lib/src/gddr6.c index 4364d8f..be91722 100644 --- a/lib/src/gddr6.c +++ b/lib/src/gddr6.c @@ -109,7 +109,7 @@ int gddr6_detect_compatible_gpus(void) return ctx.num_devices; } -void gddr6_memory_map() +void gddr6_memory_map(void) { for (uint32_t i = 0; i < ctx.num_devices; i++) { @@ -127,7 +127,7 @@ void gddr6_memory_map() } } -void gddr6_print_memory_map() +void gddr6_print_memory_map(void) { for (uint32_t i = 0; i < ctx.num_devices; i++) { @@ -160,6 +160,8 @@ void gddr6_get_temperatures(void) void gddr6_monitor_temperatures(void) { + gddr6_print_memory_map(); + while (1) { gddr6_get_temperatures();