Skip to content

Commit

Permalink
Merge pull request xapi-project#8 from johnelse/trunk-build-fixes
Browse files (browse the repository at this point in the history)
Enable build on trunk under planex
  • Loading branch information
johnelse committed Nov 12, 2014
2 parents ed04edb + 49e601a commit 2c717df
Show file tree
Hide file tree
Showing 8 changed files with 1,818 additions and 554 deletions.
11 changes: 4 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
include $(B_BASE)/common.mk
include $(B_BASE)/rpmbuild.mk

IPROG=install -m 755
IDATA=install -m 644
DESTDIR ?=
LIBEXECDIR ?= /opt/xensource/libexec

TESTS_FLAG=--enable-tests

Expand All @@ -28,6 +25,6 @@ clean:
.PHONY: install
install: build
mkdir -p $(DESTDIR)$(LIBEXECDIR)/xcp-rrdd-plugins/
$(IPROG) _build/gpumon/gpumon.native $(DESTDIR)$(LIBEXECDIR)/xcp-rrdd-plugins/xcp-rrdd-gpumon
install -m 755 _build/gpumon/gpumon.native $(DESTDIR)$(LIBEXECDIR)/xcp-rrdd-plugins/xcp-rrdd-gpumon
mkdir -p $(DESTDIR)/etc/rc.d/init.d
$(IPROG) scripts/init.d-rrdd-gpumon $(DESTDIR)/etc/rc.d/init.d/xcp-rrdd-gpumon
install -m 755 scripts/init.d-rrdd-gpumon $(DESTDIR)/etc/rc.d/init.d/xcp-rrdd-gpumon
2 changes: 1 addition & 1 deletion _oasis
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Library gpumon_libs
Findlibname: gpumon-libs
Modules: Result, Config, Nvml
CSources: nvml_stubs.c
BuildDepends: stdext
BuildDepends: threads, stdext, rpclib

Executable gpumon
CompiledObject: best
Expand Down
41 changes: 26 additions & 15 deletions _tags
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# OASIS_START
# DO NOT EDIT (digest: f2f08f5df6dc38d9d78a4766df741877)
# Ignore VCS directories, you can use the same kind of rule outside
# OASIS_START/STOP if you want to exclude directories that contains
# DO NOT EDIT (digest: cf3b73d2a8a05491b93d2c9ec4624ae7)
# Ignore VCS directories, you can use the same kind of rule outside
# OASIS_START/STOP if you want to exclude directories that contains
# useless stuff for the build process
true: annot, bin_annot
<**/.svn>: -traverse
<**/.svn>: not_hygienic
".bzr": -traverse
Expand All @@ -16,22 +17,32 @@
# Library gpumon_libs
"lib/gpumon_libs.cmxs": use_gpumon_libs
<lib/gpumon_libs.{cma,cmxa}>: use_libgpumon_libs_stubs
<lib/*.ml{,i}>: pkg_stdext
<lib/*.ml{,i,y}>: pkg_rpclib
<lib/*.ml{,i,y}>: pkg_stdext
<lib/*.ml{,i,y}>: pkg_threads
"lib/nvml_stubs.c": pkg_rpclib
"lib/nvml_stubs.c": pkg_stdext
"lib/nvml_stubs.c": pkg_threads
# Executable gpumon
<gpumon/gpumon.{native,byte}>: use_gpumon_libs
<gpumon/gpumon.{native,byte}>: pkg_threads
<gpumon/gpumon.{native,byte}>: pkg_stdext
<gpumon/gpumon.{native,byte}>: pkg_rpclib
<gpumon/gpumon.{native,byte}>: pkg_rrdd-plugin
<gpumon/*.ml{,i}>: use_gpumon_libs
<gpumon/*.ml{,i}>: pkg_threads
<gpumon/*.ml{,i}>: pkg_stdext
<gpumon/*.ml{,i}>: pkg_rrdd-plugin
<gpumon/gpumon.{native,byte}>: pkg_stdext
<gpumon/gpumon.{native,byte}>: pkg_threads
<gpumon/gpumon.{native,byte}>: use_gpumon_libs
<gpumon/*.ml{,i,y}>: pkg_rpclib
<gpumon/*.ml{,i,y}>: pkg_rrdd-plugin
<gpumon/*.ml{,i,y}>: pkg_stdext
<gpumon/*.ml{,i,y}>: pkg_threads
<gpumon/*.ml{,i,y}>: use_gpumon_libs
# Executable test_main
<test/test_main.{native,byte}>: use_gpumon_libs
<test/test_main.{native,byte}>: pkg_oUnit
<test/test_main.{native,byte}>: pkg_rpclib
<test/test_main.{native,byte}>: pkg_stdext
<test/*.ml{,i}>: use_gpumon_libs
<test/*.ml{,i}>: pkg_oUnit
<test/*.ml{,i}>: pkg_stdext
<test/test_main.{native,byte}>: pkg_threads
<test/test_main.{native,byte}>: use_gpumon_libs
<test/*.ml{,i,y}>: pkg_oUnit
<test/*.ml{,i,y}>: pkg_rpclib
<test/*.ml{,i,y}>: pkg_stdext
<test/*.ml{,i,y}>: pkg_threads
<test/*.ml{,i,y}>: use_gpumon_libs
# OASIS_STOP
59 changes: 31 additions & 28 deletions gpumon/gpumon.ml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
open Fun
open Rrdd_plugin

module Common = Rrdp_common.Common(struct let name = "xcp-rrdd-gpumon" end)
module Process = Process(struct let name = "xcp-rrdd-gpumon" end)

let nvidia_vendor_id = 0x10del

Expand Down Expand Up @@ -90,16 +91,16 @@ let load_config () =
match Config.of_file nvidia_config_path with
| `Ok config -> [nvidia_vendor_id, config]
| `Error `Does_not_exist ->
Common.D.error "Config file %s not found" nvidia_config_path;
Common.D.warn "Using default config";
Process.D.error "Config file %s not found" nvidia_config_path;
Process.D.warn "Using default config";
default_config
| `Error (`Parse_failure msg) ->
Common.D.error "Caught exception parsing config file: %s" msg;
Common.D.warn "Using default config";
Process.D.error "Caught exception parsing config file: %s" msg;
Process.D.warn "Using default config";
default_config
| `Error (`Unknown_version version) ->
Common.D.error "Unknown config file version: %s" version;
Common.D.warn "Using default config";
Process.D.error "Unknown config file version: %s" version;
Process.D.warn "Using default config";
default_config

type gpu = {
Expand All @@ -114,7 +115,7 @@ type gpu = {
(* Adding colons to datasource names confuses RRD parsers, so replace all
* colons with "/" *)
let escape_bus_id bus_id =
String.concat "/" (Stringext.String.split ':' bus_id)
String.concat "/" (Xstringext.String.split ':' bus_id)

(** Get the list of devices recognised by NVML. *)
let get_gpus interface =
Expand Down Expand Up @@ -155,48 +156,48 @@ let generate_gpu_dss interface gpu =
List.map
(function
| Config.Free ->
Rrd.Host,
Ds.ds_make
~name:("gpu_memory_free_" ^ gpu.bus_id_escaped)
~description:"Unallocated framebuffer memory"
~value:(Rrd.VT_Int64 memory_info.Nvml.free)
~ty:Rrd.Gauge
~default:false
~units:"B" (),
Rrd.Host
~units:"B" ()
| Config.Used ->
Rrd.Host,
Ds.ds_make
~name:("gpu_memory_used_" ^ gpu.bus_id_escaped)
~description:"Allocated framebuffer memory"
~value:(Rrd.VT_Int64 memory_info.Nvml.used)
~ty:Rrd.Gauge
~default:false
~units:"B" (),
Rrd.Host)
~units:"B" ())
metrics
in
let other_dss =
List.map
(function
| Config.PowerUsage ->
let power_usage = Nvml.device_get_power_usage interface gpu.device in
Rrd.Host,
Ds.ds_make
~name:("gpu_power_usage_" ^ gpu.bus_id_escaped)
~description:"Power usage of this GPU"
~value:(Rrd.VT_Int64 (Int64.of_int power_usage))
~ty:Rrd.Gauge
~default:false
~units:"mW" (),
Rrd.Host
~units:"mW" ()
| Config.Temperature ->
let temperature = Nvml.device_get_temperature interface gpu.device in
Rrd.Host,
Ds.ds_make
~name:("gpu_temperature_" ^ gpu.bus_id_escaped)
~description:"Temperature of this GPU"
~value:(Rrd.VT_Int64 (Int64.of_int temperature))
~ty:Rrd.Gauge
~default:false
~units:"°C" (),
Rrd.Host)
~units:"°C" ())
gpu.other_metrics
in
let utilisation_dss =
Expand All @@ -208,6 +209,7 @@ let generate_gpu_dss interface gpu =
List.map
(function
| Config.Compute ->
Rrd.Host,
Ds.ds_make
~name:("gpu_utilisation_compute_" ^ gpu.bus_id_escaped)
~description:("Proportion of time over the past sample period during"^
Expand All @@ -217,9 +219,9 @@ let generate_gpu_dss interface gpu =
~default:false
~min:0.0
~max:1.0
~units:"(fraction)" (),
Rrd.Host
~units:"(fraction)" ()
| Config.MemoryIO ->
Rrd.Host,
Ds.ds_make
~name:("gpu_utilisation_memory_io_" ^ gpu.bus_id_escaped)
~description:("Proportion of time over the past sample period during"^
Expand All @@ -229,8 +231,7 @@ let generate_gpu_dss interface gpu =
~default:false
~min:0.0
~max:1.0
~units:"(fraction)" (),
Rrd.Host)
~units:"(fraction)" ())
metrics
in
List.fold_left
Expand Down Expand Up @@ -263,7 +264,7 @@ let close_nvml_interface interface =
(fun () -> Nvml.library_close interface)

let () =
Common.initialise ();
Process.initialise ();
(* Try to open an interface to NVML. If this fails for an expected reason,
* log the error, wait 5 minutes, then try again. *)
let interface =
Expand All @@ -272,27 +273,29 @@ let () =
with e ->
begin match e with
| Nvml.Library_not_loaded msg ->
Common.D.warn "NVML interface not loaded: %s" msg
Process.D.warn "NVML interface not loaded: %s" msg
| Nvml.Symbol_not_loaded msg ->
Common.D.warn "NVML missing expected symbol: %s" msg
Process.D.warn "NVML missing expected symbol: %s" msg
| e ->
(* This could just be that the NVIDIA driver is not running on
* any devices; in this case NVML throws NVML_ERROR_UNKNOWN. *)
Common.D.warn
Process.D.warn
"Caught unexpected error initialising NVML: %s"
(Printexc.to_string e);
end;
Common.D.info "Sleeping for 5 minutes";
Process.D.info "Sleeping for 5 minutes";
Thread.delay 300.0;
open_if ()
in
open_if ()
in
Common.D.info "Opened NVML interface";
Process.D.info "Opened NVML interface";
try
let gpus = get_gpus interface in
Common.main_loop
~dss_f:(fun () -> generate_all_gpu_dss interface gpus)
Process.main_loop
~neg_shift:0.5
~target:Reporter.Local
~protocol:Rrd_interface.V2
~dss_f:(fun () -> generate_all_gpu_dss interface gpus)
with _ ->
close_nvml_interface interface
6 changes: 6 additions & 0 deletions lib/gpumon_libs.mldylib
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# OASIS_START
# DO NOT EDIT (digest: 1352f9119978668edf6eb9d40c7ded7c)
Result
Config
Nvml
# OASIS_STOP
Loading

0 comments on commit 2c717df

Please sign in to comment.