Skip to content

Commit f2c37a5

Browse files
committed
ci: Collect CPU usage statistics on Azure
This commit adds a script which we'll execute on Azure Pipelines which is intended to run in the background and passively collect CPU usage statistics for our builders. The intention here is that we can use this information over time to diagnose issues with builders, see where we can optimize our build, fix parallelism issues, etc. This might not end up being too useful in the long run but it's data we've wanted to collect for quite some time now, so here's a stab at it! Comments about how this is intended to work can be found in the python script used here to collect CPU usage statistics. Closes #48828
1 parent 817d2fe commit f2c37a5

File tree

2 files changed

+191
-0
lines changed

2 files changed

+191
-0
lines changed

.azure-pipelines/steps/run.yml

+16
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@ steps:
1111
- checkout: self
1212
fetchDepth: 2
1313

14+
# Spawn a background process to collect CPU usage statistics which we'll upload
15+
# at the end of the build. See the comments in the script here for more
16+
# information.
17+
- bash: python src/ci/cpu-usage-over-time.py &> cpu-usage.csv &
18+
displayName: "Collect CPU-usage statistics in the background"
19+
1420
- bash: printenv | sort
1521
displayName: Show environment variables
1622

@@ -136,3 +142,13 @@ steps:
136142
AWS_SECRET_ACCESS_KEY: $(AWS_SECRET_ACCESS_KEY)
137143
condition: and(succeeded(), or(eq(variables.DEPLOY, '1'), eq(variables.DEPLOY_ALT, '1')))
138144
displayName: Upload artifacts
145+
146+
# Upload CPU usage statistics that we've been gathering this whole time. Always
147+
# execute this step in case we want to inspect failed builds, but don't let
148+
# errors here ever fail the build since this is just informational.
149+
- bash: aws s3 cp --acl public-read cpu-usage.csv s3://$DEPLOY_BUCKET/rustc-builds/$BUILD_SOURCEVERSION/cpu-$SYSTEM_JOBNAME.csv
150+
env:
151+
AWS_SECRET_ACCESS_KEY: $(AWS_SECRET_ACCESS_KEY)
152+
condition: contains(variables, 'AWS_SECRET_ACCESS_KEY')
153+
continueOnError: true
154+
displayName: Upload CPU usage statistics

src/ci/cpu-usage-over-time.py

+175
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
#!/usr/bin/env python
2+
# ignore-tidy-linelength
3+
4+
# This is a small script that we use on CI to collect CPU usage statistics of
5+
# our builders. By seeing graphs of CPU usage over time we hope to correlate
6+
# that with possible improvements to Rust's own build system, ideally diagnosing
7+
# that either builders are always fully using their CPU resources or they're
8+
# idle for long stretches of time.
9+
#
10+
# This script is relatively simple, but it's platform specific. Each platform
11+
# (OSX/Windows/Linux) has a different way of calculating the current state of
12+
# CPU at a point in time. We then compare two captured states to determine the
13+
# percentage of time spent in one state versus another. The state capturing is
14+
# all platform-specific but the loop at the bottom is the cross platform part
15+
# that executes everywhere.
16+
#
17+
# # Viewing statistics
18+
#
19+
# All builders will upload their CPU statistics as CSV files to our S3 buckets.
20+
# These URLs look like:
21+
#
22+
# https://$bucket.s3.amazonaws.com/rustc-builds/$commit/cpu-$builder.csv
23+
#
24+
# for example
25+
#
26+
# https://rust-lang-ci2.s3.amazonaws.com/rustc-builds/68baada19cd5340f05f0db15a3e16d6671609bcc/cpu-x86_64-apple.csv
27+
#
28+
# Each CSV file has two columns. The first is the timestamp of the measurement
29+
# and the second column is the % of idle cpu time in that time slice. Ideally
30+
# the second column is always zero.
31+
#
32+
# Once you've downloaded a file there are various ways to plot it and visualize
33+
# it. For command line usage you can use a script like so:
34+
#
35+
# set timefmt '%Y-%m-%dT%H:%M:%S'
36+
# set xdata time
37+
# set ylabel "Idle CPU %"
38+
# set xlabel "Time"
39+
# set datafile sep ','
40+
# set term png
41+
# set output "printme.png"
42+
# set grid
43+
# builder = "i686-apple"
44+
# plot "cpu-".builder.".csv" using 1:2 with lines title builder
45+
#
46+
# Executed as `gnuplot < ./foo.plot` it will generate a graph called
47+
# `printme.png` which you can then open up. If you know how to improve this
48+
# script or the viewing process that would be much appreciated :) (or even if
49+
# you know how to automate it!)
50+
51+
import datetime
52+
import sys
53+
import time
54+
55+
# Python 2 reports 'linux2' (or 'linux3') while Python 3 reports plain 'linux',
# so match on the prefix to select this branch under either interpreter.
if sys.platform.startswith('linux'):
    class State:
        """Snapshot of the aggregate CPU counters on the first line of /proc/stat.

        Each counter is stored as an integer attribute: `user`, `nice`,
        `system`, `idle`, `iowait`, `irq`, `softirq`, `steal`, `guest` and
        `guest_nice`.

        Raises:
            Exception: if the first line of /proc/stat does not start with
                the `cpu` label.
        """

        def __init__(self):
            with open('/proc/stat', 'r') as file:
                data = file.readline().split()
            if not data or data[0] != 'cpu':
                raise Exception('did not start with "cpu"')
            counters = [int(value) for value in data[1:11]]
            # Older kernels do not report the trailing columns (steal, guest,
            # guest_nice); treat any that are missing as zero instead of
            # failing with an IndexError.
            counters += [0] * (10 - len(counters))
            (self.user, self.nice, self.system, self.idle, self.iowait,
             self.irq, self.softirq, self.steal, self.guest,
             self.guest_nice) = counters

        def idle_since(self, prev):
            """Return the percentage of CPU time spent idle since `prev`.

            `prev` is an earlier `State`. The result is a float: the idle
            delta divided by the total delta, times 100. If no time was
            accounted between the two snapshots, 0.0 is returned because
            there is nothing to attribute.
            """
            user = self.user - prev.user
            nice = self.nice - prev.nice
            system = self.system - prev.system
            idle = self.idle - prev.idle
            iowait = self.iowait - prev.iowait
            irq = self.irq - prev.irq
            softirq = self.softirq - prev.softirq
            steal = self.steal - prev.steal
            # The kernel already accounts guest time inside `user` and
            # guest_nice time inside `nice`, so adding those two columns to
            # the total again would count that time twice.
            total = user + nice + system + idle + iowait + irq + softirq + steal
            if total <= 0:
                return 0.0
            return float(idle) / float(total) * 100
86+
87+
elif sys.platform == 'win32':
88+
from ctypes.wintypes import DWORD
89+
from ctypes import Structure, windll, WinError, GetLastError, byref
90+
91+
class FILETIME(Structure):
    """ctypes layout of the two-field time value filled in by `GetSystemTimes`.

    The value is split across two DWORD fields: the low-order part first,
    then the high-order part.
    """

    _fields_ = [("dwLowDateTime", DWORD), ("dwHighDateTime", DWORD)]
96+
97+
class State:
    """Snapshot of the system-wide idle, kernel and user times on Windows.

    The values come from `kernel32.GetSystemTimes` and are stored as integer
    attributes `idle`, `kernel` and `user`.

    Raises:
        OSError: (via `WinError`) if `GetSystemTimes` reports failure.
    """

    def __init__(self):
        idle, kernel, user = FILETIME(), FILETIME(), FILETIME()

        success = windll.kernel32.GetSystemTimes(
            byref(idle),
            byref(kernel),
            byref(user),
        )

        # Raise explicitly rather than `assert`: assertions are stripped
        # under `python -O`, and subscripting the exception (as the assert
        # message used to) fails on Python 3.
        if not success:
            raise WinError(GetLastError())

        self.idle = self._as_int(idle)
        self.kernel = self._as_int(kernel)
        self.user = self._as_int(user)

    @staticmethod
    def _as_int(filetime):
        """Join the high and low halves of a FILETIME into one integer."""
        return (filetime.dwHighDateTime << 32) | filetime.dwLowDateTime

    def idle_since(self, prev):
        """Return the percentage of CPU time spent idle since `prev`.

        `prev` is an earlier `State`. Per the Win32 documentation for
        `GetSystemTimes`, the kernel time already includes idle time, so
        user + kernel is the total elapsed CPU time. Returns 0.0 if no time
        was accounted between the two snapshots.
        """
        idle = self.idle - prev.idle
        user = self.user - prev.user
        kernel = self.kernel - prev.kernel
        total = user + kernel
        if total <= 0:
            return 0.0
        return float(idle) / float(total) * 100
118+
119+
elif sys.platform == 'darwin':
120+
from ctypes import *

# Handle used by State to call host_processor_info and mach_host_self.
libc = cdll.LoadLibrary('/usr/lib/libc.dylib')

# Passed as the second argument to libc.host_processor_info by State. The
# spelling of this name is kept as-is because State refers to it.
PROESSOR_CPU_LOAD_INFO = c_int(2)

# Offsets of the individual tick counters inside each group of values that
# State reads out of the cpu_info array.
CPU_STATE_USER, CPU_STATE_SYSTEM, CPU_STATE_IDLE, CPU_STATE_NICE = range(4)

# Pointer-to-int type used for the cpu_info output argument.
c_int_p = POINTER(c_int)
129+
130+
class State:
    """Snapshot of CPU tick counters summed over every processor on macOS.

    The counters come from `libc.host_processor_info` and are accumulated
    into the integer attributes `user`, `system`, `idle` and `nice`.

    Raises:
        Exception: if `host_processor_info` returns a non-zero status.
    """

    # Number of consecutive counters reported per processor (user, system,
    # idle, nice); this is CPU_STATE_MAX in <mach/machine.h>.
    _STATES_PER_CPU = 4

    def __init__(self):
        num_cpus_u = c_uint(0)
        cpu_info = c_int_p()
        cpu_info_cnt = c_int(0)
        err = libc.host_processor_info(
            libc.mach_host_self(),
            PROESSOR_CPU_LOAD_INFO,
            byref(num_cpus_u),
            byref(cpu_info),
            byref(cpu_info_cnt),
        )
        # Raise explicitly rather than `assert`, which `python -O` strips.
        if err != 0:
            raise Exception('host_processor_info failed with status %d' % err)
        self.user = 0
        self.system = 0
        self.idle = 0
        self.nice = 0
        # cpu_info holds one group of _STATES_PER_CPU counters per processor,
        # so step by the group size. Stepping by the processor count only
        # lands on group boundaries when there happen to be exactly 4 CPUs.
        for base in range(0, cpu_info_cnt.value, self._STATES_PER_CPU):
            self.user += cpu_info[base + CPU_STATE_USER]
            self.system += cpu_info[base + CPU_STATE_SYSTEM]
            self.idle += cpu_info[base + CPU_STATE_IDLE]
            self.nice += cpu_info[base + CPU_STATE_NICE]
        # NOTE(review): the cpu_info buffer handed back by
        # host_processor_info is never released here; presumably it should be
        # freed with vm_deallocate after each sample -- confirm and add.

    def idle_since(self, prev):
        """Return the percentage of CPU time spent idle since `prev`.

        `prev` is an earlier `State`. Returns 0.0 if no ticks were accounted
        between the two snapshots.
        """
        user = self.user - prev.user
        system = self.system - prev.system
        idle = self.idle - prev.idle
        nice = self.nice - prev.nice
        total = user + system + idle + nice
        if total <= 0:
            return 0.0
        return float(idle) / float(total) * 100.0
161+
162+
else:
163+
print('unknown platform', sys.platform)
164+
sys.exit(1)
165+
166+
# Take an initial snapshot to compare against, then emit the CSV header.
previous = State()
print("Time,Idle")

# Sample once per second forever; each row is the UTC timestamp and the idle
# percentage over the interval since the previous sample.
while True:
    time.sleep(1)
    current = State()
    timestamp = datetime.datetime.utcnow().isoformat()
    print("%s,%s" % (timestamp, current.idle_since(previous)))
    # Flush after every row so the output file is up to date whenever it is
    # read.
    sys.stdout.flush()
    previous = current

0 commit comments

Comments
 (0)