Skip to content

Commit 082afcc

Browse files
Bryan C. Millsgopherbot
Bryan C. Mills
authored andcommitted
internal/testenv: adjust timeout calculations in CommandContext
I noticed some test failures in the build dashboard after CL 445597 that made me realize the grace period should be based on the test timeout, not the Context timeout: if the test itself sets a short timeout for a command, we still want to give the test process enough time to consume and log its output. I also put some more thought into how one might debug a test hang, and realized that in that case we don't want to set a WaitDelay at all: instead, we want to leave the processes in their stuck state so that they can be investigated with tools like `ps` and 'lsof'. Updates #50436. Change-Id: I65421084f44eeaaaec5dd2741cd836e9e68dd380 Reviewed-on: https://go-review.googlesource.com/c/go/+/446875 Auto-Submit: Bryan Mills <bcmills@google.com> Reviewed-by: Ian Lance Taylor <iant@google.com> Run-TryBot: Bryan Mills <bcmills@google.com> TryBot-Result: Gopher Robot <gobot@golang.org>
1 parent 50c5919 commit 082afcc

File tree

1 file changed

+39
-37
lines changed

1 file changed

+39
-37
lines changed

src/internal/testenv/exec.go

+39-37
Original file line numberDiff line numberDiff line change
@@ -79,63 +79,65 @@ func CleanCmdEnv(cmd *exec.Cmd) *exec.Cmd {
7979
// - skips t if the platform does not support os/exec,
8080
// - sends SIGQUIT (if supported by the platform) instead of SIGKILL
8181
// in its Cancel function
82-
// - adds a timeout (with an arbitrary grace period) before the test's deadline expires,
83-
// - sets a WaitDelay for an arbitrary grace period,
82+
// - if the test has a deadline, adds a Context timeout and WaitDelay
83+
// for an arbitrary grace period before the test's deadline expires,
8484
// - fails the test if the command does not complete before the test's deadline, and
8585
// - sets a Cleanup function that verifies that the test did not leak a subprocess.
8686
func CommandContext(t testing.TB, ctx context.Context, name string, args ...string) *exec.Cmd {
8787
t.Helper()
8888
MustHaveExec(t)
8989

9090
var (
91-
gracePeriod = 100 * time.Millisecond
92-
cancel context.CancelFunc
91+
cancelCtx context.CancelFunc
92+
gracePeriod time.Duration // unlimited unless the test has a deadline (to allow for interactive debugging)
9393
)
94-
if s := os.Getenv("GO_TEST_TIMEOUT_SCALE"); s != "" {
95-
scale, err := strconv.Atoi(s)
96-
if err != nil {
97-
t.Fatalf("invalid GO_TEST_TIMEOUT_SCALE: %v", err)
98-
}
99-
gracePeriod *= time.Duration(scale)
100-
}
10194

10295
if t, ok := t.(interface {
10396
testing.TB
10497
Deadline() (time.Time, bool)
10598
}); ok {
10699
if td, ok := t.Deadline(); ok {
107-
if cd, ok := ctx.Deadline(); !ok || cd.Sub(td) > gracePeriod {
108-
// Either ctx doesn't have a deadline, or its deadline would expire
109-
// after (or too close before) the test has already timed out.
110-
// Compute a new timeout that will expire before the test does so that
111-
// we can terminate the subprocess with a more useful signal.
112-
113-
timeout := time.Until(td)
114-
115-
// If time allows, increase the termination grace period to 5% of the
116-
// remaining time.
117-
if gp := timeout / 20; gp > gracePeriod {
118-
gracePeriod = gp
100+
// Start with a minimum grace period, just long enough to consume the
101+
// output of a reasonable program after it terminates.
102+
gracePeriod = 100 * time.Millisecond
103+
if s := os.Getenv("GO_TEST_TIMEOUT_SCALE"); s != "" {
104+
scale, err := strconv.Atoi(s)
105+
if err != nil {
106+
t.Fatalf("invalid GO_TEST_TIMEOUT_SCALE: %v", err)
119107
}
108+
gracePeriod *= time.Duration(scale)
109+
}
110+
111+
// If time allows, increase the termination grace period to 5% of the
112+
// test's remaining time.
113+
testTimeout := time.Until(td)
114+
if gp := testTimeout / 20; gp > gracePeriod {
115+
gracePeriod = gp
116+
}
120117

121-
// When we run commands that execute subprocesses, we want to reserve two
122-
// grace periods to clean up. We will send the first termination signal when
123-
// the context expires, then wait one grace period for the process to
124-
// produce whatever useful output it can (such as a stack trace). After the
125-
// first grace period expires, we'll escalate to os.Kill, leaving the second
126-
// grace period for the test function to record its output before the test
127-
// process itself terminates.
128-
timeout -= 2 * gracePeriod
129-
130-
ctx, cancel = context.WithTimeout(ctx, timeout)
131-
t.Cleanup(cancel)
118+
// When we run commands that execute subprocesses, we want to reserve two
119+
// grace periods to clean up: one for the delay between the first
120+
// termination signal being sent (via the Cancel callback when the Context
121+
// expires) and the process being forcibly terminated (via the WaitDelay
122+
// field), and a second one for the delay becween the process being
123+
// terminated and and the test logging its output for debugging.
124+
//
125+
// (We want to ensure that the test process itself has enough time to
126+
// log the output before it is also terminated.)
127+
cmdTimeout := testTimeout - 2*gracePeriod
128+
129+
if cd, ok := ctx.Deadline(); !ok || time.Until(cd) > cmdTimeout {
130+
// Either ctx doesn't have a deadline, or its deadline would expire
131+
// after (or too close before) the test has already timed out.
132+
// Add a shorter timeout so that the test will produce useful output.
133+
ctx, cancelCtx = context.WithTimeout(ctx, cmdTimeout)
132134
}
133135
}
134136
}
135137

136138
cmd := exec.CommandContext(ctx, name, args...)
137139
cmd.Cancel = func() error {
138-
if cancel != nil && ctx.Err() == context.DeadlineExceeded {
140+
if cancelCtx != nil && ctx.Err() == context.DeadlineExceeded {
139141
// The command timed out due to running too close to the test's deadline.
140142
// There is no way the test did that intentionally — it's too close to the
141143
// wire! — so mark it as a test failure. That way, if the test expects the
@@ -154,8 +156,8 @@ func CommandContext(t testing.TB, ctx context.Context, name string, args ...stri
154156
cmd.WaitDelay = gracePeriod
155157

156158
t.Cleanup(func() {
157-
if cancel != nil {
158-
cancel()
159+
if cancelCtx != nil {
160+
cancelCtx()
159161
}
160162
if cmd.Process != nil && cmd.ProcessState == nil {
161163
t.Errorf("command was started, but test did not wait for it to complete: %v", cmd)

0 commit comments

Comments
 (0)