From 512b3426ddcc717b956071ee1602616e07dba247 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta
Date: Thu, 26 Mar 2020 17:28:04 -0700
Subject: [PATCH 1/5] benchmark: Measure cost of Trace and Caller+Trace

This adds benchmarks to measure the cost of logging with a stack trace,
and of logging with both a caller and a stack trace.
---
 logger_bench_test.go | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/logger_bench_test.go b/logger_bench_test.go
index 71c6b5911..f39ef2f4a 100644
--- a/logger_bench_test.go
+++ b/logger_bench_test.go
@@ -179,6 +179,41 @@ func BenchmarkAddCallerHook(b *testing.B) {
 	})
 }
 
+func BenchmarkAddStack(b *testing.B) {
+	logger := New(
+		zapcore.NewCore(
+			zapcore.NewJSONEncoder(NewProductionConfig().EncoderConfig),
+			&ztest.Discarder{},
+			InfoLevel,
+		),
+		AddStacktrace(InfoLevel),
+	)
+	b.ResetTimer()
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			logger.Info("foo")
+		}
+	})
+}
+
+func BenchmarkAddCallerAndStack(b *testing.B) {
+	logger := New(
+		zapcore.NewCore(
+			zapcore.NewJSONEncoder(NewProductionConfig().EncoderConfig),
+			&ztest.Discarder{},
+			InfoLevel,
+		),
+		AddCaller(),
+		AddStacktrace(InfoLevel),
+	)
+	b.ResetTimer()
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			logger.Info("foo")
+		}
+	})
+}
+
 func Benchmark10Fields(b *testing.B) {
 	withBenchedLogger(b, func(log *Logger) {
 		log.Info("Ten fields, passed at the log site.",

From 3e14bd1088fe2794290e694a29642f1b38befca8 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta
Date: Thu, 26 Mar 2020 17:56:00 -0700
Subject: [PATCH 2/5] Share callstack b/w Caller and Stack

A log entry that includes both the caller and the stack trace requests
stack information twice: once to determine the caller and once to build
the stack trace.

This change requests the call stack information once and shares it
between the two cases.

---

I ran the benchmarks with `-count 3`.

**Before**

```
BenchmarkAddCallerHook-4         1265024     912 ns/op    248 B/op    3 allocs/op
BenchmarkAddCallerHook-4         1389937     949 ns/op    248 B/op    3 allocs/op
BenchmarkAddCallerHook-4         1327134     862 ns/op    248 B/op    3 allocs/op
BenchmarkAddStack-4               565006    1911 ns/op    304 B/op    2 allocs/op
BenchmarkAddStack-4               644364    2180 ns/op    304 B/op    2 allocs/op
BenchmarkAddStack-4               547028    1896 ns/op    304 B/op    2 allocs/op
BenchmarkAddCallerAndStack-4      405864    2471 ns/op    552 B/op    5 allocs/op
BenchmarkAddCallerAndStack-4      492368    2940 ns/op    552 B/op    5 allocs/op
BenchmarkAddCallerAndStack-4      414658    2588 ns/op    552 B/op    5 allocs/op
```

**After**

```
BenchmarkAddCallerHook-4         1000000    1054 ns/op    240 B/op    2 allocs/op
BenchmarkAddCallerHook-4         1245952    1009 ns/op    240 B/op    2 allocs/op
BenchmarkAddCallerHook-4         1102441     974 ns/op    240 B/op    2 allocs/op
BenchmarkAddStack-4               908425    1269 ns/op    304 B/op    2 allocs/op
BenchmarkAddStack-4               774799    1345 ns/op    304 B/op    2 allocs/op
BenchmarkAddStack-4               836653    1575 ns/op    304 B/op    2 allocs/op
BenchmarkAddCallerAndStack-4      716456    1623 ns/op    544 B/op    4 allocs/op
BenchmarkAddCallerAndStack-4      562585    1826 ns/op    544 B/op    4 allocs/op
BenchmarkAddCallerAndStack-4      705979    1830 ns/op    544 B/op    4 allocs/op
```

On average,

1. Adding caller alone is ~104 nanoseconds *slower*
2. Adding stack alone is ~600 nanoseconds *faster*
3. Adding both caller and stack is ~906 nanoseconds *faster*

The second of these rightfully raises eyebrows because, at best, it was
expected to show no change. A cursory investigation doesn't reveal any
obvious reason for the speed-up, unless there's something incredibly
wrong with the change.
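To make the optimization concrete, here is a minimal, self-contained sketch of the idea outside of zap (`captureBoth` is an illustrative name, not zap API): capture the program counters once with `runtime.Callers`, then derive both the caller and the full stack trace from the same slice.

```go
package main

import (
	"fmt"
	"runtime"
)

// captureBoth captures the call stack once and derives both the caller
// (the first frame) and a full stack trace from the same program counters.
func captureBoth(skip int) (caller, stack string) {
	pcs := make([]uintptr, 64)
	n := runtime.Callers(skip+2, pcs) // +2 skips runtime.Callers and captureBoth
	pcs = pcs[:n]

	// Caller: only the first frame is needed.
	frame, _ := runtime.CallersFrames(pcs).Next()
	caller = fmt.Sprintf("%s:%d", frame.File, frame.Line)

	// Stack trace: walk every frame from the same slice.
	frames := runtime.CallersFrames(pcs)
	for f, more := frames.Next(); more; f, more = frames.Next() {
		stack += fmt.Sprintf("%s\n\t%s:%d\n", f.Function, f.File, f.Line)
	}
	return caller, stack
}

func main() {
	caller, stack := captureBoth(0)
	fmt.Println("caller:", caller)
	fmt.Print("stack:\n" + stack)
}
```

The actual change below additionally pools the `[]uintptr` slice, which the sketch omits for brevity.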
---
 logger.go     | 24 ++++++++++++++++++---
 stacktrace.go | 60 ++++++++++++++++++++++++++++++---------------------
 2 files changed, 57 insertions(+), 27 deletions(-)

diff --git a/logger.go b/logger.go
index cd6e19551..871d25534 100644
--- a/logger.go
+++ b/logger.go
@@ -296,15 +296,33 @@ func (log *Logger) check(lvl zapcore.Level, msg string) *zapcore.CheckedEntry {
 
 	// Thread the error output through to the CheckedEntry.
 	ce.ErrorOutput = log.errorOutput
+
+	addStack := log.addStack.Enabled(ce.Entry.Level)
+	if !log.addCaller && !addStack {
+		return ce
+	}
+
+	programCounters := newProgramCounters()
+	defer programCounters.Release()
+
+	pcs := programCounters.Callers(callerSkipOffset)
 	if log.addCaller {
-		ce.Entry.Caller = zapcore.NewEntryCaller(runtime.Caller(log.callerSkip + callerSkipOffset))
+		callerIdx := log.callerSkip
+		if len(pcs) <= callerIdx {
+			ce.Entry.Caller = zapcore.NewEntryCaller(0, "", 0, false)
+		} else {
+			frame, _ := runtime.CallersFrames(pcs[callerIdx:]).Next()
+			ce.Entry.Caller = zapcore.NewEntryCaller(frame.PC, frame.File, frame.Line, true)
+		}
+
 		if !ce.Entry.Caller.Defined {
 			fmt.Fprintf(log.errorOutput, "%v Logger.check error: failed to get caller\n", time.Now().UTC())
 			log.errorOutput.Sync()
 		}
 	}
-	if log.addStack.Enabled(ce.Entry.Level) {
-		ce.Entry.Stack = Stack("").String
+
+	if addStack {
+		ce.Entry.Stack = makeStacktrace(pcs)
 	}
 
 	return ce

diff --git a/stacktrace.go b/stacktrace.go
index 100fac216..e70f8f5ba 100644
--- a/stacktrace.go
+++ b/stacktrace.go
@@ -31,12 +31,6 @@ import (
 const _zapPackage = "go.uber.org/zap"
 
 var (
-	_stacktracePool = sync.Pool{
-		New: func() interface{} {
-			return newProgramCounters(64)
-		},
-	}
-
 	// We add "." and "/" suffixes to the package name to ensure we only match
 	// the exact package and not any package with the same prefix.
 	_zapStacktracePrefixes = addPrefix(_zapPackage, ".", "/")
 )
 
 func takeStacktrace() string {
+	pcs := newProgramCounters()
+	defer pcs.Release()
+
+	return makeStacktrace(pcs.Callers(1))
+}
+
+func makeStacktrace(pcs []uintptr) string {
 	buffer := bufferpool.Get()
 	defer buffer.Free()
 
-	programCounters := _stacktracePool.Get().(*programCounters)
-	defer _stacktracePool.Put(programCounters)
-
-	var numFrames int
-	for {
-		// Skip the call to runtime.Counters and takeStacktrace so that the
-		// program counters start at the caller of takeStacktrace.
-		numFrames = runtime.Callers(2, programCounters.pcs)
-		if numFrames < len(programCounters.pcs) {
-			break
-		}
-		// Don't put the too-short counter slice back into the pool; this lets
-		// the pool adjust if we consistently take deep stacktraces.
-		programCounters = newProgramCounters(len(programCounters.pcs) * 2)
-	}
 
 	i := 0
 	skipZapFrames := true // skip all consecutive zap frames at the beginning.
-	frames := runtime.CallersFrames(programCounters.pcs[:numFrames])
+	frames := runtime.CallersFrames(pcs)
 
 	// Note: On the last iteration, frames.Next() returns false, with a valid
 	// frame, but we ignore this frame. The last frame is a runtime frame which
@@ -113,8 +99,34 @@ type programCounters struct {
 	pcs []uintptr
 }
 
-func newProgramCounters(size int) *programCounters {
-	return &programCounters{make([]uintptr, size)}
+var _stacktracePool = sync.Pool{
+	New: func() interface{} {
+		return &programCounters{make([]uintptr, 64)}
+	},
+}
+
+func newProgramCounters() *programCounters {
+	return _stacktracePool.Get().(*programCounters)
+}
+
+func (pcs *programCounters) Release() {
+	_stacktracePool.Put(pcs)
+}
+
+func (pcs *programCounters) Callers(skip int) []uintptr {
+	var numFrames int
+	for {
+		// Skip the call to runtime.Callers and programCounters.Callers
+		// so that the program counters start at our caller.
+		numFrames = runtime.Callers(skip+2, pcs.pcs)
+		if numFrames < len(pcs.pcs) {
+			return pcs.pcs[:numFrames]
+		}
+
+		// Don't put the too-short counter slice back into the pool; this lets
+		// the pool adjust if we consistently take deep stacktraces.
+		pcs.pcs = make([]uintptr, len(pcs.pcs)*2)
+	}
 }
 
 func addPrefix(prefix string, ss ...string) []string {

From 3520ae7a3dbacc64dae329a8a9f773630da25139 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta
Date: Fri, 27 Mar 2020 10:54:07 -0700
Subject: [PATCH 3/5] Optimize caller-only path

The previous change degraded the caller-only path because it captured
more of the stack than necessary. This change restores the original
performance on that path by limiting the depth of the captured stack.

master:

```
BenchmarkAddCallerHook-4         1042035    1036 ns/op    248 B/op    3 allocs/op
BenchmarkAddCallerHook-4          916015    1096 ns/op    248 B/op    3 allocs/op
BenchmarkAddCallerHook-4         1178632    1193 ns/op    248 B/op    3 allocs/op
```

With this change:

```
BenchmarkAddCallerHook-4         1090138    1056 ns/op    240 B/op    2 allocs/op
BenchmarkAddCallerHook-4         1135695    1068 ns/op    240 B/op    2 allocs/op
BenchmarkAddCallerHook-4          896173    1122 ns/op    240 B/op    2 allocs/op
```
---
 logger.go     | 19 +++++++++++++++----
 stacktrace.go | 33 +++++++++++++++++++++++++++------
 2 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/logger.go b/logger.go
index 871d25534..8907fc9dc 100644
--- a/logger.go
+++ b/logger.go
@@ -302,16 +302,27 @@ func (log *Logger) check(lvl zapcore.Level, msg string) *zapcore.CheckedEntry {
 		return ce
 	}
 
+	// If a stack trace was requested, capture the entire stack.
+	stackLimit := 0
+
+	// For historical reasons, stack traces reported by Zap don't respect
+	// CallerSkip. See also #727.
+	stackSkip := callerSkipOffset
+
+	if !addStack {
+		stackLimit = 1
+		stackSkip += log.callerSkip
+	}
+
 	programCounters := newProgramCounters()
 	defer programCounters.Release()
 
-	pcs := programCounters.Callers(callerSkipOffset)
+	pcs := programCounters.Callers(stackSkip, stackLimit)
 	if log.addCaller {
-		callerIdx := log.callerSkip
-		if len(pcs) <= callerIdx {
+		if len(pcs) == 0 {
 			ce.Entry.Caller = zapcore.NewEntryCaller(0, "", 0, false)
 		} else {
-			frame, _ := runtime.CallersFrames(pcs[callerIdx:]).Next()
+			frame, _ := runtime.CallersFrames(pcs).Next()
 			ce.Entry.Caller = zapcore.NewEntryCaller(frame.PC, frame.File, frame.Line, true)
 		}
 

diff --git a/stacktrace.go b/stacktrace.go
index e70f8f5ba..30e3f8df5 100644
--- a/stacktrace.go
+++ b/stacktrace.go
@@ -41,7 +41,7 @@ func takeStacktrace() string {
 	pcs := newProgramCounters()
 	defer pcs.Release()
 
-	return makeStacktrace(pcs.Callers(1))
+	return makeStacktrace(pcs.Callers(1 /* skip */, 0 /* limit */))
 }
 
 func makeStacktrace(pcs []uintptr) string {
@@ -113,19 +113,40 @@ func (pcs *programCounters) Release() {
 	_stacktracePool.Put(pcs)
 }
 
-func (pcs *programCounters) Callers(skip int) []uintptr {
+// Callers captures program counters of callers of the current function.
+//
+// skip specifies the number of functions to skip.
+// limit specifies how deep into the stack we go, zero meaning unlimited.
+func (pcs *programCounters) Callers(skip int, limit int) []uintptr {
+	pc := pcs.pcs
+	if limit > 0 {
+		// The shortened slice will never be placed back into the
+		// pool, but the longer one will.
+		if limit < len(pc) {
+			pc = pc[:limit]
+		} else {
+			pc = make([]uintptr, limit)
+			pcs.pcs = pc
+		}
+	}
+
 	var numFrames int
 	for {
 		// Skip the call to runtime.Callers and programCounters.Callers
 		// so that the program counters start at our caller.
-		numFrames = runtime.Callers(skip+2, pcs.pcs)
-		if numFrames < len(pcs.pcs) {
-			return pcs.pcs[:numFrames]
+		numFrames = runtime.Callers(skip+2, pc)
+
+		// All required frames have been captured if the limit was
+		// non-zero or if the stack depth is smaller than the size of
+		// the slice.
+		if limit > 0 || numFrames < len(pc) {
+			return pc[:numFrames]
 		}
 
 		// Don't put the too-short counter slice back into the pool; this lets
 		// the pool adjust if we consistently take deep stacktraces.
-		pcs.pcs = make([]uintptr, len(pcs.pcs)*2)
+		pc = make([]uintptr, len(pcs.pcs)*2)
+		pcs.pcs = pc
 	}
 }

From 954fa36110b1cbfee65c4ba6ad3d664eff91fde8 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta
Date: Fri, 27 Mar 2020 14:30:20 -0700
Subject: [PATCH 4/5] Callers: Verify behavior for large limits

This tests the behavior of Callers when the limit is larger than 64
(the value used by the pool by default). To test this reliably, I had
to add an option to disable skipping zap frames in makeStacktrace.
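Before the diff, here is a standalone sketch of the `skip`/`limit` contract that patches 3 and 4 converge on (`callers` is an illustrative name, not zap's API): a non-zero limit caps the capture depth, allocating a larger slice when the limit exceeds the pooled default of 64, while a zero limit grows the slice until the whole stack fits.

```go
package main

import (
	"fmt"
	"runtime"
)

// callers captures the program counters of the callers of its caller.
// skip counts functions to omit; a limit of zero means "the entire stack".
func callers(skip, limit int) []uintptr {
	pcs := make([]uintptr, 64) // stands in for the pooled slice
	if limit > 0 {
		if limit < len(pcs) {
			pcs = pcs[:limit] // shallow capture, e.g. the caller-only path
		} else {
			pcs = make([]uintptr, limit) // limit larger than the default
		}
	}
	for {
		// +2 skips runtime.Callers and this function itself.
		n := runtime.Callers(skip+2, pcs)
		// With a limit, a full slice is acceptable; without one, a full
		// slice may mean the stack was truncated, so double and retry.
		if limit > 0 || n < len(pcs) {
			return pcs[:n]
		}
		pcs = make([]uintptr, 2*len(pcs))
	}
}

func main() {
	fmt.Println("caller-only:", len(callers(0, 1))) // exactly 1 frame
	fmt.Println("full stack: ", len(callers(0, 0))) // everything up to the runtime
}
```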
---
 logger.go          |  2 +-
 stacktrace.go      |  9 ++++++---
 stacktrace_test.go | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/logger.go b/logger.go
index 8907fc9dc..4397a6bbb 100644
--- a/logger.go
+++ b/logger.go
@@ -333,7 +333,7 @@ func (log *Logger) check(lvl zapcore.Level, msg string) *zapcore.CheckedEntry {
 	}
 
 	if addStack {
-		ce.Entry.Stack = makeStacktrace(pcs)
+		ce.Entry.Stack = makeStacktrace(pcs, true /* skip zap */)
 	}
 
 	return ce

diff --git a/stacktrace.go b/stacktrace.go
index 30e3f8df5..6f99f7265 100644
--- a/stacktrace.go
+++ b/stacktrace.go
@@ -41,21 +41,24 @@ func takeStacktrace() string {
 	pcs := newProgramCounters()
 	defer pcs.Release()
 
-	return makeStacktrace(pcs.Callers(1 /* skip */, 0 /* limit */))
+	return makeStacktrace(
+		pcs.Callers(1 /* skip */, 0 /* limit */),
+		true, // skip zap
+	)
 }
 
-func makeStacktrace(pcs []uintptr) string {
+func makeStacktrace(pcs []uintptr, skipZapFrames bool) string {
 	buffer := bufferpool.Get()
 	defer buffer.Free()
 
 	i := 0
-	skipZapFrames := true // skip all consecutive zap frames at the beginning.
 	frames := runtime.CallersFrames(pcs)
 
 	// Note: On the last iteration, frames.Next() returns false, with a valid
 	// frame, but we ignore this frame. The last frame is a runtime frame which
 	// adds noise, since it's only either runtime.main or runtime.goexit.
 	for frame, more := frames.Next(); more; frame, more = frames.Next() {
+		// Skip all consecutive zap frames at the beginning.
 		if skipZapFrames && isZapFrame(frame.Function) {
 			continue
 		} else {

diff --git a/stacktrace_test.go b/stacktrace_test.go
index 3c9a41cfd..c8c5438c8 100644
--- a/stacktrace_test.go
+++ b/stacktrace_test.go
@@ -73,3 +73,35 @@ func BenchmarkTakeStacktrace(b *testing.B) {
 		takeStacktrace()
 	}
 }
+
+func TestProgramCountersCallersWithLargeLimit(t *testing.T) {
+	const (
+		N                  = 128
+		testName           = "go.uber.org/zap.TestProgramCountersCallersWithLargeLimit"
+		withStackDepthName = "go.uber.org/zap.withStackDepth"
+	)
+
+	var trace string
+	withStackDepth(N, func() {
+		pcs := newProgramCounters().Callers(0, N+2)
+		trace = makeStacktrace(pcs, false /* don't skip zap */)
+	})
+
+	assert.Contains(t, trace, testName)
+	for found := 0; found < N; found++ {
+		i := strings.Index(trace, withStackDepthName)
+		if i < 0 {
+			t.Fatalf(`expected %d occurrences of %q, found %d`, N, withStackDepthName, found)
+		}
+
+		trace = trace[i+len(withStackDepthName):]
+	}
+}
+
+func withStackDepth(depth int, f func()) {
+	if depth > 0 {
+		withStackDepth(depth-1, f)
+	} else {
+		f()
+	}
+}

From 332434f4c392361264a3c14a6c7a9aecf1b4f826 Mon Sep 17 00:00:00 2001
From: Abhinav Gupta
Date: Fri, 27 Mar 2020 14:49:41 -0700
Subject: [PATCH 5/5] Test program counter stack resizing

This verifies that when there is no limit on the stack depth, deep
stacks are resized as expected.
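The trick this test relies on deserves a note: recursion is the simplest way to manufacture a stack of arbitrary depth, so that a capture made inside the innermost call sees that many extra frames. A standalone sketch of the same idea (`withDepth` mirrors the test's `withStackDepth`; the names and output are illustrative):

```go
package main

import (
	"fmt"
	"runtime"
)

// withDepth pads the call stack with depth extra frames before calling f.
func withDepth(depth int, f func()) {
	if depth > 0 {
		withDepth(depth-1, f)
	} else {
		f()
	}
}

func main() {
	for _, depth := range []int{10, 500} {
		withDepth(depth, func() {
			pcs := make([]uintptr, 1024)
			n := runtime.Callers(1, pcs)
			fmt.Printf("padding %3d frames -> %4d captured\n", depth, n)
		})
	}
}
```

At depth 500 the capture comfortably exceeds the pool's initial 64 slots, which is exactly the resizing path this patch exercises.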
---
 stacktrace_test.go | 57 +++++++++++++++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 18 deletions(-)

diff --git a/stacktrace_test.go b/stacktrace_test.go
index c8c5438c8..9c488b6b2 100644
--- a/stacktrace_test.go
+++ b/stacktrace_test.go
@@ -74,28 +74,49 @@ func BenchmarkTakeStacktrace(b *testing.B) {
 	}
 }
 
-func TestProgramCountersCallersWithLargeLimit(t *testing.T) {
-	const (
-		N                  = 128
-		testName           = "go.uber.org/zap.TestProgramCountersCallersWithLargeLimit"
-		withStackDepthName = "go.uber.org/zap.withStackDepth"
-	)
+func TestProgramCountersCallersDeepStack(t *testing.T) {
+	tests := []struct {
+		desc            string
+		stackDepth      int
+		limit           int
+		wantOccurrences int
+	}{
+		{
+			desc:            "large limit",
+			stackDepth:      128,
+			limit:           128,
+			wantOccurrences: 127,
+			// one of the slots is taken by the closure we pass to
+			// withStackDepth.
+		},
+		{
+			desc:            "unlimited",
+			stackDepth:      500,
+			wantOccurrences: 500,
+		},
+	}
 
-	var trace string
-	withStackDepth(N, func() {
-		pcs := newProgramCounters().Callers(0, N+2)
-		trace = makeStacktrace(pcs, false /* don't skip zap */)
-	})
+	for _, tt := range tests {
+		t.Run(tt.desc, func(t *testing.T) {
+			const withStackDepthName = "go.uber.org/zap.withStackDepth"
 
-	assert.Contains(t, trace, testName)
-	for found := 0; found < N; found++ {
-		i := strings.Index(trace, withStackDepthName)
-		if i < 0 {
-			t.Fatalf(`expected %d occurrences of %q, found %d`, N, withStackDepthName, found)
-		}
+			var trace string
+			withStackDepth(tt.stackDepth, func() {
+				pcs := newProgramCounters().Callers(1, tt.limit)
+				trace = makeStacktrace(pcs, false /* don't skip zap */)
+			})
+
+			for found := 0; found < tt.wantOccurrences; found++ {
+				i := strings.Index(trace, withStackDepthName)
+				if i < 0 {
+					t.Fatalf(`expected %d occurrences of %q, found %d`, tt.wantOccurrences, withStackDepthName, found)
+				}
 
-		trace = trace[i+len(withStackDepthName):]
+				trace = trace[i+len(withStackDepthName):]
+			}
+		})
 	}
+
 }
 
 func withStackDepth(depth int, f func()) {
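For completeness: assuming a checkout of zap with this series applied, the new test and the benchmarks from patch 1 can be run with standard `go test` flags (exact numbers will vary by machine):

```
go test -run TestProgramCountersCallersDeepStack -v .
go test -run '^$' -bench 'AddCallerHook|AddStack|AddCallerAndStack' -benchmem -count 3 .
```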