Skip to content

Commit 3c78ace

Browse files
committed
strings: optimize Repeat for common substrings
According to static analysis of Go source code known by the module proxy,
spaces, dashes, zeros, equals signs, and tabs are the most commonly
repeated string literals.

Out of ~69k total calls to Repeat:
* ~25k calls are repeats of " "
* ~7k calls are repeats of "-"
* ~4k calls are repeats of "0"
* ~2k calls are repeats of "="
* ~2k calls are repeats of "\t"

After this optimization, ~60% of Repeat calls will go through the fast path.

These are often used in padding of fixed-width terminal UI
or in the presentation of humanly readable text
(e.g., indentation made of spaces or tabs).

Optimize for this case by handling short repeated sequences of common literals.

Performance:

name             old time/op    new time/op    delta
RepeatSpaces-24    19.3ns ± 1%     5.0ns ± 1%   -74.27%  (p=0.000 n=8+9)

name             old alloc/op   new alloc/op   delta
RepeatSpaces-24     2.00B ± 0%     0.00B       -100.00%  (p=0.000 n=10+10)

name             old allocs/op  new allocs/op  delta
RepeatSpaces-24      1.00 ± 0%      0.00       -100.00%  (p=0.000 n=10+10)

Change-Id: Id1cafd0cc509e835c8241a626489eb206e0adc3c
Reviewed-on: https://go-review.googlesource.com/c/go/+/536615
Reviewed-by: Ian Lance Taylor <iant@google.com>
Reviewed-by: Emmanuel Odeke <emmanuel@orijtech.com>
Reviewed-by: Than McIntosh <thanm@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
1 parent 0d7afc2 commit 3c78ace

File tree

2 files changed

+58
-0
lines changed

2 files changed

+58
-0
lines changed

Diff for: src/strings/strings.go

+38
Original file line numberDiff line numberDiff line change
@@ -530,6 +530,27 @@ func Map(mapping func(rune) rune, s string) string {
530530
return b.String()
531531
}
532532

533+
// According to static analysis, spaces, dashes, zeros, equals, and tabs
534+
// are the most commonly repeated string literals,
535+
// often used for display on fixed-width terminal windows.
536+
// Pre-declare constants for these for O(1) repetition in the common case.
537+
const (
538+
repeatedSpaces = "" +
539+
"                                                                " +
540+
"                                                                "
541+
repeatedDashes = "" +
542+
"----------------------------------------------------------------" +
543+
"----------------------------------------------------------------"
544+
repeatedZeroes = "" +
545+
"0000000000000000000000000000000000000000000000000000000000000000"
546+
repeatedEquals = "" +
547+
"================================================================" +
548+
"================================================================"
549+
repeatedTabs = "" +
550+
"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t" +
551+
"\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
552+
)
553+
533554
// Repeat returns a new string consisting of count copies of the string s.
534555
//
535556
// It panics if count is negative or if the result of (len(s) * count)
@@ -557,6 +578,23 @@ func Repeat(s string, count int) string {
557578
return ""
558579
}
559580

581+
// Optimize for commonly repeated strings of relatively short length.
582+
switch s[0] {
583+
case ' ', '-', '0', '=', '\t':
584+
switch {
585+
case n <= len(repeatedSpaces) && HasPrefix(repeatedSpaces, s):
586+
return repeatedSpaces[:n]
587+
case n <= len(repeatedDashes) && HasPrefix(repeatedDashes, s):
588+
return repeatedDashes[:n]
589+
case n <= len(repeatedZeroes) && HasPrefix(repeatedZeroes, s):
590+
return repeatedZeroes[:n]
591+
case n <= len(repeatedEquals) && HasPrefix(repeatedEquals, s):
592+
return repeatedEquals[:n]
593+
case n <= len(repeatedTabs) && HasPrefix(repeatedTabs, s):
594+
return repeatedTabs[:n]
595+
}
596+
}
597+
560598
// Past a certain chunk size it is counterproductive to use
561599
// larger chunks as the source of the write, as when the source
562600
// is too large we are basically just thrashing the CPU D-cache.

Diff for: src/strings/strings_test.go

+20
Original file line numberDiff line numberDiff line change
@@ -1111,6 +1111,13 @@ func TestCaseConsistency(t *testing.T) {
11111111
}
11121112

11131113
var longString = "a" + string(make([]byte, 1<<16)) + "z"
1114+
var longSpaces = func() string {
1115+
b := make([]byte, 200)
1116+
for i := range b {
1117+
b[i] = ' '
1118+
}
1119+
return string(b)
1120+
}()
11141121

11151122
var RepeatTests = []struct {
11161123
in, out string
@@ -1123,6 +1130,12 @@ var RepeatTests = []struct {
11231130
{"-", "-", 1},
11241131
{"-", "----------", 10},
11251132
{"abc ", "abc abc abc ", 3},
1133+
{" ", " ", 1},
1134+
{"--", "----", 2},
1135+
{"===", "======", 2},
1136+
{"000", "000000000", 3},
1137+
{"\t\t\t\t", "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", 4},
1138+
{" ", longSpaces, len(longSpaces)},
11261139
// Tests for results over the chunkLimit
11271140
{string(rune(0)), string(make([]byte, 1<<16)), 1 << 16},
11281141
{longString, longString + longString, 2},
@@ -1925,6 +1938,13 @@ func BenchmarkRepeatLarge(b *testing.B) {
19251938
}
19261939
}
19271940

1941+
func BenchmarkRepeatSpaces(b *testing.B) {
1942+
b.ReportAllocs()
1943+
for i := 0; i < b.N; i++ {
1944+
Repeat(" ", 2)
1945+
}
1946+
}
1947+
19281948
func BenchmarkIndexAnyASCII(b *testing.B) {
19291949
x := Repeat("#", 2048) // Never matches set
19301950
cs := "0123456789abcdefghijklmnopqrstuvwxyz0123456789abcdefghijklmnopqrstuvwxyz"

0 commit comments

Comments
 (0)