Skip to content

Commit

Permalink
internal/export/idna: make more space for mapping index
Browse files Browse the repository at this point in the history
This prepares for an upcoming Unicode upgrade.

Beyond Unicode 13 the size of mappings will grow beyond
what can be represented in the allocated 13 bits. Instead
of doubling the size of info, we introduce a table of indices
into the mappings data. This also allows us to remove the
length byte, reducing the overhead of introducing this new
table.

This change allows for about a 5x growth of the number
of mappings, which should suffice for the foreseeable
future.

Change-Id: Id475dc2473145a1f36bd83b983fa4aa170df6206
Reviewed-on: https://go-review.googlesource.com/c/text/+/501515
Run-TryBot: Ian Lance Taylor <iant@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
  • Loading branch information
mpvl authored and rsc committed Jun 12, 2023
1 parent d61dd50 commit 3a7a255
Show file tree
Hide file tree
Showing 5 changed files with 1,620 additions and 1,458 deletions.
15 changes: 10 additions & 5 deletions internal/export/idna/gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ func genTables() {
gen.WriteUnicodeVersion(w)

w.WriteVar("mappings", string(mappings))
w.WriteVar("mappingIndex", mappingIndex)
w.WriteVar("xorData", string(xorData))

sz, err := t.Gen(w, triegen.Compact(&normCompacter{}))
Expand All @@ -116,10 +117,14 @@ func genTables() {
}

var (
// mappings contains replacement strings for mapped runes, each prefixed
// with a byte containing the length of the following string.
// mappings contains replacement strings for mapped runes.
mappings = []byte{}
mapCache = map[string]int{}

// mappingIndex contains an offset in mappings representing the start
// of a mapping. The next entry in mappingIndex points past the end of the
// string.
mappingIndex = []uint16{0}
mapCache = map[string]int{}

// xorData is like mappings, except that it contains XOR data.
// We split these two tables so that we don't get an overflow.
Expand All @@ -133,13 +138,13 @@ func makeEntry(r rune, mapped string) info {

if len(orig) != len(mapped) {
// Store the mapped value as is in the mappings table.
index := len(mappings)
index := len(mappingIndex) - 1
if x, ok := mapCache[mapped]; ok {
index = x
} else {
mapCache[mapped] = index
mappings = append(mappings, byte(len(mapped)))
mappings = append(mappings, mapped...)
mappingIndex = append(mappingIndex, uint16(len(mappings)))
}
return info(index) << indexShift
}
Expand Down
Loading

0 comments on commit 3a7a255

Please # to comment.