Skip to content

Commit

Permalink
Contextful validity errors, null-byte specs fix
Browse files Browse the repository at this point in the history
  • Loading branch information
diamondburned committed Mar 26, 2021
1 parent 2b0e3cd commit 124dee0
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 63 deletions.
168 changes: 105 additions & 63 deletions bottom/bottom.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import (
"fmt"
"io"
"strings"
"unicode/utf8"
)

const (
Expand All @@ -21,22 +20,54 @@ const (
sectionSeparator = "\U0001F449\U0001F448"
)

// characterValues looks up known runes; it returns 0 for unknown runes.
func characterValues(s rune) byte {
switch s {
case char1:
return 1
case char5:
return 5
case char10:
return 10
case char50:
return 50
case char200:
return 200
default:
return 0 // unknown
// sumByte sums up the given part string. If the sum overflows a byte or an
// invalid input is encountered, then an error wrapped with wrapChunkError is
// returned.
func sumByte(part string) (byte, error) {
partRune := []rune(part)

var sum int

for i := 0; i < len(partRune); i++ {
switch r := partRune[i]; r {
case []rune(char0)[0]:
// Edge case: null-byte emoji must have a valid byte after it.
// Ensure that we can peak to the next byte for this.
if i >= len(partRune)-1 {
// The string stopped short when we're expecting another rune,
// so an UnexpectedEOF is valid.
return 0, wrapChunkError(part, io.ErrUnexpectedEOF)
}
if []rune(char0)[1] != partRune[i+1] {
return 0, wrapChunkError(part, InvalidRuneError(r))
}
i++ // skip peeked rune
sum += 0
case char1:
sum += 1
case char5:
sum += 5
case char10:
sum += 10
case char50:
sum += 50
case char200:
sum += 200
default:
return 0, wrapChunkError(part, InvalidRuneError(r))
}
}

if sum > 0xFF {
return 0, wrapChunkError(part, ErrByteOverflow)
}

return byte(sum), nil
}

// wrapChunkError wraps the given error with a "failed to decode chunk" error.
func wrapChunkError(part string, err error) error {
return fmt.Errorf("failed to decode chunk %q: %w", part, err)
}

// valueCharacterBases looks up known values for the corresponding runes; it
Expand All @@ -54,10 +85,13 @@ var valueCharacterBases = [255]string{
var valueCharacters = calculateValueCharacters()

func calculateValueCharacters() [255]string {
var values = []byte{200, 50, 10, 5, 1}
var valueCharacters = [255]string{0: char0}
var buf bytes.Buffer

values := []byte{200, 50, 10, 5, 1}
valueCharacters := [255]string{
0: char0 + sectionSeparator,
}

for i := byte(1); i < 255; i++ {
char := i
for char > 0 {
Expand All @@ -78,6 +112,20 @@ func calculateValueCharacters() [255]string {
return valueCharacters
}

// ErrByteOverflow is returned when the given input string overflows a byte when
// decoded.
var ErrByteOverflow = errors.New("sum overflows byte")

// InvalidRuneError is returned when an invalid rune is encountered when
// decoding.
type InvalidRuneError rune

// Error formats InvalidRuneError to show the quoted rune and the Unicode
// codepoint notation.
func (r InvalidRuneError) Error() string {
return fmt.Sprintf("unexpected rune %q (%U)", rune(r), rune(r))
}

// Encode encodes a string in bottom
func Encode(s string) string {
builder := strings.Builder{}
Expand All @@ -102,15 +150,11 @@ func EncodedLen(s string) int {

// EncodeTo encodes the given string into the writer.
func EncodeTo(out io.StringWriter, s string) error {
var sum int

for _, sChar := range []byte(s) {
n, err := out.WriteString(valueCharacters[sChar])
_, err := out.WriteString(valueCharacters[sChar])
if err != nil {
return err
}

sum += n
}

return nil
Expand All @@ -133,16 +177,24 @@ func EncodeFrom(out io.StringWriter, src io.ByteReader) error {
}
}

// Validate validates a bottom string.
// Validate validates a bottom string. False is returned if the validation
// fails.
func Validate(bottom string) bool {
return DecodedLen(bottom) > -1
}

// DecodedLen validates the given bottom string and returns the calculated
// length. It returns -1 if the given bottom string is invalid.
func DecodedLen(bottom string) int {
l, _ := decodedLen(bottom, true)
return l
}

// decodedLen is the implementation of DecodedLen that returns an error if the
// input bottom string is invalid.
func decodedLen(bottom string, verify bool) (int, error) {
if !strings.HasSuffix(bottom, sectionSeparator) {
return -1
return -1, errors.New("missing trailing separator")
}

// We used to trim the sectionSeparator suffix here, but since our current
Expand All @@ -151,41 +203,35 @@ func DecodedLen(bottom string) int {
//
// This assumption is validated by the above HasSuffix check.

var length, sum int
var length int

for {
m := strings.Index(bottom, sectionSeparator)
if m < 0 {
break
}

sum = 0

for _, r := range bottom[:m] {
v := characterValues(r)
if v == 0 {
return -1
}

// overflow check
if sum += int(v); sum > 0xFF {
return -1
if verify {
_, err := sumByte(string(bottom[:m]))
if err != nil {
return -1, err
}
}

length++
bottom = bottom[m+len(sectionSeparator):]
}

return length
return length, nil
}

// Decode verifies and decodes a bottom string. An error is returned if the
// verification fails.
func Decode(bottom string) (string, error) {
l := DecodedLen(bottom)
if l == -1 {
return "", errors.New("invalid bottom text")
// Skip verification, since we're doing it in the loop.
l, err := decodedLen(bottom, false)
if err != nil {
return "", err
}

builder := strings.Builder{}
Expand All @@ -198,7 +244,12 @@ func Decode(bottom string) (string, error) {
break
}

builder.WriteByte(sumByte(bottom[:m]))
sum, err := sumByte(bottom[:m])
if err != nil {
return "", err
}

builder.WriteByte(sum)
bottom = bottom[m+len(sectionSeparator):]
i++
}
Expand All @@ -214,7 +265,12 @@ func DecodeTo(w io.ByteWriter, bottom string) error {
break
}

if err := w.WriteByte(sumByte(bottom[:m])); err != nil {
sum, err := sumByte(bottom[:m])
if err != nil {
return err
}

if err := w.WriteByte(sum); err != nil {
return err
}

Expand All @@ -224,41 +280,27 @@ func DecodeTo(w io.ByteWriter, bottom string) error {
return nil
}

func sumByte(part string) (sum byte) {
for _, r := range part {
sum += characterValues(r)
}
return
}

// DecodeFrom decodes from a src reader.
func DecodeFrom(w io.ByteWriter, src io.Reader) error {
scanner := bufio.NewScanner(src)
scanner.Split(scanUntilSeparator)

var sum byte
for scanner.Scan() {
sum = 0
bytes := scanner.Bytes()

for len(bytes) > 0 {
r, sz := utf8.DecodeRune(bytes)
if sz == -1 {
return fmt.Errorf("invalid bytes %q", bytes)
}

sum += characterValues(r)
bytes = bytes[sz:]
sum, err := sumByte(scanner.Text())
if err != nil {
return err
}

if err := w.WriteByte(sum); err != nil {
if err := w.WriteByte(byte(sum)); err != nil {
return err
}
}

return scanner.Err()
}

// scanUntilSeparator is used with bufio.Scanner to scan chunks separated by
// sectionSeparator.
func scanUntilSeparator(data []byte, eof bool) (int, []byte, error) {
if eof && len(data) == 0 {
return 0, nil, nil
Expand Down
1 change: 1 addition & 0 deletions bottom/bottom_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ var testCases = []testCase{
{"がんばれ", "🫂✨✨🥺,,👉👈💖💖✨✨🥺,,,,👉👈💖💖✨✨✨✨👉👈🫂✨✨🥺,,👉👈💖💖✨✨✨👉👈💖💖✨✨✨✨🥺,,👉👈🫂✨✨🥺,,👉👈💖💖✨✨🥺,,,,👉👈💖💖💖✨✨🥺,👉👈" +
"🫂✨✨🥺,,👉👈💖💖✨✨✨👉👈💖💖✨✨✨✨👉👈",
},
{"Te\x00st", "💖✨✨✨,,,,👉👈💖💖,👉👈❤️👉👈💖💖✨🥺👉👈💖💖✨🥺,👉👈"},
}

func TestEncode(t *testing.T) {
Expand Down

0 comments on commit 124dee0

Please # to comment.