diff --git a/compress/_golden/data_compressed_lz4hc.hex b/compress/_golden/data_compressed_lz4hc.hex new file mode 100644 index 00000000..ab9cf987 --- /dev/null +++ b/compress/_golden/data_compressed_lz4hc.hex @@ -0,0 +1,3 @@ +00000000 34 39 ad b3 8d 96 d2 87 bb 3b aa 1e 3f 4b 64 f5 |49.......;..?Kd.| +00000010 82 1d 00 00 00 af 00 00 00 8f 48 65 6c 6c 6f 21 |..........Hello!| +00000020 0a 48 07 00 8d 70 48 65 6c 6c 6f 21 0a |.H...pHello!.| diff --git a/compress/_golden/data_compressed_lz4hc.raw b/compress/_golden/data_compressed_lz4hc.raw new file mode 100644 index 00000000..e3784427 Binary files /dev/null and b/compress/_golden/data_compressed_lz4hc.raw differ diff --git a/proto/_golden/col_json_str.hex b/proto/_golden/col_json_str.hex new file mode 100644 index 00000000..2e639c9f --- /dev/null +++ b/proto/_golden/col_json_str.hex @@ -0,0 +1,6 @@ +00000000 01 00 00 00 00 00 00 00 11 7b 22 78 22 3a 20 35 |.........{"x": 5| +00000010 2c 20 22 79 22 3a 20 31 30 7d 1b 7b 22 61 22 3a |, "y": 10}.{"a":| +00000020 20 22 74 65 73 74 22 2c 20 22 62 22 3a 20 22 74 | "test", "b": "t| +00000030 65 73 74 32 22 7d 21 7b 22 61 22 3a 20 22 6f 62 |est2"}!{"a": "ob| +00000040 6a 20 74 65 73 74 22 2c 20 22 62 22 3a 20 7b 22 |j test", "b": {"| +00000050 63 22 3a 20 32 30 7d 7d |c": 20}}| diff --git a/proto/_golden/col_json_str.raw b/proto/_golden/col_json_str.raw new file mode 100644 index 00000000..9bdfa94e Binary files /dev/null and b/proto/_golden/col_json_str.raw differ diff --git a/proto/_golden/col_json_str_bytes.hex b/proto/_golden/col_json_str_bytes.hex new file mode 100644 index 00000000..2e639c9f --- /dev/null +++ b/proto/_golden/col_json_str_bytes.hex @@ -0,0 +1,6 @@ +00000000 01 00 00 00 00 00 00 00 11 7b 22 78 22 3a 20 35 |.........{"x": 5| +00000010 2c 20 22 79 22 3a 20 31 30 7d 1b 7b 22 61 22 3a |, "y": 10}.{"a":| +00000020 20 22 74 65 73 74 22 2c 20 22 62 22 3a 20 22 74 | "test", "b": "t| +00000030 65 73 74 32 22 7d 21 7b 22 61 22 3a 20 22 6f 62 |est2"}!{"a": "ob| 
+00000040 6a 20 74 65 73 74 22 2c 20 22 62 22 3a 20 7b 22 |j test", "b": {"|
+00000050 63 22 3a 20 32 30 7d 7d |c": 20}}|
diff --git a/proto/_golden/col_json_str_bytes.raw b/proto/_golden/col_json_str_bytes.raw
new file mode 100644
index 00000000..9bdfa94e
Binary files /dev/null and b/proto/_golden/col_json_str_bytes.raw differ
diff --git a/proto/_golden/column_of_json_bytes.hex b/proto/_golden/column_of_json_bytes.hex
new file mode 100644
index 00000000..2e639c9f
--- /dev/null
+++ b/proto/_golden/column_of_json_bytes.hex
@@ -0,0 +1,6 @@
+00000000 01 00 00 00 00 00 00 00 11 7b 22 78 22 3a 20 35 |.........{"x": 5|
+00000010 2c 20 22 79 22 3a 20 31 30 7d 1b 7b 22 61 22 3a |, "y": 10}.{"a":|
+00000020 20 22 74 65 73 74 22 2c 20 22 62 22 3a 20 22 74 | "test", "b": "t|
+00000030 65 73 74 32 22 7d 21 7b 22 61 22 3a 20 22 6f 62 |est2"}!{"a": "ob|
+00000040 6a 20 74 65 73 74 22 2c 20 22 62 22 3a 20 7b 22 |j test", "b": {"|
+00000050 63 22 3a 20 32 30 7d 7d |c": 20}}|
diff --git a/proto/_golden/column_of_json_bytes.raw b/proto/_golden/column_of_json_bytes.raw
new file mode 100644
index 00000000..9bdfa94e
Binary files /dev/null and b/proto/_golden/column_of_json_bytes.raw differ
diff --git a/proto/col_json_str.go b/proto/col_json_str.go
new file mode 100644
index 00000000..110dd8b9
--- /dev/null
+++ b/proto/col_json_str.go
@@ -0,0 +1,158 @@
+package proto
+
+import (
+	"github.com/go-faster/errors"
+)
+
+const JSONStringSerializationVersion uint64 = 1 // version tag written before the rows; DecodeColumn rejects any other value
+
+// ColJSONStr represents JSON column whose rows are carried as strings.
+//
+// Use ColJSONBytes for []byte ColumnOf implementation.
+type ColJSONStr struct {
+	str ColStr
+}
+
+// Append appends string to column.
+func (c *ColJSONStr) Append(v string) {
+	c.str.Append(v)
+}
+
+// AppendBytes appends byte slice as string to column.
+func (c *ColJSONStr) AppendBytes(v []byte) {
+	c.str.AppendBytes(v)
+}
+
+func (c *ColJSONStr) AppendArr(v []string) {
+	c.str.AppendArr(v)
+}
+
+// Compile-time assertions for ColJSONStr.
+var (
+	_ ColInput          = ColJSONStr{}
+	_ ColResult         = (*ColJSONStr)(nil)
+	_ Column            = (*ColJSONStr)(nil)
+	_ ColumnOf[string]  = (*ColJSONStr)(nil)
+	_ Arrayable[string] = (*ColJSONStr)(nil)
+)
+
+// Type returns ColumnType of JSON.
+func (ColJSONStr) Type() ColumnType {
+	return ColumnTypeJSON
+}
+
+// Rows returns count of rows in column.
+func (c ColJSONStr) Rows() int {
+	return c.str.Rows()
+}
+
+// Reset resets data in column, preserving capacity for efficiency.
+func (c *ColJSONStr) Reset() {
+	c.str.Reset()
+}
+
+// EncodeColumn encodes the serialization version followed by JSON rows to *Buffer.
+func (c ColJSONStr) EncodeColumn(b *Buffer) {
+	b.PutUInt64(JSONStringSerializationVersion)
+
+	c.str.EncodeColumn(b)
+}
+
+// WriteColumn writes the serialization version followed by JSON rows to *Writer.
+func (c ColJSONStr) WriteColumn(w *Writer) {
+	w.ChainBuffer(func(b *Buffer) {
+		b.PutUInt64(JSONStringSerializationVersion)
+	})
+
+	c.str.WriteColumn(w)
+}
+
+// ForEach calls f on each string from column.
+func (c ColJSONStr) ForEach(f func(i int, s string) error) error {
+	return c.str.ForEach(f)
+}
+
+// First returns the first row of the column.
+func (c ColJSONStr) First() string {
+	return c.str.First()
+}
+
+// Row returns row with number i.
+func (c ColJSONStr) Row(i int) string {
+	return c.str.Row(i)
+}
+
+// RowBytes returns row with number i as byte slice.
+func (c ColJSONStr) RowBytes(i int) []byte {
+	return c.str.RowBytes(i)
+}
+
+// ForEachBytes calls f on each string from column as byte slice.
+func (c ColJSONStr) ForEachBytes(f func(i int, b []byte) error) error {
+	return c.str.ForEachBytes(f)
+}
+
+// DecodeColumn reads and checks the serialization version, then decodes JSON rows from *Reader.
+func (c *ColJSONStr) DecodeColumn(r *Reader, rows int) error {
+	jsonSerializationVersion, err := r.UInt64()
+	if err != nil {
+		return errors.Wrap(err, "failed to read json serialization version")
+	}
+
+	if jsonSerializationVersion != JSONStringSerializationVersion {
+		return errors.Errorf("received invalid JSON string serialization version %d. Setting \"output_format_native_write_json_as_string\" must be enabled.", jsonSerializationVersion)
+	}
+
+	return c.str.DecodeColumn(r, rows)
+}
+
+// LowCardinality delegates to the underlying string column. NOTE(review): confirm the result reports JSON, not String.
+func (c *ColJSONStr) LowCardinality() *ColLowCardinality[string] {
+	return c.str.LowCardinality()
+}
+
+// Array is helper that creates Array(JSON).
+func (c *ColJSONStr) Array() *ColArr[string] {
+	return &ColArr[string]{Data: c} // wrap c, not c.str, so elements keep the JSON type and version prefix
+}
+
+// Nullable is helper that creates Nullable(JSON).
+func (c *ColJSONStr) Nullable() *ColNullable[string] {
+	return &ColNullable[string]{Values: c} // wrap c, not c.str, so values keep the JSON type and version prefix
+}
+
+// ColJSONBytes is ColJSONStr wrapper to be ColumnOf for []byte.
+type ColJSONBytes struct {
+	ColJSONStr
+}
+
+// Row returns row with number i.
+func (c ColJSONBytes) Row(i int) []byte {
+	return c.RowBytes(i)
+}
+
+// Append byte slice to column.
+func (c *ColJSONBytes) Append(v []byte) {
+	c.AppendBytes(v)
+}
+
+// AppendArr appends slice of byte slices to column.
+func (c *ColJSONBytes) AppendArr(v [][]byte) {
+	for _, s := range v {
+		c.Append(s)
+	}
+}
+
+// Array is helper that creates Array(JSON).
+func (c *ColJSONBytes) Array() *ColArr[[]byte] {
+	return &ColArr[[]byte]{
+		Data: c,
+	}
+}
+
+// Nullable is helper that creates Nullable(JSON).
+func (c *ColJSONBytes) Nullable() *ColNullable[[]byte] {
+	return &ColNullable[[]byte]{
+		Values: c,
+	}
+}
diff --git a/proto/col_json_str_test.go b/proto/col_json_str_test.go
new file mode 100644
index 00000000..f2fd76b9
--- /dev/null
+++ b/proto/col_json_str_test.go
@@ -0,0 +1,150 @@
+package proto
+
+import (
+	"bytes"
+	"io"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+
+	"github.com/ClickHouse/ch-go/internal/gold"
+)
+
+var testJSONValues = []string{ // sample JSON documents shared by the tests below
+	"{\"x\": 5, \"y\": 10}",
+	"{\"a\": \"test\", \"b\": \"test2\"}",
+	"{\"a\": \"obj test\", \"b\": {\"c\": 20}}",
+}
+
+func TestColJSONBytes(t *testing.T) {
+	testColumn(t, "json_bytes", func() ColumnOf[[]byte] {
+		return new(ColJSONBytes)
+	}, []byte(testJSONValues[0]), []byte(testJSONValues[1]), []byte(testJSONValues[2]))
+}
+
+func TestColJSONStr_AppendBytes(t *testing.T) {
+	var data ColJSONStr
+
+	data.AppendBytes([]byte(testJSONValues[0]))
+	data.AppendBytes([]byte(testJSONValues[1]))
+	data.AppendBytes([]byte(testJSONValues[2]))
+
+	var buf Buffer
+	data.EncodeColumn(&buf)
+
+	t.Run("Golden", func(t *testing.T) {
+		gold.Bytes(t, buf.Buf, "col_json_str_bytes")
+	})
+	t.Run("Ok", func(t *testing.T) {
+		br := bytes.NewReader(buf.Buf)
+		r := NewReader(br)
+
+		var dec ColJSONStr
+		require.NoError(t, dec.DecodeColumn(r, 3)) // three rows were appended above
+		require.Equal(t, data, dec)
+
+		t.Run("ForEach", func(t *testing.T) {
+			var output []string
+			f := func(i int, s string) error {
+				output = append(output, s)
+				return nil
+			}
+			require.NoError(t, dec.ForEach(f))
+			require.Equal(t, testJSONValues, output)
+		})
+	})
+}
+
+func TestColJSONStr_EncodeColumn(t *testing.T) {
+	var data ColJSONStr
+
+	input := testJSONValues
+	rows := len(input)
+	for _, s := range input {
+		data.Append(s)
+	}
+
+	var buf Buffer
+	data.EncodeColumn(&buf)
+
+	t.Run("Golden", func(t *testing.T) {
+		gold.Bytes(t, buf.Buf, "col_json_str")
+	})
+	t.Run("Ok", func(t *testing.T) {
+		br := bytes.NewReader(buf.Buf)
+		r := NewReader(br)
+
+		var dec ColJSONStr
+		require.NoError(t, dec.DecodeColumn(r, rows))
+		require.Equal(t, data, dec) // round trip must reproduce the encoded column
+
+		t.Run("ForEach", func(t *testing.T) {
+			var output []string
+			f := func(i int, s string) error {
+				output = append(output, s)
+				return nil
+			}
+			require.NoError(t, dec.ForEach(f))
+			require.Equal(t, input, output)
+		})
+	})
+	t.Run("EOF", func(t *testing.T) {
+		r := NewReader(bytes.NewReader(nil))
+
+		var dec ColJSONStr
+		require.ErrorIs(t, dec.DecodeColumn(r, rows), io.EOF) // reader is empty, so decoding must surface io.EOF
+	})
+}
+
+func BenchmarkColJSONStr_DecodeColumn(b *testing.B) {
+	const rows = 1_000
+	var data ColJSONStr
+	for i := 0; i < rows; i++ {
+		data.Append("{\"x\": 5}")
+	}
+
+	var buf Buffer
+	data.EncodeColumn(&buf)
+
+	br := bytes.NewReader(buf.Buf)
+	r := NewReader(br)
+
+	var dec ColJSONStr
+	if err := dec.DecodeColumn(r, rows); err != nil { // one decode before the timer is reset
+		b.Fatal(err)
+	}
+
+	b.SetBytes(int64(len(buf.Buf)))
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		br.Reset(buf.Buf) // rewind the input for this iteration
+		r.raw.Reset(br) // presumably drops bytes buffered from the previous pass; confirm against Reader
+		dec.Reset()
+
+		if err := dec.DecodeColumn(r, rows); err != nil {
+			b.Fatal(err)
+		}
+	}
+}
+
+func BenchmarkColJSONStr_EncodeColumn(b *testing.B) {
+	const rows = 1_000
+	var data ColJSONStr
+	for i := 0; i < rows; i++ {
+		data.Append("{\"x\": 5}")
+	}
+
+	var buf Buffer
+	data.EncodeColumn(&buf) // encode once up front so len(buf.Buf) is known for SetBytes
+
+	b.SetBytes(int64(len(buf.Buf)))
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		buf.Reset()
+		data.EncodeColumn(&buf)
+	}
+}
diff --git a/proto/column.go b/proto/column.go
index 40ab7cb8..4f0dbff0 100644
--- a/proto/column.go
+++ b/proto/column.go
@@ -250,6 +250,7 @@ const (
 	ColumnTypePoint    ColumnType = "Point"
 	ColumnTypeInterval ColumnType = "Interval"
 	ColumnTypeNothing  ColumnType = "Nothing"
+	ColumnTypeJSON     ColumnType = "JSON"
 )
 
 // colWrap wraps Column with type t.