Skip to content

Commit

Permalink
Csv decoder (#1290)
Browse files Browse the repository at this point in the history
* WIP: adding CSV decoder

* Adding CSV decoder

* Added CSV roundtrip

* Fixing from review
  • Loading branch information
mikefarah authored Aug 1, 2022
1 parent 3c222d8 commit c8815f5
Show file tree
Hide file tree
Showing 20 changed files with 837 additions and 87 deletions.
47 changes: 47 additions & 0 deletions acceptance_tests/inputs-format.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
# setUp deletes fixture files (test*.yml, .properties, .csv, .tsv, .xml) left
# in the working directory so a test does not see files written by another.
setUp() {
  # rm exits non-zero when a pattern matches no file; hide the message and
  # force success so a missing fixture is not treated as a failure.
  for ext in yml properties csv tsv xml; do
    rm test*."$ext" 2>/dev/null || true
  done
}

Expand Down Expand Up @@ -40,6 +42,51 @@ EOM
assertEquals "$expected" "$X"
}

# testInputCSV checks that a CSV file read with -p=csv is printed as a YAML
# array of objects keyed by the header row, for both the `e` and `ea` commands.
testInputCSV() {
  cat >test.csv <<EOL
fruit,yumLevel
apple,5
banana,4
EOL

  # Keys after the first one in each list item must be indented by two spaces;
  # without that indent the expected text is not the YAML yq prints.
  read -r -d '' expected << EOM
- fruit: apple
  yumLevel: 5
- fruit: banana
  yumLevel: 4
EOM

  X=$(./yq e -p=csv test.csv)
  assertEquals "$expected" "$X"

  X=$(./yq ea -p=csv test.csv)
  assertEquals "$expected" "$X"
}

# testInputTSV checks that a tab-separated file read with -p=t is printed as a
# YAML array of objects keyed by the header row, for both `e` and `ea`.
testInputTSV() {
  # Write the fixture with printf so the column separators are real tab
  # characters; literal tabs in a heredoc are easily lost to editors.
  printf 'fruit\tyumLevel\napple\t5\nbanana\t4\n' >test.tsv

  # Keys after the first one in each list item must be indented by two spaces.
  read -r -d '' expected << EOM
- fruit: apple
  yumLevel: 5
- fruit: banana
  yumLevel: 4
EOM

  X=$(./yq e -p=t test.tsv)
  assertEquals "$expected" "$X"

  X=$(./yq ea -p=t test.tsv)
  assertEquals "$expected" "$X"
}




testInputXml() {
cat >test.yml <<EOL
<cat legs="4">BiBi</cat>
Expand Down
42 changes: 42 additions & 0 deletions acceptance_tests/output-format.sh
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,48 @@ EOM
assertEquals "$expected" "$X"
}

# testOutputCSV checks that a YAML array of objects is written as CSV with a
# header row, using both the short (-o=c) and long (-o=csv) format names.
testOutputCSV() {
  # The second key of each list item must be indented to belong to the same
  # mapping; at column 0 the document is not valid YAML.
  cat >test.yml <<EOL
- fruit: apple
  yumLevel: 5
- fruit: banana
  yumLevel: 4
EOL

  read -r -d '' expected << EOM
fruit,yumLevel
apple,5
banana,4
EOM

  X=$(./yq -o=c test.yml)
  assertEquals "$expected" "$X"

  X=$(./yq ea -o=csv test.yml)
  assertEquals "$expected" "$X"
}

# testOutputTSV checks that a YAML array of objects is written as tab-separated
# values with a header row, using both -o=t and -o=tsv.
testOutputTSV() {
  # The second key of each list item must be indented to belong to the same
  # mapping; at column 0 the document is not valid YAML.
  cat >test.yml <<EOL
- fruit: apple
  yumLevel: 5
- fruit: banana
  yumLevel: 4
EOL

  # Build the expectation with printf so the separators are real tabs.
  # Command substitution drops the trailing newline, as it does for $X below.
  expected=$(printf 'fruit\tyumLevel\napple\t5\nbanana\t4\n')

  X=$(./yq -o=t test.yml)
  assertEquals "$expected" "$X"

  X=$(./yq ea -o=tsv test.yml)
  assertEquals "$expected" "$X"
}

testOutputXml() {
cat >test.yml <<EOL
a: {b: {c: ["cat"]}}
Expand Down
4 changes: 4 additions & 0 deletions cmd/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ func configureDecoder() (yqlib.Decoder, error) {
return yqlib.NewPropertiesDecoder(), nil
case yqlib.JsonInputFormat:
return yqlib.NewJSONDecoder(), nil
case yqlib.CSVObjectInputFormat:
return yqlib.NewCSVObjectDecoder(','), nil
case yqlib.TSVObjectInputFormat:
return yqlib.NewCSVObjectDecoder('\t'), nil
}

return yqlib.NewYamlDecoder(), nil
Expand Down
3 changes: 3 additions & 0 deletions examples/sample_objects.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
name,numberOfCats,likesApples,height
Gary,1,true,168.8
Samantha's Rabbit,2,false,-188.8
273 changes: 273 additions & 0 deletions pkg/yqlib/csv_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
package yqlib

import (
"bufio"
"fmt"
"testing"

"github.com/mikefarah/yq/v4/test"
)

// csvSimple is a CSV document with a header row and two records.
const csvSimple = `name,numberOfCats,likesApples,height
Gary,1,true,168.8
Samantha's Rabbit,2,false,-188.8
`

// expectedUpdatedSimpleCsv is csvSimple with Gary's numberOfCats set to 3.
const expectedUpdatedSimpleCsv = `name,numberOfCats,likesApples,height
Gary,3,true,168.8
Samantha's Rabbit,2,false,-188.8
`

// csvSimpleShort holds only the name and cat-count columns under custom headers.
const csvSimpleShort = `Name,Number of Cats
Gary,1
Samantha's Rabbit,2
`

// tsvSimple is csvSimple with tab separators. The tabs are written as escapes
// so they stay visible and cannot be silently converted to spaces.
const tsvSimple = "name\tnumberOfCats\tlikesApples\theight\n" +
	"Gary\t1\ttrue\t168.8\n" +
	"Samantha's Rabbit\t2\tfalse\t-188.8\n"

// expectedYamlFromCSV is the array-of-objects YAML form of csvSimple. Keys
// after the first in each list item are indented two spaces so they belong to
// the same mapping.
const expectedYamlFromCSV = `- name: Gary
  numberOfCats: 1
  likesApples: true
  height: 168.8
- name: Samantha's Rabbit
  numberOfCats: 2
  likesApples: false
  height: -188.8
`

// expectedYamlFromCSVMissingData is YAML input whose two objects do not share
// the same keys: the first lacks likesApples, the second lacks numberOfCats.
const expectedYamlFromCSVMissingData = `- name: Gary
  numberOfCats: 1
  height: 168.8
- name: Samantha's Rabbit
  height: -188.8
  likesApples: false
`

// csvSimpleMissingData is the CSV expected from expectedYamlFromCSVMissingData.
const csvSimpleMissingData = `name,numberOfCats,height
Gary,1,168.8
Samantha's Rabbit,,-188.8
`

// csvTestSimpleYaml is an array of arrays with rows of different lengths.
const csvTestSimpleYaml = `- [i, like, csv]
- [because, excel, is, cool]`

// expectedSimpleCsv is csvTestSimpleYaml encoded as CSV.
const expectedSimpleCsv = `i,like,csv
because,excel,is,cool
`

// tsvTestExpectedSimpleCsv is csvTestSimpleYaml encoded as TSV; tabs are
// written as escapes for the same reason as in tsvSimple.
const tsvTestExpectedSimpleCsv = "i\tlike\tcsv\n" +
	"because\texcel\tis\tcool\n"

// csvScenarios lists the CSV/TSV encode, decode and round-trip cases run by
// TestCSVScenarios. Entries with skipDoc set are tested but left out of the
// generated documentation; description and subdescription are copied into
// that documentation.
var csvScenarios = []formatScenario{
	{
		description:  "Encode CSV simple",
		input:        csvTestSimpleYaml,
		expected:     expectedSimpleCsv,
		scenarioType: "encode-csv",
	},
	{
		description:  "Encode TSV simple",
		input:        csvTestSimpleYaml,
		expected:     tsvTestExpectedSimpleCsv,
		scenarioType: "encode-tsv",
	},
	{
		description:  "Encode Empty",
		skipDoc:      true,
		input:        `[]`,
		expected:     "",
		scenarioType: "encode-csv",
	},
	{
		description:  "Comma in value",
		skipDoc:      true,
		input:        `["comma, in, value", things]`,
		expected:     "\"comma, in, value\",things\n",
		scenarioType: "encode-csv",
	},
	{
		description:  "Encode array of objects to csv",
		input:        expectedYamlFromCSV,
		expected:     csvSimple,
		scenarioType: "encode-csv",
	},
	{
		description:    "Encode array of objects to custom csv format",
		subdescription: "Add the header row manually, then we convert each object into an array of values - resulting in an array of arrays. Pick the columns and call the header whatever you like.",
		input:          expectedYamlFromCSV,
		expected:       csvSimpleShort,
		expression:     `[["Name", "Number of Cats"]] + [.[] | [.name, .numberOfCats ]]`,
		scenarioType:   "encode-csv",
	},
	{
		description:    "Encode array of objects to csv - missing fields behaviour",
		subdescription: "First entry is used to determine the headers, and it is missing 'likesApples', so it is not included in the csv. Second entry does not have 'numberOfCats' so that is blank",
		input:          expectedYamlFromCSVMissingData,
		expected:       csvSimpleMissingData,
		scenarioType:   "encode-csv",
	},
	{
		description:    "Parse CSV into an array of objects",
		subdescription: "First row is assumed to be the header row.",
		input:          csvSimple,
		expected:       expectedYamlFromCSV,
		scenarioType:   "decode-csv-object",
	},
	{
		description:    "Parse TSV into an array of objects",
		subdescription: "First row is assumed to be the header row.",
		input:          tsvSimple,
		expected:       expectedYamlFromCSV,
		scenarioType:   "decode-tsv-object",
	},
	{
		description:  "Round trip",
		input:        csvSimple,
		expected:     expectedUpdatedSimpleCsv,
		expression:   `(.[] | select(.name == "Gary") | .numberOfCats) = 3`,
		scenarioType: "roundtrip-csv",
	},
}

// testCSVScenario runs one scenario through the decoder/encoder pair chosen by
// its scenarioType and asserts that the output equals s.expected. It panics on
// a scenarioType it does not know.
func testCSVScenario(t *testing.T, s formatScenario) {
	// One check per scenario type; each builds its own decoder and encoder.
	checks := map[string]func(){
		"encode-csv": func() {
			test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewYamlDecoder(), NewCsvEncoder(',')), s.description)
		},
		"encode-tsv": func() {
			test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewYamlDecoder(), NewCsvEncoder('\t')), s.description)
		},
		"decode-csv-object": func() {
			test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewCSVObjectDecoder(','), NewYamlEncoder(2, false, true, true)), s.description)
		},
		"decode-tsv-object": func() {
			test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewCSVObjectDecoder('\t'), NewYamlEncoder(2, false, true, true)), s.description)
		},
		"roundtrip-csv": func() {
			test.AssertResultWithContext(t, s.expected, processFormatScenario(s, NewCSVObjectDecoder(','), NewCsvEncoder(',')), s.description)
		},
	}

	check, known := checks[s.scenarioType]
	if !known {
		panic(fmt.Sprintf("unhandled scenario type %q", s.scenarioType))
	}
	check()
}

// documentCSVDecodeObjectScenario writes the markdown section for a decode
// scenario: heading, optional subdescription, the sample input, the yq command
// and the YAML it produces. formatType is used as the file extension, the code
// fence language and the -p value; "tsv" selects a tab separator, anything
// else a comma. The t parameter is not used.
func documentCSVDecodeObjectScenario(t *testing.T, w *bufio.Writer, s formatScenario, formatType string) {
	writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))

	if len(s.subdescription) > 0 {
		writeOrPanic(w, s.subdescription)
		writeOrPanic(w, "\n\n")
	}

	intro := []string{
		fmt.Sprintf("Given a sample.%v file of:\n", formatType),
		fmt.Sprintf("```%v\n%v\n```\n", formatType, s.input),
		"then\n",
		fmt.Sprintf("```bash\nyq -p=%v sample.%v\n```\n", formatType, formatType),
		"will output\n",
	}
	for _, chunk := range intro {
		writeOrPanic(w, chunk)
	}

	delimiter := '\t'
	if formatType != "tsv" {
		delimiter = ','
	}

	// The scenario is processed only after the introduction has been written.
	decoder := NewCSVObjectDecoder(delimiter)
	encoder := NewYamlEncoder(s.indent, false, true, true)
	writeOrPanic(w, fmt.Sprintf("```yaml\n%v```\n\n", processFormatScenario(s, decoder, encoder)))
}

// documentCSVEncodeScenario writes the markdown section for an encode
// scenario: heading, optional subdescription, the YAML input, the yq command
// (with the scenario's expression when it has one) and the encoded output.
// formatType is used as the -o value and the code fence language; "tsv"
// selects a tab separator, anything else a comma.
func documentCSVEncodeScenario(w *bufio.Writer, s formatScenario, formatType string) {
	writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))

	if len(s.subdescription) > 0 {
		writeOrPanic(w, s.subdescription)
		writeOrPanic(w, "\n\n")
	}

	writeOrPanic(w, "Given a sample.yml file of:\n")
	writeOrPanic(w, fmt.Sprintf("```yaml\n%v\n```\n", s.input))

	writeOrPanic(w, "then\n")

	// Show the expression in the command only when the scenario defines one.
	command := fmt.Sprintf("```bash\nyq -o=%v sample.yml\n```\n", formatType)
	if s.expression != "" {
		command = fmt.Sprintf("```bash\nyq -o=%v '%v' sample.yml\n```\n", formatType, s.expression)
	}
	writeOrPanic(w, command)
	writeOrPanic(w, "will output\n")

	delimiter := '\t'
	if formatType != "tsv" {
		delimiter = ','
	}

	decoder := NewYamlDecoder()
	encoder := NewCsvEncoder(delimiter)
	writeOrPanic(w, fmt.Sprintf("```%v\n%v```\n\n", formatType, processFormatScenario(s, decoder, encoder)))
}

// documentCSVRoundTripScenario writes the markdown section for a round-trip
// scenario, where the same format is both read and written: heading, optional
// subdescription, the sample input, the yq command (with the scenario's
// expression when it has one) and the resulting output. formatType is used as
// the file extension, the -p and -o values and the code fence language; "tsv"
// selects a tab separator, anything else a comma.
func documentCSVRoundTripScenario(w *bufio.Writer, s formatScenario, formatType string) {
	writeOrPanic(w, fmt.Sprintf("## %v\n", s.description))

	if len(s.subdescription) > 0 {
		writeOrPanic(w, s.subdescription)
		writeOrPanic(w, "\n\n")
	}

	writeOrPanic(w, fmt.Sprintf("Given a sample.%v file of:\n", formatType))
	writeOrPanic(w, fmt.Sprintf("```%v\n%v\n```\n", formatType, s.input))

	writeOrPanic(w, "then\n")

	// Show the expression in the command only when the scenario defines one.
	command := fmt.Sprintf("```bash\nyq -p=%v -o=%v sample.%v\n```\n", formatType, formatType, formatType)
	if s.expression != "" {
		command = fmt.Sprintf("```bash\nyq -p=%v -o=%v '%v' sample.%v\n```\n", formatType, formatType, s.expression, formatType)
	}
	writeOrPanic(w, command)
	writeOrPanic(w, "will output\n")

	delimiter := '\t'
	if formatType != "tsv" {
		delimiter = ','
	}

	// The same delimiter is used on the way in and on the way out.
	decoder := NewCSVObjectDecoder(delimiter)
	encoder := NewCsvEncoder(delimiter)
	writeOrPanic(w, fmt.Sprintf("```%v\n%v```\n\n", formatType, processFormatScenario(s, decoder, encoder)))
}

// documentCSVScenario writes the documentation for one scenario by handing it
// to the writer that matches its scenarioType. i must hold a formatScenario
// (the type assertion panics otherwise). Scenarios with skipDoc set produce no
// output, and an unknown scenarioType panics.
func documentCSVScenario(t *testing.T, w *bufio.Writer, i interface{}) {
	scenario := i.(formatScenario)
	if scenario.skipDoc {
		return
	}

	encode := func(format string) { documentCSVEncodeScenario(w, scenario, format) }
	decode := func(format string) { documentCSVDecodeObjectScenario(t, w, scenario, format) }

	switch kind := scenario.scenarioType; kind {
	case "encode-csv":
		encode("csv")
	case "encode-tsv":
		encode("tsv")
	case "decode-csv-object":
		decode("csv")
	case "decode-tsv-object":
		decode("tsv")
	case "roundtrip-csv":
		documentCSVRoundTripScenario(w, scenario, "csv")
	default:
		panic(fmt.Sprintf("unhandled scenario type %q", kind))
	}
}

// TestCSVScenarios runs every entry in csvScenarios, then passes the same
// entries to documentScenarios together with documentCSVScenario.
func TestCSVScenarios(t *testing.T) {
	// documentScenarios takes []interface{}, so collect the scenarios in that
	// form while they are being tested.
	genericScenarios := make([]interface{}, 0, len(csvScenarios))
	for _, scenario := range csvScenarios {
		testCSVScenario(t, scenario)
		genericScenarios = append(genericScenarios, scenario)
	}
	documentScenarios(t, "usage", "csv-tsv", genericScenarios, documentCSVScenario)
}
Loading

0 comments on commit c8815f5

Please sign in to comment.