Skip to content

Commit

Permalink
Merge pull request jupyterlab#9557 from krassowski/empty-csv
Browse files Browse the repository at this point in the history
Fix parsing of empty CSV files
  • Loading branch information
jasongrout authored Jan 6, 2021
2 parents 7050927 + c0f4a8a commit d2e6a23
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 20 deletions.
44 changes: 25 additions & 19 deletions packages/csvviewer/src/model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -423,43 +423,50 @@ export class DSVModel extends DataModel implements IDisposable {
}).ncols;
}

// Parse the data up to and including the requested row, starting from the
// last row offset we have.
// `reparse` is the number of rows we are requesting to parse over again.
// We generally start at the beginning of the last row offset, so that the
// first row offset returned is the same as the last row offset we already
// have. We parse the data up to and including the requested row.
const reparse = this._rowCount! > 0 ? 1 : 0;
const { nrows, offsets } = PARSERS[this._parser]({
data: this._rawData,
startIndex: this._rowOffsets[this._rowCount! - 1],
startIndex: this._rowOffsets[this._rowCount! - reparse] ?? 0,
delimiter: this._delimiter,
rowDelimiter: this._rowDelimiter,
quote: this._quote,
columnOffsets: false,
maxRows: endRow - this._rowCount! + 1
maxRows: endRow - this._rowCount! + reparse
});

// Return if we didn't actually get any new rows beyond the one we've
// already parsed.
if (this._startedParsing && nrows <= 1) {
// If we have already set up our initial bookkeeping, return early if we
// did not get any new rows beyond the rows we asked to reparse, i.e.,
// nrows <= reparse (reparse is 1 once we have rows, and 0 otherwise).
if (this._startedParsing && (nrows <= reparse)) {
this._doneParsing = true;
this._ready.resolve(undefined);
return;
}

this._startedParsing = true;

// Update the row count.
// Update the row count, accounting for how many rows were reparsed.
const oldRowCount = this._rowCount!;
this._rowCount = oldRowCount + nrows - 1;
const duplicateRows = Math.min(nrows, reparse);
this._rowCount = oldRowCount + nrows - duplicateRows;

// If we didn't reach the requested row, we must be done.
if (this._rowCount < endRow) {
this._doneParsing = true;
this._ready.resolve(undefined);
}

// Copy the new offsets into a new row offset array.
const oldRowOffsets = this._rowOffsets;
this._rowOffsets = new Uint32Array(this._rowCount);
this._rowOffsets.set(oldRowOffsets);
this._rowOffsets.set(offsets, oldRowCount - 1);
// Copy the new offsets into a new row offset array if needed.
if (this._rowCount > oldRowCount) {
const oldRowOffsets = this._rowOffsets;
this._rowOffsets = new Uint32Array(this._rowCount);
this._rowOffsets.set(oldRowOffsets);
this._rowOffsets.set(offsets, oldRowCount - duplicateRows);
}

// Expand the column offsets array if needed

Expand Down Expand Up @@ -596,9 +603,8 @@ export class DSVModel extends DataModel implements IDisposable {
private _resetParser(): void {
this._columnCount = undefined;

// First row offset is *always* 0, so we always have the first row offset.
this._rowOffsets = new Uint32Array(1);
this._rowCount = 1;
this._rowOffsets = new Uint32Array(0);
this._rowCount = 0;
this._startedParsing = false;

this._columnOffsets = new Uint32Array(0);
Expand Down Expand Up @@ -631,7 +637,7 @@ export class DSVModel extends DataModel implements IDisposable {

// Data values
private _rawData: string;
private _rowCount: number | undefined = 1;
private _rowCount: number | undefined = 0;
private _columnCount: number | undefined;

// Cache information
Expand All @@ -658,7 +664,7 @@ export class DSVModel extends DataModel implements IDisposable {
/**
* The index for the start of each row.
*/
private _rowOffsets: Uint32Array = new Uint32Array(1);
private _rowOffsets: Uint32Array = new Uint32Array(0);
/**
* The number of rows to parse initially before doing a delayed parse of the
* entire data.
Expand Down
4 changes: 3 additions & 1 deletion packages/csvviewer/test/model.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ const CSV_TEST_FILES = [
],

[
'empty',
'empty_values',
readCSV('csv-spectrum/csvs/empty.csv'),
require('csv-spectrum/json/empty.json')
],
Expand All @@ -30,6 +30,8 @@ const CSV_TEST_FILES = [
require('csv-spectrum/json/empty_crlf.json')
],

['empty_file', '', []],

[
'escaped_quotes',
readCSV('csv-spectrum/csvs/escaped_quotes.csv'),
Expand Down
15 changes: 15 additions & 0 deletions packages/csvviewer/test/parse.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,21 @@ describe('csvviewer/parse', () => {
expect(results.ncols).toEqual(3);
expect(results.offsets).toEqual([0, 2, 5]);
});

it('handles empty file', () => {
  // An empty input string should yield zero rows and no row offsets,
  // whether or not column offsets are requested.
  const options = { data: '', rowDelimiter: '\n' };

  // Row offsets only.
  const rowsOnly = parser({ ...options, columnOffsets: false });
  expect(rowsOnly.nrows).toEqual(0);
  expect(rowsOnly.offsets).toEqual([]);

  // Row and column offsets: the column count is expected to be zero too.
  const withColumns = parser({ ...options, columnOffsets: true });
  expect(withColumns.nrows).toEqual(0);
  expect(withColumns.ncols).toEqual(0);
  expect(withColumns.offsets).toEqual([]);
});
});
});

Expand Down

0 comments on commit d2e6a23

Please sign in to comment.