Skip to content

Commit

Permalink
Merge pull request #2503 from DennisHeimbigner/zarrfixes4.dmh
Browse files Browse the repository at this point in the history
Fix some addtional errors in NCZarr
  • Loading branch information
WardF authored Sep 14, 2022
2 parents dc46ac6 + db86c2c commit 8173d96
Show file tree
Hide file tree
Showing 21 changed files with 193 additions and 49 deletions.
1 change: 1 addition & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ This file contains a high-level description of this package's evolution. Release

## 4.9.1 - T.B.D.

* [Bug Fix] Fix some errors detected in PR 2497. [PR #2497](https://github.com/Unidata/netcdf-c/pull/2497) . See [Github #2503](https://github.com/Unidata/netcdf-c/pull/2503).
* [Bug Fix] Split the remote tests into two parts: one for the remotetest server and one for all other external servers. Also add a configure option to enable the latter set. See [Github #2491](https://github.com/Unidata/netcdf-c/pull/2491).
* [Bug Fix] Fix blosc plugin errors. See [Github #2461](https://github.com/Unidata/netcdf-c/pull/2461).
* [Bug Fix] Fix support for reading arrays of HDF5 fixed size strings. See [Github #2466](https://github.com/Unidata/netcdf-c/pull/2466).
Expand Down
13 changes: 6 additions & 7 deletions docs/nczarr.md
Original file line number Diff line number Diff line change
Expand Up @@ -835,12 +835,12 @@ There are mutiple cases to consider.
3. The netcdf attribute **is** of type NC_CHAR and its value – taken as a single sequence of characters –
**is** parseable as a legal JSON expression.
* Parse to produce a JSON expression and write that expression.
* Use "|S1" as the dtype and store in the NCZarr metadata.
* Use "|U1" as the dtype and store in the NCZarr metadata.
4. The netcdf attribute **is** of type NC_CHAR and its value – taken as a single sequence of characters –
**is not** parseable as a legal JSON expression.
* Convert to a JSON string and write that expression
* Use "|S1" as the dtype and store in the NCZarr metadata.
* Use "|U1" as the dtype and store in the NCZarr metadata.
## Reading an attribute:
Expand Down Expand Up @@ -915,12 +915,11 @@ and the netcdf-c NC_CHAR type such that if a pure zarr
implementation reads them, it will still work.
For writing variables and NCZarr attributes, the type mapping is as follows:
* "|S1" for NC_CHAR.
* ">S1" for NC_STRING && MAXSTRLEN==1
* ">Sn" for NC_STRING && MAXSTRLEN==n
* ">S1" for NC_CHAR.
* "|S1" for NC_STRING && MAXSTRLEN==1
* "|Sn" for NC_STRING && MAXSTRLEN==n
Note that it is a bit of a hack to use endianness, but it should be ok since for
string/char, the endianness has no meaning.
Admittedly, this encoding is a bit of a hack.
So when reading data with a pure zarr implementaion
the above types should always appear as strings,
Expand Down
40 changes: 39 additions & 1 deletion libdispatch/dfile.c
Original file line number Diff line number Diff line change
Expand Up @@ -2031,6 +2031,7 @@ NC_open(const char *path0, int omode, int basepe, size_t *chunksizehintp,
}

/* Suppress unsupported formats */
#if 0
/* (should be more compact, table-driven, way to do this) */
{
int hdf5built = 0;
Expand All @@ -2041,10 +2042,10 @@ NC_open(const char *path0, int omode, int basepe, size_t *chunksizehintp,
int nczarrbuilt = 0;
#ifdef USE_NETCDF4
hdf5built = 1;
#endif
#ifdef USE_HDF4
hdf4built = 1;
#endif
#endif
#ifdef ENABLE_CDF5
cdf5built = 1;
#endif
Expand All @@ -2069,6 +2070,43 @@ NC_open(const char *path0, int omode, int basepe, size_t *chunksizehintp,
if(!udf1built && model.impl == NC_FORMATX_UDF1)
{stat = NC_ENOTBUILT; goto done;}
}
#else
{
unsigned built = 0 /* leave off the trailing semicolon so we can build constant */
| (1<<NC_FORMATX_NC3) /* NC3 always supported */
#ifdef USE_HDF5
| (1<<NC_FORMATX_NC_HDF5)
#endif
#ifdef USE_HDF4
| (1<<NC_FORMATX_NC_HDF4)
#endif
#ifdef ENABLE_NCZARR
| (1<<NC_FORMATX_NCZARR)
#endif
#ifdef ENABLE_DAP
| (1<<NC_FORMATX_DAP2)
#endif
#ifdef ENABLE_DAP4
| (1<<NC_FORMATX_DAP4)
#endif
#ifdef USE_PNETCDF
| (1<<NC_FORMATX_PNETCDF)
#endif
; /* end of the built flags */
if(UDF0_dispatch_table != NULL)
built |= (1<<NC_FORMATX_UDF0);
if(UDF1_dispatch_table != NULL)
built |= (1<<NC_FORMATX_UDF1);
/* Verify */
if((built & (1 << model.impl)) == 0)
{stat = NC_ENOTBUILT; goto done;}
#ifndef ENABLE_CDF5
/* Special case because there is no separate CDF5 dispatcher */
if(model.impl == NC_FORMATX_NC3 && (omode & NC_64BIT_DATA))
{stat = NC_ENOTBUILT; goto done;}
#endif
}
#endif
/* Figure out what dispatcher to use */
if (!dispatcher) {
switch (model.impl) {
Expand Down
6 changes: 3 additions & 3 deletions libnczarr/zdebug.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
#ifndef ZDEBUG_H
#define ZDEBUG_H

#undef ZCATCH /* Warning: significant performance impact */
#undef ZTRACING /* Warning: significant performance impact */

#undef ZDEBUG /* general debug */
#undef ZDEBUG1 /* detailed debug */

#define ZCATCH /* Warning: significant performance impact */
#define ZTRACING /* Warning: significant performance impact */

#include "ncexternl.h"
#include "nclog.h"

Expand Down
11 changes: 10 additions & 1 deletion libnczarr/zsync.c
Original file line number Diff line number Diff line change
Expand Up @@ -986,6 +986,11 @@ zconvert(NCjson* src, nc_type typeid, size_t typelen, int* countp, NCbytes* dst)
if(typeid == NC_CHAR) {
if((stat = zcharify(src,dst))) goto done;
count = ncbyteslength(dst);
/* Special case for "" */
if(count == 0) {
ncbytesappend(dst,'\0');
count = 1;
}
} else {
if((stat = NCZ_convert1(src, typeid, dst))) goto done;
count = 1;
Expand Down Expand Up @@ -1511,13 +1516,17 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
if((stat = ncz_gettype(file,grp,vtype,&var->type_info)))
goto done;
} else {stat = NC_EBADTYPE; goto done;}
#if 0 /* leave native in place */
if(endianness == NC_ENDIAN_NATIVE)
endianness = zinfo->native_endianness;
if(endianness == NC_ENDIAN_NATIVE)
endianness = (NCZ_isLittleEndian()?NC_ENDIAN_LITTLE:NC_ENDIAN_BIG);
if(endianness == NC_ENDIAN_LITTLE || endianness == NC_ENDIAN_BIG) {
var->endianness = endianness;
} else {stat = NC_EBADTYPE; goto done;}
#else
var->endianness = endianness;
#endif
var->type_info->endianness = var->endianness; /* Propagate */
if(vtype == NC_STRING) {
zvar->maxstrlen = vtypelen;
Expand Down Expand Up @@ -2244,7 +2253,7 @@ ncz_get_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var)
/* Remember that we have read the metadata for this var. */
var->meta_read = NC_TRUE;
done:
return retval;
return ZUNTRACE(retval);
}

#if 0
Expand Down
40 changes: 20 additions & 20 deletions libnczarr/zutil.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@ to decide how to infer the type: NC_STRING vs NC_CHAR.
Solution:
For variables and for NCZarr type attributes, distinquish by using:
* "|S1" for NC_CHAR.
* ">S1" for NC_STRING && MAXSTRLEN==1
It is a bit of a hack to use endianness, but it should be ok since for
string/char, the endianness has no meaning.
* ">S1" for NC_CHAR.
* "|S1" for NC_STRING && MAXSTRLEN==1
* "|Sn" for NC_STRING && MAXSTRLEN==n
This is admittedly a bit of a hack, and the first case in particular
will probably cause errors in some other Zarr implementations; the Zarr
spec is unclear about what combinations are legal.
Note that we could use "|U1", but since this is utf-16 or utf-32
in python, it may cause problems when reading what amounts to utf-8.
Expand All @@ -46,7 +48,7 @@ For attributes, we infer:
- e.g. string var:strattr = \"abc\", \"def\"" => NC_STRING
Note also that if we read a pure zarr file we will probably always
see "|S1", so we will never see a variable of type NC_STRING with length == 1.
see "|S1", so we will never see a variable of type NC_CHAR.
We might however see an attribute of type string.
*/
static const struct ZTYPES {
Expand All @@ -57,7 +59,7 @@ static const struct ZTYPES {
NE LE BE NE LE BE*/
/*NC_NAT*/ {{NULL,NULL,NULL}, {NULL,NULL,NULL}},
/*NC_BYTE*/ {{"|i1","<i1",">i1"},{"|i1","<i1",">i1"}},
/*NC_CHAR*/ {{"|S1","|S1","|S1"},{"|S1","|S1","|S1"}},
/*NC_CHAR*/ {{">S1",">S1",">S1"},{">S1",">S1",">S1"}},
/*NC_SHORT*/ {{"|i2","<i2",">i2"},{"|i2","<i2",">i2"}},
/*NC_INT*/ {{"|i4","<i4",">i4"},{"|i4","<i4",">i4"}},
/*NC_FLOAT*/ {{"|f4","<f4",">f4"},{"|f4","<f4",">f4"}},
Expand All @@ -67,7 +69,7 @@ static const struct ZTYPES {
/*NC_UINT*/ {{"|u4","<u4",">u4"},{"|u4","<u4",">u4"}},
/*NC_INT64*/ {{"|i8","<i8",">i8"},{"|i8","<i8",">i8"}},
/*NC_UINT64*/ {{"|u8","<u8",">u8"},{"|u8","<u8",">u8"}},
/*NC_STRING*/ {{">S%d",">S%d",">S%d"},{">S%d",">S%d",">S%d"}},
/*NC_STRING*/ {{"|S%d","|S%d","|S%d"},{"|S%d","|S%d","|S%d"}},
};

#if 0
Expand Down Expand Up @@ -576,7 +578,6 @@ ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nct
switch (*p++) {
case '<': endianness = NC_ENDIAN_LITTLE; break;
case '>': endianness = NC_ENDIAN_BIG; break;
case '=': endianness = NC_ENDIAN_NATIVE; break;
case '|': endianness = NC_ENDIAN_NATIVE; break;
default: p--; endianness = NC_ENDIAN_NATIVE; break;
}
Expand All @@ -589,20 +590,17 @@ ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nct
/* Short circuit fixed length strings */
if(tchar == 'S') {
/* Fixed length string */
switch (typehint) {
case NC_CHAR: nctype = NC_CHAR; typelen = 1; break;
case NC_STRING: nctype = NC_STRING; break;
switch (typelen) {
case 1:
nctype = (endianness == NC_ENDIAN_BIG ? NC_CHAR : NC_STRING);
if(purezarr) nctype = NC_STRING; /* Zarr has no NC_CHAR type */
break;
default:
if(typelen == 1) {/* so |S1 => NC_CHAR */
if(purezarr || endianness == NC_ENDIAN_NATIVE) nctype = NC_CHAR;
} else
nctype = NC_STRING;
nctype = NC_STRING;
break;
}
#if 0
} else if(tchar == 'U') {/*back compatibility*/
if(purezarr || typelen != 1) goto zerr;
nctype = NC_CHAR;
#endif
/* String/char have no endianness */
endianness = NC_ENDIAN_NATIVE;
} else {
switch(typelen) {
case 1:
Expand Down Expand Up @@ -639,9 +637,11 @@ ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nct
}
}

#if 0
/* Convert NC_ENDIAN_NATIVE and NC_ENDIAN_NA */
if(endianness == NC_ENDIAN_NATIVE)
endianness = (NC_isLittleEndian()?NC_ENDIAN_LITTLE:NC_ENDIAN_BIG);
#endif

if(nctypep) *nctypep = nctype;
if(typelenp) *typelenp = typelen;
Expand Down
5 changes: 2 additions & 3 deletions nc_test/tst_pnetcdf.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

int main(int argc, char* argv[])
{
int i, j, rank, nprocs, ncid, cmode, varid[NVARS], dimid[2], *buf;
int i, j, rank, nprocs, ncid, cmode, varid[NVARS], dimid[2], *buf=NULL;
int st, nerrs=0;
char str[32];
size_t start[2], count[2];
Expand Down Expand Up @@ -143,9 +143,8 @@ int main(int argc, char* argv[])
}
}
st = nc_close(ncid); CHK_ERR(st)
free(buf);

fn_exit:
free(buf);
MPI_Finalize();
err = nerrs;
SUMMARIZE_ERR;
Expand Down
2 changes: 1 addition & 1 deletion nc_test4/tst_specific_filters.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ if test "x$TESTNCZARR" = x1 ; then
BLOSCARGS="32001,0,0,0,256,5,1,1"
BLOSCCODEC='[{\"id\": \"blosc\",\"clevel\": 5,\"blocksize\": 256,\"cname\": \"lz4\",\"shuffle\": 1}]'
else
BLOSCARGS="32001,0,0,0,256,5,1,1"
BLOSCARGS="32001,0,0,4,256,5,1,1"
BLOSCCODEC='[{\"id\": \"blosc\",\"clevel\": 5,\"blocksize\": 256,\"cname\": \"lz4\",\"shuffle\": 1}]'
fi

Expand Down
4 changes: 2 additions & 2 deletions ncdump/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,10 @@ erealloc (void* p0, /* check return from realloc */
}

void
check(int err, const char* file, const int line)
check(int err, const char* file, const char* fcn, const int line)
{
fprintf(stderr,"%s\n",nc_strerror(err));
fprintf(stderr,"Location: file %s; line %d\n", file,line);
fprintf(stderr,"Location: file %s; fcn %s line %d\n",(file?file:"?"),(fcn?fcn:"?"),line);
fflush(stderr); fflush(stdout);
exit(1);
}
Expand Down
6 changes: 3 additions & 3 deletions ncdump/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@ extern "C" {
* debian package builders. They already use NDEBUG to turn off the
* file names in asserts. */
#ifdef NDEBUG
#define NC_CHECK(fncall) {int ncstat=fncall;if(ncstat!=NC_NOERR)check(ncstat,"",__LINE__);}
#define NC_CHECK(fncall) {int ncstat=fncall;if(ncstat!=NC_NOERR)check(ncstat,NULL,NULL,__LINE__);}
#else
#define NC_CHECK(fncall) {int ncstat=fncall;if(ncstat!=NC_NOERR)check(ncstat,__FILE__,__LINE__);}
#define NC_CHECK(fncall) {int ncstat=fncall;if(ncstat!=NC_NOERR)check(ncstat,__FILE__,__func__,__LINE__);}
#endif /* NDEBUG */

/* Print error message to stderr and exit */
Expand All @@ -88,7 +88,7 @@ extern void* ecalloc ( size_t size );
extern void* erealloc (void* p, size_t size );

/* Check error return. If bad, print error message and exit. */
extern void check(int err, const char* file, const int line);
extern void check(int err, const char* file, const char* fcn, const int line);

/* Return malloced name with chars special to CDL escaped. */
char* escaped_name(const char* cp);
Expand Down
1 change: 1 addition & 0 deletions nczarr_test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ IF(ENABLE_TESTS)
add_sh_test(nczarr_test run_jsonconvention)
add_sh_test(nczarr_test run_strings)
add_sh_test(nczarr_test run_scalar)
add_sh_test(nczarr_test run_nulls)

BUILD_BIN_TEST(test_quantize ${TSTCOMMONSRC})
add_sh_test(nczarr_test run_quantize)
Expand Down
6 changes: 4 additions & 2 deletions nczarr_test/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ TESTS += run_nczarr_fill.sh
TESTS += run_jsonconvention.sh
TESTS += run_strings.sh
TESTS += run_scalar.sh
TESTS += run_nulls.sh
endif

if BUILD_UTILITIES
Expand Down Expand Up @@ -131,7 +132,7 @@ run_purezarr.sh run_interop.sh run_misc.sh \
run_filter.sh run_specific_filters.sh \
run_newformat.sh run_nczarr_fill.sh run_quantize.sh \
run_jsonconvention.sh run_nczfilter.sh run_unknown.sh \
run_scalar.sh run_strings.sh
run_scalar.sh run_strings.sh run_nulls.sh

EXTRA_DIST += \
ref_ut_map_create.cdl ref_ut_map_writedata.cdl ref_ut_map_writemeta2.cdl ref_ut_map_writemeta.cdl \
Expand All @@ -152,7 +153,8 @@ ref_any.cdl ref_oldformat.cdl ref_oldformat.zip ref_newformatpure.cdl \
ref_groups.h5 ref_byte.zarr.zip ref_byte_fill_value_null.zarr.zip \
ref_groups_regular.cdl ref_byte.cdl ref_byte_fill_value_null.cdl \
ref_jsonconvention.cdl ref_jsonconvention.zmap \
ref_string.cdl ref_string_nczarr.baseline ref_string_zarr.baseline ref_scalar.cdl
ref_string.cdl ref_string_nczarr.baseline ref_string_zarr.baseline ref_scalar.cdl \
ref_nulls_nczarr.baseline ref_nulls_zarr.baseline ref_nulls.cdl

# Interoperability files
EXTRA_DIST += ref_power_901_constants_orig.zip ref_power_901_constants.cdl ref_quotes_orig.zip ref_quotes.cdl
Expand Down
4 changes: 2 additions & 2 deletions nczarr_test/ref_jsonconvention.zmap
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[0] /.zattrs : (354) |{"globalfloat": 1, "globalfloatvec": [1,2], "globalchar": "abc", "globalillegal": "[ [ 1.0, 0.0, 0.0 ], [ 0.0, 1.0, 0.0 ], [ 0.0, 0.0, 1.0 ", "_NCProperties": "version=2,netcdf=4.9.1-development,nczarr=2.0.0", "_nczarr_attr": {"types": {"globalfloat": "<f8", "globalfloatvec": "<f8", "globalchar": "|S1", "globalillegal": "|S1", "_NCProperties": "|S1"}}}|
[0] /.zattrs : (354) |{"globalfloat": 1, "globalfloatvec": [1,2], "globalchar": "abc", "globalillegal": "[ [ 1.0, 0.0, 0.0 ], [ 0.0, 1.0, 0.0 ], [ 0.0, 0.0, 1.0 ", "_NCProperties": "version=2,netcdf=4.9.1-development,nczarr=2.0.0", "_nczarr_attr": {"types": {"globalfloat": "<f8", "globalfloatvec": "<f8", "globalchar": ">S1", "globalillegal": ">S1", "_NCProperties": ">S1"}}}|
[1] /.zgroup : (129) |{"zarr_format": 2, "_nczarr_superblock": {"version": "2.0.0"}, "_nczarr_group": {"dims": {"d1": 1}, "vars": ["v"], "groups": []}}|
[3] /v/.zarray : (202) |{"zarr_format": 2, "shape": [1], "dtype": "<i4", "chunks": [1], "fill_value": -2147483647, "order": "C", "compressor": null, "filters": null, "_nczarr_array": {"dimrefs": ["/d1"], "storage": "chunked"}}|
[4] /v/.zattrs : (296) |{"varjson1": {"key1": [1,2,3], "key2": {"key3": "abc"}}, "varjson2": [[1.0,0.0,0.0],[0.0,1.0,0.0],[0.0,0.0,1.0]], "varvec1": "1.0, 0.0, 0.0", "varvec2": [0.,0.,1.], "_ARRAY_DIMENSIONS": ["d1"], "_nczarr_attr": {"types": {"varjson1": "|S1", "varjson2": "|S1", "varvec1": "|S1", "varvec2": "|S1"}}}|
[4] /v/.zattrs : (296) |{"varjson1": {"key1": [1,2,3], "key2": {"key3": "abc"}}, "varjson2": [[1.0,0.0,0.0],[0.0,1.0,0.0],[0.0,0.0,1.0]], "varvec1": "1.0, 0.0, 0.0", "varvec2": [0.,0.,1.], "_ARRAY_DIMENSIONS": ["d1"], "_nczarr_attr": {"types": {"varjson1": ">S1", "varjson2": ">S1", "varvec1": ">S1", "varvec2": ">S1"}}}|
[5] /v/0 : (4) (ubyte) |...|
4 changes: 2 additions & 2 deletions nczarr_test/ref_newformatpure.cdl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ dimensions:
variables:
int lat(lat) ;
lat:_FillValue = -1 ;
lat:lat_attr = "latitude" ;
string lat:lat_attr = "latitude" ;
data:

lat = 1, 2, 3, 4, 5, 6, 7, 8 ;
Expand All @@ -15,7 +15,7 @@ group: g1 {
variables:
int pos(_zdim_8, _zdim_10) ;
pos:_FillValue = -1 ;
pos:pos_attr = "latXlon" ;
string pos:pos_attr = "latXlon" ;

// group attributes:
:g1_attr = 17 ;
Expand Down
12 changes: 12 additions & 0 deletions nczarr_test/ref_nulls.cdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
netcdf ref_nulls {
variables:
int test;
test:nul_sng = '\0';
test:empty_sng = "";
test:space_sng = " ";
test:zero_sng = "0";
test:nul0 = '\0' ;
data:

test = 1;
}
Loading

0 comments on commit 8173d96

Please # to comment.