diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 490dced939..2f5420a10a 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -7,6 +7,7 @@ This file contains a high-level description of this package's evolution. Release ## 4.9.1 - T.B.D. +* [Bug Fix] Fix some errors detected in PR 2497. [PR #2497](https://github.com/Unidata/netcdf-c/pull/2497) . See [Github #2503](https://github.com/Unidata/netcdf-c/pull/2503). * [Bug Fix] Split the remote tests into two parts: one for the remotetest server and one for all other external servers. Also add a configure option to enable the latter set. See [Github #2491](https://github.com/Unidata/netcdf-c/pull/2491). * [Bug Fix] Fix blosc plugin errors. See [Github #2461](https://github.com/Unidata/netcdf-c/pull/2461). * [Bug Fix] Fix support for reading arrays of HDF5 fixed size strings. See [Github #2466](https://github.com/Unidata/netcdf-c/pull/2466). diff --git a/docs/nczarr.md b/docs/nczarr.md index 1e4dc93c0f..078e451e45 100644 --- a/docs/nczarr.md +++ b/docs/nczarr.md @@ -835,12 +835,12 @@ There are mutiple cases to consider. 3. The netcdf attribute **is** of type NC_CHAR and its value – taken as a single sequence of characters – **is** parseable as a legal JSON expression. * Parse to produce a JSON expression and write that expression. - * Use "|S1" as the dtype and store in the NCZarr metadata. + * Use "|U1" as the dtype and store in the NCZarr metadata. 4. The netcdf attribute **is** of type NC_CHAR and its value – taken as a single sequence of characters – **is not** parseable as a legal JSON expression. * Convert to a JSON string and write that expression - * Use "|S1" as the dtype and store in the NCZarr metadata. + * Use "|U1" as the dtype and store in the NCZarr metadata. ## Reading an attribute: @@ -915,12 +915,11 @@ and the netcdf-c NC_CHAR type such that if a pure zarr implementation reads them, it will still work. For writing variables and NCZarr attributes, the type mapping is as follows: -* "|S1" for NC_CHAR. -* ">S1" for NC_STRING && MAXSTRLEN==1 -* ">Sn" for NC_STRING && MAXSTRLEN==n +* ">S1" for NC_CHAR. +* "|S1" for NC_STRING && MAXSTRLEN==1 +* "|Sn" for NC_STRING && MAXSTRLEN==n -Note that it is a bit of a hack to use endianness, but it should be ok since for -string/char, the endianness has no meaning. +Admittedly, this encoding is a bit of a hack. So when reading data with a pure zarr implementaion the above types should always appear as strings, diff --git a/libdispatch/dfile.c b/libdispatch/dfile.c index c2149a6396..ae756a8f27 100644 --- a/libdispatch/dfile.c +++ b/libdispatch/dfile.c @@ -2031,6 +2031,7 @@ NC_open(const char *path0, int omode, int basepe, size_t *chunksizehintp, } /* Suppress unsupported formats */ +#if 0 /* (should be more compact, table-driven, way to do this) */ { int hdf5built = 0; @@ -2041,10 +2042,10 @@ NC_open(const char *path0, int omode, int basepe, size_t *chunksizehintp, int nczarrbuilt = 0; #ifdef USE_NETCDF4 hdf5built = 1; +#endif #ifdef USE_HDF4 hdf4built = 1; #endif -#endif #ifdef ENABLE_CDF5 cdf5built = 1; #endif @@ -2069,6 +2070,43 @@ NC_open(const char *path0, int omode, int basepe, size_t *chunksizehintp, if(!udf1built && model.impl == NC_FORMATX_UDF1) {stat = NC_ENOTBUILT; goto done;} } +#else + { + unsigned built = 0 /* leave off the trailing semicolon so we can build constant */ + | (1<type_info))) goto done; } else {stat = NC_EBADTYPE; goto done;} +#if 0 /* leave native in place */ if(endianness == NC_ENDIAN_NATIVE) endianness = zinfo->native_endianness; if(endianness == NC_ENDIAN_NATIVE) @@ -1518,6 +1524,9 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) if(endianness == NC_ENDIAN_LITTLE || endianness == NC_ENDIAN_BIG) { var->endianness = endianness; } else {stat = NC_EBADTYPE; goto done;} +#else + var->endianness = endianness; +#endif var->type_info->endianness = var->endianness; /* Propagate */ if(vtype == NC_STRING) { zvar->maxstrlen = vtypelen; @@ -2244,7 +2253,7 @@ ncz_get_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var) /* Remember that we have read the metadata for this var. */ var->meta_read = NC_TRUE; done: - return retval; + return ZUNTRACE(retval); } #if 0 diff --git a/libnczarr/zutil.c b/libnczarr/zutil.c index a6b115c11d..0cb64979c5 100644 --- a/libnczarr/zutil.c +++ b/libnczarr/zutil.c @@ -32,10 +32,12 @@ to decide how to infer the type: NC_STRING vs NC_CHAR. Solution: For variables and for NCZarr type attributes, distinquish by using: -* "|S1" for NC_CHAR. -* ">S1" for NC_STRING && MAXSTRLEN==1 -It is a bit of a hack to use endianness, but it should be ok since for -string/char, the endianness has no meaning. +* ">S1" for NC_CHAR. +* "|S1" for NC_STRING && MAXSTRLEN==1 +* "|Sn" for NC_STRING && MAXSTRLEN==n +This is admittedly a bit of a hack, and the first case in particular +will probably cause errors in some other Zarr implementations; the Zarr +spec is unclear about what combinations are legal. Note that we could use "|U1", but since this is utf-16 or utf-32 in python, it may cause problems when reading what amounts to utf-8. @@ -46,7 +48,7 @@ For attributes, we infer: - e.g. string var:strattr = \"abc\", \"def\"" => NC_STRING Note also that if we read a pure zarr file we will probably always -see "|S1", so we will never see a variable of type NC_STRING with length == 1. +see "|S1", so we will never see a variable of type NC_CHAR. We might however see an attribute of type string. */ static const struct ZTYPES { @@ -57,7 +59,7 @@ static const struct ZTYPES { NE LE BE NE LE BE*/ /*NC_NAT*/ {{NULL,NULL,NULL}, {NULL,NULL,NULL}}, /*NC_BYTE*/ {{"|i1","i1"},{"|i1","i1"}}, -/*NC_CHAR*/ {{"|S1","|S1","|S1"},{"|S1","|S1","|S1"}}, +/*NC_CHAR*/ {{">S1",">S1",">S1"},{">S1",">S1",">S1"}}, /*NC_SHORT*/ {{"|i2","i2"},{"|i2","i2"}}, /*NC_INT*/ {{"|i4","i4"},{"|i4","i4"}}, /*NC_FLOAT*/ {{"|f4","f4"},{"|f4","f4"}}, @@ -67,7 +69,7 @@ static const struct ZTYPES { /*NC_UINT*/ {{"|u4","u4"},{"|u4","u4"}}, /*NC_INT64*/ {{"|i8","i8"},{"|i8","i8"}}, /*NC_UINT64*/ {{"|u8","u8"},{"|u8","u8"}}, -/*NC_STRING*/ {{">S%d",">S%d",">S%d"},{">S%d",">S%d",">S%d"}}, +/*NC_STRING*/ {{"|S%d","|S%d","|S%d"},{"|S%d","|S%d","|S%d"}}, }; #if 0 @@ -576,7 +578,6 @@ ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nct switch (*p++) { case '<': endianness = NC_ENDIAN_LITTLE; break; case '>': endianness = NC_ENDIAN_BIG; break; - case '=': endianness = NC_ENDIAN_NATIVE; break; case '|': endianness = NC_ENDIAN_NATIVE; break; default: p--; endianness = NC_ENDIAN_NATIVE; break; } @@ -589,20 +590,17 @@ ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nct /* Short circuit fixed length strings */ if(tchar == 'S') { /* Fixed length string */ - switch (typehint) { - case NC_CHAR: nctype = NC_CHAR; typelen = 1; break; - case NC_STRING: nctype = NC_STRING; break; + switch (typelen) { + case 1: + nctype = (endianness == NC_ENDIAN_BIG ? NC_CHAR : NC_STRING); + if(purezarr) nctype = NC_STRING; /* Zarr has no NC_CHAR type */ + break; default: - if(typelen == 1) {/* so |S1 => NC_CHAR */ - if(purezarr || endianness == NC_ENDIAN_NATIVE) nctype = NC_CHAR; - } else - nctype = NC_STRING; + nctype = NC_STRING; + break; } -#if 0 - } else if(tchar == 'U') {/*back compatibility*/ - if(purezarr || typelen != 1) goto zerr; - nctype = NC_CHAR; -#endif + /* String/char have no endianness */ + endianness = NC_ENDIAN_NATIVE; } else { switch(typelen) { case 1: @@ -639,9 +637,11 @@ ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nct } } +#if 0 /* Convert NC_ENDIAN_NATIVE and NC_ENDIAN_NA */ if(endianness == NC_ENDIAN_NATIVE) endianness = (NC_isLittleEndian()?NC_ENDIAN_LITTLE:NC_ENDIAN_BIG); +#endif if(nctypep) *nctypep = nctype; if(typelenp) *typelenp = typelen; diff --git a/nc_test/tst_pnetcdf.c b/nc_test/tst_pnetcdf.c index 2ae175c708..5cb7afabe2 100644 --- a/nc_test/tst_pnetcdf.c +++ b/nc_test/tst_pnetcdf.c @@ -38,7 +38,7 @@ int main(int argc, char* argv[]) { - int i, j, rank, nprocs, ncid, cmode, varid[NVARS], dimid[2], *buf; + int i, j, rank, nprocs, ncid, cmode, varid[NVARS], dimid[2], *buf=NULL; int st, nerrs=0; char str[32]; size_t start[2], count[2]; @@ -143,9 +143,8 @@ int main(int argc, char* argv[]) } } st = nc_close(ncid); CHK_ERR(st) - free(buf); - fn_exit: + free(buf); MPI_Finalize(); err = nerrs; SUMMARIZE_ERR; diff --git a/nc_test4/tst_specific_filters.sh b/nc_test4/tst_specific_filters.sh index 6fa1709031..e094852049 100755 --- a/nc_test4/tst_specific_filters.sh +++ b/nc_test4/tst_specific_filters.sh @@ -15,7 +15,7 @@ if test "x$TESTNCZARR" = x1 ; then BLOSCARGS="32001,0,0,0,256,5,1,1" BLOSCCODEC='[{\"id\": \"blosc\",\"clevel\": 5,\"blocksize\": 256,\"cname\": \"lz4\",\"shuffle\": 1}]' else -BLOSCARGS="32001,0,0,0,256,5,1,1" +BLOSCARGS="32001,0,0,4,256,5,1,1" BLOSCCODEC='[{\"id\": \"blosc\",\"clevel\": 5,\"blocksize\": 256,\"cname\": \"lz4\",\"shuffle\": 1}]' fi diff --git a/ncdump/utils.c b/ncdump/utils.c index deb22e23e3..f2b79863ad 100644 --- a/ncdump/utils.c +++ b/ncdump/utils.c @@ -79,10 +79,10 @@ erealloc (void* p0, /* check return from realloc */ } void -check(int err, const char* file, const int line) +check(int err, const char* file, const char* fcn, const int line) { fprintf(stderr,"%s\n",nc_strerror(err)); - fprintf(stderr,"Location: file %s; line %d\n", file,line); + fprintf(stderr,"Location: file %s; fcn %s line %d\n",(file?file:"?"),(fcn?fcn:"?"),line); fflush(stderr); fflush(stdout); exit(1); } diff --git a/ncdump/utils.h b/ncdump/utils.h index e6b746c38a..dc2a7aee29 100644 --- a/ncdump/utils.h +++ b/ncdump/utils.h @@ -72,9 +72,9 @@ extern "C" { * debian package builders. They already use NDEBUG to turn off the * file names in asserts. */ #ifdef NDEBUG -#define NC_CHECK(fncall) {int ncstat=fncall;if(ncstat!=NC_NOERR)check(ncstat,"",__LINE__);} +#define NC_CHECK(fncall) {int ncstat=fncall;if(ncstat!=NC_NOERR)check(ncstat,NULL,NULL,__LINE__);} #else -#define NC_CHECK(fncall) {int ncstat=fncall;if(ncstat!=NC_NOERR)check(ncstat,__FILE__,__LINE__);} +#define NC_CHECK(fncall) {int ncstat=fncall;if(ncstat!=NC_NOERR)check(ncstat,__FILE__,__func__,__LINE__);} #endif /* NDEBUG */ /* Print error message to stderr and exit */ @@ -88,7 +88,7 @@ extern void* ecalloc ( size_t size ); extern void* erealloc (void* p, size_t size ); /* Check error return. If bad, print error message and exit. */ -extern void check(int err, const char* file, const int line); +extern void check(int err, const char* file, const char* fcn, const int line); /* Return malloced name with chars special to CDL escaped. */ char* escaped_name(const char* cp); diff --git a/nczarr_test/CMakeLists.txt b/nczarr_test/CMakeLists.txt index a681808b24..191dc56f3e 100644 --- a/nczarr_test/CMakeLists.txt +++ b/nczarr_test/CMakeLists.txt @@ -118,6 +118,7 @@ IF(ENABLE_TESTS) add_sh_test(nczarr_test run_jsonconvention) add_sh_test(nczarr_test run_strings) add_sh_test(nczarr_test run_scalar) + add_sh_test(nczarr_test run_nulls) BUILD_BIN_TEST(test_quantize ${TSTCOMMONSRC}) add_sh_test(nczarr_test run_quantize) diff --git a/nczarr_test/Makefile.am b/nczarr_test/Makefile.am index cbbdb37099..6c237d778f 100644 --- a/nczarr_test/Makefile.am +++ b/nczarr_test/Makefile.am @@ -63,6 +63,7 @@ TESTS += run_nczarr_fill.sh TESTS += run_jsonconvention.sh TESTS += run_strings.sh TESTS += run_scalar.sh +TESTS += run_nulls.sh endif if BUILD_UTILITIES @@ -131,7 +132,7 @@ run_purezarr.sh run_interop.sh run_misc.sh \ run_filter.sh run_specific_filters.sh \ run_newformat.sh run_nczarr_fill.sh run_quantize.sh \ run_jsonconvention.sh run_nczfilter.sh run_unknown.sh \ -run_scalar.sh run_strings.sh +run_scalar.sh run_strings.sh run_nulls.sh EXTRA_DIST += \ ref_ut_map_create.cdl ref_ut_map_writedata.cdl ref_ut_map_writemeta2.cdl ref_ut_map_writemeta.cdl \ @@ -152,7 +153,8 @@ ref_any.cdl ref_oldformat.cdl ref_oldformat.zip ref_newformatpure.cdl \ ref_groups.h5 ref_byte.zarr.zip ref_byte_fill_value_null.zarr.zip \ ref_groups_regular.cdl ref_byte.cdl ref_byte_fill_value_null.cdl \ ref_jsonconvention.cdl ref_jsonconvention.zmap \ -ref_string.cdl ref_string_nczarr.baseline ref_string_zarr.baseline ref_scalar.cdl +ref_string.cdl ref_string_nczarr.baseline ref_string_zarr.baseline ref_scalar.cdl \ +ref_nulls_nczarr.baseline ref_nulls_zarr.baseline ref_nulls.cdl # Interoperability files EXTRA_DIST += ref_power_901_constants_orig.zip ref_power_901_constants.cdl ref_quotes_orig.zip ref_quotes.cdl diff --git a/nczarr_test/ref_jsonconvention.zmap b/nczarr_test/ref_jsonconvention.zmap index fed6cfba1e..f4b72a37e3 100644 --- a/nczarr_test/ref_jsonconvention.zmap +++ b/nczarr_test/ref_jsonconvention.zmap @@ -1,5 +1,5 @@ -[0] /.zattrs : (354) |{"globalfloat": 1, "globalfloatvec": [1,2], "globalchar": "abc", "globalillegal": "[ [ 1.0, 0.0, 0.0 ], [ 0.0, 1.0, 0.0 ], [ 0.0, 0.0, 1.0 ", "_NCProperties": "version=2,netcdf=4.9.1-development,nczarr=2.0.0", "_nczarr_attr": {"types": {"globalfloat": "S1", "globalillegal": ">S1", "_NCProperties": ">S1"}}}| [1] /.zgroup : (129) |{"zarr_format": 2, "_nczarr_superblock": {"version": "2.0.0"}, "_nczarr_group": {"dims": {"d1": 1}, "vars": ["v"], "groups": []}}| [3] /v/.zarray : (202) |{"zarr_format": 2, "shape": [1], "dtype": "S1", "varjson2": ">S1", "varvec1": ">S1", "varvec2": ">S1"}}}| [5] /v/0 : (4) (ubyte) |...| diff --git a/nczarr_test/ref_newformatpure.cdl b/nczarr_test/ref_newformatpure.cdl index 080df793f1..51058889f9 100644 --- a/nczarr_test/ref_newformatpure.cdl +++ b/nczarr_test/ref_newformatpure.cdl @@ -6,7 +6,7 @@ dimensions: variables: int lat(lat) ; lat:_FillValue = -1 ; - lat:lat_attr = "latitude" ; + string lat:lat_attr = "latitude" ; data: lat = 1, 2, 3, 4, 5, 6, 7, 8 ; @@ -15,7 +15,7 @@ group: g1 { variables: int pos(_zdim_8, _zdim_10) ; pos:_FillValue = -1 ; - pos:pos_attr = "latXlon" ; + string pos:pos_attr = "latXlon" ; // group attributes: :g1_attr = 17 ; diff --git a/nczarr_test/ref_nulls.cdl b/nczarr_test/ref_nulls.cdl new file mode 100644 index 0000000000..a16ca34a24 --- /dev/null +++ b/nczarr_test/ref_nulls.cdl @@ -0,0 +1,12 @@ +netcdf ref_nulls { +variables: + int test; + test:nul_sng = '\0'; + test:empty_sng = ""; + test:space_sng = " "; + test:zero_sng = "0"; + test:nul0 = '\0' ; +data: + + test = 1; +} diff --git a/nczarr_test/ref_nulls_nczarr.baseline b/nczarr_test/ref_nulls_nczarr.baseline new file mode 100644 index 0000000000..263561c0e0 --- /dev/null +++ b/nczarr_test/ref_nulls_nczarr.baseline @@ -0,0 +1,12 @@ +netcdf ref_nulls { +variables: + int test ; + test:nul_sng = 0b ; + test:empty_sng = "" ; + test:space_sng = " " ; + test:zero_sng = "0" ; + test:nul0 = 0b ; +data: + + test = 1 ; +} diff --git a/nczarr_test/ref_nulls_zarr.baseline b/nczarr_test/ref_nulls_zarr.baseline new file mode 100644 index 0000000000..37119af01b --- /dev/null +++ b/nczarr_test/ref_nulls_zarr.baseline @@ -0,0 +1,14 @@ +netcdf ref_nulls { +dimensions: + _scalar_ = 1 ; +variables: + int test(_scalar_) ; + test:nul_sng = 0 ; + test:empty_sng = "" ; + test:space_sng = " " ; + test:zero_sng = 0 ; + test:nul0 = 0 ; +data: + + test = 1 ; +} diff --git a/nczarr_test/ref_oldformat.zip b/nczarr_test/ref_oldformat.zip index ee423b2816..49d9d38892 100644 Binary files a/nczarr_test/ref_oldformat.zip and b/nczarr_test/ref_oldformat.zip differ diff --git a/nczarr_test/ref_string_zarr.baseline b/nczarr_test/ref_string_zarr.baseline index aedfc605ab..a78886ae26 100644 --- a/nczarr_test/ref_string_zarr.baseline +++ b/nczarr_test/ref_string_zarr.baseline @@ -3,7 +3,7 @@ dimensions: d2 = 2 ; _scalar_ = 1 ; variables: - char c(d2) ; + string c(d2) ; string truncated(_scalar_) ; truncated:_nczarr_maxstrlen = 4 ; string v(d2) ; @@ -14,7 +14,7 @@ variables: :_nczarr_default_maxstrlen = 6 ; data: - c = "ab" ; + c = "a", "b" ; truncated = "0123" ; diff --git a/nczarr_test/run_newformat.sh b/nczarr_test/run_newformat.sh index d5bc2ce76a..e4f945ab17 100755 --- a/nczarr_test/run_newformat.sh +++ b/nczarr_test/run_newformat.sh @@ -11,6 +11,7 @@ echo "" echo "*** Testing backward compatibilty between nczarr meta data format V1 vs V2" testcaseold() { +echo "*** Test old format support" zext=$1 fileargs ${srcdir}/ref_oldformat ${NCDUMP} -n ref_oldformat "$fileurl" > ./tmp_oldformat.cdl @@ -18,6 +19,7 @@ diff -w ${srcdir}/ref_oldformat.cdl ./tmp_oldformat.cdl } testcasecvt() { +echo "*** Test old format to new format nczarr copy" zext=$1 fileargs ${srcdir}/ref_oldformat ${NCCOPY} "$fileurl" "file://tmp_newformat.file#mode=nczarr,file" @@ -26,6 +28,7 @@ diff -w ${srcdir}/ref_oldformat.cdl ./tmp_newformat.cdl } testcasepure() { +echo "*** Test old format to new format pure zarr copy" zext=$1 fileargs ${srcdir}/ref_oldformat ${NCCOPY} "$fileurl" "file://tmp_newformat.file#mode=nczarr,file" @@ -33,6 +36,7 @@ ${NCDUMP} -n ref_oldformat "file://tmp_newformat.file#mode=zarr,file" > ./tmp_ne diff -w ${srcdir}/ref_newformatpure.cdl ./tmp_newpure.cdl } +# Do zip tests only if test "x$FEATURE_NCZARR_ZIP" = xyes ; then testcaseold zip testcasecvt zip diff --git a/nczarr_test/run_nulls.sh b/nczarr_test/run_nulls.sh new file mode 100755 index 0000000000..2dc7484bf2 --- /dev/null +++ b/nczarr_test/run_nulls.sh @@ -0,0 +1,53 @@ +#!/bin/sh + +if test "x$srcdir" = x ; then srcdir=`pwd`; fi +. ../test_common.sh + +. "$srcdir/test_nczarr.sh" + +# This shell script tests support for special cases of NC_CHAR + +set -e + +testcase() { +zext=$1 + +echo "*** Test: nczarr string write then read; format=$zext" +# Get pure zarr args +fileargs tmp_nulls_zarr "mode=zarr,$zext" +zarrurl="$fileurl" +zarrfile="$file" +# Get nczarr args +fileargs tmp_nulls_nczarr "mode=nczarr,$zext" +nczarrurl="$fileurl" +nczarrfile="$file" + +# setup +deletemap $zext $zarrfile +deletemap $zext $nczarrfile + +# Create alternate ref files +echo "*** create pure zarr file" +${NCGEN} -4 -b -o "$zarrurl" $srcdir/ref_nulls.cdl +echo "*** create nczarr file" +${NCGEN} -4 -b -o "$nczarrurl" $srcdir/ref_nulls.cdl + +echo "*** read purezarr" +${NCDUMP} -n ref_nulls $zarrurl > tmp_nulls_zarr_${zext}.cdl +${ZMD} -h $zarrurl > tmp_nulls_zarr_${zext}.txt +echo "*** read nczarr" +${NCDUMP} -n ref_nulls $nczarrurl > tmp_nulls_nczarr_${zext}.cdl +${ZMD} -h $nczarrurl > tmp_nulls_nczarr_${zext}.txt + +echo "*** verify zarr output" +diff -bw ${srcdir}/ref_nulls_zarr.baseline tmp_nulls_zarr_${zext}.cdl + +echo "*** verify nczarr output" +diff -bw ${srcdir}/ref_nulls_nczarr.baseline tmp_nulls_nczarr_${zext}.cdl +} + +testcase file +if test "x$FEATURE_NCZARR_ZIP" = xyes ; then testcase zip; fi +if test "x$FEATURE_S3TESTS" = xyes ; then testcase s3; fi + +exit 0