Merge pull request #2503 from DennisHeimbigner/zarrfixes4.dmh

Fix some addtional errors in NCZarr
Unidata · Sep 14, 2022 · 8173d96 · 8173d96
2 parents dc46ac6 + db86c2c
commit 8173d96
Show file tree

Hide file tree

Showing 21 changed files with 193 additions and 49 deletions.
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
@@ -7,6 +7,7 @@ This file contains a high-level description of this package's evolution. Release
 
 ## 4.9.1 - T.B.D.
 
+* [Bug Fix] Fix some errors detected in PR 2497. [PR #2497](https://github.com/Unidata/netcdf-c/pull/2497) . See [Github #2503](https://github.com/Unidata/netcdf-c/pull/2503).
 * [Bug Fix] Split the remote tests into two parts: one for the remotetest server and one for all other external servers. Also add a configure option to enable the latter set. See [Github #2491](https://github.com/Unidata/netcdf-c/pull/2491).
 * [Bug Fix] Fix blosc plugin errors. See [Github #2461](https://github.com/Unidata/netcdf-c/pull/2461).
 * [Bug Fix] Fix support for reading arrays of HDF5 fixed size strings. See [Github #2466](https://github.com/Unidata/netcdf-c/pull/2466).

diff --git a/docs/nczarr.md b/docs/nczarr.md
@@ -835,12 +835,12 @@ There are mutiple cases to consider.
 3. The netcdf attribute **is** of type NC_CHAR and its value &ndash; taken as a single sequence of characters &ndash;
 **is** parseable as a legal JSON expression.
     * Parse to produce a JSON expression and write that expression.
-    * Use "|S1" as the dtype and store in the NCZarr metadata.
+    * Use "|U1" as the dtype and store in the NCZarr metadata.
 
 4. The netcdf attribute **is** of type NC_CHAR and its value &ndash; taken as a single sequence of characters &ndash;
 **is not** parseable as a legal JSON expression.
     * Convert to a JSON string and write that expression
-    * Use "|S1" as the dtype and store in the NCZarr metadata.
+    * Use "|U1" as the dtype and store in the NCZarr metadata.
 
 ## Reading an attribute:
 
@@ -915,12 +915,11 @@ and the netcdf-c NC_CHAR type such that if a pure zarr
 implementation reads them, it will still work.
 
 For writing variables and NCZarr attributes, the type mapping is as follows:
-* "|S1" for NC_CHAR.
-* ">S1" for NC_STRING && MAXSTRLEN==1
-* ">Sn" for NC_STRING && MAXSTRLEN==n
+* ">S1" for NC_CHAR.
+* "|S1" for NC_STRING && MAXSTRLEN==1
+* "|Sn" for NC_STRING && MAXSTRLEN==n
 
-Note that it is a bit of a hack to use endianness, but it should be ok since for
-string/char, the endianness has no meaning.
+Admittedly, this encoding is a bit of a hack.
 
 So when reading data with a pure zarr implementaion
 the above types should always appear as strings,

diff --git a/libdispatch/dfile.c b/libdispatch/dfile.c
@@ -2031,6 +2031,7 @@ NC_open(const char *path0, int omode, int basepe, size_t *chunksizehintp,
     }
 
     /* Suppress unsupported formats */
+#if 0
     /* (should be more compact, table-driven, way to do this) */
     {
 	int hdf5built = 0;
@@ -2041,10 +2042,10 @@ NC_open(const char *path0, int omode, int basepe, size_t *chunksizehintp,
 	int nczarrbuilt = 0;
 #ifdef USE_NETCDF4
         hdf5built = 1;
+#endif
 #ifdef USE_HDF4
         hdf4built = 1;
 #endif
-#endif
 #ifdef ENABLE_CDF5
         cdf5built = 1;
 #endif
@@ -2069,6 +2070,43 @@ NC_open(const char *path0, int omode, int basepe, size_t *chunksizehintp,
         if(!udf1built && model.impl == NC_FORMATX_UDF1)
         {stat = NC_ENOTBUILT; goto done;}
     }
+#else
+    {
+	unsigned built = 0 /* leave off the trailing semicolon so we can build constant */
+		| (1<<NC_FORMATX_NC3) /* NC3 always supported */
+#ifdef USE_HDF5
+		| (1<<NC_FORMATX_NC_HDF5)
+#endif
+#ifdef USE_HDF4
+		| (1<<NC_FORMATX_NC_HDF4)
+#endif
+#ifdef ENABLE_NCZARR
+		| (1<<NC_FORMATX_NCZARR)
+#endif
+#ifdef ENABLE_DAP
+		| (1<<NC_FORMATX_DAP2)
+#endif
+#ifdef ENABLE_DAP4
+		| (1<<NC_FORMATX_DAP4)
+#endif
+#ifdef USE_PNETCDF
+		| (1<<NC_FORMATX_PNETCDF)
+#endif
+		; /* end of the built flags */
+        if(UDF0_dispatch_table != NULL)
+	    built |= (1<<NC_FORMATX_UDF0);
+        if(UDF1_dispatch_table != NULL)
+	    built |= (1<<NC_FORMATX_UDF1);
+	/* Verify */
+	if((built & (1 << model.impl)) == 0)
+            {stat = NC_ENOTBUILT; goto done;}
+#ifndef ENABLE_CDF5
+	/* Special case because there is no separate CDF5 dispatcher */
+        if(model.impl == NC_FORMATX_NC3 && (omode & NC_64BIT_DATA))
+            {stat = NC_ENOTBUILT; goto done;}
+#endif
+    }
+#endif
     /* Figure out what dispatcher to use */
     if (!dispatcher) {
         switch (model.impl) {

diff --git a/libnczarr/zdebug.h b/libnczarr/zdebug.h
@@ -5,12 +5,12 @@
 #ifndef ZDEBUG_H
 #define ZDEBUG_H
 
+#undef ZCATCH /* Warning: significant performance impact */
+#undef ZTRACING /* Warning: significant performance impact */
+
 #undef ZDEBUG /* general debug */
 #undef ZDEBUG1 /* detailed debug */
 
-#define ZCATCH /* Warning: significant performance impact */
-#define ZTRACING /* Warning: significant performance impact */
-
 #include "ncexternl.h"
 #include "nclog.h"
 

diff --git a/libnczarr/zsync.c b/libnczarr/zsync.c
@@ -986,6 +986,11 @@ zconvert(NCjson* src, nc_type typeid, size_t typelen, int* countp, NCbytes* dst)
 	if(typeid == NC_CHAR) {
 	    if((stat = zcharify(src,dst))) goto done;
 	    count = ncbyteslength(dst);
+	    /* Special case for "" */
+	    if(count == 0) {
+	        ncbytesappend(dst,'\0');
+	        count = 1;
+	    }
 	} else {
 	    if((stat = NCZ_convert1(src, typeid, dst))) goto done;
 	    count = 1;
@@ -1511,13 +1516,17 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
 		if((stat = ncz_gettype(file,grp,vtype,&var->type_info)))
 		    goto done;
 	    } else {stat = NC_EBADTYPE; goto done;}
+#if 0 /* leave native in place */
 	    if(endianness == NC_ENDIAN_NATIVE)
 		endianness = zinfo->native_endianness;
 	    if(endianness == NC_ENDIAN_NATIVE)
 	        endianness = (NCZ_isLittleEndian()?NC_ENDIAN_LITTLE:NC_ENDIAN_BIG);
 	    if(endianness == NC_ENDIAN_LITTLE || endianness == NC_ENDIAN_BIG) {
 		var->endianness = endianness;
 	    } else {stat = NC_EBADTYPE; goto done;}
+#else
+	    var->endianness = endianness;
+#endif
 	    var->type_info->endianness = var->endianness; /* Propagate */
 	    if(vtype == NC_STRING) {
 		zvar->maxstrlen = vtypelen;
@@ -2244,7 +2253,7 @@ ncz_get_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var)
     /* Remember that we have read the metadata for this var. */
     var->meta_read = NC_TRUE;
 done:
-    return retval;
+    return ZUNTRACE(retval);
 }
 
 #if 0

diff --git a/libnczarr/zutil.c b/libnczarr/zutil.c
@@ -32,10 +32,12 @@ to decide how to infer the type: NC_STRING vs NC_CHAR.
 
 Solution:
 For variables and for NCZarr type attributes, distinquish by using:
-* "|S1" for NC_CHAR.
-* ">S1" for NC_STRING && MAXSTRLEN==1
-It is a bit of a hack to use endianness, but it should be ok since for
-string/char, the endianness has no meaning.
+* ">S1" for NC_CHAR.
+* "|S1" for NC_STRING && MAXSTRLEN==1
+* "|Sn" for NC_STRING && MAXSTRLEN==n
+This is admittedly a bit of a hack, and the first case in particular
+will probably cause errors in some other Zarr implementations; the Zarr
+spec is unclear about what combinations are legal.
 Note that we could use "|U1", but since this is utf-16 or utf-32
 in python, it may cause problems when reading what amounts to utf-8.
 
@@ -46,7 +48,7 @@ For attributes, we infer:
   - e.g. string var:strattr = \"abc\", \"def\"" => NC_STRING
 
 Note also that if we read a pure zarr file we will probably always
-see "|S1", so we will never see a variable of type NC_STRING with length == 1.
+see "|S1", so we will never see a variable of type NC_CHAR.
 We might however see an attribute of type string.
 */
 static const struct ZTYPES {
@@ -57,7 +59,7 @@ static const struct ZTYPES {
                    NE   LE     BE       NE    LE    BE*/
 /*NC_NAT*/	{{NULL,NULL,NULL},   {NULL,NULL,NULL}},
 /*NC_BYTE*/	{{"|i1","<i1",">i1"},{"|i1","<i1",">i1"}},
-/*NC_CHAR*/	{{"|S1","|S1","|S1"},{"|S1","|S1","|S1"}},
+/*NC_CHAR*/	{{">S1",">S1",">S1"},{">S1",">S1",">S1"}},
 /*NC_SHORT*/	{{"|i2","<i2",">i2"},{"|i2","<i2",">i2"}},
 /*NC_INT*/	{{"|i4","<i4",">i4"},{"|i4","<i4",">i4"}},
 /*NC_FLOAT*/	{{"|f4","<f4",">f4"},{"|f4","<f4",">f4"}},
@@ -67,7 +69,7 @@ static const struct ZTYPES {
 /*NC_UINT*/	{{"|u4","<u4",">u4"},{"|u4","<u4",">u4"}},
 /*NC_INT64*/	{{"|i8","<i8",">i8"},{"|i8","<i8",">i8"}},
 /*NC_UINT64*/	{{"|u8","<u8",">u8"},{"|u8","<u8",">u8"}},
-/*NC_STRING*/	{{">S%d",">S%d",">S%d"},{">S%d",">S%d",">S%d"}},
+/*NC_STRING*/	{{"|S%d","|S%d","|S%d"},{"|S%d","|S%d","|S%d"}},
 };
 
 #if 0
@@ -576,7 +578,6 @@ ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nct
     switch (*p++) {
     case '<': endianness = NC_ENDIAN_LITTLE; break;
     case '>': endianness = NC_ENDIAN_BIG; break;
-    case '=': endianness = NC_ENDIAN_NATIVE; break;
     case '|': endianness = NC_ENDIAN_NATIVE; break;
     default: p--; endianness = NC_ENDIAN_NATIVE; break;
     }
@@ -589,20 +590,17 @@ ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nct
     /* Short circuit fixed length strings */
     if(tchar == 'S') {
 	/* Fixed length string */
-	switch (typehint) {
-	case NC_CHAR: nctype = NC_CHAR; typelen = 1; break;
-	case NC_STRING: nctype = NC_STRING; break;
+	switch (typelen) {
+	case 1:
+	    nctype = (endianness == NC_ENDIAN_BIG ? NC_CHAR : NC_STRING);
+	    if(purezarr) nctype = NC_STRING; /* Zarr has no NC_CHAR type */
+	    break;
 	default:
-	    if(typelen == 1) {/* so |S1 => NC_CHAR */
-	        if(purezarr || endianness == NC_ENDIAN_NATIVE) nctype = NC_CHAR;		    
-	    } else
-	        nctype = NC_STRING;
+	    nctype = NC_STRING;
+	    break;
 	}
-#if 0
-    } else if(tchar == 'U') {/*back compatibility*/
-	if(purezarr || typelen != 1) goto zerr;
-	nctype = NC_CHAR;
-#endif
+	/* String/char have no endianness */
+	endianness = NC_ENDIAN_NATIVE;
     } else {
 	switch(typelen) {
         case 1:
@@ -639,9 +637,11 @@ ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nct
         }
     }
 
+#if 0
     /* Convert NC_ENDIAN_NATIVE and NC_ENDIAN_NA */
     if(endianness == NC_ENDIAN_NATIVE)
         endianness = (NC_isLittleEndian()?NC_ENDIAN_LITTLE:NC_ENDIAN_BIG);
+#endif
 
     if(nctypep) *nctypep = nctype;
     if(typelenp) *typelenp = typelen;

diff --git a/nc_test/tst_pnetcdf.c b/nc_test/tst_pnetcdf.c
@@ -38,7 +38,7 @@
 
 int main(int argc, char* argv[])
 {
-    int i, j, rank, nprocs, ncid, cmode, varid[NVARS], dimid[2], *buf;
+    int i, j, rank, nprocs, ncid, cmode, varid[NVARS], dimid[2], *buf=NULL;
     int st, nerrs=0;
     char str[32];
     size_t start[2], count[2];
@@ -143,9 +143,8 @@ int main(int argc, char* argv[])
         }
     }
     st = nc_close(ncid); CHK_ERR(st)
-    free(buf);
-
 fn_exit:
+    free(buf);
     MPI_Finalize();
     err = nerrs;
     SUMMARIZE_ERR;

diff --git a/nc_test4/tst_specific_filters.sh b/nc_test4/tst_specific_filters.sh
@@ -15,7 +15,7 @@ if test "x$TESTNCZARR" = x1 ; then
 BLOSCARGS="32001,0,0,0,256,5,1,1"
 BLOSCCODEC='[{\"id\": \"blosc\",\"clevel\": 5,\"blocksize\": 256,\"cname\": \"lz4\",\"shuffle\": 1}]'
 else
-BLOSCARGS="32001,0,0,0,256,5,1,1"
+BLOSCARGS="32001,0,0,4,256,5,1,1"
 BLOSCCODEC='[{\"id\": \"blosc\",\"clevel\": 5,\"blocksize\": 256,\"cname\": \"lz4\",\"shuffle\": 1}]'
 fi
 

diff --git a/ncdump/utils.c b/ncdump/utils.c
@@ -79,10 +79,10 @@ erealloc (void* p0,			/* check return from realloc */
 }
 
 void
-check(int err, const char* file, const int line)
+check(int err, const char* file, const char* fcn, const int line)
 {
     fprintf(stderr,"%s\n",nc_strerror(err));
-    fprintf(stderr,"Location: file %s; line %d\n", file,line);
+    fprintf(stderr,"Location: file %s; fcn %s line %d\n",(file?file:"?"),(fcn?fcn:"?"),line);
     fflush(stderr); fflush(stdout);
     exit(1);
 }

diff --git a/ncdump/utils.h b/ncdump/utils.h
@@ -72,9 +72,9 @@ extern "C" {
  * debian package builders. They already use NDEBUG to turn off the
  * file names in asserts. */
 #ifdef NDEBUG
-#define NC_CHECK(fncall) {int ncstat=fncall;if(ncstat!=NC_NOERR)check(ncstat,"",__LINE__);}
+#define NC_CHECK(fncall) {int ncstat=fncall;if(ncstat!=NC_NOERR)check(ncstat,NULL,NULL,__LINE__);}
 #else
-#define NC_CHECK(fncall) {int ncstat=fncall;if(ncstat!=NC_NOERR)check(ncstat,__FILE__,__LINE__);}
+#define NC_CHECK(fncall) {int ncstat=fncall;if(ncstat!=NC_NOERR)check(ncstat,__FILE__,__func__,__LINE__);}
 #endif /* NDEBUG */
 
 /* Print error message to stderr and exit */
@@ -88,7 +88,7 @@ extern void*    ecalloc ( size_t size );
 extern void*    erealloc (void* p, size_t size );
 
 /* Check error return.  If bad, print error message and exit. */
-extern void check(int err, const char* file, const int line);
+extern void check(int err, const char* file, const char* fcn, const int line);
 
 /* Return malloced name with chars special to CDL escaped. */
 char* escaped_name(const char* cp);

diff --git a/nczarr_test/CMakeLists.txt b/nczarr_test/CMakeLists.txt
@@ -118,6 +118,7 @@ IF(ENABLE_TESTS)
     add_sh_test(nczarr_test run_jsonconvention)
     add_sh_test(nczarr_test run_strings)
     add_sh_test(nczarr_test run_scalar)
+    add_sh_test(nczarr_test run_nulls)
 
     BUILD_BIN_TEST(test_quantize ${TSTCOMMONSRC})
     add_sh_test(nczarr_test run_quantize)

diff --git a/nczarr_test/Makefile.am b/nczarr_test/Makefile.am
@@ -63,6 +63,7 @@ TESTS += run_nczarr_fill.sh
 TESTS += run_jsonconvention.sh
 TESTS += run_strings.sh
 TESTS += run_scalar.sh
+TESTS += run_nulls.sh
 endif
 
 if BUILD_UTILITIES 
@@ -131,7 +132,7 @@ run_purezarr.sh run_interop.sh run_misc.sh \
 run_filter.sh run_specific_filters.sh \
 run_newformat.sh run_nczarr_fill.sh run_quantize.sh \
 run_jsonconvention.sh run_nczfilter.sh run_unknown.sh \
-run_scalar.sh run_strings.sh
+run_scalar.sh run_strings.sh run_nulls.sh
 
 EXTRA_DIST += \
 ref_ut_map_create.cdl ref_ut_map_writedata.cdl ref_ut_map_writemeta2.cdl ref_ut_map_writemeta.cdl \
@@ -152,7 +153,8 @@ ref_any.cdl ref_oldformat.cdl ref_oldformat.zip ref_newformatpure.cdl \
 ref_groups.h5 ref_byte.zarr.zip ref_byte_fill_value_null.zarr.zip \
 ref_groups_regular.cdl ref_byte.cdl ref_byte_fill_value_null.cdl \
 ref_jsonconvention.cdl ref_jsonconvention.zmap \
-ref_string.cdl ref_string_nczarr.baseline ref_string_zarr.baseline ref_scalar.cdl
+ref_string.cdl ref_string_nczarr.baseline ref_string_zarr.baseline ref_scalar.cdl \
+ref_nulls_nczarr.baseline ref_nulls_zarr.baseline ref_nulls.cdl
 
 # Interoperability files
 EXTRA_DIST += ref_power_901_constants_orig.zip ref_power_901_constants.cdl ref_quotes_orig.zip ref_quotes.cdl 

diff --git a/nczarr_test/ref_jsonconvention.zmap b/nczarr_test/ref_jsonconvention.zmap
@@ -1,5 +1,5 @@
-[0] /.zattrs : (354) |{"globalfloat": 1, "globalfloatvec": [1,2], "globalchar": "abc", "globalillegal": "[ [ 1.0, 0.0, 0.0 ], [ 0.0, 1.0, 0.0 ], [ 0.0, 0.0, 1.0 ", "_NCProperties": "version=2,netcdf=4.9.1-development,nczarr=2.0.0", "_nczarr_attr": {"types": {"globalfloat": "<f8", "globalfloatvec": "<f8", "globalchar": "|S1", "globalillegal": "|S1", "_NCProperties": "|S1"}}}|
+[0] /.zattrs : (354) |{"globalfloat": 1, "globalfloatvec": [1,2], "globalchar": "abc", "globalillegal": "[ [ 1.0, 0.0, 0.0 ], [ 0.0, 1.0, 0.0 ], [ 0.0, 0.0, 1.0 ", "_NCProperties": "version=2,netcdf=4.9.1-development,nczarr=2.0.0", "_nczarr_attr": {"types": {"globalfloat": "<f8", "globalfloatvec": "<f8", "globalchar": ">S1", "globalillegal": ">S1", "_NCProperties": ">S1"}}}|
 [1] /.zgroup : (129) |{"zarr_format": 2, "_nczarr_superblock": {"version": "2.0.0"}, "_nczarr_group": {"dims": {"d1": 1}, "vars": ["v"], "groups": []}}|
 [3] /v/.zarray : (202) |{"zarr_format": 2, "shape": [1], "dtype": "<i4", "chunks": [1], "fill_value": -2147483647, "order": "C", "compressor": null, "filters": null, "_nczarr_array": {"dimrefs": ["/d1"], "storage": "chunked"}}|
-[4] /v/.zattrs : (296) |{"varjson1": {"key1": [1,2,3], "key2": {"key3": "abc"}}, "varjson2": [[1.0,0.0,0.0],[0.0,1.0,0.0],[0.0,0.0,1.0]], "varvec1": "1.0, 0.0, 0.0", "varvec2": [0.,0.,1.], "_ARRAY_DIMENSIONS": ["d1"], "_nczarr_attr": {"types": {"varjson1": "|S1", "varjson2": "|S1", "varvec1": "|S1", "varvec2": "|S1"}}}|
+[4] /v/.zattrs : (296) |{"varjson1": {"key1": [1,2,3], "key2": {"key3": "abc"}}, "varjson2": [[1.0,0.0,0.0],[0.0,1.0,0.0],[0.0,0.0,1.0]], "varvec1": "1.0, 0.0, 0.0", "varvec2": [0.,0.,1.], "_ARRAY_DIMENSIONS": ["d1"], "_nczarr_attr": {"types": {"varjson1": ">S1", "varjson2": ">S1", "varvec1": ">S1", "varvec2": ">S1"}}}|
 [5] /v/0 : (4) (ubyte) |...|
diff --git a/nczarr_test/ref_newformatpure.cdl b/nczarr_test/ref_newformatpure.cdl
@@ -6,7 +6,7 @@ dimensions:
 variables:
 	int lat(lat) ;
 		lat:_FillValue = -1 ;
-		lat:lat_attr = "latitude" ;
+		string lat:lat_attr = "latitude" ;
 data:
 
  lat = 1, 2, 3, 4, 5, 6, 7, 8 ;
@@ -15,7 +15,7 @@ group: g1 {
   variables:
   	int pos(_zdim_8, _zdim_10) ;
   		pos:_FillValue = -1 ;
-  		pos:pos_attr = "latXlon" ;
+  		string pos:pos_attr = "latXlon" ;
 
   // group attributes:
   		:g1_attr = 17 ;

diff --git a/nczarr_test/ref_nulls.cdl b/nczarr_test/ref_nulls.cdl
@@ -0,0 +1,12 @@
+netcdf ref_nulls {
+variables:
+	int test;
+		test:nul_sng = '\0';
+		test:empty_sng = "";
+	        test:space_sng = " ";
+		test:zero_sng = "0";
+		test:nul0 = '\0' ;
+data:
+
+ test = 1;
+}