From 9f461848b5c43494ba4e5d136d6cc446204671be Mon Sep 17 00:00:00 2001 From: Dave Allured Date: Fri, 7 Jan 2022 18:34:52 -0700 Subject: [PATCH 1/4] Format compatibility when re-opening files This commit selects the best HDF5 format compatibility options when re-opening an existing netCDF-4 file for writing, such as appending, or adding new groups or variables. The general objective is to make netCDF-4 files that can be read and written by all previous library versions. Optimal HDF5 v1.8 compatibility is selected whenever possible. Otherwise this falls back to the adequate v1.6 compatibility. Format compatibility is a transient property of the HDF5 library, rather than baked in at file creation time. Therefore, compatibility options must be re-selected every time a netCDF-4 file is re-opened for writing. This builds on the previous update for initial file creation, PR #1931, by @brtnfld, released in netcdf-c version 4.8.1. In particular, this commit moves compatibility controls into a single central location, a new common function that is shared by both create and open functions. For more details, see issue #951, also documentation at the top of libhdf5/hdf5set_format_compatibility.c. This commit also makes several corrections and cleanups to previous comments about the use of related property lists. --- libhdf5/CMakeLists.txt | 6 +-- libhdf5/Makefile.am | 11 ++-- libhdf5/hdf5create.c | 37 ++++++------- libhdf5/hdf5open.c | 16 ++++-- libhdf5/hdf5set_format_compatibility.c | 74 ++++++++++++++++++++++++++ 5 files changed, 107 insertions(+), 37 deletions(-) create mode 100644 libhdf5/hdf5set_format_compatibility.c diff --git a/libhdf5/CMakeLists.txt b/libhdf5/CMakeLists.txt index ad9b67a234..5cf4aa9267 100644 --- a/libhdf5/CMakeLists.txt +++ b/libhdf5/CMakeLists.txt @@ -1,5 +1,5 @@ ## This is a CMake file, part of Unidata's netCDF package. -# Copyright 2018, see the COPYRIGHT file for more information. 
+# Copyright 2018-2022, see the COPYRIGHT file for more information. # # This builds the HDF5 dispatch layer. # @@ -9,13 +9,13 @@ SET(libnchdf5_SOURCES nc4hdf.c nc4info.c hdf5file.c hdf5attr.c hdf5dim.c hdf5grp.c hdf5type.c hdf5internal.c hdf5create.c hdf5open.c hdf5var.c nc4mem.c nc4memcb.c hdf5dispatch.c hdf5filter.c -hdf5debug.c) +hdf5set_format_compatibility.c hdf5debug.c) IF(ENABLE_BYTERANGE) SET(libnchdf5_SOURCES ${libnchdf5_SOURCES} H5FDhttp.c) ENDIF() -# Build the HDF4 dispatch layer as a library that will be included in +# Build the HDF5 dispatch layer as a library that will be included in # the netCDF library. add_library(netcdfhdf5 OBJECT ${libnchdf5_SOURCES}) diff --git a/libhdf5/Makefile.am b/libhdf5/Makefile.am index 3e5d3359dc..e7aa2164f1 100644 --- a/libhdf5/Makefile.am +++ b/libhdf5/Makefile.am @@ -1,5 +1,5 @@ -# This is part of Unidata's netCDF package. Copyright 2018, see the -# COPYRIGHT file for more information. +# This is part of Unidata's netCDF package. Copyright 2018-2022, +# see the COPYRIGHT file for more information. # Build the HDF5 dispatch layer. @@ -16,7 +16,7 @@ noinst_LTLIBRARIES = libnchdf5.la libnchdf5_la_SOURCES = nc4hdf.c nc4info.c hdf5file.c hdf5attr.c \ hdf5dim.c hdf5grp.c hdf5type.c hdf5internal.c hdf5create.c hdf5open.c \ hdf5var.c nc4mem.c nc4memcb.c hdf5dispatch.c hdf5filter.c \ -hdf5debug.c hdf5debug.h hdf5err.h +hdf5set_format_compatibility.c hdf5debug.c hdf5debug.h hdf5err.h if ENABLE_BYTERANGE libnchdf5_la_SOURCES += H5FDhttp.c H5FDhttp.h @@ -24,8 +24,3 @@ endif # Package this for cmake build. EXTRA_DIST = CMakeLists.txt - - - - - diff --git a/libhdf5/hdf5create.c b/libhdf5/hdf5create.c index 0475c525c7..16c5d7a9dd 100644 --- a/libhdf5/hdf5create.c +++ b/libhdf5/hdf5create.c @@ -117,9 +117,10 @@ nc4_create_file(const char *path, int cmode, size_t initialsz, } } - /* Need this access plist to control how HDF5 handles open objects - * on file close. 
(Setting H5F_CLOSE_WEAK will cause H5Fclose not to - * fail if there are any open objects in the file. This may happen when virtual + /* Need this FILE ACCESS plist to control how HDF5 handles open + * objects on file close; as well as for other controls below. + * (Setting H5F_CLOSE_WEAK will cause H5Fclose not to fail if there + * are any open objects in the file. This may happen when virtual * datasets are opened). */ if ((fapl_id = H5Pcreate(H5P_FILE_ACCESS)) < 0) BAIL(NC_EHDFERR); @@ -127,8 +128,8 @@ nc4_create_file(const char *path, int cmode, size_t initialsz, BAIL(NC_EHDFERR); #ifdef USE_PARALLEL4 - /* If this is a parallel file create, set up the file creation - property list. */ + /* If this is a parallel file create, set up the file access + property list for MPI/IO. */ if (mpiinfo != NULL) { nc4_info->parallel = NC_TRUE; LOG((4, "creating parallel file with MPI/IO")); @@ -164,21 +165,14 @@ nc4_create_file(const char *path, int cmode, size_t initialsz, nc4_chunk_cache_preemption)); } -#if H5_VERSION_GE(1,10,2) - /* lib versions 1.10.2 and higher */ - if (H5Pset_libver_bounds(fapl_id, H5F_LIBVER_V18, H5F_LIBVER_LATEST) < 0) -#else -#if H5_VERSION_GE(1,10,0) - /* lib versions 1.10.0, 1.10.1 */ - if (H5Pset_libver_bounds(fapl_id, H5F_LIBVER_EARLIEST, H5F_LIBVER_LATEST) < 0) -#else - /* all HDF5 1.8 lib versions */ - if (H5Pset_libver_bounds(fapl_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) < 0) -#endif -#endif - BAIL(NC_EHDFERR); + /* Set HDF5 format compatibility in the FILE ACCESS property list. + * Compatibility is transient and must be reselected every time + * a file is opened for writing. */ + retval = hdf5set_format_compatibility(fapl_id); + if (retval != NC_NOERR) + BAIL(retval); - /* Create the property list. */ + /* Begin setup for the FILE CREATION property list. 
*/ if ((fcpl_id = H5Pcreate(H5P_FILE_CREATE)) < 0) BAIL(NC_EHDFERR); @@ -186,9 +180,8 @@ nc4_create_file(const char *path, int cmode, size_t initialsz, if (H5Pset_obj_track_times(fcpl_id,0)<0) BAIL(NC_EHDFERR); - /* Set latest_format in access propertly list and - * H5P_CRT_ORDER_TRACKED in the creation property list. This turns - * on HDF5 creation ordering. */ + /* Set H5P_CRT_ORDER_TRACKED in the creation property list. + * This turns on HDF5 creation ordering. */ if (H5Pset_link_creation_order(fcpl_id, (H5P_CRT_ORDER_TRACKED | H5P_CRT_ORDER_INDEXED)) < 0) BAIL(NC_EHDFERR); diff --git a/libhdf5/hdf5open.c b/libhdf5/hdf5open.c index f3ede3ede4..09a251b127 100644 --- a/libhdf5/hdf5open.c +++ b/libhdf5/hdf5open.c @@ -1,4 +1,4 @@ -/* Copyright 2003-2018, University Corporation for Atmospheric +/* Copyright 2003-2022, University Corporation for Atmospheric * Research. See COPYRIGHT file for copying and redistribution * conditions. */ /** @@ -765,9 +765,10 @@ nc4_open_file(const char *path, int mode, void* parameters, int ncid) mpiinfo = (NC_MPI_INFO *)parameters; /* assume, may be changed if inmemory is true */ #endif /* !USE_PARALLEL4 */ - /* Need this access plist to control how HDF5 handles open objects - * on file close. (Setting H5F_CLOSE_WEAK will cause H5Fclose not to - * fail if there are any open objects in the file. This may happen when virtual + /* Need this FILE ACCESS plist to control how HDF5 handles open + * objects on file close; as well as for other controls below. + * (Setting H5F_CLOSE_WEAK will cause H5Fclose not to fail if there + * are any open objects in the file. This may happen when virtual * datasets are opened). */ if ((fapl_id = H5Pcreate(H5P_FILE_ACCESS)) < 0) BAIL(NC_EHDFERR); @@ -820,6 +821,13 @@ nc4_open_file(const char *path, int mode, void* parameters, int ncid) nc4_chunk_cache_preemption)); } + /* Set HDF5 format compatibility in the FILE ACCESS property list. 
+ * Compatibility is transient and must be reselected every time + * a file is opened for writing. */ + retval = hdf5set_format_compatibility(fapl_id); + if (retval != NC_NOERR) + BAIL(retval); + /* Process NC_INMEMORY */ if(nc4_info->mem.inmemory) { NC_memio* memio; diff --git a/libhdf5/hdf5set_format_compatibility.c b/libhdf5/hdf5set_format_compatibility.c new file mode 100644 index 0000000000..f6e7421147 --- /dev/null +++ b/libhdf5/hdf5set_format_compatibility.c @@ -0,0 +1,74 @@ +/* Copyright 2022, University Corporation for Atmospheric Research. + * See COPYRIGHT file for copying and redistribution conditions. */ +/** + * @file + * @internal This function selects the best HDF5 file format options + * to create netCDF-4 files that can be read and written by older + * library versions. + * + * Format compatibility is transient, not baked in to an HDF5 file + * at creation time. Therefore the desired compatibility options + * must be selected every time a file is opened for writing. + * + * This function should be called before every call to create a new + * netCDF-4 file, or to open an existing netCDF-4 file for writing. + * This function has no effect when opening a file for read only. + * + * This function should work correctly with all HDF5 library versions + * from 1.8.0 through 1.13.0 and beyond, with no further changes. + * This assumes that HDF5 versioning controls remain consistent + * into the future. + * + * The basic functionality is to select the traditional HDF5 v1.8 + * format compatibility, whenever possible. The less desirable + * v1.6 compatibily is selected in a few strange cases when it is + * not possible to select v1.8. + * + * Files created with v1.6 compatibility have superblock version 0. + * Files created with v1.8 compatibility have superblock version 2. + * + * The superblock version is locked in when a file is first created. 
+ * It is then possible to get a mix of v1.6 and v1.8 internal + * object versions, when an existing netCDF-4 file is modified by + * a different software version than the one that originally + * created the file. Mixed-object files of this nature are common + * and do not suffer any serious problems. + * + * See netcdf-c github issues #250 and #951 for more details about + * the rationale and evolution of netCDF-4 format compatibility. + */ + +#include "config.h" +#include "hdf5internal.h" + +/** + * @internal Function to set HDF5 file access options for backward + * format compatibility. Call this before every call to H5Fcreate + * or H5Fopen. + * + * @param fapl_id Identifier for valid file access property list to + * be used in the next call to H5Fcreate or H5Fopen. + * + * @return ::NC_EHDFERR General failure in HDF5. + */ +int +hdf5set_format_compatibility(hid_t fapl_id) +{ +#if H5_VERSION_GE(1,10,2) + /* lib versions 1.10.2 and higher */ + if (H5Pset_libver_bounds(fapl_id, H5F_LIBVER_V18, H5F_LIBVER_LATEST) < 0) + +#else +#if H5_VERSION_GE(1,10,0) + /* lib versions 1.10.0, 1.10.1 */ + if (H5Pset_libver_bounds(fapl_id, H5F_LIBVER_EARLIEST, H5F_LIBVER_LATEST) < 0) + +#else + /* all HDF5 1.8 lib versions */ + if (H5Pset_libver_bounds(fapl_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) < 0) +#endif +#endif + return NC_EHDFERR; /* failure exit */ + + return NC_NOERR; /* normal exit */ +} From b3b0da91b2a07e751c2eadbf388121c4edccf454 Mon Sep 17 00:00:00 2001 From: Dave Allured Date: Fri, 7 Jan 2022 18:40:44 -0700 Subject: [PATCH 2/4] Add compatibility function prototype --- include/hdf5internal.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/hdf5internal.h b/include/hdf5internal.h index ec3d60a5e9..9b8a4b7e17 100644 --- a/include/hdf5internal.h +++ b/include/hdf5internal.h @@ -1,4 +1,4 @@ -/* Copyright 2018-2018 University Corporation for Atmospheric +/* Copyright 2018-2022 University Corporation for Atmospheric Research/Unidata. 
*/ /** * @file This header file contains macros, types, and prototypes for @@ -216,4 +216,6 @@ extern int nc4_find_default_chunksizes2(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var); EXTERNL hid_t nc4_H5Fopen(const char *filename, unsigned flags, hid_t fapl_id); EXTERNL hid_t nc4_H5Fcreate(const char *filename, unsigned flags, hid_t fcpl_id, hid_t fapl_id); +int hdf5set_format_compatibility(hid_t fapl_id); + #endif /* _HDF5INTERNAL_ */ From 69b2d20d5664b5dbb4f473463af3f76845900508 Mon Sep 17 00:00:00 2001 From: Dave Allured Date: Tue, 11 Jan 2022 10:25:13 -0700 Subject: [PATCH 3/4] Improve comments about format compatibility --- libhdf5/hdf5create.c | 2 +- libhdf5/hdf5set_format_compatibility.c | 21 +++++++++++++++++++-- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/libhdf5/hdf5create.c b/libhdf5/hdf5create.c index 16c5d7a9dd..b40447faad 100644 --- a/libhdf5/hdf5create.c +++ b/libhdf5/hdf5create.c @@ -1,4 +1,4 @@ -/* Copyright 2003-2018, University Corporation for Atmospheric +/* Copyright 2003-2022, University Corporation for Atmospheric * Research. See COPYRIGHT file for copying and redistribution * conditions. */ /** diff --git a/libhdf5/hdf5set_format_compatibility.c b/libhdf5/hdf5set_format_compatibility.c index f6e7421147..308a753ff7 100644 --- a/libhdf5/hdf5set_format_compatibility.c +++ b/libhdf5/hdf5set_format_compatibility.c @@ -24,8 +24,18 @@ * v1.6 compatibily is selected in a few strange cases when it is * not possible to select v1.8. * - * Files created with v1.6 compatibility have superblock version 0. - * Files created with v1.8 compatibility have superblock version 2. + * Files created or updated with v1.10 and higher compatibility are + * not legal netCDF-4 format, as of 2022 January. They are not + * readable by any netCDF library version linked with any HDF5 v1.8 + * or older library version. 
However, it is possible for advanced + * or experimental software to deliberately override these default + * format settings, to create advanced format files for special + * purposes. + * + * Files created with v1.6 compatibility have superblock version 0. + * Files created with v1.8 compatibility have superblock version 2. + * Files created with v1.10 compatibility have superblock version 3, + * and are avoided by default. Et cetera. * * The superblock version is locked in when a file is first created. * It is then possible to get a mix of v1.6 and v1.8 internal @@ -36,6 +46,11 @@ * * See netcdf-c github issues #250 and #951 for more details about * the rationale and evolution of netCDF-4 format compatibility. + * + * See HDF5 documentation for H5Pset_libver_bounds and related RFCs, + * for more details about HDF5 file object versioning. + * + * @author Dave Allured, NOAA/PSL/CIRES @date 2022 January 11 */ #include "config.h" @@ -50,6 +65,8 @@ * be used in the next call to H5Fcreate or H5Fopen. * * @return ::NC_EHDFERR General failure in HDF5. + * + * @author Dave Allured, NOAA/PSL/CIRES @date 2022 January 11 */ int hdf5set_format_compatibility(hid_t fapl_id) From 158f290663e84e68faf90e7a55c5ea53c83d9618 Mon Sep 17 00:00:00 2001 From: Dave Allured Date: Wed, 12 Jan 2022 19:06:17 -0700 Subject: [PATCH 4/4] Minor comment fix for return codes --- libhdf5/hdf5set_format_compatibility.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libhdf5/hdf5set_format_compatibility.c b/libhdf5/hdf5set_format_compatibility.c index 308a753ff7..cec9575dd7 100644 --- a/libhdf5/hdf5set_format_compatibility.c +++ b/libhdf5/hdf5set_format_compatibility.c @@ -64,7 +64,8 @@ * @param fapl_id Identifier for valid file access property list to * be used in the next call to H5Fcreate or H5Fopen. * - * @return ::NC_EHDFERR General failure in HDF5. + * @return ::NC_NOERR No error. + * @return ::NC_EHDFERR HDF5 returned error. 
* * @author Dave Allured, NOAA/PSL/CIRES @date 2022 January 11 */