Serialbox  2.2.0
Data serialization library and tools for C/C++, Python and Fortran
NetCDFArchive.cpp
Go to the documentation of this file.
1 //===-- serialbox/core/archive/NetCDFArchive.cpp ------------------------------------*- C++ -*-===//
2 //
3 // S E R I A L B O X
4 //
5 // This file is distributed under terms of BSD license.
6 // See LICENSE.txt for more information
7 //
8 //===------------------------------------------------------------------------------------------===//
9 //
13 //===------------------------------------------------------------------------------------------===//
14 
16 #ifdef SERIALBOX_HAS_NETCDF
17 
18 #include "serialbox/core/Logging.h"
21 #include "serialbox/core/Version.h"
23 #include <boost/algorithm/string.hpp>
24 #include <fstream>
25 #include <memory>
26 #include <netcdf.h>
27 #include <netcdf_meta.h>
28 #include <unordered_map>
29 #include <vector>
30 
/// Evaluate the NetCDF call `functionCall` and throw a serialbox::Exception carrying the
/// nc_strerror() text if the call returns a non-zero status.
///
/// The status is assigned to a variable named `errorCode`, which must be declared in the scope
/// where the macro is used. The expansion is wrapped in do/while(0) so that it behaves like one
/// ordinary statement: it is safe as the body of an unbraced `if`/`else` or loop and has to be
/// terminated with `;` by the caller.
#define NETCDF_CHECK(functionCall)                                                                 \
  do {                                                                                             \
    if((errorCode = (functionCall)))                                                               \
      throw serialbox::Exception("NetCDFArchive: %s", nc_strerror(errorCode));                     \
  } while(0)
37 
38 namespace serialbox {
39 
40 namespace internal {
41 
43 static int typeID2NcType(TypeID type) {
44  switch(type) {
45  case TypeID::Boolean:
46  return NC_BYTE;
47  case TypeID::Int32:
48  return NC_INT;
49  case TypeID::Int64:
50  return NC_INT64;
51  case TypeID::Float32:
52  return NC_FLOAT;
53  case TypeID::Float64:
54  return NC_DOUBLE;
55  default:
56  throw Exception("cannot convert type '%s' to netCDF type", TypeUtil::toString(type));
57  }
58 }
59 
/// Compile-time selector between two callables, chosen by `Int64IsLong`.
///
/// The primary template (used for `Int64IsLong == true`) invokes `functionForLong` with the given
/// arguments and ignores `functionForLongLong`. Only the selected callable has to be invocable
/// with `args`; the other one is never called and therefore never instantiated with them.
template <bool Int64IsLong>
struct DispatchInt64Impl {
  template <class FunctionForLong, class FunctionForLongLong, typename... Args>
  int operator()(FunctionForLong&& functionForLong, FunctionForLongLong&& functionForLongLong,
                 Args&&... args) {
    (void)functionForLongLong;
    // Forward so that rvalue and move-only arguments reach the callee unchanged
    return std::forward<FunctionForLong>(functionForLong)(std::forward<Args>(args)...);
  }
};

/// Specialization for `Int64IsLong == false`: invokes `functionForLongLong` and ignores
/// `functionForLong`.
template <>
struct DispatchInt64Impl<false> {
  template <class FunctionForLong, class FunctionForLongLong, typename... Args>
  int operator()(FunctionForLong&& functionForLong, FunctionForLongLong&& functionForLongLong,
                 Args&&... args) {
    (void)functionForLong;
    return std::forward<FunctionForLongLong>(functionForLongLong)(std::forward<Args>(args)...);
  }
};

/// Call `functionForLong(args...)` if `std::int64_t` is the type `long` on this platform,
/// otherwise call `functionForLongLong(args...)`, and return the callee's result.
///
/// The wrapper is not `noexcept`: whatever the selected callable throws propagates to the caller.
template <class FunctionForLong, class FunctionForLongLong, typename... Args>
static int dispatchInt64(FunctionForLong&& functionForLong,
                         FunctionForLongLong&& functionForLongLong, Args&&... args) {
  return DispatchInt64Impl<std::is_same<std::int64_t, long>::value>()(
      std::forward<FunctionForLong>(functionForLong),
      std::forward<FunctionForLongLong>(functionForLongLong), std::forward<Args>(args)...);
}
89 
91 static void write(int ncID, int varID, const std::vector<std::size_t>& startp,
92  const std::vector<std::size_t>& countp,
93  const std::vector<std::ptrdiff_t>& stridep,
94  const std::vector<std::ptrdiff_t>& imapp, const StorageView& storageView) {
95  int errorCode;
96 
97  TypeID type = storageView.type();
98 
99  switch(type) {
100  case TypeID::Boolean:
101  NETCDF_CHECK(nc_put_varm_text(ncID, varID, startp.data(), countp.data(), stridep.data(),
102  imapp.data(), storageView.originPtr()));
103  break;
104  case TypeID::Int32:
105  NETCDF_CHECK(nc_put_varm_int(ncID, varID, startp.data(), countp.data(), stridep.data(),
106  imapp.data(), storageView.originPtrAs<int>()));
107  break;
108  case TypeID::Int64: {
109  NETCDF_CHECK(internal::dispatchInt64(nc_put_varm_long, nc_put_varm_longlong, ncID, varID,
110  startp.data(), countp.data(), stridep.data(), imapp.data(),
111  storageView.originPtrAs<std::int64_t>()));
112  break;
113  }
114  case TypeID::Float32:
115  NETCDF_CHECK(nc_put_varm_float(ncID, varID, startp.data(), countp.data(), stridep.data(),
116  imapp.data(), storageView.originPtrAs<float>()));
117  break;
118  case TypeID::Float64:
119  NETCDF_CHECK(nc_put_varm_double(ncID, varID, startp.data(), countp.data(), stridep.data(),
120  imapp.data(), storageView.originPtrAs<double>()));
121  break;
122  default:
123  serialbox_unreachable("type not supported");
124  }
125 }
126 
128 static void read(int ncID, int varID, const std::vector<std::size_t>& startp,
129  const std::vector<std::size_t>& countp, const std::vector<std::ptrdiff_t>& stridep,
130  const std::vector<std::ptrdiff_t>& imapp, StorageView& storageView) {
131  int errorCode;
132 
133  TypeID type = storageView.type();
134 
135  switch(type) {
136  case TypeID::Boolean:
137  NETCDF_CHECK(nc_get_varm_text(ncID, varID, startp.data(), countp.data(), stridep.data(),
138  imapp.data(), storageView.originPtr()));
139  break;
140  case TypeID::Int32:
141  NETCDF_CHECK(nc_get_varm_int(ncID, varID, startp.data(), countp.data(), stridep.data(),
142  imapp.data(), storageView.originPtrAs<int>()));
143  break;
144  case TypeID::Int64:
145  NETCDF_CHECK(internal::dispatchInt64(nc_get_varm_long, nc_get_varm_longlong, ncID, varID,
146  startp.data(), countp.data(), stridep.data(), imapp.data(),
147  storageView.originPtrAs<std::int64_t>()));
148  break;
149  case TypeID::Float32:
150  NETCDF_CHECK(nc_get_varm_float(ncID, varID, startp.data(), countp.data(), stridep.data(),
151  imapp.data(), storageView.originPtrAs<float>()));
152  break;
153  case TypeID::Float64:
154  NETCDF_CHECK(nc_get_varm_double(ncID, varID, startp.data(), countp.data(), stridep.data(),
155  imapp.data(), storageView.originPtrAs<double>()));
156  break;
157  default:
158  serialbox_unreachable("type not supported");
159  }
160 }
161 
162 } // namespace internal
163 
164 const std::string NetCDFArchive::Name = "NetCDF";
165 
166 const int NetCDFArchive::Version = 0;
167 
168 NetCDFArchive::NetCDFArchive(OpenModeKind mode, const std::string& directory,
169  const std::string& prefix)
170  : mode_(mode), directory_(directory), prefix_(prefix) {
171 
172  LOG(info) << "Creating NetCDFArchive (mode = " << mode_ << ") based on NetCDF (" << NC_VERSION
173  << ") from directory " << directory_;
174 
175  metaDatafile_ = directory_ / ("ArchiveMetaData-" + prefix_ + ".json");
176 
177  try {
178  bool isDir = filesystem::is_directory(directory_);
179 
180  switch(mode_) {
181  // We are reading, the directory needs to exist
182  case OpenModeKind::Read:
183  if(!isDir)
184  throw Exception("no such directory: '%s'", directory_.string());
185  break;
186  // We are writing or appending, create directories if it they don't exist
187  case OpenModeKind::Write:
188  case OpenModeKind::Append:
189  if(!isDir)
190  filesystem::create_directories(directory_);
191  break;
192  }
193  } catch(filesystem::filesystem_error& e) {
194  throw Exception(e.what());
195  }
196 
198 
199  // Remove all files
200  if(mode_ == OpenModeKind::Write)
201  clear();
202 }
203 
205 
// NOTE(review): the line carrying this definition's signature is missing from the rendered
// listing; judging by the body this is presumably the member that serializes the meta-data
// (writeMetaDataToJson or updateMetaData) - confirm against NetCDFArchive.h.
//
// Rebuilds json_ from scratch (version tags plus one "field_map" entry per element of fieldMap_)
// and overwrites metaDatafile_ with the result. Throws Exception if the file cannot be opened.
207  LOG(info) << "Update MetaData of NetCDF Archive";
208 
209  json_.clear();
210 
211  // Tag versions
212  json_["serialbox_version"] =
213  100 * SERIALBOX_VERSION_MAJOR + 10 * SERIALBOX_VERSION_MINOR + SERIALBOX_VERSION_PATCH;
214  json_["archive_name"] = NetCDFArchive::Name;
215  json_["archive_version"] = NetCDFArchive::Version;
216 
217  // FieldMap
218  for(auto it = fieldMap_.begin(), end = fieldMap_.end(); it != end; ++it)
219  json_["field_map"][it->first] = it->second;
220 
221  // Write metaData to disk (just overwrite the file, we assume that there is never more than one
222  // Archive per data set and thus our in-memory copy is always the up-to-date one)
223  std::ofstream fs(metaDatafile_.string(), std::ios::out | std::ios::trunc);
224 
225  if(!fs.is_open())
226  throw Exception("cannot open file: %s", metaDatafile_);
227 
// Pretty-print with an indentation of 2
228  fs << json_.dump(2) << std::endl;
229  fs.close();
230 }
231 
// NOTE(review): the line carrying this definition's signature is missing from the rendered
// listing; judging by the body this is presumably readMetaDataFromJson - confirm against
// NetCDFArchive.h.
//
// Loads metaDatafile_ into json_, validates the stored versions and archive name and rebuilds
// fieldMap_ from the "field_map" entry. A missing meta-data file is an error only in Read mode;
// in the other modes the function returns without touching any state.
233  LOG(info) << "Reading MetaData for NetCDF archive ... ";
234 
235  // Check if metaData file exists
236  if(!filesystem::exists(metaDatafile_)) {
237  if(mode_ != OpenModeKind::Read)
238  return;
239  throw Exception("archive meta data not found in directory '%s'", directory_.string());
240  }
241 
// NOTE(review): the stream is not checked for having been opened successfully before parsing
242  std::ifstream fs(metaDatafile_.string(), std::ios::in);
243  fs >> json_;
244  fs.close();
245 
246  int serialboxVersion = json_["serialbox_version"];
247  std::string archiveName = json_["archive_name"];
248  int archiveVersion = json_["archive_version"];
249 
250  // Check consistency
251  if(!Version::isCompatible(serialboxVersion))
252  throw Exception("serialbox version of NetCDF archive (%s) is not compatible with the version "
253  "of the library (%s)",
254  Version::toString(serialboxVersion), SERIALBOX_VERSION_STRING);
255 
256  if(archiveName != NetCDFArchive::Name)
257  throw Exception("archive is not a NetCDF archive");
258 
// Only archives with a larger version than NetCDFArchive::Version are rejected; smaller or equal
// versions are accepted even though the message speaks of a mismatch
259  if(archiveVersion > NetCDFArchive::Version)
260  throw Exception("NetCDF archive version (%s) does not match the version of the library (%s)",
261  archiveVersion, NetCDFArchive::Version);
262 
263  // Deserialize FieldMap
264  if(json_.count("field_map")) {
265  fieldMap_.clear();
266  for(auto it = json_["field_map"].begin(); it != json_["field_map"].end(); ++it)
267  fieldMap_.insert({it.key(), static_cast<int>(it.value())});
268  }
269 }
270 
271 //===------------------------------------------------------------------------------------------===//
272 // Writing
273 //===------------------------------------------------------------------------------------------===//
274 
275 FieldID NetCDFArchive::write(const StorageView& storageView, const std::string& field,
276  const std::shared_ptr<FieldMetainfoImpl> info) {
277  if(mode_ == OpenModeKind::Read)
278  throw Exception("Archive is not initialized with OpenModeKind set to 'Write' or 'Append'");
279 
280  LOG(info) << "Attempting to write field \"" << field << "\" to NetCDF archive ...";
281 
282  int ncID, varID, errorCode;
283 
284  TypeID type = storageView.type();
285 
286  std::vector<int> dims;
287  std::vector<int> strides;
288  for(size_t i = 0; i < storageView.dims().size(); ++i) {
289  if(storageView.dims()[i] > 0) {
290  dims.push_back(storageView.dims()[i]);
291  strides.push_back(storageView.strides()[i]);
292  }
293  }
294 
295  std::size_t numDims = dims.size();
296  std::size_t numDimsID = numDims + 1;
297 
298  auto it = fieldMap_.find(field);
299 
300  FieldID fieldID{field, 0};
301  filesystem::path filename = directory_ / (prefix_ + "_" + field + ".nc");
302 
303  if(it != fieldMap_.end()) {
304  it->second++;
305  fieldID.id = it->second;
306 
307  // Open file for appending
308  NETCDF_CHECK(nc_open(filename.c_str(), NC_WRITE, &ncID));
309 
310  // Get the variable
311  NETCDF_CHECK(nc_inq_varid(ncID, field.c_str(), &varID));
312 
313  } else {
314  // Open new file
315  NETCDF_CHECK(nc_create(filename.c_str(), NC_NETCDF4, &ncID));
316 
317  // Create dimensions
318  std::vector<int> dimsID(numDimsID);
319 
320  NETCDF_CHECK(nc_def_dim(ncID, "fieldID", NC_UNLIMITED, &dimsID[0]));
321  for(int i = 1; i < dimsID.size(); ++i)
322  NETCDF_CHECK(
323  nc_def_dim(ncID, ("d" + std::to_string(i - 1)).c_str(), dims[i - 1], &dimsID[i]));
324 
325  // Define the variable
326  NETCDF_CHECK(nc_def_var(ncID, field.c_str(), internal::typeID2NcType(type), numDimsID,
327  dimsID.data(), &varID));
328 
329  // End define mode
330  NETCDF_CHECK(nc_enddef(ncID));
331 
332  fieldMap_.insert({fieldID.name, fieldID.id});
333  }
334 
335  // Write data to disk
336  std::vector<std::size_t> startp(numDimsID, 0), countp(numDimsID);
337  std::vector<std::ptrdiff_t> stridep(numDimsID, 1), imapp(numDimsID);
338 
339  startp[0] = fieldID.id;
340  countp[0] = 1;
341  imapp[0] = storageView.size();
342 
343  for(int i = 0; i < numDims; ++i) {
344  countp[i + 1] = dims[i];
345  imapp[i + 1] = strides[i];
346  }
347 
348  internal::write(ncID, varID, startp, countp, stridep, imapp, storageView);
349 
350  // Close file
351  NETCDF_CHECK(nc_close(ncID));
352 
353  // Update meta-data
354  updateMetaData();
355 
356  LOG(info) << "Successfully wrote field \"" << fieldID.name << "\" (id = " << fieldID.id << ") to "
357  << filename.filename();
358  return fieldID;
359 }
360 
361 void NetCDFArchive::writeToFile(std::string filename, const StorageView& storageView,
362  const std::string& field) {
363  int ncID, varID, errorCode;
364 
365  TypeID type = storageView.type();
366  const std::vector<int>& dims = storageView.dims();
367  const std::vector<int>& strides = storageView.strides();
368 
369  std::size_t numDims = dims.size();
370 
371  // Open new file
372  NETCDF_CHECK(nc_create(filename.c_str(), NC_NETCDF4, &ncID));
373 
374  // Create dimensions
375  std::vector<int> dimsID(numDims);
376  for(int i = 0; i < dimsID.size(); ++i)
377  NETCDF_CHECK(nc_def_dim(ncID, ("d" + std::to_string(i)).c_str(), dims[i], &dimsID[i]));
378 
379  // Define the variable
380  NETCDF_CHECK(nc_def_var(ncID, field.c_str(), internal::typeID2NcType(type), dimsID.size(),
381  dimsID.data(), &varID));
382 
383  // End define mode
384  NETCDF_CHECK(nc_enddef(ncID));
385 
386  // Write data to disk
387  std::vector<std::size_t> startp(numDims, 0), countp(numDims);
388  std::vector<std::ptrdiff_t> stridep(numDims, 1), imapp(numDims);
389  for(int i = 0; i < numDims; ++i) {
390  countp[i] = dims[i];
391  imapp[i] = strides[i];
392  }
393 
394  internal::write(ncID, varID, startp, countp, stridep, imapp, storageView);
395 
396  // Close file
397  NETCDF_CHECK(nc_close(ncID));
398 }
399 
400 //===------------------------------------------------------------------------------------------===//
401 // Reading
402 //===------------------------------------------------------------------------------------------===//
403 
404 void NetCDFArchive::read(StorageView& storageView, const FieldID& fieldID,
405  std::shared_ptr<FieldMetainfoImpl> info) const {
406  LOG(info) << "Attempting to read field \"" << fieldID.name << "\" (id = " << fieldID.id
407  << ") via NetCDFArchive ... ";
408 
409  int ncID, varID, errorCode;
410 
411  const std::vector<int>& dims = storageView.dims();
412  const std::vector<int>& strides = storageView.strides();
413 
414  std::size_t numDims = dims.size();
415  std::size_t numDimsID = numDims + 1;
416 
417  // Check if field exists
418  auto it = fieldMap_.find(fieldID.name);
419  if(it == fieldMap_.end())
420  throw Exception("no field '%s' registered in NetCDFArchive", fieldID.name);
421 
422  // Check if id is valid
423  if(fieldID.id > it->second)
424  throw Exception("invalid id '%i' of field '%s'", fieldID.id, fieldID.name);
425 
426  filesystem::path filename = directory_ / (prefix_ + "_" + fieldID.name + ".nc");
427 
428  // Open file for reading
429  NETCDF_CHECK(nc_open(filename.c_str(), NC_NOWRITE, &ncID));
430 
431  // Get the variable
432  NETCDF_CHECK(nc_inq_varid(ncID, fieldID.name.c_str(), &varID));
433 
434  // Read data from disk
435  std::vector<std::size_t> startp(numDimsID, 0), countp(numDimsID);
436  std::vector<std::ptrdiff_t> stridep(numDimsID, 1), imapp(numDimsID);
437 
438  startp[0] = fieldID.id;
439  countp[0] = 1;
440  imapp[0] = storageView.size();
441 
442  for(int i = 0; i < numDims; ++i) {
443  countp[i + 1] = dims[i];
444  imapp[i + 1] = strides[i];
445  }
446 
447  internal::read(ncID, varID, startp, countp, stridep, imapp, storageView);
448 
449  // Close file
450  NETCDF_CHECK(nc_close(ncID));
451 
452  LOG(info) << "Successfully read field \"" << fieldID.name << "\" (id = " << fieldID.id << ")";
453 }
454 
455 void NetCDFArchive::readFromFile(std::string filename, StorageView& storageView,
456  const std::string& field) {
457 
458  if(!filesystem::exists(filename))
459  throw Exception("cannot open %s: file does not exist", filename);
460 
461  int ncID, varID, errorCode;
462 
463  const std::vector<int>& dims = storageView.dims();
464  const std::vector<int>& strides = storageView.strides();
465 
466  std::size_t numDims = dims.size();
467 
468  // Open file for reading
469  NETCDF_CHECK(nc_open(filename.c_str(), NC_NOWRITE, &ncID));
470 
471  // Get the variable
472  NETCDF_CHECK(nc_inq_varid(ncID, field.c_str(), &varID));
473 
474  // Read data from disk
475  std::vector<std::size_t> startp(numDims, 0), countp(numDims);
476  std::vector<std::ptrdiff_t> stridep(numDims, 1), imapp(numDims);
477 
478  for(int i = 0; i < numDims; ++i) {
479  countp[i] = dims[i];
480  imapp[i] = strides[i];
481  }
482 
483  internal::read(ncID, varID, startp, countp, stridep, imapp, storageView);
484 
485  // Close file
486  NETCDF_CHECK(nc_close(ncID));
487 }
488 
// NOTE(review): the line carrying this definition's signature is missing from the rendered
// listing; judging by the body this is presumably NetCDFArchive::clear - confirm against
// NetCDFArchive.h.
//
// Removes every regular file in directory_ whose name starts with prefix_ + "_" and whose
// extension is ".nc", then empties fieldMap_. A file that cannot be removed is only reported
// with a warning. Files with any other extension are left in place.
490  filesystem::directory_iterator end;
491  for(filesystem::directory_iterator it(directory_); it != end; ++it) {
492  if(filesystem::is_regular_file(it->path()) &&
493  boost::algorithm::starts_with(it->path().filename().string(), prefix_ + "_") &&
494  filesystem::path(it->path()).extension() == ".nc") {
495 
496  if(!filesystem::remove(it->path()))
497  LOG(warning) << "NetCDFArchive: cannot remove file " << it->path();
498  }
499  }
500  fieldMap_.clear();
501 }
502 
503 std::ostream& NetCDFArchive::toStream(std::ostream& stream) const {
504  stream << "NetCDFArchive = {\n";
505  stream << " directory: " << directory_.string() << "\n";
506  stream << " mode: " << mode_ << "\n";
507  stream << " prefix: " << prefix_ << "\n";
508  stream << " fieldMap = {\n";
509  for(auto it = fieldMap_.begin(), end = fieldMap_.end(); it != end; ++it)
510  stream << " " << it->first << ": " << it->second << "\n";
511  stream << " }\n";
512  stream << "}\n";
513  return stream;
514 }
515 
516 std::unique_ptr<Archive> NetCDFArchive::create(OpenModeKind mode, const std::string& directory,
517  const std::string& prefix) {
518  return std::make_unique<NetCDFArchive>(mode, directory, prefix);
519 }
520 
521 } // namespace serialbox
522 
523 #endif // SERIALBOX_HAS_NETCDF
virtual void updateMetaData() override
Update the meta-data on disk.
virtual OpenModeKind mode() const override
Open-policy of the archive.
Definition: NetCDFArchive.h:74
static std::string toString(int version)
Convert to string.
Definition: Version.h:38
static const int Version
Revision of the NetCDF archive.
Definition: NetCDFArchive.h:41
unsigned int id
ID within the field.
Definition: FieldID.h:29
virtual FieldID write(const StorageView &storageView, const std::string &fieldID, const std::shared_ptr< FieldMetainfoImpl > info) override
Write the field given by storageView to disk.
virtual void clear() override
Clear the archive, i.e. remove all data from disk and reset the internal data structures.
virtual std::string directory() const override
Directory to write/read files.
Definition: NetCDFArchive.h:76
virtual void read(StorageView &storageView, const FieldID &fieldID, std::shared_ptr< FieldMetainfoImpl > info) const override
Read the field identified by fieldID and given by storageView from disk.
#define NETCDF_CHECK(functionCall)
Check return type of NetCDF functions.
static const std::string Name
Name of the NetCDF archive.
Definition: NetCDFArchive.h:38
#define LOG(severity)
Logging infrastructure.
Definition: Logging.h:102
Represent a mutable view to a multi-dimensional storage.
Definition: StorageView.h:33
TypeID type() const noexcept
Get type.
Definition: StorageView.h:90
Namespace of the serialbox library.
Definition: Archive.h:25
TypeID
Type-id of types recognized by serialbox.
Definition: Type.h:55
static void writeToFile(std::string filename, const StorageView &storageView, const std::string &field)
Directly write field (given by storageView) to file.
void writeMetaDataToJson()
Convert meta-data to JSON and serialize to file.
Uniquely identify a field.
Definition: FieldID.h:27
std::string name
Name of the field.
Definition: FieldID.h:28
const std::vector< int > & dims() const noexcept
Get dimensions.
Definition: StorageView.h:96
virtual std::ostream & toStream(std::ostream &stream) const override
Convert the archive to stream.
static std::string toString(TypeID id)
Convert to string.
Definition: Type.cpp:35
void readMetaDataFromJson()
Load meta-data from JSON file.
static void readFromFile(std::string filename, StorageView &storageView, const std::string &field)
Directly read field (given by storageView) from file.
std::size_t size() const noexcept
Size of the allocated, sliced data (without padding)
OpenModeKind
Policy for opening files in the Serializer and Archive.
Definition: Type.h:40
NetCDFArchive(OpenModeKind mode, const std::string &directory, const std::string &prefix)
Initialize the archive.
const std::vector< int > & strides() const noexcept
Get strides.
Definition: StorageView.h:99
static bool isCompatible(int version) noexcept
Check if the given version is compatible with the current library version (i.e. is older) ...
Definition: Version.h:61
static std::unique_ptr< Archive > create(OpenModeKind mode, const std::string &directory, const std::string &prefix)
Create a NetCDFArchive.
#define serialbox_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: Unreachable.h:41
virtual std::string prefix() const override
Prefix of all files.
Definition: NetCDFArchive.h:78
Exception class which stores a human-readable error description.
Definition: Exception.h:30