20 #include <boost/algorithm/string.hpp> 40 const auto& dims = storageView.
dims();
50 dims_.back() = triple.stop - triple.start;
53 strides_.resize(dims_.size());
58 for(
int i = 1; i < dims_.size(); ++i) {
59 stride *= dims_[i - 1];
65 for(std::size_t i = 0; i < dims_.size(); ++i)
66 size *= (dims_[i] == 0 ? 1 : dims_[i]);
69 offset_ = (strides_.back() * triple.start) * bytesPerElement;
71 buffer_.resize(size * bytesPerElement);
80 Byte* dataPtr = buffer_.data();
84 std::memcpy(storageView.
originPtr(), dataPtr, buffer_.size());
86 for(
auto it = storageView.
begin(), end = storageView.
end(); it != end;
87 ++it, dataPtr += bytesPerElement)
88 std::memcpy(it.ptr(), dataPtr, bytesPerElement);
92 const int numDims = dims_.size();
95 Byte* dataPtr = buffer_.data();
98 std::vector<int> index(numDims);
99 for(
int i = 0; i < numDims - 1; ++i)
100 index[i] = triples[i].start;
104 Byte* curPtr = buffer_.data();
105 for(
auto it = storageView.
begin(), end = storageView.
end(); it != end; ++it) {
109 for(
int i = 0; i < numDims; ++i)
110 pos += bytesPerElement * (strides_[i] * index[i]);
111 curPtr = dataPtr + pos;
114 std::memcpy(it.ptr(), curPtr, bytesPerElement);
117 for(
int i = 0; i < numDims; ++i)
118 if((index[i] += triples[i].step) < triples[i].stop)
121 index[i] = triples[i].start;
128 Byte* dataPtr = buffer_.data();
132 std::memcpy(dataPtr, storageView.
originPtr(), buffer_.size());
134 for(
auto it = storageView.
begin(), end = storageView.
end(); it != end;
135 ++it, dataPtr += bytesPerElement)
136 std::memcpy(dataPtr, it.ptr(), bytesPerElement);
/// Get the buffer size, i.e. the number of Bytes currently held in `buffer_`.
141 std::size_t
size() const noexcept {
return buffer_.size(); }
/// Get a read-only pointer to the beginning of the buffer (`buffer_.data()`).
145 const Byte*
data()
const noexcept {
return buffer_.data(); }
/// Get the stored `offset_`: the initial offset of the data on disk, in bytes.
148 std::size_t
offset() const noexcept {
return offset_; }
151 std::vector<Byte> buffer_;
153 std::vector<int> strides_;
154 std::vector<int> dims_;
166 BinaryArchive::BinaryArchive(
OpenModeKind mode,
const std::string& directory,
167 const std::string& prefix,
bool skipMetaData)
168 : mode_(mode), directory_(directory), prefix_(prefix), json_() {
170 LOG(info) <<
"Creating BinaryArchive (mode = " << mode_ <<
") from directory " << directory_;
172 metaDatafile_ = directory_ / (
"ArchiveMetaData-" + prefix_ +
".json");
176 bool isDir = filesystem::is_directory(directory_);
180 case OpenModeKind::Read:
182 throw Exception(
"no such directory: '%s'", directory_.string());
185 case OpenModeKind::Write:
186 case OpenModeKind::Append:
188 filesystem::create_directories(directory_);
191 }
catch(filesystem::filesystem_error& e) {
199 if(mode_ == OpenModeKind::Write)
206 LOG(info) <<
"Reading MetaData for BinaryArchive ... ";
209 if(!filesystem::exists(metaDatafile_)) {
210 if(mode_ != OpenModeKind::Read)
212 throw Exception(
"archive meta data not found in directory '%s'", directory_.string());
215 std::ifstream fs(metaDatafile_.string(), std::ios::in);
219 int serialboxVersion = json_[
"serialbox_version"];
220 std::string archiveName = json_[
"archive_name"];
221 int archiveVersion = json_[
"archive_version"];
222 std::string hashAlgorithm = json_[
"hash_algorithm"];
226 throw Exception(
"serialbox version of binary archive (%s) does not match the version " 227 "of the library (%s)",
230 if(archiveName != BinaryArchive::Name)
231 throw Exception(
"archive is not a binary archive");
233 if(archiveVersion != BinaryArchive::Version)
234 throw Exception(
"binary archive version (%s) does not match the version of the library (%s)",
235 archiveVersion, BinaryArchive::Version);
238 if(mode_ != OpenModeKind::Write)
242 for(
auto it = json_[
"fields_table"].begin(); it != json_[
"fields_table"].end(); ++it) {
246 for(
auto fileOffsetIt = it->begin(); fileOffsetIt != it->end(); ++fileOffsetIt)
247 fieldOffsetTable.push_back(
FileOffsetType{fileOffsetIt->at(0), fileOffsetIt->at(1)});
249 fieldTable_[it.key()] = fieldOffsetTable;
254 LOG(info) <<
"Update MetaData of BinaryArchive";
259 json_[
"serialbox_version"] =
260 100 * SERIALBOX_VERSION_MAJOR + 10 * SERIALBOX_VERSION_MINOR + SERIALBOX_VERSION_PATCH;
263 json_[
"hash_algorithm"] = hash_->name();
266 for(
auto it = fieldTable_.begin(), end = fieldTable_.end(); it != end; ++it) {
267 for(
unsigned int id = 0;
id < it->second.size(); ++id)
268 json_[
"fields_table"][it->first].push_back({it->second[id].offset, it->second[id].checksum});
273 std::ofstream fs(metaDatafile_.string(), std::ios::out | std::ios::trunc);
276 throw Exception(
"cannot open file: %s", metaDatafile_);
278 fs << json_.dump(2) << std::endl;
289 const std::shared_ptr<FieldMetainfoImpl> info) {
290 if(mode_ == OpenModeKind::Read)
291 throw Exception(
"Archive is not initialized with OpenModeKind set to 'Write' or 'Append'");
293 LOG(info) <<
"Attempting to write field \"" << field <<
"\" to BinaryArchive ...";
295 filesystem::path filename(directory_ / (prefix_ +
"_" + field +
".dat"));
303 std::string checksum(hash_->hash(binaryBuffer.
data(), binaryBuffer.
size()));
306 auto it = fieldTable_.find(field);
310 if(it != fieldTable_.end()) {
314 for(std::size_t i = 0; i < fieldOffsetTable.size(); ++i)
315 if(checksum == fieldOffsetTable[i].checksum) {
316 LOG(info) <<
"Field \"" << field <<
"\" already serialized (id = " << i <<
"). Stopping";
322 fs.open(filename.string(), std::ofstream::out | std::ofstream::binary | std::ofstream::app);
323 #ifdef SERIALBOX_COMPILER_MSVC 326 auto offset = fs.tellp();
327 fieldID.id = fieldOffsetTable.size();
330 LOG(info) <<
"Appending field \"" << fieldID.name <<
"\" (id = " << fieldID.id <<
") to " 331 << filename.filename();
335 fs.open(filename.string(), std::ios::out | std::ios::binary | std::ios::trunc);
341 LOG(info) <<
"Creating new file " << filename.filename() <<
" for field \"" << fieldID.name
342 <<
"\" (id = " << fieldID.id <<
")";
346 throw Exception(
"cannot open file: '%s'", filename.string());
349 fs.write(binaryBuffer.
data(), binaryBuffer.
size());
354 LOG(info) <<
"Successfully wrote field \"" << fieldID.name <<
"\" (id = " << fieldID.id <<
") to " 355 << filename.filename();
365 std::ofstream fs(filename, std::ios::out | std::ios::binary | std::ios::trunc);
368 throw Exception(
"cannot open file: '%s'", filename);
370 fs.write(binaryBuffer.
data(), binaryBuffer.
size());
379 std::shared_ptr<FieldMetainfoImpl> info)
const {
380 LOG(info) <<
"Attempting to read field \"" << fieldID.
name <<
"\" (id = " << fieldID.
id 381 <<
") via BinaryArchive ... ";
384 auto it = fieldTable_.find(fieldID.
name);
385 if(it == fieldTable_.end())
386 throw Exception(
"no field '%s' registered in BinaryArchive", fieldID.
name);
391 if(fieldID.
id >= fieldOffsetTable.size())
392 throw Exception(
"invalid id '%i' of field '%s'", fieldID.
id, fieldID.
name);
398 std::string filename((directory_ / (prefix_ +
"_" + fieldID.
name +
".dat")).string());
399 std::ifstream fs(filename, std::ios::binary);
402 throw Exception(
"cannot open file: '%s'", filename);
405 auto offset = fieldOffsetTable[fieldID.
id].offset + binaryBuffer.
offset();
409 fs.read(binaryBuffer.
data(), binaryBuffer.
size());
414 LOG(info) <<
"Successfully read field \"" << fieldID.
name <<
"\" (id = " << fieldID.
id <<
")";
418 filesystem::path filepath(filename);
420 if(!filesystem::exists(filepath))
421 throw Exception(
"cannot open %s: file does not exist", filepath);
426 std::ifstream fs(filepath.string(), std::ios::in | std::ios::binary);
429 throw Exception(
"cannot open file: '%s'", filename);
432 fs.read(binaryBuffer.
data(), binaryBuffer.
size());
439 stream <<
"BinaryArchive = {\n";
440 stream <<
" directory: " << directory_.string() <<
"\n";
441 stream <<
" mode: " << mode_ <<
"\n";
442 stream <<
" prefix: " << prefix_ <<
"\n";
443 stream <<
" fieldsTable = {\n";
444 for(
auto it = fieldTable_.begin(), end = fieldTable_.end(); it != end; ++it) {
445 stream <<
" " << it->first <<
" = {\n";
446 for(std::size_t
id = 0;
id < it->second.size(); ++id)
447 stream <<
" [ " << it->second[
id].offset <<
", " << it->second[
id].checksum <<
" ]\n";
456 filesystem::directory_iterator end;
457 for(filesystem::directory_iterator it(directory_); it != end; ++it) {
458 if(filesystem::is_regular_file(it->path()) &&
459 boost::algorithm::starts_with(it->path().filename().string(), prefix_ +
"_") &&
460 filesystem::path(it->path()).extension() ==
".dat") {
462 if(!filesystem::remove(it->path()))
463 LOG(warning) <<
"BinaryArchive: cannot remove file " << it->path();
475 const std::string&
prefix) {
BinaryBuffer(const StorageView &storageView)
Allocate the buffer.
Byte * originPtr() noexcept
Get raw data pointer.
static std::string defaultHash()
Get the default hash algorithm (currently MD5 if available, SHA256 otherwise)
virtual void clear() override
Clear the archive, i.e. remove all data from disk and reset the internal data structures.
static void writeToFile(std::string filename, const StorageView &storageView)
Directly write field (given by storageView) to file.
static std::string toString(int version)
Convert to string.
virtual std::ostream & toStream(std::ostream &stream) const override
Convert the archive to stream.
static const std::string Name
Name of the binary archive.
std::size_t sizeInBytes() const noexcept
Size of the allocated, sliced data (without padding) in Bytes.
unsigned int id
ID within the field.
static void readFromFile(std::string filename, StorageView &storageView)
Directly read field (given by storageView) from file.
static const int Version
Revision of the binary archive.
void copyStorageViewToBuffer(const StorageView &storageView)
Copy data from storageView to buffer.
virtual std::string directory() const override
Directory to write/read files.
std::vector< FileOffsetType > FieldOffsetTable
Table of ids and corresponding offsets within each field (i.e. file)
#define LOG(severity)
Logging infrastructure.
Represent a mutable view to a multi-dimensional storage.
static std::unique_ptr< Hash > create(const std::string &name)
Construct an instance of the Hash given by name
Namespace of the serialbox library.
char Byte
Represent a byte, i.e. sizeof(Byte) == 1.
std::size_t offset() const noexcept
Get initial offset of the data on disk in bytes.
virtual void read(StorageView &storageView, const FieldID &fieldID, std::shared_ptr< FieldMetainfoImpl > info) const override
Read the field identified by fieldID and given by storageView from disk.
Uniquely identify a field.
std::string name
Name of the field.
const std::vector< int > & dims() const noexcept
Get dimensions.
virtual std::string prefix() const override
Prefix of all files.
virtual void updateMetaData() override
Update the meta-data on disk.
virtual OpenModeKind mode() const override
Open-policy of the archive.
void readMetaDataFromJson()
Load meta-data from JSON file.
void copyBufferToStorageView(StorageView &storageView)
Copy data from buffer to storageView while handling slicing.
void writeMetaDataToJson()
Convert meta-data to JSON and serialize to file.
void clearFieldTable()
Clear fieldTable.
std::vector< SliceTriple > & sliceTriples() noexcept
Get slice triples.
Slice & getSlice() noexcept
Get the slice of the StorageView
bool isMemCopyable() const noexcept
Return true if the storage is contiguous in memory (i.e. no padding) and is column-major ordered...
StorageViewIterator end() noexcept
Iterator to the end of the data.
OpenModeKind
Policy for opening files in the Serializer and Archive.
bool empty() const noexcept
Check if slice is empty.
StorageViewIterator begin() noexcept
Iterator to the beginning of the data.
static bool isCompatible(int version) noexcept
Check if the given version is compatible with the current library version (i.e. is older) ...
static std::unique_ptr< Archive > create(OpenModeKind mode, const std::string &directory, const std::string &prefix)
Create a BinaryArchive.
Contiguous buffer with support for sliced loading.
Byte * data() noexcept
Get pointer to the beginning of the buffer.
virtual FieldID write(const StorageView &storageView, const std::string &fieldID, const std::shared_ptr< FieldMetainfoImpl > info) override
Write the field given by storageView to disk.
Exception class which stores a human-readable error description.
std::size_t size() const noexcept
Get Buffer size.
int bytesPerElement() const noexcept
Get bytes per element.
virtual ~BinaryArchive()
Destructor.