#include <some_header_files>
// Illustrative sketch (not compilable as-is; `...` marks elided code):
// `object` is written once, then read back repeatedly inside a loop.
int main(int argc, char *argv[]) {
sn::some_type_t object;
write( somewhere, object, ... );
...
// the index advances by batch_size per iteration; the read on the next line
// is the loop's only statement (no braces)
for( size_t i=0; i<huge_number; i+=batch_size)
read( somewhere, object, ...);
}
`std::vector<int>().data()`
points to contiguous memory, but when `T = std::string`
it doesn't. A purely trait-based approach requires the type to be available upfront, making it less powerful than if we could detect the presence of certain methods.
// A vector of POD structs: every element carries one (row, column, value) triplet.
// NOTE(review): presumably the coordinate (COO) sparse-matrix layout -- confirm.
struct coo_t {
size_t row;
size_t column;
double value;
};
// the whole matrix is a single container of such triplets
std::vector<coo_t> sparse_matrix;
// Each field of the struct is a vector: the matrix is split into three
// separate arrays instead of one array of records.
// NOTE(review): presumably the compressed-sparse-column (CSC) layout -- confirm.
struct csc_t {
std::vector<size_t> rowind; // row indices
std::vector<size_t> colptr; // start of new columns
std::vector<double> values; // nonzero values
};
// here the matrix is one object holding the three vectors
csc_t sparse_matrix;
...
// AST matcher for call expressions that, somewhere below them, reference
//  (1) a variable declaration, bound under the id "variableDecl", and
//  (2) a function declaration carrying one of the listed h5:: operator names.
// The remaining constraints are elided (`...`) in this excerpt.
StatementMatcher h5templateMatcher = callExpr( allOf(
hasDescendant( declRefExpr( to( varDecl().bind("variableDecl") ) ) ),
hasDescendant( declRefExpr( to(
functionDecl( allOf(
// alternatives for the function name
eachOf(
hasName("h5::write"), hasName("h5::create"), hasName("h5::read"),
hasName("h5::append"),
hasName("h5::awrite"), hasName("h5::acreate"), hasName("h5::aread")
),
... ));
It is possible to identify whether a container is STL-like and provides direct access to its contiguous storage -- as `std::vector<T>`
does -- or, alternatively, provides iterators for scatter/gather operations:
// Probes: each alias spells the type of one member of `C` (a nested type or
// the result of a member call). The alias is ill-formed when `C` lacks that
// member, which is what the has_* traits below test for.
template <class C> using value_type_f = typename C::value_type;
template <class C> using data_f       = decltype( std::declval<C>().data() );
template <class C> using size_f       = decltype( std::declval<C>().size() );
template <class C> using begin_f      = decltype( std::declval<C>().begin() );
template <class C> using end_f        = decltype( std::declval<C>().end() );
template <class C> using cbegin_f     = decltype( std::declval<C>().cbegin() );
template <class C> using cend_f       = decltype( std::declval<C>().cend() );

// Probe for `C::value_type`, with `C` itself supplied as the default.
// NOTE(review): assumes compat::detected_or / compat::is_detected follow the
// Library Fundamentals TS detection idiom -- confirm against `compat`.
template <class C> using value = compat::detected_or<C, value_type_f, C>;

// One trait per probe.
template <class C> using has_value_type    = compat::is_detected<value_type_f, C>;
template <class C> using has_data          = compat::is_detected<data_f, C>;
// same probe as has_data, under a second name
template <class C> using has_direct_access = compat::is_detected<data_f, C>;
template <class C> using has_size          = compat::is_detected<size_f, C>;
template <class C> using has_begin         = compat::is_detected<begin_f, C>;
template <class C> using has_end           = compat::is_detected<end_f, C>;
template <class C> using has_cbegin        = compat::is_detected<cbegin_f, C>;
template <class C> using has_cend          = compat::is_detected<cend_f, C>;

// Composite traits: true only when both members of the pair are present.
template <class C> using has_iterator =
	std::integral_constant<bool, has_begin<C>::value && has_end<C>::value>;
template <class C> using has_const_iterator =
	std::integral_constant<bool, has_cbegin<C>::value && has_cend<C>::value>;
Linear algebra libraries are a dedicated category, as they all must provide a mechanism to pass/receive data to/from some BLAS system call; however, the naming varies from system to system.
The differences can be mitigated with a combination of
library | direct access | vector size |
---|---|---|
arma | memptr() | n_elem |
eigen | data() | size() |
blaze | data() | n/a |
blitz | data() | size() |
itpp | _data() | length() |
ublas | data().begin() | n/a |
dlib | (0,0) | size() |
// Alias for the widest standard unsigned integer type; used by Record::field_01.
typedef unsigned long long int MyUInt;
namespace sn {
namespace example {
// Example record mixing a typedef'd scalar, a char, a fixed-size array of
// doubles and a fixed-size array of another struct type.
struct Record {
MyUInt field_01; // scalar, declared through the typedef above
char field_02;
double field_03[3]; // array of 3 doubles
other::Record field_04[4]; // array of 4 other::Record; that type is declared outside this excerpt
};
}
}
Write your C++ program as if `generated.h` were already written:
#include "some_header_file.h"
#include <h5cpp/core>
#include "generated.h"
#include <h5cpp/io>
int main(){
std::vector<sn::example::Record> stream = ...
h5::fd_t fd = h5::create("example.h5",H5F_ACC_TRUNC);
h5::pt_t pt = h5::create<sn::example::Record>( fd, "stream of struct",
h5::max_dims{H5S_UNLIMITED,7}, h5::chunk{4,7} | h5::gzip{9} );
...
}
h5::create | h5::write | h5::read | h5::append | h5::acreate | h5::awrite | h5::aread
#ifndef H5CPP_GUARD_ErRrk
#define H5CPP_GUARD_ErRrk
namespace h5{
// Excerpt of a generated type-descriptor function; `...` marks elided lines.
// It builds HDF5 datatype handles for a struct and returns one of them.
// NOTE(review): the specialization names no template argument, and the body
// refers to sn::typecheck::Record while the registration macro after this
// namespace names sn::example::Record -- confirm which example this belongs to.
template<> hid_t inline register_struct(){
// rank-1 array datatypes: 7 x native float and 3 x native double
hsize_t at_00_[] ={7}; hid_t at_00 = H5Tarray_create(H5T_NATIVE_FLOAT,1,at_00_);
hsize_t at_01_[] ={3}; hid_t at_01 = H5Tarray_create(H5T_NATIVE_DOUBLE,1,at_01_);
// compound datatype sized to the struct; members are inserted at their offsets
hid_t ct_00 = H5Tcreate(H5T_COMPOUND, sizeof (sn::typecheck::Record));
H5Tinsert(ct_00, "_char", HOFFSET(sn::typecheck::Record,_char),H5T_NATIVE_CHAR);
...
// NOTE(review): at_03..at_05 and ct_02 are not declared in the visible lines;
// presumably they are created in the elided part -- confirm.
H5Tclose(at_03); H5Tclose(at_04); H5Tclose(at_05);
return ct_02;
};
}
H5CPP_REGISTER_STRUCT(sn::example::Record);
#endif
#include "csv.h"
#include "struct.h"
#include <h5cpp/core> // has handle + type descriptors
#include "generated.h" // uses type descriptors
#include <h5cpp/io> // uses generated.h + core
// Reads five named columns from "input.csv" row by row and appends every row,
// as one `input_t` record, to a dataset created in "output.h5".
int main(){
	h5::fd_t fd = h5::create("output.h5",H5F_ACC_TRUNC);
	// dataset of input_t records, created with an unlimited max dimension,
	// chunk{10} and gzip{9}
	h5::ds_t ds = h5::create<input_t>(fd, "simple approach/dataset.csv",
		h5::max_dims{H5S_UNLIMITED}, h5::chunk{10} | h5::gzip{9} );
	h5::pt_t pt = ds; // handle passed to h5::append in the loop below

	// descriptive attributes attached to the dataset
	// NOTE(review): the key is spelled "cvs parser"; kept unchanged because it is stored in the file
	ds["data set"] = "monroe-county-crash-data2003-to-2015.csv";
	ds["cvs parser"] = "https://github.com/ben-strasser/fast-cpp-csv-parser";

	constexpr unsigned N_COLS = 5;
	io::CSVReader<N_COLS> in("input.csv"); // number of cols may be less, than total columns in a row, we're to read only 5
	in.read_header(io::ignore_extra_column, "Master Record Number", "Hour", "Reported_Location","Latitude","Longitude");

	input_t row{};        // buffer to read line by line; value-initialized so no indeterminate bytes are appended
	char* ptr = nullptr;  // indirection, as `read_row` doesn't take array directly
	while(in.read_row(row.MasterRecordNumber, row.Hour, ptr, row.Latitude, row.Longitude)){
		// strncpy writes no terminator when the source holds STR_ARRAY_SIZE or
		// more characters, so copy at most STR_ARRAY_SIZE-1 and terminate
		// explicitly; shorter sources are still zero-padded by strncpy.
		strncpy(row.ReportedLocation, ptr, STR_ARRAY_SIZE - 1); // STR_ARRAY_SIZE is defined in struct.h
		row.ReportedLocation[STR_ARRAY_SIZE - 1] = '\0';
		h5::append(pt, row);
	}
}
do the right thing, and come with an easy-to-use operator. Here are some examples:
// Attribute assignment examples: the handle returned by h5::write is indexed
// by attribute name and assigned a value (`...` marks elided arguments).
h5::ds_t ds = h5::write(fd,"some dataset with attributes", ... );
ds["att_01"] = 42 ; // integer literal
ds["att_02"] = {1.,3.,4.,5.}; // braced list of floating-point literals
ds["att_03"] = {'1','3','4','5'}; // braced list of character literals
ds["att_04"] = {"alpha", "beta","gamma","..."}; // braced list of string literals
ds["att_05"] = "const char[N]"; // string literal
ds["att_06"] = u8"const char[N]áééé"; // UTF-8 string literal with non-ASCII characters
ds["att_07"] = std::string( "std::string"); // std::string object
ds["att_08"] = record; // pod/compound datatype
ds["att_09"] = vector; // vector of pod/compound type
ds["att_10"] = matrix; // linear algebra object