RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
StreamOps.h
Go to the documentation of this file.
1//
2// Copyright (C) 2002-2008 Greg Landrum and Rational Discovery LLC
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10//
11#include <RDGeneral/export.h>
12#ifndef _RD_STREAMOPS_H
13#define _RD_STREAMOPS_H
14
15#include "types.h"
16#include "Invariant.h"
17#include "RDProps.h"
18#include <string>
19#include <sstream>
20#include <iostream>
21#include <unordered_set>
22#include <boost/cstdint.hpp>
23#include <boost/predef.h>
24
25namespace RDKit {
26// this code block for handling endian problems is adapted from :
27// http://stackoverflow.com/questions/105252/how-do-i-convert-between-big-endian-and-little-endian-values-in-c
28enum EEndian {
31#if defined(BOOST_ENDIAN_LITTLE_BYTE) || defined(BOOST_ENDIAN_LITTLE_WORD)
33#elif defined(BOOST_ENDIAN_BIG_BYTE)
35#elif defined(BOOST_ENDIAN_BIG_WORD)
36#error "Cannot compile on word-swapped big-endian systems"
37#else
38#error "Failed to determine the system endian value"
39#endif
40};
41
//! Returns \c value with the order of its first \c size bytes reversed.
//!
//! \tparam T     type of the value; it is copied byte by byte, so it should be
//!               trivially copyable
//! \tparam size  number of bytes to reverse (EndianSwapBytes passes sizeof(T))
//! \param value  the value to byte-swap
//! \return the byte-swapped value, or \c value itself when \c size < 2
//!
//! The bytes are accessed through unsigned char pointers instead of writing
//! one union member and reading another, which is undefined behaviour in C++.
template <class T, unsigned int size>
inline T SwapBytes(T value) {
  static_assert(size <= sizeof(T),
                "SwapBytes: size must not exceed sizeof(T)");
  if (size < 2) {
    return value;
  }

  // start from a copy so that any bytes beyond `size` keep a defined value
  T swapped = value;
  const unsigned char *src = reinterpret_cast<const unsigned char *>(&value);
  unsigned char *dst = reinterpret_cast<unsigned char *>(&swapped);
  for (unsigned int i = 0; i < size; ++i) {
    dst[i] = src[size - 1 - i];
  }
  return swapped;
}
63
64// Here is the function you will use. Again there is two compile-time assertion
65// that use the boost libraries. You could probably comment them out, but if you
66// do be cautious not to use this function for anything else than integers
67// types. This function need to be called like this :
68//
69// int x = someValue;
70// int i = EndianSwapBytes<HOST_ENDIAN_ORDER, BIG_ENDIAN_ORDER>(x);
71//
72template <EEndian from, EEndian to, class T>
73inline T EndianSwapBytes(T value) {
74 // A : La donnée à swapper à une taille de 2, 4 ou 8 octets
75 BOOST_STATIC_ASSERT(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 ||
76 sizeof(T) == 8);
77 if (sizeof(T) == 1) {
78 return value;
79 }
80
81 // A : La donnée à swapper est d'un type arithmetic
82 // BOOST_STATIC_ASSERT(boost::is_arithmetic<T>::value);
83
84 // Si from et to sont du même type on ne swap pas.
85 if (from == to) {
86 return value;
87 }
88
89 return SwapBytes<T, sizeof(T)>(value);
90}
91
92template <EEndian from, EEndian to>
93inline char EndianSwapBytes(char value) {
94 return value;
95}
96
97template <EEndian from, EEndian to>
98inline unsigned char EndianSwapBytes(unsigned char value) {
99 return value;
100}
101
102template <EEndian from, EEndian to>
103inline signed char EndianSwapBytes(signed char value) {
104 return value;
105}
106
107// --------------------------------------
108
109//! Packs an integer and outputs it to a stream
110inline void appendPackedIntToStream(std::stringstream &ss,
111 boost::uint32_t num) {
112 int nbytes, bix;
113 unsigned int val, res;
114 char tc;
115
116 res = num;
117 while (1) {
118 if (res < (1 << 7)) {
119 val = (res << 1);
120 nbytes = 1;
121 break;
122 }
123 res -= (1 << 7);
124 if (res < (1 << 14)) {
125 val = ((res << 2) | 1);
126 nbytes = 2;
127 break;
128 }
129 res -= (1 << 14);
130 if (res < (1 << 21)) {
131 val = ((res << 3) | 3);
132 nbytes = 3;
133 break;
134 }
135 res -= (1 << 21);
136 if (res < (1 << 29)) {
137 val = ((res << 3) | 7);
138 nbytes = 4;
139 break;
140 } else {
141 CHECK_INVARIANT(0, "ERROR: Integer too big to pack\n");
142 }
143 }
144 // val = EndianSwapBytes<HOST_ENDIAN_ORDER,LITTLE_ENDIAN_ORDER>(val);
145
146 for (bix = 0; bix < nbytes; bix++) {
147 tc = (char)(val & 255);
148 ss.write(&tc, 1);
149 val >>= 8;
150 }
151}
152
153//! Reads an integer from a stream in packed format and returns the result.
154inline boost::uint32_t readPackedIntFromStream(std::stringstream &ss) {
155 boost::uint32_t val, num;
156 int shift, offset;
157 char tmp;
158 ss.read(&tmp, sizeof(tmp));
159 if (ss.fail()) {
160 throw std::runtime_error("failed to read from stream");
161 }
162
163 val = UCHAR(tmp);
164 offset = 0;
165 if ((val & 1) == 0) {
166 shift = 1;
167 } else if ((val & 3) == 1) {
168 ss.read((char *)&tmp, sizeof(tmp));
169 if (ss.fail()) {
170 throw std::runtime_error("failed to read from stream");
171 }
172
173 val |= (UCHAR(tmp) << 8);
174 shift = 2;
175 offset = (1 << 7);
176 } else if ((val & 7) == 3) {
177 ss.read((char *)&tmp, sizeof(tmp));
178 if (ss.fail()) {
179 throw std::runtime_error("failed to read from stream");
180 }
181
182 val |= (UCHAR(tmp) << 8);
183 ss.read((char *)&tmp, sizeof(tmp));
184 if (ss.fail()) {
185 throw std::runtime_error("failed to read from stream");
186 }
187
188 val |= (UCHAR(tmp) << 16);
189 shift = 3;
190 offset = (1 << 7) + (1 << 14);
191 } else {
192 ss.read((char *)&tmp, sizeof(tmp));
193 if (ss.fail()) {
194 throw std::runtime_error("failed to read from stream");
195 }
196
197 val |= (UCHAR(tmp) << 8);
198 ss.read((char *)&tmp, sizeof(tmp));
199 if (ss.fail()) {
200 throw std::runtime_error("failed to read from stream");
201 }
202
203 val |= (UCHAR(tmp) << 16);
204 ss.read((char *)&tmp, sizeof(tmp));
205 if (ss.fail()) {
206 throw std::runtime_error("failed to read from stream");
207 }
208
209 val |= (UCHAR(tmp) << 24);
210 shift = 3;
211 offset = (1 << 7) + (1 << 14) + (1 << 21);
212 }
213 num = (val >> shift) + offset;
214 // num = EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(num);
215 return num;
216}
217
218//! Reads an integer from a char * in packed format and returns the result.
219//! The argument is advanced
220inline boost::uint32_t pullPackedIntFromString(const char *&text) {
221 boost::uint32_t val, num;
222 int shift, offset;
223 char tmp;
224 tmp = *text;
225 text++;
226 val = UCHAR(tmp);
227 offset = 0;
228 if ((val & 1) == 0) {
229 shift = 1;
230 } else if ((val & 3) == 1) {
231 tmp = *text;
232 text++;
233 val |= (UCHAR(tmp) << 8);
234 shift = 2;
235 offset = (1 << 7);
236 } else if ((val & 7) == 3) {
237 tmp = *text;
238 text++;
239 val |= (UCHAR(tmp) << 8);
240 tmp = *text;
241 text++;
242 val |= (UCHAR(tmp) << 16);
243 shift = 3;
244 offset = (1 << 7) + (1 << 14);
245 } else {
246 tmp = *text;
247 text++;
248 val |= (UCHAR(tmp) << 8);
249 tmp = *text;
250 text++;
251 val |= (UCHAR(tmp) << 16);
252 tmp = *text;
253 text++;
254 val |= (UCHAR(tmp) << 24);
255 shift = 3;
256 offset = (1 << 7) + (1 << 14) + (1 << 21);
257 }
258 num = (val >> shift) + offset;
259 // num = EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(num);
260 return num;
261}
262
263//! does a binary write of an object to a stream
264template <typename T>
265void streamWrite(std::ostream &ss, const T &val) {
267 ss.write((const char *)&tval, sizeof(T));
268}
269
270//! special case for string
271inline void streamWrite(std::ostream &ss, const std::string &what) {
272 unsigned int l = static_cast<unsigned int>(what.length());
273 streamWrite(ss, l);
274 ss.write(what.c_str(), sizeof(char) * l);
275};
276
277template <typename T>
278void streamWriteVec(std::ostream &ss, const T &val) {
279 streamWrite(ss, static_cast<boost::uint64_t>(val.size()));
280 for (size_t i = 0; i < val.size(); ++i) {
281 streamWrite(ss, val[i]);
282 }
283}
284
285//! does a binary read of an object from a stream
286template <typename T>
287void streamRead(std::istream &ss, T &loc) {
288 T tloc;
289 ss.read((char *)&tloc, sizeof(T));
290 if (ss.fail()) {
291 throw std::runtime_error("failed to read from stream");
292 }
294}
295
296//! special case for string
297template <class T>
298void streamRead(std::istream &ss, T &obj, int version) {
300 streamRead(ss, obj);
301}
302
303inline void streamRead(std::istream &ss, std::string &what, int version) {
305 unsigned int l;
306 streamRead(ss, l);
307 auto buff = std::make_unique<char[]>(l);
308 ss.read(buff.get(), sizeof(char) * l);
309 if (ss.fail()) {
310 throw std::runtime_error("failed to read from stream");
311 }
312 what = std::string(buff.get(), l);
313};
314
315template <class T>
316void streamReadVec(std::istream &ss, T &val) {
317 boost::uint64_t size;
318 streamRead(ss, size);
319 val.resize(boost::numeric_cast<size_t>(size));
320
321 for (size_t i = 0; i < size; ++i) {
322 streamRead(ss, val[i]);
323 }
324}
325
326inline void streamReadStringVec(std::istream &ss, std::vector<std::string> &val,
327 int version) {
328 boost::uint64_t size;
329 streamRead(ss, size);
330 val.resize(size);
331
332 for (size_t i = 0; i < size; ++i) {
333 streamRead(ss, val[i], version);
334 }
335}
336
//! Reads the next line from the stream and returns it without the line
//! terminator; a single trailing '\r' is removed as well. An empty string is
//! returned when nothing could be read.
inline std::string getLine(std::istream *inStream) {
  std::string line;
  std::getline(*inStream, line);
  const bool endsWithCR = !line.empty() && line.back() == '\r';
  if (endsWithCR) {
    line.pop_back();
  }
  return line;
}

//! Reference overload: forwards to the pointer version of getLine().
inline std::string getLine(std::istream &inStream) {
  std::istream *streamPtr = &inStream;
  return getLine(streamPtr);
}
351
// One-byte type tags used in the serialized property format.
// n.b. RDTypeTag values can't be used directly because they are
// implementation specific.
namespace DTags {
// scalar values
constexpr unsigned char StringTag = 0;
constexpr unsigned char IntTag = 1;
constexpr unsigned char UnsignedIntTag = 2;
constexpr unsigned char BoolTag = 3;
constexpr unsigned char FloatTag = 4;
constexpr unsigned char DoubleTag = 5;

// vector values
constexpr unsigned char VecStringTag = 6;
constexpr unsigned char VecIntTag = 7;
constexpr unsigned char VecUIntTag = 8;
constexpr unsigned char VecBoolTag = 9;
constexpr unsigned char VecFloatTag = 10;
constexpr unsigned char VecDoubleTag = 11;

constexpr unsigned char CustomTag = 0xFE;  // custom data
constexpr unsigned char EndTag = 0xFF;
}  // namespace DTags
371
373 public:
375
376 virtual const char *getPropName() const = 0;
377
378 virtual bool canSerialize(const RDValue &value) const = 0;
379
380 virtual bool read(std::istream &ss, RDValue &value) const = 0;
381
382 virtual bool write(std::ostream &ss, const RDValue &value) const = 0;
383
384 virtual CustomPropHandler *clone() const = 0;
385};
386
387typedef std::vector<std::shared_ptr<const CustomPropHandler>>
389
390inline bool isSerializable(const Dict::Pair &pair,
391 const CustomPropHandlerVec &handlers = {}) {
392 switch (pair.val.getTag()) {
399
405 return true;
407 for (auto &handler : handlers) {
408 if (handler->canSerialize(pair.val)) {
409 return true;
410 }
411 }
412 return false;
413 default:
414 return false;
415 }
416}
417
418inline bool streamWriteProp(std::ostream &ss, const Dict::Pair &pair,
419 const CustomPropHandlerVec &handlers = {}) {
421 return false;
422 }
423
424 streamWrite(ss, pair.key);
425 switch (pair.val.getTag()) {
429 break;
433 break;
437 break;
441 break;
445 break;
449 break;
450
453 streamWriteVec(ss, rdvalue_cast<std::vector<std::string>>(pair.val));
454 break;
457 streamWriteVec(ss, rdvalue_cast<std::vector<double>>(pair.val));
458 break;
461 streamWriteVec(ss, rdvalue_cast<std::vector<float>>(pair.val));
462 break;
465 streamWriteVec(ss, rdvalue_cast<std::vector<int>>(pair.val));
466 break;
469 streamWriteVec(ss, rdvalue_cast<std::vector<unsigned int>>(pair.val));
470 break;
471 default:
472 for (auto &handler : handlers) {
473 if (handler->canSerialize(pair.val)) {
474 // The form of a custom tag is
475 // CustomTag
476 // customPropName (must be unique)
477 // custom serialization
479 streamWrite(ss, std::string(handler->getPropName()));
480 handler->write(ss, pair.val);
481 return true;
482 }
483 }
484
485 return false;
486 }
487 return true;
488}
489
490template <typename COUNT_TYPE = unsigned int>
492 std::ostream &ss, const RDProps &props, bool savePrivate = false,
493 bool saveComputed = false, const CustomPropHandlerVec &handlers = {},
494 const std::unordered_set<std::string> &ignore = {}) {
496 std::unordered_set<std::string> propnames;
497 for (const auto &pn : propsToSave) {
498 if (ignore.empty() || ignore.find(pn) == ignore.end()) {
499 propnames.insert(pn);
500 }
501 }
502
503 const Dict &dict = props.getDict();
504 COUNT_TYPE count = 0;
505 for (const auto &elem : dict.getData()) {
506 if (propnames.find(elem.key) != propnames.end()) {
508 count++;
509 }
510 }
511 }
512 streamWrite(ss, count); // packed int?
513 if (!count) {
514 return false;
515 }
516
518 for (const auto &elem : dict.getData()) {
519 if (propnames.find(elem.key) != propnames.end()) {
521 // note - not all properties are serializable, this may be
522 // a null op
524 writtenCount++;
525 }
526 }
527 }
528 }
530 "Estimated property count not equal to written");
531 return true;
532}
533
534template <class T>
535void readRDValue(std::istream &ss, RDValue &value) {
536 T v;
537 streamRead(ss, v);
538 value = v;
539}
540
541template <class T>
542void readRDVecValue(std::istream &ss, RDValue &value) {
543 std::vector<T> v;
544 streamReadVec(ss, v);
545 value = v;
546}
547
548inline void readRDValueString(std::istream &ss, RDValue &value) {
549 std::string v;
550 int version = 0;
551 streamRead(ss, v, version);
552 value = v;
553}
554
555inline void readRDStringVecValue(std::istream &ss, RDValue &value) {
556 std::vector<std::string> v;
557 int version = 0;
559 value = v;
560}
561
562inline bool streamReadProp(std::istream &ss, Dict::Pair &pair,
563 bool &dictHasNonPOD,
564 const CustomPropHandlerVec &handlers = {}) {
565 int version = 0;
566 streamRead(ss, pair.key, version);
567
568 unsigned char type;
569 streamRead(ss, type);
570 switch (type) {
571 case DTags::IntTag:
573 break;
576 break;
577 case DTags::BoolTag:
579 break;
580 case DTags::FloatTag:
582 break;
583 case DTags::DoubleTag:
585 break;
586
587 case DTags::StringTag:
589 dictHasNonPOD = true;
590 break;
593 dictHasNonPOD = true;
594 break;
595 case DTags::VecIntTag:
597 dictHasNonPOD = true;
598 break;
601 dictHasNonPOD = true;
602 break;
605 dictHasNonPOD = true;
606 break;
609 dictHasNonPOD = true;
610 break;
611 case DTags::CustomTag: {
612 std::string propType;
613 int version = 0;
615 for (auto &handler : handlers) {
616 if (propType == handler->getPropName()) {
617 handler->read(ss, pair.val);
618 dictHasNonPOD = true;
619 return true;
620 }
621 }
622 return false;
623 }
624
625 default:
626 return false;
627 }
628 return true;
629}
630
631template <typename COUNT_TYPE = unsigned int>
632inline unsigned int streamReadProps(std::istream &ss, RDProps &props,
633 const CustomPropHandlerVec &handlers = {},
634 bool reset = true) {
637
638 Dict &dict = props.getDict();
639 if (reset) {
640 dict.reset(); // Clear data before repopulating
641 }
642 auto startSz = dict.getData().size();
643 dict.getData().resize(startSz + count);
644 for (unsigned index = 0; index < count; ++index) {
645 CHECK_INVARIANT(streamReadProp(ss, dict.getData()[startSz + index],
646 dict.getNonPODStatus(), handlers),
647 "Corrupted property serialization detected");
648 }
649
650 return static_cast<unsigned int>(count);
651}
652} // namespace RDKit
653
654#endif
#define CHECK_INVARIANT(expr, mess)
Definition Invariant.h:101
#define POSTCONDITION(expr, mess)
Definition Invariant.h:117
#define RDUNUSED_PARAM(x)
Definition Invariant.h:196
virtual bool read(std::istream &ss, RDValue &value) const =0
virtual bool write(std::ostream &ss, const RDValue &value) const =0
virtual const char * getPropName() const =0
virtual CustomPropHandler * clone() const =0
virtual bool canSerialize(const RDValue &value) const =0
const unsigned char IntTag
Definition StreamOps.h:356
const unsigned char VecUIntTag
Definition StreamOps.h:363
const unsigned char VecBoolTag
Definition StreamOps.h:364
const unsigned char VecIntTag
Definition StreamOps.h:362
const unsigned char CustomTag
Definition StreamOps.h:368
const unsigned char StringTag
Definition StreamOps.h:355
const unsigned char VecFloatTag
Definition StreamOps.h:365
const unsigned char DoubleTag
Definition StreamOps.h:360
const unsigned char VecStringTag
Definition StreamOps.h:361
const unsigned char EndTag
Definition StreamOps.h:369
const unsigned char BoolTag
Definition StreamOps.h:358
const unsigned char VecDoubleTag
Definition StreamOps.h:366
const unsigned char FloatTag
Definition StreamOps.h:359
const unsigned char UnsignedIntTag
Definition StreamOps.h:357
static const boost::uint64_t UnsignedIntTag
static const boost::uint64_t StringTag
static const boost::uint64_t VecStringTag
static const boost::uint64_t VecIntTag
static const boost::uint64_t FloatTag
static const boost::uint64_t VecUnsignedIntTag
static const boost::uint64_t DoubleTag
static const boost::uint64_t IntTag
static const boost::uint64_t AnyTag
static const boost::uint64_t VecFloatTag
static const boost::uint64_t VecDoubleTag
static const boost::uint64_t BoolTag
Std stuff.
std::vector< std::string > STR_VECT
Definition Dict.h:29
int rdvalue_cast< int >(RDValue_cast_t v)
unsigned char UCHAR
Definition types.h:289
unsigned int rdvalue_cast< unsigned int >(RDValue_cast_t v)
std::string rdvalue_cast< std::string >(RDValue_cast_t v)
Definition RDValue.h:46
boost::uint32_t pullPackedIntFromString(const char *&text)
Definition StreamOps.h:220
bool rdvalue_is(const RDValue_cast_t)
double rdvalue_cast< double >(RDValue_cast_t v)
@ LITTLE_ENDIAN_ORDER
Definition StreamOps.h:29
@ BIG_ENDIAN_ORDER
Definition StreamOps.h:30
T SwapBytes(T value)
Definition StreamOps.h:45
void readRDStringVecValue(std::istream &ss, RDValue &value)
Definition StreamOps.h:555
void streamRead(std::istream &ss, T &loc)
does a binary read of an object from a stream
Definition StreamOps.h:287
std::string getLine(std::istream *inStream)
grabs the next line from an instream and returns it.
Definition StreamOps.h:338
void readRDValueString(std::istream &ss, RDValue &value)
Definition StreamOps.h:548
boost::uint32_t readPackedIntFromStream(std::stringstream &ss)
Reads an integer from a stream in packed format and returns the result.
Definition StreamOps.h:154
bool isSerializable(const Dict::Pair &pair, const CustomPropHandlerVec &handlers={})
Definition StreamOps.h:390
void streamReadStringVec(std::istream &ss, std::vector< std::string > &val, int version)
Definition StreamOps.h:326
void readRDVecValue(std::istream &ss, RDValue &value)
Definition StreamOps.h:542
void streamWriteVec(std::ostream &ss, const T &val)
Definition StreamOps.h:278
T rdvalue_cast(RDValue_cast_t v)
void streamReadVec(std::istream &ss, T &val)
Definition StreamOps.h:316
void readRDValue(std::istream &ss, RDValue &value)
Definition StreamOps.h:535
T EndianSwapBytes(T value)
Definition StreamOps.h:73
bool streamWriteProps(std::ostream &ss, const RDProps &props, bool savePrivate=false, bool saveComputed=false, const CustomPropHandlerVec &handlers={}, const std::unordered_set< std::string > &ignore={})
Definition StreamOps.h:491
bool streamReadProp(std::istream &ss, Dict::Pair &pair, bool &dictHasNonPOD, const CustomPropHandlerVec &handlers={})
Definition StreamOps.h:562
bool streamWriteProp(std::ostream &ss, const Dict::Pair &pair, const CustomPropHandlerVec &handlers={})
Definition StreamOps.h:418
bool rdvalue_cast< bool >(RDValue_cast_t v)
void streamWrite(std::ostream &ss, const T &val)
does a binary write of an object to a stream
Definition StreamOps.h:265
void appendPackedIntToStream(std::stringstream &ss, boost::uint32_t num)
Packs an integer and outputs it to a stream.
Definition StreamOps.h:110
float rdvalue_cast< float >(RDValue_cast_t v)
std::vector< std::shared_ptr< const CustomPropHandler > > CustomPropHandlerVec
Definition StreamOps.h:388
unsigned int streamReadProps(std::istream &ss, RDProps &props, const CustomPropHandlerVec &handlers={}, bool reset=true)
Definition StreamOps.h:632