RDKit
Open-source cheminformatics and machine learning.
Loading...
Searching...
No Matches
StreamOps.h
Go to the documentation of this file.
1//
2// Copyright (C) 2002-2008 Greg Landrum and Rational Discovery LLC
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10//
11#include <RDGeneral/export.h>
12#ifndef _RD_STREAMOPS_H
13#define _RD_STREAMOPS_H
14
15#include "types.h"
16#include "Invariant.h"
17#include "RDProps.h"
18#include <string>
19#include <sstream>
20#include <iostream>
21#include <unordered_set>
22#include <boost/cstdint.hpp>
23#include <boost/predef.h>
24
25namespace RDKit {
26// this code block for handling endian problems is adapted from :
27// http://stackoverflow.com/questions/105252/how-do-i-convert-between-big-endian-and-little-endian-values-in-c
// Byte-order identifiers used as template arguments by EndianSwapBytes().
// NOTE(review): the embedded listing numbers skip 29, 30, 32 and 34, so the
// enumerator lines themselves are not visible here. Comments further down
// this file mention HOST_ENDIAN_ORDER, LITTLE_ENDIAN_ORDER and
// BIG_ENDIAN_ORDER -- restore the enumerators from the real header.
28enum EEndian {
// The host order is chosen from the BOOST_ENDIAN_* macros (boost/predef.h is
// included above).
// NOTE(review): Boost.Predef documents its macros as always defined (0 when
// the feature is not detected); if so, `defined(...)` would always select
// this first branch -- verify whether the tests should be on the macro value.
31#if defined(BOOST_ENDIAN_LITTLE_BYTE) || defined(BOOST_ENDIAN_LITTLE_WORD)
33#elif defined(BOOST_ENDIAN_BIG_BYTE)
// Word-swapped big-endian hosts are rejected at compile time.
35#elif defined(BOOST_ENDIAN_BIG_WORD)
36#error "Cannot compile on word-swapped big-endian systems"
37#else
// No byte order could be determined: refuse to build rather than guess.
38#error "Failed to determine the system endian value"
39#endif
40};
41
//! Reverses the order of the first \c size bytes of \c value.
/*!
  \tparam T     type of the value; its bytes are copied one at a time, so it
                should be trivially copyable (integers, floats, ...)
  \tparam size  number of bytes to reverse; EndianSwapBytes() passes sizeof(T)
  \param value  the value whose byte order should be flipped
  \return a copy of \c value with those bytes in reverse order; values
          narrower than two bytes are returned unchanged
*/
template <class T, unsigned int size>
inline T SwapBytes(T value) {
  // reversing more bytes than the object owns would read past its end
  static_assert(size <= sizeof(T), "SwapBytes: size must not exceed sizeof(T)");
  if (size < 2) {
    return value;
  }

  // The bytes are accessed through unsigned char pointers, which may alias
  // any object, instead of reading the inactive member of a union.
  T swapped = value;
  const unsigned char *src = reinterpret_cast<const unsigned char *>(&value);
  unsigned char *dst = reinterpret_cast<unsigned char *>(&swapped);
  for (unsigned int i = 0; i < size; ++i) {
    dst[i] = src[size - 1 - i];
  }
  return swapped;
}
63
64// Here is the function you will use. Again there is two compile-time assertion
65// that use the boost libraries. You could probably comment them out, but if you
66// do be cautious not to use this function for anything else than integers
67// types. This function need to be called like this :
68//
69// int x = someValue;
70// int i = EndianSwapBytes<HOST_ENDIAN_ORDER, BIG_ENDIAN_ORDER>(x);
71//
72template <EEndian from, EEndian to, class T>
73inline T EndianSwapBytes(T value) {
74 // A : La donnée à swapper à une taille de 2, 4 ou 8 octets
75 BOOST_STATIC_ASSERT(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 ||
76 sizeof(T) == 8);
77 if (sizeof(T) == 1) {
78 return value;
79 }
80
81 // A : La donnée à swapper est d'un type arithmetic
82 // BOOST_STATIC_ASSERT(boost::is_arithmetic<T>::value);
83
84 // Si from et to sont du même type on ne swap pas.
85 if (from == to) {
86 return value;
87 }
88
89 return SwapBytes<T, sizeof(T)>(value);
90}
91template <EEndian from, EEndian to>
92inline char EndianSwapBytes(char value) {
93 return value;
94}
95template <EEndian from, EEndian to>
96inline unsigned char EndianSwapBytes(unsigned char value) {
97 return value;
98}
99template <EEndian from, EEndian to>
100inline signed char EndianSwapBytes(signed char value) {
101 return value;
102}
103// --------------------------------------
104
105//! Packs an integer and outputs it to a stream
106inline void appendPackedIntToStream(std::stringstream &ss,
107 boost::uint32_t num) {
108 int nbytes, bix;
109 unsigned int val, res;
110 char tc;
111
112 res = num;
113 while (1) {
114 if (res < (1 << 7)) {
115 val = (res << 1);
116 nbytes = 1;
117 break;
118 }
119 res -= (1 << 7);
120 if (res < (1 << 14)) {
121 val = ((res << 2) | 1);
122 nbytes = 2;
123 break;
124 }
125 res -= (1 << 14);
126 if (res < (1 << 21)) {
127 val = ((res << 3) | 3);
128 nbytes = 3;
129 break;
130 }
131 res -= (1 << 21);
132 if (res < (1 << 29)) {
133 val = ((res << 3) | 7);
134 nbytes = 4;
135 break;
136 } else {
137 CHECK_INVARIANT(0, "ERROR: Integer too big to pack\n");
138 }
139 }
140 // val = EndianSwapBytes<HOST_ENDIAN_ORDER,LITTLE_ENDIAN_ORDER>(val);
141
142 for (bix = 0; bix < nbytes; bix++) {
143 tc = (char)(val & 255);
144 ss.write(&tc, 1);
145 val >>= 8;
146 }
147}
148
149//! Reads an integer from a stream in packed format and returns the result.
150inline boost::uint32_t readPackedIntFromStream(std::stringstream &ss) {
151 boost::uint32_t val, num;
152 int shift, offset;
153 char tmp;
154 ss.read(&tmp, sizeof(tmp));
155 if (ss.fail()) {
156 throw std::runtime_error("failed to read from stream");
157 }
158
159 val = UCHAR(tmp);
160 offset = 0;
161 if ((val & 1) == 0) {
162 shift = 1;
163 } else if ((val & 3) == 1) {
164 ss.read((char *)&tmp, sizeof(tmp));
165 if (ss.fail()) {
166 throw std::runtime_error("failed to read from stream");
167 }
168
169 val |= (UCHAR(tmp) << 8);
170 shift = 2;
171 offset = (1 << 7);
172 } else if ((val & 7) == 3) {
173 ss.read((char *)&tmp, sizeof(tmp));
174 if (ss.fail()) {
175 throw std::runtime_error("failed to read from stream");
176 }
177
178 val |= (UCHAR(tmp) << 8);
179 ss.read((char *)&tmp, sizeof(tmp));
180 if (ss.fail()) {
181 throw std::runtime_error("failed to read from stream");
182 }
183
184 val |= (UCHAR(tmp) << 16);
185 shift = 3;
186 offset = (1 << 7) + (1 << 14);
187 } else {
188 ss.read((char *)&tmp, sizeof(tmp));
189 if (ss.fail()) {
190 throw std::runtime_error("failed to read from stream");
191 }
192
193 val |= (UCHAR(tmp) << 8);
194 ss.read((char *)&tmp, sizeof(tmp));
195 if (ss.fail()) {
196 throw std::runtime_error("failed to read from stream");
197 }
198
199 val |= (UCHAR(tmp) << 16);
200 ss.read((char *)&tmp, sizeof(tmp));
201 if (ss.fail()) {
202 throw std::runtime_error("failed to read from stream");
203 }
204
205 val |= (UCHAR(tmp) << 24);
206 shift = 3;
207 offset = (1 << 7) + (1 << 14) + (1 << 21);
208 }
209 num = (val >> shift) + offset;
210 // num = EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(num);
211 return num;
212}
213
214//! Reads an integer from a char * in packed format and returns the result.
215//! The argument is advanced
216inline boost::uint32_t pullPackedIntFromString(const char *&text) {
217 boost::uint32_t val, num;
218 int shift, offset;
219 char tmp;
220 tmp = *text;
221 text++;
222 val = UCHAR(tmp);
223 offset = 0;
224 if ((val & 1) == 0) {
225 shift = 1;
226 } else if ((val & 3) == 1) {
227 tmp = *text;
228 text++;
229 val |= (UCHAR(tmp) << 8);
230 shift = 2;
231 offset = (1 << 7);
232 } else if ((val & 7) == 3) {
233 tmp = *text;
234 text++;
235 val |= (UCHAR(tmp) << 8);
236 tmp = *text;
237 text++;
238 val |= (UCHAR(tmp) << 16);
239 shift = 3;
240 offset = (1 << 7) + (1 << 14);
241 } else {
242 tmp = *text;
243 text++;
244 val |= (UCHAR(tmp) << 8);
245 tmp = *text;
246 text++;
247 val |= (UCHAR(tmp) << 16);
248 tmp = *text;
249 text++;
250 val |= (UCHAR(tmp) << 24);
251 shift = 3;
252 offset = (1 << 7) + (1 << 14) + (1 << 21);
253 }
254 num = (val >> shift) + offset;
255 // num = EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(num);
256 return num;
257}
258
259//! does a binary write of an object to a stream
// NOTE(review): the embedded numbering jumps from 261 to 263 and `tval` is
// never declared in the visible lines, so the statement that produces it
// (listing line 262) is missing here -- restore it from the real header.
260template <typename T>
261void streamWrite(std::ostream &ss, const T &val) {
// writes sizeof(T) raw bytes starting at the address of tval
263 ss.write((const char *)&tval, sizeof(T));
264}
265
266//! special case for string
267inline void streamWrite(std::ostream &ss, const std::string &what) {
268 unsigned int l = rdcast<unsigned int>(what.length());
269 ss.write((const char *)&l, sizeof(l));
270 ss.write(what.c_str(), sizeof(char) * l);
271};
272
273template <typename T>
274void streamWriteVec(std::ostream &ss, const T &val) {
275 streamWrite(ss, static_cast<boost::uint64_t>(val.size()));
276 for (size_t i = 0; i < val.size(); ++i) {
277 streamWrite(ss, val[i]);
278 }
279}
280
281//! does a binary read of an object from a stream
// Reads sizeof(T) raw bytes into a local of type T and throws
// std::runtime_error if the stream reports failure.
// NOTE(review): the embedded numbering jumps from 288 to 290; as shown, the
// bytes land in `tloc` and nothing visible stores a result in `loc`, so the
// statement on listing line 289 is missing -- restore it from the real
// header.
282template <typename T>
283void streamRead(std::istream &ss, T &loc) {
284 T tloc;
285 ss.read((char *)&tloc, sizeof(T));
286 if (ss.fail()) {
287 throw std::runtime_error("failed to read from stream");
288 }
290}
291
292//! special case for string
// NOTE(review): despite the heading above, this is the generic template; the
// visible body forwards to the two-argument streamRead() and never consults
// `version`. Listing line 295 is not shown here -- confirm against the real
// header.
293template <class T>
294void streamRead(std::istream &ss, T &obj, int version) {
296 streamRead(ss, obj);
297}
298
//! Reads a string stored as an unsigned int length followed by that many raw
//! characters.
/*!
  \param ss       stream to read from
  \param what     receives the string; left untouched if reading fails
  \param version  not consulted by this overload
  \throws std::runtime_error if the length or the characters cannot be read
*/
inline void streamRead(std::istream &ss, std::string &what, int version) {
  (void)version;
  unsigned int l;
  ss.read(reinterpret_cast<char *>(&l), sizeof(l));
  if (ss.fail()) {
    throw std::runtime_error("failed to read from stream");
  }
  // Read into a std::string rather than a new[]'d buffer so the storage is
  // released even when the read below fails and we throw.
  std::string buff(l, '\0');
  ss.read(&buff[0], l);
  if (ss.fail()) {
    throw std::runtime_error("failed to read from stream");
  }
  what.swap(buff);
}
314
315template <class T>
316void streamReadVec(std::istream &ss, T &val) {
317 boost::uint64_t size;
318 streamRead(ss, size);
319 val.resize(boost::numeric_cast<size_t>(size));
320
321 for (size_t i = 0; i < size; ++i) {
322 streamRead(ss, val[i]);
323 }
324}
325
326inline void streamReadStringVec(std::istream &ss, std::vector<std::string> &val,
327 int version) {
328 boost::uint64_t size;
329 streamRead(ss, size);
330 val.resize(size);
331
332 for (size_t i = 0; i < size; ++i) {
333 streamRead(ss, val[i], version);
334 }
335}
336
//! grabs the next line from an instream and returns it.
/*!
  A single trailing carriage return is stripped, so lines ending in "\r\n"
  come back without the '\r'.

  \param inStream  stream to read from; it is dereferenced unconditionally
  \return the line without its terminator
*/
inline std::string getLine(std::istream *inStream) {
  std::string line;
  std::getline(*inStream, line);
  const bool endsWithCR = !line.empty() && line.back() == '\r';
  if (endsWithCR) {
    line.pop_back();
  }
  return line;
}
//! grabs the next line from an instream and returns it.
//! \overload
inline std::string getLine(std::istream &inStream) {
  std::istream *asPointer = &inStream;
  return getLine(asPointer);
}
350
// n.b. We can't use RDTypeTag directly, they are implementation
// specific
//! One-byte type markers used when properties are written to a stream.
namespace DTags {
// scalar values
constexpr unsigned char StringTag = 0;
constexpr unsigned char IntTag = 1;
constexpr unsigned char UnsignedIntTag = 2;
constexpr unsigned char BoolTag = 3;
constexpr unsigned char FloatTag = 4;
constexpr unsigned char DoubleTag = 5;
// vectors of the above
constexpr unsigned char VecStringTag = 6;
constexpr unsigned char VecIntTag = 7;
constexpr unsigned char VecUIntTag = 8;
constexpr unsigned char VecBoolTag = 9;
constexpr unsigned char VecFloatTag = 10;
constexpr unsigned char VecDoubleTag = 11;

constexpr unsigned char CustomTag = 0xFE;  // custom data
constexpr unsigned char EndTag = 0xFF;
}  // namespace DTags
370
// Abstract interface (all members shown are pure virtual) used by the
// property streaming functions below to handle values they cannot serialize
// themselves.
// NOTE(review): the class header (listing line 371) and listing line 373 are
// missing here; confirm from the real header that the class declares a
// virtual destructor.
372 public:
// Name written to the stream ahead of the custom payload and compared on
// read to pick the handler.
374 virtual const char *getPropName() const = 0;
// Tells the writer whether this handler accepts the given value.
375 virtual bool canSerialize(const RDValue &value) const = 0;
// Read/write the custom payload; the callers visible in this file ignore the
// returned bool.
376 virtual bool read(std::istream &ss, RDValue &value) const = 0;
377 virtual bool write(std::ostream &ss, const RDValue &value) const = 0;
// NOTE(review): ownership of the returned pointer is not shown in this file.
378 virtual CustomPropHandler *clone() const = 0;
379};
380
// Collection type for handlers, held as shared pointers to const.
// NOTE(review): the line carrying the alias name (listing line 382) is
// missing; the rest of this file refers to the type as CustomPropHandlerVec.
381typedef std::vector<std::shared_ptr<const CustomPropHandler>>
383
// Reports whether a property can be written by streamWriteProp(): the
// decision is a switch on the value's tag, with one branch deferring to the
// supplied custom handlers.
// NOTE(review): the case labels (listing lines 387-392, 394-398 and 400) are
// missing from this listing, so which tags map to which branch cannot be
// read off here -- restore them from the real header.
384inline bool isSerializable(const Dict::Pair &pair,
385 const CustomPropHandlerVec &handlers = {}) {
386 switch (pair.val.getTag()) {
393
399 return true;
// serializable only if some handler claims the value
401 for (auto &handler : handlers) {
402 if (handler->canSerialize(pair.val)) {
403 return true;
404 }
405 }
406 return false;
407 default:
408 return false;
409 }
410}
411
// Writes one key/value property to the stream. Returns false without writing
// anything when isSerializable() rejects the pair; otherwise the key is
// written first and the value is dispatched on its tag.
// NOTE(review): the case labels and the scalar write statements (listing
// lines 420-422, 424-426, ... 461-462 and 472) are missing from this listing
// -- restore them from the real header.
412inline bool streamWriteProp(std::ostream &ss, const Dict::Pair &pair,
413 const CustomPropHandlerVec &handlers = {}) {
414 if (!isSerializable(pair, handlers)) {
415 return false;
416 }
417
// the key goes out before the tag dispatch, so a `false` return from the
// default branch below leaves the key already in the stream
418 streamWrite(ss, pair.key);
419 switch (pair.val.getTag()) {
423 break;
427 break;
431 break;
435 break;
439 break;
443 break;
444
447 streamWriteVec(ss, rdvalue_cast<std::vector<std::string>>(pair.val));
448 break;
451 streamWriteVec(ss, rdvalue_cast<std::vector<double>>(pair.val));
452 break;
455 streamWriteVec(ss, rdvalue_cast<std::vector<float>>(pair.val));
456 break;
459 streamWriteVec(ss, rdvalue_cast<std::vector<int>>(pair.val));
460 break;
463 streamWriteVec(ss, rdvalue_cast<std::vector<unsigned int>>(pair.val));
464 break;
465 default:
// first handler that accepts the value wins
466 for (auto &handler : handlers) {
467 if (handler->canSerialize(pair.val)) {
468 // The form of a custom tag is
469 // CustomTag
470 // customPropName (must be unique)
471 // custom serialization
473 streamWrite(ss, std::string(handler->getPropName()));
// the bool returned by write() is not checked
474 handler->write(ss, pair.val);
475 return true;
476 }
477 }
478
479 return false;
480 }
481 return true;
482}
483
// Writes a set of properties from `props` to the stream: a COUNT_TYPE count
// first, then the selected entries. Returns false when the count is zero
// (the zero count has already been written by then), true otherwise.
// NOTE(review): listing lines 485, 489, 501, 511, 514, 517 and 523 are
// missing here (the function-name line, the definition of `propsToSave`, the
// conditions guarding both counters and the start of the final check) --
// restore them from the real header before relying on this description.
484template <typename COUNT_TYPE = unsigned int>
486 std::ostream &ss, const RDProps &props, bool savePrivate = false,
487 bool saveComputed = false, const CustomPropHandlerVec &handlers = {},
488 const std::unordered_set<std::string> &ignore = {}) {
// keep only the candidate names that are not listed in `ignore`
490 std::unordered_set<std::string> propnames;
491 for (const auto &pn : propsToSave) {
492 if (ignore.empty() || ignore.find(pn) == ignore.end()) {
493 propnames.insert(pn);
494 }
495 }
496
// first pass: count the entries that will be written
497 const Dict &dict = props.getDict();
498 COUNT_TYPE count = 0;
499 for (const auto &elem : dict.getData()) {
500 if (propnames.find(elem.key) != propnames.end()) {
502 count++;
503 }
504 }
505 }
506 streamWrite(ss, count); // packed int?
507 if (!count) {
508 return false;
509 }
510
// second pass: write the entries and tally how many went out
512 for (const auto &elem : dict.getData()) {
513 if (propnames.find(elem.key) != propnames.end()) {
515 // note - not all properties are serializable, this may be
516 // a null op
518 writtenCount++;
519 }
520 }
521 }
522 }
524 "Estimated property count not equal to written");
525 return true;
526}
527
528template <class T>
529void readRDValue(std::istream &ss, RDValue &value) {
530 T v;
531 streamRead(ss, v);
532 value = v;
533}
534
535template <class T>
536void readRDVecValue(std::istream &ss, RDValue &value) {
537 std::vector<T> v;
538 streamReadVec(ss, v);
539 value = v;
540}
541
542inline void readRDValueString(std::istream &ss, RDValue &value) {
543 std::string v;
544 int version = 0;
545 streamRead(ss, v, version);
546 value = v;
547}
548
// Assigns a vector of strings to `value`.
// NOTE(review): the embedded numbering jumps from 551 to 553, so the
// statement that fills `v` from the stream (listing line 552) is missing
// here; as shown, `v` is still empty when it is assigned. Restore the line
// from the real header.
549inline void readRDStringVecValue(std::istream &ss, RDValue &value) {
550 std::vector<std::string> v;
551 int version = 0;
553 value = v;
554}
555
// Reads one property from the stream into `pair`: the key, then a one-byte
// type tag, then the value in the form that tag selects. Returns false for
// an unrecognised tag or when no handler matches a custom property, true
// otherwise. `dictHasNonPOD` is set to true by the string, vector and custom
// branches and is never cleared here.
// NOTE(review): a number of case labels and read calls (listing lines
// 568-569, 575, 578, 585-586, 590, 593-594, 597-598, 601-602 and 608) are
// missing from this listing -- restore them from the real header.
556inline bool streamReadProp(std::istream &ss, Dict::Pair &pair,
557 bool &dictHasNonPOD,
558 const CustomPropHandlerVec &handlers = {}) {
559 int version = 0;
560 streamRead(ss, pair.key, version);
561
562 unsigned char type;
563 streamRead(ss, type);
564 switch (type) {
565 case DTags::IntTag:
566 readRDValue<int>(ss, pair.val);
567 break;
570 break;
571 case DTags::BoolTag:
572 readRDValue<bool>(ss, pair.val);
573 break;
574 case DTags::FloatTag:
576 break;
577 case DTags::DoubleTag:
579 break;
580
581 case DTags::StringTag:
582 readRDValueString(ss, pair.val);
583 dictHasNonPOD = true;
584 break;
587 dictHasNonPOD = true;
588 break;
589 case DTags::VecIntTag:
591 dictHasNonPOD = true;
592 break;
595 dictHasNonPOD = true;
596 break;
599 dictHasNonPOD = true;
600 break;
603 dictHasNonPOD = true;
604 break;
605 case DTags::CustomTag: {
606 std::string propType;
// this `version` shadows the one declared at the top of the function
607 int version = 0;
// the handler whose name equals the stored property type reads the payload;
// the bool returned by read() is not checked
609 for (auto &handler : handlers) {
610 if (propType == handler->getPropName()) {
611 handler->read(ss, pair.val);
612 dictHasNonPOD = true;
613 return true;
614 }
615 }
616 return false;
617 }
618
619 default:
620 return false;
621 }
622 return true;
623}
624
// Reads `count` properties from the stream into the dictionary of `props`
// and returns that count. With `reset` true the dictionary is cleared first;
// otherwise the new entries are appended after the existing ones.
// NOTE(review): the embedded numbering jumps from 628 to 631, so the lines
// that declare `count` and read it from the stream are missing here --
// restore them from the real header.
625template <typename COUNT_TYPE = unsigned int>
626inline unsigned int streamReadProps(std::istream &ss, RDProps &props,
627 const CustomPropHandlerVec &handlers = {},
628 bool reset = true) {
631
632 Dict &dict = props.getDict();
633 if (reset) {
634 dict.reset(); // Clear data before repopulating
635 }
// make room for every incoming entry up front, then fill the slots in place
636 auto startSz = dict.getData().size();
637 dict.getData().resize(startSz + count);
638 for (unsigned index = 0; index < count; ++index) {
// NOTE(review): the read itself happens inside the CHECK_INVARIANT argument;
// if that macro can ever expand to nothing, no properties would be read --
// confirm against Invariant.h.
639 CHECK_INVARIANT(streamReadProp(ss, dict.getData()[startSz + index],
640 dict.getNonPODStatus(), handlers),
641 "Corrupted property serialization detected");
642 }
643
644 return static_cast<unsigned int>(count);
645}
646
647} // namespace RDKit
648
649#endif
#define CHECK_INVARIANT(expr, mess)
Definition Invariant.h:101
#define POSTCONDITION(expr, mess)
Definition Invariant.h:117
#define RDUNUSED_PARAM(x)
Definition Invariant.h:196
virtual bool read(std::istream &ss, RDValue &value) const =0
virtual bool write(std::ostream &ss, const RDValue &value) const =0
virtual const char * getPropName() const =0
virtual CustomPropHandler * clone() const =0
virtual bool canSerialize(const RDValue &value) const =0
const unsigned char IntTag
Definition StreamOps.h:355
const unsigned char VecUIntTag
Definition StreamOps.h:362
const unsigned char VecBoolTag
Definition StreamOps.h:363
const unsigned char VecIntTag
Definition StreamOps.h:361
const unsigned char CustomTag
Definition StreamOps.h:367
const unsigned char StringTag
Definition StreamOps.h:354
const unsigned char VecFloatTag
Definition StreamOps.h:364
const unsigned char DoubleTag
Definition StreamOps.h:359
const unsigned char VecStringTag
Definition StreamOps.h:360
const unsigned char EndTag
Definition StreamOps.h:368
const unsigned char BoolTag
Definition StreamOps.h:357
const unsigned char VecDoubleTag
Definition StreamOps.h:365
const unsigned char FloatTag
Definition StreamOps.h:358
const unsigned char UnsignedIntTag
Definition StreamOps.h:356
static const boost::uint64_t UnsignedIntTag
static const boost::uint64_t StringTag
static const boost::uint64_t VecStringTag
static const boost::uint64_t VecIntTag
static const boost::uint64_t FloatTag
static const boost::uint64_t VecUnsignedIntTag
static const boost::uint64_t DoubleTag
static const boost::uint64_t IntTag
static const boost::uint64_t AnyTag
static const boost::uint64_t VecFloatTag
static const boost::uint64_t VecDoubleTag
static const boost::uint64_t BoolTag
Std stuff.
std::vector< std::string > STR_VECT
Definition Dict.h:29
int rdvalue_cast< int >(RDValue_cast_t v)
unsigned char UCHAR
Definition types.h:282
unsigned int rdvalue_cast< unsigned int >(RDValue_cast_t v)
std::string rdvalue_cast< std::string >(RDValue_cast_t v)
Definition RDValue.h:46
boost::uint32_t pullPackedIntFromString(const char *&text)
Definition StreamOps.h:216
bool rdvalue_is(const RDValue_cast_t)
double rdvalue_cast< double >(RDValue_cast_t v)
@ LITTLE_ENDIAN_ORDER
Definition StreamOps.h:29
@ BIG_ENDIAN_ORDER
Definition StreamOps.h:30
T SwapBytes(T value)
Definition StreamOps.h:45
void readRDStringVecValue(std::istream &ss, RDValue &value)
Definition StreamOps.h:549
void streamRead(std::istream &ss, T &loc)
does a binary read of an object from a stream
Definition StreamOps.h:283
std::string getLine(std::istream *inStream)
grabs the next line from an instream and returns it.
Definition StreamOps.h:338
void readRDValueString(std::istream &ss, RDValue &value)
Definition StreamOps.h:542
boost::uint32_t readPackedIntFromStream(std::stringstream &ss)
Reads an integer from a stream in packed format and returns the result.
Definition StreamOps.h:150
bool isSerializable(const Dict::Pair &pair, const CustomPropHandlerVec &handlers={})
Definition StreamOps.h:384
void streamReadStringVec(std::istream &ss, std::vector< std::string > &val, int version)
Definition StreamOps.h:326
void readRDVecValue(std::istream &ss, RDValue &value)
Definition StreamOps.h:536
void streamWriteVec(std::ostream &ss, const T &val)
Definition StreamOps.h:274
T rdvalue_cast(RDValue_cast_t v)
void streamReadVec(std::istream &ss, T &val)
Definition StreamOps.h:316
void readRDValue(std::istream &ss, RDValue &value)
Definition StreamOps.h:529
T EndianSwapBytes(T value)
Definition StreamOps.h:73
bool streamWriteProps(std::ostream &ss, const RDProps &props, bool savePrivate=false, bool saveComputed=false, const CustomPropHandlerVec &handlers={}, const std::unordered_set< std::string > &ignore={})
Definition StreamOps.h:485
bool streamReadProp(std::istream &ss, Dict::Pair &pair, bool &dictHasNonPOD, const CustomPropHandlerVec &handlers={})
Definition StreamOps.h:556
bool streamWriteProp(std::ostream &ss, const Dict::Pair &pair, const CustomPropHandlerVec &handlers={})
Definition StreamOps.h:412
bool rdvalue_cast< bool >(RDValue_cast_t v)
void streamWrite(std::ostream &ss, const T &val)
does a binary write of an object to a stream
Definition StreamOps.h:261
void appendPackedIntToStream(std::stringstream &ss, boost::uint32_t num)
Packs an integer and outputs it to a stream.
Definition StreamOps.h:106
float rdvalue_cast< float >(RDValue_cast_t v)
std::vector< std::shared_ptr< const CustomPropHandler > > CustomPropHandlerVec
Definition StreamOps.h:382
unsigned int streamReadProps(std::istream &ss, RDProps &props, const CustomPropHandlerVec &handlers={}, bool reset=true)
Definition StreamOps.h:626
std::string key
Definition Dict.h:39
RDValue val
Definition Dict.h:40
boost::uint64_t getTag() const