RDKit
Open-source cheminformatics and machine learning.
StreamOps.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2008 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 //
11 #ifndef _RD_STREAMOPS_H
12 #define _RD_STREAMOPS_H
13 
14 #include "types.h"
15 #include "Invariant.h"
16 #include "RDProps.h"
17 #include <string>
18 #include <sstream>
19 #include <iostream>
20 #include <boost/cstdint.hpp>
21 #include <boost/detail/endian.hpp>
22 
23 namespace RDKit {
24 // this code block for handling endian problems is from :
25 // http://stackoverflow.com/questions/105252/how-do-i-convert-between-big-endian-and-little-endian-values-in-c
//! Byte orders understood by the endian-conversion helpers in this file.
/*!
  HOST_ENDIAN_ORDER is an alias for whichever of the two explicit orders the
  build target uses. The Boost endian macros are consulted first; when neither
  is defined the compiler-provided __BYTE_ORDER__ macros are used as a
  fallback, and the build fails if the order still cannot be determined.
*/
enum EEndian {
  LITTLE_ENDIAN_ORDER,
  BIG_ENDIAN_ORDER,
#if defined(BOOST_LITTLE_ENDIAN)
  HOST_ENDIAN_ORDER = LITTLE_ENDIAN_ORDER
#elif defined(BOOST_BIG_ENDIAN)
  HOST_ENDIAN_ORDER = BIG_ENDIAN_ORDER
#elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
    (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
  HOST_ENDIAN_ORDER = LITTLE_ENDIAN_ORDER
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
    (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
  HOST_ENDIAN_ORDER = BIG_ENDIAN_ORDER
#else
#error "Failed to determine the system endian value"
#endif
};
37 
//! Returns a copy of \c value with the order of its first \c size bytes
//! reversed.
/*!
  \tparam T     type of the value; it is copied byte by byte, so it needs to
                be a plain (trivially copyable) type
  \tparam size  number of bytes to reverse; must not exceed sizeof(T). The
                caller in this file always passes sizeof(T).

  The bytes are accessed through unsigned char pointers, which is well
  defined for any object type, and every one of the \c size bytes is written,
  so odd sizes keep their middle byte.
*/
template <class T, unsigned int size>
inline T SwapBytes(T value) {
  // start from a full copy so that any bytes beyond `size` are initialized
  T swapped = value;
  const unsigned char *src = reinterpret_cast<const unsigned char *>(&value);
  unsigned char *dst = reinterpret_cast<unsigned char *>(&swapped);
  for (unsigned int i = 0; i < size; ++i) {
    dst[i] = src[size - 1 - i];
  }
  return swapped;
}
56 
57 // Here is the function you will use. Again there is two compile-time assertion
58 // that use the boost librarie. You could probably comment them out, but if you
59 // do be cautious not to use this function for anything else than integers
60 // types. This function need to be calles like this :
61 //
62 // int x = someValue;
63 // int i = EndianSwapBytes<HOST_ENDIAN_ORDER, BIG_ENDIAN_ORDER>(x);
64 //
65 template <EEndian from, EEndian to, class T>
66 inline T EndianSwapBytes(T value) {
67  // A : La donnée à swapper à une taille de 2, 4 ou 8 octets
68  BOOST_STATIC_ASSERT(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 ||
69  sizeof(T) == 8);
70  if (sizeof(T) == 1) return value;
71 
72  // A : La donnée à swapper est d'un type arithmetic
73  // BOOST_STATIC_ASSERT(boost::is_arithmetic<T>::value);
74 
75  // Si from et to sont du même type on ne swap pas.
76  if (from == to) return value;
77 
78  return SwapBytes<T, sizeof(T)>(value);
79 }
80 template <EEndian from, EEndian to>
81 inline char EndianSwapBytes(char value) {
82  return value;
83 }
84 template <EEndian from, EEndian to>
85 inline unsigned char EndianSwapBytes(unsigned char value) {
86  return value;
87 }
88 template <EEndian from, EEndian to>
89 inline signed char EndianSwapBytes(signed char value) {
90  return value;
91 }
92 // --------------------------------------
93 
94 //! Packs an integer and outputs it to a stream
95 inline void appendPackedIntToStream(std::stringstream &ss,
96  boost::uint32_t num) {
97  int nbytes, bix;
98  unsigned int val, res;
99  char tc;
100 
101  res = num;
102  while (1) {
103  if (res < (1 << 7)) {
104  val = (res << 1);
105  nbytes = 1;
106  break;
107  }
108  res -= (1 << 7);
109  if (res < (1 << 14)) {
110  val = ((res << 2) | 1);
111  nbytes = 2;
112  break;
113  }
114  res -= (1 << 14);
115  if (res < (1 << 21)) {
116  val = ((res << 3) | 3);
117  nbytes = 3;
118  break;
119  }
120  res -= (1 << 21);
121  if (res < (1 << 29)) {
122  val = ((res << 3) | 7);
123  nbytes = 4;
124  break;
125  } else {
126  CHECK_INVARIANT(0, "ERROR: Integer too big to pack\n");
127  }
128  }
129  // val = EndianSwapBytes<HOST_ENDIAN_ORDER,LITTLE_ENDIAN_ORDER>(val);
130 
131  for (bix = 0; bix < nbytes; bix++) {
132  tc = (char)(val & 255);
133  ss.write(&tc, 1);
134  val >>= 8;
135  }
136 }
137 
138 //! Reads an integer from a stream in packed format and returns the result.
139 inline boost::uint32_t readPackedIntFromStream(std::stringstream &ss) {
140  boost::uint32_t val, num;
141  int shift, offset;
142  char tmp;
143  ss.read(&tmp, sizeof(tmp));
144  val = UCHAR(tmp);
145  offset = 0;
146  if ((val & 1) == 0) {
147  shift = 1;
148  } else if ((val & 3) == 1) {
149  ss.read((char *)&tmp, sizeof(tmp));
150  val |= (UCHAR(tmp) << 8);
151  shift = 2;
152  offset = (1 << 7);
153  } else if ((val & 7) == 3) {
154  ss.read((char *)&tmp, sizeof(tmp));
155  val |= (UCHAR(tmp) << 8);
156  ss.read((char *)&tmp, sizeof(tmp));
157  val |= (UCHAR(tmp) << 16);
158  shift = 3;
159  offset = (1 << 7) + (1 << 14);
160  } else {
161  ss.read((char *)&tmp, sizeof(tmp));
162  val |= (UCHAR(tmp) << 8);
163  ss.read((char *)&tmp, sizeof(tmp));
164  val |= (UCHAR(tmp) << 16);
165  ss.read((char *)&tmp, sizeof(tmp));
166  val |= (UCHAR(tmp) << 24);
167  shift = 3;
168  offset = (1 << 7) + (1 << 14) + (1 << 21);
169  }
170  num = (val >> shift) + offset;
171  // num = EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(num);
172  return num;
173 }
174 
175 //! Reads an integer from a char * in packed format and returns the result.
176 //! The argument is advanced
177 inline boost::uint32_t pullPackedIntFromString(const char *&text) {
178  boost::uint32_t val, num;
179  int shift, offset;
180  char tmp;
181  tmp = *text;
182  text++;
183  val = UCHAR(tmp);
184  offset = 0;
185  if ((val & 1) == 0) {
186  shift = 1;
187  } else if ((val & 3) == 1) {
188  tmp = *text;
189  text++;
190  val |= (UCHAR(tmp) << 8);
191  shift = 2;
192  offset = (1 << 7);
193  } else if ((val & 7) == 3) {
194  tmp = *text;
195  text++;
196  val |= (UCHAR(tmp) << 8);
197  tmp = *text;
198  text++;
199  val |= (UCHAR(tmp) << 16);
200  shift = 3;
201  offset = (1 << 7) + (1 << 14);
202  } else {
203  tmp = *text;
204  text++;
205  val |= (UCHAR(tmp) << 8);
206  tmp = *text;
207  text++;
208  val |= (UCHAR(tmp) << 16);
209  tmp = *text;
210  text++;
211  val |= (UCHAR(tmp) << 24);
212  shift = 3;
213  offset = (1 << 7) + (1 << 14) + (1 << 21);
214  }
215  num = (val >> shift) + offset;
216  // num = EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(num);
217  return num;
218 }
219 
220 //! does a binary write of an object to a stream
221 template <typename T>
222 void streamWrite(std::ostream &ss, const T &val) {
223  T tval = EndianSwapBytes<HOST_ENDIAN_ORDER, LITTLE_ENDIAN_ORDER>(val);
224  ss.write((const char *)&tval, sizeof(T));
225 }
226 
227 //! special case for string
228 inline void streamWrite(std::ostream &ss, const std::string &what) {
229  unsigned int l = rdcast<unsigned int>(what.length());
230  ss.write((const char *)&l, sizeof(l));
231  ss.write(what.c_str(), sizeof(char) * l);
232 };
233 
234 template<typename T>
235 void streamWriteVec(std::ostream &ss, const T &val) {
236  streamWrite(ss, static_cast<boost::uint64_t>(val.size()));
237  for(size_t i=0;i<val.size();++i)
238  streamWrite(ss, val[i]);
239 }
240 
241 //! does a binary read of an object from a stream
242 template <typename T>
243 void streamRead(std::istream &ss, T &loc) {
244  T tloc;
245  ss.read((char *)&tloc, sizeof(T));
246  loc = EndianSwapBytes<LITTLE_ENDIAN_ORDER, HOST_ENDIAN_ORDER>(tloc);
247 }
248 
249 //! special case for string
250 template <class T>
251 void streamRead(std::istream &ss, T &obj, int version) {
252  RDUNUSED_PARAM(version);
253  streamRead(ss, obj);
254 }
255 
256 inline void streamRead(std::istream &ss, std::string &what, int version) {
257  RDUNUSED_PARAM(version);
258  unsigned int l;
259  ss.read((char *)&l, sizeof(l));
260  char *buff = new char[l + 1];
261  ss.read(buff, sizeof(char) * l);
262  buff[l] = 0;
263  what = buff;
264  delete[] buff;
265 };
266 
267 
268 template<class T>
269 void streamReadVec(std::istream &ss, T &val) {
270  boost::uint64_t size;
271  streamRead(ss, size);
272  val.resize(size);
273 
274  for(size_t i=0;i<size;++i)
275  streamRead(ss, val[i]);
276 }
277 
278 inline void streamReadStringVec(std::istream &ss, std::vector<std::string> &val, int version) {
279  boost::uint64_t size;
280  streamRead(ss, size);
281  val.resize(size);
282 
283  for(size_t i=0;i<size;++i)
284  streamRead(ss, val[i], version);
285 }
286 
//! grabs the next line from an instream and returns it.
/*!
  A single trailing carriage return is stripped from the line.

  \param inStream  pointer to the stream to read from; it is dereferenced
                   without a null check
*/
inline std::string getLine(std::istream *inStream) {
  std::string line;
  std::getline(*inStream, line);
  if (!line.empty() && *line.rbegin() == '\r') {
    line.resize(line.size() - 1);
  }
  return line;
}
//! grabs the next line from an instream and returns it.
/*!
  Reference overload; forwards to the pointer version.
*/
inline std::string getLine(std::istream &inStream) {
  std::istream *streamPtr = &inStream;
  return getLine(streamPtr);
}
300 
// One-byte type tags used by the property serialization code in this file.
// n.b. RDTypeTag can't be used directly here: its values are implementation
// specific.
namespace DTags {
// scalar values
const unsigned char StringTag = 0;
const unsigned char IntTag = 1;
const unsigned char UnsignedIntTag = 2;
const unsigned char BoolTag = 3;
const unsigned char FloatTag = 4;
const unsigned char DoubleTag = 5;

// vectors of values
const unsigned char VecStringTag = 6;
const unsigned char VecIntTag = 7;
const unsigned char VecUIntTag = 8;
const unsigned char VecBoolTag = 9;
const unsigned char VecFloatTag = 10;
const unsigned char VecDoubleTag = 11;

const unsigned char EndTag = 0xFF;
}  // namespace DTags
318 
319 inline bool isSerializable(const Dict::Pair &pair) {
320  switch (pair.val.getTag()) {
322  case RDTypeTag::IntTag:
324  case RDTypeTag::BoolTag:
325  case RDTypeTag::FloatTag:
327 
333 
334  return true;
335  default:
336  return false;
337  }
338 }
339 
340 inline bool streamWriteProp(std::ostream &ss, const Dict::Pair &pair) {
341  if (!isSerializable(pair))
342  return false;
343 
344 
345  streamWrite(ss, pair.key);
346  switch (pair.val.getTag()) {
349  streamWrite(ss, rdvalue_cast<std::string>(pair.val));
350  break;
351  case RDTypeTag::IntTag:
353  streamWrite(ss, rdvalue_cast<int>(pair.val));
354  break;
358  break;
359  case RDTypeTag::BoolTag:
362  break;
363  case RDTypeTag::FloatTag:
366  break;
370  break;
371 
374  streamWriteVec(ss, rdvalue_cast<std::vector<std::string> >(pair.val));
375  break;
378  streamWriteVec(ss, rdvalue_cast<std::vector<double> >(pair.val));
379  break;
382  streamWriteVec(ss, rdvalue_cast<std::vector<float> >(pair.val));
383  break;
386  streamWriteVec(ss, rdvalue_cast<std::vector<int> >(pair.val));
387  break;
390  streamWriteVec(ss, rdvalue_cast<std::vector<unsigned int> >(pair.val));
391  break;
392  default:
393  std::cerr << "Failed to write " << pair.key << std::endl;
394  return false;
395  }
396  return true;
397 }
398 
399 inline bool streamWriteProps(std::ostream &ss, const RDProps &props,
400  bool savePrivate=false, bool saveComputed=false) {
401  STR_VECT propsToSave = props.getPropList(savePrivate, saveComputed);
402  std::set<std::string> propnames(propsToSave.begin(), propsToSave.end());
403 
404  const Dict &dict = props.getDict();
405  unsigned int count = 0;
406  for(Dict::DataType::const_iterator it = dict.getData().begin();
407  it != dict.getData().end();
408  ++it) {
409  if(isSerializable(*it) && propnames.find(it->key) != propnames.end()) {
410  count ++;
411  }
412  }
413 
414  streamWrite(ss, count); // packed int?
415 
416  unsigned int writtenCount = 0;
417  for(Dict::DataType::const_iterator it = dict.getData().begin();
418  it != dict.getData().end();
419  ++it) {
420  if(propnames.find(it->key) != propnames.end()) {
421  if(isSerializable(*it)) {
422  // note - not all properties are serializable, this may be
423  // a null op
424  if(streamWriteProp(ss, *it)) {
425  writtenCount++;
426  }
427  }
428  }
429  }
430  POSTCONDITION(count==writtenCount, "Estimated property count not equal to written");
431  return true;
432 }
433 
434 template<class T>
435 void readRDValue(std::istream &ss, RDValue &value) {
436  T v;
437  streamRead(ss, v);
438  value = v;
439 }
440 
441 template<class T>
442 void readRDVecValue(std::istream &ss, RDValue &value) {
443  std::vector<T> v;
444  streamReadVec(ss, v);
445  value = v;
446 }
447 
448 inline void readRDValueString(std::istream &ss, RDValue &value) {
449  std::string v;
450  int version=0;
451  streamRead(ss, v, version);
452  value = v;
453 }
454 
455 
456 inline void readRDStringVecValue(std::istream &ss, RDValue &value) {
457  std::vector<std::string> v;
458  int version=0;
459  streamReadStringVec(ss, v, version);
460  value = v;
461 }
462 
463 
464 inline bool streamReadProp(std::istream &ss, Dict::Pair &pair) {
465  int version=0;
466  streamRead(ss, pair.key, version);
467 
468  unsigned char type;
469  streamRead(ss, type);
470  switch(type) {
471  case DTags::StringTag: readRDValueString(ss, pair.val); break;
472  case DTags::IntTag: readRDValue<int>(ss, pair.val); break;
473  case DTags::UnsignedIntTag: readRDValue<unsigned int>(ss, pair.val); break;
474  case DTags::BoolTag: readRDValue<bool>(ss, pair.val); break;
475  case DTags::FloatTag: readRDValue<float>(ss, pair.val); break;
476  case DTags::DoubleTag: readRDValue<double>(ss, pair.val); break;
477 
478  case DTags::VecStringTag: readRDStringVecValue(ss, pair.val); break;
479  case DTags::VecIntTag: readRDVecValue<int>(ss, pair.val); break;
480  case DTags::VecUIntTag: readRDVecValue<unsigned int>(ss, pair.val); break;
481  case DTags::VecFloatTag: readRDVecValue<float>(ss, pair.val); break;
482  case DTags::VecDoubleTag: readRDVecValue<double>(ss, pair.val); break;
483 
484  default:
485  return false;
486  }
487  return true;
488 }
489 
490 inline unsigned int streamReadProps(std::istream &ss, RDProps &props) {
491  unsigned int count;
492  streamRead(ss, count);
493 
494  Dict &dict = props.getDict();
495  dict.getData().resize(count);
496  for(unsigned index = 0; index<count; ++index) {
497  CHECK_INVARIANT(streamReadProp(ss, dict.getData()[index]),
498  "Corrupted property serialization detected");
499  }
500 
501  return count;
502 }
503 
504 }
505 
506 #endif
const unsigned char FloatTag
Definition: StreamOps.h:308
#define POSTCONDITION(expr, mess)
Definition: Invariant.h:115
const unsigned char EndTag
Definition: StreamOps.h:316
static const boost::uint64_t VecDoubleTag
static const boost::uint64_t UnsignedIntTag
T EndianSwapBytes(T value)
Definition: StreamOps.h:66
#define CHECK_INVARIANT(expr, mess)
Definition: Invariant.h:99
RDValue val
Definition: Dict.h:39
T rdvalue_cast(RDValue v)
unsigned char UCHAR
Definition: types.h:189
EEndian
Definition: StreamOps.h:26
static const boost::uint64_t DoubleTag
const unsigned char VecUIntTag
Definition: StreamOps.h:312
const unsigned char UnsignedIntTag
Definition: StreamOps.h:306
double rdvalue_cast< double >(RDValue v)
void streamRead(std::istream &ss, T &loc)
does a binary read of an object from a stream
Definition: StreamOps.h:243
void readRDValueString(std::istream &ss, RDValue &value)
Definition: StreamOps.h:448
void readRDValue(std::istream &ss, RDValue &value)
Definition: StreamOps.h:435
boost::uint32_t readPackedIntFromStream(std::stringstream &ss)
Reads an integer from a stream in packed format and returns the result.
Definition: StreamOps.h:139
static const boost::uint64_t FloatTag
boost::uint32_t pullPackedIntFromString(const char *&text)
Definition: StreamOps.h:177
bool streamWriteProps(std::ostream &ss, const RDProps &props, bool savePrivate=false, bool saveComputed=false)
Definition: StreamOps.h:399
bool rdvalue_cast< bool >(RDValue v)
const unsigned char VecBoolTag
Definition: StreamOps.h:313
void streamReadStringVec(std::istream &ss, std::vector< std::string > &val, int version)
Definition: StreamOps.h:278
static const boost::uint64_t StringTag
void readRDVecValue(std::istream &ss, RDValue &value)
Definition: StreamOps.h:442
std::string key
Definition: Dict.h:38
const unsigned char VecFloatTag
Definition: StreamOps.h:314
static const boost::uint64_t VecIntTag
static const boost::uint64_t VecUnsignedIntTag
unsigned int rdvalue_cast< unsigned int >(RDValue v)
const unsigned char VecIntTag
Definition: StreamOps.h:311
STR_VECT getPropList(bool includePrivate=true, bool includeComputed=true) const
returns a list with the names of our properties
Definition: RDProps.h:34
const unsigned char DoubleTag
Definition: StreamOps.h:309
T SwapBytes(T value)
Definition: StreamOps.h:41
Std stuff.
Definition: Atom.h:29
static const boost::uint64_t VecStringTag
bool streamReadProp(std::istream &ss, Dict::Pair &pair)
Definition: StreamOps.h:464
unsigned int streamReadProps(std::istream &ss, RDProps &props)
Definition: StreamOps.h:490
void streamWriteVec(std::ostream &ss, const T &val)
Definition: StreamOps.h:235
#define RDUNUSED_PARAM(x)
Definition: Invariant.h:194
float rdvalue_cast< float >(RDValue v)
const DataType & getData() const
Access to the underlying data.
Definition: Dict.h:110
static const boost::uint64_t IntTag
const unsigned char BoolTag
Definition: StreamOps.h:307
int rdvalue_cast< int >(RDValue v)
const Dict & getDict() const
gets the underlying Dictionary
Definition: RDProps.h:25
bool isSerializable(const Dict::Pair &pair)
Definition: StreamOps.h:319
static const boost::uint64_t BoolTag
void streamWrite(std::ostream &ss, const T &val)
does a binary write of an object to a stream
Definition: StreamOps.h:222
std::string getLine(std::istream *inStream)
grabs the next line from an instream and returns it.
Definition: StreamOps.h:288
const unsigned char StringTag
Definition: StreamOps.h:304
const unsigned char VecStringTag
Definition: StreamOps.h:310
static const boost::uint64_t VecFloatTag
void appendPackedIntToStream(std::stringstream &ss, boost::uint32_t num)
Packs an integer and outputs it to a stream.
Definition: StreamOps.h:95
boost::uint64_t getTag() const
const unsigned char IntTag
Definition: StreamOps.h:305
const unsigned char VecDoubleTag
Definition: StreamOps.h:315
void readRDStringVecValue(std::istream &ss, RDValue &value)
Definition: StreamOps.h:456
The Dict class can be used to store objects of arbitrary type keyed by strings.
Definition: Dict.h:35
std::vector< std::string > STR_VECT
Definition: Dict.h:28
void streamReadVec(std::istream &ss, T &val)
Definition: StreamOps.h:269
bool streamWriteProp(std::ostream &ss, const Dict::Pair &pair)
Definition: StreamOps.h:340