RDKit
Open-source cheminformatics and machine learning.
StreamOps.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2008 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 //
11 #ifndef _RD_STREAMOPS_H
12 #define _RD_STREAMOPS_H
13 
14 #include "types.h"
15 #include "Invariant.h"
16 #include "RDProps.h"
17 #include <string>
18 #include <sstream>
19 #include <iostream>
20 #include <boost/cstdint.hpp>
21 #include <boost/detail/endian.hpp>
22 
23 namespace RDKit {
24 // this code block for handling endian problems is from :
25 // http://stackoverflow.com/questions/105252/how-do-i-convert-between-big-endian-and-little-endian-values-in-c
26 enum EEndian {
29 #if defined(BOOST_LITTLE_ENDIAN)
30  HOST_ENDIAN_ORDER = LITTLE_ENDIAN_ORDER
31 #elif defined(BOOST_BIG_ENDIAN)
32  HOST_ENDIAN_ORDER = BIG_ENDIAN_ORDER
33 #else
34 #error "Failed to determine the system endian value"
35 #endif
36 };
37 
//! Returns a copy of \c value with the order of its first \c size bytes
//! reversed.
/*!
  \tparam T     type of the value to swap; it is copied byte by byte, so it
                should be a trivially copyable type (integers, floats, ...)
  \tparam size  number of bytes to reverse; callers in this file always pass
                \c sizeof(T)
  \param value  the value whose bytes are reversed

  \return the byte-reversed value

  Every byte of the result is written explicitly, so odd sizes (including a
  size of 1, where the result equals the input) are handled as well as even
  ones. The bytes are accessed through \c char pointers rather than through a
  union, which avoids reading an inactive union member.
*/
template <class T, unsigned int size>
inline T SwapBytes(T value) {
  // start from a copy so that any bytes beyond `size` keep their input value
  T swapped = value;
  const char *src = reinterpret_cast<const char *>(&value);
  char *dst = reinterpret_cast<char *>(&swapped);

  for (unsigned int i = 0; i < size; ++i) {
    dst[i] = src[size - 1 - i];
  }

  return swapped;
}
56 
57 // Here is the function you will use. Again there is two compile-time assertion
58 // that use the boost librarie. You could probably comment them out, but if you
59 // do be cautious not to use this function for anything else than integers
60 // types. This function need to be calles like this :
61 //
62 // int x = someValue;
63 // int i = EndianSwapBytes<HOST_ENDIAN_ORDER, BIG_ENDIAN_ORDER>(x);
64 //
65 template <EEndian from, EEndian to, class T>
66 inline T EndianSwapBytes(T value) {
67  // A : La donnée à swapper à une taille de 2, 4 ou 8 octets
68  BOOST_STATIC_ASSERT(sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 ||
69  sizeof(T) == 8);
70  if (sizeof(T) == 1) return value;
71 
72  // A : La donnée à swapper est d'un type arithmetic
73  // BOOST_STATIC_ASSERT(boost::is_arithmetic<T>::value);
74 
75  // Si from et to sont du même type on ne swap pas.
76  if (from == to) return value;
77 
78  return SwapBytes<T, sizeof(T)>(value);
79 }
80 template <EEndian from, EEndian to>
81 inline char EndianSwapBytes(char value) {
82  return value;
83 }
84 template <EEndian from, EEndian to>
85 inline unsigned char EndianSwapBytes(unsigned char value) {
86  return value;
87 }
88 template <EEndian from, EEndian to>
89 inline signed char EndianSwapBytes(signed char value) {
90  return value;
91 }
92 // --------------------------------------
93 
94 //! Packs an integer and outputs it to a stream
95 inline void appendPackedIntToStream(std::stringstream &ss,
96  boost::uint32_t num) {
97  int nbytes, bix;
98  unsigned int val, res;
99  char tc;
100 
101  res = num;
102  while (1) {
103  if (res < (1 << 7)) {
104  val = (res << 1);
105  nbytes = 1;
106  break;
107  }
108  res -= (1 << 7);
109  if (res < (1 << 14)) {
110  val = ((res << 2) | 1);
111  nbytes = 2;
112  break;
113  }
114  res -= (1 << 14);
115  if (res < (1 << 21)) {
116  val = ((res << 3) | 3);
117  nbytes = 3;
118  break;
119  }
120  res -= (1 << 21);
121  if (res < (1 << 29)) {
122  val = ((res << 3) | 7);
123  nbytes = 4;
124  break;
125  } else {
126  CHECK_INVARIANT(0, "ERROR: Integer too big to pack\n");
127  }
128  }
129  // val = EndianSwapBytes<HOST_ENDIAN_ORDER,LITTLE_ENDIAN_ORDER>(val);
130 
131  for (bix = 0; bix < nbytes; bix++) {
132  tc = (char)(val & 255);
133  ss.write(&tc, 1);
134  val >>= 8;
135  }
136 }
137 
138 //! Reads an integer from a stream in packed format and returns the result.
139 inline boost::uint32_t readPackedIntFromStream(std::stringstream &ss) {
140  boost::uint32_t val, num;
141  int shift, offset;
142  char tmp;
143  ss.read(&tmp, sizeof(tmp));
144  val = UCHAR(tmp);
145  offset = 0;
146  if ((val & 1) == 0) {
147  shift = 1;
148  } else if ((val & 3) == 1) {
149  ss.read((char *)&tmp, sizeof(tmp));
150  val |= (UCHAR(tmp) << 8);
151  shift = 2;
152  offset = (1 << 7);
153  } else if ((val & 7) == 3) {
154  ss.read((char *)&tmp, sizeof(tmp));
155  val |= (UCHAR(tmp) << 8);
156  ss.read((char *)&tmp, sizeof(tmp));
157  val |= (UCHAR(tmp) << 16);
158  shift = 3;
159  offset = (1 << 7) + (1 << 14);
160  } else {
161  ss.read((char *)&tmp, sizeof(tmp));
162  val |= (UCHAR(tmp) << 8);
163  ss.read((char *)&tmp, sizeof(tmp));
164  val |= (UCHAR(tmp) << 16);
165  ss.read((char *)&tmp, sizeof(tmp));
166  val |= (UCHAR(tmp) << 24);
167  shift = 3;
168  offset = (1 << 7) + (1 << 14) + (1 << 21);
169  }
170  num = (val >> shift) + offset;
171  // num = EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(num);
172  return num;
173 }
174 
175 //! Reads an integer from a char * in packed format and returns the result.
176 //! The argument is advanced
177 inline boost::uint32_t pullPackedIntFromString(const char *&text) {
178  boost::uint32_t val, num;
179  int shift, offset;
180  char tmp;
181  tmp = *text;
182  text++;
183  val = UCHAR(tmp);
184  offset = 0;
185  if ((val & 1) == 0) {
186  shift = 1;
187  } else if ((val & 3) == 1) {
188  tmp = *text;
189  text++;
190  val |= (UCHAR(tmp) << 8);
191  shift = 2;
192  offset = (1 << 7);
193  } else if ((val & 7) == 3) {
194  tmp = *text;
195  text++;
196  val |= (UCHAR(tmp) << 8);
197  tmp = *text;
198  text++;
199  val |= (UCHAR(tmp) << 16);
200  shift = 3;
201  offset = (1 << 7) + (1 << 14);
202  } else {
203  tmp = *text;
204  text++;
205  val |= (UCHAR(tmp) << 8);
206  tmp = *text;
207  text++;
208  val |= (UCHAR(tmp) << 16);
209  tmp = *text;
210  text++;
211  val |= (UCHAR(tmp) << 24);
212  shift = 3;
213  offset = (1 << 7) + (1 << 14) + (1 << 21);
214  }
215  num = (val >> shift) + offset;
216  // num = EndianSwapBytes<LITTLE_ENDIAN_ORDER,HOST_ENDIAN_ORDER>(num);
217  return num;
218 }
219 
220 //! does a binary write of an object to a stream
221 template <typename T>
222 void streamWrite(std::ostream &ss, const T &val) {
223  T tval = EndianSwapBytes<HOST_ENDIAN_ORDER, LITTLE_ENDIAN_ORDER>(val);
224  ss.write((const char *)&tval, sizeof(T));
225 }
226 
227 //! special case for string
228 inline void streamWrite(std::ostream &ss, const std::string &what) {
229  unsigned int l = rdcast<unsigned int>(what.length());
230  ss.write((const char *)&l, sizeof(l));
231  ss.write(what.c_str(), sizeof(char) * l);
232 };
233 
234 //! does a binary read of an object from a stream
235 template <typename T>
236 void streamRead(std::istream &ss, T &loc) {
237  T tloc;
238  ss.read((char *)&tloc, sizeof(T));
239  loc = EndianSwapBytes<LITTLE_ENDIAN_ORDER, HOST_ENDIAN_ORDER>(tloc);
240 }
241 
242 //! special case for string
243 template <class T>
244 void streamRead(std::istream &ss, T &obj, int version) {
245  RDUNUSED_PARAM(version);
246  streamRead(ss, obj);
247 }
248 
//! special case for string
/*!
  Reads a string stored as an \c unsigned \c int length followed by that many
  characters, i.e. the layout produced by the string overload of
  streamWrite(). The length is read exactly as stored (no byte swapping).

  All of the characters are kept, including embedded NUL characters; the
  result is not cut at the first NUL. The characters are read straight into a
  std::string, so nothing has to be freed by hand if the stream throws.

  \param ss       the stream to read from
  \param what     receives the string; any previous contents are replaced
  \param version  unused

  The stream state is not checked here: if the length cannot be read the
  result is an empty string, and if the characters are cut short the unread
  tail of the result is zero-filled.
*/
inline void streamRead(std::istream &ss, std::string &what, int version) {
  (void)version;  // unused; kept so all versioned overloads share a signature
  unsigned int l = 0;
  ss.read(reinterpret_cast<char *>(&l), sizeof(l));

  std::string buff(l, '\0');
  if (l > 0) {
    ss.read(&buff[0], l);
  }
  what.swap(buff);
}
259 
260 
//! grabs the next line from an instream and returns it.
/*!
  The line is read with std::getline(); if it ends in a carriage return
  (as lines from files with "\r\n" endings do), that character is removed.

  \param inStream  pointer to the stream to read from; it is dereferenced
                   without a null check

  \return the line, without its line terminator
*/
inline std::string getLine(std::istream *inStream) {
  std::string line;
  std::getline(*inStream, line);
  const std::string::size_type len = line.size();
  if (len != 0 && line[len - 1] == '\r') {
    line.resize(len - 1);
  }
  return line;
}
270 //! grabs the next line from an instream and returns it.
271 inline std::string getLine(std::istream &inStream) {
272  return getLine(&inStream);
273 }
274 
// n.b. We can't use RDTypeTag directly, they are implementation
//  specific
//
// One-byte type markers used in the serialized form of a property: the
// reader (streamReadProp) reads one of these after the property name and
// uses it to decide how to decode the value that follows.
namespace DTags {
const unsigned char StringTag = 0x00;       // value is a string
const unsigned char IntTag = 0x01;          // value is an int
const unsigned char UnsignedIntTag = 0x02;  // value is an unsigned int
const unsigned char BoolTag = 0x03;         // value is a bool
const unsigned char FloatTag = 0x04;        // value is a float
const unsigned char DoubleTag = 0x05;       // value is a double
const unsigned char EndTag = 0xFF;          // not referenced by the code shown here
}
286 
287 inline bool isSerializable(const Dict::Pair &pair) {
288  switch (pair.val.getTag()) {
290  case RDTypeTag::IntTag:
292  case RDTypeTag::BoolTag:
293  case RDTypeTag::FloatTag:
295  return true;
296  default:
297  return false;
298  }
299 }
300 
301 inline bool streamWriteProp(std::ostream &ss, const Dict::Pair &pair) {
302  switch (pair.val.getTag()) {
304  case RDTypeTag::IntTag:
306  case RDTypeTag::BoolTag:
307  case RDTypeTag::FloatTag:
309  break;
310  default:
311  return false;
312  }
313 
314  streamWrite(ss, pair.key);
315  switch (pair.val.getTag()) {
318  streamWrite(ss, rdvalue_cast<std::string>(pair.val));
319  break;
320  case RDTypeTag::IntTag:
322  streamWrite(ss, rdvalue_cast<int>(pair.val));
323  break;
327  break;
328  case RDTypeTag::BoolTag:
331  break;
332  case RDTypeTag::FloatTag:
335  break;
339  break;
340  default:
341  std::cerr << "Failed to write " << pair.key << std::endl;
342  return false;
343  }
344  return true;
345 }
346 
347 inline bool streamWriteProps(std::ostream &ss, const RDProps &props,
348  bool savePrivate=false, bool saveComputed=false) {
349  STR_VECT propsToSave = props.getPropList(savePrivate, saveComputed);
350  std::set<std::string> propnames(propsToSave.begin(), propsToSave.end());
351 
352  const Dict &dict = props.getDict();
353  unsigned int count = 0;
354  for(Dict::DataType::const_iterator it = dict.getData().begin();
355  it != dict.getData().end();
356  ++it) {
357  if(isSerializable(*it) && propnames.find(it->key) != propnames.end()) {
358  count ++;
359  }
360  }
361 
362  streamWrite(ss, count); // packed int?
363 
364  unsigned int writtenCount = 0;
365  for(Dict::DataType::const_iterator it = dict.getData().begin();
366  it != dict.getData().end();
367  ++it) {
368  if(isSerializable(*it) && propnames.find(it->key) != propnames.end()) {
369  // note - not all properties are serializable, this may be
370  // a null op
371  if(streamWriteProp(ss, *it)) {
372  writtenCount++;
373  }
374  }
375  }
376  POSTCONDITION(count==writtenCount, "Estimated property count not equal to written");
377  return true;
378 }
379 
380 template<class T>
381 void readRDValue(std::istream &ss, RDValue &value) {
382  T v;
383  streamRead(ss, v);
384  value = v;
385 }
386 
387 inline void readRDValueString(std::istream &ss, RDValue &value) {
388  std::string v;
389  int version=0;
390  streamRead(ss, v, version);
391  value = v;
392 }
393 
394 inline bool streamReadProp(std::istream &ss, Dict::Pair &pair) {
395  int version=0;
396  streamRead(ss, pair.key, version);
397 
398  unsigned char type;
399  streamRead(ss, type);
400  switch(type) {
401  case DTags::StringTag: readRDValueString(ss, pair.val); break;
402  case DTags::IntTag: readRDValue<int>(ss, pair.val); break;
403  case DTags::UnsignedIntTag: readRDValue<unsigned int>(ss, pair.val); break;
404  case DTags::BoolTag: readRDValue<bool>(ss, pair.val); break;
405  case DTags::FloatTag: readRDValue<float>(ss, pair.val); break;
406  case DTags::DoubleTag: readRDValue<double>(ss, pair.val); break;
407  default:
408  return false;
409  }
410  return true;
411 }
412 
413 inline unsigned int streamReadProps(std::istream &ss, RDProps &props) {
414  unsigned int count;
415  streamRead(ss, count);
416 
417  Dict &dict = props.getDict();
418  dict.getData().resize(count);
419  for(unsigned index = 0; index<count; ++index) {
420  CHECK_INVARIANT(streamReadProp(ss, dict.getData()[index]),
421  "Corrupted property serialization detected");
422  }
423 
424  return count;
425 }
426 
427 }
428 
429 #endif
const unsigned char FloatTag
Definition: StreamOps.h:282
#define POSTCONDITION(expr, mess)
Definition: Invariant.h:115
const unsigned char EndTag
Definition: StreamOps.h:284
static const boost::uint64_t UnsignedIntTag
T EndianSwapBytes(T value)
Definition: StreamOps.h:66
#define CHECK_INVARIANT(expr, mess)
Definition: Invariant.h:99
RDValue val
Definition: Dict.h:39
unsigned char UCHAR
Definition: types.h:188
EEndian
Definition: StreamOps.h:26
static const boost::uint64_t DoubleTag
const unsigned char UnsignedIntTag
Definition: StreamOps.h:280
double rdvalue_cast< double >(RDValue v)
void streamRead(std::istream &ss, T &loc)
does a binary read of an object from a stream
Definition: StreamOps.h:236
void readRDValueString(std::istream &ss, RDValue &value)
Definition: StreamOps.h:387
void readRDValue(std::istream &ss, RDValue &value)
Definition: StreamOps.h:381
boost::uint32_t readPackedIntFromStream(std::stringstream &ss)
Reads an integer from a stream in packed format and returns the result.
Definition: StreamOps.h:139
static const boost::uint64_t FloatTag
boost::uint32_t pullPackedIntFromString(const char *&text)
Definition: StreamOps.h:177
bool streamWriteProps(std::ostream &ss, const RDProps &props, bool savePrivate=false, bool saveComputed=false)
Definition: StreamOps.h:347
bool rdvalue_cast< bool >(RDValue v)
static const boost::uint64_t StringTag
std::string key
Definition: Dict.h:38
unsigned int rdvalue_cast< unsigned int >(RDValue v)
STR_VECT getPropList(bool includePrivate=true, bool includeComputed=true) const
returns a list with the names of our properties
Definition: RDProps.h:34
const unsigned char DoubleTag
Definition: StreamOps.h:283
T SwapBytes(T value)
Definition: StreamOps.h:41
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
bool streamReadProp(std::istream &ss, Dict::Pair &pair)
Definition: StreamOps.h:394
unsigned int streamReadProps(std::istream &ss, RDProps &props)
Definition: StreamOps.h:413
#define RDUNUSED_PARAM(x)
Definition: Invariant.h:194
float rdvalue_cast< float >(RDValue v)
const DataType & getData() const
Access to the underlying data.
Definition: Dict.h:83
static const boost::uint64_t IntTag
const unsigned char BoolTag
Definition: StreamOps.h:281
int rdvalue_cast< int >(RDValue v)
const Dict & getDict() const
gets the underlying Dictionary
Definition: RDProps.h:25
bool isSerializable(const Dict::Pair &pair)
Definition: StreamOps.h:287
static const boost::uint64_t BoolTag
void streamWrite(std::ostream &ss, const T &val)
does a binary write of an object to a stream
Definition: StreamOps.h:222
std::string getLine(std::istream *inStream)
grabs the next line from an instream and returns it.
Definition: StreamOps.h:262
const unsigned char StringTag
Definition: StreamOps.h:278
void appendPackedIntToStream(std::stringstream &ss, boost::uint32_t num)
Packs an integer and outputs it to a stream.
Definition: StreamOps.h:95
boost::uint64_t getTag() const
const unsigned char IntTag
Definition: StreamOps.h:279
The Dict class can be used to store objects of arbitrary type keyed by strings.
Definition: Dict.h:35
std::vector< std::string > STR_VECT
Definition: Dict.h:28
bool streamWriteProp(std::ostream &ss, const Dict::Pair &pair)
Definition: StreamOps.h:301