RDKit
Open-source cheminformatics and machine learning.
RDValue-doublemagic.h
Go to the documentation of this file.
1 // Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following
12 // disclaimer in the documentation and/or other materials provided
13 // with the distribution.
14 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
15 // nor the names of its contributors may be used to endorse or promote
16 // products derived from this software without specific prior written
17 // permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
31 #ifndef RDKIT_RDVALUE_PTRMAGIC_H
32 #define RDKIT_RDVALUE_PTRMAGIC_H
33 
34 #include <boost/cstdint.hpp>
35 #include <cassert>
36 #include <boost/any.hpp>
37 #include "Invariant.h"
38 #include <iostream>
39 #include <iomanip>
40 #include <sstream>
41 #include <vector>
42 #include <string>
44 #include <boost/utility.hpp>
45 #include <boost/lexical_cast.hpp>
46 #include <boost/type_traits.hpp>
47 #include <boost/static_assert.hpp>
49 #include <cmath>
50 #include "LocaleSwitcher.h"
51 
52 #define RDVALUE_HASBOOL
53 
54 namespace RDKit {
55 
56  // Inspired by
57  // https://nikic.github.io/2012/02/02/Pointer-magic-for-efficient-dynamic-value-representations.html
58 // 16 bit storage for value types using the quiet NaN space in
59 // doubles.
60 // Won't work on Solaris and some other OSes, as mmap there maps from
61 // the top of memory down.
62 // Example check:
63 // std::string *pointer = new std::string(v);
64 // assert((reinterpret_cast<boost::uint64_t>(pointer) & StringTag) == 0);
65 
66 // implementations, need a typedef at compile time to figure this out.
67 // current implementation is probably little endian, need to check.
68 
69 /*
70  Encoding for storing other things as a double. Use
71  Quiet NaN
72  Quiet NaN: // used to encode types
73  F F F 1XXX < - X = type bits (first bit is set to one)
74 
75  seeeeeee|eeeemmmm|mmmmmmmm|mmmmmmmm|mmmmmmmm|mmmmmmmm|mmmmmmmm|mmmmmmmm
76  s1111111|11111ppp|pppppppp|pppppppp|pppppppp|pppppppp|pppppppp|pppppppp
77  ^- first mantissa bit 1; everything else is "payload" -^
78  ^- exponent bits all 1; the payload mustn't be all-zero (as it would be INF then)
79  ^- any sign bit
80 
81  Available
82  8 = 1000 MaxDouble // Not really a tag, is a sentinel
83  9 = 1001 Float
84  a = 1010 Int32
85  b = 1011 Uint32
86  C = 1100 Bool
87  D = 1101 <none>
88  E = 1110 <none>
89  F = 1111 PtrTag (look at lower 3 bits for type)
90 */
91 
92 namespace RDTypeTag {
93 static const boost::uint64_t NaN = 0xfff7FFFFFFFFFFFF; // signalling NaN
94 static const boost::uint64_t MaxDouble = 0xfff8000000000000; //
95 static const boost::uint64_t DoubleTag = 0xfff8000000000000; //
96 static const boost::uint64_t FloatTag = 0xfff9000000000000; //
97 static const boost::uint64_t IntTag = 0xfffa000000000000; //
98 static const boost::uint64_t UnsignedIntTag = 0xfffb000000000000; //
99 static const boost::uint64_t BoolTag = 0xfffc000000000000; //
100 
101 // PTR Tags use the last 3 bits for typing info
102 static const boost::uint64_t PtrTag = 0xffff000000000000;
103 static const boost::uint64_t StringTag = 0xffff000000000001; // 001
104 static const boost::uint64_t VecDoubleTag = 0xffff000000000002; // 010
105 static const boost::uint64_t VecFloatTag = 0xffff000000000003; // 011
106 static const boost::uint64_t VecIntTag = 0xffff000000000004; // 100
107 static const boost::uint64_t VecUnsignedIntTag = 0xffff000000000005; // 101
108 static const boost::uint64_t VecStringTag = 0xffff000000000006; // 110
109 static const boost::uint64_t AnyTag = 0xffff000000000007; // 111
110 
111 // Retrieves the tag (and PtrMask) from the type
112 template <class T>
113 inline boost::uint64_t GetTag() {
114  return AnyTag; }
115  template<> inline boost::uint64_t GetTag<double>() { return MaxDouble; }
116  template<> inline boost::uint64_t GetTag<float>() { return FloatTag; }
117  template<> inline boost::uint64_t GetTag<int>() { return IntTag; }
118  template<> inline boost::uint64_t GetTag<unsigned int>() { return UnsignedIntTag; }
119  template<> inline boost::uint64_t GetTag<bool>() { return BoolTag; }
120  template<> inline boost::uint64_t GetTag<std::string>() { return StringTag; }
121  template<> inline boost::uint64_t GetTag<std::vector<double> >() { return VecDoubleTag; }
122  template<> inline boost::uint64_t GetTag<std::vector<float> >() { return VecFloatTag; }
123  template<> inline boost::uint64_t GetTag<std::vector<int> >() { return VecIntTag; }
124  template<> inline boost::uint64_t GetTag<std::vector<unsigned int> >() { return VecUnsignedIntTag; }
125  template<> inline boost::uint64_t GetTag<std::vector<std::string> >() { return VecStringTag; }
126  template<> inline boost::uint64_t GetTag<boost::any>() { return AnyTag; }
127 }
128 
129 
130 struct RDValue {
131  // Bit twiddling for conversion from the Tag to a Pointer
132  static const boost::uint64_t TagMask = 0xFFFF000000000000;
133  static const boost::uint64_t PointerTagMask = 0xFFFF000000000007;
134  static const boost::uint64_t ApplyMask = 0x0000FFFFFFFFFFFF;
135  static const boost::uint64_t ApplyPtrMask = 0x0000FFFFFFFFFFF8;
136 
137  union {
138  double doubleBits;
139  boost::uint64_t otherBits;
140  };
141 
142  inline RDValue() : doubleBits(0.0) {}
143 
144  inline RDValue(double number) {
145  if (boost::math::isnan(number)) {
146  // Store a signalling NaN for NaN's.
147  // quiet NaNs are used for other types.
148  otherBits = RDTypeTag::NaN;
149  assert(boost::math::isnan(doubleBits));
150  }
151  else
152  doubleBits = number;
153  }
154 
155  inline RDValue(float number) { // stores a float inside the FloatTag-tagged 64-bit word
156  otherBits = 0 | RDTypeTag::FloatTag; // start from the bare float tag
157  memcpy(((char*)&otherBits), &number, sizeof(float)); // overwrite the first sizeof(float) bytes (in memory order) of otherBits with the float's bytes
158  }
159 
160  inline RDValue(int32_t number) {
161  otherBits = (((boost::uint64_t)number) & ApplyMask ) | RDTypeTag::IntTag;
162  }
163 
164  inline RDValue(unsigned int number) {
165  otherBits = (((boost::uint64_t)number) & ApplyMask ) | RDTypeTag::UnsignedIntTag;
166  }
167 
168  inline RDValue(bool number) {
169  otherBits = (static_cast<boost::uint64_t>(number) & ApplyMask) | RDTypeTag::BoolTag;
170  }
171 
172  inline RDValue(boost::any *pointer) {
173  // ensure that the pointer really is only 48 bit
174  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::AnyTag) == 0);
175  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::AnyTag;
176  }
177 
178  inline RDValue(const boost::any &any) {
179  // ensure that the pointer really is only 48 bit
180  boost::any *pointer = new boost::any(any);
181  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::AnyTag) == 0);
182  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::AnyTag;
183  }
184 
185  // Unknown types are stored as boost::any
186  template <class T>
187  inline RDValue(const T&v) {
188  boost::any *pointer = new boost::any(v);
189  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::AnyTag) == 0);
190  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::AnyTag;
191  }
192 
193  inline RDValue(const std::string &v) {
194  std::string *pointer = new std::string(v);
195  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::StringTag) == 0);
196  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::StringTag;
197  }
198 
199  inline RDValue(const std::vector<double> &v) {
200  std::vector<double> *pointer = new std::vector<double>(v);
201  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::VecDoubleTag) == 0);
202  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::VecDoubleTag;
203  }
204 
205  inline RDValue(const std::vector<float> &v) {
206  std::vector<float> *pointer = new std::vector<float>(v);
207  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::VecFloatTag) == 0);
208  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::VecFloatTag;
209  }
210 
211  inline RDValue(const std::vector<int> &v) {
212  std::vector<int> *pointer = new std::vector<int>(v);
213  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::VecIntTag) == 0);
214  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::VecIntTag;
215  }
216 
217  inline RDValue(const std::vector<unsigned int> &v) { // heap-allocates a copy of v and stores the tagged pointer; nothing here frees it
218  std::vector<unsigned int> *pointer = new std::vector<unsigned int>(v);
219  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::VecUnsignedIntTag) == 0); // every bit the tag is ORed into must be clear in the raw pointer (check against this type's own tag, not VecIntTag)
220  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::VecUnsignedIntTag;
221  }
222 
223  inline RDValue(const std::vector<std::string> &v) {
224  std::vector<std::string> *pointer = new std::vector<std::string>(v);
225  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::VecStringTag) == 0);
226  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::VecStringTag;
227  }
228 
229  boost::uint64_t getTag() const { // returns the RDTypeTag constant describing what this value holds
230  if (otherBits < RDTypeTag::MaxDouble || // bit pattern below the tag space: treated as a plain double
231  (otherBits & RDTypeTag::NaN) == RDTypeTag::NaN) { // matches the NaN pattern that RDValue(double) stores for NaN inputs
232  return RDTypeTag::DoubleTag;
233  }
234 
235  boost::uint64_t tag = otherBits & TagMask; // keep only the top 16 bits
236  if (tag == RDTypeTag::PtrTag)
237  return otherBits & PointerTagMask; // pointer types: top 16 bits plus the low 3 type bits
238  return tag;
239  }
240 
241  // ptrCast - unsafe, use rdvalue_cast instead. Clears the bits of T's tag from otherBits and reinterprets the rest as a T*.
242  template<class T>
243  inline T* ptrCast() const {
244  return reinterpret_cast<T*>(otherBits & ~RDTypeTag::GetTag<T>()); // no check that the stored tag actually matches T
245  }
246 
247  // RDValue doesn't have an explicit destructor, it must
248  // be wrapped in a container.
249  // The idea is that POD types don't need to be destroyed
250  // and this allows the container optimization possibilities.
251  inline void destroy() {
252  switch(getTag()) {
254  delete ptrCast<std::string>();
255  break;
257  delete ptrCast<std::vector<double> >();
258  break;
260  delete ptrCast<std::vector<float> >();
261  break;
263  delete ptrCast<std::vector<int> >();
264  break;
266  delete ptrCast<std::vector<unsigned int> >();
267  break;
269  delete ptrCast<std::vector<std::string> >();
270  break;
271  case RDTypeTag::AnyTag:
272  delete ptrCast<boost::any>();
273  break;
274  default:
275  break;
276  }
277  }
278 
279  static
280  inline void cleanup_rdvalue(RDValue v) { v.destroy(); }
281 
282 };
283 
284 /////////////////////////////////////////////////////////////////////////////////////
285 // Given two RDValue::Values - copy the appropriate structure
286 // RDValue doesn't have a copy constructor; the default
287 // copy acts like a move for better value semantics.
288 // Containers may need to copy though.
289 inline void copy_rdvalue(RDValue &dest,
290  const RDValue &src) {
291  dest.destroy();
292  switch(src.getTag()) {
294  dest = RDValue(*src.ptrCast<std::string>());
295  break;
297  dest = RDValue(*src.ptrCast<std::vector<double> >());
298  break;
300  dest = RDValue(*src.ptrCast<std::vector<float> >());
301  break;
303  dest = RDValue(*src.ptrCast<std::vector<int> >());
304  break;
306  dest = RDValue(*src.ptrCast<std::vector<unsigned int> >());
307  break;
309  dest = RDValue(*src.ptrCast<std::vector<std::string> >());
310  break;
311  case RDTypeTag::AnyTag:
312  dest = RDValue(*src.ptrCast<boost::any>());
313  break;
314  default:
315  dest = src;
316  }
317 }
318 
319 /////////////////////////////////////////////////////////////////////////////////////
320 // rdvalue_is<T>
321 
322 template<class T>
323 inline bool rdvalue_is(RDValue v) {
324  return v.getTag() == RDTypeTag::GetTag<typename boost::remove_reference<T>::type>();
325 }
326 
327 template<>
328 inline bool rdvalue_is<double>(RDValue v) {
329  return v.otherBits < RDTypeTag::MaxDouble ||
330  (v.otherBits & RDTypeTag::NaN) == RDTypeTag::NaN;
331 }
332 
333 template<>
335  return rdvalue_is<double>(v);
336 }
337 
338 /*
339 template<>
340 inline bool rdvalue_is<bool>(RDValue v) {
341  return (v.getTag() == RDTypeTag::IntTag &&
342  (static_cast<int32_t>(v.otherBits & ~RDTypeTag::IntTag) == 1 ||
343  static_cast<int32_t>(v.otherBits & ~RDTypeTag::IntTag) == 0 ));
344 }
345 
346 template<>
347 inline bool rdvalue_is<const bool&>(RDValue v) {
348  return rdvalue_is<bool>(v);
349 }
350 */
351 
352 /////////////////////////////////////////////////////////////////////////////////////
353 // rdvalue_cast<T>
354 //
355 // POD types do not support reference semantics. Other types do.
356 // rdvalue_cast<const std::vector<double> &>(RDValue); // ok
357 // rdvalue_cast<const float &>(RDValue); // rejected at compile time (static assert)
358 
360 // Get stuff stored in boost any
361 template<class T>
362 inline T rdvalue_cast(RDValue v) {
363  // Disable reference and pointer casts to POD data.
364  BOOST_STATIC_ASSERT( !(
365  (boost::is_pointer<T>::value && (
366  boost::is_integral<typename boost::remove_pointer<T>::type>::value ||
367  boost::is_floating_point<typename boost::remove_pointer<T>::type>::value)) ||
368  (boost::is_reference<T>::value && (
369  boost::is_integral<typename boost::remove_reference<T>::type>::value ||
370  boost::is_floating_point<typename boost::remove_reference<T>::type>::value))
371  ));
372 
373  if (rdvalue_is<boost::any>(v)) {
374  return boost::any_cast<T>(*v.ptrCast<boost::any>());
375  }
376  throw boost::bad_any_cast();
377 }
378 
379 // POD casts
380 template<>
381 inline double rdvalue_cast<double>(RDValue v) {
382  if (rdvalue_is<double>(v)) return v.doubleBits;
383  throw boost::bad_any_cast();
384 }
385 
386 template<>
387 inline float rdvalue_cast<float>(RDValue v) { // returns the stored float; throws boost::bad_any_cast when v is not tagged as a float
388  if (rdvalue_is<float>(v)) {
389  float f;
390  memcpy(&f, ((char*)&v.otherBits), sizeof(float)); // read the float back from the first sizeof(float) bytes of otherBits
391  return f;
392  }
393  throw boost::bad_any_cast();
394 }
395 
396 // n.b. with const expressions, could use ~RDTypeTag::GetTag<T>()
397 // and enable_if
398 template<>
399 inline int rdvalue_cast<int>(RDValue v) {
400  if (rdvalue_is<int>(v)) return static_cast<int32_t>(v.otherBits &
402  throw boost::bad_any_cast();
403 }
404 template<>
405 inline unsigned int rdvalue_cast<unsigned int>(RDValue v) {
406  if (rdvalue_is<unsigned int>(v)) return static_cast<uint32_t>(
407  v.otherBits & ~RDTypeTag::UnsignedIntTag);
408  throw boost::bad_any_cast();
409 }
410 
411 template<>
412 inline bool rdvalue_cast<bool>(RDValue v) {
413  if (rdvalue_is<bool>(v)) return static_cast<bool>(
414  v.otherBits & ~RDTypeTag::BoolTag);
415  throw boost::bad_any_cast();
416 }
417 
418 } // namespace rdkit
419 #endif
bool rdvalue_is< double >(RDValue v)
RDValue(double number)
RDValue(const std::vector< unsigned int > &v)
RDValue(boost::any *pointer)
boost::uint64_t GetTag< float >()
static const boost::uint64_t MaxDouble
void copy_rdvalue(RDValue &dest, const RDValue &src)
static const boost::uint64_t VecDoubleTag
boost::uint64_t GetTag< int >()
static const boost::uint64_t UnsignedIntTag
RDValue(unsigned int number)
RDValue(int32_t number)
T rdvalue_cast(RDValue v)
static const boost::uint64_t AnyTag
static const boost::uint64_t DoubleTag
RDValue(const std::vector< float > &v)
static const boost::uint64_t FloatTag
static void cleanup_rdvalue(RDValue v)
T * ptrCast() const
static const boost::uint64_t StringTag
bool rdvalue_is(RDValue v)
RDValue(const std::vector< std::string > &v)
RDValue(bool number)
static const boost::uint64_t VecIntTag
static const boost::uint64_t VecUnsignedIntTag
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
static const boost::uint64_t VecStringTag
boost::uint64_t GetTag()
static const boost::uint64_t NaN
static const boost::uint64_t IntTag
boost::uint64_t GetTag< unsigned int >()
RDValue RDValue_cast_t
bool rdvalue_is< const double & >(RDValue v)
static const boost::uint64_t BoolTag
RDValue(float number)
static const boost::uint64_t VecFloatTag
boost::uint64_t getTag() const
RDValue(const std::vector< double > &v)
boost::uint64_t GetTag< double >()
RDValue(const boost::any &any)
boost::uint64_t GetTag< bool >()
static const boost::uint64_t PtrTag
RDValue(const std::vector< int > &v)
RDValue(const std::string &v)
boost::uint64_t otherBits