RDKit
Open-source cheminformatics and machine learning.
RDValue-doublemagic.h
Go to the documentation of this file.
1 // Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following
12 // disclaimer in the documentation and/or other materials provided
13 // with the distribution.
14 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
15 // nor the names of its contributors may be used to endorse or promote
16 // products derived from this software without specific prior written
17 // permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
31 #ifndef RDKIT_RDVALUE_PTRMAGIC_H
32 #define RDKIT_RDVALUE_PTRMAGIC_H
33 
34 #include <boost/cstdint.hpp>
35 #include <cassert>
36 #include <boost/any.hpp>
37 #include "Invariant.h"
38 #include <iostream>
39 #include <iomanip>
40 #include <sstream>
41 #include <vector>
42 #include <string>
43 #include <boost/utility.hpp>
44 #include <boost/lexical_cast.hpp>
45 #include <cmath>
46 #include <boost/type_traits.hpp>
47 #include <boost/static_assert.hpp>
48 #include "LocaleSwitcher.h"
49 
50 #define RDVALUE_HASBOOL
51 
52 namespace RDKit {
53 
54  // Inspired by
55  // https://nikic.github.io/2012/02/02/Pointer-magic-for-efficient-dynamic-value-representations.html
56 // 16 bit storage for value types using Quiet NaN spaces in
57 // doubles
58 // Won't work on Solaris and some other os's as mmaping maps from
59 // top memory down
60 // Example check:
61 // std::string *pointer = new std::string(v);
62 // assert((reinterpret_cast<boost::uint64_t>(pointer) & StringTag) == 0);
63 
64 // implementations, need a typedef at compile time to figure this out.
65 // current implementation is probably little endian, need to check.
66 
67 /*
68  Encoding for storing other things as a double. Use
69  Quiet NaN
70  Quiet NaN: // used to encode types
71  F F F 1XXX < - X = type bits (first bit is set to one)
72 
73  seeeeeee|eeeemmmm|mmmmmmmm|mmmmmmmm|mmmmmmmm|mmmmmmmm|mmmmmmmm|mmmmmmmm
74  s1111111|11111ppp|pppppppp|pppppppp|pppppppp|pppppppp|pppppppp|pppppppp
75  ^- first mantissa bit 1 everything else is "payload" -^
76  ^- exponent bits all 1 and mustn't be all-zero (as it
77  ^- any sign bit would be INF then)
78 
79  Available
80  8 = 1000 MaxDouble // Not really a tag, is a sentinel
81  9 = 1001 Float
82 a = 1010 Int32
83 b = 1011 Uint32
84 C = 1100 Bool
85  D = 1101 <none>
86  E = 1110 <none>
87  F = 1111 PtrTag (look at lower 3 bits for type)
88 */
89 
90 namespace RDTypeTag {
91 static const boost::uint64_t NaN = 0xfff7FFFFFFFFFFFF; // signalling NaN
92 static const boost::uint64_t MaxDouble = 0xfff8000000000000; //
93 static const boost::uint64_t DoubleTag = 0xfff8000000000000; //
94 static const boost::uint64_t FloatTag = 0xfff9000000000000; //
95 static const boost::uint64_t IntTag = 0xfffa000000000000; //
96 static const boost::uint64_t UnsignedIntTag = 0xfffb000000000000; //
97 static const boost::uint64_t BoolTag = 0xfffc000000000000; //
98 
99 // PTR Tags use the last 3 bits for typing info
100 static const boost::uint64_t PtrTag = 0xffff000000000000;
101 static const boost::uint64_t StringTag = 0xffff000000000001; // 001
102 static const boost::uint64_t VecDoubleTag = 0xffff000000000002; // 010
103 static const boost::uint64_t VecFloatTag = 0xffff000000000003; // 011
104 static const boost::uint64_t VecIntTag = 0xffff000000000004; // 100
105 static const boost::uint64_t VecUnsignedIntTag = 0xffff000000000005; // 101
106 static const boost::uint64_t VecStringTag = 0xffff000000000006; // 110
107 static const boost::uint64_t AnyTag = 0xffff000000000007; // 111
108 
109 // Retrieves the tag (and PtrMask) from the type
110 template <class T>
111 inline boost::uint64_t GetTag() {
112  return AnyTag; }
113  template<> inline boost::uint64_t GetTag<double>() { return MaxDouble; }
114  template<> inline boost::uint64_t GetTag<float>() { return FloatTag; }
115  template<> inline boost::uint64_t GetTag<int>() { return IntTag; }
116  template<> inline boost::uint64_t GetTag<unsigned int>() { return UnsignedIntTag; }
117  template<> inline boost::uint64_t GetTag<bool>() { return BoolTag; }
118  template<> inline boost::uint64_t GetTag<std::string>() { return StringTag; }
119  template<> inline boost::uint64_t GetTag<std::vector<double> >() { return VecDoubleTag; }
120  template<> inline boost::uint64_t GetTag<std::vector<float> >() { return VecFloatTag; }
121  template<> inline boost::uint64_t GetTag<std::vector<int> >() { return VecIntTag; }
122  template<> inline boost::uint64_t GetTag<std::vector<unsigned int> >() { return VecUnsignedIntTag; }
123  template<> inline boost::uint64_t GetTag<std::vector<std::string> >() { return VecStringTag; }
124  template<> inline boost::uint64_t GetTag<boost::any>() { return AnyTag; }
125 }
126 
127 
128 struct RDValue {
129  // Bit Twidling for conversion from the Tag to a Pointer
130  static const boost::uint64_t TagMask = 0xFFFF000000000000;
131  static const boost::uint64_t PointerTagMask = 0xFFFF000000000007;
132  static const boost::uint64_t ApplyMask = 0x0000FFFFFFFFFFFF;
133  static const boost::uint64_t ApplyPtrMask = 0x0000FFFFFFFFFFF8;
134 
135  union {
136  double doubleBits;
137  boost::uint64_t otherBits;
138  };
139 
140  inline RDValue() : doubleBits(0.0) {}
141 
142  inline RDValue(double number) {
143  if (boost::math::isnan(number)) {
144  // Store a signalling NaN for NaN's.
145  // quiet NaNs are used for other types.
146  otherBits = RDTypeTag::NaN;
147  assert(boost::math::isnan(doubleBits));
148  }
149  else
150  doubleBits = number;
151  }
152 
153  inline RDValue(float number) {
154  otherBits = 0 | RDTypeTag::FloatTag;
155  memcpy(((char*)&otherBits), &number, sizeof(float));
156  }
157 
158  inline RDValue(int32_t number) {
159  otherBits = (((boost::uint64_t)number) & ApplyMask ) | RDTypeTag::IntTag;
160  }
161 
162  inline RDValue(unsigned int number) {
163  otherBits = (((boost::uint64_t)number) & ApplyMask ) | RDTypeTag::UnsignedIntTag;
164  }
165 
166  inline RDValue(bool number) {
167  otherBits = (static_cast<boost::uint64_t>(number) & ApplyMask) | RDTypeTag::BoolTag;
168  }
169 
170  inline RDValue(boost::any *pointer) {
171  // ensure that the pointer really is only 48 bit
172  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::AnyTag) == 0);
173  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::AnyTag;
174  }
175 
176  inline RDValue(const boost::any &any) {
177  // ensure that the pointer really is only 48 bit
178  boost::any *pointer = new boost::any(any);
179  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::AnyTag) == 0);
180  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::AnyTag;
181  }
182 
183  // Unknown types are stored as boost::any
184  template <class T>
185  inline RDValue(const T&v) {
186  boost::any *pointer = new boost::any(v);
187  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::AnyTag) == 0);
188  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::AnyTag;
189  }
190 
191  inline RDValue(const std::string &v) {
192  std::string *pointer = new std::string(v);
193  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::StringTag) == 0);
194  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::StringTag;
195  }
196 
197  inline RDValue(const std::vector<double> &v) {
198  std::vector<double> *pointer = new std::vector<double>(v);
199  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::VecDoubleTag) == 0);
200  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::VecDoubleTag;
201  }
202 
203  inline RDValue(const std::vector<float> &v) {
204  std::vector<float> *pointer = new std::vector<float>(v);
205  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::VecFloatTag) == 0);
206  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::VecFloatTag;
207  }
208 
209  inline RDValue(const std::vector<int> &v) {
210  std::vector<int> *pointer = new std::vector<int>(v);
211  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::VecIntTag) == 0);
212  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::VecIntTag;
213  }
214 
215  inline RDValue(const std::vector<unsigned int> &v) {
216  std::vector<unsigned int> *pointer = new std::vector<unsigned int>(v);
217  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::VecIntTag) == 0);
218  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::VecUnsignedIntTag;
219  }
220 
221  inline RDValue(const std::vector<std::string> &v) {
222  std::vector<std::string> *pointer = new std::vector<std::string>(v);
223  assert((reinterpret_cast<boost::uint64_t>(pointer) & RDTypeTag::VecStringTag) == 0);
224  otherBits = reinterpret_cast<boost::uint64_t>(pointer) | RDTypeTag::VecStringTag;
225  }
226 
227  boost::uint64_t getTag() const {
228  if (otherBits < RDTypeTag::MaxDouble ||
229  (otherBits & RDTypeTag::NaN) == RDTypeTag::NaN) {
230  return RDTypeTag::DoubleTag;
231  }
232 
233  boost::uint64_t tag = otherBits & TagMask;
234  if (tag == RDTypeTag::PtrTag)
235  return otherBits & PointerTagMask;
236  return tag;
237  }
238 
239  // ptrCast - unsafe, use rdvalue_cast instead.
240  template<class T>
241  inline T* ptrCast() const {
242  return reinterpret_cast<T*>(otherBits & ~RDTypeTag::GetTag<T>());
243  }
244 
245  // RDValue doesn't have an explicit destructor, it must
246  // be wrapped in a container.
247  // The idea is that POD types don't need to be destroyed
248  // and this allows the container optimization possibilities.
249  inline void destroy() {
250  switch(getTag()) {
252  delete ptrCast<std::string>();
253  break;
255  delete ptrCast<std::vector<double> >();
256  break;
258  delete ptrCast<std::vector<float> >();
259  break;
261  delete ptrCast<std::vector<int> >();
262  break;
264  delete ptrCast<std::vector<unsigned int> >();
265  break;
267  delete ptrCast<std::vector<std::string> >();
268  break;
269  case RDTypeTag::AnyTag:
270  delete ptrCast<boost::any>();
271  break;
272  default:
273  break;
274  }
275  }
276 
277  static
278  inline void cleanup_rdvalue(RDValue v) { v.destroy(); }
279 
280 };
281 
282 /////////////////////////////////////////////////////////////////////////////////////
283 // Given two RDValue::Values - copy the appropriate structure
284 // RDValue doesn't have a copy constructor, the default
285 // copy acts like a move for better value semantics.
286 // Containers may need to copy though.
287 inline void copy_rdvalue(RDValue &dest,
288  const RDValue &src) {
289  dest.destroy();
290  switch(src.getTag()) {
292  dest = RDValue(*src.ptrCast<std::string>());
293  break;
295  dest = RDValue(*src.ptrCast<std::vector<double> >());
296  break;
298  dest = RDValue(*src.ptrCast<std::vector<float> >());
299  break;
301  dest = RDValue(*src.ptrCast<std::vector<int> >());
302  break;
304  dest = RDValue(*src.ptrCast<std::vector<unsigned int> >());
305  break;
307  dest = RDValue(*src.ptrCast<std::vector<std::string> >());
308  break;
309  case RDTypeTag::AnyTag:
310  dest = RDValue(*src.ptrCast<boost::any>());
311  break;
312  default:
313  dest = src;
314  }
315 }
316 
317 /////////////////////////////////////////////////////////////////////////////////////
318 // rdvalue_is<T>
319 
320 template<class T>
321 inline bool rdvalue_is(RDValue v) {
322  return v.getTag() == RDTypeTag::GetTag<typename boost::remove_reference<T>::type>();
323 }
324 
325 template<>
326 inline bool rdvalue_is<double>(RDValue v) {
327  return v.otherBits < RDTypeTag::MaxDouble ||
328  (v.otherBits & RDTypeTag::NaN) == RDTypeTag::NaN;
329 }
330 
331 template<>
333  return rdvalue_is<double>(v);
334 }
335 
336 /*
337 template<>
338 inline bool rdvalue_is<bool>(RDValue v) {
339  return (v.getTag() == RDTypeTag::IntTag &&
340  (static_cast<int32_t>(v.otherBits & ~RDTypeTag::IntTag) == 1 ||
341  static_cast<int32_t>(v.otherBits & ~RDTypeTag::IntTag) == 0 ));
342 }
343 
344 template<>
345 inline bool rdvalue_is<const bool&>(RDValue v) {
346  return rdvalue_is<bool>(v);
347 }
348 */
349 
350 /////////////////////////////////////////////////////////////////////////////////////
351 // rdvalue_cast<T>
352 //
353 // POD types do not support reference semantics. Other types do.
354 // rdvalue_cast<const std::vector<double> &>(RDValue); // ok
355 // rdvalue_cast<const float &>(RDValue); // bad_any_cast
356 
358 // Get stuff stored in boost any
359 template<class T>
360 inline T rdvalue_cast(RDValue v) {
361  // Disable reference and pointer casts to POD data.
362  BOOST_STATIC_ASSERT( !(
363  (boost::is_pointer<T>::value && (
364  boost::is_integral<typename boost::remove_pointer<T>::type>::value ||
365  boost::is_floating_point<typename boost::remove_pointer<T>::type>::value)) ||
366  (boost::is_reference<T>::value && (
367  boost::is_integral<typename boost::remove_reference<T>::type>::value ||
368  boost::is_floating_point<typename boost::remove_reference<T>::type>::value))
369  ));
370 
371  if (rdvalue_is<boost::any>(v)) {
372  return boost::any_cast<T>(*v.ptrCast<boost::any>());
373  }
374  throw boost::bad_any_cast();
375 }
376 
377 // POD casts
378 template<>
379 inline double rdvalue_cast<double>(RDValue v) {
380  if (rdvalue_is<double>(v)) return v.doubleBits;
381  throw boost::bad_any_cast();
382 }
383 
384 template<>
385 inline float rdvalue_cast<float>(RDValue v) {
386  if (rdvalue_is<float>(v)) {
387  float f;
388  memcpy(&f, ((char*)&v.otherBits), sizeof(float));
389  return f;
390  }
391  throw boost::bad_any_cast();
392 }
393 
394 // n.b. with const expressions, could use ~RDTagTypes::GetTag<T>()
395 // and enable_if
396 template<>
397 inline int rdvalue_cast<int>(RDValue v) {
398  if (rdvalue_is<int>(v)) return static_cast<int32_t>(v.otherBits &
400  throw boost::bad_any_cast();
401 }
402 template<>
403 inline unsigned int rdvalue_cast<unsigned int>(RDValue v) {
404  if (rdvalue_is<unsigned int>(v)) return static_cast<uint32_t>(
405  v.otherBits & ~RDTypeTag::UnsignedIntTag);
406  throw boost::bad_any_cast();
407 }
408 
409 template<>
410 inline bool rdvalue_cast<bool>(RDValue v) {
411  if (rdvalue_is<bool>(v)) return static_cast<bool>(
412  v.otherBits & ~RDTypeTag::BoolTag);
413  throw boost::bad_any_cast();
414 }
415 
416 } // namespace rdkit
417 #endif
bool rdvalue_is< double >(RDValue v)
RDValue(double number)
RDValue(const std::vector< unsigned int > &v)
RDValue(boost::any *pointer)
boost::uint64_t GetTag< float >()
static const boost::uint64_t MaxDouble
void copy_rdvalue(RDValue &dest, const RDValue &src)
static const boost::uint64_t VecDoubleTag
boost::uint64_t GetTag< int >()
static const boost::uint64_t UnsignedIntTag
RDValue(unsigned int number)
RDValue(int32_t number)
T rdvalue_cast(RDValue v)
static const boost::uint64_t AnyTag
static const boost::uint64_t DoubleTag
RDValue(const std::vector< float > &v)
static const boost::uint64_t FloatTag
static void cleanup_rdvalue(RDValue v)
T * ptrCast() const
static const boost::uint64_t StringTag
bool rdvalue_is(RDValue v)
RDValue(const std::vector< std::string > &v)
RDValue(bool number)
static const boost::uint64_t VecIntTag
static const boost::uint64_t VecUnsignedIntTag
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:29
static const boost::uint64_t VecStringTag
boost::uint64_t GetTag()
static const boost::uint64_t NaN
static const boost::uint64_t IntTag
boost::uint64_t GetTag< unsigned int >()
RDValue RDValue_cast_t
bool rdvalue_is< const double & >(RDValue v)
static const boost::uint64_t BoolTag
RDValue(float number)
static const boost::uint64_t VecFloatTag
boost::uint64_t getTag() const
RDValue(const std::vector< double > &v)
boost::uint64_t GetTag< double >()
RDValue(const boost::any &any)
boost::uint64_t GetTag< bool >()
static const boost::uint64_t PtrTag
RDValue(const std::vector< int > &v)
RDValue(const std::string &v)
boost::uint64_t otherBits