libstdc++
|
00001 // Optimizations for random number functions, x86 version -*- C++ -*- 00002 00003 // Copyright (C) 2012-2015 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 3, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** @file bits/opt_random.h 00026 * This is an internal header file, included by other library headers. 00027 * Do not attempt to use it directly. @headername{random} 00028 */ 00029 00030 #ifndef _BITS_OPT_RANDOM_H 00031 #define _BITS_OPT_RANDOM_H 1 00032 00033 #include <x86intrin.h> 00034 00035 00036 #pragma GCC system_header 00037 00038 00039 namespace std _GLIBCXX_VISIBILITY(default) 00040 { 00041 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00042 00043 #ifdef __SSE3__ 00044 template<> 00045 template<typename _UniformRandomNumberGenerator> 00046 void 00047 normal_distribution<double>:: 00048 __generate(typename normal_distribution<double>::result_type* __f, 00049 typename normal_distribution<double>::result_type* __t, 00050 _UniformRandomNumberGenerator& __urng, 00051 const param_type& __param) 00052 { 00053 typedef uint64_t __uctype; 00054 00055 if (__f == __t) 00056 return; 00057 00058 if (_M_saved_available) 00059 { 00060 _M_saved_available = false; 00061 *__f++ = _M_saved * __param.stddev() + __param.mean(); 00062 00063 if (__f == __t) 00064 return; 00065 } 00066 00067 constexpr uint64_t __maskval = 0xfffffffffffffull; 00068 static const __m128i __mask = _mm_set1_epi64x(__maskval); 00069 static const __m128i __two = _mm_set1_epi64x(0x4000000000000000ull); 00070 static const __m128d __three = _mm_set1_pd(3.0); 00071 const __m128d __av = _mm_set1_pd(__param.mean()); 00072 00073 const __uctype __urngmin = __urng.min(); 00074 const __uctype __urngmax = __urng.max(); 00075 const __uctype __urngrange = __urngmax - __urngmin; 00076 const __uctype __uerngrange = __urngrange + 1; 00077 00078 while (__f + 1 < __t) 00079 { 00080 double __le; 00081 __m128d __x; 00082 do 00083 { 00084 union 00085 { 00086 __m128i __i; 00087 __m128d __d; 00088 } __v; 00089 00090 if (__urngrange > __maskval) 00091 { 00092 if (__detail::_Power_of_2(__uerngrange)) 00093 __v.__i = _mm_and_si128(_mm_set_epi64x(__urng(), 00094 __urng()), 00095 __mask); 00096 else 00097 { 00098 const __uctype __uerange = __maskval + 1; 00099 const __uctype __scaling = __urngrange / __uerange; 00100 const __uctype __past = __uerange * __scaling; 00101 uint64_t __v1; 00102 do 00103 __v1 = __uctype(__urng()) - __urngmin; 00104 while (__v1 >= __past); 00105 __v1 /= __scaling; 00106 uint64_t __v2; 00107 do 00108 __v2 = __uctype(__urng()) - __urngmin; 00109 while (__v2 >= __past); 00110 __v2 /= __scaling; 00111 00112 __v.__i = _mm_set_epi64x(__v1, __v2); 00113 } 00114 } 00115 else if (__urngrange == __maskval) 00116 __v.__i = _mm_set_epi64x(__urng(), __urng()); 00117 else if ((__urngrange + 2) * __urngrange >= __maskval 00118 && __detail::_Power_of_2(__uerngrange)) 00119 { 00120 uint64_t __v1 = __urng() * __uerngrange + __urng(); 00121 uint64_t __v2 = __urng() * __uerngrange + __urng(); 00122 00123 __v.__i = _mm_and_si128(_mm_set_epi64x(__v1, __v2), 00124 __mask); 00125 } 00126 else 00127 { 00128 size_t __nrng = 2; 00129 __uctype __high = __maskval / __uerngrange / __uerngrange; 00130 while (__high > __uerngrange) 00131 { 00132 ++__nrng; 00133 __high /= __uerngrange; 00134 } 00135 const __uctype __highrange = __high + 1; 00136 const __uctype __scaling = __urngrange / __highrange; 00137 const __uctype __past = __highrange * __scaling; 00138 __uctype __tmp; 00139 00140 uint64_t __v1; 00141 do 00142 { 00143 do 00144 __tmp = __uctype(__urng()) - __urngmin; 00145 while (__tmp >= __past); 00146 __v1 = __tmp / __scaling; 00147 for (size_t __cnt = 0; __cnt < __nrng; ++__cnt) 00148 { 00149 __tmp = __v1; 00150 __v1 *= __uerngrange; 00151 __v1 += __uctype(__urng()) - __urngmin; 00152 } 00153 } 00154 while (__v1 > __maskval || __v1 < __tmp); 00155 00156 uint64_t __v2; 00157 do 00158 { 00159 do 00160 __tmp = __uctype(__urng()) - __urngmin; 00161 while (__tmp >= __past); 00162 __v2 = __tmp / __scaling; 00163 for (size_t __cnt = 0; __cnt < __nrng; ++__cnt) 00164 { 00165 __tmp = __v2; 00166 __v2 *= __uerngrange; 00167 __v2 += __uctype(__urng()) - __urngmin; 00168 } 00169 } 00170 while (__v2 > __maskval || __v2 < __tmp); 00171 00172 __v.__i = _mm_set_epi64x(__v1, __v2); 00173 } 00174 00175 __v.__i = _mm_or_si128(__v.__i, __two); 00176 __x = _mm_sub_pd(__v.__d, __three); 00177 __m128d __m = _mm_mul_pd(__x, __x); 00178 __le = _mm_cvtsd_f64(_mm_hadd_pd (__m, __m)); 00179 } 00180 while (__le == 0.0 || __le >= 1.0); 00181 00182 double __mult = (std::sqrt(-2.0 * std::log(__le) / __le) 00183 * __param.stddev()); 00184 00185 __x = _mm_add_pd(_mm_mul_pd(__x, _mm_set1_pd(__mult)), __av); 00186 00187 _mm_storeu_pd(__f, __x); 00188 __f += 2; 00189 } 00190 00191 if (__f != __t) 00192 { 00193 result_type __x, __y, __r2; 00194 00195 __detail::_Adaptor<_UniformRandomNumberGenerator, result_type> 00196 __aurng(__urng); 00197 00198 do 00199 { 00200 __x = result_type(2.0) * __aurng() - 1.0; 00201 __y = result_type(2.0) * __aurng() - 1.0; 00202 __r2 = __x * __x + __y * __y; 00203 } 00204 while (__r2 > 1.0 || __r2 == 0.0); 00205 00206 const result_type __mult = std::sqrt(-2 * std::log(__r2) / __r2); 00207 _M_saved = __x * __mult; 00208 _M_saved_available = true; 00209 *__f = __y * __mult * __param.stddev() + __param.mean(); 00210 } 00211 } 00212 #endif 00213 00214 00215 _GLIBCXX_END_NAMESPACE_VERSION 00216 } // namespace 00217 00218 00219 #endif // _BITS_OPT_RANDOM_H