00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #ifndef _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H
00033 #define _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H 1
00034
00035 #include <limits>
00036 #include <bits/stl_numeric.h>
00037 #include <parallel/parallel.h>
00038 #include <parallel/random_number.h>
00039
00040 namespace __gnu_parallel
00041 {
00042
00043
00044
00045
00046
/** @brief Integer type used to number bins.
 *
 *  The shuffle routines allocate one value of this type per input
 *  element (the "oracle" arrays), so the type is kept narrow.
 */
typedef unsigned short int _BinIndex;
00048
00049
00050
00051 template<typename _RAIter>
00052 struct _DRandomShufflingGlobalData
00053 {
00054 typedef std::iterator_traits<_RAIter> _TraitsType;
00055 typedef typename _TraitsType::value_type _ValueType;
00056 typedef typename _TraitsType::difference_type _DifferenceType;
00057
00058
00059 _RAIter& _M_source;
00060
00061
00062 _ValueType** _M_temporaries;
00063
00064
00065
00066
00067 _DifferenceType** _M_dist;
00068
00069
00070 _DifferenceType* _M_starts;
00071
00072
00073
00074 _ThreadIndex* _M_bin_proc;
00075
00076
00077 int _M_num_bins;
00078
00079
00080 int _M_num_bits;
00081
00082
00083 _DRandomShufflingGlobalData(_RAIter& __source)
00084 : _M_source(__source) { }
00085 };
00086
00087
00088
00089
00090 template<typename _RAIter, typename _RandomNumberGenerator>
00091 struct _DRSSorterPU
00092 {
00093
00094 int _M_num_threads;
00095
00096
00097 _BinIndex _M_bins_begin;
00098
00099
00100 _BinIndex __bins_end;
00101
00102
00103 uint32_t _M_seed;
00104
00105
00106 _DRandomShufflingGlobalData<_RAIter>* _M_sd;
00107 };
00108
00109
00110
00111
00112
/** @brief Draw a random number made of @c __logp random bits.
 *  @param __logp Number of bits requested from the generator.
 *  @param __rng Random number generator to use; it must provide
 *  a member function @c __genrand_bits(int).
 *  @return The value produced by @c __rng.__genrand_bits(__logp),
 *  converted to @c int.  Callers in this file use it as a bin index
 *  and rely on it being smaller than 2^__logp.
 */
template<typename _RandomNumberGenerator>
  inline int
  __random_number_pow2(int __logp, _RandomNumberGenerator& __rng)
  {
    const int __drawn = __rng.__genrand_bits(__logp);
    return __drawn;
  }
00117
00118
00119
00120 template<typename _RAIter, typename _RandomNumberGenerator>
00121 void
00122 __parallel_random_shuffle_drs_pu(_DRSSorterPU<_RAIter,
00123 _RandomNumberGenerator>* __pus)
00124 {
00125 typedef std::iterator_traits<_RAIter> _TraitsType;
00126 typedef typename _TraitsType::value_type _ValueType;
00127 typedef typename _TraitsType::difference_type _DifferenceType;
00128
00129 _ThreadIndex __iam = omp_get_thread_num();
00130 _DRSSorterPU<_RAIter, _RandomNumberGenerator>* __d = &__pus[__iam];
00131 _DRandomShufflingGlobalData<_RAIter>* __sd = __d->_M_sd;
00132
00133
00134 _DifferenceType __length = (__sd->_M_starts[__iam + 1]
00135 - __sd->_M_starts[__iam]);
00136 _BinIndex* __oracles = new _BinIndex[__length];
00137 _DifferenceType* __dist = new _DifferenceType[__sd->_M_num_bins + 1];
00138 _BinIndex* __bin_proc = new _BinIndex[__sd->_M_num_bins];
00139 _ValueType** __temporaries = new _ValueType*[__d->_M_num_threads];
00140
00141
00142 for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b)
00143 __dist[__b] = 0;
00144 int __num_bits = __sd->_M_num_bits;
00145
00146 _RandomNumber __rng(__d->_M_seed);
00147
00148
00149 for (_DifferenceType __i = 0; __i < __length; ++__i)
00150 {
00151 _BinIndex __oracle = __random_number_pow2(__num_bits, __rng);
00152 __oracles[__i] = __oracle;
00153
00154
00155 ++(__dist[__oracle + 1]);
00156 }
00157
00158 for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b)
00159 __sd->_M_dist[__b][__iam + 1] = __dist[__b];
00160
00161 # pragma omp barrier
00162
00163 # pragma omp single
00164 {
00165
00166
00167 for (_BinIndex __s = 0; __s < __sd->_M_num_bins; ++__s)
00168 __gnu_sequential::partial_sum(__sd->_M_dist[__s + 1],
00169 __sd->_M_dist[__s + 1]
00170 + __d->_M_num_threads + 1,
00171 __sd->_M_dist[__s + 1]);
00172 }
00173
00174 # pragma omp barrier
00175
00176 _SequenceIndex __offset = 0, __global_offset = 0;
00177 for (_BinIndex __s = 0; __s < __d->_M_bins_begin; ++__s)
00178 __global_offset += __sd->_M_dist[__s + 1][__d->_M_num_threads];
00179
00180 # pragma omp barrier
00181
00182 for (_BinIndex __s = __d->_M_bins_begin; __s < __d->__bins_end; ++__s)
00183 {
00184 for (int __t = 0; __t < __d->_M_num_threads + 1; ++__t)
00185 __sd->_M_dist[__s + 1][__t] += __offset;
00186 __offset = __sd->_M_dist[__s + 1][__d->_M_num_threads];
00187 }
00188
00189 __sd->_M_temporaries[__iam] = static_cast<_ValueType*>
00190 (::operator new(sizeof(_ValueType) * __offset));
00191
00192 # pragma omp barrier
00193
00194
00195 for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b)
00196 __dist[__b] = __sd->_M_dist[__b][__iam];
00197 for (_BinIndex __b = 0; __b < __sd->_M_num_bins; ++__b)
00198 __bin_proc[__b] = __sd->_M_bin_proc[__b];
00199 for (_ThreadIndex __t = 0; __t < __d->_M_num_threads; ++__t)
00200 __temporaries[__t] = __sd->_M_temporaries[__t];
00201
00202 _RAIter __source = __sd->_M_source;
00203 _DifferenceType __start = __sd->_M_starts[__iam];
00204
00205
00206 for (_DifferenceType __i = 0; __i < __length; ++__i)
00207 {
00208 _BinIndex __target_bin = __oracles[__i];
00209 _ThreadIndex __target_p = __bin_proc[__target_bin];
00210
00211
00212 ::new(&(__temporaries[__target_p][__dist[__target_bin + 1]++]))
00213 _ValueType(*(__source + __i + __start));
00214 }
00215
00216 delete[] __oracles;
00217 delete[] __dist;
00218 delete[] __bin_proc;
00219 delete[] __temporaries;
00220
00221 # pragma omp barrier
00222
00223
00224 for (_BinIndex __b = __d->_M_bins_begin; __b < __d->__bins_end; ++__b)
00225 {
00226 _ValueType* __begin =
00227 (__sd->_M_temporaries[__iam]
00228 + (__b == __d->_M_bins_begin
00229 ? 0 : __sd->_M_dist[__b][__d->_M_num_threads])),
00230 * __end = (__sd->_M_temporaries[__iam]
00231 + __sd->_M_dist[__b + 1][__d->_M_num_threads]);
00232
00233 __sequential_random_shuffle(__begin, __end, __rng);
00234 std::copy(__begin, __end, __sd->_M_source + __global_offset
00235 + (__b == __d->_M_bins_begin
00236 ? 0 : __sd->_M_dist[__b][__d->_M_num_threads]));
00237 }
00238
00239 ::operator delete(__sd->_M_temporaries[__iam]);
00240 }
00241
00242
00243
/** @brief Round up to a power of two.
 *  @param __x Integer to round up.
 *  @return 1 if @c __x is at most 1; otherwise 1 shifted left by
 *  @c __rd_log2(__x - 1) + 1.  Presumably @c __rd_log2 is the
 *  rounded-down binary logarithm, which makes the result the smallest
 *  power of two not less than @c __x -- confirm against its definition.
 */
template<typename _Tp>
  _Tp
  __round_up_to_pow2(_Tp __x)
  {
    if (__x > 1)
      return static_cast<_Tp>(1) << (__rd_log2(__x - 1) + 1);

    return 1;
  }
00253
00254
00255
00256
00257
00258
00259
00260
00261 template<typename _RAIter, typename _RandomNumberGenerator>
00262 void
00263 __parallel_random_shuffle_drs(_RAIter __begin, _RAIter __end,
00264 typename std::iterator_traits
00265 <_RAIter>::difference_type __n,
00266 _ThreadIndex __num_threads,
00267 _RandomNumberGenerator& __rng)
00268 {
00269 typedef std::iterator_traits<_RAIter> _TraitsType;
00270 typedef typename _TraitsType::value_type _ValueType;
00271 typedef typename _TraitsType::difference_type _DifferenceType;
00272
00273 _GLIBCXX_CALL(__n)
00274
00275 const _Settings& __s = _Settings::get();
00276
00277 if (__num_threads > __n)
00278 __num_threads = static_cast<_ThreadIndex>(__n);
00279
00280 _BinIndex __num_bins, __num_bins_cache;
00281
00282 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
00283
00284
00285
00286 __num_bins_cache =
00287 std::max<_DifferenceType>(1, __n / (__s.L1_cache_size_lb
00288 / sizeof(_ValueType)));
00289 __num_bins_cache = __round_up_to_pow2(__num_bins_cache);
00290
00291
00292
00293 __num_bins = std::min<_DifferenceType>(__n, __num_bins_cache);
00294
00295 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
00296
00297 __num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, __num_bins);
00298 #endif
00299 __num_bins = __round_up_to_pow2(__num_bins);
00300
00301 if (__num_bins < __num_bins_cache)
00302 {
00303 #endif
00304
00305
00306 __num_bins_cache = static_cast<_BinIndex>
00307 (std::max<_DifferenceType>(1, __n / (__s.L2_cache_size
00308 / sizeof(_ValueType))));
00309 __num_bins_cache = __round_up_to_pow2(__num_bins_cache);
00310
00311
00312 __num_bins = static_cast<_BinIndex>
00313 (std::min(__n, static_cast<_DifferenceType>(__num_bins_cache)));
00314
00315 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
00316
00317 __num_bins = std::min(static_cast<_DifferenceType>(__s.TLB_size / 2),
00318 __num_bins);
00319 #endif
00320 __num_bins = __round_up_to_pow2(__num_bins);
00321 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
00322 }
00323 #endif
00324
00325 __num_bins = __round_up_to_pow2(
00326 std::max<_BinIndex>(__num_threads, __num_bins));
00327
00328 if (__num_threads <= 1)
00329 {
00330 _RandomNumber __derived_rng(
00331 __rng(std::numeric_limits<uint32_t>::max()));
00332 __sequential_random_shuffle(__begin, __end, __derived_rng);
00333 return;
00334 }
00335
00336 _DRandomShufflingGlobalData<_RAIter> __sd(__begin);
00337 _DRSSorterPU<_RAIter, _RandomNumber >* __pus;
00338 _DifferenceType* __starts;
00339
00340 # pragma omp parallel num_threads(__num_threads)
00341 {
00342 _ThreadIndex __num_threads = omp_get_num_threads();
00343 # pragma omp single
00344 {
00345 __pus = new _DRSSorterPU<_RAIter, _RandomNumber>[__num_threads];
00346
00347 __sd._M_temporaries = new _ValueType*[__num_threads];
00348 __sd._M_dist = new _DifferenceType*[__num_bins + 1];
00349 __sd._M_bin_proc = new _ThreadIndex[__num_bins];
00350 for (_BinIndex __b = 0; __b < __num_bins + 1; ++__b)
00351 __sd._M_dist[__b] = new _DifferenceType[__num_threads + 1];
00352 for (_BinIndex __b = 0; __b < (__num_bins + 1); ++__b)
00353 {
00354 __sd._M_dist[0][0] = 0;
00355 __sd._M_dist[__b][0] = 0;
00356 }
00357 __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
00358 int __bin_cursor = 0;
00359 __sd._M_num_bins = __num_bins;
00360 __sd._M_num_bits = __rd_log2(__num_bins);
00361
00362 _DifferenceType __chunk_length = __n / __num_threads,
00363 __split = __n % __num_threads,
00364 __start = 0;
00365 _DifferenceType __bin_chunk_length = __num_bins / __num_threads,
00366 __bin_split = __num_bins % __num_threads;
00367 for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
00368 {
00369 __starts[__i] = __start;
00370 __start += (__i < __split
00371 ? (__chunk_length + 1) : __chunk_length);
00372 int __j = __pus[__i]._M_bins_begin = __bin_cursor;
00373
00374
00375 __bin_cursor += (__i < __bin_split
00376 ? (__bin_chunk_length + 1)
00377 : __bin_chunk_length);
00378 __pus[__i].__bins_end = __bin_cursor;
00379 for (; __j < __bin_cursor; ++__j)
00380 __sd._M_bin_proc[__j] = __i;
00381 __pus[__i]._M_num_threads = __num_threads;
00382 __pus[__i]._M_seed = __rng(std::numeric_limits<uint32_t>::max());
00383 __pus[__i]._M_sd = &__sd;
00384 }
00385 __starts[__num_threads] = __start;
00386 }
00387
00388 __parallel_random_shuffle_drs_pu(__pus);
00389 }
00390
00391 delete[] __starts;
00392 delete[] __sd._M_bin_proc;
00393 for (int __s = 0; __s < (__num_bins + 1); ++__s)
00394 delete[] __sd._M_dist[__s];
00395 delete[] __sd._M_dist;
00396 delete[] __sd._M_temporaries;
00397
00398 delete[] __pus;
00399 }
00400
00401
00402
00403
00404
00405
00406 template<typename _RAIter, typename _RandomNumberGenerator>
00407 void
00408 __sequential_random_shuffle(_RAIter __begin, _RAIter __end,
00409 _RandomNumberGenerator& __rng)
00410 {
00411 typedef std::iterator_traits<_RAIter> _TraitsType;
00412 typedef typename _TraitsType::value_type _ValueType;
00413 typedef typename _TraitsType::difference_type _DifferenceType;
00414
00415 _DifferenceType __n = __end - __begin;
00416 const _Settings& __s = _Settings::get();
00417
00418 _BinIndex __num_bins, __num_bins_cache;
00419
00420 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
00421
00422 __num_bins_cache = std::max<_DifferenceType>
00423 (1, __n / (__s.L1_cache_size_lb / sizeof(_ValueType)));
00424 __num_bins_cache = __round_up_to_pow2(__num_bins_cache);
00425
00426
00427
00428 __num_bins = std::min(__n, (_DifferenceType)__num_bins_cache);
00429 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
00430
00431 __num_bins = std::min((_DifferenceType)__s.TLB_size / 2, __num_bins);
00432 #endif
00433 __num_bins = __round_up_to_pow2(__num_bins);
00434
00435 if (__num_bins < __num_bins_cache)
00436 {
00437 #endif
00438
00439 __num_bins_cache = static_cast<_BinIndex>
00440 (std::max<_DifferenceType>(1, __n / (__s.L2_cache_size
00441 / sizeof(_ValueType))));
00442 __num_bins_cache = __round_up_to_pow2(__num_bins_cache);
00443
00444
00445
00446 __num_bins = static_cast<_BinIndex>
00447 (std::min(__n, static_cast<_DifferenceType>(__num_bins_cache)));
00448
00449 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
00450
00451 __num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, __num_bins);
00452 #endif
00453 __num_bins = __round_up_to_pow2(__num_bins);
00454 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
00455 }
00456 #endif
00457
00458 int __num_bits = __rd_log2(__num_bins);
00459
00460 if (__num_bins > 1)
00461 {
00462 _ValueType* __target =
00463 static_cast<_ValueType*>(::operator new(sizeof(_ValueType) * __n));
00464 _BinIndex* __oracles = new _BinIndex[__n];
00465 _DifferenceType* __dist0 = new _DifferenceType[__num_bins + 1],
00466 * __dist1 = new _DifferenceType[__num_bins + 1];
00467
00468 for (int __b = 0; __b < __num_bins + 1; ++__b)
00469 __dist0[__b] = 0;
00470
00471 _RandomNumber __bitrng(__rng(0xFFFFFFFF));
00472
00473 for (_DifferenceType __i = 0; __i < __n; ++__i)
00474 {
00475 _BinIndex __oracle = __random_number_pow2(__num_bits, __bitrng);
00476 __oracles[__i] = __oracle;
00477
00478
00479 ++(__dist0[__oracle + 1]);
00480 }
00481
00482
00483 __gnu_sequential::partial_sum(__dist0, __dist0 + __num_bins + 1,
00484 __dist0);
00485
00486 for (int __b = 0; __b < __num_bins + 1; ++__b)
00487 __dist1[__b] = __dist0[__b];
00488
00489
00490 for (_DifferenceType __i = 0; __i < __n; ++__i)
00491 ::new(&(__target[(__dist0[__oracles[__i]])++]))
00492 _ValueType(*(__begin + __i));
00493
00494 for (int __b = 0; __b < __num_bins; ++__b)
00495 __sequential_random_shuffle(__target + __dist1[__b],
00496 __target + __dist1[__b + 1], __rng);
00497
00498
00499 std::copy(__target, __target + __n, __begin);
00500
00501 delete[] __dist0;
00502 delete[] __dist1;
00503 delete[] __oracles;
00504 ::operator delete(__target);
00505 }
00506 else
00507 __gnu_sequential::random_shuffle(__begin, __end, __rng);
00508 }
00509
00510
00511
00512
00513
00514
00515 template<typename _RAIter, typename _RandomNumberGenerator>
00516 inline void
00517 __parallel_random_shuffle(_RAIter __begin, _RAIter __end,
00518 _RandomNumberGenerator __rng = _RandomNumber())
00519 {
00520 typedef std::iterator_traits<_RAIter> _TraitsType;
00521 typedef typename _TraitsType::difference_type _DifferenceType;
00522 _DifferenceType __n = __end - __begin;
00523 __parallel_random_shuffle_drs(__begin, __end, __n,
00524 __get_max_threads(), __rng);
00525 }
00526 }
00527
00528 #endif