Index: gcc/libstdc++-v3/include/parallel/random_shuffle.h |
diff --git a/gcc/libstdc++-v3/include/parallel/random_shuffle.h b/gcc/libstdc++-v3/include/parallel/random_shuffle.h |
deleted file mode 100644 |
index 6e0ebef1523e8dab39d924418e4e99ec97c23f82..0000000000000000000000000000000000000000 |
--- a/gcc/libstdc++-v3/include/parallel/random_shuffle.h |
+++ /dev/null |
@@ -1,519 +0,0 @@ |
-// -*- C++ -*- |
- |
-// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. |
-// |
-// This file is part of the GNU ISO C++ Library. This library is free |
-// software; you can redistribute it and/or modify it under the terms |
-// of the GNU General Public License as published by the Free Software |
-// Foundation; either version 3, or (at your option) any later |
-// version. |
- |
-// This library is distributed in the hope that it will be useful, but |
-// WITHOUT ANY WARRANTY; without even the implied warranty of |
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
-// General Public License for more details. |
- |
-// Under Section 7 of GPL version 3, you are granted additional |
-// permissions described in the GCC Runtime Library Exception, version |
-// 3.1, as published by the Free Software Foundation. |
- |
-// You should have received a copy of the GNU General Public License and |
-// a copy of the GCC Runtime Library Exception along with this program; |
-// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
-// <http://www.gnu.org/licenses/>. |
- |
-/** @file parallel/random_shuffle.h |
- * @brief Parallel implementation of std::random_shuffle(). |
- * This file is a GNU parallel extension to the Standard C++ Library. |
- */ |
- |
-// Written by Johannes Singler. |
- |
-#ifndef _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H |
-#define _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H 1 |
- |
-#include <limits> |
-#include <bits/stl_numeric.h> |
-#include <parallel/parallel.h> |
-#include <parallel/random_number.h> |
- |
-namespace __gnu_parallel |
-{ |
-/** @brief Type to hold the index of a bin. |
- * |
- * Since many variables of this type are allocated, it should be |
- * chosen as small as possible. |
- */ |
-typedef unsigned short bin_index; |
- |
-/** @brief Data known to every thread participating in |
- __gnu_parallel::parallel_random_shuffle(). */ |
-template<typename RandomAccessIterator> |
- struct DRandomShufflingGlobalData |
- { |
- typedef std::iterator_traits<RandomAccessIterator> traits_type; |
- typedef typename traits_type::value_type value_type; |
- typedef typename traits_type::difference_type difference_type; |
- |
- /** @brief Begin iterator of the source. */ |
- RandomAccessIterator& source; |
- |
- /** @brief Temporary arrays for each thread. */ |
- value_type** temporaries; |
- |
- /** @brief Two-dimensional array to hold the thread-bin distribution. |
- * |
- * Dimensions (num_threads + 1) x (num_bins + 1). */ |
- difference_type** dist; |
- |
- /** @brief Start indexes of the threads' chunks. */ |
- difference_type* starts; |
- |
- /** @brief Number of the thread that will further process the |
- corresponding bin. */ |
- thread_index_t* bin_proc; |
- |
- /** @brief Number of bins to distribute to. */ |
- int num_bins; |
- |
- /** @brief Number of bits needed to address the bins. */ |
- int num_bits; |
- |
- /** @brief Constructor. */ |
- DRandomShufflingGlobalData(RandomAccessIterator& _source) |
- : source(_source) { } |
- }; |
- |
-/** @brief Local data for a thread participating in |
- __gnu_parallel::parallel_random_shuffle(). |
- */ |
-template<typename RandomAccessIterator, typename RandomNumberGenerator> |
- struct DRSSorterPU |
- { |
- /** @brief Number of threads participating in total. */ |
- int num_threads; |
- |
- /** @brief Begin index for bins taken care of by this thread. */ |
- bin_index bins_begin; |
- |
- /** @brief End index for bins taken care of by this thread. */ |
- bin_index bins_end; |
- |
- /** @brief Random seed for this thread. */ |
- uint32 seed; |
- |
- /** @brief Pointer to global data. */ |
- DRandomShufflingGlobalData<RandomAccessIterator>* sd; |
- }; |
- |
-/** @brief Generate a random number in @c [0,2^logp). |
- * @param logp Logarithm (basis 2) of the upper range bound. |
- * @param rng Random number generator to use. |
- */ |
-template<typename RandomNumberGenerator> |
- inline int |
- random_number_pow2(int logp, RandomNumberGenerator& rng) |
- { return rng.genrand_bits(logp); } |
- |
-/** @brief Random shuffle code executed by each thread. |
- * @param pus Array of thread-local data records. */ |
-template<typename RandomAccessIterator, typename RandomNumberGenerator> |
- void |
- parallel_random_shuffle_drs_pu(DRSSorterPU<RandomAccessIterator, |
- RandomNumberGenerator>* pus) |
- { |
- typedef std::iterator_traits<RandomAccessIterator> traits_type; |
- typedef typename traits_type::value_type value_type; |
- typedef typename traits_type::difference_type difference_type; |
- |
- thread_index_t iam = omp_get_thread_num(); |
- DRSSorterPU<RandomAccessIterator, RandomNumberGenerator>* d = &pus[iam]; |
- DRandomShufflingGlobalData<RandomAccessIterator>* sd = d->sd; |
- |
- // Indexing: dist[bin][processor] |
- difference_type length = sd->starts[iam + 1] - sd->starts[iam]; |
- bin_index* oracles = new bin_index[length]; |
- difference_type* dist = new difference_type[sd->num_bins + 1]; |
- bin_index* bin_proc = new bin_index[sd->num_bins]; |
- value_type** temporaries = new value_type*[d->num_threads]; |
- |
- // Compute oracles and count appearances. |
- for (bin_index b = 0; b < sd->num_bins + 1; ++b) |
- dist[b] = 0; |
- int num_bits = sd->num_bits; |
- |
- random_number rng(d->seed); |
- |
- // First main loop. |
- for (difference_type i = 0; i < length; ++i) |
- { |
- bin_index oracle = random_number_pow2(num_bits, rng); |
- oracles[i] = oracle; |
- |
- // To allow prefix (partial) sum. |
- ++(dist[oracle + 1]); |
- } |
- |
- for (bin_index b = 0; b < sd->num_bins + 1; ++b) |
- sd->dist[b][iam + 1] = dist[b]; |
- |
-# pragma omp barrier |
- |
-# pragma omp single |
- { |
- // Sum up bins, sd->dist[s + 1][d->num_threads] now contains the |
- // total number of items in bin s |
- for (bin_index s = 0; s < sd->num_bins; ++s) |
- __gnu_sequential::partial_sum(sd->dist[s + 1], |
- sd->dist[s + 1] + d->num_threads + 1, |
- sd->dist[s + 1]); |
- } |
- |
-# pragma omp barrier |
- |
- sequence_index_t offset = 0, global_offset = 0; |
- for (bin_index s = 0; s < d->bins_begin; ++s) |
- global_offset += sd->dist[s + 1][d->num_threads]; |
- |
-# pragma omp barrier |
- |
- for (bin_index s = d->bins_begin; s < d->bins_end; ++s) |
- { |
- for (int t = 0; t < d->num_threads + 1; ++t) |
- sd->dist[s + 1][t] += offset; |
- offset = sd->dist[s + 1][d->num_threads]; |
- } |
- |
- sd->temporaries[iam] = static_cast<value_type*>( |
- ::operator new(sizeof(value_type) * offset)); |
- |
-# pragma omp barrier |
- |
- // Draw local copies to avoid false sharing. |
- for (bin_index b = 0; b < sd->num_bins + 1; ++b) |
- dist[b] = sd->dist[b][iam]; |
- for (bin_index b = 0; b < sd->num_bins; ++b) |
- bin_proc[b] = sd->bin_proc[b]; |
- for (thread_index_t t = 0; t < d->num_threads; ++t) |
- temporaries[t] = sd->temporaries[t]; |
- |
- RandomAccessIterator source = sd->source; |
- difference_type start = sd->starts[iam]; |
- |
- // Distribute according to oracles, second main loop. |
- for (difference_type i = 0; i < length; ++i) |
- { |
- bin_index target_bin = oracles[i]; |
- thread_index_t target_p = bin_proc[target_bin]; |
- |
- // Last column [d->num_threads] stays unchanged. |
- ::new(&(temporaries[target_p][dist[target_bin + 1]++])) |
- value_type(*(source + i + start)); |
- } |
- |
- delete[] oracles; |
- delete[] dist; |
- delete[] bin_proc; |
- delete[] temporaries; |
- |
-# pragma omp barrier |
- |
- // Shuffle bins internally. |
- for (bin_index b = d->bins_begin; b < d->bins_end; ++b) |
- { |
- value_type* begin = |
- sd->temporaries[iam] + |
- ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]), |
- * end = |
- sd->temporaries[iam] + sd->dist[b + 1][d->num_threads]; |
- sequential_random_shuffle(begin, end, rng); |
- std::copy(begin, end, sd->source + global_offset + |
- ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads])); |
- } |
- |
- ::operator delete(sd->temporaries[iam]); |
- } |
- |
-/** @brief Round up to the next greater power of 2. |
- * @param x Integer to round up */ |
-template<typename T> |
- T |
- round_up_to_pow2(T x) |
- { |
- if (x <= 1) |
- return 1; |
- else |
- return (T)1 << (__log2(x - 1) + 1); |
- } |
- |
-/** @brief Main parallel random shuffle step. |
- * @param begin Begin iterator of sequence. |
- * @param end End iterator of sequence. |
- * @param n Length of sequence. |
- * @param num_threads Number of threads to use. |
- * @param rng Random number generator to use. |
- */ |
-template<typename RandomAccessIterator, typename RandomNumberGenerator> |
- void |
- parallel_random_shuffle_drs(RandomAccessIterator begin, |
- RandomAccessIterator end, |
- typename std::iterator_traits |
- <RandomAccessIterator>::difference_type n, |
- thread_index_t num_threads, |
- RandomNumberGenerator& rng) |
- { |
- typedef std::iterator_traits<RandomAccessIterator> traits_type; |
- typedef typename traits_type::value_type value_type; |
- typedef typename traits_type::difference_type difference_type; |
- |
- _GLIBCXX_CALL(n) |
- |
- const _Settings& __s = _Settings::get(); |
- |
- if (num_threads > n) |
- num_threads = static_cast<thread_index_t>(n); |
- |
- bin_index num_bins, num_bins_cache; |
- |
-#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 |
- // Try the L1 cache first. |
- |
- // Must fit into L1. |
- num_bins_cache = std::max<difference_type>( |
- 1, n / (__s.L1_cache_size_lb / sizeof(value_type))); |
- num_bins_cache = round_up_to_pow2(num_bins_cache); |
- |
- // No more buckets than TLB entries, power of 2 |
- // Power of 2 and at least one element per bin, at most the TLB size. |
- num_bins = std::min<difference_type>(n, num_bins_cache); |
- |
-#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB |
- // 2 TLB entries needed per bin. |
- num_bins = std::min<difference_type>(__s.TLB_size / 2, num_bins); |
-#endif |
- num_bins = round_up_to_pow2(num_bins); |
- |
- if (num_bins < num_bins_cache) |
- { |
-#endif |
- // Now try the L2 cache |
- // Must fit into L2 |
- num_bins_cache = static_cast<bin_index>(std::max<difference_type>( |
- 1, n / (__s.L2_cache_size / sizeof(value_type)))); |
- num_bins_cache = round_up_to_pow2(num_bins_cache); |
- |
- // No more buckets than TLB entries, power of 2. |
- num_bins = static_cast<bin_index>( |
- std::min(n, static_cast<difference_type>(num_bins_cache))); |
- // Power of 2 and at least one element per bin, at most the TLB size. |
-#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB |
- // 2 TLB entries needed per bin. |
- num_bins = std::min( |
- static_cast<difference_type>(__s.TLB_size / 2), num_bins); |
-#endif |
- num_bins = round_up_to_pow2(num_bins); |
-#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 |
- } |
-#endif |
- |
- num_threads = std::min<bin_index>(num_threads, num_bins); |
- |
- if (num_threads <= 1) |
- return sequential_random_shuffle(begin, end, rng); |
- |
- DRandomShufflingGlobalData<RandomAccessIterator> sd(begin); |
- DRSSorterPU<RandomAccessIterator, random_number >* pus; |
- difference_type* starts; |
- |
-# pragma omp parallel num_threads(num_threads) |
- { |
- thread_index_t num_threads = omp_get_num_threads(); |
-# pragma omp single |
- { |
- pus = new DRSSorterPU<RandomAccessIterator, random_number> |
- [num_threads]; |
- |
- sd.temporaries = new value_type*[num_threads]; |
- sd.dist = new difference_type*[num_bins + 1]; |
- sd.bin_proc = new thread_index_t[num_bins]; |
- for (bin_index b = 0; b < num_bins + 1; ++b) |
- sd.dist[b] = new difference_type[num_threads + 1]; |
- for (bin_index b = 0; b < (num_bins + 1); ++b) |
- { |
- sd.dist[0][0] = 0; |
- sd.dist[b][0] = 0; |
- } |
- starts = sd.starts = new difference_type[num_threads + 1]; |
- int bin_cursor = 0; |
- sd.num_bins = num_bins; |
- sd.num_bits = __log2(num_bins); |
- |
- difference_type chunk_length = n / num_threads, |
- split = n % num_threads, start = 0; |
- difference_type bin_chunk_length = num_bins / num_threads, |
- bin_split = num_bins % num_threads; |
- for (thread_index_t i = 0; i < num_threads; ++i) |
- { |
- starts[i] = start; |
- start += (i < split) ? (chunk_length + 1) : chunk_length; |
- int j = pus[i].bins_begin = bin_cursor; |
- |
- // Range of bins for this processor. |
- bin_cursor += (i < bin_split) ? |
- (bin_chunk_length + 1) : bin_chunk_length; |
- pus[i].bins_end = bin_cursor; |
- for (; j < bin_cursor; ++j) |
- sd.bin_proc[j] = i; |
- pus[i].num_threads = num_threads; |
- pus[i].seed = rng(std::numeric_limits<uint32>::max()); |
- pus[i].sd = &sd; |
- } |
- starts[num_threads] = start; |
- } //single |
- // Now shuffle in parallel. |
- parallel_random_shuffle_drs_pu(pus); |
- } // parallel |
- |
- delete[] starts; |
- delete[] sd.bin_proc; |
- for (int s = 0; s < (num_bins + 1); ++s) |
- delete[] sd.dist[s]; |
- delete[] sd.dist; |
- delete[] sd.temporaries; |
- |
- delete[] pus; |
- } |
- |
-/** @brief Sequential cache-efficient random shuffle. |
- * @param begin Begin iterator of sequence. |
- * @param end End iterator of sequence. |
- * @param rng Random number generator to use. |
- */ |
-template<typename RandomAccessIterator, typename RandomNumberGenerator> |
- void |
- sequential_random_shuffle(RandomAccessIterator begin, |
- RandomAccessIterator end, |
- RandomNumberGenerator& rng) |
- { |
- typedef std::iterator_traits<RandomAccessIterator> traits_type; |
- typedef typename traits_type::value_type value_type; |
- typedef typename traits_type::difference_type difference_type; |
- |
- difference_type n = end - begin; |
- const _Settings& __s = _Settings::get(); |
- |
- bin_index num_bins, num_bins_cache; |
- |
-#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 |
- // Try the L1 cache first, must fit into L1. |
- num_bins_cache = |
- std::max<difference_type> |
- (1, n / (__s.L1_cache_size_lb / sizeof(value_type))); |
- num_bins_cache = round_up_to_pow2(num_bins_cache); |
- |
- // No more buckets than TLB entries, power of 2 |
- // Power of 2 and at least one element per bin, at most the TLB size |
- num_bins = std::min(n, (difference_type)num_bins_cache); |
-#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB |
- // 2 TLB entries needed per bin |
- num_bins = std::min((difference_type)__s.TLB_size / 2, num_bins); |
-#endif |
- num_bins = round_up_to_pow2(num_bins); |
- |
- if (num_bins < num_bins_cache) |
- { |
-#endif |
- // Now try the L2 cache, must fit into L2. |
- num_bins_cache = |
- static_cast<bin_index>(std::max<difference_type>( |
- 1, n / (__s.L2_cache_size / sizeof(value_type)))); |
- num_bins_cache = round_up_to_pow2(num_bins_cache); |
- |
- // No more buckets than TLB entries, power of 2 |
- // Power of 2 and at least one element per bin, at most the TLB size. |
- num_bins = static_cast<bin_index> |
- (std::min(n, static_cast<difference_type>(num_bins_cache))); |
- |
-#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB |
- // 2 TLB entries needed per bin |
- num_bins = |
- std::min<difference_type>(__s.TLB_size / 2, num_bins); |
-#endif |
- num_bins = round_up_to_pow2(num_bins); |
-#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 |
- } |
-#endif |
- |
- int num_bits = __log2(num_bins); |
- |
- if (num_bins > 1) |
- { |
- value_type* target = static_cast<value_type*>( |
- ::operator new(sizeof(value_type) * n)); |
- bin_index* oracles = new bin_index[n]; |
- difference_type* dist0 = new difference_type[num_bins + 1], |
- * dist1 = new difference_type[num_bins + 1]; |
- |
- for (int b = 0; b < num_bins + 1; ++b) |
- dist0[b] = 0; |
- |
- random_number bitrng(rng(0xFFFFFFFF)); |
- |
- for (difference_type i = 0; i < n; ++i) |
- { |
- bin_index oracle = random_number_pow2(num_bits, bitrng); |
- oracles[i] = oracle; |
- |
- // To allow prefix (partial) sum. |
- ++(dist0[oracle + 1]); |
- } |
- |
- // Sum up bins. |
- __gnu_sequential::partial_sum(dist0, dist0 + num_bins + 1, dist0); |
- |
- for (int b = 0; b < num_bins + 1; ++b) |
- dist1[b] = dist0[b]; |
- |
- // Distribute according to oracles. |
- for (difference_type i = 0; i < n; ++i) |
- ::new(&(target[(dist0[oracles[i]])++])) value_type(*(begin + i)); |
- |
- for (int b = 0; b < num_bins; ++b) |
- { |
- sequential_random_shuffle(target + dist1[b], |
- target + dist1[b + 1], |
- rng); |
- } |
- |
- // Copy elements back. |
- std::copy(target, target + n, begin); |
- |
- delete[] dist0; |
- delete[] dist1; |
- delete[] oracles; |
- ::operator delete(target); |
- } |
- else |
- __gnu_sequential::random_shuffle(begin, end, rng); |
- } |
- |
-/** @brief Parallel random public call. |
- * @param begin Begin iterator of sequence. |
- * @param end End iterator of sequence. |
- * @param rng Random number generator to use. |
- */ |
-template<typename RandomAccessIterator, typename RandomNumberGenerator> |
- inline void |
- parallel_random_shuffle(RandomAccessIterator begin, |
- RandomAccessIterator end, |
- RandomNumberGenerator rng = random_number()) |
- { |
- typedef std::iterator_traits<RandomAccessIterator> traits_type; |
- typedef typename traits_type::difference_type difference_type; |
- difference_type n = end - begin; |
- parallel_random_shuffle_drs(begin, end, n, get_max_threads(), rng) ; |
- } |
- |
-} |
- |
-#endif /* _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H */ |