| Index: gcc/libstdc++-v3/include/parallel/random_shuffle.h
|
| diff --git a/gcc/libstdc++-v3/include/parallel/random_shuffle.h b/gcc/libstdc++-v3/include/parallel/random_shuffle.h
|
| deleted file mode 100644
|
| index 6e0ebef1523e8dab39d924418e4e99ec97c23f82..0000000000000000000000000000000000000000
|
| --- a/gcc/libstdc++-v3/include/parallel/random_shuffle.h
|
| +++ /dev/null
|
| @@ -1,519 +0,0 @@
|
| -// -*- C++ -*-
|
| -
|
| -// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
|
| -//
|
| -// This file is part of the GNU ISO C++ Library. This library is free
|
| -// software; you can redistribute it and/or modify it under the terms
|
| -// of the GNU General Public License as published by the Free Software
|
| -// Foundation; either version 3, or (at your option) any later
|
| -// version.
|
| -
|
| -// This library is distributed in the hope that it will be useful, but
|
| -// WITHOUT ANY WARRANTY; without even the implied warranty of
|
| -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
| -// General Public License for more details.
|
| -
|
| -// Under Section 7 of GPL version 3, you are granted additional
|
| -// permissions described in the GCC Runtime Library Exception, version
|
| -// 3.1, as published by the Free Software Foundation.
|
| -
|
| -// You should have received a copy of the GNU General Public License and
|
| -// a copy of the GCC Runtime Library Exception along with this program;
|
| -// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
| -// <http://www.gnu.org/licenses/>.
|
| -
|
| -/** @file parallel/random_shuffle.h
|
| - * @brief Parallel implementation of std::random_shuffle().
|
| - * This file is a GNU parallel extension to the Standard C++ Library.
|
| - */
|
| -
|
| -// Written by Johannes Singler.
|
| -
|
| -#ifndef _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H
|
| -#define _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H 1
|
| -
|
| -#include <limits>
|
| -#include <bits/stl_numeric.h>
|
| -#include <parallel/parallel.h>
|
| -#include <parallel/random_number.h>
|
| -
|
| -namespace __gnu_parallel
|
| -{
|
| -/** @brief Type to hold the index of a bin.
|
| - *
|
| - * Since many variables of this type are allocated, it should be
|
| - * chosen as small as possible.
|
| - */
|
| -typedef unsigned short bin_index;
|
| -
|
| -/** @brief Data known to every thread participating in
|
| - __gnu_parallel::parallel_random_shuffle(). */
|
| -template<typename RandomAccessIterator>
|
| - struct DRandomShufflingGlobalData
|
| - {
|
| - typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
| - typedef typename traits_type::value_type value_type;
|
| - typedef typename traits_type::difference_type difference_type;
|
| -
|
| - /** @brief Begin iterator of the source. */
|
| - RandomAccessIterator& source;
|
| -
|
| - /** @brief Temporary arrays for each thread. */
|
| - value_type** temporaries;
|
| -
|
| - /** @brief Two-dimensional array to hold the thread-bin distribution.
|
| - *
|
| - * Dimensions (num_threads + 1) x (num_bins + 1). */
|
| - difference_type** dist;
|
| -
|
| - /** @brief Start indexes of the threads' chunks. */
|
| - difference_type* starts;
|
| -
|
| - /** @brief Number of the thread that will further process the
|
| - corresponding bin. */
|
| - thread_index_t* bin_proc;
|
| -
|
| - /** @brief Number of bins to distribute to. */
|
| - int num_bins;
|
| -
|
| - /** @brief Number of bits needed to address the bins. */
|
| - int num_bits;
|
| -
|
| - /** @brief Constructor. */
|
| - DRandomShufflingGlobalData(RandomAccessIterator& _source)
|
| - : source(_source) { }
|
| - };
|
| -
|
| -/** @brief Local data for a thread participating in
|
| - __gnu_parallel::parallel_random_shuffle().
|
| - */
|
| -template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
| - struct DRSSorterPU
|
| - {
|
| - /** @brief Number of threads participating in total. */
|
| - int num_threads;
|
| -
|
| - /** @brief Begin index for bins taken care of by this thread. */
|
| - bin_index bins_begin;
|
| -
|
| - /** @brief End index for bins taken care of by this thread. */
|
| - bin_index bins_end;
|
| -
|
| - /** @brief Random seed for this thread. */
|
| - uint32 seed;
|
| -
|
| - /** @brief Pointer to global data. */
|
| - DRandomShufflingGlobalData<RandomAccessIterator>* sd;
|
| - };
|
| -
|
| -/** @brief Generate a random number in @c [0,2^logp).
|
| - * @param logp Logarithm (basis 2) of the upper range bound.
|
| - * @param rng Random number generator to use.
|
| - */
|
| -template<typename RandomNumberGenerator>
|
| - inline int
|
| - random_number_pow2(int logp, RandomNumberGenerator& rng)
|
| - { return rng.genrand_bits(logp); }
|
| -
|
| -/** @brief Random shuffle code executed by each thread.
|
| - * @param pus Array of thread-local data records. */
|
| -template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
| - void
|
| - parallel_random_shuffle_drs_pu(DRSSorterPU<RandomAccessIterator,
|
| - RandomNumberGenerator>* pus)
|
| - {
|
| - typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
| - typedef typename traits_type::value_type value_type;
|
| - typedef typename traits_type::difference_type difference_type;
|
| -
|
| - thread_index_t iam = omp_get_thread_num();
|
| - DRSSorterPU<RandomAccessIterator, RandomNumberGenerator>* d = &pus[iam];
|
| - DRandomShufflingGlobalData<RandomAccessIterator>* sd = d->sd;
|
| -
|
| - // Indexing: dist[bin][processor]
|
| - difference_type length = sd->starts[iam + 1] - sd->starts[iam];
|
| - bin_index* oracles = new bin_index[length];
|
| - difference_type* dist = new difference_type[sd->num_bins + 1];
|
| - bin_index* bin_proc = new bin_index[sd->num_bins];
|
| - value_type** temporaries = new value_type*[d->num_threads];
|
| -
|
| - // Compute oracles and count appearances.
|
| - for (bin_index b = 0; b < sd->num_bins + 1; ++b)
|
| - dist[b] = 0;
|
| - int num_bits = sd->num_bits;
|
| -
|
| - random_number rng(d->seed);
|
| -
|
| - // First main loop.
|
| - for (difference_type i = 0; i < length; ++i)
|
| - {
|
| - bin_index oracle = random_number_pow2(num_bits, rng);
|
| - oracles[i] = oracle;
|
| -
|
| - // To allow prefix (partial) sum.
|
| - ++(dist[oracle + 1]);
|
| - }
|
| -
|
| - for (bin_index b = 0; b < sd->num_bins + 1; ++b)
|
| - sd->dist[b][iam + 1] = dist[b];
|
| -
|
| -# pragma omp barrier
|
| -
|
| -# pragma omp single
|
| - {
|
| - // Sum up bins, sd->dist[s + 1][d->num_threads] now contains the
|
| - // total number of items in bin s
|
| - for (bin_index s = 0; s < sd->num_bins; ++s)
|
| - __gnu_sequential::partial_sum(sd->dist[s + 1],
|
| - sd->dist[s + 1] + d->num_threads + 1,
|
| - sd->dist[s + 1]);
|
| - }
|
| -
|
| -# pragma omp barrier
|
| -
|
| - sequence_index_t offset = 0, global_offset = 0;
|
| - for (bin_index s = 0; s < d->bins_begin; ++s)
|
| - global_offset += sd->dist[s + 1][d->num_threads];
|
| -
|
| -# pragma omp barrier
|
| -
|
| - for (bin_index s = d->bins_begin; s < d->bins_end; ++s)
|
| - {
|
| - for (int t = 0; t < d->num_threads + 1; ++t)
|
| - sd->dist[s + 1][t] += offset;
|
| - offset = sd->dist[s + 1][d->num_threads];
|
| - }
|
| -
|
| - sd->temporaries[iam] = static_cast<value_type*>(
|
| - ::operator new(sizeof(value_type) * offset));
|
| -
|
| -# pragma omp barrier
|
| -
|
| - // Draw local copies to avoid false sharing.
|
| - for (bin_index b = 0; b < sd->num_bins + 1; ++b)
|
| - dist[b] = sd->dist[b][iam];
|
| - for (bin_index b = 0; b < sd->num_bins; ++b)
|
| - bin_proc[b] = sd->bin_proc[b];
|
| - for (thread_index_t t = 0; t < d->num_threads; ++t)
|
| - temporaries[t] = sd->temporaries[t];
|
| -
|
| - RandomAccessIterator source = sd->source;
|
| - difference_type start = sd->starts[iam];
|
| -
|
| - // Distribute according to oracles, second main loop.
|
| - for (difference_type i = 0; i < length; ++i)
|
| - {
|
| - bin_index target_bin = oracles[i];
|
| - thread_index_t target_p = bin_proc[target_bin];
|
| -
|
| - // Last column [d->num_threads] stays unchanged.
|
| - ::new(&(temporaries[target_p][dist[target_bin + 1]++]))
|
| - value_type(*(source + i + start));
|
| - }
|
| -
|
| - delete[] oracles;
|
| - delete[] dist;
|
| - delete[] bin_proc;
|
| - delete[] temporaries;
|
| -
|
| -# pragma omp barrier
|
| -
|
| - // Shuffle bins internally.
|
| - for (bin_index b = d->bins_begin; b < d->bins_end; ++b)
|
| - {
|
| - value_type* begin =
|
| - sd->temporaries[iam] +
|
| - ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]),
|
| - * end =
|
| - sd->temporaries[iam] + sd->dist[b + 1][d->num_threads];
|
| - sequential_random_shuffle(begin, end, rng);
|
| - std::copy(begin, end, sd->source + global_offset +
|
| - ((b == d->bins_begin) ? 0 : sd->dist[b][d->num_threads]));
|
| - }
|
| -
|
| - ::operator delete(sd->temporaries[iam]);
|
| - }
|
| -
|
| -/** @brief Round up to the next greater power of 2.
|
| - * @param x Integer to round up */
|
| -template<typename T>
|
| - T
|
| - round_up_to_pow2(T x)
|
| - {
|
| - if (x <= 1)
|
| - return 1;
|
| - else
|
| - return (T)1 << (__log2(x - 1) + 1);
|
| - }
|
| -
|
| -/** @brief Main parallel random shuffle step.
|
| - * @param begin Begin iterator of sequence.
|
| - * @param end End iterator of sequence.
|
| - * @param n Length of sequence.
|
| - * @param num_threads Number of threads to use.
|
| - * @param rng Random number generator to use.
|
| - */
|
| -template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
| - void
|
| - parallel_random_shuffle_drs(RandomAccessIterator begin,
|
| - RandomAccessIterator end,
|
| - typename std::iterator_traits
|
| - <RandomAccessIterator>::difference_type n,
|
| - thread_index_t num_threads,
|
| - RandomNumberGenerator& rng)
|
| - {
|
| - typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
| - typedef typename traits_type::value_type value_type;
|
| - typedef typename traits_type::difference_type difference_type;
|
| -
|
| - _GLIBCXX_CALL(n)
|
| -
|
| - const _Settings& __s = _Settings::get();
|
| -
|
| - if (num_threads > n)
|
| - num_threads = static_cast<thread_index_t>(n);
|
| -
|
| - bin_index num_bins, num_bins_cache;
|
| -
|
| -#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
|
| - // Try the L1 cache first.
|
| -
|
| - // Must fit into L1.
|
| - num_bins_cache = std::max<difference_type>(
|
| - 1, n / (__s.L1_cache_size_lb / sizeof(value_type)));
|
| - num_bins_cache = round_up_to_pow2(num_bins_cache);
|
| -
|
| - // No more buckets than TLB entries, power of 2
|
| - // Power of 2 and at least one element per bin, at most the TLB size.
|
| - num_bins = std::min<difference_type>(n, num_bins_cache);
|
| -
|
| -#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
|
| - // 2 TLB entries needed per bin.
|
| - num_bins = std::min<difference_type>(__s.TLB_size / 2, num_bins);
|
| -#endif
|
| - num_bins = round_up_to_pow2(num_bins);
|
| -
|
| - if (num_bins < num_bins_cache)
|
| - {
|
| -#endif
|
| - // Now try the L2 cache
|
| - // Must fit into L2
|
| - num_bins_cache = static_cast<bin_index>(std::max<difference_type>(
|
| - 1, n / (__s.L2_cache_size / sizeof(value_type))));
|
| - num_bins_cache = round_up_to_pow2(num_bins_cache);
|
| -
|
| - // No more buckets than TLB entries, power of 2.
|
| - num_bins = static_cast<bin_index>(
|
| - std::min(n, static_cast<difference_type>(num_bins_cache)));
|
| - // Power of 2 and at least one element per bin, at most the TLB size.
|
| -#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
|
| - // 2 TLB entries needed per bin.
|
| - num_bins = std::min(
|
| - static_cast<difference_type>(__s.TLB_size / 2), num_bins);
|
| -#endif
|
| - num_bins = round_up_to_pow2(num_bins);
|
| -#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
|
| - }
|
| -#endif
|
| -
|
| - num_threads = std::min<bin_index>(num_threads, num_bins);
|
| -
|
| - if (num_threads <= 1)
|
| - return sequential_random_shuffle(begin, end, rng);
|
| -
|
| - DRandomShufflingGlobalData<RandomAccessIterator> sd(begin);
|
| - DRSSorterPU<RandomAccessIterator, random_number >* pus;
|
| - difference_type* starts;
|
| -
|
| -# pragma omp parallel num_threads(num_threads)
|
| - {
|
| - thread_index_t num_threads = omp_get_num_threads();
|
| -# pragma omp single
|
| - {
|
| - pus = new DRSSorterPU<RandomAccessIterator, random_number>
|
| - [num_threads];
|
| -
|
| - sd.temporaries = new value_type*[num_threads];
|
| - sd.dist = new difference_type*[num_bins + 1];
|
| - sd.bin_proc = new thread_index_t[num_bins];
|
| - for (bin_index b = 0; b < num_bins + 1; ++b)
|
| - sd.dist[b] = new difference_type[num_threads + 1];
|
| - for (bin_index b = 0; b < (num_bins + 1); ++b)
|
| - {
|
| - sd.dist[0][0] = 0;
|
| - sd.dist[b][0] = 0;
|
| - }
|
| - starts = sd.starts = new difference_type[num_threads + 1];
|
| - int bin_cursor = 0;
|
| - sd.num_bins = num_bins;
|
| - sd.num_bits = __log2(num_bins);
|
| -
|
| - difference_type chunk_length = n / num_threads,
|
| - split = n % num_threads, start = 0;
|
| - difference_type bin_chunk_length = num_bins / num_threads,
|
| - bin_split = num_bins % num_threads;
|
| - for (thread_index_t i = 0; i < num_threads; ++i)
|
| - {
|
| - starts[i] = start;
|
| - start += (i < split) ? (chunk_length + 1) : chunk_length;
|
| - int j = pus[i].bins_begin = bin_cursor;
|
| -
|
| - // Range of bins for this processor.
|
| - bin_cursor += (i < bin_split) ?
|
| - (bin_chunk_length + 1) : bin_chunk_length;
|
| - pus[i].bins_end = bin_cursor;
|
| - for (; j < bin_cursor; ++j)
|
| - sd.bin_proc[j] = i;
|
| - pus[i].num_threads = num_threads;
|
| - pus[i].seed = rng(std::numeric_limits<uint32>::max());
|
| - pus[i].sd = &sd;
|
| - }
|
| - starts[num_threads] = start;
|
| - } //single
|
| - // Now shuffle in parallel.
|
| - parallel_random_shuffle_drs_pu(pus);
|
| - } // parallel
|
| -
|
| - delete[] starts;
|
| - delete[] sd.bin_proc;
|
| - for (int s = 0; s < (num_bins + 1); ++s)
|
| - delete[] sd.dist[s];
|
| - delete[] sd.dist;
|
| - delete[] sd.temporaries;
|
| -
|
| - delete[] pus;
|
| - }
|
| -
|
| -/** @brief Sequential cache-efficient random shuffle.
|
| - * @param begin Begin iterator of sequence.
|
| - * @param end End iterator of sequence.
|
| - * @param rng Random number generator to use.
|
| - */
|
| -template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
| - void
|
| - sequential_random_shuffle(RandomAccessIterator begin,
|
| - RandomAccessIterator end,
|
| - RandomNumberGenerator& rng)
|
| - {
|
| - typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
| - typedef typename traits_type::value_type value_type;
|
| - typedef typename traits_type::difference_type difference_type;
|
| -
|
| - difference_type n = end - begin;
|
| - const _Settings& __s = _Settings::get();
|
| -
|
| - bin_index num_bins, num_bins_cache;
|
| -
|
| -#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
|
| - // Try the L1 cache first, must fit into L1.
|
| - num_bins_cache =
|
| - std::max<difference_type>
|
| - (1, n / (__s.L1_cache_size_lb / sizeof(value_type)));
|
| - num_bins_cache = round_up_to_pow2(num_bins_cache);
|
| -
|
| - // No more buckets than TLB entries, power of 2
|
| - // Power of 2 and at least one element per bin, at most the TLB size
|
| - num_bins = std::min(n, (difference_type)num_bins_cache);
|
| -#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
|
| - // 2 TLB entries needed per bin
|
| - num_bins = std::min((difference_type)__s.TLB_size / 2, num_bins);
|
| -#endif
|
| - num_bins = round_up_to_pow2(num_bins);
|
| -
|
| - if (num_bins < num_bins_cache)
|
| - {
|
| -#endif
|
| - // Now try the L2 cache, must fit into L2.
|
| - num_bins_cache =
|
| - static_cast<bin_index>(std::max<difference_type>(
|
| - 1, n / (__s.L2_cache_size / sizeof(value_type))));
|
| - num_bins_cache = round_up_to_pow2(num_bins_cache);
|
| -
|
| - // No more buckets than TLB entries, power of 2
|
| - // Power of 2 and at least one element per bin, at most the TLB size.
|
| - num_bins = static_cast<bin_index>
|
| - (std::min(n, static_cast<difference_type>(num_bins_cache)));
|
| -
|
| -#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
|
| - // 2 TLB entries needed per bin
|
| - num_bins =
|
| - std::min<difference_type>(__s.TLB_size / 2, num_bins);
|
| -#endif
|
| - num_bins = round_up_to_pow2(num_bins);
|
| -#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
|
| - }
|
| -#endif
|
| -
|
| - int num_bits = __log2(num_bins);
|
| -
|
| - if (num_bins > 1)
|
| - {
|
| - value_type* target = static_cast<value_type*>(
|
| - ::operator new(sizeof(value_type) * n));
|
| - bin_index* oracles = new bin_index[n];
|
| - difference_type* dist0 = new difference_type[num_bins + 1],
|
| - * dist1 = new difference_type[num_bins + 1];
|
| -
|
| - for (int b = 0; b < num_bins + 1; ++b)
|
| - dist0[b] = 0;
|
| -
|
| - random_number bitrng(rng(0xFFFFFFFF));
|
| -
|
| - for (difference_type i = 0; i < n; ++i)
|
| - {
|
| - bin_index oracle = random_number_pow2(num_bits, bitrng);
|
| - oracles[i] = oracle;
|
| -
|
| - // To allow prefix (partial) sum.
|
| - ++(dist0[oracle + 1]);
|
| - }
|
| -
|
| - // Sum up bins.
|
| - __gnu_sequential::partial_sum(dist0, dist0 + num_bins + 1, dist0);
|
| -
|
| - for (int b = 0; b < num_bins + 1; ++b)
|
| - dist1[b] = dist0[b];
|
| -
|
| - // Distribute according to oracles.
|
| - for (difference_type i = 0; i < n; ++i)
|
| - ::new(&(target[(dist0[oracles[i]])++])) value_type(*(begin + i));
|
| -
|
| - for (int b = 0; b < num_bins; ++b)
|
| - {
|
| - sequential_random_shuffle(target + dist1[b],
|
| - target + dist1[b + 1],
|
| - rng);
|
| - }
|
| -
|
| - // Copy elements back.
|
| - std::copy(target, target + n, begin);
|
| -
|
| - delete[] dist0;
|
| - delete[] dist1;
|
| - delete[] oracles;
|
| - ::operator delete(target);
|
| - }
|
| - else
|
| - __gnu_sequential::random_shuffle(begin, end, rng);
|
| - }
|
| -
|
| -/** @brief Parallel random public call.
|
| - * @param begin Begin iterator of sequence.
|
| - * @param end End iterator of sequence.
|
| - * @param rng Random number generator to use.
|
| - */
|
| -template<typename RandomAccessIterator, typename RandomNumberGenerator>
|
| - inline void
|
| - parallel_random_shuffle(RandomAccessIterator begin,
|
| - RandomAccessIterator end,
|
| - RandomNumberGenerator rng = random_number())
|
| - {
|
| - typedef std::iterator_traits<RandomAccessIterator> traits_type;
|
| - typedef typename traits_type::difference_type difference_type;
|
| - difference_type n = end - begin;
|
| - parallel_random_shuffle_drs(begin, end, n, get_max_threads(), rng) ;
|
| - }
|
| -
|
| -}
|
| -
|
| -#endif /* _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H */
|
|
|