Viewing file: partition.h (14.56 KB) -rw-r--r-- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
// -*- C++ -*-
// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the terms // of the GNU General Public License as published by the Free Software // Foundation; either version 3, or (at your option) any later // version.
// This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details.
// Under Section 7 of GPL version 3, you are granted additional // permissions described in the GCC Runtime Library Exception, version // 3.1, as published by the Free Software Foundation.
// You should have received a copy of the GNU General Public License and // a copy of the GCC Runtime Library Exception along with this program; // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see // <http://www.gnu.org/licenses/>.
/** @file parallel/partition.h * @brief Parallel implementation of std::partition(), * std::nth_element(), and std::partial_sort(). * This file is a GNU parallel extension to the Standard C++ Library. */
// Written by Johannes Singler and Felix Putze.
#ifndef _GLIBCXX_PARALLEL_PARTITION_H #define _GLIBCXX_PARALLEL_PARTITION_H 1
#include <parallel/basic_iterator.h> #include <parallel/sort.h> #include <parallel/random_number.h> #include <bits/stl_algo.h> #include <parallel/parallel.h>
/** @brief Decide whether to declare certain variables volatile. */ #define _GLIBCXX_VOLATILE volatile
namespace __gnu_parallel { /** @brief Parallel implementation of std::partition. * @param begin Begin iterator of input sequence to split. * @param end End iterator of input sequence to split. * @param pred Partition predicate, possibly including some kind of pivot. * @param num_threads Maximum number of threads to use for this task. * @return Number of elements not fulfilling the predicate. */ template<typename RandomAccessIterator, typename Predicate> typename std::iterator_traits<RandomAccessIterator>::difference_type parallel_partition(RandomAccessIterator begin, RandomAccessIterator end, Predicate pred, thread_index_t num_threads) { typedef std::iterator_traits<RandomAccessIterator> traits_type; typedef typename traits_type::value_type value_type; typedef typename traits_type::difference_type difference_type;
difference_type n = end - begin;
_GLIBCXX_CALL(n)
const _Settings& __s = _Settings::get();
// Shared. _GLIBCXX_VOLATILE difference_type left = 0, right = n - 1; _GLIBCXX_VOLATILE difference_type leftover_left, leftover_right; _GLIBCXX_VOLATILE difference_type leftnew, rightnew;
bool* reserved_left = NULL, * reserved_right = NULL;
difference_type chunk_size = __s.partition_chunk_size;
omp_lock_t result_lock; omp_init_lock(&result_lock);
//at least two chunks per thread if(right - left + 1 >= 2 * num_threads * chunk_size) # pragma omp parallel num_threads(num_threads) { # pragma omp single { num_threads = omp_get_num_threads(); reserved_left = new bool[num_threads]; reserved_right = new bool[num_threads];
if (__s.partition_chunk_share > 0.0) chunk_size = std::max<difference_type>(__s.partition_chunk_size, (double)n * __s.partition_chunk_share / (double)num_threads); else chunk_size = __s.partition_chunk_size; }
while (right - left + 1 >= 2 * num_threads * chunk_size) { # pragma omp single { difference_type num_chunks = (right - left + 1) / chunk_size;
for (int r = 0; r < num_threads; ++r) { reserved_left[r] = false; reserved_right[r] = false; } leftover_left = 0; leftover_right = 0; } //implicit barrier
// Private. difference_type thread_left, thread_left_border, thread_right, thread_right_border; thread_left = left + 1;
// Just to satisfy the condition below. thread_left_border = thread_left - 1; thread_right = n - 1; thread_right_border = thread_right + 1;
bool iam_finished = false; while (!iam_finished) { if (thread_left > thread_left_border) { omp_set_lock(&result_lock); if (left + (chunk_size - 1) > right) iam_finished = true; else { thread_left = left; thread_left_border = left + (chunk_size - 1); left += chunk_size; } omp_unset_lock(&result_lock); }
if (thread_right < thread_right_border) { omp_set_lock(&result_lock); if (left > right - (chunk_size - 1)) iam_finished = true; else { thread_right = right; thread_right_border = right - (chunk_size - 1); right -= chunk_size; } omp_unset_lock(&result_lock); }
if (iam_finished) break;
// Swap as usual. while (thread_left < thread_right) { while (pred(begin[thread_left]) && thread_left <= thread_left_border) ++thread_left; while (!pred(begin[thread_right]) && thread_right >= thread_right_border) --thread_right;
if (thread_left > thread_left_border || thread_right < thread_right_border) // Fetch new chunk(s). break;
std::swap(begin[thread_left], begin[thread_right]); ++thread_left; --thread_right; } }
// Now swap the leftover chunks to the right places. if (thread_left <= thread_left_border) # pragma omp atomic ++leftover_left; if (thread_right >= thread_right_border) # pragma omp atomic ++leftover_right;
# pragma omp barrier
# pragma omp single { leftnew = left - leftover_left * chunk_size; rightnew = right + leftover_right * chunk_size; }
# pragma omp barrier
// <=> thread_left_border + (chunk_size - 1) >= leftnew if (thread_left <= thread_left_border && thread_left_border >= leftnew) { // Chunk already in place, reserve spot. reserved_left[(left - (thread_left_border + 1)) / chunk_size] = true; }
// <=> thread_right_border - (chunk_size - 1) <= rightnew if (thread_right >= thread_right_border && thread_right_border <= rightnew) { // Chunk already in place, reserve spot. reserved_right[((thread_right_border - 1) - right) / chunk_size] = true; }
# pragma omp barrier
if (thread_left <= thread_left_border && thread_left_border < leftnew) { // Find spot and swap. difference_type swapstart = -1; omp_set_lock(&result_lock); for (int r = 0; r < leftover_left; ++r) if (!reserved_left[r]) { reserved_left[r] = true; swapstart = left - (r + 1) * chunk_size; break; } omp_unset_lock(&result_lock);
#if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT(swapstart != -1); #endif
std::swap_ranges(begin + thread_left_border - (chunk_size - 1), begin + thread_left_border + 1, begin + swapstart); }
if (thread_right >= thread_right_border && thread_right_border > rightnew) { // Find spot and swap difference_type swapstart = -1; omp_set_lock(&result_lock); for (int r = 0; r < leftover_right; ++r) if (!reserved_right[r]) { reserved_right[r] = true; swapstart = right + r * chunk_size + 1; break; } omp_unset_lock(&result_lock);
#if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT(swapstart != -1); #endif
std::swap_ranges(begin + thread_right_border, begin + thread_right_border + chunk_size, begin + swapstart); } #if _GLIBCXX_ASSERTIONS # pragma omp barrier
# pragma omp single { for (int r = 0; r < leftover_left; ++r) _GLIBCXX_PARALLEL_ASSERT(reserved_left[r]); for (int r = 0; r < leftover_right; ++r) _GLIBCXX_PARALLEL_ASSERT(reserved_right[r]); }
# pragma omp barrier #endif
# pragma omp barrier
left = leftnew; right = rightnew; } # pragma omp flush(left, right) } // end "recursion" //parallel
difference_type final_left = left, final_right = right;
while (final_left < final_right) { // Go right until key is geq than pivot. while (pred(begin[final_left]) && final_left < final_right) ++final_left;
// Go left until key is less than pivot. while (!pred(begin[final_right]) && final_left < final_right) --final_right;
if (final_left == final_right) break; std::swap(begin[final_left], begin[final_right]); ++final_left; --final_right; }
// All elements on the left side are < piv, all elements on the // right are >= piv delete[] reserved_left; delete[] reserved_right;
omp_destroy_lock(&result_lock);
// Element "between" final_left and final_right might not have // been regarded yet if (final_left < n && !pred(begin[final_left])) // Really swapped. return final_left; else return final_left + 1; }
/** * @brief Parallel implementation of std::nth_element(). * @param begin Begin iterator of input sequence. * @param nth Iterator of element that must be in position afterwards. * @param end End iterator of input sequence. * @param comp Comparator. */ template<typename RandomAccessIterator, typename Comparator> void parallel_nth_element(RandomAccessIterator begin, RandomAccessIterator nth, RandomAccessIterator end, Comparator comp) { typedef std::iterator_traits<RandomAccessIterator> traits_type; typedef typename traits_type::value_type value_type; typedef typename traits_type::difference_type difference_type;
_GLIBCXX_CALL(end - begin)
RandomAccessIterator split; random_number rng;
const _Settings& __s = _Settings::get(); difference_type minimum_length = std::max<difference_type>(2, std::max(__s.nth_element_minimal_n, __s.partition_minimal_n));
// Break if input range to small. while (static_cast<sequence_index_t>(end - begin) >= minimum_length) { difference_type n = end - begin;
RandomAccessIterator pivot_pos = begin + rng(n);
// Swap pivot_pos value to end. if (pivot_pos != (end - 1)) std::swap(*pivot_pos, *(end - 1)); pivot_pos = end - 1;
// XXX Comparator must have first_value_type, second_value_type, // result_type // Comparator == __gnu_parallel::lexicographic<S, int, // __gnu_parallel::less<S, S> > // pivot_pos == std::pair<S, int>* // XXX binder2nd only for RandomAccessIterators?? __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool> pred(comp, *pivot_pos);
// Divide, leave pivot unchanged in last place. RandomAccessIterator split_pos1, split_pos2; split_pos1 = begin + parallel_partition(begin, end - 1, pred, get_max_threads());
// Left side: < pivot_pos; right side: >= pivot_pos
// Swap pivot back to middle. if (split_pos1 != pivot_pos) std::swap(*split_pos1, *pivot_pos); pivot_pos = split_pos1;
// In case all elements are equal, split_pos1 == 0 if ((split_pos1 + 1 - begin) < (n >> 7) || (end - split_pos1) < (n >> 7)) { // Very unequal split, one part smaller than one 128th // elements not strictly larger than the pivot. __gnu_parallel::unary_negate<__gnu_parallel:: binder1st<Comparator, value_type, value_type, bool>, value_type> pred(__gnu_parallel::binder1st<Comparator, value_type, value_type, bool>(comp, *pivot_pos));
// Find other end of pivot-equal range. split_pos2 = __gnu_sequential::partition(split_pos1 + 1, end, pred); } else // Only skip the pivot. split_pos2 = split_pos1 + 1;
// Compare iterators. if (split_pos2 <= nth) begin = split_pos2; else if (nth < split_pos1) end = split_pos1; else break; }
// Only at most _Settings::partition_minimal_n elements left. __gnu_sequential::nth_element(begin, nth, end, comp); }
/** @brief Parallel implementation of std::partial_sort(). * @param begin Begin iterator of input sequence. * @param middle Sort until this position. * @param end End iterator of input sequence. * @param comp Comparator. */ template<typename RandomAccessIterator, typename Comparator> void parallel_partial_sort(RandomAccessIterator begin, RandomAccessIterator middle, RandomAccessIterator end, Comparator comp) { parallel_nth_element(begin, middle, end, comp); std::sort(begin, middle, comp); }
} //namespace __gnu_parallel
#undef _GLIBCXX_VOLATILE
#endif /* _GLIBCXX_PARALLEL_PARTITION_H */
|