Viewing file: partial_sum.h (6.88 KB) -rw-r--r-- Select action/file-type: (+) | (+) | (+) | Code (+) | Session (+) | (+) | SDB (+) | (+) | (+) | (+) | (+) | (+) |
// -*- C++ -*-
// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the terms // of the GNU General Public License as published by the Free Software // Foundation; either version 3, or (at your option) any later // version.
// This library is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details.
// Under Section 7 of GPL version 3, you are granted additional // permissions described in the GCC Runtime Library Exception, version // 3.1, as published by the Free Software Foundation.
// You should have received a copy of the GNU General Public License and // a copy of the GCC Runtime Library Exception along with this program; // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see // <http://www.gnu.org/licenses/>.
/** @file parallel/partial_sum.h
 *  @brief Parallel implementation of std::partial_sum(), i.e. prefix sums.
 *  This file is a GNU parallel extension to the Standard C++ Library.
 */
// Written by Johannes Singler.
#ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H #define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1
#include <omp.h> #include <new> #include <bits/stl_algobase.h> #include <parallel/parallel.h> #include <parallel/numericfwd.h>
namespace __gnu_parallel { // Problem: there is no 0-element given.
/** @brief Sequential prefix-sum kernel seeded with an explicit start value.
 *
 *  Walks [begin, end) once. For every input element the running value is
 *  replaced by bin_op(running value, element) and then stored to the
 *  output. An empty input range writes nothing.
 *
 *  @param begin Begin iterator of input sequence.
 *  @param end End iterator of input sequence.
 *  @param result Begin iterator of output sequence.
 *  @param bin_op Associative binary function.
 *  @param value Start value. It has to be supplied by the caller because
 *  no neutral element of bin_op is known in general.
 *  @return Iterator one past the last element written, i.e. @c result
 *  advanced once per input element.
 */
template<typename InputIterator, typename OutputIterator,
         typename BinaryOperation>
OutputIterator
parallel_partial_sum_basecase(InputIterator begin, InputIterator end,
                              OutputIterator result, BinaryOperation bin_op,
                              typename std::iterator_traits
                              <InputIterator>::value_type value)
{
  // The loop condition also covers the empty range, so no separate guard.
  for (; begin != end; ++begin)
    {
      value = bin_op(value, *begin);
      *result = value;
      ++result;
    }
  return result;
}
/** @brief Parallel partial sum implementation, two-phase approach, no recursion. * @param begin Begin iterator of input sequence. * @param end End iterator of input sequence. * @param result Begin iterator of output sequence. * @param bin_op Associative binary function. * @param n Length of sequence. * @param num_threads Number of threads to use. * @return End iterator of output sequence. */ template<typename InputIterator, typename OutputIterator, typename BinaryOperation> OutputIterator parallel_partial_sum_linear(InputIterator begin, InputIterator end, OutputIterator result, BinaryOperation bin_op, typename std::iterator_traits <InputIterator>::difference_type n) { typedef std::iterator_traits<InputIterator> traits_type; typedef typename traits_type::value_type value_type; typedef typename traits_type::difference_type difference_type;
if (begin == end) return result;
thread_index_t num_threads = std::min<difference_type>(get_max_threads(), n - 1);
if (num_threads < 2) { *result = *begin; return parallel_partial_sum_basecase( begin + 1, end, result + 1, bin_op, *begin); }
difference_type* borders; value_type* sums;
const _Settings& __s = _Settings::get();
# pragma omp parallel num_threads(num_threads) { # pragma omp single { num_threads = omp_get_num_threads();
borders = new difference_type[num_threads + 2];
if (__s.partial_sum_dilation == 1.0f) equally_split(n, num_threads + 1, borders); else { difference_type chunk_length = ((double)n / ((double)num_threads + __s.partial_sum_dilation)), borderstart = n - num_threads * chunk_length; borders[0] = 0; for (int i = 1; i < (num_threads + 1); ++i) { borders[i] = borderstart; borderstart += chunk_length; } borders[num_threads + 1] = n; }
sums = static_cast<value_type*>(::operator new(sizeof(value_type) * num_threads)); OutputIterator target_end; } //single
thread_index_t iam = omp_get_thread_num(); if (iam == 0) { *result = *begin; parallel_partial_sum_basecase(begin + 1, begin + borders[1], result + 1, bin_op, *begin); ::new(&(sums[iam])) value_type(*(result + borders[1] - 1)); } else { ::new(&(sums[iam])) value_type(__gnu_parallel::accumulate(begin + borders[iam] + 1, begin + borders[iam + 1], *(begin + borders[iam]), bin_op, __gnu_parallel::sequential_tag())); }
# pragma omp barrier
# pragma omp single parallel_partial_sum_basecase( sums + 1, sums + num_threads, sums + 1, bin_op, sums[0]);
# pragma omp barrier
// Still same team. parallel_partial_sum_basecase(begin + borders[iam + 1], begin + borders[iam + 2], result + borders[iam + 1], bin_op, sums[iam]); } //parallel
::operator delete(sums); delete[] borders;
return result + n; }
/** @brief Parallel partial sum front-end. * @param begin Begin iterator of input sequence. * @param end End iterator of input sequence. * @param result Begin iterator of output sequence. * @param bin_op Associative binary function. * @return End iterator of output sequence. */ template<typename InputIterator, typename OutputIterator, typename BinaryOperation> OutputIterator parallel_partial_sum(InputIterator begin, InputIterator end, OutputIterator result, BinaryOperation bin_op) { _GLIBCXX_CALL(begin - end)
typedef std::iterator_traits<InputIterator> traits_type; typedef typename traits_type::value_type value_type; typedef typename traits_type::difference_type difference_type;
difference_type n = end - begin;
switch (_Settings::get().partial_sum_algorithm) { case LINEAR: // Need an initial offset. return parallel_partial_sum_linear(begin, end, result, bin_op, n); default: // Partial_sum algorithm not implemented. _GLIBCXX_PARALLEL_ASSERT(0); return result + n; } } }
#endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */
|