libstdc++
multiway_mergesort.h
Go to the documentation of this file.
00001 // -*- C++ -*-
00002 
00003 // Copyright (C) 2007-2015 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the terms
00007 // of the GNU General Public License as published by the Free Software
00008 // Foundation; either version 3, or (at your option) any later
00009 // version.
00010 
00011 // This library is distributed in the hope that it will be useful, but
00012 // WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014 // General Public License for more details.
00015 
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019 
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024 
00025 /** @file parallel/multiway_mergesort.h
00026  *  @brief Parallel multiway merge sort.
00027  *  This file is a GNU parallel extension to the Standard C++ Library.
00028  */
00029 
00030 // Written by Johannes Singler.
00031 
00032 #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
00033 #define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
00034 
00035 #include <vector>
00036 
00037 #include <parallel/basic_iterator.h>
00038 #include <bits/stl_algo.h>
00039 #include <parallel/parallel.h>
00040 #include <parallel/multiway_merge.h>
00041 
00042 namespace __gnu_parallel
00043 {
00044   /** @brief Subsequence description. */
00045   template<typename _DifferenceTp>
00046     struct _Piece
00047     {
00048       typedef _DifferenceTp _DifferenceType;
00049 
00050       /** @brief Begin of subsequence. */
00051       _DifferenceType _M_begin;
00052 
00053       /** @brief End of subsequence. */
00054       _DifferenceType _M_end;
00055     };
00056 
00057   /** @brief Data accessed by all threads.
00058    *
00059    *  PMWMS = parallel multiway mergesort */
00060   template<typename _RAIter>
00061     struct _PMWMSSortingData
00062     {
00063       typedef std::iterator_traits<_RAIter> _TraitsType;
00064       typedef typename _TraitsType::value_type _ValueType;
00065       typedef typename _TraitsType::difference_type _DifferenceType;
00066 
00067       /** @brief Number of threads involved. */
00068       _ThreadIndex _M_num_threads;
00069 
00070       /** @brief Input __begin. */
00071       _RAIter _M_source;
00072 
00073       /** @brief Start indices, per thread. */
00074       _DifferenceType* _M_starts;
00075 
00076       /** @brief Storage in which to sort. */
00077       _ValueType** _M_temporary;
00078 
00079       /** @brief Samples. */
00080       _ValueType* _M_samples;
00081 
00082       /** @brief Offsets to add to the found positions. */
00083       _DifferenceType* _M_offsets;
00084 
00085       /** @brief Pieces of data to merge @c [thread][__sequence] */
00086       std::vector<_Piece<_DifferenceType> >* _M_pieces;
00087   };
00088 
00089   /**
00090    *  @brief Select _M_samples from a sequence.
00091    *  @param __sd Pointer to algorithm data. _Result will be placed in
00092    *  @c __sd->_M_samples.
00093    *  @param __num_samples Number of _M_samples to select.
00094    */
00095   template<typename _RAIter, typename _DifferenceTp>
00096     void
00097     __determine_samples(_PMWMSSortingData<_RAIter>* __sd,
00098                         _DifferenceTp __num_samples)
00099     {
00100       typedef std::iterator_traits<_RAIter> _TraitsType;
00101       typedef typename _TraitsType::value_type _ValueType;
00102       typedef _DifferenceTp _DifferenceType;
00103 
00104       _ThreadIndex __iam = omp_get_thread_num();
00105 
00106       _DifferenceType* __es = new _DifferenceType[__num_samples + 2];
00107 
00108       __equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam], 
00109                       __num_samples + 1, __es);
00110 
00111       for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
00112         ::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
00113             _ValueType(__sd->_M_source[__sd->_M_starts[__iam]
00114                                        + __es[__i + 1]]);
00115 
00116       delete[] __es;
00117     }
00118 
00119   /** @brief Split consistently. */
00120   template<bool __exact, typename _RAIter,
00121            typename _Compare, typename _SortingPlacesIterator>
00122     struct _SplitConsistently
00123     { };
00124 
00125   /** @brief Split by exact splitting. */
00126   template<typename _RAIter, typename _Compare,
00127            typename _SortingPlacesIterator>
00128     struct _SplitConsistently<true, _RAIter, _Compare, _SortingPlacesIterator>
00129     {
00130       void
00131       operator()(const _ThreadIndex __iam,
00132                  _PMWMSSortingData<_RAIter>* __sd,
00133                  _Compare& __comp,
00134                  const typename
00135                  std::iterator_traits<_RAIter>::difference_type
00136                  __num_samples) const
00137       {
00138 #       pragma omp barrier
00139 
00140         std::vector<std::pair<_SortingPlacesIterator,
00141                               _SortingPlacesIterator> >
00142           __seqs(__sd->_M_num_threads);
00143         for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
00144           __seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
00145                                        __sd->_M_temporary[__s]
00146                                        + (__sd->_M_starts[__s + 1]
00147                                           - __sd->_M_starts[__s]));
00148 
00149         std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads);
00150 
00151         // if not last thread
00152         if (__iam < __sd->_M_num_threads - 1)
00153           multiseq_partition(__seqs.begin(), __seqs.end(),
00154                              __sd->_M_starts[__iam + 1], __offsets.begin(),
00155                              __comp);
00156 
00157         for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
00158           {
00159             // for each sequence
00160             if (__iam < (__sd->_M_num_threads - 1))
00161               __sd->_M_pieces[__iam][__seq]._M_end
00162                 = __offsets[__seq] - __seqs[__seq].first;
00163             else
00164               // very end of this sequence
00165               __sd->_M_pieces[__iam][__seq]._M_end =
00166                 __sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
00167           }
00168 
00169 #       pragma omp barrier
00170 
00171         for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
00172           {
00173             // For each sequence.
00174             if (__iam > 0)
00175               __sd->_M_pieces[__iam][__seq]._M_begin =
00176                 __sd->_M_pieces[__iam - 1][__seq]._M_end;
00177             else
00178               // Absolute beginning.
00179               __sd->_M_pieces[__iam][__seq]._M_begin = 0;
00180           }
00181       }
00182   };
00183 
00184   /** @brief Split by sampling. */ 
00185   template<typename _RAIter, typename _Compare,
00186            typename _SortingPlacesIterator>
00187     struct _SplitConsistently<false, _RAIter, _Compare, _SortingPlacesIterator>
00188     {
00189       void
00190       operator()(const _ThreadIndex __iam,
00191                  _PMWMSSortingData<_RAIter>* __sd,
00192                  _Compare& __comp,
00193                  const typename
00194                  std::iterator_traits<_RAIter>::difference_type
00195                  __num_samples) const
00196       {
00197         typedef std::iterator_traits<_RAIter> _TraitsType;
00198         typedef typename _TraitsType::value_type _ValueType;
00199         typedef typename _TraitsType::difference_type _DifferenceType;
00200 
00201         __determine_samples(__sd, __num_samples);
00202 
00203 #       pragma omp barrier
00204 
00205 #       pragma omp single
00206         __gnu_sequential::sort(__sd->_M_samples,
00207                                __sd->_M_samples
00208                                + (__num_samples * __sd->_M_num_threads),
00209                                __comp);
00210 
00211 #       pragma omp barrier
00212 
00213         for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
00214           {
00215             // For each sequence.
00216             if (__num_samples * __iam > 0)
00217               __sd->_M_pieces[__iam][__s]._M_begin =
00218                 std::lower_bound(__sd->_M_temporary[__s],
00219                                  __sd->_M_temporary[__s]
00220                                  + (__sd->_M_starts[__s + 1]
00221                                     - __sd->_M_starts[__s]),
00222                                  __sd->_M_samples[__num_samples * __iam],
00223                                  __comp)
00224                 - __sd->_M_temporary[__s];
00225             else
00226               // Absolute beginning.
00227               __sd->_M_pieces[__iam][__s]._M_begin = 0;
00228 
00229             if ((__num_samples * (__iam + 1)) <
00230                 (__num_samples * __sd->_M_num_threads))
00231               __sd->_M_pieces[__iam][__s]._M_end =
00232                 std::lower_bound(__sd->_M_temporary[__s],
00233                                  __sd->_M_temporary[__s]
00234                                  + (__sd->_M_starts[__s + 1]
00235                                     - __sd->_M_starts[__s]),
00236                                  __sd->_M_samples[__num_samples * (__iam + 1)],
00237                                  __comp)
00238                 - __sd->_M_temporary[__s];
00239             else
00240               // Absolute end.
00241               __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1]
00242                                                     - __sd->_M_starts[__s]);
00243           }
00244       }
00245   };
00246   
00247   template<bool __stable, typename _RAIter, typename _Compare>
00248     struct __possibly_stable_sort
00249     { };
00250 
00251   template<typename _RAIter, typename _Compare>
00252     struct __possibly_stable_sort<true, _RAIter, _Compare>
00253     {
00254       void operator()(const _RAIter& __begin,
00255                       const _RAIter& __end, _Compare& __comp) const
00256       { __gnu_sequential::stable_sort(__begin, __end, __comp); }
00257     };
00258 
00259   template<typename _RAIter, typename _Compare>
00260     struct __possibly_stable_sort<false, _RAIter, _Compare>
00261     {
00262       void operator()(const _RAIter __begin,
00263                       const _RAIter __end, _Compare& __comp) const
00264       { __gnu_sequential::sort(__begin, __end, __comp); }
00265     };
00266 
00267   template<bool __stable, typename Seq_RAIter,
00268            typename _RAIter, typename _Compare,
00269            typename DiffType>
00270     struct __possibly_stable_multiway_merge
00271     { };
00272 
00273   template<typename Seq_RAIter, typename _RAIter,
00274            typename _Compare, typename _DiffType>
00275     struct __possibly_stable_multiway_merge<true, Seq_RAIter,
00276                                             _RAIter, _Compare, _DiffType>
00277     {
00278       void operator()(const Seq_RAIter& __seqs_begin,
00279                       const Seq_RAIter& __seqs_end,
00280                       const _RAIter& __target,
00281                       _Compare& __comp,
00282                       _DiffType __length_am) const
00283       { stable_multiway_merge(__seqs_begin, __seqs_end, __target,
00284                               __length_am, __comp, sequential_tag()); }
00285     };
00286 
00287   template<typename Seq_RAIter, typename _RAIter,
00288            typename _Compare, typename _DiffType>
00289     struct __possibly_stable_multiway_merge<false, Seq_RAIter,
00290                                             _RAIter, _Compare, _DiffType>
00291     {
00292       void operator()(const Seq_RAIter& __seqs_begin,
00293                       const Seq_RAIter& __seqs_end,
00294                       const _RAIter& __target,
00295                       _Compare& __comp,
00296                       _DiffType __length_am) const
00297       { multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
00298                        __comp, sequential_tag()); }
00299     };
00300 
00301   /** @brief PMWMS code executed by each thread.
00302    *  @param __sd Pointer to algorithm data.
00303    *  @param __comp Comparator.
00304    */
00305   template<bool __stable, bool __exact, typename _RAIter,
00306            typename _Compare>
00307     void
00308     parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd,
00309                           _Compare& __comp)
00310     {
00311       typedef std::iterator_traits<_RAIter> _TraitsType;
00312       typedef typename _TraitsType::value_type _ValueType;
00313       typedef typename _TraitsType::difference_type _DifferenceType;
00314 
00315       _ThreadIndex __iam = omp_get_thread_num();
00316 
00317       // Length of this thread's chunk, before merging.
00318       _DifferenceType __length_local =
00319         __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
00320 
00321       // Sort in temporary storage, leave space for sentinel.
00322 
00323       typedef _ValueType* _SortingPlacesIterator;
00324 
00325       __sd->_M_temporary[__iam] =
00326         static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
00327                                                 * (__length_local + 1)));
00328 
00329       // Copy there.
00330       std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam],
00331                               __sd->_M_source + __sd->_M_starts[__iam]
00332                               + __length_local,
00333                               __sd->_M_temporary[__iam]);
00334 
00335       __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
00336         (__sd->_M_temporary[__iam],
00337          __sd->_M_temporary[__iam] + __length_local,
00338          __comp);
00339 
00340       // Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
00341       // __sd->_M_temporary[__iam] + __length_local.
00342 
00343       // No barrier here: Synchronization is done by the splitting routine.
00344 
00345       _DifferenceType __num_samples =
00346         _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1;
00347       _SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
00348         (__iam, __sd, __comp, __num_samples);
00349 
00350       // Offset from __target __begin, __length after merging.
00351       _DifferenceType __offset = 0, __length_am = 0;
00352       for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
00353         {
00354           __length_am += (__sd->_M_pieces[__iam][__s]._M_end
00355                           - __sd->_M_pieces[__iam][__s]._M_begin);
00356           __offset += __sd->_M_pieces[__iam][__s]._M_begin;
00357         }
00358 
00359       typedef std::vector<
00360         std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
00361         _SeqVector;
00362       _SeqVector __seqs(__sd->_M_num_threads);
00363 
00364       for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
00365         {
00366           __seqs[__s] =
00367             std::make_pair(__sd->_M_temporary[__s]
00368                            + __sd->_M_pieces[__iam][__s]._M_begin,
00369                            __sd->_M_temporary[__s]
00370                            + __sd->_M_pieces[__iam][__s]._M_end);
00371         }
00372 
00373       __possibly_stable_multiway_merge<
00374         __stable, typename _SeqVector::iterator,
00375         _RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(),
00376                                      __sd->_M_source + __offset, __comp,
00377                                      __length_am);
00378 
00379 #     pragma omp barrier
00380 
00381       for (_DifferenceType __i = 0; __i < __length_local; ++__i)
00382         __sd->_M_temporary[__iam][__i].~_ValueType();
00383       ::operator delete(__sd->_M_temporary[__iam]);
00384     }
00385 
00386   /** @brief PMWMS main call.
00387    *  @param __begin Begin iterator of sequence.
00388    *  @param __end End iterator of sequence.
00389    *  @param __comp Comparator.
00390    *  @param __num_threads Number of threads to use.
00391    */
00392   template<bool __stable, bool __exact, typename _RAIter,
00393            typename _Compare>
00394     void
00395     parallel_sort_mwms(_RAIter __begin, _RAIter __end,
00396                        _Compare __comp,
00397                        _ThreadIndex __num_threads)
00398     {
00399       _GLIBCXX_CALL(__end - __begin)
00400 
00401       typedef std::iterator_traits<_RAIter> _TraitsType;
00402       typedef typename _TraitsType::value_type _ValueType;
00403       typedef typename _TraitsType::difference_type _DifferenceType;
00404 
00405       _DifferenceType __n = __end - __begin;
00406 
00407       if (__n <= 1)
00408         return;
00409 
00410       // at least one element per thread
00411       if (__num_threads > __n)
00412         __num_threads = static_cast<_ThreadIndex>(__n);
00413 
00414       // shared variables
00415       _PMWMSSortingData<_RAIter> __sd;
00416       _DifferenceType* __starts;
00417       _DifferenceType __size;
00418 
00419 #     pragma omp parallel num_threads(__num_threads)
00420       {
00421         __num_threads = omp_get_num_threads(); //no more threads than requested
00422 
00423 #       pragma omp single
00424         {
00425           __sd._M_num_threads = __num_threads;
00426           __sd._M_source = __begin;
00427           
00428           __sd._M_temporary = new _ValueType*[__num_threads];
00429 
00430           if (!__exact)
00431             {
00432               __size =
00433                 (_Settings::get().sort_mwms_oversampling * __num_threads - 1)
00434                 * __num_threads;
00435               __sd._M_samples = static_cast<_ValueType*>
00436                 (::operator new(__size * sizeof(_ValueType)));
00437             }
00438           else
00439             __sd._M_samples = 0;
00440 
00441           __sd._M_offsets = new _DifferenceType[__num_threads - 1];
00442           __sd._M_pieces
00443             = new std::vector<_Piece<_DifferenceType> >[__num_threads];
00444           for (_ThreadIndex __s = 0; __s < __num_threads; ++__s)
00445             __sd._M_pieces[__s].resize(__num_threads);
00446           __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
00447 
00448           _DifferenceType __chunk_length = __n / __num_threads;
00449           _DifferenceType __split = __n % __num_threads;
00450           _DifferenceType __pos = 0;
00451           for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
00452             {
00453               __starts[__i] = __pos;
00454               __pos += ((__i < __split)
00455                         ? (__chunk_length + 1) : __chunk_length);
00456             }
00457           __starts[__num_threads] = __pos;
00458         } //single
00459 
00460         // Now sort in parallel.
00461         parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
00462       } //parallel
00463 
00464       delete[] __starts;
00465       delete[] __sd._M_temporary;
00466 
00467       if (!__exact)
00468         {
00469           for (_DifferenceType __i = 0; __i < __size; ++__i)
00470             __sd._M_samples[__i].~_ValueType();
00471           ::operator delete(__sd._M_samples);
00472         }
00473 
00474       delete[] __sd._M_offsets;
00475       delete[] __sd._M_pieces;
00476     }
00477 
00478 } //namespace __gnu_parallel
00479 
00480 #endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */