gdbsupport/parallel-for.h

   1 /* Parallel for loops
   2
   3    Copyright (C) 2019-2022 Free Software Foundation, Inc.
   4
   5    This file is part of GDB.
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 #ifndef GDBSUPPORT_PARALLEL_FOR_H
  21 #define GDBSUPPORT_PARALLEL_FOR_H
  22
  23 #include <algorithm>
  24 #include <type_traits>
  25 #include "gdbsupport/invoke-result.h"
  26 #include "gdbsupport/thread-pool.h"
  27 #include "gdbsupport/function-view.h"
  28
  29 namespace gdb
  30 {
  31
  32 namespace detail
  33 {
  34
  35 /* This is a helper class that is used to accumulate results for
  36    parallel_for.  There is a specialization for 'void', below.  */
  37 template<typename T>
  38 struct par_for_accumulator
  39 {
  40 public:
  41
  42   explicit par_for_accumulator (size_t n_threads)
  43     : m_futures (n_threads)
  44   {
  45   }
  46
  47   /* The result type that is accumulated.  */
  48   typedef std::vector<T> result_type;
  49
  50   /* Post the Ith task to a background thread, and store a future for
  51      later.  */
  52   void post (size_t i, std::function<T ()> task)
  53   {
  54     m_futures[i]
  55       = gdb::thread_pool::g_thread_pool->post_task (std::move (task));
  56   }
  57
  58   /* Invoke TASK in the current thread, then compute all the results
  59      from all background tasks and put them into a result vector,
  60      which is returned.  */
  61   result_type finish (gdb::function_view<T ()> task)
  62   {
  63     result_type result (m_futures.size () + 1);
  64
  65     result.back () = task ();
  66
  67     for (size_t i = 0; i < m_futures.size (); ++i)
  68       result[i] = m_futures[i].get ();
  69
  70     return result;
  71   }
  72
  73 private:
  74
  75   /* A vector of futures coming from the tasks run in the
  76      background.  */
  77   std::vector<gdb::future<T>> m_futures;
  78 };
  79
  80 /* See the generic template.  */
  81 template<>
  82 struct par_for_accumulator<void>
  83 {
  84 public:
  85
  86   explicit par_for_accumulator (size_t n_threads)
  87     : m_futures (n_threads)
  88   {
  89   }
  90
  91   /* This specialization does not compute results.  */
  92   typedef void result_type;
  93
  94   void post (size_t i, std::function<void ()> task)
  95   {
  96     m_futures[i]
  97       = gdb::thread_pool::g_thread_pool->post_task (std::move (task));
  98   }
  99
 100   result_type finish (gdb::function_view<void ()> task)
 101   {
 102     task ();
 103
 104     for (auto &future : m_futures)
 105       {
 106         /* Use 'get' and not 'wait', to propagate any exception.  */
 107         future.get ();
 108       }
 109   }
 110
 111 private:
 112
 113   std::vector<gdb::future<void>> m_futures;
 114 };
 115
 116 }
 117
 118 /* A very simple "parallel for".  This splits the range of iterators
 119    into subranges, and then passes each subrange to the callback.  The
 120    work may or may not be done in separate threads.
 121
 122    This approach was chosen over having the callback work on single
 123    items because it makes it simple for the caller to do
 124    once-per-subrange initialization and destruction.
 125
 126    The parameter N says how batching ought to be done -- there will be
 127    at least N elements processed per thread.  Setting N to 0 is not
 128    allowed.
 129
 130    If the function returns a non-void type, then a vector of the
 131    results is returned.  The size of the resulting vector depends on
 132    the number of threads that were used.  */
 133
 134 template<class RandomIt, class RangeFunction>
 135 typename gdb::detail::par_for_accumulator<
 136     typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type
 137   >::result_type
 138 parallel_for_each (unsigned n, RandomIt first, RandomIt last,
 139                    RangeFunction callback,
 140                    gdb::function_view<size_t(RandomIt)> task_size = nullptr)
 141 {
 142   using result_type
 143     = typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type;
 144
 145   /* If enabled, print debug info about how the work is distributed across
 146      the threads.  */
 147   const bool parallel_for_each_debug = false;
 148
 149   size_t n_worker_threads = thread_pool::g_thread_pool->thread_count ();
 150   size_t n_threads = n_worker_threads;
 151   size_t n_elements = last - first;
 152   size_t elts_per_thread = 0;
 153   size_t elts_left_over = 0;
 154   size_t total_size = 0;
 155   size_t size_per_thread = 0;
 156   size_t max_element_size = n_elements == 0 ? 1 : SIZE_MAX / n_elements;
 157
 158   if (n_threads > 1)
 159     {
 160       if (task_size != nullptr)
 161         {
 162           gdb_assert (n == 1);
 163           for (RandomIt i = first; i != last; ++i)
 164             {
 165               size_t element_size = task_size (i);
 166               gdb_assert (element_size > 0);
 167               if (element_size > max_element_size)
 168                 /* We could start scaling here, but that doesn't seem to be
 169                    worth the effort.  */
 170                 element_size = max_element_size;
 171               size_t prev_total_size = total_size;
 172               total_size += element_size;
 173               /* Check for overflow.  */
 174               gdb_assert (prev_total_size < total_size);
 175             }
 176           size_per_thread = total_size / n_threads;
 177         }
 178       else
 179         {
 180           /* Require that there should be at least N elements in a
 181              thread.  */
 182           gdb_assert (n > 0);
 183           if (n_elements / n_threads < n)
 184             n_threads = std::max (n_elements / n, (size_t) 1);
 185           elts_per_thread = n_elements / n_threads;
 186           elts_left_over = n_elements % n_threads;
 187           /* n_elements == n_threads * elts_per_thread + elts_left_over. */
 188         }
 189     }
 190
 191   size_t count = n_threads == 0 ? 0 : n_threads - 1;
 192   gdb::detail::par_for_accumulator<result_type> results (count);
 193
 194   if (parallel_for_each_debug)
 195     {
 196       debug_printf (_("Parallel for: n_elements: %zu\n"), n_elements);
 197       if (task_size != nullptr)
 198         {
 199           debug_printf (_("Parallel for: total_size: %zu\n"), total_size);
 200           debug_printf (_("Parallel for: size_per_thread: %zu\n"), size_per_thread);
 201         }
 202       else
 203         {
 204           debug_printf (_("Parallel for: minimum elements per thread: %u\n"), n);
 205           debug_printf (_("Parallel for: elts_per_thread: %zu\n"), elts_per_thread);
 206         }
 207     }
 208
 209   size_t remaining_size = total_size;
 210   for (int i = 0; i < count; ++i)
 211     {
 212       RandomIt end;
 213       size_t chunk_size = 0;
 214       if (task_size == nullptr)
 215         {
 216           end = first + elts_per_thread;
 217           if (i < elts_left_over)
 218             /* Distribute the leftovers over the worker threads, to avoid having
 219                to handle all of them in a single thread.  */
 220             end++;
 221         }
 222       else
 223         {
 224           RandomIt j;
 225           for (j = first; j < last && chunk_size < size_per_thread; ++j)
 226             {
 227               size_t element_size = task_size (j);
 228               if (element_size > max_element_size)
 229                 element_size = max_element_size;
 230               chunk_size += element_size;
 231             }
 232           end = j;
 233           remaining_size -= chunk_size;
 234         }
 235       if (parallel_for_each_debug)
 236         {
 237           debug_printf (_("Parallel for: elements on worker thread %i\t: %zu"),
 238                         i, (size_t)(end - first));
 239           if (task_size != nullptr)
 240             debug_printf (_("\t(size: %zu)"), chunk_size);
 241           debug_printf (_("\n"));
 242         }
 243       results.post (i, [=] ()
 244         {
 245           return callback (first, end);
 246         });
 247       first = end;
 248     }
 249
 250   for (int i = count; i < n_worker_threads; ++i)
 251     if (parallel_for_each_debug)
 252       {
 253         debug_printf (_("Parallel for: elements on worker thread %i\t: 0"), i);
 254         if (task_size != nullptr)
 255           debug_printf (_("\t(size: 0)"));
 256         debug_printf (_("\n"));
 257       }
 258
 259   /* Process all the remaining elements in the main thread.  */
 260   if (parallel_for_each_debug)
 261     {
 262       debug_printf (_("Parallel for: elements on main thread\t\t: %zu"),
 263                     (size_t)(last - first));
 264       if (task_size != nullptr)
 265         debug_printf (_("\t(size: %zu)"), remaining_size);
 266       debug_printf (_("\n"));
 267     }
 268   return results.finish ([=] ()
 269     {
 270       return callback (first, last);
 271     });
 272 }
 273
 274 /* A sequential drop-in replacement of parallel_for_each.  This can be useful
 275    when debugging multi-threading behaviour, and you want to limit
 276    multi-threading in a fine-grained way.  */
 277
 278 template<class RandomIt, class RangeFunction>
 279 typename gdb::detail::par_for_accumulator<
 280     typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type
 281   >::result_type
 282 sequential_for_each (unsigned n, RandomIt first, RandomIt last,
 283                      RangeFunction callback,
 284                      gdb::function_view<size_t(RandomIt)> task_size = nullptr)
 285 {
 286   using result_type = typename gdb::invoke_result<RangeFunction, RandomIt, RandomIt>::type;
 287
 288   gdb::detail::par_for_accumulator<result_type> results (0);
 289
 290   /* Process all the remaining elements in the main thread.  */
 291   return results.finish ([=] ()
 292     {
 293       return callback (first, last);
 294     });
 295 }
 296
 297 }
 298
 299 #endif /* GDBSUPPORT_PARALLEL_FOR_H */