From 740936e08e68c62e24693f315d08c9ffe8fc1a5c Mon Sep 17 00:00:00 2001 From: Johannes Singler Date: Thu, 25 Oct 2007 17:07:56 +0000 Subject: [PATCH] multiway_merge.h: Removed Timing 2007-10-25 Johannes Singler * include/parallel/multiway_merge.h: Removed Timing * include/parallel/random_shuffle.h: Same * include/parallel/set_operations.h: Same * include/parallel/tree.h: Same * include/parallel/multiway_mergesort.h: Same * include/parallel/timing.h: Removed completely From-SVN: r129629 --- libstdc++-v3/ChangeLog | 9 + .../include/parallel/multiway_merge.h | 23 -- .../include/parallel/multiway_mergesort.h | 17 -- .../include/parallel/random_shuffle.h | 31 --- .../include/parallel/set_operations.h | 14 -- libstdc++-v3/include/parallel/timing.h | 217 ------------------ libstdc++-v3/include/parallel/tree.h | 53 ----- 7 files changed, 9 insertions(+), 355 deletions(-) delete mode 100644 libstdc++-v3/include/parallel/timing.h diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index fabbe8ace7a..a588ed935cd 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,12 @@ +2007-10-25 Johannes Singler + + * include/parallel/multiway_merge.h: Removed Timing + * include/parallel/random_shuffle.h: Same + * include/parallel/set_operations.h: Same + * include/parallel/tree.h: Same + * include/parallel/multiway_mergesort.h: Same + * include/parallel/timing.h: Removed completely + 2007-10-25 Paolo Carlini * include/bits/stl_algo.h (__lg<>(_Size)): Slightly tweak. diff --git a/libstdc++-v3/include/parallel/multiway_merge.h b/libstdc++-v3/include/parallel/multiway_merge.h index c727e44f0fc..c7d317a519a 100644 --- a/libstdc++-v3/include/parallel/multiway_merge.h +++ b/libstdc++-v3/include/parallel/multiway_merge.h @@ -52,7 +52,6 @@ #include #include #include -#include #if _GLIBCXX_ASSERTIONS #include #endif @@ -1354,11 +1353,6 @@ namespace __gnu_parallel thread_index_t num_threads = static_cast(std::min(static_cast(get_max_threads()), total_length)); - Timing* t = new Timing[num_threads]; - - for (int pr = 0; pr < num_threads; pr++) - t[pr].tic(); - bool tight = (total_length == length); // Thread t will have to merge pieces[iam][0..k - 1] @@ -1456,15 +1450,10 @@ namespace __gnu_parallel delete[] offsets; } - for (int pr = 0; pr < num_threads; pr++) - t[pr].tic(); - # pragma omp parallel num_threads(num_threads) { thread_index_t iam = omp_get_thread_num(); - t[iam].tic(); - difference_type target_position = 0; for (int c = 0; c < k; c++) @@ -1498,14 +1487,8 @@ namespace __gnu_parallel (pieces[iam][0].second - pieces[iam][0].first) + (pieces[iam][1].second - pieces[iam][1].first), comp); } - - t[iam].tic(); - } - for (int pr = 0; pr < num_threads; pr++) - t[pr].tic(); - #if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp)); #endif @@ -1516,12 +1499,6 @@ namespace __gnu_parallel delete[] pieces; - for (int pr = 0; pr < num_threads; pr++) - t[pr].tic(); - for (int pr = 0; pr < num_threads; pr++) - t[pr].print(); - delete[] t; - return target + length; } diff --git a/libstdc++-v3/include/parallel/multiway_mergesort.h b/libstdc++-v3/include/parallel/multiway_mergesort.h index c1a07dc0352..89285e16389 100644 --- a/libstdc++-v3/include/parallel/multiway_mergesort.h +++ b/libstdc++-v3/include/parallel/multiway_mergesort.h @@ -44,7 +44,6 @@ #include #include #include -#include namespace __gnu_parallel { @@ -160,9 +159,6 @@ namespace __gnu_parallel typedef typename traits_type::value_type value_type; typedef typename traits_type::difference_type difference_type; - Timing t; - t.tic(); - PMWMSSortingData* sd = d->sd; thread_index_t iam = d->iam; @@ -196,7 +192,6 @@ namespace __gnu_parallel // Invariant: locally sorted subsequence in sd->sorting_places[iam], // sd->sorting_places[iam] + length_local. - t.tic("local sort"); if (Settings::sort_splitting == Settings::SAMPLING) { @@ -205,8 +200,6 @@ namespace __gnu_parallel #pragma omp barrier - t.tic("sample/wait"); - #pragma omp single __gnu_sequential::sort(sd->samples, sd->samples + (num_samples * d->num_threads), @@ -241,8 +234,6 @@ namespace __gnu_parallel { #pragma omp barrier - t.tic("wait"); - std::vector > seqs(d->num_threads); for (int s = 0; s < d->num_threads; s++) seqs[s] = std::make_pair(sd->sorting_places[s], sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s]); @@ -276,8 +267,6 @@ namespace __gnu_parallel } } - t.tic("split"); - // Offset from target begin, length after merging. difference_type offset = 0, length_am = 0; for (int s = 0; s < d->num_threads; s++) @@ -308,8 +297,6 @@ namespace __gnu_parallel multiway_merge(seqs.begin(), seqs.end(), sd->merging_places[iam], comp, length_am, d->stable, false, sequential_tag()); - t.tic("merge"); - #if _GLIBCXX_ASSERTIONS _GLIBCXX_PARALLEL_ASSERT(is_sorted(sd->merging_places[iam], sd->merging_places[iam] + length_am, comp)); #endif @@ -323,10 +310,6 @@ namespace __gnu_parallel #endif delete[] sd->temporaries[iam]; - - t.tic("copy back"); - - t.print(); } /** @brief PMWMS main call. diff --git a/libstdc++-v3/include/parallel/random_shuffle.h b/libstdc++-v3/include/parallel/random_shuffle.h index bb174e10b00..933ab3e8a8e 100644 --- a/libstdc++-v3/include/parallel/random_shuffle.h +++ b/libstdc++-v3/include/parallel/random_shuffle.h @@ -42,7 +42,6 @@ #include #include #include -#include namespace __gnu_parallel { @@ -136,9 +135,6 @@ namespace __gnu_parallel typedef typename traits_type::value_type value_type; typedef typename traits_type::difference_type difference_type; - Timing t; - t.tic(); - DRSSorterPU* d = &pus[omp_get_thread_num()]; DRandomShufflingGlobalData* sd = d->sd; thread_index_t iam = d->iam; @@ -170,12 +166,8 @@ namespace __gnu_parallel for (bin_index b = 0; b < sd->num_bins + 1; b++) sd->dist[b][iam + 1] = dist[b]; - t.tic(); - #pragma omp barrier - t.tic(); - #pragma omp single { // Sum up bins, sd->dist[s + 1][d->num_threads] now contains the @@ -188,8 +180,6 @@ namespace __gnu_parallel #pragma omp barrier - t.tic(); - sequence_index_t offset = 0, global_offset = 0; for (bin_index s = 0; s < d->bins_begin; s++) global_offset += sd->dist[s + 1][d->num_threads]; @@ -205,12 +195,8 @@ namespace __gnu_parallel sd->temporaries[iam] = static_cast(::operator new(sizeof(value_type) * offset)); - t.tic(); - #pragma omp barrier - t.tic(); - // Draw local copies to avoid false sharing. for (bin_index b = 0; b < sd->num_bins + 1; b++) dist[b] = sd->dist[b][iam]; @@ -237,12 +223,8 @@ namespace __gnu_parallel delete[] bin_proc; delete[] temporaries; - t.tic(); - #pragma omp barrier - t.tic(); - // Shuffle bins internally. for (bin_index b = d->bins_begin; b < d->bins_end; b++) { @@ -253,10 +235,6 @@ namespace __gnu_parallel } delete[] sd->temporaries[iam]; - - t.tic(); - - t.print(); } /** @brief Round up to the next greater power of 2. @@ -453,9 +431,6 @@ namespace __gnu_parallel for (int b = 0; b < num_bins + 1; b++) dist0[b] = 0; - Timing t; - t.tic(); - random_number bitrng(rng(0xFFFFFFFF)); for (difference_type i = 0; i < n; i++) @@ -467,16 +442,12 @@ namespace __gnu_parallel dist0[oracle + 1]++; } - t.tic(); - // Sum up bins. __gnu_sequential::partial_sum(dist0, dist0 + num_bins + 1, dist0); for (int b = 0; b < num_bins + 1; b++) dist1[b] = dist0[b]; - t.tic(); - // Distribute according to oracles. for (difference_type i = 0; i < n; i++) target[(dist0[oracles[i]])++] = *(begin + i); @@ -485,9 +456,7 @@ namespace __gnu_parallel { sequential_random_shuffle(target + dist1[b], target + dist1[b + 1], rng); - t.tic(); } - t.print(); delete[] dist0; delete[] dist1; diff --git a/libstdc++-v3/include/parallel/set_operations.h b/libstdc++-v3/include/parallel/set_operations.h index 006176de46f..2a16691459a 100644 --- a/libstdc++-v3/include/parallel/set_operations.h +++ b/libstdc++-v3/include/parallel/set_operations.h @@ -381,10 +381,6 @@ namespace __gnu_parallel #pragma omp parallel num_threads(num_threads) { - Timing t; - - t.tic(); - // Result from multiseq_partition. InputIterator offset[2]; const int iam = omp_get_thread_num(); @@ -407,13 +403,9 @@ namespace __gnu_parallel iterator_pair block_end = block_begins[ iam + 1 ] = iterator_pair(offset[ 0 ], offset[ 1 ]); - t.tic(); - // Make sure all threads have their block_begin result written out. #pragma omp barrier - t.tic(); - iterator_pair block_begin = block_begins[ iam ]; // Begin working for the first block, while the others except @@ -429,12 +421,9 @@ namespace __gnu_parallel block_begin.second, block_end.second); } - t.tic(); - // Make sure everyone wrote their lengths. #pragma omp barrier - t.tic(); OutputIterator r = result; if (iam == 0) @@ -458,9 +447,6 @@ namespace __gnu_parallel op.invoke(block_begin.first, block_end.first, block_begin.second, block_end.second, r); } - - t.tic(); - t.print(); } return return_value; } diff --git a/libstdc++-v3/include/parallel/timing.h b/libstdc++-v3/include/parallel/timing.h deleted file mode 100644 index f1f75225c15..00000000000 --- a/libstdc++-v3/include/parallel/timing.h +++ /dev/null @@ -1,217 +0,0 @@ -// -*- C++ -*- - -// Copyright (C) 2007 Free Software Foundation, Inc. -// -// This file is part of the GNU ISO C++ Library. This library is free -// software; you can redistribute it and/or modify it under the terms -// of the GNU General Public License as published by the Free Software -// Foundation; either version 2, or (at your option) any later -// version. - -// This library is distributed in the hope that it will be useful, but -// WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// General Public License for more details. - -// You should have received a copy of the GNU General Public License -// along with this library; see the file COPYING. If not, write to -// the Free Software Foundation, 59 Temple Place - Suite 330, Boston, -// MA 02111-1307, USA. - -// As a special exception, you may use this file as part of a free -// software library without restriction. Specifically, if other files -// instantiate templates or use macros or inline functions from this -// file, or you compile this file and link it with other files to -// produce an executable, this file does not by itself cause the -// resulting executable to be covered by the GNU General Public -// License. This exception does not however invalidate any other -// reasons why the executable file might be covered by the GNU General -// Public License. - -/** @file parallel/timing.h - * @brief Provides a simple tool to do performance debugging, also in - * parallel code. - * This file is a GNU parallel extension to the Standard C++ Library. - */ - -// Written by Johannes Singler. - -#ifndef _GLIBCXX_PARALLEL_TIMING_H -#define _GLIBCXX_PARALLEL_TIMING_H 1 - -#include -#include -#include -#include - -namespace __gnu_parallel -{ - // XXX integrate with existing performance testing infrastructure. - /** @brief Type of of point in time, used for the Timing classes. */ - typedef double point_in_time; - - template - class Timing; - - /** @brief A class that provides simple run time measurements, also - for parallel code. - * @param tag If parallel_tag, then the measurements are actually done. - * Otherwise, no code at all is emitted by the compiler. */ - template - class Timing - { - private: - static const int max_points_in_time = 100; - point_in_time points_in_time[max_points_in_time]; - point_in_time active, last_start; - int pos; - char* str; - const char* tags[max_points_in_time]; - - public: - Timing() - { - str = NULL; - pos = 0; - active = 0.0; - last_start = -1.0; - } - - ~Timing() - { - delete[] str; - } - - /** @brief Take a running time measurement. - * @param tag Optional description that will be output again with - * the timings. - * It should describe the operation before the tic(). To time a - * series of @c n operations, there should be @c n+1 calls to - * tic(), and one call to print(). */ - inline void - tic(const char* tag = NULL) - { - points_in_time[pos] = omp_get_wtime(); - tags[pos] = tag; - pos++; - } - - /** @brief Start the running time measurement. - * - * Should be paired with stop(). */ - inline void - start() - { - _GLIBCXX_PARALLEL_ASSERT(last_start == -1.0); - last_start = omp_get_wtime(); - } - - /** @brief Stop the running time measurement. - * - * Should be paired with start(). */ - inline void - stop() - { - _GLIBCXX_PARALLEL_ASSERT(last_start != -1.0); - active += (omp_get_wtime() - last_start); - last_start = -1.0; - } - - /** @brief Reset running time accumulation. */ - inline void - reset() - { - active = 0.0; - last_start = -1.0; - } - - /** @brief Accumulate the time between all pairs of start() and - stop() so far */ - inline point_in_time - active_time() - { return active; } - - /** @brief Total time between first and last tic() */ - inline point_in_time - total_time() - { return (points_in_time[pos - 1] - points_in_time[0]) * 1000.0; } - - private: - /** @brief Construct string to print out, presenting the timings. */ - const char* - c_str() - { - // Avoid stream library here, to avoid cyclic dependencies in - // header files. - char tmp[1000]; - - if (!str) - str = new char[pos * 200]; - else - str[0] = '\0'; - - sprintf(str, "t %2d T[ms]", omp_get_thread_num()); - strcat(str, "\n"); - - for (int i = 0; i < pos; ) - { - point_in_time last = points_in_time[i]; - i++; - if (i == pos) - break; - if (tags[i] == NULL) - sprintf(tmp, "%2d: ", i - 1); - else - sprintf(tmp, "%20s: ", tags[i]); - strcat(str, tmp); - - sprintf(tmp, "%7.2f ", (points_in_time[i] - last) * 1000.0); - strcat(str, tmp); - strcat(str, "\n"); - } - - return str; - } - - public: - /** @brief Print the running times between the tic()s. */ - void - print() - { - printf("print\n"); -#pragma omp barrier -#pragma omp master - printf("\n\n"); -#pragma omp critical - printf("%s\n", c_str()); - } - }; - - /** @brief A class that provides simple run time measurements, also - for parallel code. - * @param tag If parallel_tag, then the measurements are actually done, - * otherwise, no code at all is emitted by the compiler. - */ - template - class Timing - { - private: - static const char* empty_string; - - public: - inline void tic(const char* /*tag*/ = NULL) { } - inline void start() { } - inline void stop() { } - inline void reset() { } - inline point_in_time active_time() { return -1.0; } - inline point_in_time total_time() { return -1.0; } - inline const char* c_str() { return empty_string; } - inline void print() { } - }; - - template - const char* Timing::empty_string = ""; - -} - -#endif diff --git a/libstdc++-v3/include/parallel/tree.h b/libstdc++-v3/include/parallel/tree.h index 5b7b41c6ef9..eae33c0cba2 100644 --- a/libstdc++-v3/include/parallel/tree.h +++ b/libstdc++-v3/include/parallel/tree.h @@ -57,13 +57,6 @@ #include -//#define _GLIBCXX_TIMING -#ifdef _GLIBCXX_TIMING -#define _timing_tag parallel_tag -#else -#define _timing_tag sequential_tag -#endif - namespace std { // XXX Declaration should go to stl_tree.h. @@ -1217,10 +1210,6 @@ namespace __gnu_parallel void _M_bulk_insertion_construction(const _InputIterator __first, const _InputIterator __last, const bool is_construction, StrictlyLessOrLessEqual strictly_less_or_less_equal) { - Timing<_timing_tag> t; - - t.tic(); - thread_index_t num_threads = get_max_threads(); size_type n; size_type beg_partition[num_threads+1]; @@ -1228,8 +1217,6 @@ namespace __gnu_parallel beg_partition[0] = 0; bool is_sorted= is_sorted_distance_accessors(__first, __last, access, beg_partition,n, num_threads, std::__iterator_category(__first)); - t.tic("is_sorted"); - if (not is_sorted) { _M_not_sorted_bulk_insertion_construction(access, beg_partition, n, num_threads, is_construction, strictly_less_or_less_equal); @@ -1260,10 +1247,6 @@ namespace __gnu_parallel _M_sorted_bulk_insertion(access, beg_partition, n, num_threads, strictly_less_or_less_equal); } - - t.tic("main work"); - - t.print(); } /** @brief Bulk construction and insertion helper method on an @@ -1349,31 +1332,19 @@ namespace __gnu_parallel _M_not_sorted_bulk_insertion_construction(size_type* beg_partition, ElementsToSort* v, Comparator comp, const size_type n, thread_index_t num_threads, const bool is_construction, StrictlyLessOrLessEqual strictly_less_or_less_equal) { // The accessors have been calculated for the non sorted. - Timing<_timing_tag> t; - - t.tic(); - num_threads = static_cast(std::min(num_threads, n)); std::stable_sort(v, v+n, comp); - t.tic("sort"); - IteratorSortedElements sorted_access[num_threads+1]; range_accessors(IteratorSortedElements(v), IteratorSortedElements(v+n), sorted_access, beg_partition, n, num_threads, std::__iterator_category(v)); - t.tic("range_accessors"); - // Partial template specialization not available. if (is_construction) _M_sorted_bulk_construction(sorted_access, beg_partition, n, num_threads, strictly_less_or_less_equal); else _M_sorted_bulk_insertion(sorted_access, beg_partition, n, num_threads, strictly_less_or_less_equal); delete v; - - t.tic("actual construction or insertion"); - - t.print(); } /** @brief Construct a tree sequentially using the parallel routine @@ -1753,17 +1724,11 @@ namespace __gnu_parallel void _M_sorted_bulk_construction(_Iterator* access, size_type* beg_partition, const size_type n, thread_index_t num_threads, StrictlyLessOrLessEqual strictly_less_or_less_equal) { - Timing<_timing_tag> t; - // Dealing with repetitions (EFFICIENCY ISSUE). size_type rank_shift[num_threads+1]; - t.tic(); - _Rb_tree_node_ptr* r = _M_sorted_bulk_allocation_and_initialization(access, beg_partition, rank_shift, n, num_threads, strictly_less_or_less_equal); - t.tic("bulk allocation and initialization"); - // Link the tree appropriately. // Dealing with repetitions (EFFICIENCY ISSUE). ranker_gaps rank(beg_partition, rank_shift, num_threads); @@ -1818,11 +1783,7 @@ namespace __gnu_parallel base_type::_M_impl._M_header._M_parent = nodes_init.get_root(); nodes_init.get_root()->_M_parent= &base_type::_M_impl._M_header; - t.tic("linking nodes"); ::operator delete(r); - - t.tic("delete array of pointers"); - t.print(); } @@ -1850,10 +1811,6 @@ namespace __gnu_parallel _M_sorted_bulk_insertion(_Iterator* access, size_type* beg_partition, size_type k, thread_index_t num_threads, StrictlyLessOrLessEqual strictly_less_or_less_equal) { _GLIBCXX_PARALLEL_ASSERT((size_type)num_threads <= k); - Timing<_timing_tag> t; - - t.tic(); - // num_thr-1 problems in the upper part of the tree // num_thr problems to further parallelize std::vector existing(num_threads,0); @@ -1873,7 +1830,6 @@ namespace __gnu_parallel // 1. Construct the nodes with their corresponding data #if _GLIBCXX_TREE_INITIAL_SPLITTING r = _M_sorted_bulk_allocation_and_initialization(access, beg_partition, rank_shift, k, num_threads, strictly_less_or_less_equal); - t.tic("bulk allocation and initialization"); #else r = _M_sorted_no_gapped_bulk_allocation_and_initialization(access, beg_partition, k, num_threads, strictly_less_or_less_equal); #endif @@ -1896,8 +1852,6 @@ namespace __gnu_parallel repetitions (EFFICIENCY ISSUE) *****/ size_type last = beg_partition[num_threads] - (rank_shift[num_threads] - rank_shift[num_threads - 1]); - t.tic("last element to be inserted"); - //2. Split the tree according to access in num_threads parts //Initialize upper concat_problems //Allocate them dynamically because they are afterwards so erased @@ -1960,8 +1914,6 @@ namespace __gnu_parallel size_type last = k; #endif - t.tic("sorted_no_gapped..."); - // 3. Split the range according to tree and create // 3. insertion/concatenation problems to be solved in parallel #if _GLIBCXX_TREE_DYNAMIC_BALANCING @@ -2018,8 +1970,6 @@ namespace __gnu_parallel } while (change); } - t.tic("merging"); - // Update root and sizes. base_type::_M_root() = root_problem->t; root_problem->t->_M_parent = &(base_type::_M_impl._M_header); @@ -2069,9 +2019,6 @@ namespace __gnu_parallel // Delete array of pointers ::operator delete(r); - - t.tic(); - t.print(); } -- 2.30.2