From 4e1af6bc18353ecf044545325024b5369ec50cae Mon Sep 17 00:00:00 2001 From: lkcl Date: Thu, 29 Jul 2021 05:32:58 +0100 Subject: [PATCH] --- openpower/sv/svp64/appendix.mdwn | 47 ++++++++------------------------ 1 file changed, 12 insertions(+), 35 deletions(-) diff --git a/openpower/sv/svp64/appendix.mdwn b/openpower/sv/svp64/appendix.mdwn index 395b732f8..65c089f07 100644 --- a/openpower/sv/svp64/appendix.mdwn +++ b/openpower/sv/svp64/appendix.mdwn @@ -735,54 +735,31 @@ For modes: # Proposed Parallel-reduction algorithm -this is actually prefix-sum (Pascal's Triangle) - ``` /// reference implementation of proposed SimpleV reduction semantics. /// -/// `temp_pred` is a non-user-visible register that can be stored in some -/// SPR if the reduction is interrupted, or we can just restart the -reduction -/// from the beginning since it will produce the same results. +/// `temp_pred` is a user-visible Vector Condition register /// /// all input arrays have length `vl` -pub fn reduce( - vl: usize, - input_vec: &[f32], - temp_vec: &mut [f32], - input_pred: &[bool], - temp_pred: &mut [bool], -) -> f32 { - assert_eq!(input_vec.len(), vl); - assert_eq!(temp_vec.len(), vl); - assert_eq!(input_pred.len(), vl); - assert_eq!(temp_pred.len(), vl); - for i in 0..vl { +pub fn reduce( vl, input_vec, temp_vec, input_pred, temp_pred,): + for i in 0..vl temp_pred[i] = input_pred[i]; - if temp_pred[i] { - temp_vec[i] = input_vec[i]; - } - } - let mut step = 1; - while step < vl { + if temp_pred[i] + temp_vec[i] = input_vec[i] + mut step = 1; + while step < vl step *= 2; - for i in (0..vl).step_by(step) { - let other = i + step / 2; - let other_pred = other < vl && temp_pred[other]; - if temp_pred[i] && other_pred { + for i in (0..vl).step_by(step) + other = i + step / 2; + other_pred = other < vl && temp_pred[other]; + if temp_pred[i] && other_pred // reduction operation -- we still use this algorithm even // if the reduction operation isn't associative or // commutative. - // - // `f32` addition is used as the reduction operation - // for ease of exposition. temp_vec[i] += temp_vec[other]; - } else if other_pred { + else if other_pred temp_vec[i] = temp_vec[other]; - } temp_pred[i] |= other_pred; - } - } if vl != 0 && temp_pred[0] { // return the result temp_vec[0] -- 2.30.2