From 88e700329b0aea451def26b271b1b70bd964894c Mon Sep 17 00:00:00 2001
From: Vadim Girlin
Date: Mon, 27 May 2013 15:29:56 +0400
Subject: [PATCH] r600g/sb: optimize CNDcc instructions

Signed-off-by: Vadim Girlin
---
Reviewer notes (placed below the "---" marker, so they are not part of the
commit message):

- sb_expr.cpp, get_cndcc_op(): maps a (condition code, compare type) pair
  to an ALU_OP3_CND* opcode. Only AF_CC_E / AF_CC_GT / AF_CC_GE combined
  with AF_FLOAT_CMP or AF_INT_CMP are mapped; any other combination hits
  the assert and returns ~0u.
- sb_peephole.cpp, optimize_cc_op2(): when a zero literal found in src[0]
  is swapped into src[1], both bc.src[0] and bc.src[1] are now zeroed with
  memset ("clear modifiers").
- sb_peephole.cpp, optimize_CNDcc_op(): only handles a CNDcc whose own
  condition is E or NE. It takes the node that get_bool_op_info() returns
  for src[0] (logged as "cndcc def") and requires one of that node's first
  two sources to be the literal 0. The other source then replaces the
  CNDcc's src[0] (value and bc.src entry), the opcode is re-selected with
  get_cndcc_op(), and src[1]/src[2] are exchanged whenever the combined
  condition ends up inverted. The function returns without changes when
  the defining node has omod set, when the propagated source has the abs
  modifier, when the defining compare is AF_UINT_CMP, or when a non-float
  CNDcc is fed by a compare with a float destination.

 src/gallium/drivers/r600/sb/sb_expr.cpp     | 24 ++++++
 src/gallium/drivers/r600/sb/sb_expr.h       |  1 +
 src/gallium/drivers/r600/sb/sb_peephole.cpp | 89 ++++++++++++++++++++-
 3 files changed, 113 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/r600/sb/sb_expr.cpp b/src/gallium/drivers/r600/sb/sb_expr.cpp
index 65a764153a7..b85302d713e 100644
--- a/src/gallium/drivers/r600/sb/sb_expr.cpp
+++ b/src/gallium/drivers/r600/sb/sb_expr.cpp
@@ -698,6 +698,30 @@ unsigned get_killcc_op(unsigned cc, unsigned cmp_type) {
 	return ~0u;
 }
 
+unsigned get_cndcc_op(unsigned cc, unsigned cmp_type) {
+
+	switch(cmp_type) {
+	case AF_FLOAT_CMP: {
+		switch (cc) {
+		case AF_CC_E: return ALU_OP3_CNDE;
+		case AF_CC_GT: return ALU_OP3_CNDGT;
+		case AF_CC_GE: return ALU_OP3_CNDGE;
+		}
+		break;
+	}
+	case AF_INT_CMP: {
+		switch (cc) {
+		case AF_CC_E: return ALU_OP3_CNDE_INT;
+		case AF_CC_GT: return ALU_OP3_CNDGT_INT;
+		case AF_CC_GE: return ALU_OP3_CNDGE_INT;
+		}
+		break;
+	}
+	}
+
+	assert(!"unexpected cc&cmp_type combination");
+	return ~0u;
+}
 
 void convert_predset_to_set(shader& sh, alu_node* a) {
 
diff --git a/src/gallium/drivers/r600/sb/sb_expr.h b/src/gallium/drivers/r600/sb/sb_expr.h
index 89177141a76..1ee48a00597 100644
--- a/src/gallium/drivers/r600/sb/sb_expr.h
+++ b/src/gallium/drivers/r600/sb/sb_expr.h
@@ -40,6 +40,7 @@ unsigned invert_setcc_condition(unsigned cc, bool &swap_args);
 unsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst);
 unsigned get_predsetcc_op(unsigned cc, unsigned cmp_type);
 unsigned get_killcc_op(unsigned cc, unsigned cmp_type);
+unsigned get_cndcc_op(unsigned cc, unsigned cmp_type);
 
 class expr_handler {
 
diff --git a/src/gallium/drivers/r600/sb/sb_peephole.cpp b/src/gallium/drivers/r600/sb/sb_peephole.cpp
index cb423c9b224..d4b97557d4e 100644
--- a/src/gallium/drivers/r600/sb/sb_peephole.cpp
+++ b/src/gallium/drivers/r600/sb/sb_peephole.cpp
@@ -111,6 +111,9 @@ void peephole::optimize_cc_op2(alu_node* a) {
 	if (a->src[0]->is_const() && a->src[0]->literal_value == literal(0)) {
 		std::swap(a->src[0],a->src[1]);
 		swapped = true;
+		// clear modifiers
+		memset(&a->bc.src[0], 0, sizeof(bc_alu_src));
+		memset(&a->bc.src[1], 0, sizeof(bc_alu_src));
 	}
 
 	if (swapped || (a->src[1]->is_const() &&
@@ -187,8 +190,92 @@ void peephole::optimize_cc_op2(alu_node* a) {
 }
 
 void peephole::optimize_CNDcc_op(alu_node* a) {
+	unsigned flags = a->bc.op_ptr->flags;
+	unsigned cc = flags & AF_CC_MASK;
+	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
+	bool swap = false;
+
+	if (cc == AF_CC_E) {
+		swap = !swap;
+		cc = AF_CC_NE;
+	} else if (cc != AF_CC_NE)
+		return;
+
+	value *s = a->src[0];
+
+	bool_op_info bop = {};
+
+	PPH_DUMP(
+		sblog << "cndcc: ";
+		dump::dump_op(a);
+		sblog << "\n";
+	);
+
+	if (!get_bool_op_info(s, bop))
+		return;
+
+	alu_node *d = bop.n;
+
+	if (d->bc.omod)
+		return;
+
+	PPH_DUMP(
+		sblog << "cndcc def: ";
+		dump::dump_op(d);
+		sblog << "\n";
+	);
+
+
+	unsigned dflags = d->bc.op_ptr->flags;
+	unsigned dcc = dflags & AF_CC_MASK;
+	unsigned dcmp_type = dflags & AF_CMP_TYPE_MASK;
+	unsigned ddst_type = dflags & AF_DST_TYPE_MASK;
+	int nds;
+
+	// TODO we can handle some of these cases,
+	// though probably this shouldn't happen
+	if (cmp_type != AF_FLOAT_CMP && ddst_type == AF_FLOAT_DST)
+		return;
+
+	if (d->src[0]->is_const() && d->src[0]->literal_value == literal(0))
+		nds = 1;
+	else if ((d->src[1]->is_const() &&
+			d->src[1]->literal_value == literal(0)))
+		nds = 0;
+	else
+		return;
+
+	// can't propagate ABS modifier to CNDcc because it's OP3
+	if (d->bc.src[nds].abs)
+		return;
+
+	// TODO we can handle some cases for uint comparison
+	if (dcmp_type == AF_UINT_CMP)
+		return;
+
+	if (dcc == AF_CC_NE) {
+		dcc = AF_CC_E;
+		swap = !swap;
+	}
+
+	if (nds == 1) {
+		switch (dcc) {
+		case AF_CC_GT: dcc = AF_CC_GE; swap = !swap; break;
+		case AF_CC_GE: dcc = AF_CC_GT; swap = !swap; break;
+		default: break;
+		}
+	}
+
+	a->src[0] = d->src[nds];
+	a->bc.src[0] = d->bc.src[nds];
+
+	if (swap) {
+		std::swap(a->src[1], a->src[2]);
+		std::swap(a->bc.src[1], a->bc.src[2]);
+	}
+
+	a->bc.set_op(get_cndcc_op(dcc, dcmp_type));
 
-	//TODO
 }
 
 bool peephole::get_bool_flt_to_int_source(alu_node* &a) {
-- 
2.30.2