From dafd050883660a42b2902388826cfecbfc9b8b83 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Tue, 9 Apr 2013 22:43:05 -0700 Subject: [PATCH] glsl: Add a pass to lower bitfield-insert into bfm+bfi. i965/Gen7+ and Radeon/Evergreen+ have bfm/bfi instructions to implement bitfieldInsert() from ARB_gpu_shader5. v2: Add ir_binop_bfm and ir_triop_bfi to st_glsl_to_tgsi.cpp. Remove spurious temporary assignment and dereference. Reviewed-by: Chris Forbes --- src/glsl/ir.cpp | 2 ++ src/glsl/ir.h | 18 ++++++++++ src/glsl/ir_optimization.h | 1 + src/glsl/ir_validate.cpp | 12 +++++++ src/glsl/lower_instructions.cpp | 39 ++++++++++++++++++++++ src/mesa/program/ir_to_mesa.cpp | 2 ++ src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 2 ++ 7 files changed, 76 insertions(+) diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp index 2c989c91678..2c545259380 100644 --- a/src/glsl/ir.cpp +++ b/src/glsl/ir.cpp @@ -508,8 +508,10 @@ static const char *const operator_strs[] = { "max", "pow", "packHalf2x16_split", + "bfm", "ubo_load", "lrp", + "bfi", "bitfield_extract", "bitfield_insert", "vector", diff --git a/src/glsl/ir.h b/src/glsl/ir.h index 470c08ca00d..6783ecaf2e9 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -1118,6 +1118,15 @@ enum ir_expression_operation { ir_binop_pack_half_2x16_split, /*@}*/ + /** + * \name First half of a lowered bitfieldInsert() operation. + * + * \see lower_instructions::bitfield_insert_to_bfm_bfi + */ + /*@{*/ + ir_binop_bfm, + /*@}*/ + /** * Load a value the size of a given GLSL type from a uniform block. * @@ -1133,6 +1142,15 @@ enum ir_expression_operation { ir_triop_lrp, + /** + * \name Second half of a lowered bitfieldInsert() operation. + * + * \see lower_instructions::bitfield_insert_to_bfm_bfi + */ + /*@{*/ + ir_triop_bfi, + /*@}*/ + ir_triop_bitfield_extract, /** diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index a8885d7225b..49b1475b542 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -37,6 +37,7 @@ #define MOD_TO_FRACT 0x20 #define INT_DIV_TO_MUL_RCP 0x40 #define LRP_TO_ARITH 0x80 +#define BITFIELD_INSERT_TO_BFM_BFI 0x100 /** * \see class lower_packing_builtins_visitor diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp index 4a8df6953ee..26f09c71f63 100644 --- a/src/glsl/ir_validate.cpp +++ b/src/glsl/ir_validate.cpp @@ -474,6 +474,12 @@ ir_validate::visit_leave(ir_expression *ir) assert(ir->operands[1]->type == glsl_type::float_type); break; + case ir_binop_bfm: + assert(ir->type->is_integer()); + assert(ir->operands[0]->type->is_integer()); + assert(ir->operands[1]->type->is_integer()); + break; + case ir_binop_ubo_load: assert(ir->operands[0]->as_constant()); assert(ir->operands[0]->type == glsl_type::uint_type); @@ -487,6 +493,12 @@ ir_validate::visit_leave(ir_expression *ir) assert(ir->operands[2]->type == ir->operands[0]->type || ir->operands[2]->type == glsl_type::float_type); break; + case ir_triop_bfi: + assert(ir->operands[0]->type->is_integer()); + assert(ir->operands[1]->type == ir->operands[2]->type); + assert(ir->operands[1]->type == ir->type); + break; + case ir_triop_bitfield_extract: assert(ir->operands[0]->type == ir->type); assert(ir->operands[1]->type == glsl_type::int_type); diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp index 1ce7b7c9df4..d32ec80d6bb 100644 --- a/src/glsl/lower_instructions.cpp +++ b/src/glsl/lower_instructions.cpp @@ -38,6 +38,7 @@ * - LOG_TO_LOG2 * - MOD_TO_FRACT * - LRP_TO_ARITH + * - BITFIELD_INSERT_TO_BFM_BFI * * SUB_TO_ADD_NEG: * --------------- @@ -84,6 +85,15 @@ * LRP_TO_ARITH: * ------------- * Converts ir_triop_lrp to (op0 * (1.0f - op2)) + (op1 * op2). + * + * BITFIELD_INSERT_TO_BFM_BFI: + * --------------------------- + * Breaks ir_quadop_bitfield_insert into ir_binop_bfm (bitfield mask) and + * ir_triop_bfi (bitfield insert). + * + * Many GPUs implement the bitfieldInsert() built-in from ARB_gpu_shader_5 + * with a pair of instructions. + * */ #include "main/core.h" /* for M_LOG2E */ @@ -114,6 +124,7 @@ private: void pow_to_exp2(ir_expression *); void log_to_log2(ir_expression *); void lrp_to_arith(ir_expression *); + void bitfield_insert_to_bfm_bfi(ir_expression *); }; /** @@ -298,6 +309,29 @@ lower_instructions_visitor::lrp_to_arith(ir_expression *ir) this->progress = true; } +void +lower_instructions_visitor::bitfield_insert_to_bfm_bfi(ir_expression *ir) +{ + /* Translates + * ir_quadop_bitfield_insert base insert offset bits + * into + * ir_triop_bfi (ir_binop_bfm bits offset) insert base + */ + + ir_rvalue *base_expr = ir->operands[0]; + + ir->operation = ir_triop_bfi; + ir->operands[0] = new(ir) ir_expression(ir_binop_bfm, + ir->type->get_base_type(), + ir->operands[3], + ir->operands[2]); + /* ir->operands[1] is still the value to insert. */ + ir->operands[2] = base_expr; + ir->operands[3] = NULL; + + this->progress = true; +} + ir_visitor_status lower_instructions_visitor::visit_leave(ir_expression *ir) { @@ -339,6 +373,11 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) lrp_to_arith(ir); break; + case ir_quadop_bitfield_insert: + if (lowering(BITFIELD_INSERT_TO_BFM_BFI)) + bitfield_insert_to_bfm_bfi(ir); + break; + default: return visit_continue; } diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index c6f6bf42ea0..084846201c9 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1489,6 +1489,8 @@ ir_to_mesa_visitor::visit(ir_expression *ir) emit(ir, OPCODE_LRP, result_dst, op[2], op[1], op[0]); break; + case ir_binop_bfm: + case ir_triop_bfi: case ir_triop_bitfield_extract: case ir_quadop_bitfield_insert: assert(!"not supported"); diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index ad936bd20d0..ed314a0497f 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -1973,6 +1973,8 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) case ir_unop_bit_count: case ir_unop_find_msb: case ir_unop_find_lsb: + case ir_binop_bfm: + case ir_triop_bfi: case ir_triop_bitfield_extract: case ir_quadop_bitfield_insert: case ir_quadop_vector: -- 2.30.2