From 3079dcb00c19aa4773d1a1133bd8c4d1a48e4375 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 23 Jun 2016 23:16:10 -0700 Subject: [PATCH] glsl: Add lowering pass for ir_quadop_bitfield_insert Signed-off-by: Ian Romanick Reviewed-by: Matt Turner --- src/compiler/glsl/ir_optimization.h | 1 + src/compiler/glsl/lower_instructions.cpp | 73 ++++++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h index 9550052d6b0..6fda9f62b71 100644 --- a/src/compiler/glsl/ir_optimization.h +++ b/src/compiler/glsl/ir_optimization.h @@ -44,6 +44,7 @@ #define DFREXP_DLDEXP_TO_ARITH 0x1000 #define BIT_COUNT_TO_MATH 0x02000 #define EXTRACT_TO_SHIFTS 0x04000 +#define INSERT_TO_SHIFTS 0x08000 /** * \see class lower_packing_builtins_visitor diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp index 0d103d27134..902294397ab 100644 --- a/src/compiler/glsl/lower_instructions.cpp +++ b/src/compiler/glsl/lower_instructions.cpp @@ -161,6 +161,7 @@ private: void dsign_to_csel(ir_expression *); void bit_count_to_math(ir_expression *); void extract_to_shifts(ir_expression *); + void insert_to_shifts(ir_expression *); }; } /* anonymous namespace */ @@ -1076,6 +1077,72 @@ lower_instructions_visitor::extract_to_shifts(ir_expression *ir) this->progress = true; } +void +lower_instructions_visitor::insert_to_shifts(ir_expression *ir) +{ + ir_constant *c1; + ir_constant *c32; + ir_constant *cFFFFFFFF; + ir_variable *offset = + new(ir) ir_variable(ir->operands[0]->type, "offset", ir_var_temporary); + ir_variable *bits = + new(ir) ir_variable(ir->operands[0]->type, "bits", ir_var_temporary); + ir_variable *mask = + new(ir) ir_variable(ir->operands[0]->type, "mask", ir_var_temporary); + + if (ir->operands[0]->type->base_type == GLSL_TYPE_INT) { + c1 = new(ir) ir_constant(int(1), ir->operands[0]->type->vector_elements); + c32 = new(ir) ir_constant(int(32), 
ir->operands[0]->type->vector_elements); + cFFFFFFFF = new(ir) ir_constant(int(0xFFFFFFFF), ir->operands[0]->type->vector_elements); + } else { + assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT); + + c1 = new(ir) ir_constant(1u, ir->operands[0]->type->vector_elements); + c32 = new(ir) ir_constant(32u, ir->operands[0]->type->vector_elements); + cFFFFFFFF = new(ir) ir_constant(0xFFFFFFFFu, ir->operands[0]->type->vector_elements); + } + + base_ir->insert_before(offset); + base_ir->insert_before(assign(offset, ir->operands[2])); + + base_ir->insert_before(bits); + base_ir->insert_before(assign(bits, ir->operands[3])); + + /* At least some hardware treats (x << y) as (x << (y%32)). This means + * we'd get a mask of 0 when bits is 32. Special case it. + * + * mask = bits == 32 ? 0xffffffff : ((1u << bits) - 1u) << offset; + * + * Section 8.8 (Integer Functions) of the GLSL 4.50 spec says: + * + * The result will be undefined if offset or bits is negative, or if the + * sum of offset and bits is greater than the number of bits used to + * store the operand. + * + * Since it's undefined, there are a couple other ways this could be + * implemented. The other way that was considered was to put the csel + * around the whole thing: + * + * final_result = bits == 32 ? insert : ... 
; + */ + base_ir->insert_before(mask); + + base_ir->insert_before(assign(mask, csel(equal(bits, c32), + cFFFFFFFF, + lshift(sub(lshift(c1, bits), + c1->clone(ir, NULL)), + offset)))); + + /* result = (base & ~mask) | ((insert << offset) & mask) */ + ir->operation = ir_binop_bit_or; + ir->operands[0] = bit_and(ir->operands[0], bit_not(mask)); + ir->operands[1] = bit_and(lshift(ir->operands[1], offset), mask); + ir->operands[2] = NULL; + ir->operands[3] = NULL; + + this->progress = true; +} + ir_visitor_status lower_instructions_visitor::visit_leave(ir_expression *ir) { @@ -1188,6 +1255,12 @@ lower_instructions_visitor::visit_leave(ir_expression *ir) extract_to_shifts(ir); break; + case ir_quadop_bitfield_insert: + if (lowering(INSERT_TO_SHIFTS)) + insert_to_shifts(ir); + break; + + default: return visit_continue; } -- 2.30.2