From 84d6130c21a8a570efefe54aa723f549b34c3256 Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Thu, 7 Jan 2016 16:01:51 -0800 Subject: [PATCH] glsl, nir: Make ir_triop_bitfield_extract a vectorized operation. We would like to be able to combine result.x = bitfieldExtract(src0.x, src1.x, src2.x); result.y = bitfieldExtract(src0.y, src1.y, src2.y); result.z = bitfieldExtract(src0.z, src1.z, src2.z); result.w = bitfieldExtract(src0.w, src1.w, src2.w); into a single ivec4 bitfieldExtract operation. This should be possible with most drivers. This patch changes the offset and bits parameters from scalar ints to ivecN or uvecN. The type of all three operands will be the same, for simplicity. Signed-off-by: Kenneth Graunke Reviewed-by: Matt Turner Reviewed-by: Ilia Mirkin --- src/glsl/builtin_functions.cpp | 8 +++++++- src/glsl/ir.h | 4 +--- src/glsl/ir_constant_expression.cpp | 6 +++--- src/glsl/ir_validate.cpp | 5 +++-- src/glsl/lower_packing_builtins.cpp | 4 ++-- src/glsl/nir/nir_opcodes.py | 8 ++++---- 6 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp index 38383bc1988..f2e2165e8c3 100644 --- a/src/glsl/builtin_functions.cpp +++ b/src/glsl/builtin_functions.cpp @@ -4889,12 +4889,18 @@ builtin_builder::_noise4(const glsl_type *type) ir_function_signature * builtin_builder::_bitfieldExtract(const glsl_type *type) { + bool is_uint = type->base_type == GLSL_TYPE_UINT; ir_variable *value = in_var(type, "value"); ir_variable *offset = in_var(glsl_type::int_type, "offset"); ir_variable *bits = in_var(glsl_type::int_type, "bits"); MAKE_SIG(type, gpu_shader5_or_es31, 3, value, offset, bits); - body.emit(ret(expr(ir_triop_bitfield_extract, value, offset, bits))); + operand cast_offset = is_uint ? i2u(offset) : operand(offset); + operand cast_bits = is_uint ? 
i2u(bits) : operand(bits); + + body.emit(ret(expr(ir_triop_bitfield_extract, value, + swizzle(cast_offset, SWIZZLE_XXXX, type->vector_elements), + swizzle(cast_bits, SWIZZLE_XXXX, type->vector_elements)))); return sig; } diff --git a/src/glsl/ir.h b/src/glsl/ir.h index 9af2fc16e16..5b845c6e856 100644 --- a/src/glsl/ir.h +++ b/src/glsl/ir.h @@ -1708,9 +1708,7 @@ public: operation == ir_binop_dot || operation == ir_binop_vector_extract || operation == ir_triop_vector_insert || - operation == ir_quadop_vector || - /* TODO: these can't currently be vectorized */ - operation == ir_triop_bitfield_extract; + operation == ir_quadop_vector; } /** diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp index f5b5bd87b6b..7613139306f 100644 --- a/src/glsl/ir_constant_expression.cpp +++ b/src/glsl/ir_constant_expression.cpp @@ -1588,10 +1588,10 @@ ir_expression::constant_expression_value(struct hash_table *variable_context) break; case ir_triop_bitfield_extract: { - int offset = op[1]->value.i[0]; - int bits = op[2]->value.i[0]; - for (unsigned c = 0; c < components; c++) { + int offset = op[1]->value.i[c]; + int bits = op[2]->value.i[c]; + if (bits == 0) data.u[c] = 0; else if (offset < 0 || bits < 0) diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp index fea9b760eca..94814799b9b 100644 --- a/src/glsl/ir_validate.cpp +++ b/src/glsl/ir_validate.cpp @@ -632,9 +632,10 @@ ir_validate::visit_leave(ir_expression *ir) break; case ir_triop_bitfield_extract: + assert(ir->type->is_integer()); assert(ir->operands[0]->type == ir->type); - assert(ir->operands[1]->type == glsl_type::int_type); - assert(ir->operands[2]->type == glsl_type::int_type); + assert(ir->operands[1]->type == ir->type); + assert(ir->operands[2]->type == ir->type); break; case ir_triop_vector_insert: diff --git a/src/glsl/lower_packing_builtins.cpp b/src/glsl/lower_packing_builtins.cpp index 19eeaa3775c..7f18238bc6e 100644 --- a/src/glsl/lower_packing_builtins.cpp +++ 
b/src/glsl/lower_packing_builtins.cpp @@ -365,11 +365,11 @@ private: if (op_mask & LOWER_PACK_USE_BFE) { /* u4.y = bitfield_extract(u, 8, 8); */ - factory.emit(assign(u4, bitfield_extract(u, constant(8), constant(8)), + factory.emit(assign(u4, bitfield_extract(u, constant(8u), constant(8u)), WRITEMASK_Y)); /* u4.z = bitfield_extract(u, 16, 8); */ - factory.emit(assign(u4, bitfield_extract(u, constant(16), constant(8)), + factory.emit(assign(u4, bitfield_extract(u, constant(16u), constant(8u)), WRITEMASK_Z)); } else { /* u4.y = (u >> 8u) & 0xffu; */ diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py index 3780628d1ea..855095f1f35 100644 --- a/src/glsl/nir/nir_opcodes.py +++ b/src/glsl/nir/nir_opcodes.py @@ -570,9 +570,9 @@ if (mask == 0) { """) opcode("ubitfield_extract", 0, tuint, - [0, 1, 1], [tuint, tint, tint], "", """ + [0, 0, 0], [tuint, tint, tint], "", """ unsigned base = src0; -int offset = src1.x, bits = src2.x; +int offset = src1, bits = src2; if (bits == 0) { dst = 0; } else if (bits < 0 || offset < 0 || offset + bits > 32) { @@ -582,9 +582,9 @@ if (bits == 0) { } """) opcode("ibitfield_extract", 0, tint, - [0, 1, 1], [tint, tint, tint], "", """ + [0, 0, 0], [tint, tint, tint], "", """ int base = src0; -int offset = src1.x, bits = src2.x; +int offset = src1, bits = src2; if (bits == 0) { dst = 0; } else if (offset < 0 || bits < 0 || offset + bits > 32) { -- 2.30.2