From e0579c50173514688078dd7f350f71d9ac4d06e7 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 2 Sep 2016 18:49:20 -0700 Subject: [PATCH] i965: Add 64-bit integer support for conversions and bitcasts v2 (idr): Make the "from" type in a cast unsized. This reduces the number of required cast operations at the expense of slightly more complex code. However, this will be a dramatic improvement when other sized integer types are added. Suggested by Connor. Signed-off-by: Ian Romanick Reviewed-by: Matt Turner --- .../dri/i965/brw_fs_channel_expressions.cpp | 8 +++-- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 32 +++++++++++++++++-- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index c7500e2f579..45934bcdf71 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -86,6 +86,8 @@ channel_expressions_predicate(ir_instruction *ir) case ir_binop_interpolate_at_offset: case ir_binop_interpolate_at_sample: case ir_unop_pack_double_2x32: + case ir_unop_pack_int_2x32: + case ir_unop_pack_uint_2x32: return false; default: break; @@ -180,6 +182,8 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) case ir_binop_interpolate_at_sample: /* We scalarize these in NIR, so no need to do it here */ case ir_unop_pack_double_2x32: + case ir_unop_pack_int_2x32: + case ir_unop_pack_uint_2x32: return visit_continue; default: @@ -428,6 +432,8 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) unreachable("not reached: expression operates on scalars only"); case ir_unop_pack_double_2x32: + case ir_unop_pack_int_2x32: + case ir_unop_pack_uint_2x32: unreachable("not reached: to be lowered in NIR, should've been skipped"); case ir_unop_frexp_sig: @@ -461,9 +467,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) case ir_unop_d2u64: case 
ir_unop_u642i64: case ir_unop_i642u64: - case ir_unop_pack_int_2x32: case ir_unop_unpack_int_2x32: - case ir_unop_pack_uint_2x32: case ir_unop_unpack_uint_2x32: unreachable("unsupported"); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 8f745dff440..67ee94a4e10 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -653,6 +653,13 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) case nir_op_f2d: case nir_op_i2d: case nir_op_u2d: + case nir_op_f2i64: + case nir_op_f2u64: + case nir_op_i2i64: + case nir_op_i2u64: + case nir_op_u2i64: + case nir_op_u2u64: + case nir_op_b2i64: /* CHV PRM, vol07, 3D Media GPGPU Engine, Register Region Restrictions: * * "When source or destination is 64b (...), regioning in Align1 @@ -666,7 +673,9 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) * data elements aligned to 64-bit. This restriction does not apply to * BDW and later. 
*/ - if (devinfo->is_cherryview || devinfo->is_broxton) { + if (nir_dest_bit_size(instr->dest.dest) == 64 && + nir_src_bit_size(instr->src[0].src) == 32 && + (devinfo->is_cherryview || devinfo->is_broxton)) { fs_reg tmp = bld.vgrf(result.type, 1); tmp = subscript(tmp, op[0].type, 0); inst = bld.MOV(tmp, op[0]); @@ -678,8 +687,12 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) case nir_op_d2f: case nir_op_d2i: case nir_op_d2u: - inst = bld.MOV(result, op[0]); - inst->saturate = instr->dest.saturate; + if (instr->op == nir_op_b2i64) { + bld.MOV(result, negate(op[0])); + } else { + inst = bld.MOV(result, op[0]); + inst->saturate = instr->dest.saturate; + } break; case nir_op_f2i: @@ -1228,6 +1241,19 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) break; } + case nir_op_pack_int_2x32_split: + bld.emit(FS_OPCODE_PACK, result, op[0], op[1]); + break; + + case nir_op_unpack_int_2x32_split_x: + case nir_op_unpack_int_2x32_split_y: { + if (instr->op == nir_op_unpack_int_2x32_split_x) + bld.MOV(result, subscript(op[0], BRW_REGISTER_TYPE_UD, 0)); + else + bld.MOV(result, subscript(op[0], BRW_REGISTER_TYPE_UD, 1)); + break; + } + case nir_op_fpow: inst = bld.emit(SHADER_OPCODE_POW, result, op[0], op[1]); inst->saturate = instr->dest.saturate; -- 2.30.2