From 6e3265eae533a1bff4f23a4508c5d8e9ab23164d Mon Sep 17 00:00:00 2001 From: =?utf8?q?Samuel=20Iglesias=20Gons=C3=A1lvez?= Date: Fri, 24 Mar 2017 08:46:13 +0100 Subject: [PATCH] i965/vec4: split VEC4_OPCODE_FROM_DOUBLE into one opcode per destination's type MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This way we can set the destination type to double for all these new opcodes, avoiding the optimizer confusion that was happening before. Signed-off-by: Samuel Iglesias Gonsálvez [ Francisco Jerez: Drop no_spill workaround originally needed due to the bogus destination type of VEC4_OPCODE_FROM_DOUBLE. ] Reviewed-by: Francisco Jerez --- src/intel/compiler/brw_eu_defines.h | 4 +++- src/intel/compiler/brw_shader.cpp | 8 +++++-- src/intel/compiler/brw_vec4.cpp | 12 +++++++--- .../compiler/brw_vec4_copy_propagation.cpp | 4 +++- src/intel/compiler/brw_vec4_generator.cpp | 23 ++++++++++++++++-- src/intel/compiler/brw_vec4_nir.cpp | 24 ++++++++++++++----- src/intel/compiler/brw_vec4_reg_allocate.cpp | 12 ---------- 7 files changed, 60 insertions(+), 27 deletions(-) diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index f0b0d5c2a06..13a70f6f6a1 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -447,7 +447,9 @@ enum opcode { VEC4_OPCODE_MOV_BYTES, VEC4_OPCODE_PACK_BYTES, VEC4_OPCODE_UNPACK_UNIFORM, - VEC4_OPCODE_FROM_DOUBLE, + VEC4_OPCODE_DOUBLE_TO_F32, + VEC4_OPCODE_DOUBLE_TO_D32, + VEC4_OPCODE_DOUBLE_TO_U32, VEC4_OPCODE_TO_DOUBLE, VEC4_OPCODE_PICK_LOW_32BIT, VEC4_OPCODE_PICK_HIGH_32BIT, diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index 73bbc931352..304b4ecf4fa 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -326,8 +326,12 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) return "pack_bytes"; case VEC4_OPCODE_UNPACK_UNIFORM: return 
"unpack_uniform"; - case VEC4_OPCODE_FROM_DOUBLE: - return "double_to_single"; + case VEC4_OPCODE_DOUBLE_TO_F32: + return "double_to_f32"; + case VEC4_OPCODE_DOUBLE_TO_D32: + return "double_to_d32"; + case VEC4_OPCODE_DOUBLE_TO_U32: + return "double_to_u32"; case VEC4_OPCODE_TO_DOUBLE: return "single_to_double"; case VEC4_OPCODE_PICK_LOW_32BIT: diff --git a/src/intel/compiler/brw_vec4.cpp b/src/intel/compiler/brw_vec4.cpp index 386057e3e3c..0b92ba704e5 100644 --- a/src/intel/compiler/brw_vec4.cpp +++ b/src/intel/compiler/brw_vec4.cpp @@ -260,7 +260,9 @@ vec4_instruction::can_do_writemask(const struct gen_device_info *devinfo) { switch (opcode) { case SHADER_OPCODE_GEN4_SCRATCH_READ: - case VEC4_OPCODE_FROM_DOUBLE: + case VEC4_OPCODE_DOUBLE_TO_F32: + case VEC4_OPCODE_DOUBLE_TO_D32: + case VEC4_OPCODE_DOUBLE_TO_U32: case VEC4_OPCODE_TO_DOUBLE: case VEC4_OPCODE_PICK_LOW_32BIT: case VEC4_OPCODE_PICK_HIGH_32BIT: @@ -521,7 +523,9 @@ vec4_visitor::opt_reduce_swizzle() break; case VEC4_OPCODE_TO_DOUBLE: - case VEC4_OPCODE_FROM_DOUBLE: + case VEC4_OPCODE_DOUBLE_TO_F32: + case VEC4_OPCODE_DOUBLE_TO_D32: + case VEC4_OPCODE_DOUBLE_TO_U32: case VEC4_OPCODE_PICK_LOW_32BIT: case VEC4_OPCODE_PICK_HIGH_32BIT: case VEC4_OPCODE_SET_LOW_32BIT: @@ -2255,7 +2259,9 @@ static bool is_align1_df(vec4_instruction *inst) { switch (inst->opcode) { - case VEC4_OPCODE_FROM_DOUBLE: + case VEC4_OPCODE_DOUBLE_TO_F32: + case VEC4_OPCODE_DOUBLE_TO_D32: + case VEC4_OPCODE_DOUBLE_TO_U32: case VEC4_OPCODE_TO_DOUBLE: case VEC4_OPCODE_PICK_LOW_32BIT: case VEC4_OPCODE_PICK_HIGH_32BIT: diff --git a/src/intel/compiler/brw_vec4_copy_propagation.cpp b/src/intel/compiler/brw_vec4_copy_propagation.cpp index e7f6f93f8bd..c1ae32a2936 100644 --- a/src/intel/compiler/brw_vec4_copy_propagation.cpp +++ b/src/intel/compiler/brw_vec4_copy_propagation.cpp @@ -293,7 +293,9 @@ static bool is_align1_opcode(unsigned opcode) { switch (opcode) { - case VEC4_OPCODE_FROM_DOUBLE: + case VEC4_OPCODE_DOUBLE_TO_F32: + case 
VEC4_OPCODE_DOUBLE_TO_D32: + case VEC4_OPCODE_DOUBLE_TO_U32: case VEC4_OPCODE_TO_DOUBLE: case VEC4_OPCODE_PICK_LOW_32BIT: case VEC4_OPCODE_PICK_HIGH_32BIT: diff --git a/src/intel/compiler/brw_vec4_generator.cpp b/src/intel/compiler/brw_vec4_generator.cpp index 65f3a9a9f00..5be4ef7fd4b 100644 --- a/src/intel/compiler/brw_vec4_generator.cpp +++ b/src/intel/compiler/brw_vec4_generator.cpp @@ -1940,9 +1940,28 @@ generate_code(struct brw_codegen *p, break; } - case VEC4_OPCODE_FROM_DOUBLE: { + case VEC4_OPCODE_DOUBLE_TO_F32: + case VEC4_OPCODE_DOUBLE_TO_D32: + case VEC4_OPCODE_DOUBLE_TO_U32: { assert(type_sz(src[0].type) == 8); - assert(type_sz(dst.type) == 4); + assert(type_sz(dst.type) == 8); + + brw_reg_type dst_type; + + switch (inst->opcode) { + case VEC4_OPCODE_DOUBLE_TO_F32: + dst_type = BRW_REGISTER_TYPE_F; + break; + case VEC4_OPCODE_DOUBLE_TO_D32: + dst_type = BRW_REGISTER_TYPE_D; + break; + case VEC4_OPCODE_DOUBLE_TO_U32: + dst_type = BRW_REGISTER_TYPE_UD; + break; + default: + unreachable("Not supported conversion"); + } + dst = retype(dst, dst_type); brw_set_default_access_mode(p, BRW_ALIGN_1); diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp index 64371a16de5..9d9ded2b965 100644 --- a/src/intel/compiler/brw_vec4_nir.cpp +++ b/src/intel/compiler/brw_vec4_nir.cpp @@ -1183,16 +1183,28 @@ vec4_visitor::emit_conversion_from_double(dst_reg dst, src_reg src, return; } + enum opcode op; + switch (dst.type) { + case BRW_REGISTER_TYPE_D: + op = VEC4_OPCODE_DOUBLE_TO_D32; + break; + case BRW_REGISTER_TYPE_UD: + op = VEC4_OPCODE_DOUBLE_TO_U32; + break; + case BRW_REGISTER_TYPE_F: + op = VEC4_OPCODE_DOUBLE_TO_F32; + break; + default: + unreachable("Unknown conversion"); + } + dst_reg temp = dst_reg(this, glsl_type::dvec4_type); emit(MOV(temp, src)); - dst_reg temp2 = dst_reg(this, glsl_type::dvec4_type); - temp2 = retype(temp2, dst.type); - emit(VEC4_OPCODE_FROM_DOUBLE, temp2, src_reg(temp)) - ->size_written = 2 * REG_SIZE; + 
emit(op, temp2, src_reg(temp)); - emit(VEC4_OPCODE_PICK_LOW_32BIT, temp2, src_reg(retype(temp2, BRW_REGISTER_TYPE_DF))); - vec4_instruction *inst = emit(MOV(dst, src_reg(temp2))); + emit(VEC4_OPCODE_PICK_LOW_32BIT, retype(temp2, dst.type), src_reg(temp2)); + vec4_instruction *inst = emit(MOV(dst, src_reg(retype(temp2, dst.type)))); inst->saturate = saturate; } diff --git a/src/intel/compiler/brw_vec4_reg_allocate.cpp b/src/intel/compiler/brw_vec4_reg_allocate.cpp index e3b46cc2f7f..a0ba77b867c 100644 --- a/src/intel/compiler/brw_vec4_reg_allocate.cpp +++ b/src/intel/compiler/brw_vec4_reg_allocate.cpp @@ -447,18 +447,6 @@ vec4_visitor::evaluate_spill_costs(float *spill_costs, bool *no_spill) if (type_sz(inst->dst.type) == 8 && inst->exec_size != 8) no_spill[inst->dst.nr] = true; - /* FROM_DOUBLE opcodes are setup so that they use a dst register - * with a size of 2 even if they only produce a single-precison - * result (this is so that the opcode can use the larger register to - * produce a 64-bit aligned intermediary result as required by the - * hardware during the conversion process). This creates a problem for - * spilling though, because when we attempt to emit a spill for the - * dst we see a 32-bit destination and emit a scratch write that - * allocates a single spill register. - */ - if (inst->opcode == VEC4_OPCODE_FROM_DOUBLE) - no_spill[inst->dst.nr] = true; - /* We can't spill registers that mix 32-bit and 64-bit access (that * contain 64-bit data that is operated on via 32-bit instructions) */ -- 2.30.2