From: Kenneth Graunke
Date: Sat, 28 Jun 2014 23:08:39 +0000 (-0700)
Subject: i965/eu: Emulate F32TO16 and F16TO32 on Broadwell.
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=aafdf9eef481a77810258b828e2a0b4e3c0aa696;p=mesa.git

i965/eu: Emulate F32TO16 and F16TO32 on Broadwell.

When we combine the Gen4-7 and Gen8+ generators, we'll need to handle
half float packing/unpacking functions somehow. The Gen8+ generator
code today just emulates the behavior of the Gen7 F32TO16/F16TO32
instructions, including the align16 mode bugs.

Rather than messing with fs_generator/vec4_generator, I decided to just
emulate the instructions at the brw_eu_emit.c layer.

v2: Change gen >= 7 asserts to gen == 7 (suggested by Chris Forbes).
    Fix regressions on Haswell in VS tests due to type assertions.

Signed-off-by: Kenneth Graunke
Reviewed-by: Chris Forbes
Reviewed-by: Matt Turner
---

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index a1acd785743..4d1d6ce510b 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1004,8 +1004,6 @@ ALU2(XOR)
 ALU2(SHR)
 ALU2(SHL)
 ALU2(ASR)
-ALU1(F32TO16)
-ALU1(F16TO32)
 ALU1(FRC)
 ALU1(RNDD)
 ALU2(MAC)
@@ -1110,6 +1108,56 @@ brw_MUL(struct brw_compile *p, struct brw_reg dest,
    return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
 }
 
+brw_inst *
+brw_F32TO16(struct brw_compile *p, struct brw_reg dst, struct brw_reg src)
+{
+   const struct brw_context *brw = p->brw;
+   bool align16 = brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16;
+
+   if (align16) {
+      assert(dst.type == BRW_REGISTER_TYPE_UD);
+   } else {
+      assert(dst.type == BRW_REGISTER_TYPE_W ||
+             dst.type == BRW_REGISTER_TYPE_UW ||
+             dst.type == BRW_REGISTER_TYPE_HF);
+   }
+
+   if (brw->gen >= 8) {
+      if (align16) {
+         /* Emulate the Gen7 zeroing bug (see comments in vec4_visitor's
+          * emit_pack_half_2x16 method.)
+          */
+         brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_UD), brw_imm_ud(0u));
+      }
+      return brw_MOV(p, retype(dst, BRW_REGISTER_TYPE_HF), src);
+   } else {
+      assert(brw->gen == 7);
+      return brw_alu1(p, BRW_OPCODE_F32TO16, dst, src);
+   }
+}
+
+brw_inst *
+brw_F16TO32(struct brw_compile *p, struct brw_reg dst, struct brw_reg src)
+{
+   const struct brw_context *brw = p->brw;
+   bool align16 = brw_inst_access_mode(brw, p->current) == BRW_ALIGN_16;
+
+   if (align16) {
+      assert(src.type == BRW_REGISTER_TYPE_UD);
+   } else {
+      assert(src.type == BRW_REGISTER_TYPE_W ||
+             src.type == BRW_REGISTER_TYPE_UW ||
+             src.type == BRW_REGISTER_TYPE_HF);
+   }
+
+   if (brw->gen >= 8) {
+      return brw_MOV(p, dst, retype(src, BRW_REGISTER_TYPE_HF));
+   } else {
+      assert(brw->gen == 7);
+      return brw_alu1(p, BRW_OPCODE_F16TO32, dst, src);
+   }
+}
+
 void
 brw_NOP(struct brw_compile *p)
 {