From: Ilia Mirkin Date: Sat, 7 Nov 2015 00:13:35 +0000 (-0500) Subject: nv50/ir: add support for const-folding OP_CVT with F64 source/dest X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2f9aaed7499499679d44e47b7a070df237f77683;p=mesa.git nv50/ir: add support for const-folding OP_CVT with F64 source/dest Signed-off-by: Ilia Mirkin --- diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp index ece6ce40643..dca799dd9b5 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp @@ -392,12 +392,24 @@ BuildUtil::mkImm(float f) return mkImm(u.u32); } +ImmediateValue * +BuildUtil::mkImm(double d) +{ + return new_ImmediateValue(prog, d); +} + Value * BuildUtil::loadImm(Value *dst, float f) { return mkOp1v(OP_MOV, TYPE_F32, dst ? dst : getScratch(), mkImm(f)); } +Value * +BuildUtil::loadImm(Value *dst, double d) +{ + return mkOp1v(OP_MOV, TYPE_F64, dst ? dst : getScratch(), mkImm(d)); +} + Value * BuildUtil::loadImm(Value *dst, uint32_t u) { diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h index 0d544581697..8f3bf77949c 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.h @@ -90,12 +90,14 @@ public: void mkClobber(DataFile file, uint32_t regMask, int regUnitLog2); ImmediateValue *mkImm(float); + ImmediateValue *mkImm(double); ImmediateValue *mkImm(uint32_t); ImmediateValue *mkImm(uint64_t); ImmediateValue *mkImm(int i) { return mkImm((uint32_t)i); } Value *loadImm(Value *dst, float); + Value *loadImm(Value *dst, double); Value *loadImm(Value *dst, uint32_t); Value *loadImm(Value *dst, uint64_t); diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index b952c760a21..f0955978dc8 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -1164,6 +1164,11 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) #define CASE(type, dst, fmin, fmax, imin, imax, umin, umax) \ case type: \ switch (i->sType) { \ + case TYPE_F64: \ + res.data.dst = util_iround(i->saturate ? \ + CLAMP(imm0.reg.data.f64, fmin, fmax) : \ + imm0.reg.data.f64); \ + break; \ case TYPE_F32: \ res.data.dst = util_iround(i->saturate ? \ CLAMP(imm0.reg.data.f32, fmin, fmax) : \ @@ -1201,6 +1206,11 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) CASE(TYPE_S32, s32, INT32_MIN, INT32_MAX, INT32_MIN, INT32_MAX, 0, INT32_MAX); case TYPE_F32: switch (i->sType) { + case TYPE_F64: + res.data.f32 = i->saturate ? + CLAMP(imm0.reg.data.f64, 0.0f, 1.0f) : + imm0.reg.data.f64; + break; case TYPE_F32: res.data.f32 = i->saturate ? CLAMP(imm0.reg.data.f32, 0.0f, 1.0f) : @@ -1215,6 +1225,27 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) } i->setSrc(0, bld.mkImm(res.data.f32)); break; + case TYPE_F64: + switch (i->sType) { + case TYPE_F64: + res.data.f64 = i->saturate ? + CLAMP(imm0.reg.data.f64, 0.0f, 1.0f) : + imm0.reg.data.f64; + break; + case TYPE_F32: + res.data.f64 = i->saturate ? + CLAMP(imm0.reg.data.f32, 0.0f, 1.0f) : + imm0.reg.data.f32; + break; + case TYPE_U16: res.data.f64 = (double) imm0.reg.data.u16; break; + case TYPE_U32: res.data.f64 = (double) imm0.reg.data.u32; break; + case TYPE_S16: res.data.f64 = (double) imm0.reg.data.s16; break; + case TYPE_S32: res.data.f64 = (double) imm0.reg.data.s32; break; + default: + return; + } + i->setSrc(0, bld.mkImm(res.data.f64)); + break; default: return; }