gk110/ir: Use the new rcp/rsq in library

author Boyan Ding <boyan.j.ding@gmail.com>

Thu, 9 Mar 2017 05:55:19 +0000 (13:55 +0800)

committer Ilia Mirkin <imirkin@alum.mit.edu>

Thu, 7 Feb 2019 00:35:57 +0000 (19:35 -0500)
author Boyan Ding <boyan.j.ding@gmail.com>
Thu, 9 Mar 2017 05:55:19 +0000 (13:55 +0800)
committer Ilia Mirkin <imirkin@alum.mit.edu>
Thu, 7 Feb 2019 00:35:57 +0000 (19:35 -0500)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp

index 49425b98b9137058c986b1c5fdc03aeab5358534..993d01c1e447ca0cda88196b5ce21f117acde264 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -1119,6 +1119,7 @@ Program::Program(Type type, Target *arch)
     binSize = 0;
  
     maxGPR = -1;
+   fp64 = false;
  
     main = new Function(this, "MAIN", ~0);
     calls.insert(&main->call);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h

index 8085bb2f542072da91b474d3ccbc7662c2017c0a..8d32a25ec23823a22a17c384c4febfef9ebd414b 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -1311,6 +1311,7 @@ public:
     uint32_t tlsSize; // size required for FILE_MEMORY_LOCAL
  
     int maxGPR;
+   bool fp64;
  
     MemoryPool mem_Instruction;
     MemoryPool mem_CmpInstruction;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp

index 34cb7dc3e4b77930c7214a434a68fea16bdaca4a..65b26dccf22ad440138868a1eb98f51f333d67ac 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -83,6 +83,38 @@ NVC0LegalizeSSA::handleDIV(Instruction *i)
     delete_Instruction(prog, i);
  }
  
+void
+NVC0LegalizeSSA::handleRCPRSQLib(Instruction *i, Value *src[])
+{  // Replaces i (OP_RCP; any other opcode is treated as RSQ) with a CALL to the matching F64 builtin.
+   FlowInstruction *call;
+   Value *def[2];
+   int builtin;
+   // Pass the operand in fixed registers 0 and 1; src[] holds the two pieces the caller produced with mkSplit.
+   def[0] = bld.mkMovToReg(0, src[0])->getDef(0); // NOTE: both def[] values stored here are overwritten below before any read
+   def[1] = bld.mkMovToReg(1, src[1])->getDef(0);
+   // Pick the library routine: RCP for OP_RCP, RSQ for every other opcode that reaches here.
+   if (i->op == OP_RCP)
+      builtin = NVC0_BUILTIN_RCP_F64;
+   else
+      builtin = NVC0_BUILTIN_RSQ_F64;
+   // Emit the call, then copy registers 0 and 1 back out into fresh SSA values.
+   call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL);
+   def[0] = bld.getSSA();
+   def[1] = bld.getSSA();
+   bld.mkMovFromReg(def[0], 0);
+   bld.mkMovFromReg(def[1], 1);
+   bld.mkClobber(FILE_GPR, 0x3fc, 2); // presumably the GPRs the routine scratches; TODO confirm mask against the library source
+   bld.mkClobber(FILE_PREDICATE, i->op == OP_RSQ ? 0x3 : 0x1, 0); // RSQ is given a wider predicate mask (0x3) than RCP (0x1)
+   bld.mkOp2(OP_MERGE, TYPE_U64, i->getDef(0), def[0], def[1]); // merge the two read-back values into i's original def as U64
+   // Tag the flow instruction as a fixed, absolute call whose target is the builtin chosen above.
+   call->fixed = 1;
+   call->absolute = call->builtin = 1;
+   call->target.builtin = builtin;
+   delete_Instruction(prog, i);
+   // Flag the program; Program::emitBinary ORs this flag into info->io.fp64.
+   prog->fp64 = true;
+}
+
  void
  NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
  {
@@ -96,6 +128,12 @@ NVC0LegalizeSSA::handleRCPRSQ(Instruction *i)
     Value *src[2], *dst[2], *def = i->getDef(0);
     bld.mkSplit(src, 4, i->getSrc(0));
  
+   int chip = prog->getTarget()->getChipset();
+   if (chip >= NVISA_GK20A_CHIPSET && chip < NVISA_GM107_CHIPSET) {
+      handleRCPRSQLib(i, src);
+      return;
+   }
+
     // 2. We don't care about the low 32 bits of the destination. Stick a 0 in.
     dst[0] = bld.loadImm(NULL, 0);
     dst[1] = bld.getSSA();
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h

index 4679c56471b902667791b81e75b63a7218ae3248..0ce2a4b80f8167485b28e7b4a66955d7b7d477d9 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -62,6 +62,7 @@ private:
  
     // we want to insert calls to the builtin library only after optimization
     void handleDIV(Instruction *); // integer division, modulus
+   void handleRCPRSQLib(Instruction *, Value *[]);
     void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt
     void handleFTZ(Instruction *);
     void handleSET(CmpInstruction *);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp

index 9193a01f189874a7fb384529a4f34fbc9148a452..5c6d0570ae2b84516f6c609c3682949dbd5e1da0 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -399,6 +399,7 @@ Program::emitBinary(struct nv50_ir_prog_info *info)
           }
        }
     }
+   info->io.fp64 |= fp64;
     info->bin.relocData = emit->getRelocInfo();
     info->bin.fixupData = emit->getFixupInfo();
author	Boyan Ding <boyan.j.ding@gmail.com>
	Thu, 9 Mar 2017 05:55:19 +0000 (13:55 +0800)
committer	Ilia Mirkin <imirkin@alum.mit.edu>
	Thu, 7 Feb 2019 00:35:57 +0000 (19:35 -0500)
src/gallium/drivers/nouveau/codegen/nv50_ir.cpp		patch \| blob \| history
src/gallium/drivers/nouveau/codegen/nv50_ir.h		patch \| blob \| history
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp		patch \| blob \| history
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h		patch \| blob \| history
src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp		patch \| blob \| history