nv50,nvc0: handle SQRT lowering inside the driver

author Ilia Mirkin <imirkin@alum.mit.edu>

Sun, 13 Mar 2016 02:26:21 +0000 (21:26 -0500)

committer Ilia Mirkin <imirkin@alum.mit.edu>

Sun, 13 Mar 2016 17:17:24 +0000 (13:17 -0400)
author Ilia Mirkin <imirkin@alum.mit.edu>
Sun, 13 Mar 2016 02:26:21 +0000 (21:26 -0500)
committer Ilia Mirkin <imirkin@alum.mit.edu>
Sun, 13 Mar 2016 17:17:24 +0000 (13:17 -0400)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp

index f58cf97646ebca956950302096e36a5b45aaf733..84ebfdb1cba2235f7ede8c5339f704b38dfd035a 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp
@@ -585,6 +585,7 @@ BuildUtil::split64BitOpPostRA(Function *fn, Instruction *i,
           return NULL;
        srcNr = 2;
        break;
+   case OP_SELP: srcNr = 3; break;
     default:
        // TODO when needed
        return NULL;
@@ -601,7 +602,10 @@ BuildUtil::split64BitOpPostRA(Function *fn, Instruction *i,
  
     for (int s = 0; s < srcNr; ++s) {
        if (lo->getSrc(s)->reg.size < 8) {
-         hi->setSrc(s, zero);
+         if (s == 2)
+            hi->setSrc(s, lo->getSrc(s));
+         else
+            hi->setSrc(s, zero);
        } else {
           if (lo->getSrc(s)->refCount() > 1)
              lo->setSrc(s, cloneShallow(fn, lo->getSrc(s)));
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp

index b06d86ad7607cc3934fc83decbe819254a380dc6..d284446f5d90cc9fa86aaf906687f94c84461b5b 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -616,6 +616,7 @@ static nv50_ir::operation translateOpcode(uint opcode)
  
     NV50_IR_OPCODE_CASE(RCP, RCP);
     NV50_IR_OPCODE_CASE(RSQ, RSQ);
+   NV50_IR_OPCODE_CASE(SQRT, SQRT);
  
     NV50_IR_OPCODE_CASE(MUL, MUL);
     NV50_IR_OPCODE_CASE(ADD, ADD);
@@ -2689,6 +2690,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
     case TGSI_OPCODE_FLR:
     case TGSI_OPCODE_TRUNC:
     case TGSI_OPCODE_RCP:
+   case TGSI_OPCODE_SQRT:
     case TGSI_OPCODE_IABS:
     case TGSI_OPCODE_INEG:
     case TGSI_OPCODE_NOT:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp

index 8752b0c8c542b6afeb90ec40d9382fc1e3a0beb7..12c5f6996037c5c19dd3e4f26ba5e92fc4a5e79a 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -1203,10 +1203,9 @@ NV50LoweringPreSSA::handleDIV(Instruction *i)
  bool
  NV50LoweringPreSSA::handleSQRT(Instruction *i)
  {
-   Instruction *rsq = bld.mkOp1(OP_RSQ, TYPE_F32,
-                                bld.getSSA(), i->getSrc(0));
-   i->op = OP_MUL;
-   i->setSrc(1, rsq->getDef(0));
+   bld.setPosition(i, true);
+   i->op = OP_RSQ;
+   bld.mkOp1(OP_RCP, i->dType, i->getDef(0), i->getDef(0));
  
     return true;
  }
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp

index d181f1574f1811bc0843b7e72ad413c78e85e02d..d0936d88d60f0838f59619e8420e08c9d5204a69 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1778,22 +1778,21 @@ NVC0LoweringPass::handleMOD(Instruction *i)
  bool
  NVC0LoweringPass::handleSQRT(Instruction *i)
  {
-   Value *pred = bld.getSSA(1, FILE_PREDICATE);
-   Value *zero = bld.getSSA();
-   Instruction *rsq;
-
-   bld.mkOp1(OP_MOV, TYPE_U32, zero, bld.mkImm(0));
-   if (i->dType == TYPE_F64)
-      zero = bld.mkOp2v(OP_MERGE, TYPE_U64, bld.getSSA(8), zero, zero);
-   bld.mkCmp(OP_SET, CC_LE, i->dType, pred, i->dType, i->getSrc(0), zero);
-   bld.mkOp1(OP_MOV, i->dType, i->getDef(0), zero)->setPredicate(CC_P, pred);
-   rsq = bld.mkOp1(OP_RSQ, i->dType,
-                   bld.getSSA(typeSizeof(i->dType)), i->getSrc(0));
-   rsq->setPredicate(CC_NOT_P, pred);
-   i->op = OP_MUL;
-   i->setSrc(1, rsq->getDef(0));
-   i->setPredicate(CC_NOT_P, pred);
-
+   if (i->dType == TYPE_F64) {
+      Value *pred = bld.getSSA(1, FILE_PREDICATE);
+      Value *zero = bld.loadImm(NULL, 0.0d);
+      Value *dst = bld.getSSA(8);
+      bld.mkOp1(OP_RSQ, i->dType, dst, i->getSrc(0));
+      bld.mkCmp(OP_SET, CC_LE, i->dType, pred, i->dType, i->getSrc(0), zero);
+      bld.mkOp3(OP_SELP, TYPE_U64, dst, zero, dst, pred);
+      i->op = OP_MUL;
+      i->setSrc(1, dst);
+      // TODO: Handle this properly with a library function
+   } else {
+      bld.setPosition(i, true);
+      i->op = OP_RSQ;
+      bld.mkOp1(OP_RCP, i->dType, i->getDef(0), i->getDef(0));
+   }
  
     return true;
  }
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c

index 28e0ed3c225790717a39100abe7398ab44e6bd7a..5836bb23764590cdcf9f03f02b290b0e4965a033 100644 (file)
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -305,7 +305,7 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
     case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
        return 1;
     case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
-      return 0;
+      return 1;
     case PIPE_SHADER_CAP_SUBROUTINES:
        return 0; /* please inline, or provide function declarations */
     case PIPE_SHADER_CAP_INTEGERS:
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c

index 30afdf2f03c9f7300ec4c9b0995034fcdd03cd87..3c5b1da20639bf3671ae311db296dfa9a8ab00c1 100644 (file)
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -328,7 +328,7 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
     case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
        return 1;
     case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
-      return 0;
+      return 1;
     case PIPE_SHADER_CAP_SUBROUTINES:
        return 1;
     case PIPE_SHADER_CAP_INTEGERS:
author	Ilia Mirkin <imirkin@alum.mit.edu>
	Sun, 13 Mar 2016 02:26:21 +0000 (21:26 -0500)
committer	Ilia Mirkin <imirkin@alum.mit.edu>
	Sun, 13 Mar 2016 17:17:24 +0000 (13:17 -0400)
src/gallium/drivers/nouveau/codegen/nv50_ir_build_util.cpp		patch \| blob \| history
src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp		patch \| blob \| history
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp		patch \| blob \| history
src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp		patch \| blob \| history
src/gallium/drivers/nouveau/nv50/nv50_screen.c		patch \| blob \| history
src/gallium/drivers/nouveau/nvc0/nvc0_screen.c		patch \| blob \| history