fix abs and start on rsq
author Zack Rusin <zack@tungstengraphics.com>
Fri, 16 May 2008 21:10:52 +0000 (17:10 -0400)
committer Zack Rusin <zack@tungstengraphics.com>
Sat, 17 May 2008 17:58:44 +0000 (13:58 -0400)
src/gallium/auxiliary/gallivm/instructionssoa.cpp
src/gallium/auxiliary/gallivm/instructionssoa.h
src/gallium/auxiliary/gallivm/soabuiltins.c
src/gallium/auxiliary/gallivm/tgsitollvm.cpp

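diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp
index 074dd0ecd6bc43fe949f57e4cf7b416030afa866..76049ade7c070b39dfe6f90e13d6d9984e93cba4 100644
--- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp
+++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp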
@@ -180,6 +180,7 @@ void InstructionsSoa::createFunctionMap()
    m_functionsMap[TGSI_OPCODE_MAX]   = "max";
    m_functionsMap[TGSI_OPCODE_POWER] = "pow";
    m_functionsMap[TGSI_OPCODE_LIT]   = "lit";
+   m_functionsMap[TGSI_OPCODE_RSQ]   = "rsq";
 }
 
 void InstructionsSoa::createDependencies()
@@ -191,8 +192,9 @@ void InstructionsSoa::createDependencies()
       m_builtinDependencies["pow"] = powDeps;
    }
    {
-      std::vector<std::string> absDeps(1);
+      std::vector<std::string> absDeps(2);
       absDeps[0] = "fabsf";
+      absDeps[1] = "absvec";
       m_builtinDependencies["abs"] = absDeps;
    }
    {
@@ -213,6 +215,14 @@ void InstructionsSoa::createDependencies()
       litDeps[3] = "powvec";
       m_builtinDependencies["lit"] = litDeps;
    }
+   {
+      std::vector<std::string> rsqDeps(4);
+      rsqDeps[0] = "sqrtf";
+      rsqDeps[1] = "sqrtvec";
+      rsqDeps[2] = "fabsf";
+      rsqDeps[3] = "absvec";
+      m_builtinDependencies["rsq"] = rsqDeps;
+   }
 }
 
 llvm::Function * InstructionsSoa::function(int op)
@@ -453,7 +463,9 @@ void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op)
       currentModule()->dump();
    } else {
       DenseMap<const Value*, Value *> val;
+      val[m_builtins->getFunction("fabsf")] = currentModule()->getFunction("fabsf");
       val[m_builtins->getFunction("powf")] = currentModule()->getFunction("powf");
+      val[m_builtins->getFunction("sqrtf")] = currentModule()->getFunction("sqrtf");
       func = CloneFunction(originalFunc, val);
 #if 0
       std::cout <<" replacing "<<m_builtins->getFunction("powf")
@@ -490,3 +502,9 @@ std::vector<llvm::Value*> InstructionsSoa::lit(const std::vector<llvm::Value*> i
    return callBuiltin(func, in);
 }
 
+// Fetches the function for TGSI_OPCODE_RSQ and hands it, with `in`, to callBuiltin.
+std::vector<llvm::Value*> InstructionsSoa::rsq(const std::vector<llvm::Value*> in)
+{
+   return callBuiltin(function(TGSI_OPCODE_RSQ), in);
+}
+
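diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h
index 477ef4a1579e4e3c75cc0a0bc91a848c691b96a7..3e20b652dd35588c04dd72a3de84554ef06c1c9e 100644
--- a/src/gallium/auxiliary/gallivm/instructionssoa.h
+++ b/src/gallium/auxiliary/gallivm/instructionssoa.h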
@@ -68,6 +68,7 @@ public:
                                  const std::vector<llvm::Value*> in2);
    std::vector<llvm::Value*> pow(const std::vector<llvm::Value*> in1,
                                  const std::vector<llvm::Value*> in2);
+   std::vector<llvm::Value*> rsq(const std::vector<llvm::Value*> in1);
    std::vector<llvm::Value*> sub(const std::vector<llvm::Value*> in1,
                                  const std::vector<llvm::Value*> in2);
    void         end();
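diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c
index b3bfebfe509ce6ce41540830882465f6f619c70a..64c02aa967539a6f54c22b84b43a2d855d5f73b1 100644
--- a/src/gallium/auxiliary/gallivm/soabuiltins.c
+++ b/src/gallium/auxiliary/gallivm/soabuiltins.c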
@@ -36,28 +36,24 @@ typedef __attribute__(( ext_vector_type(4) )) float float4;
 
 extern float fabsf(float val);
 
+/* Returns a float4 whose x, y, z and w are fabsf() of vec's matching lanes. */
+float4 absvec(float4 vec)
+{
+   float4 out;
+   out.x = fabsf(vec.x);
+   out.y = fabsf(vec.y);
+   out.z = fabsf(vec.z);
+   out.w = fabsf(vec.w);
+   return out;
+}
+
 void abs(float4 *res,
          float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w)
 {
-   res[0].x = fabsf(tmp0x.x);
-   res[0].y = fabsf(tmp0x.y);
-   res[0].z = fabsf(tmp0x.z);
-   res[0].w = fabsf(tmp0x.w);
-
-   res[1].x = fabsf(tmp0y.x);
-   res[1].y = fabsf(tmp0y.y);
-   res[1].z = fabsf(tmp0y.z);
-   res[1].w = fabsf(tmp0y.w);
-
-   res[2].x = fabsf(tmp0z.x);
-   res[2].y = fabsf(tmp0z.y);
-   res[2].z = fabsf(tmp0z.z);
-   res[2].w = fabsf(tmp0z.w);
-
-   res[3].x = fabsf(tmp0w.x);
-   res[3].y = fabsf(tmp0w.y);
-   res[3].z = fabsf(tmp0w.z);
-   res[3].w = fabsf(tmp0w.w);
+   res[0] = absvec(tmp0x);   /* lane-wise fabsf of the first input vector */
+   res[1] = absvec(tmp0y);   /* lane-wise fabsf of the second input vector */
+   res[2] = absvec(tmp0z);   /* lane-wise fabsf of the third input vector */
+   res[3] = absvec(tmp0w);   /* lane-wise fabsf of the fourth input vector */
 }
 
 void dp3(float4 *res,
@@ -88,6 +84,7 @@ void dp4(float4 *res,
 }
 
 extern float powf(float num, float p);
+extern float sqrtf(float x);
 
 float4 powvec(float4 vec, float4 q)
 {
@@ -168,3 +165,24 @@ void lit(float4 *res,
    }
    res[3] = (float4){1.0, 1.0, 1.0, 1.0};
 }
+
+/* Returns a float4 whose x, y, z and w are sqrtf() of vec's matching lanes. */
+float4 sqrtvec(float4 vec)
+{
+   float4 roots;
+   roots.x = sqrtf(vec.x);
+   roots.y = sqrtf(vec.y);
+   roots.z = sqrtf(vec.z);
+   roots.w = sqrtf(vec.w);
+   return roots;
+}
+
+/* Writes 1/sqrtvec(absvec(tmp0x)) to res[0] and copies that value into
+ * res[1], res[2] and res[3]; tmp0y, tmp0z and tmp0w are not read. */
+void rsq(float4 *res,
+         float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w)
+{
+   const float4 ones = (float4) {1., 1., 1., 1.};
+   res[0] = ones/sqrtvec(absvec(tmp0x));
+   res[1] = res[2] = res[3] = res[0];
+}
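diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
index abcb240f46517c0c0550fa5947d8154837d497af..9695358ab82cc9ae74628ef102927a99aa913df2 100644
--- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
+++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp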
@@ -699,6 +699,7 @@ translate_instructionir(llvm::Module *module,
    }
       break;
    case TGSI_OPCODE_RSQ: {
+      out = instr->rsq(inputs[0]);
    }
       break;
    case TGSI_OPCODE_EXP: