Gallivm: fix the constant layout, this gets a bunch of progs/ working. Notably, gears...
authorStephane Marchesin <marchesin@icps.u-strasbg.fr>
Tue, 7 Oct 2008 19:11:01 +0000 (21:11 +0200)
committerStephane Marchesin <marchesin@icps.u-strasbg.fr>
Tue, 7 Oct 2008 19:11:01 +0000 (21:11 +0200)
src/gallium/auxiliary/gallivm/gallivm_cpu.cpp
src/gallium/auxiliary/gallivm/instructionssoa.cpp
src/gallium/auxiliary/gallivm/instructionssoa.h
src/gallium/auxiliary/gallivm/storagesoa.cpp
src/gallium/auxiliary/gallivm/storagesoa.h
src/gallium/auxiliary/gallivm/tgsitollvm.cpp

index 3a4a41e544510069e78d9e8d8f76758f6778b6e9..3a2f2878a30586ac30937b3896b7bb4d3c87339f 100644 (file)
@@ -158,8 +158,8 @@ void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *cpu, struct gallivm_prog
    llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod);
    llvm::ExecutionEngine *ee = cpu->engine;
    assert(ee);
-   /*FIXME : remove */
-   ee->DisableLazyCompilation();
+   /*FIXME : why was this disabled ? we need it for pow/sqrt/... */
+   ee->DisableLazyCompilation(false);
    ee->addModuleProvider(mp);
 
    llvm::Function *func = func_for_shader(prog);
@@ -202,7 +202,6 @@ int gallivm_cpu_vs_exec(struct gallivm_prog *prog,
    unsigned int i, j;
    unsigned slot;
    vertex_shader_runner runner = reinterpret_cast<vertex_shader_runner>(prog->function);
-
    assert(runner);
 
    for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
index a6580725512f6b4548c78183e0fd4a9c6a622c71..1143ee0b0bfb8c69b348650ccbadaa2bae36d40f 100644 (file)
@@ -171,6 +171,11 @@ std::vector<llvm::Value*> InstructionsSoa::extractVector(llvm::Value *vector)
    return res;
 }
 
+llvm::IRBuilder<>* InstructionsSoa::getIRBuilder()
+{
+   return &m_builder;
+}
+
 void InstructionsSoa::createFunctionMap()
 {
    m_functionsMap[TGSI_OPCODE_ABS]   = "abs";
index 3817fdc904b2c6d304accefdad8e6107e797337e..d6831e0a6b98d3fab54829e6856756d247d24a73 100644 (file)
@@ -76,6 +76,7 @@ public:
    void         end();
 
    std::vector<llvm::Value*> extractVector(llvm::Value *vector);
+   llvm::IRBuilder<>*  getIRBuilder();
 private:
    const char * name(const char *prefix) const;
    llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y,
index 78d754371f09b6705e32ab1e1eb9d90f56fe2caf..646b9d7ca0d1b7e1bc431826bc745fbb5cbdb66b 100644 (file)
@@ -91,19 +91,29 @@ void StorageSoa::declareImmediates()
    for (unsigned int i = 0; i < m_immediatesToFlush.size(); ++i) {
       std::vector<float> vec = m_immediatesToFlush[i];
       std::vector<float> vals(4);
+      float val;
       std::vector<Constant*> channelArray;
 
-      vals[0] = vec[0]; vals[1] = vec[0]; vals[2] = vec[0]; vals[3] = vec[0];
-      llvm::Constant *xChannel = createConstGlobalVector(vals);
+      val = vec[0];
+      llvm::Constant *xChannel = createConstGlobalFloat(val);
+      val = vec[1];
+      llvm::Constant *yChannel = createConstGlobalFloat(val);
+      val = vec[2];
+      llvm::Constant *zChannel = createConstGlobalFloat(val);
+      val = vec[3];
+      llvm::Constant *wChannel = createConstGlobalFloat(val);
 
-      vals[0] = vec[1]; vals[1] = vec[1]; vals[2] = vec[1]; vals[3] = vec[1];
+//      vals[0] = vec[0]; vals[1] = vec[1]; vals[2] = vec[2]; vals[3] = vec[3];
+//      llvm::Constant *xChannel = createConstGlobalVector(vec[0]);
+
+/*      vals[0] = vec[1]; vals[1] = vec[1]; vals[2] = vec[1]; vals[3] = vec[1];
       llvm::Constant *yChannel = createConstGlobalVector(vals);
 
       vals[0] = vec[2]; vals[1] = vec[2]; vals[2] = vec[2]; vals[3] = vec[2];
       llvm::Constant *zChannel = createConstGlobalVector(vals);
 
       vals[0] = vec[3]; vals[1] = vec[3]; vals[2] = vec[3]; vals[3] = vec[3];
-      llvm::Constant *wChannel = createConstGlobalVector(vals);
+      llvm::Constant *wChannel = createConstGlobalVector(vals);*/
       channelArray.push_back(xChannel);
       channelArray.push_back(yChannel);
       channelArray.push_back(zChannel);
@@ -144,22 +154,54 @@ std::vector<llvm::Value*> StorageSoa::inputElement(llvm::Value *idx)
    return res;
 }
 
-std::vector<llvm::Value*> StorageSoa::constElement(llvm::Value *idx)
+llvm::Value* StorageSoa::unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value* vector, int cc)
+{
+   std::vector<llvm::Value*> x(4);
+   x[0] = m_builder->CreateExtractElement(vector,
+                                           constantInt(cc),
+                                           name("x"));
+
+   VectorType  *vectorType = VectorType::get(Type::FloatTy, 4);
+   Constant *constVector = Constant::getNullValue(vectorType);
+   Value *res = m_builder->CreateInsertElement(constVector, x[0],
+                                              constantInt(0),
+                                              name("vecx"));
+   res = m_builder->CreateInsertElement(res, x[0], constantInt(1),
+                               name("vecxx"));
+   res = m_builder->CreateInsertElement(res, x[0], constantInt(2),
+                               name("vecxxx"));
+   res = m_builder->CreateInsertElement(res, x[0], constantInt(3),
+                               name("vecxxxx"));
+   return res;
+}
+
+std::vector<llvm::Value*> StorageSoa::constElement(llvm::IRBuilder<>* m_builder, llvm::Value *idx)
 {
    std::vector<llvm::Value*> res(4);
+   std::vector<llvm::Value*> res2(4);
    llvm::Value *xChannel, *yChannel, *zChannel, *wChannel;
 
    xChannel = elementPointer(m_consts, idx, 0);
-   yChannel = elementPointer(m_consts, idx, 1);
+/*   yChannel = elementPointer(m_consts, idx, 1);
    zChannel = elementPointer(m_consts, idx, 2);
-   wChannel = elementPointer(m_consts, idx, 3);
+   wChannel = elementPointer(m_consts, idx, 3);*/
 
    res[0] = alignedArrayLoad(xChannel);
+/* res[1] = alignedArrayLoad(xChannel);
+   res[2] = alignedArrayLoad(xChannel);
+   res[3] = alignedArrayLoad(xChannel);*/
+
+
+   res2[0]=unpackConstElement(m_builder, res[0],0);
+   res2[1]=unpackConstElement(m_builder, res[0],1);
+   res2[2]=unpackConstElement(m_builder, res[0],2);
+   res2[3]=unpackConstElement(m_builder, res[0],3);
+/*res[0] = alignedArrayLoad(xChannel);
    res[1] = alignedArrayLoad(yChannel);
    res[2] = alignedArrayLoad(zChannel);
-   res[3] = alignedArrayLoad(wChannel);
+   res[3] = alignedArrayLoad(wChannel);*/
 
-   return res;
+   return res2;
 }
 
 std::vector<llvm::Value*> StorageSoa::outputElement(llvm::Value *idx)
@@ -260,6 +302,12 @@ llvm::Module * StorageSoa::currentModule() const
     return m_block->getParent()->getParent();
 }
 
+llvm::Constant * StorageSoa::createConstGlobalFloat(const float val)
+{
+   Constant*c = ConstantFP::get(APFloat(val));
+   return c;
+}
+
 llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector<float> &vec)
 {
    VectorType *vectorType = VectorType::get(Type::FloatTy, 4);
@@ -278,7 +326,7 @@ llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector<float> &v
 }
 
 std::vector<llvm::Value*> StorageSoa::load(enum tgsi_file_type type, int idx, int swizzle,
-                                           llvm::Value *indIdx)
+                                           llvm::IRBuilder<>* m_builder,llvm::Value *indIdx)
 {
    std::vector<llvm::Value*> val(4);
 
@@ -302,7 +350,8 @@ std::vector<llvm::Value*> StorageSoa::load(enum tgsi_file_type type, int idx, in
       val = tempElement(realIndex);
       break;
    case TGSI_FILE_CONSTANT:
-      val = constElement(realIndex);
+      val = constElement(m_builder, realIndex);
+      printf("constant COUCOU index %d\n",realIndex);
       break;
    case TGSI_FILE_IMMEDIATE:
       val = immediateElement(realIndex);
index ae2fc7c6aeed9ef431219ee2d446ef9c58181a2e..f21ca6ec4331a7fac8272be6bd6dc69b2d831cb8 100644 (file)
@@ -29,6 +29,7 @@
 #define STORAGESOA_H
 
 #include <pipe/p_shader_tokens.h>
+#include <llvm/Support/IRBuilder.h>
 
 #include <vector>
 #include <list>
@@ -56,7 +57,7 @@ public:
 
 
    std::vector<llvm::Value*> load(enum tgsi_file_type type, int idx, int swizzle, 
-                                  llvm::Value *indIdx =0);
+                                  llvm::IRBuilder<>* m_builder, llvm::Value *indIdx =0);
    void store(enum tgsi_file_type type, int idx, const std::vector<llvm::Value*> &val,
               int mask);
 
@@ -76,10 +77,12 @@ private:
    const char *name(const char *prefix) const;
    llvm::Value  *alignedArrayLoad(llvm::Value *val);
    llvm::Module *currentModule() const;
+   llvm::Constant  *createConstGlobalFloat(const float val);
    llvm::Constant  *createConstGlobalVector(const std::vector<float> &vec);
 
    std::vector<llvm::Value*> inputElement(llvm::Value *indIdx);
-   std::vector<llvm::Value*> constElement(llvm::Value *indIdx);
+   llvm::Value* unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx, int cc);
+   std::vector<llvm::Value*> constElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx);
    std::vector<llvm::Value*> outputElement(llvm::Value *indIdx);
    std::vector<llvm::Value*> tempElement(llvm::Value *indIdx);
    std::vector<llvm::Value*> immediateElement(llvm::Value *indIdx);
index 7292c0e366c54fab35496f0ae8c4d24ca949d923..1191a6cae97af86ec30e53ead5ffe85c5a08b26b 100644 (file)
@@ -52,7 +52,7 @@ static inline FunctionType *vertexShaderFunctionType()
    // pass are castable to the following:
    // [4 x <4 x float>] inputs,
    // [4 x <4 x float>] output,
-   // [4 x [4 x float]] consts,
+   // [4 x [1 x float]] consts,
    // [4 x <4 x float>] temps
 
    std::vector<const Type*> funcArgs;
@@ -61,7 +61,7 @@ static inline FunctionType *vertexShaderFunctionType()
    PointerType *vectorArrayPtr = PointerType::get(vectorArray, 0);
 
    ArrayType   *floatArray     = ArrayType::get(Type::FloatTy, 4);
-   ArrayType   *constsArray    = ArrayType::get(floatArray, 4);
+   ArrayType   *constsArray    = ArrayType::get(floatArray, 1);
    PointerType *constsArrayPtr = PointerType::get(constsArray, 0);
 
    funcArgs.push_back(vectorArrayPtr);//inputs
@@ -246,6 +246,7 @@ translate_instruction(llvm::Module *module,
          val = storage->constElement(src->SrcRegister.Index, indIdx);
       } else if (src->SrcRegister.File == TGSI_FILE_INPUT) {
          val = storage->inputElement(src->SrcRegister.Index, indIdx);
+      // FIXME we should not be generating elements for temporaries, this creates useless memory writes
       } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) {
          val = storage->tempElement(src->SrcRegister.Index);
       } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) {
@@ -676,6 +677,7 @@ translate_instruction(llvm::Module *module,
 
       if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
          storage->setOutputElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
+      // FIXME we should not be generating elements for temporaries, this creates useless memory writes
       } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) {
          storage->setTempElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
       } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) {
@@ -707,9 +709,8 @@ translate_instructionir(llvm::Module *module,
       if (src->SrcRegister.Indirect) {
          indIdx = storage->addrElement(src->SrcRegisterInd.Index);
       }
-
       val = storage->load((enum tgsi_file_type)src->SrcRegister.File,
-                          src->SrcRegister.Index, swizzle, indIdx);
+                          src->SrcRegister.Index, swizzle, instr->getIRBuilder(), indIdx);
 
       inputs[i] = val;
    }
@@ -1025,7 +1026,6 @@ translate_instructionir(llvm::Module *module,
    /* store results  */
    for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) {
       struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i];
-
       storage->store((enum tgsi_file_type)dst->DstRegister.File,
                      dst->DstRegister.Index, out, dst->DstRegister.WriteMask);
    }