nv50/ir: fix threads calculation for non-compute shaders

[mesa.git] / src / gallium / drivers / nouveau / codegen / nv50_ir_target.h
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h

index 4e33997e1c1984cc5dc60e95f185d2d370f20d7a..afeca14d7d11d0a401aebb15da223449bb481ffd 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
@@ -58,21 +58,37 @@ struct RelocInfo
     RelocEntry entry[0];
  };
  
-struct InterpEntry
-{
-   InterpEntry(int ipa, int reg, int loc) : ipa(ipa), reg(reg), loc(loc) {}
-   uint32_t ipa:4; // SC mode used to identify colors
-   uint32_t reg:8; // The reg used for perspective division
-   uint32_t loc:20; // Let's hope we don't have more than 1M-sized shaders
+struct FixupData {
+   FixupData(bool force, bool flat, uint8_t alphatest) :
+      force_persample_interp(force), flatshade(flat), alphatest(alphatest) {}
+   bool force_persample_interp;
+   bool flatshade;
+   uint8_t alphatest;
  };
  
-typedef void (*InterpApply)(const InterpEntry*, uint32_t*, bool, bool);
+struct FixupEntry;
+typedef void (*FixupApply)(const FixupEntry*, uint32_t*, const FixupData&);
+
+struct FixupEntry
+{
+   FixupEntry(FixupApply apply, int ipa, int reg, int loc) :
+      apply(apply), ipa(ipa), reg(reg), loc(loc) {}
+
+   FixupApply apply;
+   union {
+      struct {
+         uint32_t ipa:4; // SC mode used to identify colors
+         uint32_t reg:8; // The reg used for perspective division
+         uint32_t loc:20; // Let's hope we don't have more than 1M-sized shaders
+      };
+      uint32_t val;
+   };
+};
  
-struct InterpInfo
+struct FixupInfo
  {
     uint32_t count;
-   InterpApply apply;
-   InterpEntry entry[0];
+   FixupEntry entry[0];
  };
  
  class CodeEmitter
@@ -95,8 +111,8 @@ public:
  
     inline void *getRelocInfo() const { return relocInfo; }
  
-   bool addInterp(int ipa, int reg, InterpApply apply);
-   inline void *getInterpInfo() const { return interpInfo; }
+   bool addInterp(int ipa, int reg, FixupApply apply);
+   inline void *getFixupInfo() const { return fixupInfo; }
  
     virtual void prepareEmission(Program *);
     virtual void prepareEmission(Function *);
@@ -112,7 +128,7 @@ protected:
     uint32_t codeSizeLimit;
  
     RelocInfo *relocInfo;
-   InterpInfo *interpInfo;
+   FixupInfo *fixupInfo;
  };
  
  
@@ -157,7 +173,17 @@ public:
     // The address chosen is supplied to the relocation routine.
     virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const = 0;
  
-   virtual void parseDriverInfo(const struct nv50_ir_prog_info *info) { }
+   virtual void parseDriverInfo(const struct nv50_ir_prog_info *info) {
+      if (info->type == PIPE_SHADER_COMPUTE) {
+         threads = info->prop.cp.numThreads[0] *
+            info->prop.cp.numThreads[1] *
+            info->prop.cp.numThreads[2];
+         if (threads == 0)
+            threads = info->target >= NVISA_GK104_CHIPSET ? 1024 : 512;
+      } else {
+         threads = 32; // doesn't matter, just not too big.
+      }
+   }
  
     virtual bool runLegalizePass(Program *, CGStage stage) const = 0;
  
@@ -191,6 +217,8 @@ public:
  
     virtual bool insnCanLoad(const Instruction *insn, int s,
                              const Instruction *ld) const = 0;
+   virtual bool insnCanLoadOffset(const Instruction *insn, int s,
+                                  int offset) const = 0;
     virtual bool isOpSupported(operation, DataType) const = 0;
     virtual bool isAccessSupported(DataFile, DataType) const = 0;
     virtual bool isModSupported(const Instruction *,
@@ -231,6 +259,7 @@ public:
  
  protected:
     uint32_t chipset;
+   uint32_t threads;
  
     DataFile nativeFileMap[DATA_FILE_COUNT];