gallium: Create OGL state tracker wrappers for various CPU access operations.
[mesa.git] / src / mesa / state_tracker / st_mesa_to_tgsi.c
index 59c1abe4882369e9dc452a4b726736f95cf443a0..43c9afccc3b355bd7124758fada492783d93ffc1 100644 (file)
 #include "shader/prog_instruction.h"
 #include "shader/prog_parameter.h"
 #include "shader/prog_print.h"
-#include "pipe/p_debug.h"
+#include "util/u_debug.h"
 
 /*
  * Map mesa register file to TGSI register file.
  */
 static GLuint
 map_register_file(
-   enum register_file file,
+   gl_register_file file,
    GLuint index,
    const GLuint immediateMapping[],
    GLboolean indirectAccess )
@@ -132,19 +132,35 @@ map_register_file_index(
  */
 static GLuint
 map_texture_target(
-   GLuint textarget )
+    GLuint textarget,
+    GLboolean shadow )
 {
+#if 1
+   /* XXX remove this line after we've checked that the rest of gallium
+    * can handle the TGSI_TEXTURE_SHADOWx tokens.
+    */
+   shadow = GL_FALSE;
+#endif
    switch( textarget ) {
    case TEXTURE_1D_INDEX:
-      return TGSI_TEXTURE_1D;
+      if (shadow)
+         return TGSI_TEXTURE_SHADOW1D;
+      else
+         return TGSI_TEXTURE_1D;
    case TEXTURE_2D_INDEX:
-      return TGSI_TEXTURE_2D;
+      if (shadow)
+         return TGSI_TEXTURE_SHADOW2D;
+      else
+         return TGSI_TEXTURE_2D;
    case TEXTURE_3D_INDEX:
       return TGSI_TEXTURE_3D;
    case TEXTURE_CUBE_INDEX:
       return TGSI_TEXTURE_CUBE;
    case TEXTURE_RECT_INDEX:
-      return TGSI_TEXTURE_RECT;
+      if (shadow)
+         return TGSI_TEXTURE_SHADOWRECT;
+      else
+         return TGSI_TEXTURE_RECT;
    default:
       assert( 0 );
    }
@@ -188,7 +204,7 @@ make_immediate(const float *value, uint size)
    struct tgsi_full_immediate imm;
 
    imm = tgsi_default_full_immediate();
-   imm.Immediate.Size += size;
+   imm.Immediate.NrTokens += size;
    imm.Immediate.DataType = TGSI_IMM_FLOAT32;
    imm.u.Pointer = value;
    return imm;
@@ -203,8 +219,9 @@ compile_instruction(
    const GLuint immediateMapping[],
    GLboolean indirectAccess,
    GLuint preamble_size,
-   GLuint processor,
-   GLboolean *insideSubroutine)
+   GLuint procType,
+   GLboolean *insideSubroutine,
+   GLint wposTemp)
 {
    GLuint i;
    struct tgsi_full_dst_register *fulldst;
@@ -231,25 +248,35 @@ compile_instruction(
       GLuint j;
 
       fullsrc = &fullinst->FullSrcRegisters[i];
-      fullsrc->SrcRegister.File = map_register_file(
-         inst->SrcReg[i].File,
-         inst->SrcReg[i].Index,
-         immediateMapping,
-         indirectAccess );
-      fullsrc->SrcRegister.Index = map_register_file_index(
-         fullsrc->SrcRegister.File,
-         inst->SrcReg[i].Index,
-         inputMapping,
-         outputMapping,
-         immediateMapping,
-         indirectAccess );
 
+      if (procType == TGSI_PROCESSOR_FRAGMENT &&
+          inst->SrcReg[i].File == PROGRAM_INPUT &&
+          inst->SrcReg[i].Index == FRAG_ATTRIB_WPOS) {
+         /* special case of INPUT[WPOS] */
+         fullsrc->SrcRegister.File = TGSI_FILE_TEMPORARY;
+         fullsrc->SrcRegister.Index = wposTemp;
+      }
+      else {
+         /* any other src register */
+         fullsrc->SrcRegister.File = map_register_file(
+            inst->SrcReg[i].File,
+            inst->SrcReg[i].Index,
+            immediateMapping,
+            indirectAccess );
+         fullsrc->SrcRegister.Index = map_register_file_index(
+            fullsrc->SrcRegister.File,
+            inst->SrcReg[i].Index,
+            inputMapping,
+            outputMapping,
+            immediateMapping,
+            indirectAccess );
+      }
 
       /* swizzle (ext swizzle also depends on negation) */
       {
          GLuint swz[4];
-         GLboolean extended = (inst->SrcReg[i].NegateBase != NEGATE_NONE &&
-                               inst->SrcReg[i].NegateBase != NEGATE_XYZW);
+         GLboolean extended = (inst->SrcReg[i].Negate != NEGATE_NONE &&
+                               inst->SrcReg[i].Negate != NEGATE_XYZW);
          for( j = 0; j < 4; j++ ) {
             swz[j] = GET_SWZ( inst->SrcReg[i].Swizzle, j );
             if (swz[j] > SWIZZLE_W)
@@ -269,20 +296,20 @@ compile_instruction(
          }
       }
 
-      if( inst->SrcReg[i].NegateBase == NEGATE_XYZW ) {
+      if( inst->SrcReg[i].Negate == NEGATE_XYZW ) {
          fullsrc->SrcRegister.Negate = 1;
       }
-      else if( inst->SrcReg[i].NegateBase != NEGATE_NONE ) {
-         if( inst->SrcReg[i].NegateBase & NEGATE_X ) {
+      else if( inst->SrcReg[i].Negate != NEGATE_NONE ) {
+         if( inst->SrcReg[i].Negate & NEGATE_X ) {
             fullsrc->SrcRegisterExtSwz.NegateX = 1;
          }
-         if( inst->SrcReg[i].NegateBase & NEGATE_Y ) {
+         if( inst->SrcReg[i].Negate & NEGATE_Y ) {
             fullsrc->SrcRegisterExtSwz.NegateY = 1;
          }
-         if( inst->SrcReg[i].NegateBase & NEGATE_Z ) {
+         if( inst->SrcReg[i].Negate & NEGATE_Z ) {
             fullsrc->SrcRegisterExtSwz.NegateZ = 1;
          }
-         if( inst->SrcReg[i].NegateBase & NEGATE_W ) {
+         if( inst->SrcReg[i].Negate & NEGATE_W ) {
             fullsrc->SrcRegisterExtSwz.NegateW = 1;
          }
       }
@@ -291,10 +318,6 @@ compile_instruction(
          fullsrc->SrcRegisterExtMod.Absolute = 1;
       }
 
-      if( inst->SrcReg[i].NegateAbs ) {
-         fullsrc->SrcRegisterExtMod.Negate = 1;
-      }
-
       if( inst->SrcReg[i].RelAddr ) {
          fullsrc->SrcRegister.Indirect = 1;
 
@@ -475,9 +498,6 @@ compile_instruction(
       break;
    case OPCODE_RSQ:
       fullinst->Instruction.Opcode = TGSI_OPCODE_RSQ;
-      tgsi_util_set_full_src_register_sign_mode(
-         &fullinst->FullSrcRegisters[0],
-         TGSI_UTIL_SIGN_CLEAR );
       break;
    case OPCODE_SCS:
       fullinst->Instruction.Opcode = TGSI_OPCODE_SCS;
@@ -517,7 +537,8 @@ compile_instruction(
       /* ordinary texture lookup */
       fullinst->Instruction.Opcode = TGSI_OPCODE_TEX;
       fullinst->Instruction.NumSrcRegs = 2;
-      fullinst->InstructionExtTexture.Texture = map_texture_target( inst->TexSrcTarget );
+      fullinst->InstructionExtTexture.Texture =
+         map_texture_target( inst->TexSrcTarget, inst->TexShadow );
       fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
       fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
       break;
@@ -525,7 +546,8 @@ compile_instruction(
       /* texture lookup with LOD bias */
       fullinst->Instruction.Opcode = TGSI_OPCODE_TXB;
       fullinst->Instruction.NumSrcRegs = 2;
-      fullinst->InstructionExtTexture.Texture = map_texture_target( inst->TexSrcTarget );
+      fullinst->InstructionExtTexture.Texture =
+         map_texture_target( inst->TexSrcTarget, inst->TexShadow );
       fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
       fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
       break;
@@ -533,7 +555,8 @@ compile_instruction(
       /* texture lookup with explicit partial derivatives */
       fullinst->Instruction.Opcode = TGSI_OPCODE_TXD;
       fullinst->Instruction.NumSrcRegs = 4;
-      fullinst->InstructionExtTexture.Texture = map_texture_target( inst->TexSrcTarget );
+      fullinst->InstructionExtTexture.Texture =
+         map_texture_target( inst->TexSrcTarget, inst->TexShadow );
       /* src[0] = coord, src[1] = d[strq]/dx, src[2] = d[strq]/dy */
       fullinst->FullSrcRegisters[3].SrcRegister.File = TGSI_FILE_SAMPLER;
       fullinst->FullSrcRegisters[3].SrcRegister.Index = inst->TexSrcUnit;
@@ -542,7 +565,8 @@ compile_instruction(
       /* texture lookup with explicit LOD */
       fullinst->Instruction.Opcode = TGSI_OPCODE_TXL;
       fullinst->Instruction.NumSrcRegs = 2;
-      fullinst->InstructionExtTexture.Texture = map_texture_target( inst->TexSrcTarget );
+      fullinst->InstructionExtTexture.Texture =
+         map_texture_target( inst->TexSrcTarget, inst->TexShadow );
       fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
       fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
       break;
@@ -551,7 +575,8 @@ compile_instruction(
       /* convert to TEX w/ special flag for division */
       fullinst->Instruction.Opcode = TGSI_OPCODE_TXP;
       fullinst->Instruction.NumSrcRegs = 2;
-      fullinst->InstructionExtTexture.Texture = map_texture_target( inst->TexSrcTarget );
+      fullinst->InstructionExtTexture.Texture =
+         map_texture_target( inst->TexSrcTarget, inst->TexShadow );
       fullinst->FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
       fullinst->FullSrcRegisters[1].SrcRegister.Index = inst->TexSrcUnit;
       break;
@@ -578,7 +603,8 @@ make_input_decl(
    GLuint usage_mask,
    GLboolean semantic_info,
    GLuint semantic_name,
-   GLbitfield semantic_index )
+   GLbitfield semantic_index,
+   GLbitfield input_flags)
 {
    struct tgsi_full_declaration decl;
 
@@ -597,6 +623,10 @@ make_input_decl(
    if (interpolate_info) {
       decl.Declaration.Interpolate = interpolate;
    }
+   if (input_flags & PROG_PARAM_BIT_CENTROID)
+      decl.Declaration.Centroid = 1;
+   if (input_flags & PROG_PARAM_BIT_INVARIANT)
+      decl.Declaration.Invariant = 1;
 
    return decl;
 }
@@ -609,7 +639,8 @@ make_output_decl(
    GLuint index,
    GLuint semantic_name,
    GLuint semantic_index,
-   GLbitfield usage_mask )
+   GLuint usage_mask,
+   GLbitfield output_flags)
 {
    struct tgsi_full_declaration decl;
 
@@ -623,6 +654,10 @@ make_output_decl(
    decl.DeclarationRange.Last = index;
    decl.Semantic.SemanticName = semantic_name;
    decl.Semantic.SemanticIndex = semantic_index;
+   if (output_flags & PROG_PARAM_BIT_CENTROID)
+      decl.Declaration.Centroid = 1;
+   if (output_flags & PROG_PARAM_BIT_INVARIANT)
+      decl.Declaration.Invariant = 1;
 
    return decl;
 }
@@ -705,6 +740,111 @@ find_temporaries(const struct gl_program *program,
 }
 
 
+/**
+ * Find the lowest-indexed unused temporary in tempsUsed[] and mark it used.
+ */
+static int
+find_free_temporary(GLboolean tempsUsed[MAX_PROGRAM_TEMPS])
+{
+   int i;
+   for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
+      if (!tempsUsed[i]) {
+         tempsUsed[i] = GL_TRUE;   /* claim the slot so it is not handed out again */
+         return i;                 /* index of the newly claimed temporary */
+      }
+   }
+   return -1;   /* every entry in [0, MAX_PROGRAM_TEMPS) is already in use */
+}
+
+
+/** helper: fill *inst with a simple TGSI instruction (one dst, one src register) */
+static void
+build_tgsi_instruction1(struct tgsi_full_instruction *inst,
+                        int opcode,
+                        int dstFile, int dstIndex, int writemask,
+                        int srcFile1, int srcIndex1)
+{
+   *inst = tgsi_default_full_instruction();   /* start from defaults; only the fields below are overridden */
+
+   inst->Instruction.Opcode = opcode;
+
+   inst->Instruction.NumDstRegs = 1;   /* single destination: dstFile[dstIndex].writemask */
+   inst->FullDstRegisters[0].DstRegister.File = dstFile;
+   inst->FullDstRegisters[0].DstRegister.Index = dstIndex;
+   inst->FullDstRegisters[0].DstRegister.WriteMask = writemask;
+
+   inst->Instruction.NumSrcRegs = 1;   /* single source: srcFile1[srcIndex1] */
+   inst->FullSrcRegisters[0].SrcRegister.File = srcFile1;
+   inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1;
+}
+
+
+/** helper: fill *inst with a simple TGSI instruction (one dst, two src registers) */
+static void
+build_tgsi_instruction2(struct tgsi_full_instruction *inst,
+                        int opcode,
+                        int dstFile, int dstIndex, int writemask,
+                        int srcFile1, int srcIndex1,
+                        int srcFile2, int srcIndex2)
+{
+   *inst = tgsi_default_full_instruction();   /* start from defaults; only the fields below are overridden */
+
+   inst->Instruction.Opcode = opcode;
+
+   inst->Instruction.NumDstRegs = 1;   /* single destination: dstFile[dstIndex].writemask */
+   inst->FullDstRegisters[0].DstRegister.File = dstFile;
+   inst->FullDstRegisters[0].DstRegister.Index = dstIndex;
+   inst->FullDstRegisters[0].DstRegister.WriteMask = writemask;
+
+   inst->Instruction.NumSrcRegs = 2;   /* operand order: src 0 is srcFile1[srcIndex1], src 1 is srcFile2[srcIndex2] */
+   inst->FullSrcRegisters[0].SrcRegister.File = srcFile1;
+   inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1;
+   inst->FullSrcRegisters[1].SrcRegister.File = srcFile2;
+   inst->FullSrcRegisters[1].SrcRegister.Index = srcIndex2;
+}
+
+
+
+/**
+ * Emit MOV + SUB so temp[wpos_temp] = input[wpos_input] with y inverted; returns the summed build results.
+ */
+static int
+emit_inverted_wpos(struct tgsi_token *tokens,   /* where the two instructions are written */
+                   int wpos_temp,               /* index of the temporary receiving the result */
+                   int winsize_const,           /* index of the constant that y is subtracted from */
+                   int wpos_input,              /* index of the input register holding WPOS */
+                   struct tgsi_header *header, int maxTokens)
+{
+   struct tgsi_full_instruction fullinst;
+   int ti = 0;
+
+   /* MOV wpos_temp.xzw, input[wpos_input]; (was hard-coded to input 0) */
+   build_tgsi_instruction1(&fullinst,
+                           TGSI_OPCODE_MOV,
+                           TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_XZW,
+                           TGSI_FILE_INPUT, wpos_input);
+
+   ti += tgsi_build_full_instruction(&fullinst,
+                                     &tokens[ti],
+                                     header,
+                                     maxTokens - ti);
+
+   /* SUB wpos_temp.y, const[winsize_const] - input[wpos_input]; */
+   build_tgsi_instruction2(&fullinst,
+                           TGSI_OPCODE_SUB,
+                           TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_Y,
+                           TGSI_FILE_CONSTANT, winsize_const,
+                           TGSI_FILE_INPUT, wpos_input);
+
+   ti += tgsi_build_full_instruction(&fullinst,
+                                     &tokens[ti],
+                                     header,
+                                     maxTokens - ti);
+
+   return ti;
+}
+
+
 
 
 /**
@@ -729,6 +869,7 @@ find_temporaries(const struct gl_program *program,
  */
 GLuint
 st_translate_mesa_program(
+   GLcontext *ctx,
    uint procType,
    const struct gl_program *program,
    GLuint numInputs,
@@ -736,10 +877,12 @@ st_translate_mesa_program(
    const ubyte inputSemanticName[],
    const ubyte inputSemanticIndex[],
    const GLuint interpMode[],
+   const GLbitfield inputFlags[],
    GLuint numOutputs,
    const GLuint outputMapping[],
    const ubyte outputSemanticName[],
    const ubyte outputSemanticIndex[],
+   const GLbitfield outputFlags[],
    struct tgsi_token *tokens,
    GLuint maxTokens )
 {
@@ -747,16 +890,34 @@ st_translate_mesa_program(
    GLuint ti;  /* token index */
    struct tgsi_header *header;
    struct tgsi_processor *processor;
-   struct tgsi_full_instruction fullinst;
    GLuint preamble_size = 0;
    GLuint immediates[1000];
    GLuint numImmediates = 0;
    GLboolean insideSubroutine = GL_FALSE;
    GLboolean indirectAccess = GL_FALSE;
+   GLboolean tempsUsed[MAX_PROGRAM_TEMPS + 1];
+   GLint wposTemp = -1, winHeightConst = -1;
 
    assert(procType == TGSI_PROCESSOR_FRAGMENT ||
           procType == TGSI_PROCESSOR_VERTEX);
 
+   find_temporaries(program, tempsUsed);
+
+   if (procType == TGSI_PROCESSOR_FRAGMENT) {
+      if (program->InputsRead & FRAG_BIT_WPOS) {
+         /* Fragment program uses fragment position input.
+          * Need to replace instances of INPUT[WPOS] with temp T
+          * where T = INPUT[WPOS] but with y inverted.
+          */
+         static const gl_state_index winSizeState[STATE_LENGTH]
+            = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 };
+         winHeightConst = _mesa_add_state_reference(program->Parameters,
+                                                    winSizeState);
+         wposTemp = find_free_temporary(tempsUsed);
+      }
+   }
+
+
    *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
 
    header = (struct tgsi_header *) &tokens[1];
@@ -777,7 +938,8 @@ st_translate_mesa_program(
                                     GL_TRUE, interpMode[i],
                                     TGSI_WRITEMASK_XYZW,
                                     GL_TRUE, inputSemanticName[i],
-                                    inputSemanticIndex[i]);
+                                    inputSemanticIndex[i],
+                                    inputFlags[i]);
          ti += tgsi_build_full_declaration(&fulldecl,
                                            &tokens[ti],
                                            header,
@@ -794,7 +956,8 @@ st_translate_mesa_program(
          fulldecl = make_input_decl(i,
                                     GL_FALSE, 0,
                                     TGSI_WRITEMASK_XYZW,
-                                    GL_FALSE, 0, 0);
+                                    GL_FALSE, 0, 0,
+                                    inputFlags[i]);
          ti += tgsi_build_full_declaration(&fulldecl,
                                            &tokens[ti],
                                            header,
@@ -813,13 +976,15 @@ st_translate_mesa_program(
             fulldecl = make_output_decl(i,
                                         TGSI_SEMANTIC_POSITION, /* Z / Depth */
                                         outputSemanticIndex[i],
-                                        TGSI_WRITEMASK_Z );
+                                        TGSI_WRITEMASK_Z,
+                                        outputFlags[i]);
             break;
          case TGSI_SEMANTIC_COLOR:
             fulldecl = make_output_decl(i,
                                         TGSI_SEMANTIC_COLOR,
                                         outputSemanticIndex[i],
-                                        TGSI_WRITEMASK_XYZW );
+                                        TGSI_WRITEMASK_XYZW,
+                                        outputFlags[i]);
             break;
          default:
             assert(0);
@@ -838,7 +1003,8 @@ st_translate_mesa_program(
          fulldecl = make_output_decl(i,
                                      outputSemanticName[i],
                                      outputSemanticIndex[i],
-                                     TGSI_WRITEMASK_XYZW );
+                                     TGSI_WRITEMASK_XYZW,
+                                     outputFlags[i]);
          ti += tgsi_build_full_declaration(&fulldecl,
                                            &tokens[ti],
                                            header,
@@ -848,11 +1014,9 @@ st_translate_mesa_program(
 
    /* temporary decls */
    {
-      GLboolean tempsUsed[MAX_PROGRAM_TEMPS + 1];
       GLboolean inside_range = GL_FALSE;
       GLuint start_range = 0;
 
-      find_temporaries(program, tempsUsed);
       tempsUsed[MAX_PROGRAM_TEMPS] = GL_FALSE;
       for (i = 0; i < MAX_PROGRAM_TEMPS + 1; i++) {
          if (tempsUsed[i] && !inside_range) {
@@ -969,7 +1133,7 @@ st_translate_mesa_program(
    }
 
    /* texture samplers */
-   for (i = 0; i < 8; i++) {
+   for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
       if (program->SamplersUsed & (1 << i)) {
          struct tgsi_full_declaration fulldecl;
 
@@ -982,7 +1146,17 @@ st_translate_mesa_program(
       }
    }
 
+   /* invert WPOS fragment input */
+   if (wposTemp >= 0) {
+      ti += emit_inverted_wpos(&tokens[ti], wposTemp, winHeightConst,
+                               inputMapping[FRAG_ATTRIB_WPOS],
+                               header, maxTokens - ti);
+      preamble_size = 2; /* two instructions added */
+   }
+
    for (i = 0; i < program->NumInstructions; i++) {
+      struct tgsi_full_instruction fullinst;
+
       compile_instruction(
          &program->Instructions[i],
          &fullinst,
@@ -992,7 +1166,8 @@ st_translate_mesa_program(
          indirectAccess,
          preamble_size,
          procType,
-         &insideSubroutine );
+         &insideSubroutine,
+         wposTemp);
 
       ti += tgsi_build_full_instruction(
          &fullinst,