gallium: Redefine the max texture 2d cap from _LEVELS to _SIZE.

[mesa.git] / src / gallium / drivers / i915 / i915_fpc_translate.c
diff --git a/src/gallium/drivers/i915/i915_fpc_translate.c b/src/gallium/drivers/i915/i915_fpc_translate.c

index b383a7476b8164fe7e2a0e23b638394fc1553c6d..2eaa1e64ef736f54bb7997e02269069a6f4f00e6 100644 (file)
--- a/src/gallium/drivers/i915/i915_fpc_translate.c
+++ b/src/gallium/drivers/i915/i915_fpc_translate.c
@@ -1,6 +1,6 @@
  /**************************************************************************
   * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007 VMware, Inc.
   * All Rights Reserved.
   * 
   * Permission is hereby granted, free of charge, to any person obtaining a
@@ -18,7 +18,7 @@
   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
   * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
   * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
   * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
   * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -31,6 +31,7 @@
  #include "i915_reg.h"
  #include "i915_context.h"
  #include "i915_fpc.h"
+#include "i915_debug_private.h"
  
  #include "pipe/p_shader_tokens.h"
  #include "util/u_math.h"
@@ -49,7 +50,7 @@
   * Simple pass-through fragment shader to use when we don't have
   * a real shader (or it fails to compile for some reason).
   */
-static unsigned passthrough[] =
+static unsigned passthrough_decl[] =
  {
     _3DSTATE_PIXEL_SHADER_PROGRAM | ((2*3)-1),
  
@@ -61,7 +62,10 @@ static unsigned passthrough[] =
      D0_CHANNEL_ALL),
     0,
     0,
+};
  
+static unsigned passthrough_program[] =
+{
     /* move to output color:
      */
     (A0_MOV |
@@ -73,21 +77,6 @@ static unsigned passthrough[] =
     0
  };
  
-
-/* 1, -1/3!, 1/5!, -1/7! */
-static const float scs_sin_constants[4] = { 1.0,
-   -1.0f / (3 * 2 * 1),
-   1.0f / (5 * 4 * 3 * 2 * 1),
-   -1.0f / (7 * 6 * 5 * 4 * 3 * 2 * 1)
-};
-
-/* 1, -1/2!, 1/4!, -1/6! */
-static const float scs_cos_constants[4] = { 1.0,
-   -1.0f / (2 * 1),
-   1.0f / (4 * 3 * 2 * 1),
-   -1.0f / (6 * 5 * 4 * 3 * 2 * 1)
-};
-
  /* 2*pi, -(2*pi)^3/3!, (2*pi)^5/5!, -(2*pi)^7/7! */
  static const float sin_constants[4] = { 2.0 * M_PI,
     -8.0f * M_PI * M_PI * M_PI / (3 * 2 * 1),
@@ -107,7 +96,7 @@ static const float cos_constants[4] = { 1.0,
  /**
   * component-wise negation of ureg
   */
-static INLINE int
+static inline int
  negate(int reg, int x, int y, int z, int w)
  {
     /* Another neat thing about the UREG representation */
@@ -125,10 +114,13 @@ negate(int reg, int x, int y, int z, int w)
  static void
  i915_use_passthrough_shader(struct i915_fragment_shader *fs)
  {
-   fs->program = (uint *) MALLOC(sizeof(passthrough));
+   fs->program = (uint *) MALLOC(sizeof(passthrough_program));
+   fs->decl = (uint *) MALLOC(sizeof(passthrough_decl));
     if (fs->program) {
-      memcpy(fs->program, passthrough, sizeof(passthrough));
-      fs->program_len = Elements(passthrough);
+      memcpy(fs->program, passthrough_program, sizeof(passthrough_program));
+      memcpy(fs->decl, passthrough_decl, sizeof(passthrough_decl));
+      fs->program_len = ARRAY_SIZE(passthrough_program);
+      fs->decl_len = ARRAY_SIZE(passthrough_decl);
     }
     fs->num_constants = 0;
  }
@@ -173,7 +165,7 @@ static uint get_mapping(struct i915_fragment_shader* fs, int unit)
  static uint
  src_vector(struct i915_fp_compile *p,
             const struct i915_full_src_register *source,
-           struct i915_fragment_shader* fs)
+           struct i915_fragment_shader *fs)
  {
     uint index = source->Register.Index;
     uint src = 0, sem_name, sem_ind;
@@ -322,7 +314,7 @@ get_result_flags(const struct i915_full_instruction *inst)
        = inst->Dst[0].Register.WriteMask;
     uint flags = 0x0;
  
-   if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE)
+   if (inst->Instruction.Saturate)
        flags |= A0_DEST_SATURATE;
  
     if (writeMask & TGSI_WRITEMASK_X)
@@ -373,10 +365,10 @@ translate_tex_src_target(struct i915_fp_compile *p, uint tex)
  }
  
  /**
- * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token
+ * Return the number of coords needed to access a given TGSI_TEXTURE_*
   */
-static uint
-texture_num_coords(struct i915_fp_compile *p, uint tex)
+uint
+i915_num_coords(uint tex)
  {
     switch (tex) {
     case TGSI_TEXTURE_SHADOW1D:
@@ -394,7 +386,7 @@ texture_num_coords(struct i915_fp_compile *p, uint tex)
        return 3;
  
     default:
-      i915_program_error(p, "Num coords");
+      debug_printf("Unknown texture target for num coords");
        return 2;
     }
  }
@@ -421,7 +413,7 @@ emit_tex(struct i915_fp_compile *p,
                      sampler,
                      coord,
                      opcode,
-                    texture_num_coords(p, texture) );
+                    i915_num_coords(texture) );
  }
  
  
@@ -434,7 +426,7 @@ static void
  emit_simple_arith(struct i915_fp_compile *p,
                    const struct i915_full_instruction *inst,
                    uint opcode, uint numArgs,
-                  struct i915_fragment_shader* fs)
+                  struct i915_fragment_shader *fs)
  {
     uint arg1, arg2, arg3;
  
@@ -459,7 +451,7 @@ static void
  emit_simple_arith_swap2(struct i915_fp_compile *p,
                          const struct i915_full_instruction *inst,
                          uint opcode, uint numArgs,
-                        struct i915_fragment_shader* fs)
+                        struct i915_fragment_shader *fs)
  {
     struct i915_full_instruction inst2;
  
@@ -488,31 +480,37 @@ i915_translate_instruction(struct i915_fp_compile *p,
                             const struct i915_full_instruction *inst,
                             struct i915_fragment_shader *fs)
  {
-   uint writemask;
     uint src0, src1, src2, flags;
     uint tmp = 0;
  
     switch (inst->Instruction.Opcode) {
-   case TGSI_OPCODE_ABS:
+   case TGSI_OPCODE_ADD:
+      emit_simple_arith(p, inst, A0_ADD, 2, fs);
+      break;
+
+   case TGSI_OPCODE_CEIL:
        src0 = src_vector(p, &inst->Src[0], fs);
+      tmp = i915_get_utemp(p);
+      flags = get_result_flags(inst);
+      i915_emit_arith(p,
+                      A0_FLR,
+                      tmp,
+                      flags & A0_DEST_CHANNEL_ALL, 0,
+                      negate(src0, 1, 1, 1, 1), 0, 0);
        i915_emit_arith(p,
-                      A0_MAX,
+                      A0_MOV,
                        get_result_vector(p, &inst->Dst[0]),
-                      get_result_flags(inst), 0,
-                      src0, negate(src0, 1, 1, 1, 1), 0);
-      break;
-
-   case TGSI_OPCODE_ADD:
-      emit_simple_arith(p, inst, A0_ADD, 2, fs);
+                      flags, 0,
+                      negate(tmp, 1, 1, 1, 1), 0, 0);
        break;
  
     case TGSI_OPCODE_CMP:
        src0 = src_vector(p, &inst->Src[0], fs);
        src1 = src_vector(p, &inst->Src[1], fs);
        src2 = src_vector(p, &inst->Src[2], fs);
-      i915_emit_arith(p, A0_CMP, 
+      i915_emit_arith(p, A0_CMP,
                        get_result_vector(p, &inst->Dst[0]),
-                      get_result_flags(inst), 
+                      get_result_flags(inst),
                        0, src0, src2, src1);   /* NOTE: order of src2, src1 */
        break;
  
@@ -528,7 +526,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
        i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
  
        /* 
-       * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
+       * t0.xy = MUL x.xx11, x.x111  ; x^2, x, 1, 1
         * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1
         * t0 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
         * result = DP4 t0, cos_constants
@@ -562,7 +560,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
    case TGSI_OPCODE_DDX:
    case TGSI_OPCODE_DDY:
        /* XXX We just output 0 here */
-      debug_printf("Punting DDX/DDX\n");
+      debug_printf("Punting DDX/DDY\n");
        src0 = get_result_vector(p, &inst->Dst[0]);
        i915_emit_arith(p,
                        A0_MOV,
@@ -590,17 +588,6 @@ i915_translate_instruction(struct i915_fp_compile *p,
        emit_simple_arith(p, inst, A0_DP4, 2, fs);
        break;
  
-   case TGSI_OPCODE_DPH:
-      src0 = src_vector(p, &inst->Src[0], fs);
-      src1 = src_vector(p, &inst->Src[1], fs);
-
-      i915_emit_arith(p,
-                      A0_DP4,
-                      get_result_vector(p, &inst->Dst[0]),
-                      get_result_flags(inst), 0,
-                      swizzle(src0, X, Y, Z, ONE), src1, 0);
-      break;
-
     case TGSI_OPCODE_DST:
        src0 = src_vector(p, &inst->Src[0], fs);
        src1 = src_vector(p, &inst->Src[1], fs);
@@ -640,7 +627,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
        emit_simple_arith(p, inst, A0_FRC, 1, fs);
        break;
  
-   case TGSI_OPCODE_KIL:
+   case TGSI_OPCODE_KILL_IF:
        /* kill if src[0].x < 0 || src[0].y < 0 ... */
        src0 = src_vector(p, &inst->Src[0], fs);
        tmp = i915_get_utemp(p);
@@ -654,8 +641,17 @@ i915_translate_instruction(struct i915_fp_compile *p,
                        1);                    /* num_coord */
        break;
  
-   case TGSI_OPCODE_KILP:
-      assert(0); /* not tested yet */
+   case TGSI_OPCODE_KILL:
+      /* unconditional kill */
+      tmp = i915_get_utemp(p);
+
+      i915_emit_texld(p,
+                      tmp,                                   /* dest reg: a dummy reg */
+                      A0_DEST_CHANNEL_ALL,                   /* dest writemask */
+                      0,                                     /* sampler */
+                      negate(swizzle(0, ONE, ONE, ONE, ONE), 1, 1, 1, 1), /* coord */
+                      T0_TEXKILL,                            /* opcode */
+                      1);                                    /* num_coord */
        break;
  
     case TGSI_OPCODE_LG2:
@@ -732,21 +728,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
        break;
  
     case TGSI_OPCODE_MIN:
-      src0 = src_vector(p, &inst->Src[0], fs);
-      src1 = src_vector(p, &inst->Src[1], fs);
-      tmp = i915_get_utemp(p);
-      flags = get_result_flags(inst);
-
-      i915_emit_arith(p,
-                      A0_MAX,
-                      tmp, flags & A0_DEST_CHANNEL_ALL, 0,
-                      negate(src0, 1, 1, 1, 1),
-                      negate(src1, 1, 1, 1, 1), 0);
-
-      i915_emit_arith(p,
-                      A0_MOV,
-                      get_result_vector(p, &inst->Dst[0]),
-                      flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0);
+      emit_simple_arith(p, inst, A0_MIN, 2, fs);
        break;
  
     case TGSI_OPCODE_MOV:
@@ -805,70 +787,6 @@ i915_translate_instruction(struct i915_fp_compile *p,
                        swizzle(src0, X, X, X, X), 0, 0);
        break;
  
-   case TGSI_OPCODE_SCS:
-      src0 = src_vector(p, &inst->Src[0], fs);
-      tmp = i915_get_utemp(p);
-
-      /* 
-       * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
-       * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
-       * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
-       * scs.x = DP4 t1, scs_sin_constants
-       * t1 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
-       * scs.y = DP4 t1, scs_cos_constants
-       */
-      i915_emit_arith(p,
-                      A0_MUL,
-                      tmp, A0_DEST_CHANNEL_XY, 0,
-                      swizzle(src0, X, X, ONE, ONE),
-                      swizzle(src0, X, ONE, ONE, ONE), 0);
-
-      i915_emit_arith(p,
-                      A0_MUL,
-                      tmp, A0_DEST_CHANNEL_ALL, 0,
-                      swizzle(tmp, X, Y, X, Y),
-                      swizzle(tmp, X, X, ONE, ONE), 0);
-
-      writemask = inst->Dst[0].Register.WriteMask;
-
-      if (writemask & TGSI_WRITEMASK_Y) {
-         uint tmp1;
-
-         if (writemask & TGSI_WRITEMASK_X)
-            tmp1 = i915_get_utemp(p);
-         else
-            tmp1 = tmp;
-
-         i915_emit_arith(p,
-                         A0_MUL,
-                         tmp1, A0_DEST_CHANNEL_ALL, 0,
-                         swizzle(tmp, X, Y, Y, W),
-                         swizzle(tmp, X, Z, ONE, ONE), 0);
-
-         i915_emit_arith(p,
-                         A0_DP4,
-                         get_result_vector(p, &inst->Dst[0]),
-                         A0_DEST_CHANNEL_Y, 0,
-                         swizzle(tmp1, W, Z, Y, X),
-                         i915_emit_const4fv(p, scs_sin_constants), 0);
-      }
-
-      if (writemask & TGSI_WRITEMASK_X) {
-         i915_emit_arith(p,
-                         A0_MUL,
-                         tmp, A0_DEST_CHANNEL_XYZ, 0,
-                         swizzle(tmp, X, X, Z, ONE),
-                         swizzle(tmp, Z, ONE, ONE, ONE), 0);
-
-         i915_emit_arith(p,
-                         A0_DP4,
-                         get_result_vector(p, &inst->Dst[0]),
-                         A0_DEST_CHANNEL_X, 0,
-                         swizzle(tmp, ONE, Z, Y, X),
-                         i915_emit_const4fv(p, scs_cos_constants), 0);
-      }
-      break;
-
     case TGSI_OPCODE_SEQ:
        /* if we're both >= and <= then we're == */
        src0 = src_vector(p, &inst->Src[0], fs);
@@ -1013,17 +931,6 @@ i915_translate_instruction(struct i915_fp_compile *p,
                        negate(tmp, 1, 1, 1, 1), 0);
        break;
  
-   case TGSI_OPCODE_SUB:
-      src0 = src_vector(p, &inst->Src[0], fs);
-      src1 = src_vector(p, &inst->Src[1], fs);
-
-      i915_emit_arith(p,
-                      A0_ADD,
-                      get_result_vector(p, &inst->Dst[0]),
-                      get_result_flags(inst), 0,
-                      src0, negate(src1, 1, 1, 1, 1), 0);
-      break;
-
     case TGSI_OPCODE_TEX:
        emit_tex(p, inst, T0_TEXLD, fs);
        break;
@@ -1040,32 +947,6 @@ i915_translate_instruction(struct i915_fp_compile *p,
        emit_tex(p, inst, T0_TEXLDP, fs);
        break;
  
-   case TGSI_OPCODE_XPD:
-      /* Cross product:
-       *      result.x = src0.y * src1.z - src0.z * src1.y;
-       *      result.y = src0.z * src1.x - src0.x * src1.z;
-       *      result.z = src0.x * src1.y - src0.y * src1.x;
-       *      result.w = undef;
-       */
-      src0 = src_vector(p, &inst->Src[0], fs);
-      src1 = src_vector(p, &inst->Src[1], fs);
-      tmp = i915_get_utemp(p);
-
-      i915_emit_arith(p,
-                      A0_MUL,
-                      tmp, A0_DEST_CHANNEL_ALL, 0,
-                      swizzle(src0, Z, X, Y, ONE),
-                      swizzle(src1, Y, Z, X, ONE), 0);
-
-      i915_emit_arith(p,
-                      A0_MAD,
-                      get_result_vector(p, &inst->Dst[0]),
-                      get_result_flags(inst), 0,
-                      swizzle(src0, Y, Z, X, ONE),
-                      swizzle(src1, Z, X, Y, ONE),
-                      negate(tmp, 1, 1, 1, 0));
-      break;
-
     default:
        i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode);
        p->error = 1;
@@ -1077,7 +958,7 @@ i915_translate_instruction(struct i915_fp_compile *p,
  
  
  static void i915_translate_token(struct i915_fp_compile *p,
-                                 const union i915_full_token* token,
+                                 const union i915_full_token *token,
                                   struct i915_fragment_shader *fs)
  {
     struct i915_fragment_shader *ifs = p->shader;
@@ -1096,7 +977,7 @@ static void i915_translate_token(struct i915_fp_compile *p,
                 == TGSI_FILE_CONSTANT) {
           uint i;
           for (i = token->FullDeclaration.Range.First;
-              i <= token->FullDeclaration.Range.Last;
+              i <= MIN2(token->FullDeclaration.Range.Last, I915_MAX_CONSTANT - 1);
                i++) {
              assert(ifs->constant_flags[i] == 0x0);
              ifs->constant_flags[i] = I915_CONSTFLAG_USER;
@@ -1266,17 +1147,26 @@ i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
  
        /* Copy compilation results to fragment program struct: 
         */
+      assert(!ifs->decl);
        assert(!ifs->program);
+
+      ifs->decl
+         = (uint *) MALLOC(decl_size * sizeof(uint));
        ifs->program
-         = (uint *) MALLOC((program_size + decl_size) * sizeof(uint));
-      if (ifs->program) {
-         ifs->program_len = program_size + decl_size;
+         = (uint *) MALLOC(program_size * sizeof(uint));
  
-         memcpy(ifs->program,
+      if (ifs->decl) {
+         ifs->decl_len = decl_size;
+
+         memcpy(ifs->decl,
                  p->declarations,
                  decl_size * sizeof(uint));
+      }
  
-         memcpy(ifs->program + decl_size,
+      if (ifs->program) {
+         ifs->program_len = program_size;
+
+         memcpy(ifs->program,
                  p->program,
                  program_size * sizeof(uint));
        }