i965: Make the param pointer arrays for the WM dynamically sized.

[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_emit.c
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c

index 6ea4a7d6e501535516137eb6d99cebf14cf359ee..b6defa3d59d90bca2979da66ee00c0005ed2c894 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -34,27 +34,27 @@
  #include "brw_context.h"
  #include "brw_wm.h"
  
-static GLboolean can_do_pln(struct intel_context *intel,
-                           const struct brw_reg *deltas)
+static bool
+can_do_pln(struct intel_context *intel, const struct brw_reg *deltas)
  {
     struct brw_context *brw = brw_context(&intel->ctx);
  
     if (!brw->has_pln)
-      return GL_FALSE;
+      return false;
  
     if (deltas[1].nr != deltas[0].nr + 1)
-      return GL_FALSE;
+      return false;
  
     if (intel->gen < 6 && ((deltas[0].nr & 1) != 0))
-      return GL_FALSE;
+      return false;
  
-   return GL_TRUE;
+   return true;
  }
  
  /* Return the SrcReg index of the channels that can be immediate float operands
   * instead of usage of PROGRAM_CONSTANT values through push/pull.
   */
-GLboolean
+bool
  brw_wm_arg_can_be_immediate(enum prog_opcode opcode, int arg)
  {
     int opcode_array[] = {
@@ -82,11 +82,11 @@ brw_wm_arg_can_be_immediate(enum prog_opcode opcode, int arg)
      */
     if (opcode == OPCODE_MAD || opcode == OPCODE_LRP) {
        if (arg == 1 || arg == 2)
-        return GL_TRUE;
+        return true;
     }
  
     if (opcode > ARRAY_SIZE(opcode_array))
-      return GL_FALSE;
+      return false;
  
     return arg == opcode_array[opcode] - 1;
  }
@@ -297,13 +297,11 @@ void emit_pixel_w(struct brw_wm_compile *c,
        if (c->dispatch_width == 16) {
          brw_math_16(p, dst[3],
                      BRW_MATH_FUNCTION_INV,
-                    BRW_MATH_SATURATE_NONE,
                      2, src,
                      BRW_MATH_PRECISION_FULL);
        } else {
          brw_math(p, dst[3],
                   BRW_MATH_FUNCTION_INV,
-                 BRW_MATH_SATURATE_NONE,
                   2, src,
                   BRW_MATH_DATA_VECTOR,
                   BRW_MATH_PRECISION_FULL);
@@ -457,12 +455,16 @@ void emit_frontfacing(struct brw_compile *p,
   * between each other.  We could probably do it like ddx and swizzle the right
   * order later, but bail for now and just produce
   * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
+ *
+ * The negate_value boolean is used to negate the d/dy computation for FBOs,
+ * since they place the origin at the upper left instead of the lower left.
   */
  void emit_ddxy(struct brw_compile *p,
                const struct brw_reg *dst,
                GLuint mask,
-              GLboolean is_ddx,
-              const struct brw_reg *arg0)
+              bool is_ddx,
+              const struct brw_reg *arg0,
+               bool negate_value)
  {
     int i;
     struct brw_reg src0, src1;
@@ -498,7 +500,10 @@ void emit_ddxy(struct brw_compile *p,
                            BRW_HORIZONTAL_STRIDE_0,
                            BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
          }
-        brw_ADD(p, dst[i], src0, negate(src1));
+         if (negate_value)
+            brw_ADD(p, dst[i], src1, negate(src0));
+         else
+            brw_ADD(p, dst[i], src0, negate(src1));
        }
     }
     if (mask & SATURATE)
@@ -766,7 +771,7 @@ void emit_dp2(struct brw_compile *p,
               const struct brw_reg *arg0,
               const struct brw_reg *arg1)
  {
-   int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+   int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
  
     if (!(mask & WRITEMASK_XYZW))
        return; /* Do not emit dead code */
@@ -787,7 +792,7 @@ void emit_dp3(struct brw_compile *p,
               const struct brw_reg *arg0,
               const struct brw_reg *arg1)
  {
-   int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+   int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
  
     if (!(mask & WRITEMASK_XYZW))
        return; /* Do not emit dead code */
@@ -809,7 +814,7 @@ void emit_dp4(struct brw_compile *p,
               const struct brw_reg *arg0,
               const struct brw_reg *arg1)
  {
-   int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+   int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
  
     if (!(mask & WRITEMASK_XYZW))
        return; /* Do not emit dead code */
@@ -832,7 +837,7 @@ void emit_dph(struct brw_compile *p,
               const struct brw_reg *arg0,
               const struct brw_reg *arg1)
  {
-   const int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+   const int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
  
     if (!(mask & WRITEMASK_XYZW))
        return; /* Do not emit dead code */
@@ -882,12 +887,14 @@ void emit_math1(struct brw_wm_compile *c,
  {
     struct brw_compile *p = &c->func;
     struct intel_context *intel = &p->brw->intel;
-   int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
-   GLuint saturate = ((mask & SATURATE) ?
-                     BRW_MATH_SATURATE_SATURATE :
-                     BRW_MATH_SATURATE_NONE);
+   int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
     struct brw_reg src;
  
+   if (!(mask & WRITEMASK_XYZW))
+      return; /* Do not emit dead code */
+
+   assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
     if (intel->gen >= 6 && ((arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0 ||
                             arg0[0].file != BRW_GENERAL_REGISTER_FILE) ||
                            arg0[0].negate || arg0[0].abs)) {
@@ -903,19 +910,14 @@ void emit_math1(struct brw_wm_compile *c,
        src = arg0[0];
     }
  
-   if (!(mask & WRITEMASK_XYZW))
-      return; /* Do not emit dead code */
-
-   assert(is_power_of_two(mask & WRITEMASK_XYZW));
-
     /* Send two messages to perform all 16 operations:
      */
     brw_push_insn_state(p);
+   brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
     brw_set_compression_control(p, BRW_COMPRESSION_NONE);
     brw_math(p,
             dst[dst_chan],
             function,
-           saturate,
             2,
             src,
             BRW_MATH_DATA_VECTOR,
@@ -926,7 +928,6 @@ void emit_math1(struct brw_wm_compile *c,
        brw_math(p,
                offset(dst[dst_chan],1),
                function,
-              saturate,
                3,
                sechalf(src),
                BRW_MATH_DATA_VECTOR,
@@ -945,7 +946,7 @@ void emit_math2(struct brw_wm_compile *c,
  {
     struct brw_compile *p = &c->func;
     struct intel_context *intel = &p->brw->intel;
-   int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+   int dst_chan = ffs(mask & WRITEMASK_XYZW) - 1;
  
     if (!(mask & WRITEMASK_XYZW))
        return; /* Do not emit dead code */
@@ -998,10 +999,6 @@ void emit_math2(struct brw_wm_compile *c,
                    sechalf(src1));
        }
     } else {
-      GLuint saturate = ((mask & SATURATE) ?
-                        BRW_MATH_SATURATE_SATURATE :
-                        BRW_MATH_SATURATE_NONE);
-
        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
        brw_MOV(p, brw_message_reg(3), arg1[0]);
        if (c->dispatch_width == 16) {
@@ -1009,11 +1006,11 @@ void emit_math2(struct brw_wm_compile *c,
          brw_MOV(p, brw_message_reg(5), sechalf(arg1[0]));
        }
  
+      brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
        brw_math(p,
                dst[dst_chan],
                function,
-              saturate,
                2,
                arg0[0],
                BRW_MATH_DATA_VECTOR,
@@ -1026,7 +1023,6 @@ void emit_math2(struct brw_wm_compile *c,
          brw_math(p,
                   offset(dst[dst_chan],1),
                   function,
-                 saturate,
                   4,
                   sechalf(arg0[0]),
                   BRW_MATH_DATA_VECTOR,
@@ -1044,7 +1040,7 @@ void emit_tex(struct brw_wm_compile *c,
               struct brw_reg depth_payload,
               GLuint tex_idx,
               GLuint sampler,
-             GLboolean shadow)
+             bool shadow)
  {
     struct brw_compile *p = &c->func;
     struct intel_context *intel = &p->brw->intel;
@@ -1076,11 +1072,14 @@ void emit_tex(struct brw_wm_compile *c,
        nr_texcoords = 1;
        break;
     case TEXTURE_2D_INDEX:
+   case TEXTURE_1D_ARRAY_INDEX:
     case TEXTURE_RECT_INDEX:
+   case TEXTURE_EXTERNAL_INDEX:
        emit = WRITEMASK_XY;
        nr_texcoords = 2;
        break;
     case TEXTURE_3D_INDEX:
+   case TEXTURE_2D_ARRAY_INDEX:
     case TEXTURE_CUBE_INDEX:
        emit = WRITEMASK_XYZ;
        nr_texcoords = 3;
@@ -1107,7 +1106,7 @@ void emit_tex(struct brw_wm_compile *c,
  
     /* Emit the texcoords. */
     for (i = 0; i < nr_texcoords; i++) {
-      if (c->key.gl_clamp_mask[i] & (1 << sampler))
+      if (c->key.tex.gl_clamp_mask[i] & (1 << sampler))
          brw_set_saturate(p, true);
  
        if (emit & (1<<i))
@@ -1159,9 +1158,9 @@ void emit_tex(struct brw_wm_compile *c,
               msg_type,
               response_length,
               cur_mrf - 1,
-             0,
               1,
-             simd_mode);
+             simd_mode,
+             BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
  }
  
  
@@ -1210,6 +1209,7 @@ void emit_txb(struct brw_wm_compile *c,
        break;
     case TEXTURE_2D_INDEX:
     case TEXTURE_RECT_INDEX:
+   case TEXTURE_EXTERNAL_INDEX:
        brw_MOV(p, brw_message_reg(2 + 0 * mrf_per_channel), arg[0]);
        brw_MOV(p, brw_message_reg(2 + 1 * mrf_per_channel), arg[1]);
        brw_MOV(p, brw_message_reg(2 + 2 * mrf_per_channel), brw_imm_f(0));
@@ -1238,9 +1238,9 @@ void emit_txb(struct brw_wm_compile *c,
               msg_type,
               response_length,
               msgLength,
-             0,        
               1,
-             BRW_SAMPLER_SIMD_MODE_SIMD16);    
+             BRW_SAMPLER_SIMD_MODE_SIMD16,
+             BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
  }
  
  
@@ -1327,6 +1327,7 @@ static void fire_fb_write( struct brw_wm_compile *c,
  {
     struct brw_compile *p = &c->func;
     struct intel_context *intel = &p->brw->intel;
+   uint32_t msg_control;
  
     /* Pass through control information:
      * 
@@ -1344,17 +1345,23 @@ static void fire_fb_write( struct brw_wm_compile *c,
        brw_pop_insn_state(p);
     }
  
+   if (c->dispatch_width == 16)
+      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
+   else
+      msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
+
     /* Send framebuffer write message: */
  /*  send (16) null.0<1>:uw m0               r0.0<8;8,1>:uw   0x85a04000:ud    { Align1 EOT } */
     brw_fb_WRITE(p,
                 c->dispatch_width,
                 base_reg,
                 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+               msg_control,
                 target,         
                 nr,
                 0, 
                 eot,
-               GL_TRUE);
+               true);
  }
  
  
@@ -1526,7 +1533,7 @@ void emit_fb_write(struct brw_wm_compile *c,
     else {
        struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
        struct brw_reg ip = brw_ip_reg();
-      struct brw_instruction *jmp;
+      int jmp;
        
        brw_set_compression_control(p, BRW_COMPRESSION_NONE);
        brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
@@ -1535,7 +1542,7 @@ void emit_fb_write(struct brw_wm_compile *c,
               get_element_ud(brw_vec8_grf(1,0), 6), 
               brw_imm_ud(1<<26)); 
  
-      jmp = brw_JMPI(p, ip, ip, brw_imm_w(0));
+      jmp = brw_JMPI(p, ip, ip, brw_imm_w(0)) - p->store;
        {
          emit_aa(c, arg1, 2);
          fire_fb_write(c, 0, nr, target, eot);
@@ -1735,11 +1742,15 @@ void brw_wm_emit( struct brw_wm_compile *c )
          break;
  
        case OPCODE_DDX:
-        emit_ddxy(p, dst, dst_flags, GL_TRUE, args[0]);
+        emit_ddxy(p, dst, dst_flags, true, args[0], false);
          break;
  
        case OPCODE_DDY:
-        emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
+         /* Make sure fp->program.UsesDFdy flag got set (otherwise there's no
+          * guarantee that c->key.render_to_fbo is set).
+          */
+         assert(c->fp->program.UsesDFdy);
+        emit_ddxy(p, dst, dst_flags, false, args[0], c->key.render_to_fbo);
          break;
  
        case OPCODE_DP2: