i965/fs: Fix off-by-one region overlap comparison in copy propagation.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_sf_emit.c
index 866474121cd1410539cf6176bcdf073e5fc86163..f03b74143f4a5cbe8d765df1355d692e82078a3a 100644 (file)
@@ -30,7 +30,6 @@
   */
 
 
-#include "main/glheader.h"
 #include "main/macros.h"
 #include "main/enums.h"
 
@@ -101,7 +100,7 @@ have_attr(struct brw_sf_compile *c, GLuint attr)
 static void copy_bfc( struct brw_sf_compile *c,
                      struct brw_reg vert )
 {
-   struct brw_compile *p = &c->func;
+   struct brw_codegen *p = &c->func;
    GLuint i;
 
    for (i = 0; i < 2; i++) {
@@ -116,7 +115,7 @@ static void copy_bfc( struct brw_sf_compile *c,
 
 static void do_twoside_color( struct brw_sf_compile *c )
 {
-   struct brw_compile *p = &c->func;
+   struct brw_codegen *p = &c->func;
    GLuint backface_conditional = c->key.frontface_ccw ? BRW_CONDITIONAL_G : BRW_CONDITIONAL_L;
 
    /* Already done in clip program:
@@ -137,7 +136,6 @@ static void do_twoside_color( struct brw_sf_compile *c )
     * we run with NoMask, so it's not an option and we can use
     * BRW_EXECUTE_1 for all comparisions.
     */
-   brw_push_insn_state(p);
    brw_CMP(p, vec4(brw_null_reg()), backface_conditional, c->det, brw_imm_f(0));
    brw_IF(p, BRW_EXECUTE_4);
    {
@@ -148,7 +146,6 @@ static void do_twoside_color( struct brw_sf_compile *c )
       }
    }
    brw_ENDIF(p);
-   brw_pop_insn_state(p);
 }
 
 
@@ -161,7 +158,7 @@ static void copy_flatshaded_attributes(struct brw_sf_compile *c,
                                        struct brw_reg dst,
                                        struct brw_reg src)
 {
-   struct brw_compile *p = &c->func;
+   struct brw_codegen *p = &c->func;
    int i;
 
    for (i = 0; i < c->vue_map.num_slots; i++) {
@@ -193,9 +190,7 @@ static int count_flatshaded_attributes(struct brw_sf_compile *c)
  */
 static void do_flatshade_triangle( struct brw_sf_compile *c )
 {
-   struct brw_compile *p = &c->func;
-   struct brw_context *brw = p->brw;
-   struct brw_reg ip = brw_ip_reg();
+   struct brw_codegen *p = &c->func;
    GLuint nr;
    GLuint jmpi = 1;
 
@@ -204,36 +199,30 @@ static void do_flatshade_triangle( struct brw_sf_compile *c )
    if (c->key.primitive == SF_UNFILLED_TRIS)
       return;
 
-   if (brw->gen == 5)
+   if (p->devinfo->gen == 5)
        jmpi = 2;
 
    nr = count_flatshaded_attributes(c);
 
-   brw_push_insn_state(p);
-
    brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr*2+1)));
-   brw_JMPI(p, ip, ip, c->pv);
+   brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);
 
    copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
    copy_flatshaded_attributes(c, c->vert[2], c->vert[0]);
-   brw_JMPI(p, ip, ip, brw_imm_d(jmpi*(nr*4+1)));
+   brw_JMPI(p, brw_imm_d(jmpi*(nr*4+1)), BRW_PREDICATE_NONE);
 
    copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
    copy_flatshaded_attributes(c, c->vert[2], c->vert[1]);
-   brw_JMPI(p, ip, ip, brw_imm_d(jmpi*nr*2));
+   brw_JMPI(p, brw_imm_d(jmpi*nr*2), BRW_PREDICATE_NONE);
 
    copy_flatshaded_attributes(c, c->vert[0], c->vert[2]);
    copy_flatshaded_attributes(c, c->vert[1], c->vert[2]);
-
-   brw_pop_insn_state(p);
 }
-       
+
 
 static void do_flatshade_line( struct brw_sf_compile *c )
 {
-   struct brw_compile *p = &c->func;
-   struct brw_context *brw = p->brw;
-   struct brw_reg ip = brw_ip_reg();
+   struct brw_codegen *p = &c->func;
    GLuint nr;
    GLuint jmpi = 1;
 
@@ -242,24 +231,19 @@ static void do_flatshade_line( struct brw_sf_compile *c )
    if (c->key.primitive == SF_UNFILLED_TRIS)
       return;
 
-   if (brw->gen == 5)
+   if (p->devinfo->gen == 5)
        jmpi = 2;
 
    nr = count_flatshaded_attributes(c);
 
-   brw_push_insn_state(p);
-
    brw_MUL(p, c->pv, c->pv, brw_imm_d(jmpi*(nr+1)));
-   brw_JMPI(p, ip, ip, c->pv);
+   brw_JMPI(p, c->pv, BRW_PREDICATE_NONE);
    copy_flatshaded_attributes(c, c->vert[1], c->vert[0]);
 
-   brw_JMPI(p, ip, ip, brw_imm_ud(jmpi*nr));
+   brw_JMPI(p, brw_imm_ud(jmpi*nr), BRW_PREDICATE_NONE);
    copy_flatshaded_attributes(c, c->vert[0], c->vert[1]);
-
-   brw_pop_insn_state(p);
 }
 
-       
 
 /***********************************************************************
  * Triangle setup.
@@ -319,17 +303,13 @@ static void alloc_regs( struct brw_sf_compile *c )
 
 static void copy_z_inv_w( struct brw_sf_compile *c )
 {
-   struct brw_compile *p = &c->func;
+   struct brw_codegen *p = &c->func;
    GLuint i;
 
-   brw_push_insn_state(p);
-       
    /* Copy both scalars with a single MOV:
     */
    for (i = 0; i < c->nr_verts; i++)
       brw_MOV(p, vec2(suboffset(c->vert[i], 2)), vec2(c->z[i]));
-       
-   brw_pop_insn_state(p);
 }
 
 
@@ -338,13 +318,12 @@ static void invert_det( struct brw_sf_compile *c)
    /* Looks like we invert all 8 elements just to get 1/det in
     * position 2 !?!
     */
-   brw_math(&c->func,
-           c->inv_det,
-           BRW_MATH_FUNCTION_INV,
-           0,
-           c->det,
-           BRW_MATH_DATA_SCALAR,
-           BRW_MATH_PRECISION_FULL);
+   gen4_math(&c->func,
+            c->inv_det,
+            BRW_MATH_FUNCTION_INV,
+            0,
+            c->det,
+            BRW_MATH_PRECISION_FULL);
 
 }
 
@@ -415,13 +394,29 @@ calculate_point_sprite_mask(struct brw_sf_compile *c, GLuint reg)
    return pc;
 }
 
+static void
+set_predicate_control_flag_value(struct brw_codegen *p,
+                                 struct brw_sf_compile *c,
+                                 unsigned value)
+{
+   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
+
+   if (value != 0xff) {
+      if (value != c->flag_value) {
+         brw_MOV(p, brw_flag_reg(0, 0), brw_imm_uw(value));
+         c->flag_value = value;
+      }
 
+      brw_set_default_predicate_control(p, BRW_PREDICATE_NORMAL);
+   }
+}
 
 void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
 {
-   struct brw_compile *p = &c->func;
+   struct brw_codegen *p = &c->func;
    GLuint i;
 
+   c->flag_value = 0xff;
    c->nr_verts = 3;
 
    if (allocate)
@@ -449,7 +444,7 @@ void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
 
       if (pc_persp)
       {
-        brw_set_predicate_control_flag_value(p, pc_persp);
+        set_predicate_control_flag_value(p, c, pc_persp);
         brw_MUL(p, a0, a0, c->inv_w[0]);
         brw_MUL(p, a1, a1, c->inv_w[1]);
         brw_MUL(p, a2, a2, c->inv_w[2]);
@@ -460,7 +455,7 @@ void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
        */
       if (pc_linear)
       {
-        brw_set_predicate_control_flag_value(p, pc_linear);
+        set_predicate_control_flag_value(p, c, pc_linear);
 
         brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
         brw_ADD(p, c->a2_sub_a0, a2, negate(a0));
@@ -470,7 +465,7 @@ void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
         brw_MUL(p, brw_null_reg(), c->a1_sub_a0, c->dy2);
         brw_MAC(p, c->tmp, c->a2_sub_a0, negate(c->dy0));
         brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
-               
+
         /* calculate dA/dy
          */
         brw_MUL(p, brw_null_reg(), c->a2_sub_a0, c->dx0);
@@ -479,14 +474,14 @@ void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
       }
 
       {
-        brw_set_predicate_control_flag_value(p, pc);
+        set_predicate_control_flag_value(p, c, pc);
         /* start point for interpolation
          */
         brw_MOV(p, c->m3C0, a0);
 
         /* Copy m0..m3 to URB.  m0 is implicitly copied from r0 in
          * the send instruction:
-         */    
+         */
         brw_urb_WRITE(p,
                       brw_null_reg(),
                       0,
@@ -499,16 +494,18 @@ void brw_emit_tri_setup(struct brw_sf_compile *c, bool allocate)
                       BRW_URB_SWIZZLE_TRANSPOSE); /* XXX: Swizzle control "SF to windower" */
       }
    }
+
+   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
 }
 
 
 
 void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
 {
-   struct brw_compile *p = &c->func;
+   struct brw_codegen *p = &c->func;
    GLuint i;
 
-
+   c->flag_value = 0xff;
    c->nr_verts = 2;
 
    if (allocate)
@@ -531,7 +528,7 @@ void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
 
       if (pc_persp)
       {
-        brw_set_predicate_control_flag_value(p, pc_persp);
+        set_predicate_control_flag_value(p, c, pc_persp);
         brw_MUL(p, a0, a0, c->inv_w[0]);
         brw_MUL(p, a1, a1, c->inv_w[1]);
       }
@@ -539,19 +536,19 @@ void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
       /* Calculate coefficients for position, color:
        */
       if (pc_linear) {
-        brw_set_predicate_control_flag_value(p, pc_linear);
+        set_predicate_control_flag_value(p, c, pc_linear);
 
         brw_ADD(p, c->a1_sub_a0, a1, negate(a0));
 
         brw_MUL(p, c->tmp, c->a1_sub_a0, c->dx0);
         brw_MUL(p, c->m1Cx, c->tmp, c->inv_det);
-               
+
         brw_MUL(p, c->tmp, c->a1_sub_a0, c->dy0);
         brw_MUL(p, c->m2Cy, c->tmp, c->inv_det);
       }
 
       {
-        brw_set_predicate_control_flag_value(p, pc);
+        set_predicate_control_flag_value(p, c, pc);
 
         /* start point for interpolation
          */
@@ -571,13 +568,16 @@ void brw_emit_line_setup(struct brw_sf_compile *c, bool allocate)
                       BRW_URB_SWIZZLE_TRANSPOSE);
       }
    }
+
+   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
 }
 
 void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
 {
-   struct brw_compile *p = &c->func;
+   struct brw_codegen *p = &c->func;
    GLuint i;
 
+   c->flag_value = 0xff;
    c->nr_verts = 1;
 
    if (allocate)
@@ -594,7 +594,7 @@ void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
       pc_persp &= ~pc_coord_replace;
 
       if (pc_persp) {
-        brw_set_predicate_control_flag_value(p, pc_persp);
+        set_predicate_control_flag_value(p, c, pc_persp);
         brw_MUL(p, a0, a0, c->inv_w[0]);
       }
 
@@ -604,17 +604,16 @@ void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
        * point.
        */
       if (pc_coord_replace) {
-        brw_set_predicate_control_flag_value(p, pc_coord_replace);
+        set_predicate_control_flag_value(p, c, pc_coord_replace);
         /* Caculate 1.0/PointWidth */
-        brw_math(&c->func,
-                 c->tmp,
-                 BRW_MATH_FUNCTION_INV,
-                 0,
-                 c->dx0,
-                 BRW_MATH_DATA_SCALAR,
-                 BRW_MATH_PRECISION_FULL);
+        gen4_math(&c->func,
+                  c->tmp,
+                  BRW_MATH_FUNCTION_INV,
+                  0,
+                  c->dx0,
+                  BRW_MATH_PRECISION_FULL);
 
-        brw_set_access_mode(p, BRW_ALIGN_16);
+        brw_set_default_access_mode(p, BRW_ALIGN_16);
 
         /* dA/dx, dA/dy */
         brw_MOV(p, c->m1Cx, brw_imm_f(0.0));
@@ -634,18 +633,18 @@ void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
            brw_MOV(p, brw_writemask(c->m3C0, WRITEMASK_W), brw_imm_f(1.0));
         }
 
-        brw_set_access_mode(p, BRW_ALIGN_1);
+        brw_set_default_access_mode(p, BRW_ALIGN_1);
       }
 
       if (pc & ~pc_coord_replace) {
-        brw_set_predicate_control_flag_value(p, pc & ~pc_coord_replace);
+        set_predicate_control_flag_value(p, c, pc & ~pc_coord_replace);
         brw_MOV(p, c->m1Cx, brw_imm_ud(0));
         brw_MOV(p, c->m2Cy, brw_imm_ud(0));
         brw_MOV(p, c->m3C0, a0); /* constant value */
       }
 
 
-      brw_set_predicate_control_flag_value(p, pc);
+      set_predicate_control_flag_value(p, c, pc);
       /* Copy m0..m3 to URB. */
       brw_urb_WRITE(p,
                    brw_null_reg(),
@@ -658,6 +657,8 @@ void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
                    i*4,        /* urb destination offset */
                    BRW_URB_SWIZZLE_TRANSPOSE);
    }
+
+   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
 }
 
 /* Points setup - several simplifications as all attributes are
@@ -665,9 +666,10 @@ void brw_emit_point_sprite_setup(struct brw_sf_compile *c, bool allocate)
  */
 void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate)
 {
-   struct brw_compile *p = &c->func;
+   struct brw_codegen *p = &c->func;
    GLuint i;
 
+   c->flag_value = 0xff;
    c->nr_verts = 1;
 
    if (allocate)
@@ -685,11 +687,11 @@ void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate)
       bool last = calculate_masks(c, i, &pc, &pc_persp, &pc_linear);
 
       if (pc_persp)
-      {                                
+      {
         /* This seems odd as the values are all constant, but the
          * fragment shader will be expecting it:
          */
-        brw_set_predicate_control_flag_value(p, pc_persp);
+        set_predicate_control_flag_value(p, c, pc_persp);
         brw_MUL(p, a0, a0, c->inv_w[0]);
       }
 
@@ -699,7 +701,7 @@ void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate)
        * code in the fragment shader.
        */
       {
-        brw_set_predicate_control_flag_value(p, pc);
+        set_predicate_control_flag_value(p, c, pc);
 
         brw_MOV(p, c->m3C0, a0); /* constant value */
 
@@ -717,20 +719,19 @@ void brw_emit_point_setup(struct brw_sf_compile *c, bool allocate)
                       BRW_URB_SWIZZLE_TRANSPOSE);
       }
    }
+
+   brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
 }
 
 void brw_emit_anyprim_setup( struct brw_sf_compile *c )
 {
-   struct brw_compile *p = &c->func;
-   struct brw_reg ip = brw_ip_reg();
+   struct brw_codegen *p = &c->func;
    struct brw_reg payload_prim = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0);
    struct brw_reg payload_attr = get_element_ud(brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, 1, 0), 0);
    struct brw_reg primmask;
    int jmp;
    struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
 
-   GLuint saveflag;
-
    c->nr_verts = 3;
    alloc_regs(c);
 
@@ -739,7 +740,6 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c )
    brw_MOV(p, primmask, brw_imm_ud(1));
    brw_SHL(p, primmask, primmask, payload_prim);
 
-   brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
    brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_TRILIST) |
                                               (1<<_3DPRIM_TRISTRIP) |
                                               (1<<_3DPRIM_TRIFAN) |
@@ -747,48 +747,26 @@ void brw_emit_anyprim_setup( struct brw_sf_compile *c )
                                               (1<<_3DPRIM_POLYGON) |
                                               (1<<_3DPRIM_RECTLIST) |
                                               (1<<_3DPRIM_TRIFAN_NOSTIPPLE)));
-   jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p->store;
-   {
-      saveflag = p->flag_value;
-      brw_push_insn_state(p);
-      brw_emit_tri_setup( c, false );
-      brw_pop_insn_state(p);
-      p->flag_value = saveflag;
-      /* note - thread killed in subroutine, so must
-       * restore the flag which is changed when building
-       * the subroutine. fix #13240
-       */
-   }
+   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
+   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
+   brw_emit_tri_setup(c, false);
    brw_land_fwd_jump(p, jmp);
 
-   brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
    brw_AND(p, v1_null_ud, primmask, brw_imm_ud((1<<_3DPRIM_LINELIST) |
                                               (1<<_3DPRIM_LINESTRIP) |
                                               (1<<_3DPRIM_LINELOOP) |
                                               (1<<_3DPRIM_LINESTRIP_CONT) |
                                               (1<<_3DPRIM_LINESTRIP_BF) |
                                               (1<<_3DPRIM_LINESTRIP_CONT_BF)));
-   jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p->store;
-   {
-      saveflag = p->flag_value;
-      brw_push_insn_state(p);
-      brw_emit_line_setup( c, false );
-      brw_pop_insn_state(p);
-      p->flag_value = saveflag;
-      /* note - thread killed in subroutine */
-   }
+   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
+   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
+   brw_emit_line_setup(c, false);
    brw_land_fwd_jump(p, jmp);
 
-   brw_set_conditionalmod(p, BRW_CONDITIONAL_Z);
    brw_AND(p, v1_null_ud, payload_attr, brw_imm_ud(1<<BRW_SPRITE_POINT_ENABLE));
-   jmp = brw_JMPI(p, ip, ip, brw_imm_d(0)) - p->store;
-   {
-      saveflag = p->flag_value;
-      brw_push_insn_state(p);
-      brw_emit_point_sprite_setup( c, false );
-      brw_pop_insn_state(p);
-      p->flag_value = saveflag;
-   }
+   brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_Z);
+   jmp = brw_JMPI(p, brw_imm_d(0), BRW_PREDICATE_NORMAL) - p->store;
+   brw_emit_point_sprite_setup(c, false);
    brw_land_fwd_jump(p, jmp);
 
    brw_emit_point_setup( c, false );