Merge branch 'master' into crestline
[mesa.git] / src / mesa / drivers / dri / i965 / brw_vs_emit.c
index cfade7c992a6d2232994e43430db393e4c906a27..8403e1bd7b6275b8285272fea355fd2ec5e8f18d 100644 (file)
@@ -78,7 +78,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
     */
    c->nr_inputs = 0;
    for (i = 0; i < BRW_ATTRIB_MAX; i++) {
-      if (c->prog_data.inputs_read & (1<<i)) {
+      if (c->prog_data.inputs_read & ((GLuint64EXT)1<<i)) {
         c->nr_inputs++;
         c->regs[PROGRAM_INPUT][i] = brw_vec8_grf(reg, 0);
         reg++;
@@ -416,9 +416,17 @@ static void emit_log_noalias( struct brw_vs_compile *c,
                              struct brw_reg arg0 )
 {
    struct brw_compile *p = &c->func;
-   struct brw_reg dst_ud = retype(dst, BRW_REGISTER_TYPE_UD);
+   struct brw_reg tmp = dst;
+   struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
    struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD);
+   GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
+                        dst.file != BRW_GENERAL_REGISTER_FILE);
 
+   if (need_tmp) {
+      tmp = get_tmp(c);
+      tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
+   }
+   
    /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
     * according to spec:
     *
@@ -430,30 +438,30 @@ static void emit_log_noalias( struct brw_vs_compile *c,
     */
    if (dst.dw1.bits.writemask & WRITEMASK_XZ) {
       brw_AND(p, 
-             brw_writemask(dst_ud, WRITEMASK_X),
+             brw_writemask(tmp_ud, WRITEMASK_X),
              brw_swizzle1(arg0_ud, 0),
              brw_imm_ud((1U<<31)-1));
 
       brw_SHR(p, 
-             brw_writemask(dst_ud, WRITEMASK_X), 
-             dst_ud,
+             brw_writemask(tmp_ud, WRITEMASK_X), 
+             tmp_ud,
              brw_imm_ud(23));
 
       brw_ADD(p, 
-             brw_writemask(dst, WRITEMASK_X), 
-             retype(dst_ud, BRW_REGISTER_TYPE_D),      /* does it matter? */
+             brw_writemask(tmp, WRITEMASK_X), 
+             retype(tmp_ud, BRW_REGISTER_TYPE_D),      /* does it matter? */
              brw_imm_d(-127));
    }
 
    if (dst.dw1.bits.writemask & WRITEMASK_YZ) {
       brw_AND(p, 
-             brw_writemask(dst_ud, WRITEMASK_Y),
+             brw_writemask(tmp_ud, WRITEMASK_Y),
              brw_swizzle1(arg0_ud, 0),
              brw_imm_ud((1<<23)-1));
 
       brw_OR(p, 
-            brw_writemask(dst_ud, WRITEMASK_Y), 
-            dst_ud,
+            brw_writemask(tmp_ud, WRITEMASK_Y), 
+            tmp_ud,
             brw_imm_ud(127<<23));
    }
    
@@ -472,19 +480,24 @@ static void emit_log_noalias( struct brw_vs_compile *c,
        */
       emit_math1(c, 
                 BRW_MATH_FUNCTION_LOG, 
-                brw_writemask(dst, WRITEMASK_Z), 
-                brw_swizzle1(dst, 1), 
+                brw_writemask(tmp, WRITEMASK_Z), 
+                brw_swizzle1(tmp, 1), 
                 BRW_MATH_PRECISION_FULL);
       
       brw_ADD(p, 
-             brw_writemask(dst, WRITEMASK_Z), 
-             brw_swizzle1(dst, 2), 
-             brw_swizzle1(dst, 0));
+             brw_writemask(tmp, WRITEMASK_Z), 
+             brw_swizzle1(tmp, 2), 
+             brw_swizzle1(tmp, 0));
    }  
 
    if (dst.dw1.bits.writemask & WRITEMASK_W) {
       /* result[3] = 1.0; */
-      brw_MOV(p, brw_writemask(dst, WRITEMASK_W), brw_imm_f(1));
+      brw_MOV(p, brw_writemask(tmp, WRITEMASK_W), brw_imm_f(1));
+   }
+
+   if (need_tmp) {
+      brw_MOV(p, dst, tmp);
+      release_tmp(c, tmp);
    }
 }
 
@@ -635,10 +648,17 @@ static void emit_arl( struct brw_vs_compile *c,
                      struct brw_reg arg0 )
 {
    struct brw_compile *p = &c->func;
+   struct brw_reg tmp = dst;
+   GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
+   
+   if (need_tmp) 
+      tmp = get_tmp(c);
 
-   brw_RNDD(p, dst, arg0);
+   brw_RNDD(p, tmp, arg0);
+   brw_MUL(p, dst, tmp, brw_imm_d(16));
 
-   brw_MUL(p, dst, dst, brw_imm_d(16));
+   if (need_tmp)
+      release_tmp(c, tmp);
 }
 
 
@@ -777,13 +797,21 @@ static void emit_vertex_write( struct brw_vs_compile *c)
 
    /* Build ndc coords?   TODO: Shortcircuit when w is known to be one.
     */
-   ndc = get_tmp(c);
-   emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
-   brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc);
+   if (!c->key.know_w_is_one) {
+      ndc = get_tmp(c);
+      emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
+      brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc);
+   }
+   else {
+      ndc = pos;
+   }
 
    /* This includes the workaround for -ve rhw, so is no longer an
     * optional step:
     */
+   if ((c->prog_data.outputs_written & (1<<VERT_RESULT_PSIZ)) ||
+       c->key.nr_userclip ||
+       !c->key.know_w_is_one)
    {
       struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
       GLuint i;
@@ -816,20 +844,17 @@ static void emit_vertex_write( struct brw_vs_compile *c)
        * Later, clipping will detect ucp[6] and ensure the primitive is
        * clipped against all fixed planes.
        */
-      brw_CMP(p,
-             vec8(brw_null_reg()),
-             BRW_CONDITIONAL_L,
-             brw_swizzle1(ndc, 3),
-             brw_imm_f(0));
+      if (!c->key.know_w_is_one) {
+        brw_CMP(p,
+                vec8(brw_null_reg()),
+                BRW_CONDITIONAL_L,
+                brw_swizzle1(ndc, 3),
+                brw_imm_f(0));
    
-      brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
-      brw_MOV(p, ndc, brw_imm_f(0));
-      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-
-
-
-
-
+        brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
+        brw_MOV(p, ndc, brw_imm_f(0));
+        brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+      }
 
       brw_set_access_mode(p, BRW_ALIGN_1);     /* why? */
       brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1);
@@ -837,6 +862,9 @@ static void emit_vertex_write( struct brw_vs_compile *c)
 
       release_tmp(c, header1);
    }
+   else {
+      brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
+   }
 
 
    /* Emit the (interleaved) headers for the two vertices - an 8-reg
@@ -873,6 +901,13 @@ void brw_vs_emit( struct brw_vs_compile *c )
    GLuint nr_insns = c->vp->program.Base.NumInstructions;
    GLuint insn;
 
+
+   if (INTEL_DEBUG & DEBUG_VS) {
+      _mesa_printf("\n\n\nvs-emit:\n");
+      _mesa_print_program(&c->vp->program.Base); 
+      _mesa_printf("\n");
+   }
+
    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
    brw_set_access_mode(p, BRW_ALIGN_16);