X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_vs_emit.c;h=8403e1bd7b6275b8285272fea355fd2ec5e8f18d;hb=a27d3e43fe98837a851a6c0f6f0605f83a002187;hp=cfade7c992a6d2232994e43430db393e4c906a27;hpb=db0e53af74beafa0ba07b200396bfe12fa9f5c89;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index cfade7c992a..8403e1bd7b6 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -78,7 +78,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) */ c->nr_inputs = 0; for (i = 0; i < BRW_ATTRIB_MAX; i++) { - if (c->prog_data.inputs_read & (1<prog_data.inputs_read & ((GLuint64EXT)1<nr_inputs++; c->regs[PROGRAM_INPUT][i] = brw_vec8_grf(reg, 0); reg++; @@ -416,9 +416,17 @@ static void emit_log_noalias( struct brw_vs_compile *c, struct brw_reg arg0 ) { struct brw_compile *p = &c->func; - struct brw_reg dst_ud = retype(dst, BRW_REGISTER_TYPE_UD); + struct brw_reg tmp = dst; + struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD); struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD); + GLboolean need_tmp = (dst.dw1.bits.writemask != 0xf || + dst.file != BRW_GENERAL_REGISTER_FILE); + if (need_tmp) { + tmp = get_tmp(c); + tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD); + } + /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt * according to spec: * @@ -430,30 +438,30 @@ static void emit_log_noalias( struct brw_vs_compile *c, */ if (dst.dw1.bits.writemask & WRITEMASK_XZ) { brw_AND(p, - brw_writemask(dst_ud, WRITEMASK_X), + brw_writemask(tmp_ud, WRITEMASK_X), brw_swizzle1(arg0_ud, 0), brw_imm_ud((1U<<31)-1)); brw_SHR(p, - brw_writemask(dst_ud, WRITEMASK_X), - dst_ud, + brw_writemask(tmp_ud, WRITEMASK_X), + tmp_ud, brw_imm_ud(23)); brw_ADD(p, - brw_writemask(dst, WRITEMASK_X), - retype(dst_ud, BRW_REGISTER_TYPE_D), /* does it matter? */ + brw_writemask(tmp, WRITEMASK_X), + retype(tmp_ud, BRW_REGISTER_TYPE_D), /* does it matter? */ brw_imm_d(-127)); } if (dst.dw1.bits.writemask & WRITEMASK_YZ) { brw_AND(p, - brw_writemask(dst_ud, WRITEMASK_Y), + brw_writemask(tmp_ud, WRITEMASK_Y), brw_swizzle1(arg0_ud, 0), brw_imm_ud((1<<23)-1)); brw_OR(p, - brw_writemask(dst_ud, WRITEMASK_Y), - dst_ud, + brw_writemask(tmp_ud, WRITEMASK_Y), + tmp_ud, brw_imm_ud(127<<23)); } @@ -472,19 +480,24 @@ static void emit_log_noalias( struct brw_vs_compile *c, */ emit_math1(c, BRW_MATH_FUNCTION_LOG, - brw_writemask(dst, WRITEMASK_Z), - brw_swizzle1(dst, 1), + brw_writemask(tmp, WRITEMASK_Z), + brw_swizzle1(tmp, 1), BRW_MATH_PRECISION_FULL); brw_ADD(p, - brw_writemask(dst, WRITEMASK_Z), - brw_swizzle1(dst, 2), - brw_swizzle1(dst, 0)); + brw_writemask(tmp, WRITEMASK_Z), + brw_swizzle1(tmp, 2), + brw_swizzle1(tmp, 0)); } if (dst.dw1.bits.writemask & WRITEMASK_W) { /* result[3] = 1.0; */ - brw_MOV(p, brw_writemask(dst, WRITEMASK_W), brw_imm_f(1)); + brw_MOV(p, brw_writemask(tmp, WRITEMASK_W), brw_imm_f(1)); + } + + if (need_tmp) { + brw_MOV(p, dst, tmp); + release_tmp(c, tmp); } } @@ -635,10 +648,17 @@ static void emit_arl( struct brw_vs_compile *c, struct brw_reg arg0 ) { struct brw_compile *p = &c->func; + struct brw_reg tmp = dst; + GLboolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE); + + if (need_tmp) + tmp = get_tmp(c); - brw_RNDD(p, dst, arg0); + brw_RNDD(p, tmp, arg0); + brw_MUL(p, dst, tmp, brw_imm_d(16)); - brw_MUL(p, dst, dst, brw_imm_d(16)); + if (need_tmp) + release_tmp(c, tmp); } @@ -777,13 +797,21 @@ static void emit_vertex_write( struct brw_vs_compile *c) /* Build ndc coords? TODO: Shortcircuit when w is known to be one. */ - ndc = get_tmp(c); - emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); - brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc); + if (!c->key.know_w_is_one) { + ndc = get_tmp(c); + emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL); + brw_MUL(p, brw_writemask(ndc, WRITEMASK_XYZ), pos, ndc); + } + else { + ndc = pos; + } /* This includes the workaround for -ve rhw, so is no longer an * optional step: */ + if ((c->prog_data.outputs_written & (1<key.nr_userclip || + !c->key.know_w_is_one) { struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD); GLuint i; @@ -816,20 +844,17 @@ static void emit_vertex_write( struct brw_vs_compile *c) * Later, clipping will detect ucp[6] and ensure the primitive is * clipped against all fixed planes. */ - brw_CMP(p, - vec8(brw_null_reg()), - BRW_CONDITIONAL_L, - brw_swizzle1(ndc, 3), - brw_imm_f(0)); + if (!c->key.know_w_is_one) { + brw_CMP(p, + vec8(brw_null_reg()), + BRW_CONDITIONAL_L, + brw_swizzle1(ndc, 3), + brw_imm_f(0)); - brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6)); - brw_MOV(p, ndc, brw_imm_f(0)); - brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - - - - + brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6)); + brw_MOV(p, ndc, brw_imm_f(0)); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + } brw_set_access_mode(p, BRW_ALIGN_1); /* why? */ brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1); @@ -837,6 +862,9 @@ static void emit_vertex_write( struct brw_vs_compile *c) release_tmp(c, header1); } + else { + brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); + } /* Emit the (interleaved) headers for the two vertices - an 8-reg @@ -873,6 +901,13 @@ void brw_vs_emit( struct brw_vs_compile *c ) GLuint nr_insns = c->vp->program.Base.NumInstructions; GLuint insn; + + if (INTEL_DEBUG & DEBUG_VS) { + _mesa_printf("\n\n\nvs-emit:\n"); + _mesa_print_program(&c->vp->program.Base); + _mesa_printf("\n"); + } + brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_access_mode(p, BRW_ALIGN_16);