From: Keith Whitwell Date: Tue, 17 Nov 2009 22:46:23 +0000 (-0800) Subject: i965g: handle special vs outputs specially X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=1877e6cd2d76143ef8a9c516122afe614ae3b4a4;p=mesa.git i965g: handle special vs outputs specially Where vs output semantic tags indicate an output is signficant for fixed function processing (such as clipping, unfilled modes, etc), retain information about that output so that we can get to it easily later on. Fix up the unfilled processing, but hard-wire edgeflag to one for now. With this change, trivial/tri-unfilled works. --- diff --git a/src/gallium/drivers/i965/brw_clip.c b/src/gallium/drivers/i965/brw_clip.c index 35e1d2fdbd0..4ec7b823e83 100644 --- a/src/gallium/drivers/i965/brw_clip.c +++ b/src/gallium/drivers/i965/brw_clip.c @@ -58,7 +58,6 @@ compile_clip_prog( struct brw_context *brw, const GLuint *program; GLuint program_size; GLuint delta; - GLuint i; memset(&c, 0, sizeof(c)); @@ -82,16 +81,26 @@ compile_clip_prog( struct brw_context *brw, else delta = REG_SIZE; - /* XXX: c.offset is now pretty redundant: - */ - for (i = 0; i < c.key.nr_attrs; i++) { - c.offset[i] = delta; - delta += ATTR_SIZE; - } - /* XXX: c.nr_attrs is very redundant: */ c.nr_attrs = c.key.nr_attrs; + + c.offset_hpos = delta + c.key.output_hpos * ATTR_SIZE; + + if (c.key.output_color0) + c.offset_color0 = delta + c.key.output_color0 * ATTR_SIZE; + + if (c.key.output_color1) + c.offset_color1 = delta + c.key.output_color1 * ATTR_SIZE; + + if (c.key.output_bfc0) + c.offset_bfc0 = delta + c.key.output_bfc0 * ATTR_SIZE; + + if (c.key.output_bfc1) + c.offset_bfc1 = delta + c.key.output_bfc1 * ATTR_SIZE; + + if (c.key.output_edgeflag) + c.offset_edgeflag = delta + c.key.output_edgeflag * ATTR_SIZE; if (BRW_IS_IGDNG(brw)) c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */ @@ -158,21 +167,33 @@ compile_clip_prog( struct brw_context *brw, static enum pipe_error upload_clip_prog(struct brw_context *brw) { - enum pipe_error ret; + const struct brw_vertex_shader *vs = brw->curr.vertex_shader; struct brw_clip_prog_key key; + enum pipe_error ret; /* Populate the key, starting from the almost-complete version from * the rast state. */ /* PIPE_NEW_RAST */ - memcpy(&key, &brw->curr.rast->clip_key, sizeof key); - + key = brw->curr.rast->clip_key; + /* BRW_NEW_REDUCED_PRIMITIVE */ key.primitive = brw->reduced_primitive; + /* XXX: if edgeflag is moved to a proper TGSI vs output, can remove + * dependency on CACHE_NEW_VS_PROG + */ + /* CACHE_NEW_VS_PROG */ + key.nr_attrs = brw->vs.prog_data->nr_outputs; + key.output_edgeflag = brw->vs.prog_data->output_edgeflag; + /* PIPE_NEW_VS */ - key.nr_attrs = brw->curr.vertex_shader->info.file_max[TGSI_FILE_OUTPUT] + 1; + key.output_hpos = vs->output_hpos; + key.output_color0 = vs->output_color0; + key.output_color1 = vs->output_color1; + key.output_bfc0 = vs->output_bfc0; + key.output_bfc1 = vs->output_bfc1; /* PIPE_NEW_CLIP */ key.nr_userclip = brw->curr.ucp.nr; diff --git a/src/gallium/drivers/i965/brw_clip.h b/src/gallium/drivers/i965/brw_clip.h index 9bec9643d7c..8729efa47b9 100644 --- a/src/gallium/drivers/i965/brw_clip.h +++ b/src/gallium/drivers/i965/brw_clip.h @@ -42,7 +42,7 @@ * up polygon offset and flatshading at this point: */ struct brw_clip_prog_key { - GLuint nr_attrs:5; + GLuint nr_attrs:6; GLuint primitive:4; GLuint nr_userclip:3; GLuint do_flat_shading:1; @@ -54,7 +54,14 @@ struct brw_clip_prog_key { GLuint copy_bfc_cw:1; GLuint copy_bfc_ccw:1; GLuint clip_mode:3; - GLuint pad1:7; + GLuint output_hpos:6; /* not always zero? */ + + GLuint output_color0:6; + GLuint output_color1:6; + GLuint output_bfc0:6; + GLuint output_bfc1:6; + GLuint output_edgeflag:6; + GLuint pad1:2; GLfloat offset_factor; GLfloat offset_units; @@ -123,7 +130,6 @@ struct brw_clip_compile { GLuint last_mrf; GLuint header_position_offset; - GLuint offset[PIPE_MAX_SHADER_OUTPUTS]; GLboolean need_ff_sync; GLuint nr_color_attrs; @@ -131,7 +137,8 @@ struct brw_clip_compile { GLuint offset_color1; GLuint offset_bfc0; GLuint offset_bfc1; - + + GLuint offset_hpos; GLuint offset_edgeflag; }; diff --git a/src/gallium/drivers/i965/brw_clip_line.c b/src/gallium/drivers/i965/brw_clip_line.c index a4790bda954..54282d975ed 100644 --- a/src/gallium/drivers/i965/brw_clip_line.c +++ b/src/gallium/drivers/i965/brw_clip_line.c @@ -132,7 +132,6 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) struct brw_instruction *is_neg2 = NULL; struct brw_instruction *not_culled; struct brw_reg v1_null_ud = retype(vec1(brw_null_reg()), BRW_REGISTER_TYPE_UD); - const int hpos = 0; /* XXX: position not always first element */ brw_MOV(p, get_addr_reg(vtx0), brw_address(c->reg.vertex[0])); brw_MOV(p, get_addr_reg(vtx1), brw_address(c->reg.vertex[1])); @@ -173,12 +172,12 @@ static void clip_and_emit_line( struct brw_clip_compile *c ) /* dp = DP4(vtx->position, plane) */ - brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset[hpos]), c->reg.plane_equation); + brw_DP4(p, vec4(c->reg.dp0), deref_4f(vtx0, c->offset_hpos), c->reg.plane_equation); /* if (IS_NEGATIVE(dp1)) */ brw_set_conditionalmod(p, BRW_CONDITIONAL_L); - brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset[hpos]), c->reg.plane_equation); + brw_DP4(p, vec4(c->reg.dp1), deref_4f(vtx1, c->offset_hpos), c->reg.plane_equation); is_negative = brw_IF(p, BRW_EXECUTE_1); { /* diff --git a/src/gallium/drivers/i965/brw_clip_tri.c b/src/gallium/drivers/i965/brw_clip_tri.c index 5486f4fa89f..fa00f6044f9 100644 --- a/src/gallium/drivers/i965/brw_clip_tri.c +++ b/src/gallium/drivers/i965/brw_clip_tri.c @@ -249,13 +249,13 @@ void brw_clip_tri( struct brw_clip_compile *c ) /* IS_NEGATIVE(prev) */ brw_set_conditionalmod(p, BRW_CONDITIONAL_L); - brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + brw_DP4(p, vec4(c->reg.dpPrev), deref_4f(vtxPrev, c->offset_hpos), c->reg.plane_equation); prev_test = brw_IF(p, BRW_EXECUTE_1); { /* IS_POSITIVE(next) */ brw_set_conditionalmod(p, BRW_CONDITIONAL_GE); - brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset_hpos), c->reg.plane_equation); next_test = brw_IF(p, BRW_EXECUTE_1); { @@ -297,7 +297,7 @@ void brw_clip_tri( struct brw_clip_compile *c ) /* IS_NEGATIVE(next) */ brw_set_conditionalmod(p, BRW_CONDITIONAL_L); - brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset[VERT_RESULT_HPOS]), c->reg.plane_equation); + brw_DP4(p, vec4(c->reg.dp), deref_4f(vtx, c->offset_hpos), c->reg.plane_equation); next_test = brw_IF(p, BRW_EXECUTE_1); { /* Going out of bounds. Avoid division by zero as we @@ -462,9 +462,9 @@ static void brw_clip_test( struct brw_clip_compile *c ) brw_MOV(p, get_addr_reg(vt0), brw_address(c->reg.vertex[0])); brw_MOV(p, get_addr_reg(vt1), brw_address(c->reg.vertex[1])); brw_MOV(p, get_addr_reg(vt2), brw_address(c->reg.vertex[2])); - brw_MOV(p, v0, deref_4f(vt0, c->offset[VERT_RESULT_HPOS])); - brw_MOV(p, v1, deref_4f(vt1, c->offset[VERT_RESULT_HPOS])); - brw_MOV(p, v2, deref_4f(vt2, c->offset[VERT_RESULT_HPOS])); + brw_MOV(p, v0, deref_4f(vt0, c->offset_hpos)); + brw_MOV(p, v1, deref_4f(vt1, c->offset_hpos)); + brw_MOV(p, v2, deref_4f(vt2, c->offset_hpos)); brw_AND(p, c->reg.planemask, c->reg.planemask, brw_imm_ud(~0x3f)); /* test nearz, xmin, ymin plane */ diff --git a/src/gallium/drivers/i965/brw_clip_unfilled.c b/src/gallium/drivers/i965/brw_clip_unfilled.c index 0fab3a5f1ae..aec835b8cec 100644 --- a/src/gallium/drivers/i965/brw_clip_unfilled.c +++ b/src/gallium/drivers/i965/brw_clip_unfilled.c @@ -45,9 +45,9 @@ static void compute_tri_direction( struct brw_clip_compile *c ) struct brw_compile *p = &c->func; struct brw_reg e = c->reg.tmp0; struct brw_reg f = c->reg.tmp1; - struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset[VERT_RESULT_HPOS]); - struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset[VERT_RESULT_HPOS]); - struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset[VERT_RESULT_HPOS]); + struct brw_reg v0 = byte_offset(c->reg.vertex[0], c->offset_hpos); + struct brw_reg v1 = byte_offset(c->reg.vertex[1], c->offset_hpos); + struct brw_reg v2 = byte_offset(c->reg.vertex[2], c->offset_hpos); struct brw_reg v0n = get_tmp(c); @@ -123,7 +123,8 @@ static void copy_bfc( struct brw_clip_compile *c ) /* Do we have any colors to copy? */ - if (c->nr_color_attrs == 0) + if ((c->offset_color0 == 0 || c->offset_bfc0 == 0) && + (c->offset_color1 == 0 || c->offset_bfc1 == 0)) return; /* In some wierd degnerate cases we can end up testing the diff --git a/src/gallium/drivers/i965/brw_clip_util.c b/src/gallium/drivers/i965/brw_clip_util.c index 018511e6999..872042c9a9a 100644 --- a/src/gallium/drivers/i965/brw_clip_util.c +++ b/src/gallium/drivers/i965/brw_clip_util.c @@ -106,7 +106,7 @@ static void brw_clip_project_vertex( struct brw_clip_compile *c, /* Fixup position. Extract from the original vertex and re-project * to screen space: */ - brw_MOV(p, tmp, deref_4f(vert_addr, c->offset[VERT_RESULT_HPOS])); + brw_MOV(p, tmp, deref_4f(vert_addr, c->offset_hpos)); brw_clip_project_position(c, tmp); brw_MOV(p, deref_4f(vert_addr, c->header_position_offset), tmp); diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 31f3cf36855..31e04b6e14c 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -152,13 +152,23 @@ struct brw_rasterizer_state; struct brw_vertex_shader { const struct tgsi_token *tokens; + struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ + struct tgsi_shader_info info; - unsigned has_flow_control:1; + GLuint has_flow_control:1; + GLuint use_const_buffer:1; + + /* Offsets of special vertex shader outputs required for clipping. + */ + GLuint output_hpos:6; /* not always zero? */ + GLuint output_color0:6; + GLuint output_color1:6; + GLuint output_bfc0:6; + GLuint output_bfc1:6; + GLuint output_edgeflag:6; unsigned id; - struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ - GLboolean use_const_buffer; }; struct brw_fs_signature { @@ -317,7 +327,8 @@ struct brw_vs_prog_data { GLuint nr_params; /**< number of TGSI_FILE_CONSTANT's */ - GLboolean copy_edgeflag; + GLuint output_edgeflag; + GLboolean writes_psiz; /* Used for calculating urb partitions: diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index 7febf9e0c2f..02bc8fa1305 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -124,21 +124,51 @@ static void *brw_create_vs_state( struct pipe_context *pipe, const struct pipe_shader_state *shader ) { struct brw_context *brw = brw_context(pipe); + struct brw_vertex_shader *vs; + unsigned i; - struct brw_vertex_shader *vs = CALLOC_STRUCT(brw_vertex_shader); + vs = CALLOC_STRUCT(brw_vertex_shader); if (vs == NULL) return NULL; /* Duplicate tokens, scan shader */ - vs->id = brw->program_id++; - vs->has_flow_control = has_flow_control(&vs->info); - vs->tokens = tgsi_dup_tokens(shader->tokens); if (vs->tokens == NULL) goto fail; tgsi_scan_shader(vs->tokens, &vs->info); + + vs->id = brw->program_id++; + vs->has_flow_control = has_flow_control(&vs->info); + + for (i = 0; i < vs->info.num_outputs; i++) { + int index = vs->info.output_semantic_index[i]; + switch (vs->info.output_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + vs->output_hpos = i; + break; + case TGSI_SEMANTIC_COLOR: + if (index == 0) + vs->output_color0 = i; + else + vs->output_color1 = i; + break; + case TGSI_SEMANTIC_BCOLOR: + if (index == 0) + vs->output_bfc0 = i; + else + vs->output_bfc1 = i; + break; +#if 0 + case TGSI_SEMANTIC_EDGEFLAG: + vs->output_edgeflag = i; + break; +#endif + } + } + + /* Done: */ diff --git a/src/gallium/drivers/i965/brw_vs.c b/src/gallium/drivers/i965/brw_vs.c index 05a62ed9745..26683929192 100644 --- a/src/gallium/drivers/i965/brw_vs.c +++ b/src/gallium/drivers/i965/brw_vs.c @@ -57,7 +57,18 @@ static enum pipe_error do_vs_prog( struct brw_context *brw, c.prog_data.nr_outputs = vp->info.num_outputs; c.prog_data.nr_inputs = vp->info.num_inputs; - c.prog_data.copy_edgeflag = c.key.copy_edgeflag; + + /* XXX: we want edgeflag handling to be integrated to the vertex + * shader, but are currently faking the edgeflag output: + */ + if (c.key.copy_edgeflag) { + c.prog_data.output_edgeflag = c.prog_data.nr_outputs; + c.prog_data.nr_outputs++; + } + else { + c.prog_data.output_edgeflag = ~0; + } + if (1) tgsi_dump(c.vp->tokens, 0); diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 933c9c4d63c..bcaeaca62db 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -70,11 +70,17 @@ static boolean is_position_output( struct brw_vs_compile *c, unsigned vs_output ) { struct brw_vertex_shader *vs = c->vp; - unsigned semantic = vs->info.output_semantic_name[vs_output]; - unsigned index = vs->info.output_semantic_index[vs_output]; - return (semantic == TGSI_SEMANTIC_POSITION && - index == 0); + if (vs_output == c->prog_data.output_edgeflag) { + return FALSE; + } + else { + unsigned semantic = vs->info.output_semantic_name[vs_output]; + unsigned index = vs->info.output_semantic_index[vs_output]; + + return (semantic == TGSI_SEMANTIC_POSITION && + index == 0); + } } @@ -83,15 +89,22 @@ static boolean find_output_slot( struct brw_vs_compile *c, unsigned *fs_input_slot ) { struct brw_vertex_shader *vs = c->vp; - unsigned semantic = vs->info.output_semantic_name[vs_output]; - unsigned index = vs->info.output_semantic_index[vs_output]; - unsigned i; - for (i = 0; i < c->key.fs_signature.nr_inputs; i++) { - if (c->key.fs_signature.input[i].semantic == semantic && + if (vs_output == c->prog_data.output_edgeflag) { + *fs_input_slot = c->key.fs_signature.nr_inputs; + return TRUE; + } + else { + unsigned semantic = vs->info.output_semantic_name[vs_output]; + unsigned index = vs->info.output_semantic_index[vs_output]; + unsigned i; + + for (i = 0; i < c->key.fs_signature.nr_inputs; i++) { + if (c->key.fs_signature.input[i].semantic == semantic && c->key.fs_signature.input[i].semantic_index == index) { - *fs_input_slot = i; - return TRUE; + *fs_input_slot = i; + return TRUE; + } } } @@ -219,7 +232,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* XXX: need to access vertex output semantics here: */ - for (i = 0; i < c->prog_data.nr_outputs; i++) { + for (i = 0; i < c->nr_outputs; i++) { unsigned slot; /* XXX: Put output position in slot zero always. Clipper, etc, @@ -1116,10 +1129,9 @@ static void emit_vertex_write( struct brw_vs_compile *c) GLuint len_vertext_header = 2; if (c->key.copy_edgeflag) { - assert(0); brw_MOV(p, - get_reg(c, TGSI_FILE_OUTPUT, 0), - get_reg(c, TGSI_FILE_INPUT, 0)); + get_reg(c, TGSI_FILE_OUTPUT, c->prog_data.output_edgeflag), + brw_imm_f(1)); } /* Build ndc coords */ diff --git a/src/gallium/drivers/i965/brw_wm_pass2.c b/src/gallium/drivers/i965/brw_wm_pass2.c index 2a879863ab5..56f39d036bd 100644 --- a/src/gallium/drivers/i965/brw_wm_pass2.c +++ b/src/gallium/drivers/i965/brw_wm_pass2.c @@ -93,7 +93,7 @@ static void init_registers( struct brw_wm_compile *c ) assert(c->key.vp_nr_outputs >= 1); c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2; - c->prog_data.urb_read_length = c->key.vp_nr_outputs * 2; + c->prog_data.urb_read_length = (c->key.nr_inputs + 1) * 2; c->prog_data.curb_read_length = c->nr_creg * 2; /* Note this allocation: