From: Keith Whitwell Date: Fri, 20 Nov 2009 03:51:04 +0000 (-0800) Subject: i965g: first pass at vs immediates in curbe X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=63b0af07755201e5ad630bf7f67a7997263734d6;p=mesa.git i965g: first pass at vs immediates in curbe --- diff --git a/src/gallium/drivers/i965/brw_context.h b/src/gallium/drivers/i965/brw_context.h index 64279c46768..096c8cf12b2 100644 --- a/src/gallium/drivers/i965/brw_context.h +++ b/src/gallium/drivers/i965/brw_context.h @@ -149,12 +149,17 @@ struct brw_blend_state { struct brw_rasterizer_state; +struct brw_immediate_data { + unsigned nr; + float (*data)[4]; +}; struct brw_vertex_shader { const struct tgsi_token *tokens; struct brw_winsys_buffer *const_buffer; /** Program constant buffer/surface */ struct tgsi_shader_info info; + struct brw_immediate_data immediates; GLuint has_flow_control:1; GLuint use_const_buffer:1; @@ -189,6 +194,7 @@ struct brw_fragment_shader { struct tgsi_shader_info info; struct brw_fs_signature signature; + struct brw_immediate_data immediates; unsigned iz_lookup; //unsigned wm_lookup; diff --git a/src/gallium/drivers/i965/brw_curbe.c b/src/gallium/drivers/i965/brw_curbe.c index 5fa17233113..3e821d5afee 100644 --- a/src/gallium/drivers/i965/brw_curbe.c +++ b/src/gallium/drivers/i965/brw_curbe.c @@ -226,21 +226,34 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw) /* vertex shader constants */ if (brw->curbe.vs_size) { GLuint offset = brw->curbe.vs_start * 16; - GLuint nr = brw->curr.vertex_shader->info.file_max[TGSI_FILE_CONSTANT] + 1; + struct brw_vertex_shader *vs = brw->curr.vertex_shader; + GLuint nr_immediate, nr_const; - /* XXX: note that constant buffers are currently *already* in - * buffer objects. If we want to keep on putting them into the - * curbe, makes sense to treat constbuf's specially with malloc. 
- */ - const GLfloat *value = screen->buffer_map( screen, - brw->curr.vertex_constants, - PIPE_BUFFER_USAGE_CPU_READ); + nr_immediate = vs->immediates.nr; + if (nr_immediate) { + memcpy(&buf[offset], + vs->immediates.data, + nr_immediate * 4 * sizeof(float)); - /* XXX: what if user's constant buffer is too small? - */ - memcpy(&buf[offset], value, nr * 4 * sizeof(float)); + offset += nr_immediate * 4; + } - screen->buffer_unmap( screen, brw->curr.vertex_constants ); + nr_const = vs->info.file_max[TGSI_FILE_CONSTANT] + 1; + if (nr_const) { + /* XXX: note that constant buffers are currently *already* in + * buffer objects. If we want to keep on putting them into the + * curbe, makes sense to treat constbuf's specially with malloc. + */ + const GLfloat *value = screen->buffer_map( screen, + brw->curr.vertex_constants, + PIPE_BUFFER_USAGE_CPU_READ); + + /* XXX: what if user's constant buffer is too small? + */ + memcpy(&buf[offset], value, nr_const * 4 * sizeof(float)); + + screen->buffer_unmap( screen, brw->curr.vertex_constants ); + } } if (BRW_DEBUG & DEBUG_CURBE) { @@ -263,8 +276,7 @@ static enum pipe_error prepare_curbe_buffer(struct brw_context *brw) } else { /* constants have changed */ - if (brw->curbe.last_buf) - FREE(brw->curbe.last_buf); + FREE(brw->curbe.last_buf); brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; diff --git a/src/gallium/drivers/i965/brw_pipe_shader.c b/src/gallium/drivers/i965/brw_pipe_shader.c index 3222ee7777a..31a715ab655 100644 --- a/src/gallium/drivers/i965/brw_pipe_shader.c +++ b/src/gallium/drivers/i965/brw_pipe_shader.c @@ -55,6 +55,47 @@ static GLboolean has_flow_control(const struct tgsi_shader_info *info) } +static void scan_immediates(const struct tgsi_token *tokens, + const struct tgsi_shader_info *info, + struct brw_immediate_data *imm) +{ + struct tgsi_parse_context parse; + boolean done = FALSE; + + imm->nr = 0; + imm->data = MALLOC(info->immediate_count * 4 * sizeof(float)); + + tgsi_parse_init( &parse, tokens ); 
+ while (!tgsi_parse_end_of_tokens( &parse ) && !done) { + tgsi_parse_token( &parse ); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: { + static const float id[4] = {0,0,0,1}; + const float *value = &parse.FullToken.FullImmediate.u[0].Float; + unsigned size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; + unsigned i; + + for (i = 0; i < size; i++) + imm->data[imm->nr][i] = value[i]; + + for (; i < 4; i++) + imm->data[imm->nr][i] = id[i]; + + imm->nr++; + break; + } + + case TGSI_TOKEN_TYPE_INSTRUCTION: + done = 1; + break; + } + } +} + static void brw_bind_fs_state( struct pipe_context *pipe, void *prog ) { @@ -106,6 +147,7 @@ static void *brw_create_fs_state( struct pipe_context *pipe, goto fail; tgsi_scan_shader(fs->tokens, &fs->info); + scan_immediates(fs->tokens, &fs->info, &fs->immediates); fs->signature.nr_inputs = fs->info.num_inputs; for (i = 0; i < fs->info.num_inputs; i++) { @@ -150,6 +192,7 @@ static void *brw_create_vs_state( struct pipe_context *pipe, goto fail; tgsi_scan_shader(vs->tokens, &vs->info); + scan_immediates(vs->tokens, &vs->info, &vs->immediates); vs->id = brw->program_id++; vs->has_flow_control = has_flow_control(&vs->info); diff --git a/src/gallium/drivers/i965/brw_vs_emit.c b/src/gallium/drivers/i965/brw_vs_emit.c index 52d4731dfde..00f0af2d07f 100644 --- a/src/gallium/drivers/i965/brw_vs_emit.c +++ b/src/gallium/drivers/i965/brw_vs_emit.c @@ -42,6 +42,15 @@ #include "brw_vs.h" #include "brw_debug.h" +/* Choose one of the 4 vec4's which can be packed into each 16-wide reg. 
+ */ +static INLINE struct brw_reg brw_vec4_grf_repeat( GLuint reg, GLuint slot ) +{ + int nr = reg + slot/2; + int subnr = (slot%2) * 4; + + return stride(brw_vec4_grf(nr, subnr), 0, 4, 1); +} static struct brw_reg get_tmp( struct brw_vs_compile *c ) @@ -119,7 +128,7 @@ static boolean find_output_slot( struct brw_vs_compile *c, */ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) { - GLuint i, reg = 0, mrf; + GLuint i, reg = 0, subreg = 0, mrf; int attributes_in_vue; /* Determine whether to use a real constant buffer or use a block @@ -150,33 +159,57 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* User clip planes from curbe: */ if (c->key.nr_userclip) { - for (i = 0; i < c->key.nr_userclip; i++) { - c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1); + /* Skip over fixed planes: Or never read them into vs unit? + */ + subreg += 6; + + for (i = 0; i < c->key.nr_userclip; i++, subreg++) { + c->userplane[i] = + stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1); } /* Deal with curbe alignment: */ - reg += ((6 + c->key.nr_userclip + 3) / 4) * 2; + subreg = align(subreg, 2); + /*reg += ((6 + c->key.nr_userclip + 3) / 4) * 2;*/ } - /* Vertex program parameters from curbe: + + /* Immediates: always in the curbe. + * + * XXX: Can try to encode some immediates as brw immediates + * XXX: Make sure ureg sets minimal immediate size and respect it + * here. */ - if (c->vp->use_const_buffer) { - /* get constants from a real constant buffer */ - c->prog_data.curb_read_length = 0; - c->prog_data.nr_params = 4; /* XXX 0 causes a bug elsewhere... */ + for (i = 0; i < c->vp->info.immediate_count; i++, subreg++) { + c->regs[TGSI_FILE_IMMEDIATE][i] = + stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1); } - else { - /* use a section of the GRF for constants */ + c->prog_data.nr_params = c->vp->info.immediate_count * 4; + + + /* Vertex constant buffer. 
+ * + * Constants from the buffer can be either cached in the curbe or + * loaded as needed from the actual constant buffer. + */ + if (!c->vp->use_const_buffer) { GLuint nr_params = c->vp->info.file_max[TGSI_FILE_CONSTANT] + 1; - for (i = 0; i < nr_params; i++) { - c->regs[TGSI_FILE_CONSTANT][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); + + for (i = 0; i < nr_params; i++, subreg++) { + c->regs[TGSI_FILE_CONSTANT][i] = + stride( brw_vec4_grf(reg+subreg/2, (subreg%2) * 4), 0, 4, 1); } - reg += (nr_params + 1) / 2; - c->prog_data.curb_read_length = reg - 1; - c->prog_data.nr_params = nr_params * 4; + + c->prog_data.nr_params += nr_params * 4; } + /* All regs allocated + */ + reg += (subreg + 1) / 2; + c->prog_data.curb_read_length = reg - 1; + + /* Allocate input regs: */ c->nr_inputs = c->vp->info.num_inputs; @@ -191,28 +224,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) if (c->nr_inputs == 0) reg++; - /* Allocate a GRF and load immediate values by hand with 4 MOVs!!! - * - * XXX: Try to encode float immediates as brw immediates - * XXX: Put immediates into the CURBE. - * XXX: Make sure ureg sets minimal immediate size and respect it - * here. - */ - for (i = 0; i < c->nr_immediates; i++) { - struct brw_reg r; - int j; - - c->regs[TGSI_FILE_IMMEDIATE][i] = - r = brw_vec8_grf(reg, 0); - - for (j = 0; j < 4; j++) { - brw_MOV(&c->func, - brw_writemask(r, (1<<j)), - brw_imm_f(c->immediate[i][j])); - } - - reg++; - } /* Allocate outputs. The non-position outputs go straight into message regs. 
@@ -1605,8 +1616,6 @@ void brw_vs_emit(struct brw_vs_compile *c) struct brw_instruction *end_inst, *last_inst; struct tgsi_parse_context parse; struct tgsi_full_instruction *inst; - boolean done = FALSE; - int i; if (BRW_DEBUG & DEBUG_VS) tgsi_dump(c->vp->tokens, 0); @@ -1616,37 +1625,6 @@ void brw_vs_emit(struct brw_vs_compile *c) brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_access_mode(p, BRW_ALIGN_16); - /* Inputs */ - tgsi_parse_init( &parse, tokens ); - while( !tgsi_parse_end_of_tokens( &parse ) ) { - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - /* Nothing to do -- using info from tgsi_scan(). - */ - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: { - static const float id[4] = {0,0,0,1}; - const float *imm = &parse.FullToken.FullImmediate.u[0].Float; - unsigned size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; - - for (i = 0; i < size; i++) - c->immediate[c->nr_immediates][i] = imm[i]; - - for ( ; i < 4; i++) - c->immediate[c->nr_immediates][i] = id[i]; - - c->nr_immediates++; - break; - } - - case TGSI_TOKEN_TYPE_INSTRUCTION: - done = 1; - break; - } - } /* Static register allocation */