From cafea7528052624c8d3e4cd1c5b26a61bf04d1d0 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 14 Apr 2009 11:08:42 -0600 Subject: [PATCH] i965: checkpoint commit: VS constant buffers Hook up a constant buffer, binding table, etc for the VS unit. This will allow using large constant buffers with vertex shaders. The new code is disabled at this time (use_const_buffer=FALSE). --- src/mesa/drivers/dri/i965/brw_context.h | 30 ++- src/mesa/drivers/dri/i965/brw_curbe.c | 2 + src/mesa/drivers/dri/i965/brw_eu.h | 11 +- src/mesa/drivers/dri/i965/brw_eu_emit.c | 66 ++++- src/mesa/drivers/dri/i965/brw_misc_state.c | 7 +- src/mesa/drivers/dri/i965/brw_vs.h | 7 + src/mesa/drivers/dri/i965/brw_vs_emit.c | 229 ++++++++++++++---- src/mesa/drivers/dri/i965/brw_vs_state.c | 8 + src/mesa/drivers/dri/i965/brw_vtbl.c | 1 + src/mesa/drivers/dri/i965/brw_wm_state.c | 2 +- .../drivers/dri/i965/brw_wm_surface_state.c | 205 +++++++++++++--- 11 files changed, 477 insertions(+), 91 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 6a9252d0375..4c2d3af8ae9 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -141,7 +141,8 @@ struct brw_context; #define BRW_NEW_BATCH 0x10000 /** brw->depth_region updated */ #define BRW_NEW_DEPTH_BUFFER 0x20000 -#define BRW_NEW_NR_SURFACES 0x40000 +#define BRW_NEW_NR_WM_SURFACES 0x40000 +#define BRW_NEW_NR_VS_SURFACES 0x80000 struct brw_state_flags { /** State update flags signalled by mesa internals */ @@ -245,20 +246,30 @@ struct brw_vs_ouput_sizes { #define BRW_MAX_TEX_UNIT 16 /** - * Size of our surface binding table. + * Size of our surface binding table for the WM. * This contains pointers to the drawing surfaces and current texture * objects and shader constant buffers (+2). */ -#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 2) +#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) /** * Helpers to convert drawing buffers, textures and constant buffers - * to surface binding table indexes. + * to surface binding table indexes, for WM. */ #define SURF_INDEX_DRAW(d) (d) -#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS + 0) -#define SURF_INDEX_VERT_CONST_BUFFER (MAX_DRAW_BUFFERS + 1) -#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 2 + t) +#define SURF_INDEX_FRAG_CONST_BUFFER (MAX_DRAW_BUFFERS) +#define SURF_INDEX_TEXTURE(t) (MAX_DRAW_BUFFERS + 1 + (t)) + +/** + * Size of surface binding table for the VS. + * Only one constant buffer for now. + */ +#define BRW_VS_MAX_SURF 1 + +/** + * Only a VS constant buffer + */ +#define SURF_INDEX_VERT_CONST_BUFFER 0 enum brw_cache_id { @@ -566,6 +577,11 @@ struct brw_context dri_bo *prog_bo; dri_bo *state_bo; + + /** Binding table of pointers to surf_bo entries */ + dri_bo *bind_bo; + dri_bo *surf_bo[BRW_VS_MAX_SURF]; + GLuint nr_surfaces; } vs; struct { diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 94bf2c0d67a..dfab14aa740 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -357,6 +357,7 @@ update_constant_buffer(struct brw_context *brw, } +/** Copy current vertex program's parameters into the constant buffer */ static void update_vertex_constant_buffer(struct brw_context *brw) { @@ -366,6 +367,7 @@ update_vertex_constant_buffer(struct brw_context *brw) } +/** Copy current fragment program's parameters into the constant buffer */ static void update_fragment_constant_buffer(struct brw_context *brw) { diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index d05f2e6c410..e492ce162ca 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -862,9 +862,18 @@ void brw_dp_READ_4( struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, GLboolean relAddr, - GLuint scratch_offset, + GLuint location, GLuint bind_table_index ); +/* XXX this function is temporary - merge with brw_dp_READ_4() above. */ +void brw_dp_READ_4_vs( struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src, + GLuint msg_reg_nr, + GLboolean relAddr, + GLuint location, + GLuint bind_table_index ); + void brw_dp_WRITE_16( struct brw_compile *p, struct brw_reg src, GLuint msg_reg_nr, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index ec4d7fa76ff..bb7ea5c0492 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -952,7 +952,7 @@ void brw_dp_READ_16( struct brw_compile *p, /** * Read a float[4] vector from the data port Data Cache (const buffer). - * Scratch offset should be a multiple of 16. + * Location (in buffer) should be a multiple of 16. * Used for fetching shader constants. * If relAddr is true, we'll do an indirect fetch using the address register. */ @@ -960,7 +960,7 @@ void brw_dp_READ_4( struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, GLboolean relAddr, - GLuint scratch_offset, + GLuint location, GLuint bind_table_index ) { { @@ -971,7 +971,7 @@ void brw_dp_READ_4( struct brw_compile *p, /* set message header global offset field (reg 0, element 2) */ brw_MOV(p, retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD), - brw_imm_d(scratch_offset)); + brw_imm_d(location)); brw_pop_insn_state(p); } @@ -1001,6 +1001,66 @@ void brw_dp_READ_4( struct brw_compile *p, } +/* XXX this function is temporary - merge with brw_dp_READ_4() above. */ +void brw_dp_READ_4_vs(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg src, + GLuint msg_reg_nr, + GLboolean relAddr, + GLuint location, + GLuint bind_table_index) +{ + { + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + /*src.nr = 0;*/ + + /* set message header global offset field (reg 0, element 2) */ + brw_MOV(p, +#if 1 + retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_UD), +#elif 0 + retype(brw_vec1_grf(src.nr, 2), BRW_REGISTER_TYPE_UD), +#endif + brw_imm_d(location)); + + brw_pop_insn_state(p); + } + + { + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + + insn->header.predicate_control = BRW_PREDICATE_NONE; + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.destreg__conditonalmod = msg_reg_nr; + insn->header.mask_control = BRW_MASK_DISABLE; + + /* cast dest to a uword[8] vector */ + // dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); + + brw_set_dest(insn, dest); +#if 1 + brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW)); +#elif 0 + brw_set_src0(insn, retype(brw_vec8_grf(src.nr, 0), BRW_REGISTER_TYPE_UW)); +#endif + + printf("vs const read msg, location %u, msg_reg_nr %d\n", location, msg_reg_nr); + brw_set_dp_read_message(insn, + bind_table_index, + 0, /* msg_control (0 means 1 Oword) */ + BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ + 0, /* source cache = data cache */ + 1, /* msg_length */ + 1, /* response_length (1 Oword) */ + 0); /* eot */ + } +} + + + void brw_fb_WRITE(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 5c94a49f60a..9bc5c35139c 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -101,6 +101,7 @@ const struct brw_tracked_state brw_drawing_rect = { static void prepare_binding_table_pointers(struct brw_context *brw) { + brw_add_validated_bo(brw, brw->vs.bind_bo); brw_add_validated_bo(brw, brw->wm.bind_bo); } @@ -117,13 +118,11 @@ static void upload_binding_table_pointers(struct brw_context *brw) BEGIN_BATCH(6, IGNORE_CLIPRECTS); OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); - OUT_BATCH(0); /* vs */ + OUT_RELOC(brw->vs.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* vs */ OUT_BATCH(0); /* gs */ OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ - OUT_RELOC(brw->wm.bind_bo, - I915_GEM_DOMAIN_SAMPLER, 0, - 0); + OUT_RELOC(brw->wm.bind_bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); /* wm/ps */ ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 99d0e937226..d20cf78b8af 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -75,6 +75,13 @@ struct brw_vs_compile { struct brw_reg userplane[6]; + /** using a real constant buffer? */ + GLboolean use_const_buffer; + /** we may need up to 3 constants per instruction (if use_const_buffer) */ + struct { + GLint index; + struct brw_reg reg; + } current_const[3]; }; void brw_vs_emit( struct brw_vs_compile *c ); diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c index 0d6c6ab9a8a..d21f2792afb 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c @@ -38,8 +38,31 @@ #include "brw_vs.h" +static struct brw_reg get_tmp( struct brw_vs_compile *c ) +{ + struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0); + + if (++c->last_tmp > c->prog_data.total_grf) + c->prog_data.total_grf = c->last_tmp; -/* Do things as simply as possible. Allocate and populate all regs + return tmp; +} + +static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp ) +{ + if (tmp.nr == c->last_tmp-1) + c->last_tmp--; +} + +static void release_tmps( struct brw_vs_compile *c ) +{ + c->last_tmp = c->first_tmp; +} + + +/** + * Preallocate GRF register before code emit. + * Do things as simply as possible. Allocate and populate all regs * ahead of time. */ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) @@ -47,6 +70,14 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) GLuint i, reg = 0, mrf; GLuint nr_params; +#if 0 + if (c->vp->program.Base.Parameters->NumParameters >= 6) + c->use_const_buffer = 1; + else +#endif + c->use_const_buffer = GL_FALSE; + /*printf("use_const_buffer = %d\n", c->use_const_buffer);*/ + /* r0 -- reserved as usual */ c->r0 = brw_vec8_grf(reg, 0); @@ -66,13 +97,19 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) /* Vertex program parameters from curbe: */ - nr_params = c->vp->program.Base.Parameters->NumParameters; - for (i = 0; i < nr_params; i++) { - c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); - } - reg += (nr_params + 1) / 2; - - c->prog_data.curb_read_length = reg - 1; + if (c->use_const_buffer) { + /* get constants from a real constant buffer */ + c->prog_data.curb_read_length = 0; + } + else { + /* use a section of the GRF for constants */ + nr_params = c->vp->program.Base.Parameters->NumParameters; + for (i = 0; i < nr_params; i++) { + c->regs[PROGRAM_STATE_VAR][i] = stride( brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1); + } + reg += (nr_params + 1) / 2; + c->prog_data.curb_read_length = reg - 1; + } /* Allocate input regs: */ @@ -157,6 +194,13 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) c->prog_data.urb_entry_size = (c->nr_outputs + 2 + 3) / 4; c->prog_data.total_grf = reg; + if (c->use_const_buffer) { + for (i = 0; i < 3; i++) { + c->current_const[i].index = -1; + c->current_const[i].reg = get_tmp(c); + } + } + if (INTEL_DEBUG & DEBUG_VS) { _mesa_printf("%s NumAddrRegs %d\n", __FUNCTION__, c->vp->program.Base.NumAddressRegs); _mesa_printf("%s NumTemps %d\n", __FUNCTION__, c->vp->program.Base.NumTemporaries); @@ -165,28 +209,6 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c ) } -static struct brw_reg get_tmp( struct brw_vs_compile *c ) -{ - struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0); - - if (++c->last_tmp > c->prog_data.total_grf) - c->prog_data.total_grf = c->last_tmp; - - return tmp; -} - -static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp ) -{ - if (tmp.nr == c->last_tmp-1) - c->last_tmp--; -} - -static void release_tmps( struct brw_vs_compile *c ) -{ - c->last_tmp = c->first_tmp; -} - - /** * If an instruction uses a temp reg both as a src and the dest, we * sometimes need to allocate an intermediate temporary. @@ -673,13 +695,59 @@ static void emit_nrm( struct brw_vs_compile *c, } +static struct brw_reg +get_constant(struct brw_vs_compile *c, + const struct prog_instruction *inst, + GLuint argIndex) +{ + const struct prog_src_register *src = &inst->SrcReg[argIndex]; + struct brw_compile *p = &c->func; + struct brw_reg const_reg; + + if (c->current_const[argIndex].index != src->Index) { + struct brw_reg src_reg = get_tmp(c); + struct brw_reg t = get_tmp(c); + + c->current_const[argIndex].index = src->Index; + + brw_MOV(p, t, brw_vec8_grf(0, 0));/*SAVE*/ + +#if 0 + printf(" fetch const[%d] for arg %d into reg %d\n", + src->Index, argIndex, c->current_const[argIndex].reg.nr); +#endif + + /* need to fetch the constant now */ + brw_dp_READ_4_vs(p, + c->current_const[argIndex].reg, /* writeback dest */ + src_reg, /* src reg */ + 1, /* msg_reg */ + src->RelAddr, /* relative indexing? */ + 16 * src->Index, /* byte offset */ + SURF_INDEX_VERT_CONST_BUFFER /* binding table index */ + ); + + brw_MOV(p, brw_vec8_grf(0, 0), t);/*RESTORE*/ + release_tmp(c, src_reg); + release_tmp(c, t); + } + + /* replicate lower four floats into upper four floats (to get XYZWXYZW) */ + const_reg = c->current_const[argIndex].reg; + const_reg = stride(const_reg, 0, 4, 0); + const_reg.subnr = 0; + + return const_reg; +} + + + /* TODO: relative addressing! */ static struct brw_reg get_reg( struct brw_vs_compile *c, gl_register_file file, GLuint index ) { - switch (file) { case PROGRAM_TEMPORARY: case PROGRAM_INPUT: @@ -708,13 +776,63 @@ static struct brw_reg get_reg( struct brw_vs_compile *c, } +/** + * Get brw reg corresponding to the instruction's [argIndex] src reg. + * TODO: relative addressing! + */ +static struct brw_reg +get_src_reg( struct brw_vs_compile *c, + const struct prog_instruction *inst, + GLuint argIndex ) +{ + const GLuint file = inst->SrcReg[argIndex].File; + const GLint index = inst->SrcReg[argIndex].Index; + + switch (file) { + case PROGRAM_TEMPORARY: + case PROGRAM_INPUT: + case PROGRAM_OUTPUT: + assert(c->regs[file][index].nr != 0); + return c->regs[file][index]; + case PROGRAM_STATE_VAR: + case PROGRAM_CONSTANT: + case PROGRAM_UNIFORM: + if (c->use_const_buffer) { + return get_constant(c, inst, argIndex); + } + else { + assert(c->regs[PROGRAM_STATE_VAR][index].nr != 0); + return c->regs[PROGRAM_STATE_VAR][index]; + } + case PROGRAM_ADDRESS: + assert(index == 0); + return c->regs[file][index]; + + case PROGRAM_UNDEFINED: + /* this is a normal case since we loop over all three src args */ + return brw_null_reg(); + + case PROGRAM_LOCAL_PARAM: + case PROGRAM_ENV_PARAM: + case PROGRAM_WRITE_ONLY: + default: + assert(0); + return brw_null_reg(); + } +} + + +/** + * Indirect addressing: get reg[[arg] + offset]. + */ static struct brw_reg deref( struct brw_vs_compile *c, struct brw_reg arg, GLint offset) { struct brw_compile *p = &c->func; struct brw_reg tmp = vec4(get_tmp(c)); - struct brw_reg vp_address = retype(vec1(get_reg(c, PROGRAM_ADDRESS, 0)), BRW_REGISTER_TYPE_UW); + struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0]; + struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_UW); GLuint byte_offset = arg.nr * 32 + arg.subnr + offset * 16; struct brw_reg indirect = brw_vec4_indirect(0,0); @@ -758,22 +876,29 @@ static void emit_arl( struct brw_vs_compile *c, } -/* Will return mangled results for SWZ op. The emit_swz() function +/** + * Return the brw reg for the given instruction's src argument. + * Will return mangled results for SWZ op. The emit_swz() function * ignores this result and recalculates taking extended swizzles into * account. */ static struct brw_reg get_arg( struct brw_vs_compile *c, - struct prog_src_register *src ) + const struct prog_instruction *inst, + GLuint argIndex ) { + const struct prog_src_register *src = &inst->SrcReg[argIndex]; struct brw_reg reg; if (src->File == PROGRAM_UNDEFINED) return brw_null_reg(); - if (src->RelAddr) + if (src->RelAddr) { + /* XXX fix */ reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index); - else - reg = get_reg(c, src->File, src->Index); + } + else { + reg = get_src_reg(c, inst, argIndex); + } /* Convert 3-bit swizzle to 2-bit. */ @@ -790,10 +915,28 @@ static struct brw_reg get_arg( struct brw_vs_compile *c, } +/** + * Get brw register for the given program dest register. + */ static struct brw_reg get_dst( struct brw_vs_compile *c, struct prog_dst_register dst ) { - struct brw_reg reg = get_reg(c, dst.File, dst.Index); + struct brw_reg reg; + + switch (dst.File) { + case PROGRAM_TEMPORARY: + case PROGRAM_OUTPUT: + assert(c->regs[dst.File][dst.Index].nr != 0); + reg = c->regs[dst.File][dst.Index]; + break; + case PROGRAM_UNDEFINED: + /* we may hit this for OPCODE_END, OPCODE_KIL, etc */ + reg = brw_null_reg(); + break; + default: + assert(0); + reg = brw_null_reg(); + } reg.dw1.bits.writemask = dst.WriteMask; @@ -803,8 +946,10 @@ static struct brw_reg get_dst( struct brw_vs_compile *c, static void emit_swz( struct brw_vs_compile *c, struct brw_reg dst, - struct prog_src_register src ) + const struct prog_instruction *inst) { + const GLuint argIndex = 0; + const struct prog_src_register src = inst->SrcReg[argIndex]; struct brw_compile *p = &c->func; GLuint zeros_mask = 0; GLuint ones_mask = 0; @@ -847,7 +992,7 @@ static void emit_swz( struct brw_vs_compile *c, if (src.RelAddr) arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index); else - arg0 = get_reg(c, src.File, src.Index); + arg0 = get_src_reg(c, inst, argIndex); arg0 = brw_swizzle(arg0, src_swz[0], src_swz[1], @@ -1053,7 +1198,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) if (file == PROGRAM_OUTPUT && c->output_regs[index].used_in_src) args[i] = c->output_regs[index].reg; else - args[i] = get_arg(c, src); + args[i] = get_arg(c, inst, i); } /* Get dest regs. Note that it is possible for a reg to be both @@ -1181,7 +1326,7 @@ void brw_vs_emit(struct brw_vs_compile *c ) /* The args[0] value can't be used here as it won't have * correctly encoded the full swizzle: */ - emit_swz(c, dst, inst->SrcReg[0] ); + emit_swz(c, dst, inst); break; case OPCODE_TRUNC: /* round toward zero */ diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index 1a63766ea1f..3d295388437 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -44,6 +44,8 @@ struct brw_vs_unit_key { unsigned int curbe_offset; unsigned int nr_urb_entries, urb_size; + + unsigned int nr_surfaces; }; static void @@ -62,6 +64,9 @@ vs_unit_populate_key(struct brw_context *brw, struct brw_vs_unit_key *key) key->nr_urb_entries = brw->urb.nr_vs_entries; key->urb_size = brw->urb.vsize; + /* BRW_NEW_NR_VS_SURFACES */ + key->nr_surfaces = brw->vs.nr_surfaces; + /* BRW_NEW_CURBE_OFFSETS, _NEW_TRANSFORM */ if (ctx->Transform.ClipPlanesEnabled) { /* Note that we read in the userclip planes as well, hence @@ -92,6 +97,8 @@ vs_unit_create_from_key(struct brw_context *brw, struct brw_vs_unit_key *key) * brw_urb_WRITE() results. */ vs.thread1.single_program_flow = 0; + vs.thread1.binding_table_entry_count = key->nr_surfaces; + vs.thread3.urb_entry_read_length = key->urb_entry_read_length; vs.thread3.const_urb_entry_read_length = key->curb_entry_read_length; vs.thread3.dispatch_grf_start_reg = 1; @@ -158,6 +165,7 @@ const struct brw_tracked_state brw_vs_unit = { .dirty = { .mesa = _NEW_TRANSFORM, .brw = (BRW_NEW_CURBE_OFFSETS | + BRW_NEW_NR_VS_SURFACES | BRW_NEW_URB_FENCE), .cache = CACHE_NEW_VS_PROG }, diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index 960bbb311e3..ba03afd6c13 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -79,6 +79,7 @@ static void brw_destroy_context( struct intel_context *intel ) dri_bo_release(&brw->curbe.curbe_bo); dri_bo_release(&brw->vs.prog_bo); dri_bo_release(&brw->vs.state_bo); + dri_bo_release(&brw->vs.bind_bo); dri_bo_release(&brw->gs.prog_bo); dri_bo_release(&brw->gs.state_bo); dri_bo_release(&brw->clip.prog_bo); diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 58fa6aaf8f9..67b41173fb2 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -290,7 +290,7 @@ const struct brw_tracked_state brw_wm_unit = { .brw = (BRW_NEW_FRAGMENT_PROGRAM | BRW_NEW_CURBE_OFFSETS | - BRW_NEW_NR_SURFACES), + BRW_NEW_NR_WM_SURFACES), .cache = (CACHE_NEW_WM_PROG | CACHE_NEW_SAMPLER) diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 095759f3a26..ce5dbb334b8 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -176,7 +176,11 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, } } -struct brw_wm_surface_key { + +/** + * Use same key for WM and VS surfaces. + */ +struct brw_surface_key { GLenum target, depthmode; dri_bo *bo; GLint format, internal_format; @@ -187,6 +191,7 @@ struct brw_wm_surface_key { GLuint offset; }; + static void brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) { @@ -208,7 +213,7 @@ brw_set_surface_tiling(struct brw_surface_state *surf, uint32_t tiling) static dri_bo * brw_create_texture_surface( struct brw_context *brw, - struct brw_wm_surface_key *key ) + struct brw_surface_key *key ) { struct brw_surface_state surf; dri_bo *bo; @@ -287,7 +292,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; struct intel_texture_object *intelObj = intel_texture_object(tObj); struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; - struct brw_wm_surface_key key; + struct brw_surface_key key; const GLuint surf = SURF_INDEX_TEXTURE(unit); memset(&key, 0, sizeof(key)); @@ -328,12 +333,12 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) /** - * Create the constant buffer surface. Fragment shader constanst will be + * Create the constant buffer surface. Vertex/fragment shader constants will be * read from this buffer with Data Port Read instructions/messages. */ static dri_bo * brw_create_constant_surface( struct brw_context *brw, - struct brw_wm_surface_key *key ) + struct brw_surface_key *key ) { const GLint w = key->width - 1; struct brw_surface_state surf; @@ -345,8 +350,6 @@ brw_create_constant_surface( struct brw_context *brw, surf.ss0.surface_type = BRW_SURFACE_BUFFER; surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; - /* This is ok for all textures with channel width 8bit or less: - */ assert(key->bo); if (key->bo) surf.ss1.base_addr = key->bo->offset; /* reloc */ @@ -356,8 +359,8 @@ brw_create_constant_surface( struct brw_context *brw, surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */ surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */ - surf.ss3.pitch = (key->pitch * key->cpp) - 1; - brw_set_surface_tiling(&surf, key->tiling); + surf.ss3.pitch = (key->pitch * key->cpp) - 1; /* ignored?? */ + brw_set_surface_tiling(&surf, key->tiling); /* tiling now allowed */ bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, key, sizeof(*key), @@ -379,17 +382,17 @@ brw_create_constant_surface( struct brw_context *brw, /** - * Update the surface state for a constant buffer. + * Update the surface state for a WM constant buffer. * The constant buffer will be (re)allocated here if needed. */ static dri_bo * -brw_update_constant_surface( GLcontext *ctx, - GLuint surf, - dri_bo *const_buffer, - const struct gl_program_parameter_list *params) +brw_update_wm_constant_surface( GLcontext *ctx, + GLuint surf, + dri_bo *const_buffer, + const struct gl_program_parameter_list *params) { struct brw_context *brw = brw_context(ctx); - struct brw_wm_surface_key key; + struct brw_surface_key key; struct intel_context *intel = &brw->intel; const int size = params->NumParameters * 4 * sizeof(GLfloat); @@ -402,7 +405,7 @@ brw_update_constant_surface( GLcontext *ctx, /* alloc new buffer if needed */ if (!const_buffer) { const_buffer = - drm_intel_bo_alloc(intel->bufmgr, "vp/fp_const_buffer", size, 64); + drm_intel_bo_alloc(intel->bufmgr, "fp_const_buffer", size, 64); } memset(&key, 0, sizeof(key)); @@ -436,6 +439,66 @@ brw_update_constant_surface( GLcontext *ctx, } +/** + * Update the surface state for a VS constant buffer. + * The constant buffer will be (re)allocated here if needed. + */ +static dri_bo * +brw_update_vs_constant_surface( GLcontext *ctx, + GLuint surf, + dri_bo *const_buffer, + const struct gl_program_parameter_list *params) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_surface_key key; + struct intel_context *intel = &brw->intel; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + + assert(surf == 0); + + /* free old const buffer if too small */ + if (const_buffer && const_buffer->size < size) { + dri_bo_unreference(const_buffer); + const_buffer = NULL; + } + + /* alloc new buffer if needed */ + if (!const_buffer) { + const_buffer = + drm_intel_bo_alloc(intel->bufmgr, "vp_const_buffer", size, 64); + } + + memset(&key, 0, sizeof(key)); + + key.format = MESA_FORMAT_RGBA_FLOAT32; + key.internal_format = GL_RGBA; + key.bo = const_buffer; + key.depthmode = GL_NONE; + key.pitch = params->NumParameters; + key.width = params->NumParameters; + key.height = 1; + key.depth = 1; + key.cpp = 16; + + /* + printf("%s:\n", __FUNCTION__); + printf(" width %d height %d depth %d cpp %d pitch %d\n", + key.width, key.height, key.depth, key.cpp, key.pitch); + */ + + dri_bo_unreference(brw->vs.surf_bo[surf]); + brw->vs.surf_bo[surf] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->vs.surf_bo[surf] == NULL) { + brw->vs.surf_bo[surf] = brw_create_constant_surface(brw, &key); + } + + return const_buffer; +} + + /** * Sets up a surface state structure to point at the given region. * While it is only used for the front/back buffer currently, it should be @@ -515,7 +578,7 @@ brw_update_region_surface(struct brw_context *brw, struct intel_region *region, /* Key size will never match key size for textures, so we're safe. */ brw->wm.surf_bo[unit] = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), + &key, sizeof(key), ®ion_bo, 1, &surf, sizeof(surf), NULL, NULL); @@ -544,6 +607,8 @@ brw_wm_get_binding_table(struct brw_context *brw) { dri_bo *bind_bo; + assert(brw->wm.nr_surfaces <= BRW_WM_MAX_SURF); + bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, NULL, 0, brw->wm.surf_bo, brw->wm.nr_surfaces, @@ -603,25 +668,13 @@ static void prepare_wm_surfaces(struct brw_context *brw ) old_nr_surfaces = brw->wm.nr_surfaces; brw->wm.nr_surfaces = MAX_DRAW_BUFFERS; - /* Update surface / buffer for vertex shader constant buffer */ - { - const GLuint surf = SURF_INDEX_VERT_CONST_BUFFER; - struct brw_vertex_program *vp = - (struct brw_vertex_program *) brw->vertex_program; - vp->const_buffer = - brw_update_constant_surface(ctx, surf, vp->const_buffer, - vp->program.Base.Parameters); - - brw->wm.nr_surfaces = surf + 1; - } - /* Update surface / buffer for fragment shader constant buffer */ { const GLuint surf = SURF_INDEX_FRAG_CONST_BUFFER; struct brw_fragment_program *fp = (struct brw_fragment_program *) brw->fragment_program; fp->const_buffer = - brw_update_constant_surface(ctx, surf, fp->const_buffer, + brw_update_wm_constant_surface(ctx, surf, fp->const_buffer, fp->program.Base.Parameters); brw->wm.nr_surfaces = surf + 1; @@ -655,17 +708,103 @@ static void prepare_wm_surfaces(struct brw_context *brw ) brw->wm.bind_bo = brw_wm_get_binding_table(brw); if (brw->wm.nr_surfaces != old_nr_surfaces) - brw->state.dirty.brw |= BRW_NEW_NR_SURFACES; + brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES; +} + + +/** + * Constructs the binding table for the VS surface state. + */ +static dri_bo * +brw_vs_get_binding_table(struct brw_context *brw) +{ + dri_bo *bind_bo; + + assert(brw->vs.nr_surfaces <= BRW_VS_MAX_SURF); + + bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, brw->vs.nr_surfaces, + NULL); + + if (bind_bo == NULL) { + GLuint data_size = brw->vs.nr_surfaces * sizeof(GLuint); + uint32_t *data = malloc(data_size); + int i; + + for (i = 0; i < brw->vs.nr_surfaces; i++) + if (brw->vs.surf_bo[i]) + data[i] = brw->vs.surf_bo[i]->offset; + else + data[i] = 0; + + bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->vs.surf_bo, brw->vs.nr_surfaces, + data, data_size, + NULL, NULL); + + /* Emit binding table relocations to surface state */ + for (i = 0; i < BRW_VS_MAX_SURF; i++) { + if (brw->vs.surf_bo[i] != NULL) { + dri_bo_emit_reloc(bind_bo, + I915_GEM_DOMAIN_INSTRUCTION, 0, + 0, + i * sizeof(GLuint), + brw->vs.surf_bo[i]); + } + } + + free(data); + } + + return bind_bo; +} + + +/** + * Vertex shader surfaces. Just constant buffer for now. Could add vertex + * shader textures in the future. + */ +static void prepare_vs_surfaces(struct brw_context *brw ) +{ + GLcontext *ctx = &brw->intel.ctx; + + /* Update surface / buffer for vertex shader constant buffer */ + { + const GLuint surf = SURF_INDEX_VERT_CONST_BUFFER; + struct brw_vertex_program *vp = + (struct brw_vertex_program *) brw->vertex_program; + vp->const_buffer = + brw_update_vs_constant_surface(ctx, surf, vp->const_buffer, + vp->program.Base.Parameters); + + brw->vs.nr_surfaces = 1; + } + + dri_bo_unreference(brw->vs.bind_bo); + brw->vs.bind_bo = brw_vs_get_binding_table(brw); + + if (1) + brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES; +} + + +static void +prepare_surfaces(struct brw_context *brw) +{ + prepare_wm_surfaces(brw); + prepare_vs_surfaces(brw); } const struct brw_tracked_state brw_wm_surfaces = { .dirty = { - .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS, + .mesa = _NEW_COLOR | _NEW_TEXTURE | _NEW_BUFFERS | _NEW_PROGRAM, .brw = BRW_NEW_CONTEXT, .cache = 0 }, - .prepare = prepare_wm_surfaces, + .prepare = prepare_surfaces, }; -- 2.30.2