From: Brian Paul Date: Tue, 31 Mar 2009 23:06:22 +0000 (-0600) Subject: i965: check-point commit of new constant buffer support X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=597cd5b94e4818679af1134d053541f3d2cfb80e;p=mesa.git i965: check-point commit of new constant buffer support Currently, shader constants are stored in the GRF (loaded from the CURBE prior to shader execution). This severly limits the number of constants and temps that we can support. This new code will support (practically) unlimited size constant buffers and free up registers in the GRF. We allocate a new buffer object for the constants and read them with "Read" messages/instructions. When only a small number of constants are used, we can still use the old method. The code works for fragment shaders only (and is actually disabled) for now. Need to do the same thing for vertex shaders and need to add the necessary code-gen to fetch the constants which are referenced by the shader instructions. --- diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index a020b621d6c..01e07c967fa 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -167,6 +167,9 @@ struct brw_fragment_program { struct gl_fragment_program program; GLuint id; /**< serial no. to identify frag progs, never re-used */ GLboolean isGLSL; /**< really, any IF/LOOP/CONT/BREAK instructions */ + + /** Program constant buffer/surface */ + dri_bo *const_buffer; }; @@ -238,8 +241,16 @@ struct brw_vs_ouput_sizes { }; +/** Number of texture sampler units */ #define BRW_MAX_TEX_UNIT 16 -#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + MAX_DRAW_BUFFERS + +/** + * Size of our surface binding table. + * This contains pointers to the drawing surfaces and current texture + * objects and shader constant buffer (+1). + */ +#define BRW_WM_MAX_SURF (MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1) + enum brw_cache_id { BRW_CC_VP, @@ -513,8 +524,8 @@ struct brw_context /* BRW_NEW_CURBE_OFFSETS: */ struct { - GLuint wm_start; - GLuint wm_size; + GLuint wm_start; /**< pos of first wm const in CURBE buffer */ + GLuint wm_size; /**< number of float[4] consts, multiple of 16 */ GLuint clip_start; GLuint clip_size; GLuint vs_start; diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 545dedd34ba..39a8610952c 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -38,6 +38,7 @@ #include "shader/prog_parameter.h" #include "shader/prog_statevars.h" #include "intel_batchbuffer.h" +#include "intel_regions.h" #include "brw_context.h" #include "brw_defines.h" #include "brw_state.h" @@ -251,6 +252,7 @@ static void prepare_constant_buffer(struct brw_context *brw) _mesa_load_state_parameters(ctx, vp->program.Base.Parameters); + /* XXX just use a memcpy here */ for (i = 0; i < nr; i++) { const GLfloat *value = vp->program.Base.Parameters->ParameterValues[i]; buf[offset + i * 4 + 0] = value[0]; @@ -330,11 +332,40 @@ static void prepare_constant_buffer(struct brw_context *brw) } +/** + * Vertex/fragment shader constants are stored in a pseudo 1D texture. + * This function updates the constants in that buffer. + */ +static void +update_texture_constant_buffer(struct brw_context *brw) +{ + struct intel_context *intel = &brw->intel; + struct brw_fragment_program *fp = + (struct brw_fragment_program *) brw->fragment_program; + const struct gl_program_parameter_list *params = fp->program.Base.Parameters; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + + assert(fp->const_buffer); + assert(fp->const_buffer->size >= size); + + /* copy constants into the buffer */ + if (size > 0) { + GLubyte *map; + dri_bo_map(fp->const_buffer, GL_TRUE); + map = fp->const_buffer->virtual; + memcpy(map, params->ParameterValues, size); + dri_bo_unmap(fp->const_buffer); + } +} + + static void emit_constant_buffer(struct brw_context *brw) { struct intel_context *intel = &brw->intel; GLuint sz = brw->curbe.total_size; + update_texture_constant_buffer(brw); + BEGIN_BATCH(2, IGNORE_CLIPRECTS); if (sz == 0) { OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2)); diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index d90bd820386..457bc2fc7f4 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -111,6 +111,8 @@ static void brwProgramStringNotify( GLcontext *ctx, struct gl_program *prog ) { struct brw_context *brw = brw_context(ctx); + struct intel_context *intel = &brw->intel; + if (target == GL_FRAGMENT_PROGRAM_ARB) { struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; struct brw_fragment_program *newFP = brw_fragment_program(fprog); @@ -126,6 +128,24 @@ static void brwProgramStringNotify( GLcontext *ctx, brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; newFP->id = brw->program_id++; newFP->isGLSL = brw_wm_is_glsl(fprog); + + /* alloc constant buffer/surface */ + { + const struct gl_program_parameter_list *params = prog->Parameters; + const int size = params->NumParameters * 4 * sizeof(GLfloat); + + /* free old const buffer if too small */ + if (newFP->const_buffer && newFP->const_buffer->size < size) { + dri_bo_unreference(newFP->const_buffer); + newFP->const_buffer = NULL; + } + + if (!newFP->const_buffer) { + newFP->const_buffer = drm_intel_bo_alloc(intel->bufmgr, + "fp_const_buffer", + size, 64); + } + } } else if (target == GL_VERTEX_PROGRAM_ARB) { struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog; diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c index 67ba59c3252..28ef84e4aac 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c +++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c @@ -192,28 +192,41 @@ static void prealloc_reg(struct brw_wm_compile *c) /* constants */ { const int nr_params = c->fp->program.Base.Parameters->NumParameters; - const struct gl_program_parameter_list *plist = - c->fp->program.Base.Parameters; - int index = 0; - - /* number of float constants */ - c->prog_data.nr_params = 4 * nr_params; - - /* loop over program constants (float[4]) */ - for (i = 0; i < nr_params; i++) { - /* loop over XYZW channels */ - for (j = 0; j < 4; j++, index++) { - reg = brw_vec1_grf(c->reg_index + index / 8, index % 8); - /* Save pointer to parameter/constant value. - * Constants will be copied in prepare_constant_buffer() - */ - c->prog_data.param[index] = &plist->ParameterValues[i][j]; - set_reg(c, PROGRAM_STATE_VAR, i, j, reg); - } - } - /* number of constant regs used (each reg is float[8]) */ - c->nr_creg = 2 * ((4 * nr_params + 15) / 16); - c->reg_index += c->nr_creg; + + if (1 /* XXX threshold: nr_params <= 8 */) { + const struct gl_program_parameter_list *plist = + c->fp->program.Base.Parameters; + int index = 0; + + /* number of float constants in CURBE */ + c->prog_data.nr_params = 4 * nr_params; + + /* loop over program constants (float[4]) */ + for (i = 0; i < nr_params; i++) { + /* loop over XYZW channels */ + for (j = 0; j < 4; j++, index++) { + reg = brw_vec1_grf(c->reg_index + index / 8, index % 8); + /* Save pointer to parameter/constant value. + * Constants will be copied in prepare_constant_buffer() + */ + c->prog_data.param[index] = &plist->ParameterValues[i][j]; + set_reg(c, PROGRAM_STATE_VAR, i, j, reg); + } + } + /* number of constant regs used (each reg is float[8]) */ + c->nr_creg = 2 * ((4 * nr_params + 15) / 16); + c->reg_index += c->nr_creg; + } + else { + /* number of float constants in CURBE */ + c->prog_data.nr_params = 0; + + /* When there's a lot of FP constanst we'll store them in a + * texture-like buffer instead of using the CURBE buffer. + * This means we won't use GRF registers for constants and we'll + * have to fetch constants with a dataport read. + */ + } } /* fragment shader inputs */ @@ -892,6 +905,19 @@ static void emit_add(struct brw_wm_compile *c, brw_set_saturate(p, 0); } +static void emit_arl(struct brw_wm_compile *c, + struct prog_instruction *inst) +{ + struct brw_compile *p = &c->func; + struct brw_reg src0, addr_reg; + brw_set_saturate(p, (inst->SaturateMode != SATURATE_OFF) ? 1 : 0); + addr_reg = brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_ADDRESS, 0); + src0 = get_src_reg(c, &inst->SrcReg[0], 0); /* channel 0 */ + brw_MOV(p, addr_reg, src0); + brw_set_saturate(p, 0); +} + static void emit_sub(struct brw_wm_compile *c, struct prog_instruction *inst) { @@ -2331,9 +2357,10 @@ static void emit_txb(struct brw_wm_compile *c, struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; - GLuint i; + payload_reg = get_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, 0, 1, 0, 0); + for (i = 0; i < 4; i++) dst[i] = get_dst_reg(c, inst, i); for (i = 0; i < 4; i++) @@ -2372,13 +2399,13 @@ static void emit_txb(struct brw_wm_compile *c, 0); /* eot */ } + static void emit_tex(struct brw_wm_compile *c, struct prog_instruction *inst) { struct brw_compile *p = &c->func; struct brw_reg dst[4], src[4], payload_reg; GLuint unit = c->fp->program.Base.SamplerUnits[inst->TexSrcUnit]; - GLuint msg_len; GLuint i, nr; GLuint emit; @@ -2419,7 +2446,7 @@ static void emit_tex(struct brw_wm_compile *c, } if (shadow) { - brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); /* lod / bais */ + brw_MOV(p, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */ brw_MOV(p, brw_message_reg(6), src[2]); /* ref value / R coord */ } @@ -2439,6 +2466,49 @@ static void emit_tex(struct brw_wm_compile *c, brw_MOV(p, dst[3], brw_imm_f(1.0)); } + +static void emit_get_constant(struct brw_context *brw, + struct brw_wm_compile *c, + struct prog_instruction *inst, + GLuint constIndex) +{ + struct brw_compile *p = &c->func; + struct brw_reg dst[4]; + GLuint i; + const int mark = mark_tmps( c ); + struct brw_reg writeback_reg[4]; + + /* XXX only need 1 temp reg??? */ + for (i = 0; i < 4; i++) { + writeback_reg[i] = alloc_tmp(c); + } + + for (i = 0; i < 4; i++) { + dst[i] = get_dst_reg(c, inst, i); + } + + /* Get float[4] vector from constant buffer */ + brw_dp_READ_4(p, + writeback_reg[0], /* first writeback dest */ + 1, /* msg_reg */ + GL_FALSE, /* rel addr? */ + 16 * constIndex, /* byte offset */ + BRW_WM_MAX_SURF - 1 /* surface, binding table index */ + ); + + /* Extract the four channel values, smear across dest registers */ + for (i = 0; i < 4; i++) { + /* extract 1 float from the writeback reg */ + struct brw_reg new_src = stride(writeback_reg[0], 0, 1, 0); + new_src.subnr = i * 4; + /* and smear it into the dest register */ + brw_MOV(p, dst[i], new_src); + } + + release_tmps( c, mark ); +} + + /** * Resolve subroutine calls after code emit is done. */ @@ -2504,6 +2574,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) case OPCODE_ADD: emit_add(c, inst); break; + case OPCODE_ARL: + emit_arl(c, inst); + break; case OPCODE_SUB: emit_sub(c, inst); break; @@ -2520,7 +2593,17 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c) emit_trunc(c, inst); break; case OPCODE_MOV: +#if 0 + /* test hook for new constant buffer code */ + if (inst->SrcReg[0].File == PROGRAM_UNIFORM) { + emit_get_constant(brw, c, inst, inst->SrcReg[0].Index); + } + else { + emit_mov(c, inst); + } +#else emit_mov(c, inst); +#endif break; case OPCODE_DP3: emit_dp3(c, inst); diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 1fc537ca206..e7d55d5dbd1 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -33,6 +33,7 @@ #include "main/mtypes.h" #include "main/texformat.h" #include "main/texstore.h" +#include "shader/prog_parameter.h" #include "intel_mipmap_tree.h" #include "intel_batchbuffer.h" @@ -287,6 +288,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) struct intel_texture_object *intelObj = intel_texture_object(tObj); struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; struct brw_wm_surface_key key; + const GLuint j = MAX_DRAW_BUFFERS + unit; memset(&key, 0, sizeof(key)); @@ -313,16 +315,111 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.cpp = intelObj->mt->cpp; key.tiling = intelObj->mt->region->tiling; - dri_bo_unreference(brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS]); - brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, - &key, sizeof(key), - &key.bo, key.bo ? 1 : 0, - NULL); - if (brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] == NULL) { - brw->wm.surf_bo[unit + MAX_DRAW_BUFFERS] = brw_create_texture_surface(brw, &key); + dri_bo_unreference(brw->wm.surf_bo[j]); + brw->wm.surf_bo[j] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->wm.surf_bo[j] == NULL) { + brw->wm.surf_bo[j] = brw_create_texture_surface(brw, &key); } } + + +/** + * Create the constant buffer surface. Fragment shader constanst will be + * read from this buffer with Data Port Read instructions/messages. + */ +static dri_bo * +brw_create_constant_surface( struct brw_context *brw, + struct brw_wm_surface_key *key ) +{ + const GLint w = key->width - 1; + struct brw_surface_state surf; + dri_bo *bo; + + memset(&surf, 0, sizeof(surf)); + + surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; + surf.ss0.surface_type = BRW_SURFACE_BUFFER; + surf.ss0.surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT; + + /* This is ok for all textures with channel width 8bit or less: + */ + assert(key->bo); + if (key->bo) + surf.ss1.base_addr = key->bo->offset; /* reloc */ + else + surf.ss1.base_addr = key->offset; + + surf.ss2.width = w & 0x7f; /* bits 6:0 of size or width */ + surf.ss2.height = (w >> 7) & 0x1fff; /* bits 19:7 of size or width */ + surf.ss3.depth = (w >> 20) & 0x7f; /* bits 26:20 of size or width */ + surf.ss3.pitch = (key->pitch * key->cpp) - 1; + brw_set_surface_tiling(&surf, key->tiling); + + bo = brw_upload_cache(&brw->cache, BRW_SS_SURFACE, + key, sizeof(*key), + &key->bo, key->bo ? 1 : 0, + &surf, sizeof(surf), + NULL, NULL); + + if (key->bo) { + /* Emit relocation to surface contents */ + dri_bo_emit_reloc(bo, + I915_GEM_DOMAIN_SAMPLER, 0, + 0, + offsetof(struct brw_surface_state, ss1), + key->bo); + } + + return bo; +} + + +/** + * Update the constant buffer surface. + */ +static void +brw_update_constant_surface( GLcontext *ctx, + const struct brw_fragment_program *fp ) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_wm_surface_key key; + const GLuint j = BRW_WM_MAX_SURF - 1; + const GLuint numParams = fp->program.Base.Parameters->NumParameters; + + memset(&key, 0, sizeof(key)); + + key.format = MESA_FORMAT_RGBA_FLOAT32; + key.internal_format = GL_RGBA; + key.bo = fp->const_buffer; + + key.depthmode = GL_NONE; + key.pitch = numParams; + key.width = numParams; + key.height = 1; + key.depth = 1; + key.cpp = 16; + + /* + printf("%s:\n", __FUNCTION__); + printf(" width %d height %d depth %d cpp %d pitch %d\n", + key.width, key.height, key.depth, key.cpp, key.pitch); + */ + + dri_bo_unreference(brw->wm.surf_bo[j]); + brw->wm.surf_bo[j] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, key.bo ? 1 : 0, + NULL); + if (brw->wm.surf_bo[j] == NULL) { + brw->wm.surf_bo[j] = brw_create_constant_surface(brw, &key); + } +} + + /** * Sets up a surface state structure to point at the given region. * While it is only used for the front/back buffer currently, it should be @@ -514,6 +611,17 @@ static void prepare_wm_surfaces(struct brw_context *brw ) } } + /* Update surface for fragment shader constant buffer */ + { + const GLuint j = BRW_WM_MAX_SURF - 1; + const struct brw_fragment_program *fp = + brw_fragment_program_const(brw->fragment_program); + + brw_update_constant_surface(ctx, fp); + brw->wm.nr_surfaces = j + 1; + } + + dri_bo_unreference(brw->wm.bind_bo); brw->wm.bind_bo = brw_wm_get_binding_table(brw);