From: Eric Anholt Date: Fri, 14 Dec 2007 19:02:48 +0000 (-0800) Subject: [965] Replace the state cache suballocator with direct dri_bufmgr use. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=38bad7677e57d629eeffd4ef39a7fc254db12735;p=mesa.git [965] Replace the state cache suballocator with direct dri_bufmgr use. The user-space suballocator that was used avoided relocation computations by using the general and surface state base registers and allocating those types of buffers out of pools built on top of single buffer objects. It also avoided calls into the buffer manager for these small state allocations, since only one buffer object was being used. However, the buffer allocation cost appears to be low, and with relocation caching, computing relocations for buffers is essentially free. Additionally, implementing the suballocator required a don't-fence-subdata flag to disable waiting on buffer maps so that writing new data didn't block on rendering using old data, and careful handling when mapping to update old data (which we need to do for unavoidable relocations with FBOs). More importantly, when the suballocator filled, it had no replacement algorithm and just threw out all of the contents and forced them to be recomputed, which is a significant cost. This is the first step, which just changes the buffer type, but doesn't yet improve the hash table to not result in full recompute on overflow. Because the buffers are all allocated out of the general buffer allocator, we can no longer use the general/surface state bases to avoid relocations, and they are set to 0 instead. --- diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 5b1a83bccc3..ac1c2f73478 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -54,7 +54,6 @@ DRIVER_SOURCES = \ brw_state_batch.c \ brw_state_cache.c \ brw_state_dump.c \ - brw_state_pool.c \ brw_state_upload.c \ brw_tex.c \ brw_tex_layout.c \ diff --git a/src/mesa/drivers/dri/i965/brw_cc.c b/src/mesa/drivers/dri/i965/brw_cc.c index 8a1d1527db3..245d2ddcad1 100644 --- a/src/mesa/drivers/dri/i965/brw_cc.c +++ b/src/mesa/drivers/dri/i965/brw_cc.c @@ -46,7 +46,8 @@ static void upload_cc_vp( struct brw_context *brw ) ccv.min_depth = 0.0; ccv.max_depth = 1.0; - brw->cc.vp_gs_offset = brw_cache_data( &brw->cache[BRW_CC_VP], &ccv ); + dri_bo_unreference(brw->cc.vp_bo); + brw->cc.vp_bo = brw_cache_data( &brw->cache, BRW_CC_VP, &ccv, NULL, 0 ); } const struct brw_tracked_state brw_cc_vp = { @@ -152,12 +153,24 @@ static void upload_cc_unit( struct brw_context *brw ) } /* CACHE_NEW_CC_VP */ - cc.cc4.cc_viewport_state_offset = brw->cc.vp_gs_offset >> 5; + cc.cc4.cc_viewport_state_offset = brw->cc.vp_bo->offset >> 5; /* reloc */ if (INTEL_DEBUG & DEBUG_STATS) cc.cc5.statistics_enable = 1; - brw->cc.state_gs_offset = brw_cache_data( &brw->cache[BRW_CC_UNIT], &cc ); + dri_bo_unreference(brw->cc.state_bo); + brw->cc.state_bo = brw_cache_data( &brw->cache, BRW_CC_UNIT, &cc, + &brw->cc.vp_bo, 1); +} + +static void emit_reloc_cc_unit(struct brw_context *brw) +{ + /* Emit CC viewport relocation */ + dri_emit_reloc(brw->cc.state_bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + 0, + offsetof(struct brw_cc_unit_state, cc4), + brw->cc.vp_bo); } const struct brw_tracked_state brw_cc_unit = { @@ -166,7 +179,8 @@ const struct brw_tracked_state brw_cc_unit = { .brw = 0, .cache = CACHE_NEW_CC_VP }, - .update = upload_cc_unit + .update = upload_cc_unit, + .emit_reloc = emit_reloc_cc_unit, }; diff --git a/src/mesa/drivers/dri/i965/brw_clip.c b/src/mesa/drivers/dri/i965/brw_clip.c index 8287fd9edf3..e6f3f631264 100644 --- a/src/mesa/drivers/dri/i965/brw_clip.c +++ b/src/mesa/drivers/dri/i965/brw_clip.c @@ -119,28 +119,16 @@ static void compile_clip_prog( struct brw_context *brw, /* Upload */ - brw->clip.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_CLIP_PROG], - &c.key, - sizeof(c.key), - program, - program_size, - &c.prog_data, - &brw->clip.prog_data ); + dri_bo_unreference(brw->clip.prog_bo); + brw->clip.prog_bo = brw_upload_cache( &brw->cache, + BRW_CLIP_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->clip.prog_data ); } - -static GLboolean search_cache( struct brw_context *brw, - struct brw_clip_prog_key *key ) -{ - return brw_search_cache(&brw->cache[BRW_CLIP_PROG], - key, sizeof(*key), - &brw->clip.prog_data, - &brw->clip.prog_gs_offset); -} - - - - /* Calculate interpolants for triangle and line rasterization. */ static void upload_clip_prog( struct brw_context *brw ) @@ -252,7 +240,12 @@ static void upload_clip_prog( struct brw_context *brw ) } } - if (!search_cache(brw, &key)) + dri_bo_unreference(brw->clip.prog_bo); + brw->clip.prog_bo = brw_search_cache(&brw->cache, BRW_CLIP_PROG, + &key, sizeof(key), + NULL, 0, + &brw->clip.prog_data); + if (brw->clip.prog_bo == NULL) compile_clip_prog( brw, &key ); } diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c index ba2f0edf513..7640a39b452 100644 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ b/src/mesa/drivers/dri/i965/brw_clip_state.c @@ -45,7 +45,8 @@ static void upload_clip_unit( struct brw_context *brw ) /* CACHE_NEW_CLIP_PROG */ clip.thread0.grf_reg_count = ALIGN(brw->clip.prog_data->total_grf, 16) / 16 - 1; - clip.thread0.kernel_start_pointer = brw->clip.prog_gs_offset >> 6; + /* reloc */ + clip.thread0.kernel_start_pointer = brw->clip.prog_bo->offset >> 6; clip.thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length; clip.thread3.const_urb_entry_read_length = brw->clip.prog_data->curb_read_length; clip.clip5.clip_mode = brw->clip.prog_data->clip_mode; @@ -79,9 +80,24 @@ static void upload_clip_unit( struct brw_context *brw ) clip.viewport_ymin = -1; clip.viewport_ymax = 1; - brw->clip.state_gs_offset = brw_cache_data( &brw->cache[BRW_CLIP_UNIT], &clip ); + brw->clip.thread0_delta = clip.thread0.grf_reg_count << 1; + + dri_bo_unreference(brw->clip.state_bo); + brw->clip.state_bo = brw_cache_data( &brw->cache, BRW_CLIP_UNIT, &clip, + &brw->clip.prog_bo, 1); } +static void emit_reloc_clip_unit(struct brw_context *brw) +{ + if (!brw->metaops.active) { + /* Emit clip program relocation */ + dri_emit_reloc(brw->clip.state_bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + brw->clip.thread0_delta, + offsetof(struct brw_clip_unit_state, thread0), + brw->clip.prog_bo); + } +} const struct brw_tracked_state brw_clip_unit = { .dirty = { @@ -90,5 +106,6 @@ const struct brw_tracked_state brw_clip_unit = { BRW_NEW_URB_FENCE), .cache = CACHE_NEW_CLIP_PROG }, - .update = upload_clip_unit + .update = upload_clip_unit, + .emit_reloc = emit_reloc_clip_unit, }; diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 4654ab1ddfe..3c463c3b9a6 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -169,8 +169,6 @@ GLboolean brwCreateContext( const __GLcontextModes *mesaVis, brw->state.dirty.mesa = ~0; brw->state.dirty.brw = ~0; - memset(&brw->wm.bind, ~0, sizeof(brw->wm.bind)); - brw->emit_state_always = 0; ctx->FragmentProgram._MaintainTexEnvProgram = 1; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 68afea111d5..a5ef058e8ca 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -139,8 +139,13 @@ struct brw_context; struct brw_state_flags { + /** State update flags signalled by mesa internals */ GLuint mesa; + /** State update flags signalled by brw_state_cache.c searches */ GLuint cache; + /** + * State update flags signalled as the result of brw_tracked_state updates + */ GLuint brw; }; @@ -232,30 +237,44 @@ struct brw_vs_ouput_sizes { #define BRW_MAX_TEX_UNIT 8 #define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1 -/* Create a fixed sized struct for caching binding tables: - */ -struct brw_surface_binding_table { - GLuint surf_ss_offset[BRW_WM_MAX_SURF]; -}; - - -struct brw_cache; - -struct brw_mem_pool { - dri_bo *buffer; - - GLuint size; - GLuint offset; /* offset of first free byte */ +enum brw_cache_id { + BRW_CC_VP, + BRW_CC_UNIT, + BRW_WM_PROG, + BRW_SAMPLER_DEFAULT_COLOR, + BRW_SAMPLER, + BRW_WM_UNIT, + BRW_SF_PROG, + BRW_SF_VP, + BRW_SF_UNIT, + BRW_VS_UNIT, + BRW_VS_PROG, + BRW_GS_UNIT, + BRW_GS_PROG, + BRW_CLIP_VP, + BRW_CLIP_UNIT, + BRW_CLIP_PROG, + BRW_SS_SURFACE, + BRW_SS_SURF_BIND, - struct brw_context *brw; + BRW_MAX_CACHE }; struct brw_cache_item { + /** + * Effectively part of the key, cache_id identifies what kind of state + * buffer is involved, and also which brw->state.dirty.cache flag should + * be set when this cache item is chosen. + */ + enum brw_cache_id cache_id; + /** 32-bit hash of the key data */ GLuint hash; GLuint key_size; /* for variable-sized keys */ const void *key; + dri_bo **reloc_bufs; + GLuint nr_reloc_bufs; - GLuint offset; /* offset within pool's buffer */ + dri_bo *bo; GLuint data_size; struct brw_cache_item *next; @@ -264,20 +283,19 @@ struct brw_cache_item { struct brw_cache { - GLuint id; - - const char *name; - struct brw_context *brw; - struct brw_mem_pool *pool; struct brw_cache_item **items; GLuint size, n_items; - - GLuint key_size; /* for fixed-size keys */ - GLuint aux_size; - GLuint last_addr; /* offset of active item */ + GLuint key_size[BRW_MAX_CACHE]; /* for fixed-size keys */ + GLuint aux_size[BRW_MAX_CACHE]; + char *name[BRW_MAX_CACHE]; + + /* Record of the last BOs chosen for each cache_id. Used to set + * brw->state.dirty.cache when a new cache item is chosen. + */ + dri_bo *last_bo[BRW_MAX_CACHE]; }; @@ -314,33 +332,6 @@ struct brw_tracked_state { GLboolean always_update; }; - -enum brw_cache_id { - BRW_CC_VP, - BRW_CC_UNIT, - BRW_WM_PROG, - BRW_SAMPLER_DEFAULT_COLOR, - BRW_SAMPLER, - BRW_WM_UNIT, - BRW_SF_PROG, - BRW_SF_VP, - BRW_SF_UNIT, - BRW_VS_UNIT, - BRW_VS_PROG, - BRW_GS_UNIT, - BRW_GS_PROG, - BRW_CLIP_VP, - BRW_CLIP_UNIT, - BRW_CLIP_PROG, - - /* These two are in the SS pool: - */ - BRW_SS_SURFACE, - BRW_SS_SURF_BIND, - - BRW_MAX_CACHE -}; - /* Flags for brw->state.cache. */ #define CACHE_NEW_CC_VP (1<intel.ctx; struct brw_vertex_program *vp = (struct brw_vertex_program *)brw->vertex_program; struct brw_fragment_program *fp = (struct brw_fragment_program *)brw->fragment_program; - struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL]; GLuint sz = brw->curbe.total_size; GLuint bufsz = sz * 16 * sizeof(GLfloat); GLfloat *buf; @@ -291,7 +290,6 @@ static void upload_constant_buffer(struct brw_context *brw) bufsz == brw->curbe.last_bufsz && memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { free(buf); -/* return; */ } else { if (brw->curbe.last_buf) @@ -299,20 +297,16 @@ static void upload_constant_buffer(struct brw_context *brw) brw->curbe.last_buf = buf; brw->curbe.last_bufsz = bufsz; - - if (!brw_pool_alloc(pool, - bufsz, - 1 << 6, - &brw->curbe.gs_offset)) { - _mesa_printf("out of GS memory for curbe\n"); - assert(0); - return; - } - + dri_bo_unreference(brw->curbe.curbe_bo); + brw->curbe.curbe_bo = dri_bo_alloc(brw->intel.bufmgr, "CURBE", + bufsz, 1 << 6, + DRM_BO_FLAG_MEM_LOCAL | + DRM_BO_FLAG_CACHED | + DRM_BO_FLAG_CACHED_MAPPED); /* Copy data to the buffer: */ - dri_bo_subdata(pool->buffer, brw->curbe.gs_offset, bufsz, buf); + dri_bo_subdata(brw->curbe.curbe_bo, 0, bufsz, buf); } /* Because this provokes an action (ie copy the constants into the @@ -330,7 +324,8 @@ static void upload_constant_buffer(struct brw_context *brw) */ BEGIN_BATCH(2, INTEL_BATCH_NO_CLIPRECTS); OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2)); - OUT_BATCH(brw->curbe.gs_offset | (sz - 1)); + OUT_RELOC(brw->curbe.curbe_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + (sz - 1)); ADVANCE_BATCH(); } @@ -350,6 +345,7 @@ const struct brw_tracked_state brw_constant_buffer = { BRW_NEW_CURBE_OFFSETS), .cache = (CACHE_NEW_WM_PROG) }, - .update = upload_constant_buffer + .update = upload_constant_buffer, + .always_update = GL_TRUE, /* Has a relocation in the batchbuffer */ }; diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 73263a5fff4..5c52212a3b0 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -119,26 +119,15 @@ static void compile_gs_prog( struct brw_context *brw, /* Upload */ - brw->gs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_GS_PROG], - &c.key, - sizeof(c.key), - program, - program_size, - &c.prog_data, - &brw->gs.prog_data ); + dri_bo_unreference(brw->gs.prog_bo); + brw->gs.prog_bo = brw_upload_cache( &brw->cache, BRW_GS_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->gs.prog_data ); } - -static GLboolean search_cache( struct brw_context *brw, - struct brw_gs_prog_key *key ) -{ - return brw_search_cache(&brw->cache[BRW_GS_PROG], - key, sizeof(*key), - &brw->gs.prog_data, - &brw->gs.prog_gs_offset); -} - - static const GLenum gs_prim[GL_POLYGON+1] = { GL_POINTS, GL_LINES, @@ -187,7 +176,12 @@ static void upload_gs_prog( struct brw_context *brw ) } if (brw->gs.prog_active) { - if (!search_cache(brw, &key)) + dri_bo_unreference(brw->gs.prog_bo); + brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG, + &key, sizeof(key), + NULL, 0, + &brw->gs.prog_data); + if (brw->gs.prog_bo == NULL) compile_gs_prog( brw, &key ); } } diff --git a/src/mesa/drivers/dri/i965/brw_gs_state.c b/src/mesa/drivers/dri/i965/brw_gs_state.c index 5db4dd4603b..6bbf11e2533 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_state.c +++ b/src/mesa/drivers/dri/i965/brw_gs_state.c @@ -48,7 +48,8 @@ static void upload_gs_unit( struct brw_context *brw ) if (brw->gs.prog_active) { gs.thread0.grf_reg_count = ALIGN(brw->gs.prog_data->total_grf, 16) / 16 - 1; - gs.thread0.kernel_start_pointer = brw->gs.prog_gs_offset >> 6; + /* reloc */ + gs.thread0.kernel_start_pointer = brw->gs.prog_bo->offset >> 6; gs.thread3.urb_entry_read_length = brw->gs.prog_data->urb_read_length; } else { @@ -73,11 +74,25 @@ static void upload_gs_unit( struct brw_context *brw ) gs.thread3.const_urb_entry_read_offset = 0; gs.thread3.const_urb_entry_read_length = 0; gs.thread3.urb_entry_read_offset = 0; - - brw->gs.state_gs_offset = brw_cache_data( &brw->cache[BRW_GS_UNIT], &gs ); + brw->gs.thread0_delta = gs.thread0.grf_reg_count << 1; + + dri_bo_unreference(brw->gs.state_bo); + brw->gs.state_bo = brw_cache_data( &brw->cache, BRW_GS_UNIT, &gs, + &brw->gs.prog_bo, 1 ); } +static void emit_reloc_gs_unit(struct brw_context *brw) +{ + if (brw->gs.prog_active) { + /* Emit GS program relocation */ + dri_emit_reloc(brw->gs.state_bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + brw->gs.thread0_delta, + offsetof(struct brw_gs_unit_state, thread0), + brw->gs.prog_bo); + } +} const struct brw_tracked_state brw_gs_unit = { .dirty = { @@ -86,5 +101,6 @@ const struct brw_tracked_state brw_gs_unit = { BRW_NEW_URB_FENCE), .cache = CACHE_NEW_GS_PROG }, - .update = upload_gs_unit + .update = upload_gs_unit, + .emit_reloc = emit_reloc_gs_unit, }; diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c index 210745c63b0..6dc6c07ac11 100644 --- a/src/mesa/drivers/dri/i965/brw_misc_state.c +++ b/src/mesa/drivers/dri/i965/brw_misc_state.c @@ -128,31 +128,25 @@ const struct brw_tracked_state brw_drawing_rect = { * state pointers. * * The binding table pointers are relative to the surface state base address, - * which is the BRW_SS_POOL cache buffer. + * which is 0. */ static void upload_binding_table_pointers(struct brw_context *brw) { - struct brw_binding_table_pointers btp; - memset(&btp, 0, sizeof(btp)); - - btp.header.opcode = CMD_BINDING_TABLE_PTRS; - btp.header.length = sizeof(btp)/4 - 2; - btp.vs = 0; - btp.gs = 0; - btp.clp = 0; - btp.sf = 0; - btp.wm = brw->wm.bind_ss_offset; - - BRW_CACHED_BATCH_STRUCT(brw, &btp); + struct intel_context *intel = &brw->intel; + + BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH(CMD_BINDING_TABLE_PTRS << 16 | (6 - 2)); + OUT_BATCH(0); /* vs */ + OUT_BATCH(0); /* gs */ + OUT_BATCH(0); /* clip */ + OUT_BATCH(0); /* sf */ + OUT_RELOC(brw->wm.bind_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); + ADVANCE_BATCH(); } const struct brw_tracked_state brw_binding_table_pointers = { - .dirty = { - .mesa = 0, - .brw = 0, - .cache = CACHE_NEW_SURF_BIND - }, - .update = upload_binding_table_pointers + .update = upload_binding_table_pointers, + .always_update = GL_TRUE, /* Has a relocation in the batchbuffer */ }; @@ -160,39 +154,33 @@ const struct brw_tracked_state brw_binding_table_pointers = { * Upload pointers to the per-stage state. * * The state pointers in this packet are all relative to the general state - * base address set by CMD_STATE_BASE_ADDRESS, which is the BRW_GS_POOL buffer. + * base address set by CMD_STATE_BASE_ADDRESS, which is 0. */ static void upload_pipelined_state_pointers(struct brw_context *brw ) { - struct brw_pipelined_state_pointers psp; - memset(&psp, 0, sizeof(psp)); - - psp.header.opcode = CMD_PIPELINED_STATE_POINTERS; - psp.header.length = sizeof(psp)/4 - 2; - - psp.vs.offset = brw->vs.state_gs_offset >> 5; - psp.sf.offset = brw->sf.state_gs_offset >> 5; - psp.wm.offset = brw->wm.state_gs_offset >> 5; - psp.cc.offset = brw->cc.state_gs_offset >> 5; - - /* GS gets turned on and off regularly. Need to re-emit URB fence - * after this occurs. - */ - if (brw->gs.prog_active) { - psp.gs.offset = brw->gs.state_gs_offset >> 5; - psp.gs.enable = 1; - } - - if (!brw->metaops.active) { - psp.clp.offset = brw->clip.state_gs_offset >> 5; - psp.clp.enable = 1; - } + struct intel_context *intel = &brw->intel; + BEGIN_BATCH(7, INTEL_BATCH_NO_CLIPRECTS); + OUT_BATCH(CMD_PIPELINED_STATE_POINTERS << 16 | (7 - 2)); + OUT_RELOC(brw->vs.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); + if (brw->gs.prog_active) + OUT_RELOC(brw->gs.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 1); + else + OUT_BATCH(0); + if (!brw->metaops.active) + OUT_RELOC(brw->clip.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 1); + else + OUT_BATCH(0); + OUT_RELOC(brw->sf.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); + OUT_RELOC(brw->wm.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); + OUT_RELOC(brw->cc.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0); + ADVANCE_BATCH(); - if (BRW_CACHED_BATCH_STRUCT(brw, &psp)) - brw->state.dirty.brw |= BRW_NEW_PSP; + brw->state.dirty.brw |= BRW_NEW_PSP; } +#if 0 +/* Combined into brw_psp_urb_cbs */ const struct brw_tracked_state brw_pipelined_state_pointers = { .dirty = { .mesa = 0, @@ -206,7 +194,9 @@ const struct brw_tracked_state brw_pipelined_state_pointers = { CACHE_NEW_CC_UNIT) }, .update = upload_pipelined_state_pointers + .always_update = GL_TRUE, /* Has a relocation in the batchbuffer */ }; +#endif static void upload_psp_urb_cbs(struct brw_context *brw ) { @@ -228,7 +218,8 @@ const struct brw_tracked_state brw_psp_urb_cbs = { CACHE_NEW_WM_UNIT | CACHE_NEW_CC_UNIT) }, - .update = upload_psp_urb_cbs + .update = upload_psp_urb_cbs, + .always_update = GL_TRUE, /* psp has relocations. */ }; /** @@ -491,20 +482,19 @@ static void upload_state_base_address( struct brw_context *brw ) */ BEGIN_BATCH(6, INTEL_BATCH_NO_CLIPRECTS); OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2)); - OUT_RELOC(brw->pool[BRW_GS_POOL].buffer, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, - 1); /* General state base address */ - OUT_RELOC(brw->pool[BRW_SS_POOL].buffer, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, - 1); /* Surface state base address */ + OUT_BATCH(1); /* General state base address */ + OUT_BATCH(1); /* Surface state base address */ OUT_BATCH(1); /* Indirect object base address */ OUT_BATCH(1); /* General state upper bound */ OUT_BATCH(1); /* Indirect object upper bound */ ADVANCE_BATCH(); } - const struct brw_tracked_state brw_state_base_address = { - .always_update = GL_TRUE, + .dirty = { + .mesa = 0, + .brw = BRW_NEW_CONTEXT, + .cache = 0, + }, .update = upload_state_base_address }; diff --git a/src/mesa/drivers/dri/i965/brw_sf.c b/src/mesa/drivers/dri/i965/brw_sf.c index 738ceb0552e..b0702139b5c 100644 --- a/src/mesa/drivers/dri/i965/brw_sf.c +++ b/src/mesa/drivers/dri/i965/brw_sf.c @@ -116,26 +116,15 @@ static void compile_sf_prog( struct brw_context *brw, /* Upload */ - brw->sf.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_SF_PROG], - &c.key, - sizeof(c.key), - program, - program_size, - &c.prog_data, - &brw->sf.prog_data ); + dri_bo_unreference(brw->sf.prog_bo); + brw->sf.prog_bo = brw_upload_cache( &brw->cache, BRW_SF_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->sf.prog_data ); } - -static GLboolean search_cache( struct brw_context *brw, - struct brw_sf_prog_key *key ) -{ - return brw_search_cache(&brw->cache[BRW_SF_PROG], - key, sizeof(*key), - &brw->sf.prog_data, - &brw->sf.prog_gs_offset); -} - - /* Calculate interpolants for triangle and line rasterization. */ static void upload_sf_prog( struct brw_context *brw ) @@ -180,8 +169,12 @@ static void upload_sf_prog( struct brw_context *brw ) if (key.do_twoside_color) key.frontface_ccw = (brw->attribs.Polygon->FrontFace == GL_CCW); - - if (!search_cache(brw, &key)) + dri_bo_unreference(brw->sf.prog_bo); + brw->sf.prog_bo = brw_search_cache(&brw->cache, BRW_SF_PROG, + &key, sizeof(key), + NULL, 0, + &brw->sf.prog_data); + if (brw->sf.prog_bo == NULL) compile_sf_prog( brw, &key ); } diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index 2257916aaec..5a0106f77ec 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -97,7 +97,8 @@ static void upload_sf_vp(struct brw_context *brw) sfv.scissor.ymax = y2; } - brw->sf.vp_gs_offset = brw_cache_data( &brw->cache[BRW_SF_VP], &sfv ); + dri_bo_unreference(brw->sf.vp_bo); + brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 ); } const struct brw_tracked_state brw_sf_vp = { @@ -116,10 +117,11 @@ static void upload_sf_unit( struct brw_context *brw ) { struct brw_sf_unit_state sf; memset(&sf, 0, sizeof(sf)); + dri_bo *reloc_bufs[2]; /* CACHE_NEW_SF_PROG */ sf.thread0.grf_reg_count = ALIGN(brw->sf.prog_data->total_grf, 16) / 16 - 1; - sf.thread0.kernel_start_pointer = brw->sf.prog_gs_offset >> 6; + sf.thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */ sf.thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length; sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; @@ -138,7 +140,7 @@ static void upload_sf_unit( struct brw_context *brw ) sf.thread4.stats_enable = 1; /* CACHE_NEW_SF_VP */ - sf.sf5.sf_viewport_state_offset = brw->sf.vp_gs_offset >> 5; + sf.sf5.sf_viewport_state_offset = brw->sf.vp_bo->offset >> 5; /* reloc */ sf.sf5.viewport_transform = 1; @@ -202,9 +204,33 @@ static void upload_sf_unit( struct brw_context *brw ) sf.sf6.dest_org_vbias = 0x8; sf.sf6.dest_org_hbias = 0x8; - brw->sf.state_gs_offset = brw_cache_data( &brw->cache[BRW_SF_UNIT], &sf ); + reloc_bufs[0] = brw->sf.prog_bo; + reloc_bufs[1] = brw->sf.vp_bo; + + brw->sf.thread0_delta = sf.thread0.grf_reg_count << 1; + brw->sf.sf5_delta = sf.sf5.front_winding | (sf.sf5.viewport_transform << 1); + + dri_bo_unreference(brw->sf.state_bo); + brw->sf.state_bo = brw_cache_data( &brw->cache, BRW_SF_UNIT, &sf, + reloc_bufs, 2 ); } +static void emit_reloc_sf_unit(struct brw_context *brw) +{ + /* Emit SF program relocation */ + dri_emit_reloc(brw->sf.state_bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + brw->sf.thread0_delta, + offsetof(struct brw_sf_unit_state, thread0), + brw->sf.prog_bo); + + /* Emit SF viewport relocation */ + dri_emit_reloc(brw->sf.state_bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + brw->sf.sf5_delta, + offsetof(struct brw_sf_unit_state, sf5), + brw->sf.vp_bo); +} const struct brw_tracked_state brw_sf_unit = { .dirty = { @@ -217,7 +243,6 @@ const struct brw_tracked_state brw_sf_unit = { .cache = (CACHE_NEW_SF_VP | CACHE_NEW_SF_PROG) }, - .update = upload_sf_unit + .update = upload_sf_unit, + .emit_reloc = emit_reloc_sf_unit, }; - - diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index ef2409df5ab..f0a740f456c 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -83,35 +83,42 @@ const struct brw_tracked_state brw_clear_batch_cache; /*********************************************************************** * brw_state_cache.c */ -GLuint brw_cache_data(struct brw_cache *cache, - const void *data ); - -GLuint brw_cache_data_sz(struct brw_cache *cache, - const void *data, - GLuint data_sz); - -GLuint brw_upload_cache( struct brw_cache *cache, - const void *key, - GLuint key_sz, - const void *data, - GLuint data_sz, - const void *aux, - void *aux_return ); - -GLboolean brw_search_cache( struct brw_cache *cache, - const void *key, - GLuint key_size, - void *aux_return, - GLuint *offset_return); - -void brw_init_caches( struct brw_context *brw ); -void brw_destroy_caches( struct brw_context *brw ); - -static inline dri_bo *brw_cache_buffer(struct brw_context *brw, - enum brw_cache_id id) -{ - return brw->cache[id].pool->buffer; -} +dri_bo *brw_cache_data(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs); + +dri_bo *brw_cache_data_sz(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + GLuint data_size, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs); + +dri_bo *brw_upload_cache( struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_sz, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + const void *data, + GLuint data_sz, + const void *aux, + void *aux_return ); + +dri_bo *brw_search_cache( struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + void *aux_return); +void brw_clear_cache( struct brw_context *brw ); +void brw_state_cache_check_size( struct brw_context *brw ); + +void brw_init_cache( struct brw_context *brw ); +void brw_destroy_cache( struct brw_context *brw ); /*********************************************************************** * brw_state_batch.c @@ -122,31 +129,7 @@ static inline dri_bo *brw_cache_buffer(struct brw_context *brw, GLboolean brw_cached_batch_struct( struct brw_context *brw, const void *data, GLuint sz ); - void brw_destroy_batch_cache( struct brw_context *brw ); - - -/*********************************************************************** - * brw_state_pool.c - */ -void brw_init_pools( struct brw_context *brw ); -void brw_destroy_pools( struct brw_context *brw ); - -GLboolean brw_pool_alloc( struct brw_mem_pool *pool, - GLuint size, - GLuint alignment, - GLuint *offset_return); - -void brw_pool_fence( struct brw_context *brw, - struct brw_mem_pool *pool, - GLuint fence ); - - -void brw_pool_check_wrap( struct brw_context *brw, - struct brw_mem_pool *pool ); - -void brw_clear_all_caches( struct brw_context *brw ); -void brw_invalidate_pools( struct brw_context *brw ); void brw_clear_batch_cache_flush( struct brw_context *brw ); #endif diff --git a/src/mesa/drivers/dri/i965/brw_state_batch.c b/src/mesa/drivers/dri/i965/brw_state_batch.c index eabda257d3b..34be1012734 100644 --- a/src/mesa/drivers/dri/i965/brw_state_batch.c +++ b/src/mesa/drivers/dri/i965/brw_state_batch.c @@ -91,11 +91,6 @@ static void clear_batch_cache( struct brw_context *brw ) } brw->cached_batch_items = NULL; - - - brw_clear_all_caches(brw); - - brw_invalidate_pools(brw); } void brw_clear_batch_cache_flush( struct brw_context *brw ) diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index 618e445546e..d614316ab60 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -28,7 +28,33 @@ * Authors: * Keith Whitwell */ - + +/** @file brw_state_cache.c + * + * This file implements a simple static state cache for 965. The consumers + * can query the hash table of state using a cache_id, opaque key data, + * and list of buffers that will be used in relocations, and receive the + * corresponding state buffer object of state (plus associated auxiliary + * data) in return. + * + * The inner workings are a simple hash table based on a CRC of the key data. + * The cache_id and relocation target buffers associated with the state + * buffer are included as auxiliary key data, but are not part of the hash + * value (this should be fixed, but will likely be fixed instead by making + * consumers use structured keys). + * + * Replacement is not implemented. Instead, when the cache gets too big, at + * a safe point (unlock) we throw out all of the cache data let it regenerate + * it for the next rendering operation. + * + * The reloc_buf pointers need to be included as key data, otherwise the + * non-unique values stuffed in the offset in key data through + * brw_cache_data() may result in successful probe for state buffers + * even when the buffer being referenced doesn't match. The result would be + * that the same state cache entry is used twice for different buffers, + * only one of the two buffers referenced gets put into the offset, and the + * incorrect program is run for the other instance. + */ #include "brw_state.h" #include "intel_batchbuffer.h" @@ -43,15 +69,6 @@ #include "brw_sf.h" #include "brw_gs.h" - -/*********************************************************************** - * Check cache for uploaded version of struct, else upload new one. - * Fail when memory is exhausted. - * - * XXX: FIXME: Currently search is so slow it would be quicker to - * regenerate the data every time... - */ - static GLuint hash_key( const void *key, GLuint key_size ) { GLuint *ikey = (GLuint *)key; @@ -67,17 +84,34 @@ static GLuint hash_key( const void *key, GLuint key_size ) return hash; } -static struct brw_cache_item *search_cache( struct brw_cache *cache, - GLuint hash, - const void *key, - GLuint key_size) +/** + * Marks a new buffer as being chosen for the given cache id. + */ +static void +update_cache_last(struct brw_cache *cache, enum brw_cache_id cache_id, + dri_bo *bo) +{ + dri_bo_unreference(cache->last_bo[cache_id]); + cache->last_bo[cache_id] = bo; + dri_bo_reference(cache->last_bo[cache_id]); + cache->brw->state.dirty.cache |= 1 << cache_id; +} + +static struct brw_cache_item * +search_cache(struct brw_cache *cache, enum brw_cache_id cache_id, + GLuint hash, const void *key, GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs) { struct brw_cache_item *c; for (c = cache->items[hash % cache->size]; c; c = c->next) { - if (c->hash == hash && + if (c->cache_id == cache_id && + c->hash == hash && c->key_size == key_size && - memcmp(c->key, key, key_size) == 0) + memcmp(c->key, key, key_size) == 0 && + c->nr_reloc_bufs == nr_reloc_bufs && + memcmp(c->reloc_bufs, reloc_bufs, + nr_reloc_bufs * sizeof(dri_bo *)) == 0) return c; } @@ -92,8 +126,7 @@ static void rehash( struct brw_cache *cache ) GLuint size, i; size = cache->size * 3; - items = (struct brw_cache_item**) _mesa_malloc(size * sizeof(*items)); - _mesa_memset(items, 0, size * sizeof(*items)); + items = (struct brw_cache_item**) _mesa_calloc(size * sizeof(*items)); for (i = 0; i < cache->size; i++) for (c = cache->items[i]; c; c = next) { @@ -107,116 +140,156 @@ static void rehash( struct brw_cache *cache ) cache->size = size; } - -GLboolean brw_search_cache( struct brw_cache *cache, - const void *key, - GLuint key_size, - void *aux_return, - GLuint *offset_return) +/** + * Returns the buffer object matching cache_id and key, or NULL. + */ +dri_bo *brw_search_cache( struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, GLuint nr_reloc_bufs, + void *aux_return ) { struct brw_cache_item *item; - GLuint addr = 0; GLuint hash = hash_key(key, key_size); - item = search_cache(cache, hash, key, key_size); + item = search_cache(cache, cache_id, hash, key, key_size, + reloc_bufs, nr_reloc_bufs); - if (item) { - if (aux_return) - *(void **)aux_return = (void *)((char *)item->key + item->key_size); - - *offset_return = addr = item->offset; - } - - if (item == NULL || addr != cache->last_addr) { - cache->brw->state.dirty.cache |= 1<id; - cache->last_addr = addr; - } - - return item != NULL; + if (item == NULL) + return NULL; + + if (aux_return) + *(void **)aux_return = (void *)((char *)item->key + item->key_size); + + update_cache_last(cache, cache_id, item->bo); + + dri_bo_reference(item->bo); + return item->bo; } -GLuint brw_upload_cache( struct brw_cache *cache, - const void *key, - GLuint key_size, - const void *data, - GLuint data_size, - const void *aux, - void *aux_return ) -{ - GLuint offset; +dri_bo * +brw_upload_cache( struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *key, + GLuint key_size, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs, + const void *data, + GLuint data_size, + const void *aux, + void *aux_return ) +{ struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item); GLuint hash = hash_key(key, key_size); - void *tmp = _mesa_malloc(key_size + cache->aux_size); - - if (!brw_pool_alloc(cache->pool, data_size, 1 << 6, &offset)) { - /* Should not be possible: - */ - _mesa_printf("brw_pool_alloc failed\n"); - exit(1); - } + GLuint relocs_size = nr_reloc_bufs * sizeof(dri_bo *); + GLuint aux_size = cache->aux_size[cache_id]; + void *tmp; + dri_bo *bo; + int i; + + /* Create the buffer object to contain the data */ + bo = dri_bo_alloc(cache->brw->intel.bufmgr, + cache->name[cache_id], data_size, 1 << 6, + DRM_BO_FLAG_MEM_LOCAL | + DRM_BO_FLAG_CACHED | + DRM_BO_FLAG_CACHED_MAPPED); + + + /* Set up the memory containing the key, aux_data, and reloc_bufs */ + tmp = _mesa_malloc(key_size + aux_size + relocs_size); memcpy(tmp, key, key_size); + memcpy(tmp + key_size, aux, cache->aux_size[cache_id]); + memcpy(tmp + key_size + aux_size, reloc_bufs, relocs_size); + for (i = 0; i < nr_reloc_bufs; i++) { + if (reloc_bufs[i] != NULL) + dri_bo_reference(reloc_bufs[i]); + } - if (cache->aux_size) - memcpy(tmp+key_size, aux, cache->aux_size); - + item->cache_id = cache_id; item->key = tmp; item->hash = hash; item->key_size = key_size; - item->offset = offset; + item->reloc_bufs = tmp + key_size + aux_size; + item->nr_reloc_bufs = nr_reloc_bufs; + + item->bo = bo; + dri_bo_reference(bo); item->data_size = data_size; - if (++cache->n_items > cache->size * 1.5) + if (cache->n_items > cache->size * 1.5) rehash(cache); - + hash %= cache->size; item->next = cache->items[hash]; cache->items[hash] = item; - + cache->n_items++; + if (aux_return) { - assert(cache->aux_size); + assert(cache->aux_size[cache_id]); *(void **)aux_return = (void *)((char *)item->key + item->key_size); } if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("upload %s: %d bytes to pool buffer %d offset %x\n", - cache->name, - data_size, - cache->pool->buffer, - offset); + _mesa_printf("upload %s: %d bytes to cache id %d\n", + cache->name[cache_id], + data_size); - /* Copy data to the buffer: - */ - dri_bo_subdata(cache->pool->buffer, offset, data_size, data); + /* Copy data to the buffer */ + dri_bo_subdata(bo, 0, data_size, data); - cache->brw->state.dirty.cache |= 1<id; - cache->last_addr = offset; + update_cache_last(cache, cache_id, bo); - return offset; + return bo; } /* This doesn't really work with aux data. Use search/upload instead */ -GLuint brw_cache_data_sz(struct brw_cache *cache, - const void *data, - GLuint data_size) +dri_bo * +brw_cache_data_sz(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + GLuint data_size, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs) { - GLuint addr; + dri_bo *bo; + struct brw_cache_item *item; + GLuint hash = hash_key(data, data_size); - if (!brw_search_cache(cache, data, data_size, NULL, &addr)) { - addr = brw_upload_cache(cache, - data, data_size, - data, data_size, - NULL, NULL); + item = search_cache(cache, cache_id, hash, data, data_size, + reloc_bufs, nr_reloc_bufs); + if (item) { + dri_bo_reference(item->bo); + return item->bo; } - return addr; + bo = brw_upload_cache(cache, cache_id, + data, data_size, + reloc_bufs, nr_reloc_bufs, + data, data_size, + NULL, NULL); + + return bo; } -GLuint brw_cache_data(struct brw_cache *cache, - const void *data) +/** + * Wrapper around brw_cache_data_sz using the cache_id's canonical key size. + * + * If nr_reloc_bufs is nonzero, brw_search_cache()/brw_upload_cache() would be + * better to use, as the potentially changing offsets in the data-used-as-key + * will result in excessive cache misses. + */ +dri_bo * +brw_cache_data(struct brw_cache *cache, + enum brw_cache_id cache_id, + const void *data, + dri_bo **reloc_bufs, + GLuint nr_reloc_bufs) { - return brw_cache_data_sz(cache, data, cache->key_size); + return brw_cache_data_sz(cache, cache_id, data, cache->key_size[cache_id], + reloc_bufs, nr_reloc_bufs); } enum pool_type { @@ -224,18 +297,25 @@ enum pool_type { DW_GENERAL_STATE }; -static void brw_init_cache( struct brw_context *brw, - const char *name, - GLuint id, - GLuint key_size, - GLuint aux_size, - enum pool_type pool_type) +static void +brw_init_cache_id( struct brw_context *brw, + const char *name, + enum brw_cache_id id, + GLuint key_size, + GLuint aux_size) { - struct brw_cache *cache = &brw->cache[id]; + struct brw_cache *cache = &brw->cache; + + cache->name[id] = strdup(name); + cache->key_size[id] = key_size; + cache->aux_size[id] = aux_size; +} + +void brw_init_cache( struct brw_context *brw ) +{ + struct brw_cache *cache = &brw->cache; + cache->brw = brw; - cache->id = id; - cache->name = name; - cache->items = NULL; cache->size = 7; cache->n_items = 0; @@ -243,137 +323,107 @@ static void brw_init_cache( struct brw_context *brw, _mesa_calloc(cache->size * sizeof(struct brw_cache_item)); - - cache->key_size = key_size; - cache->aux_size = aux_size; - switch (pool_type) { - case DW_GENERAL_STATE: cache->pool = &brw->pool[BRW_GS_POOL]; break; - case DW_SURFACE_STATE: cache->pool = &brw->pool[BRW_SS_POOL]; break; - default: assert(0); break; - } -} - -void brw_init_caches( struct brw_context *brw ) -{ - - brw_init_cache(brw, - "CC_VP", - BRW_CC_VP, - sizeof(struct brw_cc_viewport), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "CC_UNIT", - BRW_CC_UNIT, - sizeof(struct brw_cc_unit_state), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "WM_PROG", - BRW_WM_PROG, - sizeof(struct brw_wm_prog_key), - sizeof(struct brw_wm_prog_data), - DW_GENERAL_STATE); - - brw_init_cache(brw, - "SAMPLER_DEFAULT_COLOR", - BRW_SAMPLER_DEFAULT_COLOR, - sizeof(struct brw_sampler_default_color), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "SAMPLER", - BRW_SAMPLER, - 0, /* variable key/data size */ - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "WM_UNIT", - BRW_WM_UNIT, - sizeof(struct brw_wm_unit_state), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "SF_PROG", - BRW_SF_PROG, - sizeof(struct brw_sf_prog_key), - sizeof(struct brw_sf_prog_data), - DW_GENERAL_STATE); - - brw_init_cache(brw, - "SF_VP", - BRW_SF_VP, - sizeof(struct brw_sf_viewport), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "SF_UNIT", - BRW_SF_UNIT, - sizeof(struct brw_sf_unit_state), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "VS_UNIT", - BRW_VS_UNIT, - sizeof(struct brw_vs_unit_state), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "VS_PROG", - BRW_VS_PROG, - sizeof(struct brw_vs_prog_key), - sizeof(struct brw_vs_prog_data), - DW_GENERAL_STATE); - - brw_init_cache(brw, - "CLIP_UNIT", - BRW_CLIP_UNIT, - sizeof(struct brw_clip_unit_state), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "CLIP_PROG", - BRW_CLIP_PROG, - sizeof(struct brw_clip_prog_key), - sizeof(struct brw_clip_prog_data), - DW_GENERAL_STATE); - - brw_init_cache(brw, - "GS_UNIT", - BRW_GS_UNIT, - sizeof(struct brw_gs_unit_state), - 0, - DW_GENERAL_STATE); - - brw_init_cache(brw, - "GS_PROG", - BRW_GS_PROG, - sizeof(struct brw_gs_prog_key), - sizeof(struct brw_gs_prog_data), - DW_GENERAL_STATE); - - brw_init_cache(brw, - "SS_SURFACE", - BRW_SS_SURFACE, - sizeof(struct brw_surface_state), - 0, - DW_SURFACE_STATE); - - brw_init_cache(brw, - "SS_SURF_BIND", - BRW_SS_SURF_BIND, - sizeof(struct brw_surface_binding_table), - 0, - DW_SURFACE_STATE); + brw_init_cache_id(brw, + "CC_VP", + BRW_CC_VP, + sizeof(struct brw_cc_viewport), + 0); + + brw_init_cache_id(brw, + "CC_UNIT", + BRW_CC_UNIT, + sizeof(struct brw_cc_unit_state), + 0); + + brw_init_cache_id(brw, + "WM_PROG", + BRW_WM_PROG, + sizeof(struct brw_wm_prog_key), + sizeof(struct brw_wm_prog_data)); + + brw_init_cache_id(brw, + "SAMPLER_DEFAULT_COLOR", + BRW_SAMPLER_DEFAULT_COLOR, + sizeof(struct brw_sampler_default_color), + 0); + + brw_init_cache_id(brw, + "SAMPLER", + BRW_SAMPLER, + 0, /* variable key/data size */ + 0); + + brw_init_cache_id(brw, + "WM_UNIT", + BRW_WM_UNIT, + sizeof(struct brw_wm_unit_state), + 0); + + brw_init_cache_id(brw, + "SF_PROG", + BRW_SF_PROG, + sizeof(struct brw_sf_prog_key), + sizeof(struct brw_sf_prog_data)); + + brw_init_cache_id(brw, + "SF_VP", + BRW_SF_VP, + sizeof(struct brw_sf_viewport), + 0); + + brw_init_cache_id(brw, + "SF_UNIT", + BRW_SF_UNIT, + sizeof(struct brw_sf_unit_state), + 0); + + brw_init_cache_id(brw, + "VS_UNIT", + BRW_VS_UNIT, + sizeof(struct brw_vs_unit_state), + 0); + + brw_init_cache_id(brw, + "VS_PROG", + BRW_VS_PROG, + sizeof(struct brw_vs_prog_key), + sizeof(struct brw_vs_prog_data)); + + brw_init_cache_id(brw, + "CLIP_UNIT", + BRW_CLIP_UNIT, + sizeof(struct brw_clip_unit_state), + 0); + + brw_init_cache_id(brw, + "CLIP_PROG", + BRW_CLIP_PROG, + sizeof(struct brw_clip_prog_key), + sizeof(struct brw_clip_prog_data)); + + brw_init_cache_id(brw, + "GS_UNIT", + BRW_GS_UNIT, + sizeof(struct brw_gs_unit_state), + 0); + + brw_init_cache_id(brw, + "GS_PROG", + BRW_GS_PROG, + sizeof(struct brw_gs_prog_key), + sizeof(struct brw_gs_prog_data)); + + brw_init_cache_id(brw, + "SS_SURFACE", + BRW_SS_SURFACE, + sizeof(struct brw_surface_state), + 0); + + brw_init_cache_id(brw, + "SS_SURF_BIND", + BRW_SS_SURF_BIND, + 0, + 0); } @@ -399,7 +449,12 @@ static void clear_cache( struct brw_cache *cache ) for (i = 0; i < cache->size; i++) { for (c = cache->items[i]; c; c = next) { + int j; + next = c->next; + for (j = 0; j < c->nr_reloc_bufs; j++) + dri_bo_unreference(c->reloc_bufs[j]); + dri_bo_unreference(c->bo); free((void *)c->key); free(c); } @@ -409,15 +464,12 @@ static void clear_cache( struct brw_cache *cache ) cache->n_items = 0; } -void brw_clear_all_caches( struct brw_context *brw ) +void brw_clear_cache( struct brw_context *brw ) { - GLint i; - if (INTEL_DEBUG & DEBUG_STATE) _mesa_printf("%s\n", __FUNCTION__); - for (i = 0; i < BRW_MAX_CACHE; i++) - clear_cache(&brw->cache[i]); + clear_cache(&brw->cache); if (brw->curbe.last_buf) { _mesa_free(brw->curbe.last_buf); @@ -429,14 +481,24 @@ void brw_clear_all_caches( struct brw_context *brw ) brw->state.dirty.cache |= ~0; } +void brw_state_cache_check_size( struct brw_context *brw ) +{ + /* un-tuned guess. We've got around 20 state objects for a total of around + * 32k, so 1000 of them is around 1.5MB. + */ + if (brw->cache.n_items > 1000) + brw_clear_cache(brw); +} - - - -void brw_destroy_caches( struct brw_context *brw ) +void brw_destroy_cache( struct brw_context *brw ) { GLuint i; + clear_cache(&brw->cache); for (i = 0; i < BRW_MAX_CACHE; i++) - clear_cache(&brw->cache[i]); + free(brw->cache.name[i]); + + free(brw->cache.items); + brw->cache.items = NULL; + brw->cache.size = 0; } diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c b/src/mesa/drivers/dri/i965/brw_state_dump.c index 1e8fc972755..0bdccae0d69 100644 --- a/src/mesa/drivers/dri/i965/brw_state_dump.c +++ b/src/mesa/drivers/dri/i965/brw_state_dump.c @@ -41,13 +41,13 @@ * \param index Index of the DWORD being output. */ static void -state_out(char *name, uint32_t *data, uint32_t hw_offset, int index, +state_out(const char *name, void *data, uint32_t hw_offset, int index, char *fmt, ...) { va_list va; fprintf(stderr, "%8s: 0x%08x: 0x%08x: ", - name, hw_offset + index * 4, data[index]); + name, hw_offset + index * 4, ((uint32_t *)data)[index]); va_start(va, fmt); vfprintf(stderr, fmt, va); va_end(va); @@ -55,43 +55,100 @@ state_out(char *name, uint32_t *data, uint32_t hw_offset, int index, /** Generic, undecoded state buffer debug printout */ static void -state_struct_out(char *name, dri_bo *buffer, unsigned int pool_offset, - unsigned int state_size) +state_struct_out(const char *name, dri_bo *buffer, unsigned int state_size) { int i; - uint32_t *state; - state = buffer->virtual + pool_offset; + if (buffer == NULL) + return; + + dri_bo_map(buffer, GL_FALSE); for (i = 0; i < state_size / 4; i++) { - state_out(name, state, buffer->offset + pool_offset, i, + state_out(name, buffer->virtual, buffer->offset, i, "dword %d\n", i); } + dri_bo_unmap(buffer); } -static void dump_wm_surface_state(struct brw_context *brw, dri_bo *ss_buffer) +static void dump_wm_surface_state(struct brw_context *brw) { int i; for (i = 0; i < brw->wm.nr_surfaces; i++) { - unsigned int surfoff = ss_buffer->offset + brw->wm.bind.surf_ss_offset[i]; - struct brw_surface_state *surf = - (struct brw_surface_state *)(ss_buffer->virtual + - brw->wm.bind.surf_ss_offset[i]); - uint32_t *surfvals = (uint32_t *)surf; + dri_bo *surf_bo = brw->wm.surf_bo[i]; + unsigned int surfoff; + struct brw_surface_state *surf; char name[20]; + dri_bo_map(surf_bo, GL_FALSE); + surfoff = surf_bo->offset; + surf = (struct brw_surface_state *)(surf_bo->virtual); + sprintf(name, "WM SS%d", i); - state_out(name, surfvals, surfoff, 0, "\n"); - state_out(name, surfvals, surfoff, 1, "offset\n"); - state_out(name, surfvals, surfoff, 2, "%dx%d size, %d mips\n", + state_out(name, surf, surfoff, 0, "\n"); + state_out(name, surf, surfoff, 1, "offset\n"); + state_out(name, surf, surfoff, 2, "%dx%d size, %d mips\n", surf->ss2.width + 1, surf->ss2.height + 1, surf->ss2.mip_count); - state_out(name, surfvals, surfoff, 3, "pitch %d, %stiled\n", + state_out(name, surf, surfoff, 3, "pitch %d, %stiled\n", surf->ss3.pitch + 1, surf->ss3.tiled_surface ? "" : "not "); - state_out(name, surfvals, surfoff, 4, "mip base %d\n", + state_out(name, surf, surfoff, 4, "mip base %d\n", surf->ss4.min_lod); + + dri_bo_unmap(surf_bo); + } +} + +static void dump_sf_viewport_state(struct brw_context *brw) +{ + const char *name = "SF VP"; + struct brw_sf_viewport *vp; + uint32_t vp_off; + + if (brw->sf.vp_bo == NULL) + return; + + dri_bo_map(brw->sf.vp_bo, GL_FALSE); + + vp = brw->sf.vp_bo->virtual; + vp_off = brw->sf.vp_bo->offset; + + state_out(name, vp, vp_off, 0, "m00 = %f\n", vp->viewport.m00); + state_out(name, vp, vp_off, 1, "m11 = %f\n", vp->viewport.m11); + state_out(name, vp, vp_off, 2, "m22 = %f\n", vp->viewport.m22); + state_out(name, vp, vp_off, 3, "m30 = %f\n", vp->viewport.m30); + state_out(name, vp, vp_off, 4, "m31 = %f\n", vp->viewport.m31); + state_out(name, vp, vp_off, 5, "m32 = %f\n", vp->viewport.m32); + + state_out(name, vp, vp_off, 6, "top left = %d,%d\n", + vp->scissor.xmin, vp->scissor.ymin); + state_out(name, vp, vp_off, 7, "bottom right = %d,%d\n", + vp->scissor.xmax, vp->scissor.ymax); + + dri_bo_unmap(brw->sf.vp_bo); +} + +static void brw_debug_prog(const char *name, dri_bo *prog) +{ + unsigned int i; + uint32_t *data; + + if (prog == NULL) + return; + + dri_bo_map(prog, GL_FALSE); + + data = prog->virtual; + + for (i = 0; i < prog->size / 4 / 4; i++) { + fprintf(stderr, "%8s: 0x%08x: 0x%08x 0x%08x 0x%08x 0x%08x\n", + name, (unsigned int)prog->offset + i * 4 * 4, + data[i * 4], data[i * 4 + 1], data[i * 4 + 2], data[i * 4 + 3]); } + + dri_bo_unmap(prog); } + /** * Print additional debug information associated with the batchbuffer * when DEBUG_BATCH is set. @@ -105,27 +162,20 @@ static void dump_wm_surface_state(struct brw_context *brw, dri_bo *ss_buffer) void brw_debug_batch(struct intel_context *intel) { struct brw_context *brw = brw_context(&intel->ctx); - dri_bo *ss_buffer, *gs_buffer; - ss_buffer = brw->pool[BRW_SS_POOL].buffer; - gs_buffer = brw->pool[BRW_GS_POOL].buffer; + state_struct_out("WM bind", brw->wm.bind_bo, 4 * brw->wm.nr_surfaces); + dump_wm_surface_state(brw); - dri_bo_map(ss_buffer, GL_FALSE); - dri_bo_map(gs_buffer, GL_FALSE); + state_struct_out("VS", brw->vs.state_bo, sizeof(struct brw_vs_unit_state)); + brw_debug_prog("VS prog", brw->vs.prog_bo); - state_struct_out("WM bind", ss_buffer, brw->wm.bind_ss_offset, - 4 * brw->wm.nr_surfaces); - dump_wm_surface_state(brw, ss_buffer); + state_struct_out("GS", brw->gs.state_bo, sizeof(struct brw_gs_unit_state)); + brw_debug_prog("GS prog", brw->gs.prog_bo); - state_struct_out("VS", gs_buffer, brw->vs.state_gs_offset, - sizeof(struct brw_vs_unit_state)); - state_struct_out("SF", gs_buffer, brw->sf.state_gs_offset, - sizeof(struct brw_sf_unit_state)); - state_struct_out("SF viewport", gs_buffer, brw->sf.state_gs_offset, - sizeof(struct brw_sf_unit_state)); - state_struct_out("WM", gs_buffer, brw->wm.state_gs_offset, - sizeof(struct brw_wm_unit_state)); + state_struct_out("SF", brw->sf.state_bo, sizeof(struct brw_sf_unit_state)); + dump_sf_viewport_state(brw); + brw_debug_prog("SF prog", brw->vs.prog_bo); - dri_bo_unmap(gs_buffer); - dri_bo_unmap(ss_buffer); + state_struct_out("WM", brw->sf.state_bo, sizeof(struct brw_wm_unit_state)); + brw_debug_prog("WM prog", brw->vs.prog_bo); } diff --git a/src/mesa/drivers/dri/i965/brw_state_pool.c b/src/mesa/drivers/dri/i965/brw_state_pool.c deleted file mode 100644 index 148bb516a69..00000000000 --- a/src/mesa/drivers/dri/i965/brw_state_pool.c +++ /dev/null @@ -1,141 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#include "brw_state.h" -#include "imports.h" - -#include "intel_ioctl.h" -#include "dri_bufmgr.h" - -GLboolean brw_pool_alloc( struct brw_mem_pool *pool, - GLuint size, - GLuint align, - GLuint *offset_return) -{ - GLuint fixup = ALIGN(pool->offset, align) - pool->offset; - - size = ALIGN(size, 4); - - if (pool->offset + fixup + size >= pool->size) { - _mesa_printf("%s failed\n", __FUNCTION__); - assert(0); - exit(0); - } - - pool->offset += fixup; - *offset_return = pool->offset; - pool->offset += size; - - return GL_TRUE; -} - -static -void brw_invalidate_pool( struct intel_context *intel, - struct brw_mem_pool *pool ) -{ - if (INTEL_DEBUG & DEBUG_STATE) - _mesa_printf("\n\n\n %s \n\n\n", __FUNCTION__); - - pool->offset = 0; - - brw_clear_all_caches(pool->brw); -} - -static void -brw_invalidate_pool_cb(dri_bo *bo, void *ptr) -{ - struct brw_mem_pool *pool = ptr; - struct brw_context *brw = pool->brw; - - brw_invalidate_pool(&brw->intel, pool); -} - -static void brw_init_pool( struct brw_context *brw, - GLuint pool_id, - GLuint size ) -{ - struct brw_mem_pool *pool = &brw->pool[pool_id]; - - pool->size = size; - pool->brw = brw; - - pool->buffer = dri_bo_alloc(brw->intel.bufmgr, - (pool_id == BRW_GS_POOL) ? "GS pool" : "SS pool", - size, 4096, DRM_BO_FLAG_MEM_TT); - - /* Disable the backing store for the state cache. It's not worth the - * cost of keeping a backing store copy, since we can just regenerate - * the contents at approximately the same cost as the memcpy, and only - * if the contents are lost. - */ - if (!brw->intel.ttm) { - dri_bo_fake_disable_backing_store(pool->buffer, brw_invalidate_pool_cb, - pool); - } -} - -static void brw_destroy_pool( struct brw_context *brw, - GLuint pool_id ) -{ - struct brw_mem_pool *pool = &brw->pool[pool_id]; - - dri_bo_unreference(pool->buffer); -} - - -void brw_pool_check_wrap( struct brw_context *brw, - struct brw_mem_pool *pool ) -{ - if (pool->offset > (pool->size * 3) / 4) { - brw->state.dirty.brw |= BRW_NEW_CONTEXT; - } - -} - -void brw_init_pools( struct brw_context *brw ) -{ - brw_init_pool(brw, BRW_GS_POOL, 0x80000); - brw_init_pool(brw, BRW_SS_POOL, 0x80000); -} - -void brw_destroy_pools( struct brw_context *brw ) -{ - brw_destroy_pool(brw, BRW_GS_POOL); - brw_destroy_pool(brw, BRW_SS_POOL); -} - - -void brw_invalidate_pools( struct brw_context *brw ) -{ - brw_invalidate_pool(&brw->intel, &brw->pool[BRW_GS_POOL]); - brw_invalidate_pool(&brw->intel, &brw->pool[BRW_SS_POOL]); -} diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 98637a60975..94165da8164 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -112,8 +112,7 @@ void brw_init_state( struct brw_context *brw ) { GLuint i; - brw_init_pools(brw); - brw_init_caches(brw); + brw_init_cache(brw); brw->state.atoms = _mesa_malloc(sizeof(atoms)); brw->state.nr_atoms = sizeof(atoms)/sizeof(*atoms); @@ -138,9 +137,8 @@ void brw_destroy_state( struct brw_context *brw ) brw->state.atoms = NULL; } - brw_destroy_caches(brw); + brw_destroy_cache(brw); brw_destroy_batch_cache(brw); - brw_destroy_pools(brw); } /*********************************************************************** diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 10fee944e8d..b4500ea8a5f 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -239,39 +239,39 @@ struct brw_pipelined_state_pointers struct { GLuint pad:5; - GLuint offset:27; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ } vs; struct { GLuint enable:1; GLuint pad:4; - GLuint offset:27; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ } gs; struct { GLuint enable:1; GLuint pad:4; - GLuint offset:27; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ } clp; struct { GLuint pad:5; - GLuint offset:27; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ } sf; struct { GLuint pad:5; - GLuint offset:27; + GLuint offset:27; /* Offset from GENERAL_STATE_BASE */ } wm; struct { GLuint pad:5; - GLuint offset:27; /* KW: check me! */ + GLuint offset:27; /* Offset from GENERAL_STATE_BASE. KW: check me! */ } cc; }; @@ -473,7 +473,7 @@ struct thread0 GLuint pad0:1; GLuint grf_reg_count:3; GLuint pad1:2; - GLuint kernel_start_pointer:26; + GLuint kernel_start_pointer:26; /* Offset from GENERAL_STATE_BASE */ }; struct thread1 @@ -637,7 +637,7 @@ struct brw_cc_unit_state struct { GLuint pad0:5; - GLuint cc_viewport_state_offset:27; + GLuint cc_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */ } cc4; struct @@ -699,7 +699,7 @@ struct brw_sf_unit_state GLuint front_winding:1; GLuint viewport_transform:1; GLuint pad0:3; - GLuint sf_viewport_state_offset:27; + GLuint sf_viewport_state_offset:27; /* Offset from GENERAL_STATE_BASE */ } sf5; struct diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index e173f6fce3e..038d7f7911c 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -73,15 +73,13 @@ static void do_vs_prog( struct brw_context *brw, */ program = brw_get_program(&c.func, &program_size); - /* - */ - brw->vs.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_VS_PROG], - &c.key, - sizeof(c.key), - program, - program_size, - &c.prog_data, - &brw->vs.prog_data); + dri_bo_unreference(brw->vs.prog_bo); + brw->vs.prog_bo = brw_upload_cache( &brw->cache, BRW_VS_PROG, + &c.key, sizeof(c.key), + NULL, 0, + program, program_size, + &c.prog_data, + &brw->vs.prog_data ); } @@ -110,13 +108,13 @@ static void brw_upload_vs_prog( struct brw_context *brw ) /* Make an early check for the key. */ - if (brw_search_cache(&brw->cache[BRW_VS_PROG], - &key, sizeof(key), - &brw->vs.prog_data, - &brw->vs.prog_gs_offset)) - return; - - do_vs_prog(brw, vp, &key); + dri_bo_unreference(brw->vs.prog_bo); + brw->vs.prog_bo = brw_search_cache(&brw->cache, BRW_VS_PROG, + &key, sizeof(key), + NULL, 0, + &brw->vs.prog_data); + if (brw->vs.prog_bo == NULL) + do_vs_prog(brw, vp, &key); } diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c index f561979138c..2d788d35eca 100644 --- a/src/mesa/drivers/dri/i965/brw_vs_state.c +++ b/src/mesa/drivers/dri/i965/brw_vs_state.c @@ -43,7 +43,7 @@ static void upload_vs_unit( struct brw_context *brw ) memset(&vs, 0, sizeof(vs)); /* CACHE_NEW_VS_PROG */ - vs.thread0.kernel_start_pointer = brw->vs.prog_gs_offset >> 6; + vs.thread0.kernel_start_pointer = brw->vs.prog_bo->offset >> 6; /* reloc */ vs.thread0.grf_reg_count = ALIGN(brw->vs.prog_data->total_grf, 16) / 16 - 1; vs.thread3.urb_entry_read_length = brw->vs.prog_data->urb_read_length; vs.thread3.const_urb_entry_read_length = brw->vs.prog_data->curb_read_length; @@ -87,9 +87,22 @@ static void upload_vs_unit( struct brw_context *brw ) */ vs.vs6.vs_enable = 1; - brw->vs.state_gs_offset = brw_cache_data( &brw->cache[BRW_VS_UNIT], &vs ); + brw->vs.thread0_delta = vs.thread0.grf_reg_count << 1; + + dri_bo_unreference(brw->vs.state_bo); + brw->vs.state_bo = brw_cache_data( &brw->cache, BRW_VS_UNIT , &vs, + &brw->vs.prog_bo, 1 ); } +static void emit_reloc_vs_unit(struct brw_context *brw) +{ + /* Emit VS program relocation */ + dri_emit_reloc(brw->vs.state_bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + brw->vs.thread0_delta, + offsetof(struct brw_vs_unit_state, thread0), + brw->vs.prog_bo); +} const struct brw_tracked_state brw_vs_unit = { .dirty = { @@ -98,5 +111,6 @@ const struct brw_tracked_state brw_vs_unit = { BRW_NEW_URB_FENCE), .cache = CACHE_NEW_VS_PROG }, - .update = upload_vs_unit + .update = upload_vs_unit, + .emit_reloc = emit_reloc_vs_unit, }; diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c index b9dc9ad1809..45e1b7235db 100644 --- a/src/mesa/drivers/dri/i965/brw_vtbl.c +++ b/src/mesa/drivers/dri/i965/brw_vtbl.c @@ -112,10 +112,9 @@ static void brw_note_fence( struct intel_context *intel, static void brw_note_unlock( struct intel_context *intel ) { - struct brw_context *brw = brw_context(&intel->ctx); + struct brw_context *brw = brw_context(&intel->ctx); - brw_pool_check_wrap(brw, &brw->pool[BRW_GS_POOL]); - brw_pool_check_wrap(brw, &brw->pool[BRW_SS_POOL]); + brw_state_cache_check_size(brw); brw_context(&intel->ctx)->state.dirty.brw |= BRW_NEW_LOCK; } diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 2d6249e3b56..009acc7680b 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -201,15 +201,13 @@ static void do_wm_prog( struct brw_context *brw, */ program = brw_get_program(&c->func, &program_size); - /* - */ - brw->wm.prog_gs_offset = brw_upload_cache( &brw->cache[BRW_WM_PROG], - &c->key, - sizeof(c->key), - program, - program_size, - &c->prog_data, - &brw->wm.prog_data ); + dri_bo_unreference(brw->wm.prog_bo); + brw->wm.prog_bo = brw_upload_cache( &brw->cache, BRW_WM_PROG, + &c->key, sizeof(c->key), + NULL, 0, + program, program_size, + &c->prog_data, + &brw->wm.prog_data ); } @@ -331,13 +329,13 @@ static void brw_upload_wm_prog( struct brw_context *brw ) /* Make an early check for the key. */ - if (brw_search_cache(&brw->cache[BRW_WM_PROG], - &key, sizeof(key), - &brw->wm.prog_data, - &brw->wm.prog_gs_offset)) - return; - - do_wm_prog(brw, fp, &key); + dri_bo_unreference(brw->wm.prog_bo); + brw->wm.prog_bo = brw_search_cache(&brw->cache, BRW_WM_PROG, + &key, sizeof(key), + NULL, 0, + &brw->wm.prog_data); + if (brw->wm.prog_bo == NULL) + do_wm_prog(brw, fp, &key); } diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c index 3c0952acf0d..d410b1e804f 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c @@ -79,14 +79,15 @@ static GLint S_FIXED(GLfloat value, GLuint frac_bits) } -static GLuint upload_default_color( struct brw_context *brw, - const GLfloat *color ) +static dri_bo *upload_default_color( struct brw_context *brw, + const GLfloat *color ) { struct brw_sampler_default_color sdc; COPY_4V(sdc.color, color); - return brw_cache_data( &brw->cache[BRW_SAMPLER_DEFAULT_COLOR], &sdc ); + return brw_cache_data( &brw->cache, BRW_SAMPLER_DEFAULT_COLOR, &sdc, + NULL, 0 ); } @@ -94,7 +95,7 @@ static GLuint upload_default_color( struct brw_context *brw, */ static void brw_update_sampler_state( struct gl_texture_unit *texUnit, struct gl_texture_object *texObj, - GLuint sdc_gs_offset, + dri_bo *sdc_bo, struct brw_sampler_state *sampler) { _mesa_memset(sampler, 0, sizeof(*sampler)); @@ -195,7 +196,7 @@ static void brw_update_sampler_state( struct gl_texture_unit *texUnit, sampler->ss1.max_lod = U_FIXED(MIN2(MAX2(texObj->MaxLod, 0), 13), 6); sampler->ss1.min_lod = U_FIXED(MIN2(MAX2(texObj->MinLod, 0), 13), 6); - sampler->ss2.default_color_pointer = sdc_gs_offset >> 5; + sampler->ss2.default_color_pointer = sdc_bo->offset >> 5; /* reloc */ } @@ -208,6 +209,7 @@ static void upload_wm_samplers( struct brw_context *brw ) { GLuint unit; GLuint sampler_count = 0; + dri_bo *reloc_bufs[BRW_MAX_TEX_UNIT]; /* _NEW_TEXTURE */ for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { @@ -215,15 +217,20 @@ static void upload_wm_samplers( struct brw_context *brw ) struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit]; struct gl_texture_object *texObj = texUnit->_Current; - GLuint sdc_gs_offset = upload_default_color(brw, texObj->BorderColor); + dri_bo_unreference(brw->wm.sdc_bo[unit]); + brw->wm.sdc_bo[unit] = upload_default_color(brw, texObj->BorderColor); brw_update_sampler_state(texUnit, - texObj, - sdc_gs_offset, + texObj, + brw->wm.sdc_bo[unit], &brw->wm.sampler[unit]); sampler_count = unit + 1; + } else { + dri_bo_unreference(brw->wm.sdc_bo[unit]); + brw->wm.sdc_bo[unit] = NULL; } + reloc_bufs[unit] = brw->wm.sdc_bo[unit]; } if (brw->wm.sampler_count != sampler_count) { @@ -231,15 +238,39 @@ static void upload_wm_samplers( struct brw_context *brw ) brw->state.dirty.cache |= CACHE_NEW_SAMPLER; } - brw->wm.sampler_gs_offset = 0; - - if (brw->wm.sampler_count) - brw->wm.sampler_gs_offset = - brw_cache_data_sz(&brw->cache[BRW_SAMPLER], + dri_bo_unreference(brw->wm.sampler_bo); + if (brw->wm.sampler_count) { + brw->wm.sampler_bo = + brw_cache_data_sz(&brw->cache, BRW_SAMPLER, brw->wm.sampler, - sizeof(struct brw_sampler_state) * brw->wm.sampler_count); + sizeof(struct brw_sampler_state) * + brw->wm.sampler_count, + reloc_bufs, BRW_MAX_TEX_UNIT); + } else { + brw->wm.sampler_bo = NULL; + } } +static void emit_reloc_wm_samplers(struct brw_context *brw) +{ + GLuint unit; + + if (brw->wm.sampler_count == 0) + return; + + /* Emit SDC relocations */ + for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { + if (!brw->attribs.Texture->Unit[unit]._ReallyEnabled) + continue; + + dri_emit_reloc(brw->wm.sampler_bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + 0, + unit * sizeof(struct brw_sampler_state) + + offsetof(struct brw_sampler_state, ss2), + brw->wm.sdc_bo[unit]); + } +} const struct brw_tracked_state brw_wm_samplers = { .dirty = { @@ -247,7 +278,8 @@ const struct brw_tracked_state brw_wm_samplers = { .brw = 0, .cache = 0 }, - .update = upload_wm_samplers + .update = upload_wm_samplers, + .emit_reloc = emit_reloc_wm_samplers, }; diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 76865217bff..8a7236e62f4 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -47,6 +47,7 @@ static void upload_wm_unit(struct brw_context *brw ) struct brw_wm_unit_state wm; GLuint max_threads; GLuint per_thread; + dri_bo *reloc_bufs[3]; if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) max_threads = 0; @@ -58,7 +59,7 @@ static void upload_wm_unit(struct brw_context *brw ) /* CACHE_NEW_WM_PROG */ wm.thread0.grf_reg_count = ALIGN(brw->wm.prog_data->total_grf, 16) / 16 - 1; - wm.thread0.kernel_start_pointer = brw->wm.prog_gs_offset >> 6; + wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */ wm.thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; wm.thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length; wm.thread3.const_urb_entry_read_length = brw->wm.prog_data->curb_read_length; @@ -87,19 +88,21 @@ static void upload_wm_unit(struct brw_context *brw ) 4096, DRM_BO_FLAG_MEM_TT); } } - /* XXX: Scratch buffers are not implemented correectly. - * - * The scratch offset to be programmed into wm is relative to the general - * state base address. However, using dri_bo_alloc/dri_bo_emit_reloc (or - * the previous bmGenBuffers scheme), we get an offset relative to the - * start of framebuffer. Even before then, it was broken in other ways, - * so just fail for now if we hit that path. - */ - assert(brw->wm.prog_data->total_scratch == 0); /* CACHE_NEW_SURFACE */ wm.thread1.binding_table_entry_count = brw->wm.nr_surfaces; + /* CACHE_NEW_WM_PROG */ + if (per_thread != 0) { + /* reloc */ + wm.thread2.scratch_space_base_pointer = + brw->wm.scratch_buffer->offset >> 10; + wm.thread2.per_thread_scratch_space = per_thread / 1024 - 1; + } else { + wm.thread2.scratch_space_base_pointer = 0; + wm.thread2.per_thread_scratch_space = 0; + } + /* BRW_NEW_CURBE_OFFSETS */ wm.thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2; @@ -109,7 +112,12 @@ static void upload_wm_unit(struct brw_context *brw ) /* CACHE_NEW_SAMPLER */ wm.wm4.sampler_count = (brw->wm.sampler_count + 1) / 4; - wm.wm4.sampler_state_pointer = brw->wm.sampler_gs_offset >> 5; + if (brw->wm.sampler_bo != NULL) { + /* reloc */ + wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5; + } else { + wm.wm4.sampler_state_pointer = 0; + } /* BRW_NEW_FRAGMENT_PROGRAM */ { @@ -166,19 +174,44 @@ static void upload_wm_unit(struct brw_context *brw ) if (INTEL_DEBUG & DEBUG_STATS || intel->stats_wm) wm.wm4.stats_enable = 1; - brw->wm.state_gs_offset = brw_cache_data( &brw->cache[BRW_WM_UNIT], &wm ); + reloc_bufs[0] = brw->wm.prog_bo; + reloc_bufs[1] = brw->wm.scratch_buffer; + reloc_bufs[2] = brw->wm.sampler_bo; - if (brw->wm.prog_data->total_scratch) { - /* - dri_emit_reloc(brw->cache[BRW_WM_UNIT].pool->buffer, + brw->wm.thread0_delta = wm.thread0.grf_reg_count << 1; + brw->wm.thread2_delta = wm.thread2.per_thread_scratch_space; + brw->wm.wm4_delta = wm.wm4.stats_enable | (wm.wm4.sampler_count << 2); + + dri_bo_unreference(brw->wm.state_bo); + brw->wm.state_bo = brw_cache_data( &brw->cache, BRW_WM_UNIT, &wm, + reloc_bufs, 3 ); +} + +static void emit_reloc_wm_unit(struct brw_context *brw) +{ + /* Emit WM program relocation */ + dri_emit_reloc(brw->wm.state_bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + brw->wm.thread0_delta, + offsetof(struct brw_wm_unit_state, thread0), + brw->wm.prog_bo); + + /* Emit scratch space relocation */ + if (brw->wm.scratch_buffer != NULL) { + dri_emit_reloc(brw->wm.state_bo, DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, - (per_thread / 1024) - 1, - brw->wm.state_gs_offset + - ((char *)&wm.thread2 - (char *)&wm), + brw->wm.thread2_delta, + offsetof(struct brw_wm_unit_state, thread2), brw->wm.scratch_buffer); - */ - } else { - wm.thread2.scratch_space_base_pointer = 0; + } + + /* Emit sampler state relocation */ + if (brw->wm.sampler_bo != NULL) { + dri_emit_reloc(brw->wm.state_bo, + DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, + brw->wm.wm4_delta, + offsetof(struct brw_wm_unit_state, wm4), + brw->wm.sampler_bo); } } @@ -197,6 +230,7 @@ const struct brw_tracked_state brw_wm_unit = { CACHE_NEW_WM_PROG | CACHE_NEW_SAMPLER) }, - .update = upload_wm_unit + .update = upload_wm_unit, + .emit_reloc = emit_reloc_wm_unit, }; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 2ade4eeae81..affbca89024 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -139,40 +139,46 @@ static GLuint translate_tex_format( GLuint mesa_format ) } } -static -void brw_update_texture_surface( GLcontext *ctx, GLuint unit ) +struct brw_wm_surface_key { + GLenum target; + dri_bo *bo; + GLint format; + GLint first_level, last_level; + GLint width, height, depth; + GLint pitch, cpp; + GLboolean tiled; +}; + +static dri_bo * +brw_create_texture_surface( struct brw_context *brw, + struct brw_wm_surface_key *key ) { - struct brw_context *brw = brw_context(ctx); - struct gl_texture_object *tObj = brw->attribs.Texture->Unit[unit]._Current; - struct intel_texture_object *intelObj = intel_texture_object(tObj); - struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; struct brw_surface_state surf; memset(&surf, 0, sizeof(surf)); surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; - surf.ss0.surface_type = translate_tex_target(tObj->Target); - surf.ss0.surface_format = translate_tex_format(firstImage->TexFormat->MesaFormat); + surf.ss0.surface_type = translate_tex_target(key->target); + surf.ss0.surface_format = translate_tex_format(key->format); /* This is ok for all textures with channel width 8bit or less: */ /* surf.ss0.data_return_format = BRW_SURFACERETURNFORMAT_S1; */ - /* Updated in emit_reloc */ - surf.ss1.base_addr = intelObj->mt->region->buffer->offset; + surf.ss1.base_addr = key->bo->offset; /* reloc */ - surf.ss2.mip_count = intelObj->lastLevel - intelObj->firstLevel; - surf.ss2.width = firstImage->Width - 1; - surf.ss2.height = firstImage->Height - 1; + surf.ss2.mip_count = key->last_level - key->first_level; + surf.ss2.width = key->width - 1; + surf.ss2.height = key->height - 1; surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; - surf.ss3.tiled_surface = intelObj->mt->region->tiled; /* always zero */ - surf.ss3.pitch = (intelObj->mt->pitch * intelObj->mt->cpp) - 1; - surf.ss3.depth = firstImage->Depth - 1; + surf.ss3.tiled_surface = key->tiled; /* always zero */ + surf.ss3.pitch = (key->pitch * key->cpp) - 1; + surf.ss3.depth = key->depth - 1; surf.ss4.min_lod = 0; - if (tObj->Target == GL_TEXTURE_CUBE_MAP) { + if (key->target == GL_TEXTURE_CUBE_MAP) { surf.ss0.cube_pos_x = 1; surf.ss0.cube_pos_y = 1; surf.ss0.cube_pos_z = 1; @@ -181,14 +187,78 @@ void brw_update_texture_surface( GLcontext *ctx, GLuint unit ) surf.ss0.cube_neg_z = 1; } - brw->wm.bind.surf_ss_offset[unit + 1] = - brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); + return brw_upload_cache( &brw->cache, BRW_SS_SURFACE, + key, sizeof(*key), + &key->bo, 1, + &surf, sizeof(surf), + NULL, NULL ); } - +static void +brw_update_texture_surface( GLcontext *ctx, GLuint unit ) +{ + struct brw_context *brw = brw_context(ctx); + struct gl_texture_object *tObj = brw->attribs.Texture->Unit[unit]._Current; + struct intel_texture_object *intelObj = intel_texture_object(tObj); + struct gl_texture_image *firstImage = tObj->Image[0][intelObj->firstLevel]; + struct brw_wm_surface_key key; + + key.target = tObj->Target; + key.format = firstImage->TexFormat->MesaFormat; + key.bo = intelObj->mt->region->buffer; + key.first_level = intelObj->firstLevel; + key.last_level = intelObj->lastLevel; + key.width = firstImage->Width; + key.height = firstImage->Height; + key.pitch = intelObj->mt->pitch; + key.cpp = intelObj->mt->cpp; + key.depth = firstImage->Depth; + key.tiled = intelObj->mt->region->tiled; + + dri_bo_unreference(brw->wm.surf_bo[unit + 1]); + brw->wm.surf_bo[unit + 1] = brw_search_cache(&brw->cache, BRW_SS_SURFACE, + &key, sizeof(key), + &key.bo, 1, + NULL); + if (brw->wm.surf_bo[unit + 1] == NULL) + brw->wm.surf_bo[unit + 1] = brw_create_texture_surface(brw, &key); +} #define OFFSET(TYPE, FIELD) ( (GLuint)&(((TYPE *)0)->FIELD) ) +/** + * Constructs the binding table for the WM surface state, which maps unit + * numbers to surface state objects. + */ +static dri_bo * +brw_wm_get_binding_table(struct brw_context *brw) +{ + dri_bo *bind_bo; + + bind_bo = brw_search_cache(&brw->cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->wm.surf_bo, brw->wm.nr_surfaces, + NULL); + + if (bind_bo == NULL) { + GLuint data_size = brw->wm.nr_surfaces * 4; + uint32_t *data = malloc(data_size); + int i; + + for (i = 0; i < brw->wm.nr_surfaces; i++) + data[i] = brw->wm.surf_bo[i]->offset; + + bind_bo = brw_upload_cache( &brw->cache, BRW_SS_SURF_BIND, + NULL, 0, + brw->wm.surf_bo, brw->wm.nr_surfaces, + data, data_size, + NULL, NULL); + + free(data); + } + + return bind_bo; +} static void upload_wm_surfaces(struct brw_context *brw ) { @@ -219,8 +289,7 @@ static void upload_wm_surfaces(struct brw_context *brw ) surf.ss0.writedisable_blue = !brw->attribs.Color->ColorMask[2]; surf.ss0.writedisable_alpha = !brw->attribs.Color->ColorMask[3]; - /* Updated in emit_reloc */ - surf.ss1.base_addr = region->buffer->offset; + surf.ss1.base_addr = region->buffer->offset; /* reloc */ surf.ss2.width = region->pitch - 1; /* XXX: not really! */ surf.ss2.height = region->height - 1; @@ -228,7 +297,10 @@ static void upload_wm_surfaces(struct brw_context *brw ) surf.ss3.tiled_surface = region->tiled; surf.ss3.pitch = (region->pitch * region->cpp) - 1; - brw->wm.bind.surf_ss_offset[0] = brw_cache_data( &brw->cache[BRW_SS_SURFACE], &surf ); + /* Key size will never match key size for textures, so we're safe. */ + dri_bo_unreference(brw->wm.surf_bo[0]); + brw->wm.surf_bo[0] = brw_cache_data( &brw->cache, BRW_SS_SURFACE, &surf, + ®ion->buffer, 1 ); brw->wm.nr_surfaces = 1; } @@ -248,45 +320,58 @@ static void upload_wm_surfaces(struct brw_context *brw ) else if( texUnit->_ReallyEnabled && texUnit->_Current == intel->frame_buffer_texobj ) { - brw->wm.bind.surf_ss_offset[i+1] = brw->wm.bind.surf_ss_offset[0]; + dri_bo_unreference(brw->wm.surf_bo[i+1]); + brw->wm.surf_bo[i+1] = brw->wm.surf_bo[0]; + dri_bo_reference(brw->wm.surf_bo[i+1]); brw->wm.nr_surfaces = i+2; - } - else { - brw->wm.bind.surf_ss_offset[i+1] = 0; + } else { + dri_bo_unreference(brw->wm.surf_bo[i+1]); + brw->wm.surf_bo[i+1] = NULL; } } - brw->wm.bind_ss_offset = brw_cache_data( &brw->cache[BRW_SS_SURF_BIND], - &brw->wm.bind ); + dri_bo_unreference(brw->wm.bind_bo); + brw->wm.bind_bo = brw_wm_get_binding_table(brw); } static void emit_reloc_wm_surfaces(struct brw_context *brw) { - int unit; + int unit, i; - /* Emit framebuffer relocation */ - dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE), + /* Emit SS framebuffer relocation */ + dri_emit_reloc(brw->wm.surf_bo[0], DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, 0, - brw->wm.bind.surf_ss_offset[0] + offsetof(struct brw_surface_state, ss1), brw->state.draw_region->buffer); - /* Emit relocations for texture buffers */ + /* Emit SS relocations for texture buffers */ for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { struct gl_texture_unit *texUnit = &brw->attribs.Texture->Unit[unit]; struct gl_texture_object *tObj = texUnit->_Current; struct intel_texture_object *intelObj = intel_texture_object(tObj); if (texUnit->_ReallyEnabled && intelObj->mt != NULL) { - dri_emit_reloc(brw_cache_buffer(brw, BRW_SS_SURFACE), + dri_emit_reloc(brw->wm.surf_bo[unit + 1], DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, 0, - brw->wm.bind.surf_ss_offset[unit + 1] + offsetof(struct brw_surface_state, ss1), intelObj->mt->region->buffer); } } + + /* Emit binding table relocations to surface state */ + for (i = 0; i < BRW_WM_MAX_SURF; i++) { + if (brw->wm.surf_bo[i] != NULL) { + dri_emit_reloc(brw->wm.bind_bo, + DRM_BO_FLAG_MEM_TT | + DRM_BO_FLAG_READ | + DRM_BO_FLAG_WRITE, + 0, + i * 4, + brw->wm.surf_bo[i]); + } + } } const struct brw_tracked_state brw_wm_surfaces = {