X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_wm_state.c;h=dd67795e74311127b0fc76c7bb34d94517ceeb9e;hb=caa4ae5d7d864278ffbf5dbd9c25bb2932e91fc5;hp=1789b21451d3118b311b67f3eedcd4a70cfb1539;hpb=bcc13b74443137043e8a34f8cb64a5add0d8af93;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c index 1789b21451d..dd67795e743 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_state.c @@ -31,6 +31,7 @@ +#include "intel_fbo.h" #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" @@ -40,242 +41,209 @@ * WM unit - fragment programs and rasterization */ -struct brw_wm_unit_key { - unsigned int total_grf, total_scratch; - unsigned int urb_entry_read_length; - unsigned int curb_entry_read_length; - unsigned int dispatch_grf_start_reg; - - unsigned int curbe_offset; - unsigned int urb_size; - - unsigned int nr_surfaces, sampler_count; - GLboolean uses_depth, computes_depth, uses_kill, is_glsl; - GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable; - GLfloat offset_units, offset_factor; -}; - -static void -wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key) +bool +brw_color_buffer_write_enabled(struct brw_context *brw) { - GLcontext *ctx = &brw->intel.ctx; + struct gl_context *ctx = &brw->intel.ctx; const struct gl_fragment_program *fp = brw->fragment_program; - const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp; - struct intel_context *intel = &brw->intel; - - memset(key, 0, sizeof(*key)); - - /* CACHE_NEW_WM_PROG */ - key->total_grf = brw->wm.prog_data->total_grf; - key->urb_entry_read_length = brw->wm.prog_data->urb_read_length; - key->curb_entry_read_length = brw->wm.prog_data->curb_read_length; - key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; - key->total_scratch = ALIGN(brw->wm.prog_data->total_scratch, 1024); - - /* BRW_NEW_URB_FENCE */ - key->urb_size = brw->urb.vsize; - - /* BRW_NEW_CURBE_OFFSETS */ - key->curbe_offset = brw->curbe.wm_start; - - /* BRW_NEW_NR_SURFACEs */ - key->nr_surfaces = brw->wm.nr_surfaces; - - /* CACHE_NEW_SAMPLER */ - key->sampler_count = brw->wm.sampler_count; - - /* _NEW_POLYGONSTIPPLE */ - key->polygon_stipple = ctx->Polygon.StippleFlag; - - /* BRW_NEW_FRAGMENT_PROGRAM */ - key->uses_depth = (fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0; - - /* as far as we can tell */ - key->computes_depth = - (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0; - /* BRW_NEW_DEPTH_BUFFER - * Override for NULL depthbuffer case, required by the Pixel Shader Computed - * Depth field. 
- */ - if (brw->state.depth_region == NULL) - key->computes_depth = 0; - - /* _NEW_COLOR */ - key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled; - key->is_glsl = bfp->isGLSL; - - /* temporary sanity check assertion */ - ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp)); - - /* _NEW_DEPTH */ - key->stats_wm = intel->stats_wm; - - /* _NEW_LINE */ - key->line_stipple = ctx->Line.StippleFlag; + int i; + + /* _NEW_BUFFERS */ + for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { + struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i]; + + /* _NEW_COLOR */ + if (rb && + (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_COLOR) || + fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) && + (ctx->Color.ColorMask[i][0] || + ctx->Color.ColorMask[i][1] || + ctx->Color.ColorMask[i][2] || + ctx->Color.ColorMask[i][3])) { + return true; + } + } - /* _NEW_POLYGON */ - key->offset_enable = ctx->Polygon.OffsetFill; - key->offset_units = ctx->Polygon.OffsetUnits; - key->offset_factor = ctx->Polygon.OffsetFactor; + return false; } /** * Setup wm hardware state. See page 225 of Volume 2 */ -static drm_intel_bo * -wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key, - drm_intel_bo **reloc_bufs) +static void +brw_upload_wm_unit(struct brw_context *brw) { struct intel_context *intel = &brw->intel; - struct brw_wm_unit_state wm; - drm_intel_bo *bo; + struct gl_context *ctx = &intel->ctx; + const struct gl_fragment_program *fp = brw->fragment_program; + struct brw_wm_unit_state *wm; - memset(&wm, 0, sizeof(wm)); + wm = brw_state_batch(brw, AUB_TRACE_WM_STATE, + sizeof(*wm), 32, &brw->wm.state_offset); + memset(wm, 0, sizeof(*wm)); - wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; - wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */ - wm.thread1.depth_coef_urb_read_offset = 1; - wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + if (brw->wm.prog_data->prog_offset_16) { + /* These two fields should be the same pre-gen6, which is why we + * only have one hardware field to program for both dispatch + * widths. 
+ */ + assert(brw->wm.prog_data->first_curbe_grf == + brw->wm.prog_data->first_curbe_grf_16); + } - if (intel->gen == 5) - wm.thread1.binding_table_entry_count = 0; /* hardware requirement */ - else - wm.thread1.binding_table_entry_count = key->nr_surfaces; + /* BRW_NEW_PROGRAM_CACHE | CACHE_NEW_WM_PROG */ + wm->thread0.grf_reg_count = brw->wm.prog_data->reg_blocks; + wm->wm9.grf_reg_count_2 = brw->wm.prog_data->reg_blocks_16; + + wm->thread0.kernel_start_pointer = + brw_program_reloc(brw, + brw->wm.state_offset + + offsetof(struct brw_wm_unit_state, thread0), + brw->wm.prog_offset + + (wm->thread0.grf_reg_count << 1)) >> 6; - if (key->total_scratch != 0) { - wm.thread2.scratch_space_base_pointer = + wm->wm9.kernel_start_pointer_2 = + brw_program_reloc(brw, + brw->wm.state_offset + + offsetof(struct brw_wm_unit_state, wm9), + brw->wm.prog_offset + + brw->wm.prog_data->prog_offset_16 + + (wm->wm9.grf_reg_count_2 << 1)) >> 6; + + wm->thread1.depth_coef_urb_read_offset = 1; + wm->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; + + wm->thread1.binding_table_entry_count = 0; + + if (brw->wm.prog_data->total_scratch != 0) { + wm->thread2.scratch_space_base_pointer = brw->wm.scratch_bo->offset >> 10; /* reloc */ - wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1; + wm->thread2.per_thread_scratch_space = + ffs(brw->wm.prog_data->total_scratch) - 11; } else { - wm.thread2.scratch_space_base_pointer = 0; - wm.thread2.per_thread_scratch_space = 0; + wm->thread2.scratch_space_base_pointer = 0; + wm->thread2.per_thread_scratch_space = 0; } - wm.thread3.dispatch_grf_start_reg = key->dispatch_grf_start_reg; - wm.thread3.urb_entry_read_length = key->urb_entry_read_length; - wm.thread3.urb_entry_read_offset = 0; - wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length; - wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2; + wm->thread3.dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf; + wm->thread3.urb_entry_read_length = brw->wm.prog_data->urb_read_length; + wm->thread3.urb_entry_read_offset = 0; + wm->thread3.const_urb_entry_read_length = + brw->wm.prog_data->curb_read_length; + /* BRW_NEW_CURBE_OFFSETS */ + wm->thread3.const_urb_entry_read_offset = brw->curbe.wm_start * 2; if (intel->gen == 5) - wm.wm4.sampler_count = 0; /* hardware requirement */ - else - wm.wm4.sampler_count = (key->sampler_count + 1) / 4; + wm->wm4.sampler_count = 0; /* hardware requirement */ + else { + /* CACHE_NEW_SAMPLER */ + wm->wm4.sampler_count = (brw->sampler.count + 1) / 4; + } - if (brw->wm.sampler_bo != NULL) { + if (brw->sampler.count) { /* reloc */ - wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5; + wm->wm4.sampler_state_pointer = (intel->batch.bo->offset + + brw->sampler.offset) >> 5; } else { - wm.wm4.sampler_state_pointer = 0; + wm->wm4.sampler_state_pointer = 0; } - wm.wm5.program_uses_depth = key->uses_depth; - wm.wm5.program_computes_depth = key->computes_depth; - wm.wm5.program_uses_killpixel = key->uses_kill; + /* BRW_NEW_FRAGMENT_PROGRAM */ + wm->wm5.program_uses_depth = (fp->Base.InputsRead & + (1 << FRAG_ATTRIB_WPOS)) != 0; + wm->wm5.program_computes_depth = (fp->Base.OutputsWritten & + BITFIELD64_BIT(FRAG_RESULT_DEPTH)) != 0; + /* _NEW_BUFFERS + * Override for NULL depthbuffer case, required by the Pixel Shader Computed + * Depth field. 
+    */
+   if (!intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH))
+      wm->wm5.program_computes_depth = 0;
 
-   if (key->is_glsl)
-      wm.wm5.enable_8_pix = 1;
-   else
-      wm.wm5.enable_16_pix = 1;
+   /* _NEW_COLOR */
+   wm->wm5.program_uses_killpixel = fp->UsesKill || ctx->Color.AlphaEnabled;
 
-   wm.wm5.max_threads = brw->wm_max_threads - 1;
-   wm.wm5.thread_dispatch_enable = 1;	/* AKA: color_write */
-   wm.wm5.legacy_line_rast = 0;
-   wm.wm5.legacy_global_depth_bias = 0;
-   wm.wm5.early_depth_test = 1;	        /* never need to disable */
-   wm.wm5.line_aa_region_width = 0;
-   wm.wm5.line_endcap_aa_region_width = 1;
-   wm.wm5.polygon_stipple = key->polygon_stipple;
+   /* BRW_NEW_FRAGMENT_PROGRAM
+    *
+    * If using the fragment shader backend, the program is always
+    * 8-wide.  If not, it's always 16.
+    */
+   if (ctx->Shader._CurrentFragmentProgram) {
+      struct brw_shader *shader = (struct brw_shader *)
+         ctx->Shader._CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT];
+
+      if (shader != NULL && shader->ir != NULL) {
+         wm->wm5.enable_8_pix = 1;
+         if (brw->wm.prog_data->prog_offset_16)
+            wm->wm5.enable_16_pix = 1;
+      }
+   }
+   if (!wm->wm5.enable_8_pix)
+      wm->wm5.enable_16_pix = 1;
+
+   wm->wm5.max_threads = brw->max_wm_threads - 1;
+
+   /* _NEW_BUFFERS | _NEW_COLOR */
+   if (brw_color_buffer_write_enabled(brw) ||
+       wm->wm5.program_uses_killpixel ||
+       wm->wm5.program_computes_depth) {
+      wm->wm5.thread_dispatch_enable = 1;
+   }
+
+   wm->wm5.legacy_line_rast = 0;
+   wm->wm5.legacy_global_depth_bias = 0;
+   wm->wm5.early_depth_test = 1;	        /* never need to disable */
+   wm->wm5.line_aa_region_width = 0;
+   wm->wm5.line_endcap_aa_region_width = 1;
+
+   /* _NEW_POLYGONSTIPPLE */
+   wm->wm5.polygon_stipple = ctx->Polygon.StippleFlag;
 
-   if (key->offset_enable) {
-      wm.wm5.depth_offset = 1;
+   /* _NEW_POLYGON */
+   if (ctx->Polygon.OffsetFill) {
+      wm->wm5.depth_offset = 1;
       /* Something weird going on with legacy_global_depth_bias,
        * offset_constant, scaling and MRD.  This value passes glean
        * but gives some odd results elsewhere (eg. the
        * quad-offset-units test).
*/ - wm.global_depth_offset_constant = key->offset_units * 2; + wm->global_depth_offset_constant = ctx->Polygon.OffsetUnits * 2; /* This is the only value that passes glean: */ - wm.global_depth_offset_scale = key->offset_factor; + wm->global_depth_offset_scale = ctx->Polygon.OffsetFactor; } - wm.wm5.line_stipple = key->line_stipple; - - if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm) - wm.wm4.stats_enable = 1; - - bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT, - key, sizeof(*key), - reloc_bufs, 3, - &wm, sizeof(wm)); + /* _NEW_LINE */ + wm->wm5.line_stipple = ctx->Line.StippleFlag; - /* Emit WM program relocation */ - drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, thread0), - brw->wm.prog_bo, wm.thread0.grf_reg_count << 1, - I915_GEM_DOMAIN_INSTRUCTION, 0); + /* _NEW_DEPTH */ + if (unlikely(INTEL_DEBUG & DEBUG_STATS) || intel->stats_wm) + wm->wm4.stats_enable = 1; /* Emit scratch space relocation */ - if (key->total_scratch != 0) { - drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, thread2), + if (brw->wm.prog_data->total_scratch != 0) { + drm_intel_bo_emit_reloc(intel->batch.bo, + brw->wm.state_offset + + offsetof(struct brw_wm_unit_state, thread2), brw->wm.scratch_bo, - wm.thread2.per_thread_scratch_space, - 0, 0); + wm->thread2.per_thread_scratch_space, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); } /* Emit sampler state relocation */ - if (key->sampler_count != 0) { - drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, wm4), - brw->wm.sampler_bo, (wm.wm4.stats_enable | - (wm.wm4.sampler_count << 2)), + if (brw->sampler.count != 0) { + drm_intel_bo_emit_reloc(intel->batch.bo, + brw->wm.state_offset + + offsetof(struct brw_wm_unit_state, wm4), + intel->batch.bo, (brw->sampler.offset | + wm->wm4.stats_enable | + (wm->wm4.sampler_count << 2)), I915_GEM_DOMAIN_INSTRUCTION, 0); } - return bo; -} - - -static void upload_wm_unit( struct brw_context *brw ) -{ - struct intel_context *intel = &brw->intel; - struct brw_wm_unit_key key; - drm_intel_bo *reloc_bufs[3]; - wm_unit_populate_key(brw, &key); - - /* Allocate the necessary scratch space if we haven't already. Don't - * bother reducing the allocation later, since we use scratch so - * rarely. 
- */ - assert(key.total_scratch <= 12 * 1024); - if (key.total_scratch) { - GLuint total = key.total_scratch * brw->wm_max_threads; - - if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) { - drm_intel_bo_unreference(brw->wm.scratch_bo); - brw->wm.scratch_bo = NULL; - } - if (brw->wm.scratch_bo == NULL) { - brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr, - "wm scratch", - total, - 4096); - } - } - - reloc_bufs[0] = brw->wm.prog_bo; - reloc_bufs[1] = brw->wm.scratch_bo; - reloc_bufs[2] = brw->wm.sampler_bo; - - drm_intel_bo_unreference(brw->wm.state_bo); - brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT, - &key, sizeof(key), - reloc_bufs, 3, - NULL); - if (brw->wm.state_bo == NULL) { - brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs); - } + brw->state.dirty.cache |= CACHE_NEW_WM_UNIT; } const struct brw_tracked_state brw_wm_unit = { @@ -284,16 +252,17 @@ const struct brw_tracked_state brw_wm_unit = { _NEW_POLYGONSTIPPLE | _NEW_LINE | _NEW_COLOR | - _NEW_DEPTH), + _NEW_DEPTH | + _NEW_BUFFERS), - .brw = (BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_CURBE_OFFSETS | - BRW_NEW_DEPTH_BUFFER | - BRW_NEW_NR_WM_SURFACES), + .brw = (BRW_NEW_BATCH | + BRW_NEW_PROGRAM_CACHE | + BRW_NEW_FRAGMENT_PROGRAM | + BRW_NEW_CURBE_OFFSETS), .cache = (CACHE_NEW_WM_PROG | CACHE_NEW_SAMPLER) }, - .prepare = upload_wm_unit, + .emit = brw_upload_wm_unit, };
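
A small worked example of the per-thread scratch encoding used in brw_upload_wm_unit() above: the patch programs thread2.per_thread_scratch_space as ffs(total_scratch) - 11, which only works if total_scratch has already been rounded up to a power of two of at least 1KB (the expression implies the field counts powers of two starting at 1KB). The sketch below is illustrative only and not part of the patch; the helper name and the example sizes are made up.

/* Hypothetical illustration of the ffs()-based encoding:
 *   1KB -> ffs(0x400)  - 11 = 11 - 11 = 0
 *   2KB -> ffs(0x800)  - 11 = 12 - 11 = 1
 *   4KB -> ffs(0x1000) - 11 = 13 - 11 = 2
 */
#include <assert.h>
#include <strings.h>   /* ffs() */

unsigned
encode_per_thread_scratch(unsigned total_scratch)
{
   /* The expression assumes a power-of-two size of at least 1KB. */
   assert(total_scratch >= 1024);
   assert((total_scratch & (total_scratch - 1)) == 0);

   return ffs(total_scratch) - 11;
}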