X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_sf_state.c;h=1a6c8218fd1305446349085f6e549225b3d394df;hb=ccd13da0fc1f1813b55fc0d2181a6cb0d3b42b0d;hp=c99918724b36b8964b021c74cf82563bab12a34e;hpb=9f6ec50f8c79283583eeebdebd16bf7dcd134816;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_sf_state.c b/src/mesa/drivers/dri/i965/brw_sf_state.c index c99918724b3..1a6c8218fd1 100644 --- a/src/mesa/drivers/dri/i965/brw_sf_state.c +++ b/src/mesa/drivers/dri/i965/brw_sf_state.c @@ -35,7 +35,6 @@ #include "brw_state.h" #include "brw_defines.h" #include "main/macros.h" -#include "intel_fbo.h" static void upload_sf_vp(struct brw_context *brw) { @@ -70,14 +69,27 @@ static void upload_sf_vp(struct brw_context *brw) * for DrawBuffer->_[XY]{min,max} */ - /* The scissor only needs to handle the intersection of drawable and - * scissor rect. Clipping to the boundaries of static shared buffers - * for front/back/depth is covered by looping over cliprects in brw_draw.c. + /* The scissor only needs to handle the intersection of drawable + * and scissor rect, since there are no longer cliprects for shared + * buffers with DRI2. * * Note that the hardware's coordinates are inclusive, while Mesa's min is * inclusive but max is exclusive. */ - if (render_to_fbo) { + + if (ctx->DrawBuffer->_Xmin == ctx->DrawBuffer->_Xmax || + ctx->DrawBuffer->_Ymin == ctx->DrawBuffer->_Ymax) { + /* If the scissor was out of bounds and got clamped to 0 + * width/height at the bounds, the subtraction of 1 from + * maximums could produce a negative number and thus not clip + * anything. Instead, just provide a min > max scissor inside + * the bounds, which produces the expected no rendering. + */ + sfv.scissor.xmin = 1; + sfv.scissor.xmax = 0; + sfv.scissor.ymin = 1; + sfv.scissor.ymax = 0; + } else if (render_to_fbo) { /* texmemory: Y=0=bottom */ sfv.scissor.xmin = ctx->DrawBuffer->_Xmin; sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1; @@ -93,7 +105,8 @@ static void upload_sf_vp(struct brw_context *brw) } dri_bo_unreference(brw->sf.vp_bo); - brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 ); + brw->sf.vp_bo = brw_cache_data(&brw->cache, BRW_SF_VP, &sfv, sizeof(sfv), + NULL, 0); } const struct brw_tracked_state brw_sf_vp = { @@ -114,6 +127,7 @@ struct brw_sf_unit_key { unsigned int nr_urb_entries, urb_size, sfsize; GLenum front_face, cull_face; + unsigned pv_first:1; unsigned scissor:1; unsigned line_smooth:1; unsigned point_sprite:1; @@ -153,6 +167,9 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key) key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); key->point_attenuated = ctx->Point._Attenuated; + /* _NEW_LIGHT */ + key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION); + key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; } @@ -160,9 +177,10 @@ static dri_bo * sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, dri_bo **reloc_bufs) { + struct intel_context *intel = &brw->intel; struct brw_sf_unit_state sf; dri_bo *bo; - + int chipset_max_threads; memset(&sf, 0, sizeof(sf)); sf.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1; @@ -171,13 +189,26 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; sf.thread3.dispatch_grf_start_reg = 3; - sf.thread3.urb_entry_read_offset = 1; + + if (intel->gen == 5) + sf.thread3.urb_entry_read_offset = 3; + else + sf.thread3.urb_entry_read_offset = 1; + sf.thread3.urb_entry_read_length = key->urb_entry_read_length; sf.thread4.nr_urb_entries = key->nr_urb_entries; sf.thread4.urb_entry_allocation_size = key->sfsize - 1; - /* Each SF thread produces 1 PUE, and there can be up to 24 threads */ - sf.thread4.max_threads = MIN2(24, key->nr_urb_entries) - 1; + + /* Each SF thread produces 1 PUE, and there can be up to 24 (Pre-Ironlake) or + * 48 (Ironlake) threads. + */ + if (intel->gen == 5) + chipset_max_threads = 48; + else + chipset_max_threads = 24; + + sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1; if (INTEL_DEBUG & DEBUG_SINGLE_THREAD) sf.thread4.max_threads = 0; @@ -233,7 +264,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, else if (sf.sf6.line_width <= 0x2) sf.sf6.line_width = 0; - /* _NEW_POINT */ + /* _NEW_BUFFERS */ key->render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0; if (!key->render_to_fbo) { /* Rendering to an OpenGL window */ @@ -263,6 +294,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, } /* XXX clamp max depends on AA vs. non-AA */ + /* _NEW_POINT */ sf.sf7.sprite_point = key->point_sprite; sf.sf7.point_size = CLAMP(rint(key->point_size), 1, 255) * (1<<3); sf.sf7.use_point_size_state = !key->point_attenuated; @@ -270,9 +302,15 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons: */ - sf.sf7.trifan_pv = 2; - sf.sf7.linestrip_pv = 1; - sf.sf7.tristrip_pv = 2; + if (!key->pv_first) { + sf.sf7.trifan_pv = 2; + sf.sf7.linestrip_pv = 1; + sf.sf7.tristrip_pv = 2; + } else { + sf.sf7.trifan_pv = 1; + sf.sf7.linestrip_pv = 0; + sf.sf7.tristrip_pv = 0; + } sf.sf7.line_last_pixel_enable = 0; /* Set bias for OpenGL rasterization rules: @@ -283,9 +321,11 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key, bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT, key, sizeof(*key), reloc_bufs, 2, - &sf, sizeof(sf), - NULL, NULL); + &sf, sizeof(sf)); + /* STATE_PREFETCH command description describes this state as being + * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain. + */ /* Emit SF program relocation */ dri_bo_emit_reloc(bo, I915_GEM_DOMAIN_INSTRUCTION, 0, @@ -326,9 +366,11 @@ static void upload_sf_unit( struct brw_context *brw ) const struct brw_tracked_state brw_sf_unit = { .dirty = { .mesa = (_NEW_POLYGON | + _NEW_LIGHT | _NEW_LINE | _NEW_POINT | - _NEW_SCISSOR), + _NEW_SCISSOR | + _NEW_BUFFERS), .brw = BRW_NEW_URB_FENCE, .cache = (CACHE_NEW_SF_VP | CACHE_NEW_SF_PROG)