From: Paul Berry Date: Tue, 3 Sep 2013 04:59:04 +0000 (-0700) Subject: i965/sf: Consult brw_wm_prog_data when setting up SF/SBE state. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0af1252ae478a28a76326a5c5d784864327793e2;p=mesa.git i965/sf: Consult brw_wm_prog_data when setting up SF/SBE state. Previously, the SF/SBE setup code delivered varying inputs to the FS in the order in which they appear in the gl_program::InputsRead bitfield, since that's what the FS expects. When we add support for more than 64 varying components, this will no longer always be the case, because the Gen6+ SF/SBE stage is only capable of performing arbitrary reorderings of 16 varying slots. So, when there are more than 16 vec4's worth of varying inputs, the FS will have to adjust the order of its input varyings in order to partially match the order of outputs from the geometry or vertex shader. To allow extra flexibility in the ordering of FS varyings, this patch causes the SF/SBE to deliver varying inputs to the FS in exactly the order that the FS requests, by consulting brw_wm_prog_data::urb_setup and brw_wm_prog_data::num_varying_inputs. Reviewed-by: Kenneth Graunke --- diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index 91b640d8fa2..8bac5597094 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -137,17 +137,23 @@ calculate_attr_overrides(const struct brw_context *brw, { const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; uint32_t max_source_attr = 0; - int input_index = 0; /* _NEW_LIGHT */ bool shade_model_flat = brw->ctx.Light.ShadeModel == GL_FLAT; + /* Initialize all the attr_overrides to 0. In the loop below we'll modify + * just the ones that correspond to inputs used by the fs. 
+ */ + memset(attr_overrides, 0, 16*sizeof(*attr_overrides)); + for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) { enum glsl_interp_qualifier interp_qualifier = brw->fragment_program->InterpQualifier[attr]; bool is_gl_Color = attr == VARYING_SLOT_COL0 || attr == VARYING_SLOT_COL1; + /* CACHE_NEW_WM_PROG */ + int input_index = brw->wm.prog_data->urb_setup[attr]; - if (!(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr))) + if (input_index < 0) continue; /* _NEW_POINT */ @@ -166,23 +172,23 @@ calculate_attr_overrides(const struct brw_context *brw, interp_qualifier == INTERP_QUALIFIER_NONE)) *flat_enables |= (1 << input_index); - /* The hardware can only do the overrides on 16 overrides at a - * time, and the other up to 16 have to be lined up so that the - * input index = the output index. We'll need to do some - * tweaking to make sure that's the case. - */ - assert(input_index < 16 || attr == input_index); - /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */ - attr_overrides[input_index++] = + uint16_t attr_override = get_attr_override(&brw->vue_map_geom_out, urb_entry_read_offset, attr, brw->ctx.VertexProgram._TwoSideEnabled, &max_source_attr); - } - for (; input_index < VARYING_SLOT_MAX; input_index++) - attr_overrides[input_index] = 0; + /* The hardware can only do the overrides on 16 overrides at a + * time, and the other up to 16 have to be lined up so that the + * input index = the output index. We'll need to do some + * tweaking to make sure that's the case. 
+ */ + if (input_index < 16) + attr_overrides[input_index] = attr_override; + else + assert(attr_override == input_index); + } /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length": @@ -207,8 +213,8 @@ static void upload_sf_state(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_FRAGMENT_PROGRAM */ - uint32_t num_outputs = _mesa_bitcount_64(brw->fragment_program->Base.InputsRead); + /* CACHE_NEW_WM_PROG */ + uint32_t num_outputs = brw->wm.prog_data->num_varying_inputs; uint32_t dw1, dw2, dw3, dw4, dw16, dw17; int i; /* _NEW_BUFFER */ @@ -217,7 +223,7 @@ upload_sf_state(struct brw_context *brw) const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; float point_size; - uint16_t attr_overrides[VARYING_SLOT_MAX]; + uint16_t attr_overrides[16]; uint32_t point_sprite_origin; dw1 = GEN6_SF_SWIZZLE_ENABLE | num_outputs << GEN6_SF_NUM_OUTPUTS_SHIFT; @@ -353,7 +359,9 @@ upload_sf_state(struct brw_context *brw) (1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT); } - /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM */ + /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | + * CACHE_NEW_WM_PROG + */ uint32_t urb_entry_read_length; calculate_attr_overrides(brw, attr_overrides, &dw16, &dw17, &urb_entry_read_length); @@ -391,7 +399,8 @@ const struct brw_tracked_state gen6_sf_state = { _NEW_MULTISAMPLE), .brw = (BRW_NEW_CONTEXT | BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_VUE_MAP_GEOM_OUT) + BRW_NEW_VUE_MAP_GEOM_OUT), + .cache = CACHE_NEW_WM_PROG }, .emit = upload_sf_state, }; diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index 3f7c2e550d1..62362c2928a 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -33,12 +33,12 @@ static void upload_sbe_state(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_FRAGMENT_PROGRAM */ - 
uint32_t num_outputs = _mesa_bitcount_64(brw->fragment_program->Base.InputsRead); + /* CACHE_NEW_WM_PROG */ + uint32_t num_outputs = brw->wm.prog_data->num_varying_inputs; uint32_t dw1, dw10, dw11; int i; const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET; - uint16_t attr_overrides[VARYING_SLOT_MAX]; + uint16_t attr_overrides[16]; /* _NEW_BUFFERS */ bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); uint32_t point_sprite_origin; @@ -62,7 +62,9 @@ upload_sbe_state(struct brw_context *brw) dw10 = 0; dw11 = 0; - /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM */ + /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_POINT | _NEW_LIGHT | _NEW_PROGRAM | + * CACHE_NEW_WM_PROG + */ uint32_t urb_entry_read_length; calculate_attr_overrides(brw, attr_overrides, &dw10, &dw11, &urb_entry_read_length); @@ -93,7 +95,8 @@ const struct brw_tracked_state gen7_sbe_state = { _NEW_PROGRAM), .brw = (BRW_NEW_CONTEXT | BRW_NEW_FRAGMENT_PROGRAM | - BRW_NEW_VUE_MAP_GEOM_OUT) + BRW_NEW_VUE_MAP_GEOM_OUT), + .cache = CACHE_NEW_WM_PROG }, .emit = upload_sbe_state, };