From: Chia-I Wu Date: Thu, 30 May 2013 06:37:49 +0000 (+0800) Subject: ilo: introduce viewport CSO X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e51806ee7ab52873c773619595962582c2abf0fe;p=mesa.git ilo: introduce viewport CSO Introduce ilo_viewport_cso and initialize it in set_viewport_states(). This saves us from having to perform CPU-intensive calculations to construct hardware viewport states in draw_vbo(). --- diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c index cc1b2985bc4..f2bdf371d60 100644 --- a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c +++ b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c @@ -598,21 +598,22 @@ gen6_pipeline_clip(struct ilo_3d_pipeline *p, /* 3DSTATE_CLIP */ if (DIRTY(RASTERIZER) || DIRTY(FS) || DIRTY(VIEWPORT) || DIRTY(FRAMEBUFFER)) { - const struct pipe_viewport_state *vp = &ilo->viewport.states[0]; - bool enable_guardband; - float x1, x2, y1, y2; + bool enable_guardband = true; + unsigned i; /* * We do not do 2D clipping yet. Guard band test should only be enabled * when the viewport is larger than the framebuffer. 
*/ - x1 = fabs(vp->scale[0]) * -1.0f + vp->translate[0]; - x2 = fabs(vp->scale[0]) * 1.0f + vp->translate[0]; - y1 = fabs(vp->scale[1]) * -1.0f + vp->translate[1]; - y2 = fabs(vp->scale[1]) * 1.0f + vp->translate[1]; - enable_guardband = - (x1 <= 0.0f && x2 >= (float) ilo->fb.state.width && - y1 <= 0.0f && y2 >= (float) ilo->fb.state.height); + for (i = 0; i < ilo->viewport.count; i++) { + const struct ilo_viewport_cso *vp = &ilo->viewport.cso[i]; + + if (vp->min_x > 0.0f || vp->max_x < ilo->fb.state.width || + vp->min_y > 0.0f || vp->max_y < ilo->fb.state.height) { + enable_guardband = false; + break; + } + } p->gen6_3DSTATE_CLIP(p->dev, &ilo->rasterizer->state, @@ -776,23 +777,23 @@ gen6_pipeline_state_viewports(struct ilo_3d_pipeline *p, /* SF_CLIP_VIEWPORT and CC_VIEWPORT */ if (p->dev->gen >= ILO_GEN(7) && DIRTY(VIEWPORT)) { p->state.SF_CLIP_VIEWPORT = p->gen7_SF_CLIP_VIEWPORT(p->dev, - ilo->viewport.states, ilo->viewport.count, p->cp); + ilo->viewport.cso, ilo->viewport.count, p->cp); p->state.CC_VIEWPORT = p->gen6_CC_VIEWPORT(p->dev, - ilo->viewport.states, ilo->viewport.count, p->cp); + ilo->viewport.cso, ilo->viewport.count, p->cp); session->viewport_state_changed = true; } /* SF_VIEWPORT, CLIP_VIEWPORT, and CC_VIEWPORT */ else if (DIRTY(VIEWPORT)) { p->state.CLIP_VIEWPORT = p->gen6_CLIP_VIEWPORT(p->dev, - ilo->viewport.states, ilo->viewport.count, p->cp); + ilo->viewport.cso, ilo->viewport.count, p->cp); p->state.SF_VIEWPORT = p->gen6_SF_VIEWPORT(p->dev, - ilo->viewport.states, ilo->viewport.count, p->cp); + ilo->viewport.cso, ilo->viewport.count, p->cp); p->state.CC_VIEWPORT = p->gen6_CC_VIEWPORT(p->dev, - ilo->viewport.states, ilo->viewport.count, p->cp); + ilo->viewport.cso, ilo->viewport.count, p->cp); session->viewport_state_changed = true; } diff --git a/src/gallium/drivers/ilo/ilo_blit.c b/src/gallium/drivers/ilo/ilo_blit.c index fda30de1ead..362128c2aa8 100644 --- a/src/gallium/drivers/ilo/ilo_blit.c +++ b/src/gallium/drivers/ilo/ilo_blit.c @@ 
-553,7 +553,7 @@ ilo_blitter_begin(struct ilo_context *ilo, enum ilo_blitter_op op) util_blitter_save_blend(ilo->blitter, (void *) ilo->blend); /* undocumented? */ - util_blitter_save_viewport(ilo->blitter, &ilo->viewport.states[0]); + util_blitter_save_viewport(ilo->blitter, &ilo->viewport.viewport0); util_blitter_save_stencil_ref(ilo->blitter, &ilo->stencil_ref); util_blitter_save_sample_mask(ilo->blitter, ilo->sample_mask); diff --git a/src/gallium/drivers/ilo/ilo_gpe.h b/src/gallium/drivers/ilo/ilo_gpe.h index 903d654f051..044076b522c 100644 --- a/src/gallium/drivers/ilo/ilo_gpe.h +++ b/src/gallium/drivers/ilo/ilo_gpe.h @@ -75,9 +75,23 @@ struct ilo_so_state { bool enabled; }; +struct ilo_viewport_cso { + /* matrix form */ + float m00, m11, m22, m30, m31, m32; + + /* guardband in NDC space */ + float min_gbx, min_gby, max_gbx, max_gby; + + /* viewport in screen space */ + float min_x, min_y, min_z; + float max_x, max_y, max_z; +}; + struct ilo_viewport_state { - struct pipe_viewport_state states[ILO_MAX_VIEWPORTS]; + struct ilo_viewport_cso cso[ILO_MAX_VIEWPORTS]; unsigned count; + + struct pipe_viewport_state viewport0; }; struct ilo_scissor_state { @@ -144,4 +158,9 @@ struct ilo_global_binding { unsigned count; }; +void +ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev, + const struct pipe_viewport_state *state, + struct ilo_viewport_cso *vp); + #endif /* ILO_GPE_H */ diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen6.c b/src/gallium/drivers/ilo/ilo_gpe_gen6.c index a5cb95e95b5..a2a24154be7 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen6.c +++ b/src/gallium/drivers/ilo/ilo_gpe_gen6.c @@ -2973,166 +2973,115 @@ gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev, return state_offset; } -void -ilo_gpe_gen6_fill_SF_VIEWPORT(const struct ilo_dev_info *dev, - const struct pipe_viewport_state *viewports, - int num_viewports, - uint32_t *dw, int num_dwords) -{ - int i; - - ILO_GPE_VALID_GEN(dev, 6, 7); - assert(num_dwords == 8 * num_viewports); 
- - for (i = 0; i < num_viewports; i++) { - const struct pipe_viewport_state *vp = &viewports[i]; - - dw[0] = fui(vp->scale[0]); - dw[1] = fui(vp->scale[1]); - dw[2] = fui(vp->scale[2]); - dw[3] = fui(vp->translate[0]); - dw[4] = fui(vp->translate[1]); - dw[5] = fui(vp->translate[2]); - - /* padding */ - dw[6] = 0; - dw[7] = 0; - - dw += 8; - } -} - -void -ilo_gpe_gen6_fill_CLIP_VIEWPORT(const struct ilo_dev_info *dev, - const struct pipe_viewport_state *viewports, - int num_viewports, - uint32_t *dw, int num_dwords) +static void +viewport_get_guardband(const struct ilo_dev_info *dev, + int center_x, int center_y, + int *min_gbx, int *max_gbx, + int *min_gby, int *max_gby) { - int i; - - ILO_GPE_VALID_GEN(dev, 6, 7); - assert(num_dwords == 4 * num_viewports); - /* - * CLIP_VIEWPORT specifies the guard band. + * From the Sandy Bridge PRM, volume 2 part 1, page 234: + * + * "Per-Device Guardband Extents + * + * - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1] + * - Maximum Post-Clamp Delta (X or Y): 16K" + * + * "In addition, in order to be correctly rendered, objects must have a + * screenspace bounding box not exceeding 8K in the X or Y direction. + * This additional restriction must also be comprehended by software, + * i.e., enforced by use of clipping." + * + * From the Ivy Bridge PRM, volume 2 part 1, page 248: + * + * "Per-Device Guardband Extents * - * Clipping an object that is not entirely inside or outside the viewport - * (that is, trivially accepted or rejected) is expensive. Guard band test - * allows clipping to be skipped in this stage and let the renderer dicards - * pixels that are outside the viewport. + * - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1] + * - Maximum Post-Clamp Delta (X or Y): N/A" * - * The reason that we need CLIP_VIEWPORT is that the renderer has a limit - * on the object size. We have to clip normally when the object exceeds - * the limit. 
+ * "In addition, in order to be correctly rendered, objects must have a + * screenspace bounding box not exceeding 8K in the X or Y direction. + * This additional restriction must also be comprehended by software, + * i.e., enforced by use of clipping." + * + * Combined, the bounding box of any object can not exceed 8K in both + * width and height. + * + * Below we set the guardband as a square of length 8K, centered at where + * the viewport is. This makes sure all objects passing the GB test are + * valid to the renderer, and those failing the XY clipping have a + * better chance of passing the GB test. */ - - for (i = 0; i < num_viewports; i++) { - const struct pipe_viewport_state *vp = &viewports[i]; - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 234: - * - * "Per-Device Guardband Extents - * - * * Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1] - * * Maximum Post-Clamp Delta (X or Y): 16K" - * - * "In addition, in order to be correctly rendered, objects must - * have a screenspace bounding box not exceeding 8K in the X or Y - * direction. This additional restriction must also be - * comprehended by software, i.e., enforced by use of clipping." - * - * From the Ivy Bridge PRM, volume 2 part 1, page 248: - * - * "Per-Device Guardband Extents - * - * * Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1] - * * Maximum Post-Clamp Delta (X or Y): N/A" - * - * "In addition, in order to be correctly rendered, objects must - * have a screenspace bounding box not exceeding 8K in the X or Y - * direction. This additional restriction must also be comprehended - * by software, i.e., enforced by use of clipping." - * - * Combined, the bounding box of any object can not exceed 8K in both - * width and height. - * - * Below we set the guardband as a squre of length 8K, centered at where - * the viewport is.
This makes sure all objects passing the GB test are - * valid to the renderer, and those failing the XY clipping have a - * better chance of passing the GB test. - */ - const float xscale = fabs(vp->scale[0]); - const float yscale = fabs(vp->scale[1]); - const int max_extent = (dev->gen >= ILO_GEN(7)) ? 32768 : 16384; - const int half_len = 8192 / 2; - int center_x = (int) vp->translate[0]; - int center_y = (int) vp->translate[1]; - float xmin, xmax, ymin, ymax; - - /* make sure the guardband is within the valid range */ - if (center_x - half_len < -max_extent) - center_x = -max_extent + half_len; - else if (center_x + half_len > max_extent) - center_x = max_extent - half_len; - - if (center_y - half_len < -max_extent) - center_y = -max_extent + half_len; - else if (center_y + half_len > max_extent) - center_y = max_extent - half_len; - - xmin = (float) (center_x - half_len); - xmax = (float) (center_x + half_len); - ymin = (float) (center_y - half_len); - ymax = (float) (center_y + half_len); - - /* screen space to NDC space */ - xmin = (xmin - vp->translate[0]) / xscale; - xmax = (xmax - vp->translate[0]) / xscale; - ymin = (ymin - vp->translate[1]) / yscale; - ymax = (ymax - vp->translate[1]) / yscale; - - dw[0] = fui(xmin); - dw[1] = fui(xmax); - dw[2] = fui(ymin); - dw[3] = fui(ymax); - - dw += 4; - } + const int max_extent = (dev->gen >= ILO_GEN(7)) ? 
32768 : 16384; + const int half_len = 8192 / 2; + + /* make sure the guardband is within the valid range */ + if (center_x - half_len < -max_extent) + center_x = -max_extent + half_len; + else if (center_x + half_len > max_extent - 1) + center_x = max_extent - half_len; + + if (center_y - half_len < -max_extent) + center_y = -max_extent + half_len; + else if (center_y + half_len > max_extent - 1) + center_y = max_extent - half_len; + + *min_gbx = (float) (center_x - half_len); + *max_gbx = (float) (center_x + half_len); + *min_gby = (float) (center_y - half_len); + *max_gby = (float) (center_y + half_len); } -static void -gen6_fill_CC_VIEWPORT(const struct ilo_dev_info *dev, - const struct pipe_viewport_state *viewports, - int num_viewports, - uint32_t *dw, int num_dwords) +void +ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev, + const struct pipe_viewport_state *state, + struct ilo_viewport_cso *vp) { - int i; + const float scale_x = fabs(state->scale[0]); + const float scale_y = fabs(state->scale[1]); + const float scale_z = fabs(state->scale[2]); + int min_gbx, max_gbx, min_gby, max_gby; ILO_GPE_VALID_GEN(dev, 6, 7); - assert(num_dwords == 2 * num_viewports); - for (i = 0; i < num_viewports; i++) { - const struct pipe_viewport_state *vp = &viewports[i]; - const float scale = fabs(vp->scale[2]); - const float min = vp->translate[2] - scale; - const float max = vp->translate[2] + scale; - - dw[0] = fui(min); - dw[1] = fui(max); - - dw += 2; - } + viewport_get_guardband(dev, + (int) state->translate[0], + (int) state->translate[1], + &min_gbx, &max_gbx, &min_gby, &max_gby); + + /* matrix form */ + vp->m00 = state->scale[0]; + vp->m11 = state->scale[1]; + vp->m22 = state->scale[2]; + vp->m30 = state->translate[0]; + vp->m31 = state->translate[1]; + vp->m32 = state->translate[2]; + + /* guardband in NDC space */ + vp->min_gbx = ((float) min_gbx - state->translate[0]) / scale_x; + vp->max_gbx = ((float) max_gbx - state->translate[0]) / scale_x; + vp->min_gby = 
((float) min_gby - state->translate[1]) / scale_y; + vp->max_gby = ((float) max_gby - state->translate[1]) / scale_y; + + /* viewport in screen space */ + vp->min_x = scale_x * -1.0f + state->translate[0]; + vp->max_x = scale_x * 1.0f + state->translate[0]; + vp->min_y = scale_y * -1.0f + state->translate[1]; + vp->max_y = scale_y * 1.0f + state->translate[1]; + vp->min_z = scale_z * -1.0f + state->translate[2]; + vp->max_z = scale_z * 1.0f + state->translate[2]; } static uint32_t gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev, - const struct pipe_viewport_state *viewports, - int num_viewports, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports, struct ilo_cp *cp) { const int state_align = 32 / 4; const int state_len = 8 * num_viewports; uint32_t state_offset, *dw; + unsigned i; ILO_GPE_VALID_GEN(dev, 6, 6); @@ -3147,21 +3096,34 @@ gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev, dw = ilo_cp_steal_ptr(cp, "SF_VIEWPORT", state_len, state_align, &state_offset); - ilo_gpe_gen6_fill_SF_VIEWPORT(dev, - viewports, num_viewports, dw, state_len); + for (i = 0; i < num_viewports; i++) { + const struct ilo_viewport_cso *vp = &viewports[i]; + + dw[0] = fui(vp->m00); + dw[1] = fui(vp->m11); + dw[2] = fui(vp->m22); + dw[3] = fui(vp->m30); + dw[4] = fui(vp->m31); + dw[5] = fui(vp->m32); + dw[6] = 0; + dw[7] = 0; + + dw += 8; + } return state_offset; } static uint32_t gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev, - const struct pipe_viewport_state *viewports, - int num_viewports, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports, struct ilo_cp *cp) { const int state_align = 32 / 4; const int state_len = 4 * num_viewports; uint32_t state_offset, *dw; + unsigned i; ILO_GPE_VALID_GEN(dev, 6, 6); @@ -3176,21 +3138,30 @@ gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev, dw = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT", state_len, state_align, &state_offset); - ilo_gpe_gen6_fill_CLIP_VIEWPORT(dev, - viewports, num_viewports, dw, 
state_len); + for (i = 0; i < num_viewports; i++) { + const struct ilo_viewport_cso *vp = &viewports[i]; + + dw[0] = fui(vp->min_gbx); + dw[1] = fui(vp->max_gbx); + dw[2] = fui(vp->min_gby); + dw[3] = fui(vp->max_gby); + + dw += 4; + } return state_offset; } static uint32_t gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev, - const struct pipe_viewport_state *viewports, - int num_viewports, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports, struct ilo_cp *cp) { const int state_align = 32 / 4; const int state_len = 2 * num_viewports; uint32_t state_offset, *dw; + unsigned i; ILO_GPE_VALID_GEN(dev, 6, 7); @@ -3204,7 +3175,14 @@ gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev, dw = ilo_cp_steal_ptr(cp, "CC_VIEWPORT", state_len, state_align, &state_offset); - gen6_fill_CC_VIEWPORT(dev, viewports, num_viewports, dw, state_len); + for (i = 0; i < num_viewports; i++) { + const struct ilo_viewport_cso *vp = &viewports[i]; + + dw[0] = fui(vp->min_z); + dw[1] = fui(vp->max_z); + + dw += 2; + } return state_offset; } diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen6.h b/src/gallium/drivers/ilo/ilo_gpe_gen6.h index 424137f0d0d..301ff8be502 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen6.h +++ b/src/gallium/drivers/ilo/ilo_gpe_gen6.h @@ -377,20 +377,20 @@ typedef uint32_t struct ilo_cp *cp); typedef uint32_t (*ilo_gpe_gen6_SF_VIEWPORT)(const struct ilo_dev_info *dev, - const struct pipe_viewport_state *viewports, - int num_viewports, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports, struct ilo_cp *cp); typedef uint32_t (*ilo_gpe_gen6_CLIP_VIEWPORT)(const struct ilo_dev_info *dev, - const struct pipe_viewport_state *viewports, - int num_viewports, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports, struct ilo_cp *cp); typedef uint32_t (*ilo_gpe_gen6_CC_VIEWPORT)(const struct ilo_dev_info *dev, - const struct pipe_viewport_state *viewports, - int num_viewports, + const struct ilo_viewport_cso *viewports, + 
unsigned num_viewports, struct ilo_cp *cp); typedef uint32_t @@ -576,16 +576,4 @@ ilo_gpe_gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev, bool hiz, struct ilo_cp *cp); -void -ilo_gpe_gen6_fill_SF_VIEWPORT(const struct ilo_dev_info *dev, - const struct pipe_viewport_state *viewports, - int num_viewports, - uint32_t *dw, int num_dwords); - -void -ilo_gpe_gen6_fill_CLIP_VIEWPORT(const struct ilo_dev_info *dev, - const struct pipe_viewport_state *viewports, - int num_viewports, - uint32_t *dw, int num_dwords); - #endif /* ILO_GPE_GEN6_H */ diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen7.c b/src/gallium/drivers/ilo/ilo_gpe_gen7.c index 56a1cf0556b..dfb5fe15dc8 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen7.c +++ b/src/gallium/drivers/ilo/ilo_gpe_gen7.c @@ -1193,14 +1193,14 @@ gen7_emit_3DPRIMITIVE(const struct ilo_dev_info *dev, static uint32_t gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev, - const struct pipe_viewport_state *viewports, - int num_viewports, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports, struct ilo_cp *cp) { const int state_align = 64 / 4; const int state_len = 16 * num_viewports; uint32_t state_offset, *dw; - int i; + unsigned i; ILO_GPE_VALID_GEN(dev, 7, 7); @@ -1220,12 +1220,20 @@ gen7_emit_SF_CLIP_VIEWPORT(const struct ilo_dev_info *dev, state_len, state_align, &state_offset); for (i = 0; i < num_viewports; i++) { - const struct pipe_viewport_state *vp = &viewports[i]; - - ilo_gpe_gen6_fill_SF_VIEWPORT(dev, vp, 1, dw, 8); - - ilo_gpe_gen6_fill_CLIP_VIEWPORT(dev, vp, 1, dw + 8, 4); - + const struct ilo_viewport_cso *vp = &viewports[i]; + + dw[0] = fui(vp->m00); + dw[1] = fui(vp->m11); + dw[2] = fui(vp->m22); + dw[3] = fui(vp->m30); + dw[4] = fui(vp->m31); + dw[5] = fui(vp->m32); + dw[6] = 0; + dw[7] = 0; + dw[8] = fui(vp->min_gbx); + dw[9] = fui(vp->max_gbx); + dw[10] = fui(vp->min_gby); + dw[11] = fui(vp->max_gby); dw[12] = 0; dw[13] = 0; dw[14] = 0; diff --git 
a/src/gallium/drivers/ilo/ilo_gpe_gen7.h b/src/gallium/drivers/ilo/ilo_gpe_gen7.h index 118a5392847..d270ea677d6 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen7.h +++ b/src/gallium/drivers/ilo/ilo_gpe_gen7.h @@ -382,8 +382,8 @@ typedef ilo_gpe_gen6_INTERFACE_DESCRIPTOR_DATA ilo_gpe_gen7_INTERFACE_DESCRIPTOR typedef uint32_t (*ilo_gpe_gen7_SF_CLIP_VIEWPORT)(const struct ilo_dev_info *dev, - const struct pipe_viewport_state *viewports, - int num_viewports, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports, struct ilo_cp *cp); typedef ilo_gpe_gen6_CC_VIEWPORT ilo_gpe_gen7_CC_VIEWPORT; diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 1151242ea57..205128e0993 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -600,11 +600,17 @@ ilo_set_viewport_states(struct pipe_context *pipe, if (viewports) { unsigned i; - for (i = 0; i < num_viewports; i++) - ilo->viewport.states[start_slot + i] = viewports[i]; + for (i = 0; i < num_viewports; i++) { + ilo_gpe_set_viewport_cso(ilo->dev, &viewports[i], + &ilo->viewport.cso[start_slot + i]); + } if (ilo->viewport.count < start_slot + num_viewports) ilo->viewport.count = start_slot + num_viewports; + + /* need to save viewport 0 for util_blitter */ + if (!start_slot && num_viewports) + ilo->viewport.viewport0 = viewports[0]; } else { if (ilo->viewport.count <= start_slot + num_viewports &&