#include <sys/errno.h>
+#include "main/arrayobj.h"
#include "main/blend.h"
#include "main/context.h"
#include "main/condrender.h"
static void
-brw_merge_inputs(struct brw_context *brw,
- const struct gl_vertex_array *arrays)
+brw_merge_inputs(struct brw_context *brw)
{
const struct gen_device_info *devinfo = &brw->screen->devinfo;
const struct gl_context *ctx = &brw->ctx;
brw->vb.nr_buffers = 0;
for (i = 0; i < VERT_ATTRIB_MAX; i++) {
- brw->vb.inputs[i].buffer = -1;
- brw->vb.inputs[i].glarray = &arrays[i];
+ struct brw_vertex_element *input = &brw->vb.inputs[i];
+ input->buffer = -1;
+ _mesa_draw_attrib_and_binding(ctx, i,
+ &input->glattrib, &input->glbinding);
}
if (devinfo->gen < 8 && !devinfo->is_haswell) {
* 2_10_10_10_REV vertex formats. Set appropriate workaround flags.
*/
while (mask) {
- const struct gl_array_attributes *glattrib;
+ const struct gl_vertex_format *glformat;
uint8_t wa_flags = 0;
i = u_bit_scan64(&mask);
- glattrib = brw->vb.inputs[i].glarray->VertexAttrib;
+ glformat = &brw->vb.inputs[i].glattrib->Format;
- switch (glattrib->Type) {
+ switch (glformat->Type) {
case GL_FIXED:
- wa_flags = glattrib->Size;
+ wa_flags = glformat->Size;
break;
case GL_INT_2_10_10_10_REV:
/* fallthough */
case GL_UNSIGNED_INT_2_10_10_10_REV:
- if (glattrib->Format == GL_BGRA)
+ if (glformat->Format == GL_BGRA)
wa_flags |= BRW_ATTRIB_WA_BGRA;
- if (glattrib->Normalized)
+ if (glformat->Normalized)
wa_flags |= BRW_ATTRIB_WA_NORMALIZE;
- else if (!glattrib->Integer)
+ else if (!glformat->Integer)
wa_flags |= BRW_ATTRIB_WA_SCALE;
break;
return found;
}
+/** Implement the ASTC 5x5 sampler workaround
+ *
+ * Gen9 sampling hardware has a bug where an ASTC 5x5 compressed surface
+ * cannot live in the sampler cache at the same time as an aux compressed
+ * surface. In order to work around the bug we have to stall rendering with a
+ * CS and pixel scoreboard stall (implicit in the CS stall) and invalidate the
+ * texture cache whenever one of ASTC 5x5 or aux compressed may be in the
+ * sampler cache and we're about to render with something which samples from
+ * the other.
+ *
+ * In the case of a single shader which textures from both ASTC 5x5 and
+ * a texture which is CCS or HiZ compressed, we have to resolve the aux
+ * compressed texture prior to rendering. This second part is handled in
+ * brw_predraw_resolve_inputs() below.
+ *
+ * We have observed this issue to affect CCS and HiZ sampling but whether or
+ * not it also affects MCS is unknown. Because MCS has no concept of a
+ * resolve (and doing one would be stupid expensive), we choose to simply
+ * ignore the possibility and hope for the best.
+ */
+static void
+gen9_apply_astc5x5_wa_flush(struct brw_context *brw,
+                            enum gen9_astc5x5_wa_tex_type curr_mask)
+{
+   assert(brw->screen->devinfo.gen == 9);
+
+   /* brw->gen9_astc5x5_wa_tex_mask still holds the mask stored by the
+    * previous call; compare it against what is about to be sampled.
+    */
+   const bool astc_then_aux =
+      (brw->gen9_astc5x5_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5) &&
+      (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX);
+   const bool aux_then_astc =
+      (brw->gen9_astc5x5_wa_tex_mask & GEN9_ASTC5X5_WA_TEX_TYPE_AUX) &&
+      (curr_mask & GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5);
+
+   /* Switching between the two kinds in either direction requires the
+    * stall followed by the texture cache invalidate.
+    */
+   if (astc_then_aux || aux_then_astc) {
+      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL);
+      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
+   }
+
+   /* Remember the current mask for the next call. */
+   brw->gen9_astc5x5_wa_tex_mask = curr_mask;
+}
+
+static enum gen9_astc5x5_wa_tex_type
+gen9_astc5x5_wa_bits(mesa_format format, enum isl_aux_usage aux_usage)
+{
+   /* Any aux usage other than "none" and MCS counts as aux compressed for
+    * the purposes of the workaround, and takes priority over the format.
+    */
+   const bool aux_compressed = aux_usage != ISL_AUX_USAGE_NONE &&
+                               aux_usage != ISL_AUX_USAGE_MCS;
+   if (aux_compressed)
+      return GEN9_ASTC5X5_WA_TEX_TYPE_AUX;
+
+   switch (format) {
+   case MESA_FORMAT_RGBA_ASTC_5x5:
+   case MESA_FORMAT_SRGB8_ALPHA8_ASTC_5x5:
+      return GEN9_ASTC5X5_WA_TEX_TYPE_ASTC5x5;
+   default:
+      /* Neither aux compressed nor ASTC 5x5: no workaround bits. */
+      return 0;
+   }
+}
+
+/* Single-texture variant of the gen9 ASTC 5x5 workaround, for BLORP's
+ * use-cases where only one texture is bound.  The texture is classified
+ * from its format and aux usage, and the resulting bits are handed to the
+ * workaround flush.
+ */
+void
+gen9_apply_single_tex_astc5x5_wa(struct brw_context *brw,
+                                 mesa_format format,
+                                 enum isl_aux_usage aux_usage)
+{
+   const enum gen9_astc5x5_wa_tex_type tex_bits =
+      gen9_astc5x5_wa_bits(format, aux_usage);
+
+   gen9_apply_astc5x5_wa_flush(brw, tex_bits);
+}
+
static void
mark_textures_used_for_txf(BITSET_WORD *used_for_txf,
const struct gl_program *prog)
mark_textures_used_for_txf(used_for_txf, ctx->ComputeProgram._Current);
}
- /* Resolve depth buffer and render cache of each enabled texture. */
int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
+
+ enum gen9_astc5x5_wa_tex_type astc5x5_wa_bits = 0;
+ if (brw->screen->devinfo.gen == 9) {
+ /* In order to properly implement the ASTC 5x5 workaround for an
+ * arbitrary draw or dispatch call, we have to walk the entire list of
+ * textures looking for ASTC 5x5. If there is any ASTC 5x5 in this draw
+ * call, all aux compressed textures must be resolved and have aux
+ * compression disabled while sampling.
+ */
+ for (int i = 0; i <= maxEnabledUnit; i++) {
+ if (!ctx->Texture.Unit[i]._Current)
+ continue;
+ tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
+ if (!tex_obj || !tex_obj->mt)
+ continue;
+
+ astc5x5_wa_bits |= gen9_astc5x5_wa_bits(tex_obj->_Format,
+ tex_obj->mt->aux_usage);
+ }
+ gen9_apply_astc5x5_wa_flush(brw, astc5x5_wa_bits);
+ }
+
+ /* Resolve depth buffer and render cache of each enabled texture. */
for (int i = 0; i <= maxEnabledUnit; i++) {
if (!ctx->Texture.Unit[i]._Current)
continue;
intel_miptree_prepare_texture(brw, tex_obj->mt, view_format,
min_level, num_levels,
- min_layer, num_layers);
+ min_layer, num_layers,
+ astc5x5_wa_bits);
/* If any programs are using it with texelFetch, we may need to also do
* a prepare with an sRGB format to ensure texelFetch works "properly".
if (txf_format != view_format) {
intel_miptree_prepare_texture(brw, tex_obj->mt, txf_format,
min_level, num_levels,
- min_layer, num_layers);
+ min_layer, num_layers,
+ astc5x5_wa_bits);
}
}
if (irb) {
intel_miptree_prepare_texture(brw, irb->mt, irb->mt->surf.format,
irb->mt_level, 1,
- irb->mt_layer, irb->layer_count);
+ irb->mt_layer, irb->layer_count,
+ brw->gen9_astc5x5_wa_tex_mask);
}
}
}
* If the depth buffer was written to and if it has an accompanying HiZ
* buffer, then mark that it needs a depth resolve.
*
+ * If the stencil buffer was written to then mark that it may need to be
+ * copied to an R8 texture.
+ *
* If the color buffer is a multisample window system buffer, then
* mark that it needs a downsample.
*
brw_depth_cache_add_bo(brw, depth_irb->mt->bo);
}
- if (stencil_irb && brw->stencil_write_enabled)
- brw_depth_cache_add_bo(brw, stencil_irb->mt->bo);
+ if (stencil_irb && brw->stencil_write_enabled) {
+ struct intel_mipmap_tree *stencil_mt =
+ stencil_irb->mt->stencil_mt != NULL ?
+ stencil_irb->mt->stencil_mt : stencil_irb->mt;
+ brw_depth_cache_add_bo(brw, stencil_mt->bo);
+ intel_miptree_finish_write(brw, stencil_mt, stencil_irb->mt_level,
+ stencil_irb->mt_layer,
+ stencil_irb->layer_count, ISL_AUX_USAGE_NONE);
+ }
for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
struct intel_renderbuffer *irb =
static void
brw_prepare_drawing(struct gl_context *ctx,
- const struct gl_vertex_array *arrays,
const struct _mesa_index_buffer *ib,
bool index_bounds_valid,
GLuint min_index,
/* Bind all inputs, derive varying and size information:
*/
- brw_merge_inputs(brw, arrays);
+ brw_merge_inputs(brw);
brw->ib.ib = ib;
brw->ctx.NewDriverState |= BRW_NEW_INDICES;
}
}
+/**
+ * Implement workarounds for preemption:
+ *    - WaDisableMidObjectPreemptionForGSLineStripAdj
+ *    - WaDisableMidObjectPreemptionForTrifanOrPolygon
+ *    - WaDisableMidObjectPreemptionForLineLoop
+ *    - WA#0798
+ *
+ * Object preemption is requested for the draw unless at least one of the
+ * workarounds above applies.
+ */
+static void
+gen9_emit_preempt_wa(struct brw_context *brw,
+                     const struct _mesa_prim *prim)
+{
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
+   /* Only apply these workarounds for gen9 */
+   assert(devinfo->gen == 9);
+
+   /* WaDisableMidObjectPreemptionForGSLineStripAdj
+    *
+    *    WA: Disable mid-draw preemption when draw-call is a linestrip_adj
+    *    and GS is enabled.
+    */
+   const bool gs_linestrip_adj =
+      brw->primitive == _3DPRIM_LINESTRIP_ADJ && brw->gs.enabled;
+
+   /* WaDisableMidObjectPreemptionForTrifanOrPolygon
+    *
+    *    TriFan miscompare in Execlist Preemption test.  Cut index that is
+    *    on a previous context.  End the previous, then resume another
+    *    context with a tri-fan or polygon, and the vertex count is
+    *    corrupted.  If we preempt again we will cause corruption.
+    *
+    *    WA: Disable mid-draw preemption when draw-call has a tri-fan.
+    */
+   const bool trifan = brw->primitive == _3DPRIM_TRIFAN;
+
+   /* WaDisableMidObjectPreemptionForLineLoop
+    *
+    *    VF Stats Counters Missing a vertex when preemption enabled.
+    *
+    *    WA: Disable mid-draw preemption when the draw uses a lineloop
+    *    topology.
+    */
+   const bool lineloop = brw->primitive == _3DPRIM_LINELOOP;
+
+   /* WA#0798
+    *
+    *    VF is corrupting GAFS data when preempted on an instance boundary
+    *    and replayed with instancing enabled.
+    *
+    *    WA: Disable preemption when using instancing.
+    */
+   const bool instanced = prim->num_instances > 1;
+
+   const bool needs_wa = gs_linestrip_adj || trifan || lineloop || instanced;
+
+   brw_enable_obj_preemption(brw, !needs_wa);
+}
+
/* May fail if out of video memory for texture or vbo upload, or on
* fallback conditions.
*/
static void
brw_draw_single_prim(struct gl_context *ctx,
- const struct gl_vertex_array *arrays,
const struct _mesa_prim *prim,
unsigned prim_id,
struct brw_transform_feedback_object *xfb_obj,
{
struct brw_context *brw = brw_context(ctx);
const struct gen_device_info *devinfo = &brw->screen->devinfo;
- bool fail_next = false;
+ bool fail_next;
/* Flag BRW_NEW_DRAW_CALL on every draw. This allows us to have
* atoms that happen on every draw call.
/* Flush the batch if the batch/state buffers are nearly full. We can
* grow them if needed, but this is not free, so we'd like to avoid it.
*/
- intel_batchbuffer_require_space(brw, 1500, RENDER_RING);
+ intel_batchbuffer_require_space(brw, 1500);
brw_require_statebuffer_space(brw, 2400);
intel_batchbuffer_save_state(brw);
+ fail_next = intel_batchbuffer_saved_state_is_empty(brw);
if (brw->num_instances != prim->num_instances ||
brw->basevertex != prim->basevertex ||
brw->baseinstance = prim->base_instance;
if (prim_id > 0) { /* For i == 0 we just did this before the loop */
brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
- brw_merge_inputs(brw, arrays);
+ brw_merge_inputs(brw);
}
}
* always flag if the shader uses one of the values. For direct draws,
* we only flag if the values change.
*/
- const int new_basevertex =
+ const int new_firstvertex =
prim->indexed ? prim->basevertex : prim->start;
const int new_baseinstance = prim->base_instance;
const struct brw_vs_prog_data *vs_prog_data =
brw_vs_prog_data(brw->vs.base.prog_data);
if (prim_id > 0) {
const bool uses_draw_parameters =
- vs_prog_data->uses_basevertex ||
+ vs_prog_data->uses_firstvertex ||
vs_prog_data->uses_baseinstance;
if ((uses_draw_parameters && prim->is_indirect) ||
- (vs_prog_data->uses_basevertex &&
- brw->draw.params.gl_basevertex != new_basevertex) ||
+ (vs_prog_data->uses_firstvertex &&
+ brw->draw.params.firstvertex != new_firstvertex) ||
(vs_prog_data->uses_baseinstance &&
brw->draw.params.gl_baseinstance != new_baseinstance))
brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
}
- brw->draw.params.gl_basevertex = new_basevertex;
+ brw->draw.params.firstvertex = new_firstvertex;
brw->draw.params.gl_baseinstance = new_baseinstance;
brw_bo_unreference(brw->draw.draw_params_bo);
}
/* gl_DrawID always needs its own vertex buffer since it's not part of
- * the indirect parameter buffer. If the program uses gl_DrawID we need
- * to flag BRW_NEW_VERTICES. For the first iteration, we don't have
- * valid vs_prog_data, but we always flag BRW_NEW_VERTICES before
- * the loop.
+ * the indirect parameter buffer. Same for is_indexed_draw, which shares
+ * the buffer with gl_DrawID. If the program uses gl_DrawID, we need to
+ * flag BRW_NEW_VERTICES. For the first iteration, we don't have valid
+ * vs_prog_data, but we always flag BRW_NEW_VERTICES before the loop.
*/
- brw->draw.gl_drawid = prim->draw_id;
- brw_bo_unreference(brw->draw.draw_id_bo);
- brw->draw.draw_id_bo = NULL;
if (prim_id > 0 && vs_prog_data->uses_drawid)
brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
+ brw->draw.derived_params.gl_drawid = prim->draw_id;
+ brw->draw.derived_params.is_indexed_draw = prim->indexed ? ~0 : 0;
+
+ brw_bo_unreference(brw->draw.derived_draw_params_bo);
+ brw->draw.derived_draw_params_bo = NULL;
+ brw->draw.derived_draw_params_offset = 0;
+
if (devinfo->gen < 6)
brw_set_prim(brw, prim);
else
brw_upload_render_state(brw);
}
+ if (devinfo->gen == 9)
+ gen9_emit_preempt_wa(brw, prim);
+
brw_emit_prim(brw, prim, brw->primitive, xfb_obj, stream);
brw->batch.no_wrap = false;
}
-static bool
-all_varyings_in_vbos(const struct gl_vertex_array *arrays)
-{
- GLuint i;
-
- for (i = 0; i < VERT_ATTRIB_MAX; i++)
- if (arrays[i].BufferBinding->Stride &&
- arrays[i].BufferBinding->BufferObj->Name == 0)
- return false;
-
- return true;
-}
-
-
void
brw_draw_prims(struct gl_context *ctx,
{
unsigned i;
struct brw_context *brw = brw_context(ctx);
- const struct gl_vertex_array *arrays;
int predicate_state = brw->predicate.state;
struct brw_transform_feedback_object *xfb_obj =
(struct brw_transform_feedback_object *) gl_xfb_obj;
- /* The initial pushdown of the inputs array into the drivers */
- _mesa_set_drawing_arrays(ctx, brw->vb.draw_arrays.inputs);
- arrays = ctx->Array._DrawArrays;
- _vbo_update_inputs(ctx, &brw->vb.draw_arrays);
-
if (!brw_check_conditional_render(brw))
return;
* get the minimum and maximum of their index buffer so we know what range
* to upload.
*/
- if (!index_bounds_valid && !all_varyings_in_vbos(arrays)) {
+ if (!index_bounds_valid && _mesa_draw_user_array_bits(ctx) != 0) {
perf_debug("Scanning index buffer to compute index buffer bounds. "
"Use glDrawRangeElements() to avoid this.\n");
vbo_get_minmax_indices(ctx, prims, ib, &min_index, &max_index, nr_prims);
index_bounds_valid = true;
}
- brw_prepare_drawing(ctx, arrays, ib, index_bounds_valid, min_index,
- max_index);
+ brw_prepare_drawing(ctx, ib, index_bounds_valid, min_index, max_index);
/* Try drawing with the hardware, but don't do anything else if we can't
* manage it. swrast doesn't support our featureset, so we can't fall back
* to it.
brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT;
}
- brw_draw_single_prim(ctx, arrays, &prims[i], i, xfb_obj, stream,
- indirect);
+ brw_draw_single_prim(ctx, &prims[i], i, xfb_obj, stream, indirect);
}
brw_finish_drawing(ctx);
void
brw_draw_init(struct brw_context *brw)
{
- /* Keep our list of gl_vertex_array inputs */
- _vbo_init_inputs(&brw->vb.draw_arrays);
-
for (int i = 0; i < VERT_ATTRIB_MAX; i++)
brw->vb.inputs[i].buffer = -1;
brw->vb.nr_buffers = 0;