*
**************************************************************************/
+#include <sys/errno.h>
#include "main/glheader.h"
#include "main/context.h"
+#include "main/condrender.h"
+#include "main/samplerobj.h"
#include "main/state.h"
#include "main/enums.h"
+#include "main/macros.h"
#include "tnl/tnl.h"
#include "vbo/vbo_context.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
+#include "drivers/common/meta.h"
#include "brw_draw.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "intel_batchbuffer.h"
+#include "intel_fbo.h"
+#include "intel_mipmap_tree.h"
+#include "intel_regions.h"
#define FILE_DEBUG_FLAG DEBUG_PRIMS
* programs be immune to the active primitive (ie. cope with all
* possibilities). That may not be realistic however.
*/
-static GLuint brw_set_prim(struct brw_context *brw,
- const struct _mesa_prim *prim)
+static void brw_set_prim(struct brw_context *brw,
+ const struct _mesa_prim *prim)
{
struct gl_context *ctx = &brw->intel.ctx;
- GLenum mode = prim->mode;
+ uint32_t hw_prim = prim_to_hw_prim[prim->mode];
DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode));
/* Slight optimization to avoid the GS program when not needed:
+ * with non-flat shading and both polygon faces in GL_FILL mode, a quad
+ * strip is emitted as a triangle strip and a GL_QUADS draw of exactly four
+ * vertices as a triangle fan.  Otherwise hw_prim keeps the value looked up
+ * in prim_to_hw_prim[].
+ *
+ * Further down, BRW_NEW_PRIMITIVE is raised only when the resulting
+ * primitive differs from brw->primitive; the reduced primitive is compared
+ * only inside that branch and is keyed on prim->mode, not on the
+ * overridden hw_prim.
*/
- if (mode == GL_QUAD_STRIP &&
+ if (prim->mode == GL_QUAD_STRIP &&
ctx->Light.ShadeModel != GL_FLAT &&
ctx->Polygon.FrontMode == GL_FILL &&
ctx->Polygon.BackMode == GL_FILL)
- mode = GL_TRIANGLE_STRIP;
+ hw_prim = _3DPRIM_TRISTRIP;
if (prim->mode == GL_QUADS && prim->count == 4 &&
ctx->Light.ShadeModel != GL_FLAT &&
ctx->Polygon.FrontMode == GL_FILL &&
ctx->Polygon.BackMode == GL_FILL) {
- mode = GL_TRIANGLE_FAN;
+ hw_prim = _3DPRIM_TRIFAN;
}
- if (mode != brw->primitive) {
- brw->primitive = mode;
+ if (hw_prim != brw->primitive) {
+ brw->primitive = hw_prim;
brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
- if (reduced_prim[mode] != brw->intel.reduced_primitive) {
- brw->intel.reduced_primitive = reduced_prim[mode];
+ if (reduced_prim[prim->mode] != brw->intel.reduced_primitive) {
+ brw->intel.reduced_primitive = reduced_prim[prim->mode];
brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
}
}
+}
+
+/**
+ * Gen6+ counterpart of brw_set_prim(): pick the hardware primitive for
+ * \p prim and latch it in brw->primitive.
+ *
+ * While a HiZ op is in progress (brw->hiz.op is non-zero) the draw is
+ * asserted to be GL_TRIANGLES and is emitted as _3DPRIM_RECTLIST; otherwise
+ * the primitive comes straight from prim_to_hw_prim[].  BRW_NEW_PRIMITIVE is
+ * flagged only when the latched primitive changes.  Unlike brw_set_prim(),
+ * this neither applies the quad-to-triangle shortcuts nor touches
+ * intel.reduced_primitive.
+ */
+static void gen6_set_prim(struct brw_context *brw,
+ const struct _mesa_prim *prim)
+{
+ uint32_t topology;
+
+ DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode));
- return prim_to_hw_prim[mode];
+ if (!brw->hiz.op) {
+ topology = prim_to_hw_prim[prim->mode];
+ } else {
+ /* HiZ ops must arrive as triangles and are drawn as a rectangle list. */
+ assert(prim->mode == GL_TRIANGLES);
+ topology = _3DPRIM_RECTLIST;
+ }
+
+ /* Nothing to flag when the primitive is unchanged. */
+ if (topology == brw->primitive)
+ return;
+
+ brw->primitive = topology;
+ brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
}
const struct _mesa_prim *prim,
uint32_t hw_prim)
{
- struct brw_3d_primitive prim_packet;
struct intel_context *intel = &brw->intel;
+ int verts_per_instance;
+ int vertex_access_type;
+ int start_vertex_location;
+ int base_vertex_location;
DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
prim->start, prim->count);
- prim_packet.header.opcode = CMD_3D_PRIM;
- prim_packet.header.length = sizeof(prim_packet)/4 - 2;
- prim_packet.header.pad = 0;
- prim_packet.header.topology = hw_prim;
- prim_packet.header.indexed = prim->indexed;
-
- prim_packet.verts_per_instance = trim(prim->mode, prim->count);
- prim_packet.start_vert_location = prim->start;
- if (prim->indexed)
- prim_packet.start_vert_location += brw->ib.start_vertex_offset;
- else
- prim_packet.start_vert_location += brw->vb.start_vertex_bias;
- prim_packet.instance_count = 1;
- prim_packet.start_instance_location = 0;
- prim_packet.base_vert_location = prim->basevertex;
- if (prim->indexed)
- prim_packet.base_vert_location += brw->vb.start_vertex_bias;
+ start_vertex_location = prim->start;
+ base_vertex_location = prim->basevertex;
+ if (prim->indexed) {
+ vertex_access_type = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
+ start_vertex_location += brw->ib.start_vertex_offset;
+ base_vertex_location += brw->vb.start_vertex_bias;
+ } else {
+ vertex_access_type = GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
+ start_vertex_location += brw->vb.start_vertex_bias;
+ }
+
+ verts_per_instance = trim(prim->mode, prim->count);
+ /* If nothing to emit, just return. */
+ if (verts_per_instance == 0)
+ return;
/* If we're set to always flush, do it before and after the primitive emit.
* We want to catch both missed flushes that hurt instruction/state cache
if (intel->always_flush_cache) {
intel_batchbuffer_emit_mi_flush(intel);
}
- if (prim_packet.verts_per_instance) {
- intel_batchbuffer_data(&brw->intel, &prim_packet,
- sizeof(prim_packet), false);
+
+ BEGIN_BATCH(6);
+ OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
+ hw_prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
+ vertex_access_type);
+ OUT_BATCH(verts_per_instance);
+ OUT_BATCH(start_vertex_location);
+ OUT_BATCH(1); // instance count
+ OUT_BATCH(0); // start instance location
+ OUT_BATCH(base_vertex_location);
+ ADVANCE_BATCH();
+
+ intel->batch.need_workaround_flush = true;
+
+ if (intel->always_flush_cache) {
+ intel_batchbuffer_emit_mi_flush(intel);
}
+}
+
+/**
+ * Emit the draw command for \p prim using the seven-entry packet layout
+ * (the caller selects this variant when intel->gen >= 7).
+ *
+ * The topology (\p hw_prim) and the vertex-buffer access type are written in
+ * the entry that follows the command header.  Indexed draws use random
+ * access, offset the start vertex by brw->ib.start_vertex_offset and add
+ * brw->vb.start_vertex_bias to the base vertex; non-indexed draws use
+ * sequential access and add brw->vb.start_vertex_bias to the start vertex.
+ * Exactly one instance is drawn, starting at instance 0.
+ *
+ * Nothing is emitted when trim() reduces the vertex count to zero.
+ */
+static void gen7_emit_prim(struct brw_context *brw,
+ const struct _mesa_prim *prim,
+ uint32_t hw_prim)
+{
+ struct intel_context *intel = &brw->intel;
+ int first_vertex = prim->start;
+ int base_vertex = prim->basevertex;
+ int access_type;
+ int vertex_count;
+
+ DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
+ prim->start, prim->count);
+
+ if (!prim->indexed) {
+ access_type = GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
+ first_vertex += brw->vb.start_vertex_bias;
+ } else {
+ access_type = GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
+ first_vertex += brw->ib.start_vertex_offset;
+ base_vertex += brw->vb.start_vertex_bias;
+ }
+
+ /* trim() may leave nothing to draw; emit no packet in that case. */
+ vertex_count = trim(prim->mode, prim->count);
+ if (vertex_count == 0)
+ return;
+
+ /* When always_flush_cache is set, flush both before and after the
+ * primitive.  This catches missed flushes that hurt the instruction/state
+ * cache as well as missed flushes of the render cache as it heads to
+ * other parts of the driver besides the draw code.
+ */
+ if (intel->always_flush_cache)
+ intel_batchbuffer_emit_mi_flush(intel);
+
+ BEGIN_BATCH(7);
+ OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
+ OUT_BATCH(hw_prim | access_type);
+ OUT_BATCH(vertex_count);
+ OUT_BATCH(first_vertex);
+ OUT_BATCH(1); /* instance count */
+ OUT_BATCH(0); /* start instance location */
+ OUT_BATCH(base_vertex);
+ ADVANCE_BATCH();
+
if (intel->always_flush_cache) {
intel_batchbuffer_emit_mi_flush(intel);
}
}
+
static void brw_merge_inputs( struct brw_context *brw,
const struct gl_client_array *arrays[])
{
brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS;
}
-/* XXX: could split the primitive list to fallback only on the
- * non-conformant primitives.
+/**
+ * \brief Resolve buffers before drawing.
+ *
+ * Resolve the depth buffer's HiZ buffer and resolve the depth buffer of each
+ * enabled depth texture.
+ *
+ * Does nothing while a HiZ op is already in progress (brw->hiz.op), so that
+ * the resolves cannot recurse.  If any resolve reports that it did work,
+ * the vbo module's arrays are rebound afterwards (see the comment at the
+ * end of the function).
+ *
+ * (In the future, this will also perform MSAA resolves).
*/
-static GLboolean check_fallbacks( struct brw_context *brw,
- const struct _mesa_prim *prim,
- GLuint nr_prims )
+static void
+brw_predraw_resolve_buffers(struct brw_context *brw)
{
struct gl_context *ctx = &brw->intel.ctx;
- GLuint i;
+ struct intel_context *intel = &brw->intel;
+ struct intel_renderbuffer *depth_irb;
+ struct intel_texture_object *tex_obj;
+ bool did_resolve = false;
- /* If we don't require strict OpenGL conformance, never
- * use fallbacks. If we're forcing fallbacks, always
- * use fallfacks.
- */
- if (brw->intel.conformance_mode == 0)
- return GL_FALSE;
+ /* Avoid recursive HiZ op. */
+ if (brw->hiz.op) {
+ return;
+ }
- if (brw->intel.conformance_mode == 2)
- return GL_TRUE;
+ /* Resolve the depth buffer's HiZ buffer. */
+ depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
+ if (depth_irb && depth_irb->mt) {
+ did_resolve |= intel_renderbuffer_resolve_hiz(intel, depth_irb);
+ }
- if (ctx->Polygon.SmoothFlag) {
- for (i = 0; i < nr_prims; i++)
- if (reduced_prim[prim[i].mode] == GL_TRIANGLES)
- return GL_TRUE;
+ /* Resolve the depth of the texture bound to each enabled unit.  Every
+ * current texture that has a miptree is passed on; no depth-format test
+ * is made here.
+ */
+ for (int i = 0; i < BRW_MAX_TEX_UNIT; i++) {
+ if (!ctx->Texture.Unit[i]._ReallyEnabled)
+ continue;
+ tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
+ if (!tex_obj || !tex_obj->mt)
+ continue;
+ did_resolve |= intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt);
}
- /* BRW hardware will do AA lines, but they are non-conformant it
- * seems. TBD whether we keep this fallback:
- */
- if (ctx->Line.SmoothFlag) {
- for (i = 0; i < nr_prims; i++)
- if (reduced_prim[prim[i].mode] == GL_LINES)
- return GL_TRUE;
+ if (did_resolve) {
+ /* Call vbo_bind_arrays() to synchronize the vbo module's vertex
+ * attributes to the gl_context's.
+ *
+ * Details
+ * -------
+ * The vbo module tracks vertex attributes separately from the
+ * gl_context. Specifically, the vbo module maintains vertex attributes
+ * in vbo_exec_context::array::inputs, which is synchronized with
+ * gl_context::Array::ArrayObj::VertexAttrib by vbo_bind_arrays().
+ * vbo_draw_arrays() calls vbo_bind_arrays() to perform the
+ * synchronization before calling the real draw call,
+ * vbo_context::draw_arrays.
+ *
+ * At this point (after performing a resolve meta-op but before calling
+ * vbo_bind_arrays), the gl_context's vertex attributes have been
+ * restored to their original state (that is, their state before the
+ * meta-op began), but the vbo module's vertex attributes are those used
+ * in the last meta-op. Therefore we must manually synchronize the two with
+ * vbo_bind_arrays() before continuing with the original draw command.
+ */
+ _mesa_update_state(ctx);
+ vbo_bind_arrays(ctx);
+ _mesa_update_state(ctx);
}
+}
- /* Stipple -- these fallbacks could be resolved with a little
- * bit of work?
- */
- if (ctx->Line.StippleFlag) {
- for (i = 0; i < nr_prims; i++) {
- /* GS doesn't get enough information to know when to reset
- * the stipple counter?!?
- */
- if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP)
- return GL_TRUE;
-
- if (prim[i].mode == GL_POLYGON &&
- (ctx->Polygon.FrontMode == GL_LINE ||
- ctx->Polygon.BackMode == GL_LINE))
- return GL_TRUE;
- }
+/**
+ * \brief Flag buffers that need resolving once a draw has been issued.
+ *
+ * Marks the draw framebuffer's depth renderbuffer as needing a depth resolve
+ * when one is attached, depth writes are enabled (ctx->Depth.Mask) and no
+ * HiZ op is in progress.  Whether the buffer actually has a HiZ buffer is
+ * not tested here -- presumably intel_renderbuffer_set_needs_depth_resolve()
+ * deals with that; TODO confirm.
+ *
+ * (In the future, this will also mark needed MSAA resolves).
+ */
+static void brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->intel.ctx;
+ struct intel_renderbuffer *depth_irb =
+ intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
+ const bool depth_may_be_dirty =
+ depth_irb && ctx->Depth.Mask && !brw->hiz.op;
+
+ if (depth_may_be_dirty) {
+ intel_renderbuffer_set_needs_depth_resolve(depth_irb);
}
+}
- if (ctx->Point.SmoothFlag) {
- for (i = 0; i < nr_prims; i++)
- if (prim[i].mode == GL_POINTS)
- return GL_TRUE;
+/**
+ * Vertices per primitive for \p mode: 1 for points, 2 for any line mode and
+ * 3 for any triangle, quad or polygon mode.  An unrecognised mode is
+ * reported through _mesa_problem() and yields 0.
+ */
+static int
+verts_per_prim(GLenum mode)
+{
+ int verts = 0;
+
+ switch (mode) {
+ case GL_POINTS:
+ verts = 1;
+ break;
+ case GL_LINES:
+ case GL_LINE_LOOP:
+ case GL_LINE_STRIP:
+ verts = 2;
+ break;
+ case GL_TRIANGLES:
+ case GL_TRIANGLE_STRIP:
+ case GL_TRIANGLE_FAN:
+ case GL_QUADS:
+ case GL_QUAD_STRIP:
+ case GL_POLYGON:
+ verts = 3;
+ break;
+ default:
+ _mesa_problem(NULL,
+ "unknown prim type in transform feedback primitive count");
+ break;
}
+ return verts;
+}
- /* BRW hardware doesn't handle GL_CLAMP texturing correctly;
- * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP
- * as GL_CLAMP_TO_EDGE instead. If we're using GL_CLAMP, and
- * we want strict conformance, force the fallback.
- * Right now, we only do this for 2D textures.
- */
- {
- int u;
- for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u];
- if (texUnit->Enabled) {
- if (texUnit->Enabled & TEXTURE_1D_BIT) {
- if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->WrapS == GL_CLAMP) {
- return GL_TRUE;
- }
- }
- if (texUnit->Enabled & TEXTURE_2D_BIT) {
- if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapS == GL_CLAMP ||
- texUnit->CurrentTex[TEXTURE_2D_INDEX]->WrapT == GL_CLAMP) {
- return GL_TRUE;
- }
- }
- if (texUnit->Enabled & TEXTURE_3D_BIT) {
- if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapS == GL_CLAMP ||
- texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapT == GL_CLAMP ||
- texUnit->CurrentTex[TEXTURE_3D_INDEX]->WrapR == GL_CLAMP) {
- return GL_TRUE;
- }
- }
- }
- }
+/**
+ * Update internal counters based on the drawing operation described in
+ * prim.
+ *
+ * brw->sol.primitives_generated always grows by
+ * count_tessellated_primitives(prim).  When the current transform feedback
+ * object is active and not paused, brw->sol.svbi_0_starting_index and
+ * brw->sol.primitives_written are advanced as well, limited to the number
+ * of whole primitives that still fit below brw->sol.svbi_0_max_index.
+ */
+static void
+brw_update_primitive_count(struct brw_context *brw,
+ const struct _mesa_prim *prim)
+{
+ uint32_t count = count_tessellated_primitives(prim);
+ brw->sol.primitives_generated += count;
+ if (brw->intel.ctx.TransformFeedback.CurrentObject->Active &&
+ !brw->intel.ctx.TransformFeedback.CurrentObject->Paused) {
+ /* Update brw->sol.svbi_0_starting_index to reflect the amount by which
+ * the hardware is going to increment SVBI 0 when this drawing operation
+ * occurs. This is necessary because the kernel does not (yet) save and
+ * restore GPU registers when context switching, so we'll need to be
+ * able to reload SVBI 0 with the correct value in case we have to start
+ * a new batch buffer.
+ */
+ unsigned verts = verts_per_prim(prim->mode);
+ uint32_t space_avail;
+ uint32_t primitives_written;
+
+ /* verts_per_prim() returns 0 (after reporting the problem) for a mode
+ * it does not recognise; leave the feedback counters untouched rather
+ * than divide by zero below.
+ */
+ if (verts == 0)
+ return;
+
+ space_avail =
+ (brw->sol.svbi_0_max_index - brw->sol.svbi_0_starting_index) / verts;
+ primitives_written = MIN2 (space_avail, count);
+ brw->sol.svbi_0_starting_index += verts * primitives_written;
+
+ /* And update the TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN query. */
+ brw->sol.primitives_written += primitives_written;
}
-
- /* Nothing stopping us from the fast path now */
- return GL_FALSE;
}
/* May fail if out of video memory for texture or vbo upload, or on
* fallback conditions.
+ *
+ * In the code visible here the return value starts out true and becomes
+ * false when brw->intel.Fallback is set after brw_upload_state(), or when
+ * the flush that follows a failed retry returns -ENOSPC.
*/
-static GLboolean brw_try_draw_prims( struct gl_context *ctx,
+static bool brw_try_draw_prims( struct gl_context *ctx,
const struct gl_client_array *arrays[],
const struct _mesa_prim *prim,
GLuint nr_prims,
{
struct intel_context *intel = intel_context(ctx);
struct brw_context *brw = brw_context(ctx);
- GLboolean retval = GL_FALSE;
- GLboolean warn = GL_FALSE;
+ bool retval = true;
GLuint i;
+ bool fail_next = false;
if (ctx->NewState)
_mesa_update_state( ctx );
*/
brw_validate_textures( brw );
- if (check_fallbacks(brw, prim, nr_prims))
- return GL_FALSE;
+ /* Resolves must occur after updating state and finalizing textures but
+ * before setting up any hardware state for this draw call.
+ */
+ brw_predraw_resolve_buffers(brw);
/* Bind all inputs, derive varying and size information:
*/
intel_prepare_render(intel);
for (i = 0; i < nr_prims; i++) {
- uint32_t hw_prim;
+ int estimated_max_prim_size;
+
+ estimated_max_prim_size = 512; /* batchbuffer commands */
+ estimated_max_prim_size += (BRW_MAX_TEX_UNIT *
+ (sizeof(struct brw_sampler_state) +
+ sizeof(struct gen5_sampler_default_color)));
+ estimated_max_prim_size += 1024; /* gen6 VS push constants */
+ estimated_max_prim_size += 1024; /* gen6 WM push constants */
+ estimated_max_prim_size += 512; /* misc. pad */
/* Flush the batch if it's approaching full, so that we don't wrap while
* we've got validated state that needs to be in the same batch as the
- * primitives. This fraction is just a guess (minimal full state plus
- * a primitive is around 512 bytes), and would be better if we had
- * an upper bound of how much we might emit in a single
- * brw_try_draw_prims().
+ * primitives.
+ */
+ intel_batchbuffer_require_space(intel, estimated_max_prim_size, false);
+ intel_batchbuffer_save_state(intel);
+
+ if (intel->gen < 6)
+ brw_set_prim(brw, &prim[i]);
+ else
+ gen6_set_prim(brw, &prim[i]);
+
+retry:
+ /* Note that before the loop, brw->state.dirty.brw was set to != 0, and
+ * that the state updated in the loop outside of this block is that in
+ * *_set_prim or intel_batchbuffer_flush(), which only impacts
+ * brw->state.dirty.brw.
*/
- intel_batchbuffer_require_space(intel, 1024, false);
-
- hw_prim = brw_set_prim(brw, &prim[i]);
if (brw->state.dirty.brw) {
- brw_validate_state(brw);
+ intel->no_batch_wrap = true;
+ brw_upload_state(brw);
- /* Various fallback checks: */
- if (brw->intel.Fallback)
+ if (unlikely(brw->intel.Fallback)) {
+ intel->no_batch_wrap = false;
+ retval = false;
goto out;
+ }
+ }
- /* Check that we can fit our state in with our existing batchbuffer, or
- * flush otherwise.
- */
- if (dri_bufmgr_check_aperture_space(brw->state.validated_bos,
- brw->state.validated_bo_count)) {
- static GLboolean warned;
- intel_batchbuffer_flush(intel);
+ if (intel->gen >= 7)
+ gen7_emit_prim(brw, &prim[i], brw->primitive);
+ else
+ brw_emit_prim(brw, &prim[i], brw->primitive);
- /* Validate the state after we flushed the batch (which would have
- * changed the set of dirty state). If we still fail to
- * check_aperture, warn of what's happening, but attempt to continue
- * on since it may succeed anyway, and the user would probably rather
- * see a failure and a warning than a fallback.
- */
- brw_validate_state(brw);
- if (!warned &&
- dri_bufmgr_check_aperture_space(brw->state.validated_bos,
- brw->state.validated_bo_count)) {
- warn = GL_TRUE;
- warned = GL_TRUE;
+ intel->no_batch_wrap = false;
+
+ /* If the aperture check on the batch reports a problem, roll the batch
+ * back to the state saved before this primitive, flush, and retry
+ * once.  If the check still fails after that retry, flush what we have
+ * and report failure when the flush returns -ENOSPC.
+ */
+ if (dri_bufmgr_check_aperture_space(&intel->batch.bo, 1)) {
+ if (!fail_next) {
+ intel_batchbuffer_reset_to_saved(intel);
+ intel_batchbuffer_flush(intel);
+ fail_next = true;
+ goto retry;
+ } else {
+ if (intel_batchbuffer_flush(intel) == -ENOSPC) {
+ static bool warned = false;
+
+ if (!warned) {
+ /* The trailing space keeps the two adjacent literals from
+ * running together when they are concatenated.
+ */
+ fprintf(stderr, "i965: Single primitive emit exceeded "
+ "available aperture space\n");
+ warned = true;
+ }
+
+ retval = false;
}
}
-
- intel->no_batch_wrap = GL_TRUE;
- brw_upload_state(brw);
}
- brw_emit_prim(brw, &prim[i], hw_prim);
-
- intel->no_batch_wrap = GL_FALSE;
-
- retval = GL_TRUE;
+ if (!_mesa_meta_in_progress(ctx))
+ brw_update_primitive_count(brw, &prim[i]);
}
if (intel->always_flush_batch)
out:
brw_state_cache_check_size(brw);
-
- if (warn)
- fprintf(stderr, "i965: Single primitive emit potentially exceeded "
- "available aperture space\n");
-
- if (!retval)
- DBG("%s failed\n", __FUNCTION__);
+ brw_postdraw_set_buffers_need_resolve(brw);
return retval;
}
const struct _mesa_index_buffer *ib,
GLboolean index_bounds_valid,
GLuint min_index,
- GLuint max_index )
+ GLuint max_index,
+ struct gl_transform_feedback_object *tfb_vertcount )
{
- GLboolean retval;
+ bool retval;
+
+ if (!_mesa_check_conditional_render(ctx))
+ return;
if (!vbo_all_varyings_in_vbos(arrays)) {
if (!index_bounds_valid)
*/
if (!retval) {
_swsetup_Wakeup(ctx);
+ _tnl_wakeup(ctx);
_tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
}