*
**************************************************************************/
+#include <sys/errno.h>
#include "main/glheader.h"
#include "main/context.h"
+#include "main/condrender.h"
+#include "main/samplerobj.h"
#include "main/state.h"
#include "main/enums.h"
#include "tnl/tnl.h"
* programs be immune to the active primitive (ie. cope with all
* possibilities). That may not be realistic however.
*/
-static GLuint brw_set_prim(struct brw_context *brw,
- const struct _mesa_prim *prim)
+static void brw_set_prim(struct brw_context *brw,
+ const struct _mesa_prim *prim)
{
struct gl_context *ctx = &brw->intel.ctx;
- GLenum mode = prim->mode;
+ uint32_t hw_prim = prim_to_hw_prim[prim->mode]; /* start from the table entry for the GL mode */
DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode));
/* Slight optimization to avoid the GS program when not needed:
*/
- if (mode == GL_QUAD_STRIP &&
+ if (prim->mode == GL_QUAD_STRIP &&
ctx->Light.ShadeModel != GL_FLAT &&
ctx->Polygon.FrontMode == GL_FILL &&
ctx->Polygon.BackMode == GL_FILL)
- mode = GL_TRIANGLE_STRIP;
+ hw_prim = _3DPRIM_TRISTRIP; /* smooth-shaded, filled quad strip: emit as a triangle strip */
if (prim->mode == GL_QUADS && prim->count == 4 &&
ctx->Light.ShadeModel != GL_FLAT &&
ctx->Polygon.FrontMode == GL_FILL &&
ctx->Polygon.BackMode == GL_FILL) {
- mode = GL_TRIANGLE_FAN;
+ hw_prim = _3DPRIM_TRIFAN; /* a single smooth-shaded, filled quad: emit as a triangle fan */
}
- if (mode != brw->primitive) {
- brw->primitive = mode;
+ if (hw_prim != brw->primitive) { /* brw->primitive now caches the hardware value, not the GL enum */
+ brw->primitive = hw_prim;
brw->state.dirty.brw |= BRW_NEW_PRIMITIVE;
- if (reduced_prim[mode] != brw->intel.reduced_primitive) {
- brw->intel.reduced_primitive = reduced_prim[mode];
+ if (reduced_prim[prim->mode] != brw->intel.reduced_primitive) { /* looked up from the original GL mode, not the substituted one */
+ brw->intel.reduced_primitive = reduced_prim[prim->mode];
brw->state.dirty.brw |= BRW_NEW_REDUCED_PRIMITIVE;
}
}
+}
+
+static void gen6_set_prim(struct brw_context *brw,
+ const struct _mesa_prim *prim)
+{
+ uint32_t hw_prim = prim_to_hw_prim[prim->mode]; /* plain table lookup; no quad/quad-strip substitution in this variant */
- return prim_to_hw_prim[mode];
+ DBG("PRIM: %s\n", _mesa_lookup_enum_by_nr(prim->mode));
+
+ if (hw_prim != brw->primitive) { /* flag state dirty only when the cached primitive actually changes */
+ brw->primitive = hw_prim;
+ brw->state.dirty.brw |= BRW_NEW_PRIMITIVE; /* unlike brw_set_prim, the reduced primitive is not tracked here */
+ }
}
OUT_BATCH(base_vertex_location);
ADVANCE_BATCH();
+ intel->batch.need_workaround_flush = true;
+
+ if (intel->always_flush_cache) {
+ intel_batchbuffer_emit_mi_flush(intel);
+ }
+}
+
+static void gen7_emit_prim(struct brw_context *brw,
+ const struct _mesa_prim *prim,
+ uint32_t hw_prim)
+{
+ struct intel_context *intel = &brw->intel;
+ int verts_per_instance;
+ int vertex_access_type;
+ int start_vertex_location;
+ int base_vertex_location;
+
+ DBG("PRIM: %s %d %d\n", _mesa_lookup_enum_by_nr(prim->mode),
+ prim->start, prim->count);
+
+ start_vertex_location = prim->start;
+ base_vertex_location = prim->basevertex;
+ if (prim->indexed) {
+ vertex_access_type = GEN7_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM;
+ start_vertex_location += brw->ib.start_vertex_offset; /* indexed: the start is offset within the index buffer */
+ base_vertex_location += brw->vb.start_vertex_bias; /* indexed: the vertex bias goes into the base vertex */
+ } else {
+ vertex_access_type = GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
+ start_vertex_location += brw->vb.start_vertex_bias; /* non-indexed: the vertex bias goes into the start vertex */
+ }
+
+ verts_per_instance = trim(prim->mode, prim->count); /* NOTE(review): trim() is defined elsewhere; presumably drops vertices that cannot form a whole primitive -- confirm */
+
+ /* If nothing to emit, just return. */
+ if (verts_per_instance == 0)
+ return;
+
+ /* If we're set to always flush, do it before and after the primitive emit.
+ * We want to catch both missed flushes that hurt the instruction/state cache
+ * and missed flushes of the render cache as it heads to other parts of
+ * the driver besides the draw code.
+ */
+ if (intel->always_flush_cache) {
+ intel_batchbuffer_emit_mi_flush(intel);
+ }
+
+ BEGIN_BATCH(7); /* must match the seven OUT_BATCH entries below */
+ OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
+ OUT_BATCH(hw_prim | vertex_access_type);
+ OUT_BATCH(verts_per_instance);
+ OUT_BATCH(start_vertex_location);
+ OUT_BATCH(1); // instance count
+ OUT_BATCH(0); // start instance location
+ OUT_BATCH(base_vertex_location);
+ ADVANCE_BATCH();
+
if (intel->always_flush_cache) {
intel_batchbuffer_emit_mi_flush(intel);
}
}
+
static void brw_merge_inputs( struct brw_context *brw,
const struct gl_client_array *arrays[])
{
brw->state.dirty.brw |= BRW_NEW_INPUT_DIMENSIONS;
}
-/* XXX: could split the primitive list to fallback only on the
- * non-conformant primitives.
- */
-static GLboolean check_fallbacks( struct brw_context *brw,
- const struct _mesa_prim *prim,
- GLuint nr_prims )
-{
- struct gl_context *ctx = &brw->intel.ctx;
- GLuint i;
-
- /* If we don't require strict OpenGL conformance, never
- * use fallbacks. If we're forcing fallbacks, always
- * use fallfacks.
- */
- if (brw->intel.conformance_mode == 0)
- return GL_FALSE;
-
- if (brw->intel.conformance_mode == 2)
- return GL_TRUE;
-
- if (ctx->Polygon.SmoothFlag) {
- for (i = 0; i < nr_prims; i++)
- if (reduced_prim[prim[i].mode] == GL_TRIANGLES)
- return GL_TRUE;
- }
-
- /* BRW hardware will do AA lines, but they are non-conformant it
- * seems. TBD whether we keep this fallback:
- */
- if (ctx->Line.SmoothFlag) {
- for (i = 0; i < nr_prims; i++)
- if (reduced_prim[prim[i].mode] == GL_LINES)
- return GL_TRUE;
- }
-
- /* Stipple -- these fallbacks could be resolved with a little
- * bit of work?
- */
- if (ctx->Line.StippleFlag) {
- for (i = 0; i < nr_prims; i++) {
- /* GS doesn't get enough information to know when to reset
- * the stipple counter?!?
- */
- if (prim[i].mode == GL_LINE_LOOP || prim[i].mode == GL_LINE_STRIP)
- return GL_TRUE;
-
- if (prim[i].mode == GL_POLYGON &&
- (ctx->Polygon.FrontMode == GL_LINE ||
- ctx->Polygon.BackMode == GL_LINE))
- return GL_TRUE;
- }
- }
-
- if (ctx->Point.SmoothFlag) {
- for (i = 0; i < nr_prims; i++)
- if (prim[i].mode == GL_POINTS)
- return GL_TRUE;
- }
-
- /* BRW hardware doesn't handle GL_CLAMP texturing correctly;
- * brw_wm_sampler_state:translate_wrap_mode() treats GL_CLAMP
- * as GL_CLAMP_TO_EDGE instead. If we're using GL_CLAMP, and
- * we want strict conformance, force the fallback.
- * Right now, we only do this for 2D textures.
- */
- {
- int u;
- for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
- struct gl_texture_unit *texUnit = &ctx->Texture.Unit[u];
- if (texUnit->Enabled) {
- if (texUnit->Enabled & TEXTURE_1D_BIT) {
- if (texUnit->CurrentTex[TEXTURE_1D_INDEX]->Sampler.WrapS == GL_CLAMP) {
- return GL_TRUE;
- }
- }
- if (texUnit->Enabled & TEXTURE_2D_BIT) {
- if (texUnit->CurrentTex[TEXTURE_2D_INDEX]->Sampler.WrapS == GL_CLAMP ||
- texUnit->CurrentTex[TEXTURE_2D_INDEX]->Sampler.WrapT == GL_CLAMP) {
- return GL_TRUE;
- }
- }
- if (texUnit->Enabled & TEXTURE_3D_BIT) {
- if (texUnit->CurrentTex[TEXTURE_3D_INDEX]->Sampler.WrapS == GL_CLAMP ||
- texUnit->CurrentTex[TEXTURE_3D_INDEX]->Sampler.WrapT == GL_CLAMP ||
- texUnit->CurrentTex[TEXTURE_3D_INDEX]->Sampler.WrapR == GL_CLAMP) {
- return GL_TRUE;
- }
- }
- }
- }
- }
-
- /* Nothing stopping us from the fast path now */
- return GL_FALSE;
-}
-
/* May fail if out of video memory for texture or vbo upload, or on
* fallback conditions.
*/
-static GLboolean brw_try_draw_prims( struct gl_context *ctx,
+static bool brw_try_draw_prims( struct gl_context *ctx,
const struct gl_client_array *arrays[],
const struct _mesa_prim *prim,
GLuint nr_prims,
{
struct intel_context *intel = intel_context(ctx);
struct brw_context *brw = brw_context(ctx);
- GLboolean retval = GL_FALSE;
- GLboolean warn = GL_FALSE;
+ bool retval = true; /* success unless a fallback or an -ENOSPC flush clears it below */
GLuint i;
+ bool fail_next = false; /* set once a flush-and-retry has been attempted; not reset in the visible code */
if (ctx->NewState)
_mesa_update_state( ctx );
*/
brw_validate_textures( brw );
- if (check_fallbacks(brw, prim, nr_prims))
- return GL_FALSE;
-
/* Bind all inputs, derive varying and size information:
*/
brw_merge_inputs( brw, arrays );
intel_prepare_render(intel);
for (i = 0; i < nr_prims; i++) {
- uint32_t hw_prim;
+ int estimated_max_prim_size;
+
+ estimated_max_prim_size = 512; /* batchbuffer commands */
+ estimated_max_prim_size += (BRW_MAX_TEX_UNIT *
+ (sizeof(struct brw_sampler_state) +
+ sizeof(struct gen5_sampler_default_color)));
+ estimated_max_prim_size += 1024; /* gen6 VS push constants */
+ estimated_max_prim_size += 1024; /* gen6 WM push constants */
+ estimated_max_prim_size += 512; /* misc. pad */
/* Flush the batch if it's approaching full, so that we don't wrap while
* we've got validated state that needs to be in the same batch as the
- * primitives. This fraction is just a guess (minimal full state plus
- * a primitive is around 512 bytes), and would be better if we had
- * an upper bound of how much we might emit in a single
- * brw_try_draw_prims().
+ * primitives.
+ */
+ intel_batchbuffer_require_space(intel, estimated_max_prim_size, false);
+ intel_batchbuffer_save_state(intel); /* snapshot taken so the retry path below can roll this primitive back */
+
+ if (intel->gen < 6)
+ brw_set_prim(brw, &prim[i]);
+ else
+ gen6_set_prim(brw, &prim[i]);
+
+retry:
+ /* Note that before the loop, brw->state.dirty.brw was set to != 0, and
+ * that the state updated in the loop outside of this block is that in
+ * *_set_prim or intel_batchbuffer_flush(), which only impacts
+ * brw->state.dirty.brw.
*/
- intel_batchbuffer_require_space(intel, 1024, false);
-
- hw_prim = brw_set_prim(brw, &prim[i]);
if (brw->state.dirty.brw) {
- brw_validate_state(brw);
+ intel->no_batch_wrap = true;
+ brw_upload_state(brw);
- /* Various fallback checks: */
- if (brw->intel.Fallback)
+ if (unlikely(brw->intel.Fallback)) {
+ intel->no_batch_wrap = false; /* clear the flag before bailing out */
+ retval = false;
goto out;
-
- /* Check that we can fit our state in with our existing batchbuffer, or
- * flush otherwise.
- */
- if (dri_bufmgr_check_aperture_space(brw->state.validated_bos,
- brw->state.validated_bo_count)) {
- static GLboolean warned;
- intel_batchbuffer_flush(intel);
-
- /* Validate the state after we flushed the batch (which would have
- * changed the set of dirty state). If we still fail to
- * check_aperture, warn of what's happening, but attempt to continue
- * on since it may succeed anyway, and the user would probably rather
- * see a failure and a warning than a fallback.
- */
- brw_validate_state(brw);
- if (!warned &&
- dri_bufmgr_check_aperture_space(brw->state.validated_bos,
- brw->state.validated_bo_count)) {
- warn = GL_TRUE;
- warned = GL_TRUE;
- }
}
-
- intel->no_batch_wrap = GL_TRUE;
- brw_upload_state(brw);
}
- brw_emit_prim(brw, &prim[i], hw_prim);
+ if (intel->gen >= 7)
+ gen7_emit_prim(brw, &prim[i], brw->primitive);
+ else
+ brw_emit_prim(brw, &prim[i], brw->primitive);
- intel->no_batch_wrap = GL_FALSE;
+ intel->no_batch_wrap = false;
- retval = GL_TRUE;
+ /* A non-zero result is treated as "the batch does not fit": first roll
+ * back to the saved state, flush and re-emit this primitive; if a retry
+ * was already used, flush as-is and report failure only on -ENOSPC.
+ */
+ if (dri_bufmgr_check_aperture_space(&intel->batch.bo, 1)) {
+ if (!fail_next) {
+ intel_batchbuffer_reset_to_saved(intel);
+ intel_batchbuffer_flush(intel);
+ fail_next = true;
+ goto retry;
+ } else {
+ if (intel_batchbuffer_flush(intel) == -ENOSPC) {
+ static bool warned = false; /* print the diagnostic at most once per process */
+
+ if (!warned) {
+ fprintf(stderr, "i965: Single primitive emit exceeded "
+ "available aperture space\n");
+ warned = true;
+ }
+
+ retval = false;
+ }
+ }
+ }
}
if (intel->always_flush_batch)
brw_state_cache_check_size(brw);
- if (warn)
- fprintf(stderr, "i965: Single primitive emit potentially exceeded "
- "available aperture space\n");
-
- if (!retval)
- DBG("%s failed\n", __FUNCTION__);
-
return retval;
}
GLuint min_index,
GLuint max_index )
{
- GLboolean retval;
+ bool retval;
+
+ if (!_mesa_check_conditional_render(ctx))
+ return;
if (!vbo_all_varyings_in_vbos(arrays)) {
if (!index_bounds_valid)
*/
if (!retval) {
_swsetup_Wakeup(ctx);
+ _tnl_wakeup(ctx);
_tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
}