unsigned int dispatch_grf_start_reg;
unsigned int curbe_offset;
- unsigned int urb_size;
unsigned int nr_surfaces, sampler_count;
GLboolean uses_depth, computes_depth, uses_kill, is_glsl;
GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable;
+ GLboolean color_write_enable;
GLfloat offset_units, offset_factor;
};
+bool
+brw_color_buffer_write_enabled(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->intel.ctx;
+ const struct gl_fragment_program *fp = brw->fragment_program;
+ int i;
+
+ /* _NEW_BUFFERS */
+ for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
+ struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
+
+ /* _NEW_COLOR */
+ if (rb &&
+ (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_COLOR) ||
+ fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) &&
+ (ctx->Color.ColorMask[i][0] ||
+ ctx->Color.ColorMask[i][1] ||
+ ctx->Color.ColorMask[i][2] ||
+ ctx->Color.ColorMask[i][3])) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
static void
wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
{
- GLcontext *ctx = &brw->intel.ctx;
+ struct gl_context *ctx = &brw->intel.ctx;
const struct gl_fragment_program *fp = brw->fragment_program;
- const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp;
struct intel_context *intel = &brw->intel;
memset(key, 0, sizeof(*key));
key->urb_entry_read_length = brw->wm.prog_data->urb_read_length;
key->curb_entry_read_length = brw->wm.prog_data->curb_read_length;
key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
- key->total_scratch = ALIGN(brw->wm.prog_data->total_scratch, 1024);
-
- /* BRW_NEW_URB_FENCE */
- key->urb_size = brw->urb.vsize;
+ key->total_scratch = brw->wm.prog_data->total_scratch;
/* BRW_NEW_CURBE_OFFSETS */
key->curbe_offset = brw->curbe.wm_start;
if (brw->state.depth_region == NULL)
key->computes_depth = 0;
+ /* _NEW_BUFFERS | _NEW_COLOR */
+ key->color_write_enable = brw_color_buffer_write_enabled(brw);
+
/* _NEW_COLOR */
key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
- key->is_glsl = bfp->isGLSL;
- /* temporary sanity check assertion */
- ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp));
+ /* If using the fragment shader backend, the program is always
+ * 8-wide.
+ */
+ if (ctx->Shader.CurrentFragmentProgram) {
+ struct brw_shader *shader = (struct brw_shader *)
+ ctx->Shader.CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT];
+
+ if (shader != NULL && shader->ir != NULL) {
+ key->is_glsl = GL_TRUE;
+ }
+ }
/* _NEW_DEPTH */
key->stats_wm = intel->stats_wm;
/**
* Setup wm hardware state. See page 225 of Volume 2
*/
-static dri_bo *
+static drm_intel_bo *
wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
- dri_bo **reloc_bufs)
+ drm_intel_bo **reloc_bufs)
{
struct intel_context *intel = &brw->intel;
struct brw_wm_unit_state wm;
- dri_bo *bo;
+ drm_intel_bo *bo;
memset(&wm, 0, sizeof(wm));
wm.thread1.depth_coef_urb_read_offset = 1;
wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
- if (intel->is_ironlake)
+ if (intel->gen == 5)
wm.thread1.binding_table_entry_count = 0; /* hardware requirement */
else
wm.thread1.binding_table_entry_count = key->nr_surfaces;
if (key->total_scratch != 0) {
wm.thread2.scratch_space_base_pointer =
brw->wm.scratch_bo->offset >> 10; /* reloc */
- wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1;
+ wm.thread2.per_thread_scratch_space = ffs(key->total_scratch) - 11;
} else {
wm.thread2.scratch_space_base_pointer = 0;
wm.thread2.per_thread_scratch_space = 0;
wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
- if (intel->is_ironlake)
+ if (intel->gen == 5)
wm.wm4.sampler_count = 0; /* hardware requirement */
else
wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
wm.wm5.enable_16_pix = 1;
wm.wm5.max_threads = brw->wm_max_threads - 1;
- wm.wm5.thread_dispatch_enable = 1; /* AKA: color_write */
+
+ if (key->color_write_enable ||
+ key->uses_kill ||
+ key->computes_depth) {
+ wm.wm5.thread_dispatch_enable = 1;
+ }
+
wm.wm5.legacy_line_rast = 0;
wm.wm5.legacy_global_depth_bias = 0;
wm.wm5.early_depth_test = 1; /* never need to disable */
wm.wm5.line_stipple = key->line_stipple;
- if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm)
+ if (unlikely(INTEL_DEBUG & DEBUG_STATS) || key->stats_wm)
wm.wm4.stats_enable = 1;
bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
key, sizeof(*key),
reloc_bufs, 3,
- &wm, sizeof(wm),
- NULL, NULL);
+ &wm, sizeof(wm));
/* Emit WM program relocation */
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_INSTRUCTION, 0,
- wm.thread0.grf_reg_count << 1,
- offsetof(struct brw_wm_unit_state, thread0),
- brw->wm.prog_bo);
+ drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, thread0),
+ brw->wm.prog_bo, wm.thread0.grf_reg_count << 1,
+ I915_GEM_DOMAIN_INSTRUCTION, 0);
/* Emit scratch space relocation */
if (key->total_scratch != 0) {
- dri_bo_emit_reloc(bo,
- 0, 0,
- wm.thread2.per_thread_scratch_space,
- offsetof(struct brw_wm_unit_state, thread2),
- brw->wm.scratch_bo);
+ drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, thread2),
+ brw->wm.scratch_bo,
+ wm.thread2.per_thread_scratch_space,
+ I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}
/* Emit sampler state relocation */
if (key->sampler_count != 0) {
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_INSTRUCTION, 0,
- wm.wm4.stats_enable | (wm.wm4.sampler_count << 2),
- offsetof(struct brw_wm_unit_state, wm4),
- brw->wm.sampler_bo);
+ drm_intel_bo_emit_reloc(bo, offsetof(struct brw_wm_unit_state, wm4),
+ brw->wm.sampler_bo, (wm.wm4.stats_enable |
+ (wm.wm4.sampler_count << 2)),
+ I915_GEM_DOMAIN_INSTRUCTION, 0);
}
return bo;
{
struct intel_context *intel = &brw->intel;
struct brw_wm_unit_key key;
- dri_bo *reloc_bufs[3];
+ drm_intel_bo *reloc_bufs[3];
wm_unit_populate_key(brw, &key);
/* Allocate the necessary scratch space if we haven't already. Don't
* bother reducing the allocation later, since we use scratch so
* rarely.
*/
- assert(key.total_scratch <= 12 * 1024);
if (key.total_scratch) {
GLuint total = key.total_scratch * brw->wm_max_threads;
if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) {
- dri_bo_unreference(brw->wm.scratch_bo);
+ drm_intel_bo_unreference(brw->wm.scratch_bo);
brw->wm.scratch_bo = NULL;
}
if (brw->wm.scratch_bo == NULL) {
- brw->wm.scratch_bo = dri_bo_alloc(intel->bufmgr,
- "wm scratch",
- total,
- 4096);
+ brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
+ "wm scratch",
+ total,
+ 4096);
}
}
reloc_bufs[1] = brw->wm.scratch_bo;
reloc_bufs[2] = brw->wm.sampler_bo;
- dri_bo_unreference(brw->wm.state_bo);
+ drm_intel_bo_unreference(brw->wm.state_bo);
brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT,
&key, sizeof(key),
reloc_bufs, 3,
_NEW_POLYGONSTIPPLE |
_NEW_LINE |
_NEW_COLOR |
- _NEW_DEPTH),
+ _NEW_DEPTH |
+ _NEW_BUFFERS),
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
BRW_NEW_CURBE_OFFSETS |