* Keith Whitwell <keith@tungstengraphics.com>
*/
-
+#include "util/u_math.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"
+#include "brw_debug.h"
+#include "brw_pipe_rast.h"
/***********************************************************************
* WM unit - fragment programs and rasterization
unsigned int max_threads;
unsigned int nr_surfaces, sampler_count;
- GLboolean uses_depth, computes_depth, uses_kill, is_glsl;
+ GLboolean uses_depth, computes_depth, uses_kill, has_flow_control;
GLboolean polygon_stipple, stats_wm, line_stipple, offset_enable;
GLfloat offset_units, offset_factor;
};
static void
wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
{
- const struct gl_fragment_program *fp = brw->fragment_program;
- const struct brw_fragment_program *bfp = (struct brw_fragment_program *) fp;
+ const struct brw_fragment_shader *fp = brw->curr.fragment_shader;
memset(key, 0, sizeof(*key));
- if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+ if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
key->max_threads = 1;
else {
/* WM maximum threads is number of EUs times number of threads per EU. */
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
key->max_threads = 12 * 6;
- else if (BRW_IS_G4X(brw))
+ else if (brw->is_g4x)
key->max_threads = 10 * 5;
else
key->max_threads = 8 * 4;
key->urb_entry_read_length = brw->wm.prog_data->urb_read_length;
key->curb_entry_read_length = brw->wm.prog_data->curb_read_length;
key->dispatch_grf_start_reg = brw->wm.prog_data->first_curbe_grf;
- key->total_scratch = ALIGN(brw->wm.prog_data->total_scratch, 1024);
+ key->total_scratch = align(brw->wm.prog_data->total_scratch, 1024);
/* BRW_NEW_URB_FENCE */
key->urb_size = brw->urb.vsize;
/* CACHE_NEW_SAMPLER */
key->sampler_count = brw->wm.sampler_count;
- /* _NEW_POLYGONSTIPPLE */
- key->polygon_stipple = ctx->Polygon.StippleFlag;
+ /* PIPE_NEW_RAST */
+ key->polygon_stipple = brw->curr.rast->templ.poly_stipple_enable;
- /* BRW_NEW_FRAGMENT_PROGRAM */
- key->uses_depth = (fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) != 0;
+ /* PIPE_NEW_FRAGMENT_PROGRAM */
+ key->uses_depth = fp->uses_depth;
+ key->computes_depth = fp->info.writes_z;
- /* as far as we can tell */
- key->computes_depth =
- (fp->Base.OutputsWritten & (1 << FRAG_RESULT_DEPTH)) != 0;
/* PIPE_NEW_DEPTH_BUFFER
+ *
* Override for NULL depthbuffer case, required by the Pixel Shader Computed
* Depth field.
*/
if (brw->curr.fb.zsbuf == NULL)
key->computes_depth = 0;
- /* _NEW_COLOR */
- key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
- key->is_glsl = bfp->isGLSL;
+ /* PIPE_NEW_DEPTH_STENCIL_ALPHA */
+ key->uses_kill = (fp->info.uses_kill ||
+ brw->curr.zstencil->cc3.alpha_test);
+
+ key->has_flow_control = fp->has_flow_control;
/* temporary sanity check assertion */
- ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp));
+ assert(fp->has_flow_control == 0);
+
+ /* PIPE_NEW_QUERY */
+ key->stats_wm = (brw->query.stats_wm != 0);
- /* _NEW_QUERY */
- key->stats_wm = intel->stats_wm;
+ /* PIPE_NEW_RAST */
+ key->line_stipple = brw->curr.rast->templ.line_stipple_enable;
- /* _NEW_LINE */
- key->line_stipple = ctx->Line.StippleFlag;
- /* _NEW_POLYGON */
- key->offset_enable = ctx->Polygon.OffsetFill;
- key->offset_units = ctx->Polygon.OffsetUnits;
- key->offset_factor = ctx->Polygon.OffsetFactor;
+ key->offset_enable = (brw->curr.rast->templ.offset_point ||
+ brw->curr.rast->templ.offset_line ||
+ brw->curr.rast->templ.offset_tri);
+
+ key->offset_units = brw->curr.rast->templ.offset_units;
+ key->offset_factor = brw->curr.rast->templ.offset_scale;
}
/**
* Setup wm hardware state. See page 225 of Volume 2
*/
-static struct brw_winsys_buffer *
+static enum pipe_error
wm_unit_create_from_key(struct brw_context *brw, struct brw_wm_unit_key *key,
- struct brw_winsys_buffer **reloc_bufs)
+ struct brw_winsys_reloc *reloc,
+ unsigned nr_reloc,
+ struct brw_winsys_buffer **bo_out)
{
struct brw_wm_unit_state wm;
- struct brw_winsys_buffer *bo;
+ enum pipe_error ret;
memset(&wm, 0, sizeof(wm));
- wm.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
- wm.thread0.kernel_start_pointer = brw->wm.prog_bo->offset >> 6; /* reloc */
+ wm.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
+ wm.thread0.kernel_start_pointer = 0; /* reloc */
wm.thread1.depth_coef_urb_read_offset = 1;
wm.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
wm.thread1.binding_table_entry_count = 0; /* hardware requirement */
else
wm.thread1.binding_table_entry_count = key->nr_surfaces;
if (key->total_scratch != 0) {
- wm.thread2.scratch_space_base_pointer =
- brw->wm.scratch_bo->offset >> 10; /* reloc */
+ wm.thread2.scratch_space_base_pointer = 0; /* reloc */
wm.thread2.per_thread_scratch_space = key->total_scratch / 1024 - 1;
} else {
wm.thread2.scratch_space_base_pointer = 0;
wm.thread3.const_urb_entry_read_length = key->curb_entry_read_length;
wm.thread3.const_urb_entry_read_offset = key->curbe_offset * 2;
- if (BRW_IS_IGDNG(brw))
+ if (brw->gen == 5)
wm.wm4.sampler_count = 0; /* hardware requirement */
else
wm.wm4.sampler_count = (key->sampler_count + 1) / 4;
- if (brw->wm.sampler_bo != NULL) {
- /* reloc */
- wm.wm4.sampler_state_pointer = brw->wm.sampler_bo->offset >> 5;
- } else {
- wm.wm4.sampler_state_pointer = 0;
- }
+ /* reloc */
+ wm.wm4.sampler_state_pointer = 0;
wm.wm5.program_uses_depth = key->uses_depth;
wm.wm5.program_computes_depth = key->computes_depth;
wm.wm5.program_uses_killpixel = key->uses_kill;
- if (key->is_glsl)
+ if (key->has_flow_control)
wm.wm5.enable_8_pix = 1;
else
wm.wm5.enable_16_pix = 1;
wm.wm5.line_stipple = key->line_stipple;
- if (INTEL_DEBUG & DEBUG_STATS || key->stats_wm)
+ if ((BRW_DEBUG & DEBUG_STATS) || key->stats_wm)
wm.wm4.stats_enable = 1;
- bo = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
- key, sizeof(*key),
- reloc_bufs, 3,
- &wm, sizeof(wm),
- NULL, NULL);
-
- /* Emit WM program relocation */
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_INSTRUCTION, 0,
- wm.thread0.grf_reg_count << 1,
- offsetof(struct brw_wm_unit_state, thread0),
- brw->wm.prog_bo);
+ ret = brw_upload_cache(&brw->cache, BRW_WM_UNIT,
+ key, sizeof(*key),
+ reloc, nr_reloc,
+ &wm, sizeof(wm),
+ NULL, NULL,
+ bo_out);
+ if (ret)
+ return ret;
- /* Emit scratch space relocation */
- if (key->total_scratch != 0) {
- dri_bo_emit_reloc(bo,
- 0, 0,
- wm.thread2.per_thread_scratch_space,
- offsetof(struct brw_wm_unit_state, thread2),
- brw->wm.scratch_bo);
- }
-
- /* Emit sampler state relocation */
- if (key->sampler_count != 0) {
- dri_bo_emit_reloc(bo,
- I915_GEM_DOMAIN_INSTRUCTION, 0,
- wm.wm4.stats_enable | (wm.wm4.sampler_count << 2),
- offsetof(struct brw_wm_unit_state, wm4),
- brw->wm.sampler_bo);
- }
-
- return bo;
+ return PIPE_OK;
}
-static void upload_wm_unit( struct brw_context *brw )
+static enum pipe_error upload_wm_unit( struct brw_context *brw )
{
struct brw_wm_unit_key key;
- struct brw_winsys_buffer *reloc_bufs[3];
+ struct brw_winsys_reloc reloc[3];
+ unsigned nr_reloc = 0;
+ enum pipe_error ret;
+ unsigned grf_reg_count;
+ unsigned per_thread_scratch_space;
+ unsigned stats_enable;
+ unsigned sampler_count;
+
wm_unit_populate_key(brw, &key);
+
/* Allocate the necessary scratch space if we haven't already. Don't
* bother reducing the allocation later, since we use scratch so
* rarely.
if (key.total_scratch) {
GLuint total = key.total_scratch * key.max_threads;
- if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size) {
- brw->sws->bo_unreference(brw->wm.scratch_bo);
- brw->wm.scratch_bo = NULL;
- }
+ /* Do we need a new buffer:
+ */
+ if (brw->wm.scratch_bo && total > brw->wm.scratch_bo->size)
+ bo_reference(&brw->wm.scratch_bo, NULL);
+
if (brw->wm.scratch_bo == NULL) {
- brw->wm.scratch_bo = brw->sws->bo_alloc(brw->sws,
- BRW_BUFFER_TYPE_WM_SCRATCH,
- total,
- 4096);
+ ret = brw->sws->bo_alloc(brw->sws,
+ BRW_BUFFER_TYPE_SHADER_SCRATCH,
+ total,
+ 4096,
+ &brw->wm.scratch_bo);
+ if (ret)
+ return ret;
}
}
- reloc_bufs[0] = brw->wm.prog_bo;
- reloc_bufs[1] = brw->wm.scratch_bo;
- reloc_bufs[2] = brw->wm.sampler_bo;
-
- brw->sws->bo_unreference(brw->wm.state_bo);
- brw->wm.state_bo = brw_search_cache(&brw->cache, BRW_WM_UNIT,
- &key, sizeof(key),
- reloc_bufs, 3,
- NULL);
- if (brw->wm.state_bo == NULL) {
- brw->wm.state_bo = wm_unit_create_from_key(brw, &key, reloc_bufs);
+
+ /* XXX: temporary:
+ */
+ grf_reg_count = (align(key.total_grf, 16) / 16 - 1);
+ per_thread_scratch_space = key.total_scratch / 1024 - 1;
+ stats_enable = (BRW_DEBUG & DEBUG_STATS) || key.stats_wm;
+ sampler_count = brw->gen == 5 ? 0 :(key.sampler_count + 1) / 4;
+
+ /* Emit WM program relocation */
+ make_reloc(&reloc[nr_reloc++],
+ BRW_USAGE_STATE,
+ grf_reg_count << 1,
+ offsetof(struct brw_wm_unit_state, thread0),
+ brw->wm.prog_bo);
+
+ /* Emit scratch space relocation */
+ if (key.total_scratch != 0) {
+ make_reloc(&reloc[nr_reloc++],
+ BRW_USAGE_SCRATCH,
+ per_thread_scratch_space,
+ offsetof(struct brw_wm_unit_state, thread2),
+ brw->wm.scratch_bo);
}
+
+ /* Emit sampler state relocation */
+ if (key.sampler_count != 0) {
+ make_reloc(&reloc[nr_reloc++],
+ BRW_USAGE_STATE,
+ stats_enable | (sampler_count << 2),
+ offsetof(struct brw_wm_unit_state, wm4),
+ brw->wm.sampler_bo);
+ }
+
+
+ if (brw_search_cache(&brw->cache, BRW_WM_UNIT,
+ &key, sizeof(key),
+ reloc, nr_reloc,
+ NULL,
+ &brw->wm.state_bo))
+ return PIPE_OK;
+
+ ret = wm_unit_create_from_key(brw, &key,
+ reloc, nr_reloc,
+ &brw->wm.state_bo);
+ if (ret)
+ return ret;
+
+ return PIPE_OK;
}
const struct brw_tracked_state brw_wm_unit = {
.dirty = {
- .mesa = (PIPE_NEW_DEPTH_BUFFER |
- _NEW_POLYGON |
- _NEW_POLYGONSTIPPLE |
- _NEW_LINE |
- _NEW_COLOR |
- _NEW_QUERY),
-
- .brw = (BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_CURBE_OFFSETS |
+ .mesa = (PIPE_NEW_FRAGMENT_SHADER |
+ PIPE_NEW_DEPTH_BUFFER |
+ PIPE_NEW_RAST |
+ PIPE_NEW_DEPTH_STENCIL_ALPHA |
+ PIPE_NEW_QUERY),
+
+ .brw = (BRW_NEW_CURBE_OFFSETS |
BRW_NEW_NR_WM_SURFACES),
.cache = (CACHE_NEW_WM_PROG |