From: Daniel Vetter Date: Tue, 1 Mar 2011 21:14:50 +0000 (+0100) Subject: i915g: split up hw state emission into small atoms X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=179cb587954ff90efa477945297272680c750149;p=mesa.git i915g: split up hw state emission into small atoms Signed-off-by: Daniel Vetter --- diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 0323ad940f9..504b60c64ed 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -40,12 +40,18 @@ struct i915_tracked_hw_state { const char *name; - void (*validate)(struct i915_context *); + void (*validate)(struct i915_context *, unsigned *batch_space); void (*emit)(struct i915_context *); unsigned dirty, batch_space; }; +static void +validate_flush(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = i915->flush_dirty ? 1 : 0; +} + static void emit_flush(struct i915_context *i915) { @@ -61,82 +67,336 @@ emit_flush(struct i915_context *i915) OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE); } +uint32_t invariant_state[] = { + _3DSTATE_AA_CMD | AA_LINE_ECAAR_WIDTH_ENABLE | AA_LINE_ECAAR_WIDTH_1_0 | + AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0, + + _3DSTATE_DFLT_DIFFUSE_CMD, 0, + + _3DSTATE_DFLT_SPEC_CMD, 0, + + _3DSTATE_DFLT_Z_CMD, 0, + + _3DSTATE_COORD_SET_BINDINGS | + CSB_TCB(0, 0) | + CSB_TCB(1, 1) | + CSB_TCB(2, 2) | + CSB_TCB(3, 3) | + CSB_TCB(4, 4) | + CSB_TCB(5, 5) | + CSB_TCB(6, 6) | + CSB_TCB(7, 7), + + _3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | + ENABLE_TEXKILL_3D_4D | + TEXKILL_4D, + + _3DSTATE_DEPTH_SUBRECT_DISABLE, + + /* disable indirect state for now + */ + _3DSTATE_LOAD_INDIRECT | 0, 0}; + +static void +emit_invariant(struct i915_context *i915) +{ + 
i915_winsys_batchbuffer_write(i915->batch, invariant_state, + Elements(invariant_state)*sizeof(uint32_t)); +} + static void -validate_immediate(struct i915_context *i915) +validate_immediate(struct i915_context *i915, unsigned *batch_space) { + unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 | + 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 | + 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 | + 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) & + i915->immediate_dirty; + if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) i915->validation_buffers[i915->num_validation_buffers++] = i915->vbo; + + *batch_space = 1 + util_bitcount(dirty); +} + +static void +emit_immediate(struct i915_context *i915) +{ + /* remove unwatned bits and S7 */ + unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 | + 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 | + 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 | + 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) & + i915->immediate_dirty; + int i, num = util_bitcount(dirty); + assert(num && num <= I915_MAX_IMMEDIATE); + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + dirty << 4 | (num - 1)); + + if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) { + if (i915->vbo) + OUT_RELOC(i915->vbo, I915_USAGE_VERTEX, + i915->current.immediate[I915_IMMEDIATE_S0]); + else + OUT_BATCH(0); + } + + for (i = 1; i < I915_MAX_IMMEDIATE; i++) { + if (dirty & (1 << i)) + OUT_BATCH(i915->current.immediate[i]); + } +} + +static void +validate_dynamic(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = util_bitcount(i915->dynamic_dirty & ((1 << I915_MAX_DYNAMIC) - 1)); +} + +static void +emit_dynamic(struct i915_context *i915) +{ + int i; + for (i = 0; i < I915_MAX_DYNAMIC; i++) { + if (i915->dynamic_dirty & (1 << i)) + OUT_BATCH(i915->current.dynamic[i]); + } } static void -validate_static(struct i915_context *i915) +validate_static(struct i915_context *i915, unsigned *batch_space) { - if 
(i915->current.cbuf_bo) + *batch_space = 2 + 5; /* including DRAW_RECT */ + + if (i915->current.cbuf_bo) { i915->validation_buffers[i915->num_validation_buffers++] = i915->current.cbuf_bo; + *batch_space += 3; + } - if (i915->current.depth_bo) + if (i915->current.depth_bo) { i915->validation_buffers[i915->num_validation_buffers++] = i915->current.depth_bo; + *batch_space += 3; + } +} + +static void +emit_static(struct i915_context *i915) +{ + if (i915->current.cbuf_bo) { + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + OUT_BATCH(i915->current.cbuf_flags); + OUT_RELOC(i915->current.cbuf_bo, + I915_USAGE_RENDER, + 0); + } + + /* What happens if no zbuf?? + */ + if (i915->current.depth_bo) { + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + OUT_BATCH(i915->current.depth_flags); + OUT_RELOC(i915->current.depth_bo, + I915_USAGE_RENDER, + 0); + } + + { + OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); + OUT_BATCH(i915->current.dst_buf_vars); + } } static void -validate_map(struct i915_context *i915) +validate_map(struct i915_context *i915, unsigned *batch_space) { const uint enabled = i915->current.sampler_enable_flags; uint unit; struct i915_texture *tex; + *batch_space = i915->current.sampler_enable_nr ? 
+ 2 + 3*i915->current.sampler_enable_nr : 0; for (unit = 0; unit < I915_TEX_UNITS; unit++) { if (enabled & (1 << unit)) { - tex = i915_texture(i915->fragment_sampler_views[unit]->texture); - i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer; + tex = i915_texture(i915->fragment_sampler_views[unit]->texture); + i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer; } } } -const static struct i915_tracked_hw_state hw_atoms[] = { - { "flush", NULL, emit_flush, I915_HW_FLUSH, 1 }, - { "immediate", validate_immediate, NULL, I915_HW_IMMEDIATE }, - { "static", validate_static, NULL, I915_HW_STATIC }, - { "map", validate_map, NULL, I915_HW_MAP } -}; +static void +emit_map(struct i915_context *i915) +{ + const uint nr = i915->current.sampler_enable_nr; + if (nr) { + const uint enabled = i915->current.sampler_enable_flags; + uint unit; + uint count = 0; + OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr)); + OUT_BATCH(enabled); + for (unit = 0; unit < I915_TEX_UNITS; unit++) { + if (enabled & (1 << unit)) { + struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture); + struct i915_winsys_buffer *buf = texture->buffer; + assert(buf); + + count++; + + OUT_RELOC(buf, I915_USAGE_SAMPLER, 0); + OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */ + OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */ + } + } + assert(count == nr); + } +} + +static void +validate_sampler(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = i915->current.sampler_enable_nr ? 
+ 2 + 3*i915->current.sampler_enable_nr : 0; +} + +static void +emit_sampler(struct i915_context *i915) +{ + if (i915->current.sampler_enable_nr) { + int i; + + OUT_BATCH( _3DSTATE_SAMPLER_STATE | + (3 * i915->current.sampler_enable_nr) ); + + OUT_BATCH( i915->current.sampler_enable_flags ); + + for (i = 0; i < I915_TEX_UNITS; i++) { + if (i915->current.sampler_enable_flags & (1<<i)) { + OUT_BATCH( i915->current.sampler[i][0] ); + OUT_BATCH( i915->current.sampler[i][1] ); + OUT_BATCH( i915->current.sampler[i][2] ); + } + } + } +} + +static void +validate_constants(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = i915->fs->num_constants ? + 2 + 4*i915->fs->num_constants : 0; +} + +static void +emit_constants(struct i915_context *i915) +{ + /* Collate the user-defined constants with the fragment shader's + * immediates according to the constant_flags[] array. + */ + const uint nr = i915->fs->num_constants; + if (nr) { + uint i; + + OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); + OUT_BATCH((1 << nr) - 1); + + for (i = 0; i < nr; i++) { + const uint *c; + if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) { + /* grab user-defined constant */ + c = (uint *) i915_buffer(i915->constants[PIPE_SHADER_FRAGMENT])->data; + c += 4 * i; + } + else { + /* emit program constant */ + c = (uint *) i915->fs->constants[i]; + } +#if 0 /* debug */ + { + float *f = (float *) c; + printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3], + (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER + ? 
"user" : "immediate")); + } +#endif + OUT_BATCH(*c++); + OUT_BATCH(*c++); + OUT_BATCH(*c++); + OUT_BATCH(*c++); + } + } +} + +static void +validate_program(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = i915->fs->program_len; +} + +static void +emit_program(struct i915_context *i915) +{ + uint i; + /* we should always have, at least, a pass-through program */ + assert(i915->fs->program_len > 0); + for (i = 0; i < i915->fs->program_len; i++) { + OUT_BATCH(i915->fs->program[i]); + } +} + +static void +emit_draw_rect(struct i915_context *i915) +{ + OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); + OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS); + OUT_BATCH(i915->current.draw_offset); + OUT_BATCH(i915->current.draw_size); + OUT_BATCH(i915->current.draw_offset); +} static boolean i915_validate_state(struct i915_context *i915, unsigned *batch_space) { - int i; + unsigned tmp; i915->num_validation_buffers = 0; - *batch_space = 0; - - for (i = 0; i < Elements(hw_atoms); i++) - if ((i915->hardware_dirty & hw_atoms[i].dirty) && hw_atoms[i].validate) { - hw_atoms[i].validate(i915); - *batch_space += hw_atoms[i].batch_space; - } + if (i915->hardware_dirty & I915_HW_INVARIANT) + *batch_space = Elements(invariant_state); + else + *batch_space = 0; + +#define VALIDATE_ATOM(atom, hw_dirty) \ + if (i915->hardware_dirty & hw_dirty) { \ + validate_##atom(i915, &tmp); \ + *batch_space += tmp; } + VALIDATE_ATOM(flush, I915_HW_FLUSH); + VALIDATE_ATOM(immediate, I915_HW_IMMEDIATE); + VALIDATE_ATOM(dynamic, I915_HW_DYNAMIC); + VALIDATE_ATOM(static, I915_HW_STATIC); + VALIDATE_ATOM(map, I915_HW_MAP); + VALIDATE_ATOM(sampler, I915_HW_SAMPLER); + VALIDATE_ATOM(constants, I915_HW_CONSTANTS); + VALIDATE_ATOM(program, I915_HW_PROGRAM); +#undef VALIDATE_ATOM if (i915->num_validation_buffers == 0) return TRUE; if (!i915_winsys_validate_buffers(i915->batch, i915->validation_buffers, - i915->num_validation_buffers)) + i915->num_validation_buffers)) return FALSE; return TRUE; } -static void 
-emit_state(struct i915_context *i915) -{ - int i; - - for (i = 0; i < Elements(hw_atoms); i++) - if ((i915->hardware_dirty & hw_atoms[i].dirty) && hw_atoms[i].emit) - hw_atoms[i].emit(i915); -} - /* Push the state into the sarea and/or texture memory. */ void @@ -182,247 +442,20 @@ i915_emit_hardware_state(struct i915_context *i915 ) save_ptr = (uintptr_t)i915->batch->ptr; save_relocs = i915->batch->relocs; - emit_state(i915); - /* 14 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_INVARIANT) - { - OUT_BATCH(_3DSTATE_AA_CMD | - AA_LINE_ECAAR_WIDTH_ENABLE | - AA_LINE_ECAAR_WIDTH_1_0 | - AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0); - - OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); - OUT_BATCH(0); - - OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); - OUT_BATCH(0); - - OUT_BATCH(_3DSTATE_DFLT_Z_CMD); - OUT_BATCH(0); - - OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS | - CSB_TCB(0, 0) | - CSB_TCB(1, 1) | - CSB_TCB(2, 2) | - CSB_TCB(3, 3) | - CSB_TCB(4, 4) | - CSB_TCB(5, 5) | - CSB_TCB(6, 6) | - CSB_TCB(7, 7)); - - OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | - ENABLE_POINT_RASTER_RULE | - OGL_POINT_RASTER_RULE | - ENABLE_LINE_STRIP_PROVOKE_VRTX | - ENABLE_TRI_FAN_PROVOKE_VRTX | - LINE_STRIP_PROVOKE_VRTX(1) | - TRI_FAN_PROVOKE_VRTX(2) | - ENABLE_TEXKILL_3D_4D | - TEXKILL_4D); - - OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); - - /* disable indirect state for now - */ - OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); - OUT_BATCH(0); - } - - /* 7 dwords, 1 relocs */ - if (i915->hardware_dirty & I915_HW_IMMEDIATE) - { - /* remove unwatned bits and S7 */ - unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 | - 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 | - 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 | - 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) & - i915->immediate_dirty; - int i, num = util_bitcount(dirty); - assert(num && num <= I915_MAX_IMMEDIATE); - - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | - dirty << 4 | (num - 1)); - - if (i915->immediate_dirty & (1 << 
I915_IMMEDIATE_S0)) { - if (i915->vbo) - OUT_RELOC(i915->vbo, I915_USAGE_VERTEX, - i915->current.immediate[I915_IMMEDIATE_S0]); - else - OUT_BATCH(0); - } - - for (i = 1; i < I915_MAX_IMMEDIATE; i++) { - if (dirty & (1 << i)) - OUT_BATCH(i915->current.immediate[i]); - } - } - -#if 01 - /* I915_MAX_DYNAMIC dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_DYNAMIC) - { - int i; - for (i = 0; i < I915_MAX_DYNAMIC; i++) { - if (i915->dynamic_dirty & (1 << i)) - OUT_BATCH(i915->current.dynamic[i]); - } - } -#endif - -#if 01 - /* 8 dwords, 2 relocs */ - if (i915->hardware_dirty & I915_HW_STATIC) - { - if (i915->current.cbuf_bo) { - OUT_BATCH(_3DSTATE_BUF_INFO_CMD); - OUT_BATCH(i915->current.cbuf_flags); - OUT_RELOC(i915->current.cbuf_bo, - I915_USAGE_RENDER, - 0); - } - - /* What happens if no zbuf?? - */ - if (i915->current.depth_bo) { - OUT_BATCH(_3DSTATE_BUF_INFO_CMD); - OUT_BATCH(i915->current.depth_flags); - OUT_RELOC(i915->current.depth_bo, - I915_USAGE_RENDER, - 0); - } - - { - OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); - OUT_BATCH(i915->current.dst_buf_vars); - } - } -#endif - -#if 01 - /* texture images */ - /* 2 + I915_TEX_UNITS*3 dwords, I915_TEX_UNITS relocs */ - if (i915->hardware_dirty & (I915_HW_MAP | I915_HW_SAMPLER)) - { - const uint nr = i915->current.sampler_enable_nr; - if (nr) { - const uint enabled = i915->current.sampler_enable_flags; - uint unit; - uint count = 0; - OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr)); - OUT_BATCH(enabled); - for (unit = 0; unit < I915_TEX_UNITS; unit++) { - if (enabled & (1 << unit)) { - struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture); - struct i915_winsys_buffer *buf = texture->buffer; - assert(buf); - - count++; - - OUT_RELOC(buf, I915_USAGE_SAMPLER, 0); - OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */ - OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */ - } - } - assert(count == nr); - } - } -#endif - -#if 01 - /* samplers */ - /* 2 + I915_TEX_UNITS*3 dwords, 0 relocs 
*/ - if (i915->hardware_dirty & I915_HW_SAMPLER) - { - if (i915->current.sampler_enable_nr) { - int i; - - OUT_BATCH( _3DSTATE_SAMPLER_STATE | - (3 * i915->current.sampler_enable_nr) ); - - OUT_BATCH( i915->current.sampler_enable_flags ); - - for (i = 0; i < I915_TEX_UNITS; i++) { - if (i915->current.sampler_enable_flags & (1<<i)) { - OUT_BATCH( i915->current.sampler[i][0] ); - OUT_BATCH( i915->current.sampler[i][1] ); - OUT_BATCH( i915->current.sampler[i][2] ); - } - } - } - } -#endif - -#if 01 - /* constants */ - /* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_CONSTANTS) - { - /* Collate the user-defined constants with the fragment shader's - * immediates according to the constant_flags[] array. - */ - const uint nr = i915->fs->num_constants; - if (nr) { - uint i; - - OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); - OUT_BATCH((1 << nr) - 1); - - for (i = 0; i < nr; i++) { - const uint *c; - if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) { - /* grab user-defined constant */ - c = (uint *) i915_buffer(i915->constants[PIPE_SHADER_FRAGMENT])->data; - c += 4 * i; - } - else { - /* emit program constant */ - c = (uint *) i915->fs->constants[i]; - } -#if 0 /* debug */ - { - float *f = (float *) c; - printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3], - (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER - ? 
"user" : "immediate")); - } -#endif - OUT_BATCH(*c++); - OUT_BATCH(*c++); - OUT_BATCH(*c++); - OUT_BATCH(*c++); - } - } - } -#endif - -#if 01 - /* Fragment program */ - /* i915->current.program_len dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_PROGRAM) - { - uint i; - /* we should always have, at least, a pass-through program */ - assert(i915->fs->program_len > 0); - for (i = 0; i < i915->fs->program_len; i++) { - OUT_BATCH(i915->fs->program[i]); - } - } -#endif - -#if 01 - /* drawing surface size */ - /* 6 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_STATIC) - { - /* XXX flush only required when the draw_offset changes! */ - OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE); - OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); - OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS); - OUT_BATCH(i915->current.draw_offset); - OUT_BATCH(i915->current.draw_size); - OUT_BATCH(i915->current.draw_offset); - } -#endif +#define EMIT_ATOM(atom, hw_dirty) \ + if (i915->hardware_dirty & hw_dirty) \ + emit_##atom(i915); + EMIT_ATOM(flush, I915_HW_FLUSH); + EMIT_ATOM(invariant, I915_HW_INVARIANT); + EMIT_ATOM(immediate, I915_HW_IMMEDIATE); + EMIT_ATOM(dynamic, I915_HW_DYNAMIC); + EMIT_ATOM(static, I915_HW_STATIC); + EMIT_ATOM(map, I915_HW_MAP); + EMIT_ATOM(sampler, I915_HW_SAMPLER); + EMIT_ATOM(constants, I915_HW_CONSTANTS); + EMIT_ATOM(program, I915_HW_PROGRAM); + EMIT_ATOM(draw_rect, I915_HW_STATIC); +#undef EMIT_ATOM I915_DBG(DBG_EMIT, "%s: used %d dwords, %d relocs\n", __FUNCTION__, ((uintptr_t)i915->batch->ptr - save_ptr) / 4, @@ -431,4 +464,5 @@ i915_emit_hardware_state(struct i915_context *i915 ) i915->hardware_dirty = 0; i915->immediate_dirty = 0; i915->dynamic_dirty = 0; + i915->flush_dirty = 0; } diff --git a/src/gallium/drivers/i915/i915_state_static.c b/src/gallium/drivers/i915/i915_state_static.c index 97044499990..20cd23f8f73 100644 --- a/src/gallium/drivers/i915/i915_state_static.c +++ b/src/gallium/drivers/i915/i915_state_static.c @@ -164,7 +164,7 @@ static void 
update_framebuffer(struct i915_context *i915) assert(ret); if (i915->current.draw_offset != draw_offset) { i915->current.draw_offset = draw_offset; - /* XXX: only emit flush on change and not always in emit */ + i915_set_flush_dirty(i915, I915_PIPELINE_FLUSH); } i915->current.draw_size = (w - 1 + x) | ((h - 1 + y) << 16);