X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fi915%2Fi915_state_emit.c;h=9f0f9e33ca3fc84ea8645369e1ad2e2ac4b6aaab;hb=HEAD;hp=4d069fffa85abc780bd182be1b1362a98482961e;hpb=8f3bdeaad610d7d5a5c6e73e1e9c721219595754;p=mesa.git diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 4d069fffa85..9f0f9e33ca3 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2003 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -18,7 +18,7 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
@@ -29,375 +29,561 @@ #include "i915_reg.h" #include "i915_context.h" #include "i915_batch.h" -#include "i915_reg.h" +#include "i915_debug.h" +#include "i915_fpc.h" #include "i915_resource.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" +#include "pipe/p_format.h" + +#include "util/format/u_format.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +struct i915_tracked_hw_state { + const char *name; + void (*validate)(struct i915_context *, unsigned *batch_space); + void (*emit)(struct i915_context *); + unsigned dirty, batch_space; +}; -static unsigned translate_format( enum pipe_format format ) + +static void +validate_flush(struct i915_context *i915, unsigned *batch_space) { - switch (format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - return COLOR_BUF_ARGB8888; - case PIPE_FORMAT_B5G6R5_UNORM: - return COLOR_BUF_RGB565; - default: - assert(0); - return 0; - } + *batch_space = i915->flush_dirty ? 1 : 0; } -static unsigned translate_depth_format( enum pipe_format zformat ) +static void +emit_flush(struct i915_context *i915) { - switch (zformat) { - case PIPE_FORMAT_Z24_UNORM_S8_USCALED: - return DEPTH_FRMT_24_FIXED_8_OTHER; - case PIPE_FORMAT_Z16_UNORM: - return DEPTH_FRMT_16_FIXED; - default: - assert(0); - return 0; - } + /* Cache handling is very cheap atm. State handling can request two flushes: + * - I915_FLUSH_CACHE which is a flush everything request and + * - I915_PIPELINE_FLUSH which is specifically for the draw_offset flush. + * Because the cache handling is so dumb, no explicit "invalidate map cache". + * Also, the first is a strict superset of the latter, so the following logic + * works. 
*/ + if (i915->flush_dirty & I915_FLUSH_CACHE) + OUT_BATCH(MI_FLUSH | FLUSH_MAP_CACHE); + else if (i915->flush_dirty & I915_PIPELINE_FLUSH) + OUT_BATCH(MI_FLUSH | INHIBIT_FLUSH_RENDER_CACHE); } +uint32_t invariant_state[] = { + _3DSTATE_AA_CMD | AA_LINE_ECAAR_WIDTH_ENABLE | AA_LINE_ECAAR_WIDTH_1_0 | + AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0, -/** - * Examine framebuffer state to determine width, height. - */ -static boolean -framebuffer_size(const struct pipe_framebuffer_state *fb, - uint *width, uint *height) + _3DSTATE_DFLT_DIFFUSE_CMD, 0, + + _3DSTATE_DFLT_SPEC_CMD, 0, + + _3DSTATE_DFLT_Z_CMD, 0, + + _3DSTATE_COORD_SET_BINDINGS | + CSB_TCB(0, 0) | + CSB_TCB(1, 1) | + CSB_TCB(2, 2) | + CSB_TCB(3, 3) | + CSB_TCB(4, 4) | + CSB_TCB(5, 5) | + CSB_TCB(6, 6) | + CSB_TCB(7, 7), + + _3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | + ENABLE_TEXKILL_3D_4D | + TEXKILL_4D, + + _3DSTATE_DEPTH_SUBRECT_DISABLE, + + /* disable indirect state for now + */ + _3DSTATE_LOAD_INDIRECT | 0, 0}; + +static void +emit_invariant(struct i915_context *i915) { - if (fb->cbufs[0]) { - *width = fb->cbufs[0]->width; - *height = fb->cbufs[0]->height; - return TRUE; - } - else if (fb->zsbuf) { - *width = fb->zsbuf->width; - *height = fb->zsbuf->height; - return TRUE; - } - else { - *width = *height = 0; - return FALSE; - } + i915_winsys_batchbuffer_write(i915->batch, invariant_state, + ARRAY_SIZE(invariant_state)*sizeof(uint32_t)); } - -/* Push the state into the sarea and/or texture memory. 
- */ -void -i915_emit_hardware_state(struct i915_context *i915 ) +static void +validate_immediate(struct i915_context *i915, unsigned *batch_space) { - /* XXX: there must be an easier way */ - const unsigned dwords = ( 14 + - 7 + - I915_MAX_DYNAMIC + - 8 + - 2 + I915_TEX_UNITS*3 + - 2 + I915_TEX_UNITS*3 + - 2 + I915_MAX_CONSTANT*4 + -#if 0 - i915->current.program_len + -#else - i915->fs->program_len + -#endif - 6 - ) * 3/2; /* plus 50% margin */ - const unsigned relocs = ( I915_TEX_UNITS + - 3 - ) * 3/2; /* plus 50% margin */ + unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 | + 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 | + 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 | + 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) & + i915->immediate_dirty; -#if 0 - debug_printf("i915_emit_hardware_state: %d dwords, %d relocs\n", dwords, relocs); -#endif - - if(!BEGIN_BATCH(dwords, relocs)) { - FLUSH_BATCH(NULL); - assert(BEGIN_BATCH(dwords, relocs)); - } + if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0) && i915->vbo) + i915->validation_buffers[i915->num_validation_buffers++] = i915->vbo; - /* 14 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_INVARIENT) - { - OUT_BATCH(_3DSTATE_AA_CMD | - AA_LINE_ECAAR_WIDTH_ENABLE | - AA_LINE_ECAAR_WIDTH_1_0 | - AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0); + *batch_space = 1 + util_bitcount(dirty); +} - OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); - OUT_BATCH(0); +static uint target_fixup(struct pipe_surface *p, int component) +{ + const struct + { + enum pipe_format format; + uint hw_mask[4]; + } fixup_mask[] = { + { PIPE_FORMAT_R8G8B8A8_UNORM, { S5_WRITEDISABLE_BLUE, S5_WRITEDISABLE_GREEN, S5_WRITEDISABLE_RED, S5_WRITEDISABLE_ALPHA}}, + { PIPE_FORMAT_R8G8B8X8_UNORM, { S5_WRITEDISABLE_BLUE, S5_WRITEDISABLE_GREEN, S5_WRITEDISABLE_RED, S5_WRITEDISABLE_ALPHA}}, + { PIPE_FORMAT_L8_UNORM, { S5_WRITEDISABLE_RED | S5_WRITEDISABLE_GREEN | S5_WRITEDISABLE_BLUE, 0, 0, S5_WRITEDISABLE_ALPHA}}, + { 
PIPE_FORMAT_I8_UNORM, { S5_WRITEDISABLE_RED | S5_WRITEDISABLE_GREEN | S5_WRITEDISABLE_BLUE, 0, 0, S5_WRITEDISABLE_ALPHA}}, + { PIPE_FORMAT_A8_UNORM, { 0, 0, 0, S5_WRITEDISABLE_RED | S5_WRITEDISABLE_GREEN | S5_WRITEDISABLE_BLUE | S5_WRITEDISABLE_ALPHA}}, + { 0, { S5_WRITEDISABLE_RED, S5_WRITEDISABLE_GREEN, S5_WRITEDISABLE_BLUE, S5_WRITEDISABLE_ALPHA}} + }; + int i = sizeof(fixup_mask) / sizeof(*fixup_mask) - 1; + + if (p) + for(i = 0; fixup_mask[i].format != 0; i++) + if (p->format == fixup_mask[i].format) + return fixup_mask[i].hw_mask[component]; + + /* Just return default masks */ + return fixup_mask[i].hw_mask[component]; +} - OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); - OUT_BATCH(0); - - OUT_BATCH(_3DSTATE_DFLT_Z_CMD); - OUT_BATCH(0); +static void emit_immediate_s5(struct i915_context *i915, uint imm) +{ + /* Fixup write mask for non-BGRA render targets */ + uint fixup_imm = imm & ~( S5_WRITEDISABLE_RED | S5_WRITEDISABLE_GREEN | + S5_WRITEDISABLE_BLUE | S5_WRITEDISABLE_ALPHA ); + struct pipe_surface *surf = i915->framebuffer.cbufs[0]; + + if (imm & S5_WRITEDISABLE_RED) + fixup_imm |= target_fixup(surf, 0); + if (imm & S5_WRITEDISABLE_GREEN) + fixup_imm |= target_fixup(surf, 1); + if (imm & S5_WRITEDISABLE_BLUE) + fixup_imm |= target_fixup(surf, 2); + if (imm & S5_WRITEDISABLE_ALPHA) + fixup_imm |= target_fixup(surf, 3); + + OUT_BATCH(fixup_imm); +} - OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS | - CSB_TCB(0, 0) | - CSB_TCB(1, 1) | - CSB_TCB(2, 2) | - CSB_TCB(3, 3) | - CSB_TCB(4, 4) | - CSB_TCB(5, 5) | - CSB_TCB(6, 6) | - CSB_TCB(7, 7)); - - OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | - ENABLE_POINT_RASTER_RULE | - OGL_POINT_RASTER_RULE | - ENABLE_LINE_STRIP_PROVOKE_VRTX | - ENABLE_TRI_FAN_PROVOKE_VRTX | - LINE_STRIP_PROVOKE_VRTX(1) | - TRI_FAN_PROVOKE_VRTX(2) | - ENABLE_TEXKILL_3D_4D | - TEXKILL_4D); - - /* Need to initialize this to zero. 
- */ - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0)); - OUT_BATCH(0); +static void emit_immediate_s6(struct i915_context *i915, uint imm) +{ + /* Fixup blend function for A8 dst buffers. + * When we blend to an A8 buffer, the GPU thinks it's a G8 buffer, + * and therefore we need to use the color factor for alphas. */ + uint srcRGB; + + if (i915->current.target_fixup_format == PIPE_FORMAT_A8_UNORM) { + srcRGB = (imm >> S6_CBUF_SRC_BLEND_FACT_SHIFT) & BLENDFACT_MASK; + if (srcRGB == BLENDFACT_DST_ALPHA) + srcRGB = BLENDFACT_DST_COLR; + else if (srcRGB == BLENDFACT_INV_DST_ALPHA) + srcRGB = BLENDFACT_INV_DST_COLR; + imm &= ~SRC_BLND_FACT(BLENDFACT_MASK); + imm |= SRC_BLND_FACT(srcRGB); + } - OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); + OUT_BATCH(imm); +} - /* disable indirect state for now - */ - OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); - OUT_BATCH(0); - } - - /* 7 dwords, 1 relocs */ - if (i915->hardware_dirty & I915_HW_IMMEDIATE) - { - OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | - I1_LOAD_S(0) | - I1_LOAD_S(1) | - I1_LOAD_S(2) | - I1_LOAD_S(4) | - I1_LOAD_S(5) | - I1_LOAD_S(6) | - (5)); - - if(i915->vbo) - OUT_RELOC(i915->vbo, - I915_USAGE_VERTEX, +static void +emit_immediate(struct i915_context *i915) +{ + /* remove unwanted bits and S7 */ + unsigned dirty = (1 << I915_IMMEDIATE_S0 | 1 << I915_IMMEDIATE_S1 | + 1 << I915_IMMEDIATE_S2 | 1 << I915_IMMEDIATE_S3 | + 1 << I915_IMMEDIATE_S3 | 1 << I915_IMMEDIATE_S4 | + 1 << I915_IMMEDIATE_S5 | 1 << I915_IMMEDIATE_S6) & + i915->immediate_dirty; + int i, num = util_bitcount(dirty); + assert(num && num <= I915_MAX_IMMEDIATE); + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + dirty << 4 | (num - 1)); + + if (i915->immediate_dirty & (1 << I915_IMMEDIATE_S0)) { + if (i915->vbo) + OUT_RELOC(i915->vbo, I915_USAGE_VERTEX, i915->current.immediate[I915_IMMEDIATE_S0]); else - /* FIXME: we should not do this */ OUT_BATCH(0); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S1]); - 
OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S2]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S4]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S5]); - OUT_BATCH(i915->current.immediate[I915_IMMEDIATE_S6]); - } - - /* I915_MAX_DYNAMIC dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_DYNAMIC) - { - int i; - for (i = 0; i < I915_MAX_DYNAMIC; i++) { - OUT_BATCH(i915->current.dynamic[i]); + } + + for (i = 1; i < I915_MAX_IMMEDIATE; i++) { + if (dirty & (1 << i)) { + if (i == I915_IMMEDIATE_S5) + emit_immediate_s5(i915, i915->current.immediate[i]); + else if (i == I915_IMMEDIATE_S6) + emit_immediate_s6(i915, i915->current.immediate[i]); + else + OUT_BATCH(i915->current.immediate[i]); } } - - /* 8 dwords, 2 relocs */ - if (i915->hardware_dirty & I915_HW_STATIC) - { - struct pipe_surface *cbuf_surface = i915->framebuffer.cbufs[0]; - struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; +} - if (cbuf_surface) { - unsigned ctile = BUF_3D_USE_FENCE; - struct i915_texture *tex = i915_texture(cbuf_surface->texture); - assert(tex); +static void +validate_dynamic(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = util_bitcount(i915->dynamic_dirty & ((1 << I915_MAX_DYNAMIC) - 1)); +} - if (tex && tex->sw_tiled) { - ctile = BUF_3D_TILED_SURFACE; - } +static void +emit_dynamic(struct i915_context *i915) +{ + int i; + for (i = 0; i < I915_MAX_DYNAMIC; i++) { + if (i915->dynamic_dirty & (1 << i)) + OUT_BATCH(i915->current.dynamic[i]); + } +} - OUT_BATCH(_3DSTATE_BUF_INFO_CMD); +static void +validate_static(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = 0; - OUT_BATCH(BUF_3D_ID_COLOR_BACK | - BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ - ctile); + if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) { + i915->validation_buffers[i915->num_validation_buffers++] + = i915->current.cbuf_bo; + *batch_space += 3; + } - OUT_RELOC(tex->buffer, - I915_USAGE_RENDER, - cbuf_surface->offset); - } + 
if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) { + i915->validation_buffers[i915->num_validation_buffers++] + = i915->current.depth_bo; + *batch_space += 3; + } - /* What happens if no zbuf?? - */ - if (depth_surface) { - unsigned ztile = BUF_3D_USE_FENCE; - struct i915_texture *tex = i915_texture(depth_surface->texture); - assert(tex); + if (i915->static_dirty & I915_DST_VARS) + *batch_space += 2; - if (tex && tex->sw_tiled) { - ztile = BUF_3D_TILED_SURFACE; - } + if (i915->static_dirty & I915_DST_RECT) + *batch_space += 5; +} - OUT_BATCH(_3DSTATE_BUF_INFO_CMD); +static void +emit_static(struct i915_context *i915) +{ + if (i915->current.cbuf_bo && (i915->static_dirty & I915_DST_BUF_COLOR)) { + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + OUT_BATCH(i915->current.cbuf_flags); + OUT_RELOC(i915->current.cbuf_bo, + I915_USAGE_RENDER, + 0); + } - assert(tex); - OUT_BATCH(BUF_3D_ID_DEPTH | - BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ - ztile); + /* What happens if no zbuf?? 
+ */ + if (i915->current.depth_bo && (i915->static_dirty & I915_DST_BUF_DEPTH)) { + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + OUT_BATCH(i915->current.depth_flags); + OUT_RELOC(i915->current.depth_bo, + I915_USAGE_RENDER, + 0); + } - OUT_RELOC(tex->buffer, - I915_USAGE_RENDER, - depth_surface->offset); - } - - { - unsigned cformat, zformat = 0; - - if (cbuf_surface) - cformat = cbuf_surface->format; - else - cformat = PIPE_FORMAT_B8G8R8A8_UNORM; /* arbitrary */ - cformat = translate_format(cformat); - - if (depth_surface) - zformat = translate_depth_format( i915->framebuffer.zsbuf->format ); - - OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); - OUT_BATCH(DSTORG_HORT_BIAS(0x8) | /* .5 */ - DSTORG_VERT_BIAS(0x8) | /* .5 */ - LOD_PRECLAMP_OGL | - TEX_DEFAULT_COLOR_OGL | - cformat | - zformat ); + if (i915->static_dirty & I915_DST_VARS) { + OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); + OUT_BATCH(i915->current.dst_buf_vars); + } +} + +static void +validate_map(struct i915_context *i915, unsigned *batch_space) +{ + const uint enabled = i915->current.sampler_enable_flags; + uint unit; + struct i915_texture *tex; + + *batch_space = i915->current.sampler_enable_nr ? 
+ 2 + 3*i915->current.sampler_enable_nr : 0; + + for (unit = 0; unit < I915_TEX_UNITS; unit++) { + if (enabled & (1 << unit)) { + tex = i915_texture(i915->fragment_sampler_views[unit]->texture); + i915->validation_buffers[i915->num_validation_buffers++] = tex->buffer; } } +} -#if 01 - /* texture images */ - /* 2 + I915_TEX_UNITS*3 dwords, I915_TEX_UNITS relocs */ - if (i915->hardware_dirty & (I915_HW_MAP | I915_HW_SAMPLER)) - { - const uint nr = i915->current.sampler_enable_nr; - if (nr) { - const uint enabled = i915->current.sampler_enable_flags; - uint unit; - uint count = 0; - OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr)); - OUT_BATCH(enabled); - for (unit = 0; unit < I915_TEX_UNITS; unit++) { - if (enabled & (1 << unit)) { - struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture); - struct i915_winsys_buffer *buf = texture->buffer; - uint offset = 0; - assert(buf); - - count++; - - OUT_RELOC(buf, I915_USAGE_SAMPLER, offset); - OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */ - OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */ - } - } - assert(count == nr); +static void +emit_map(struct i915_context *i915) +{ + const uint nr = i915->current.sampler_enable_nr; + if (nr) { + const uint enabled = i915->current.sampler_enable_flags; + uint unit; + uint count = 0; + OUT_BATCH(_3DSTATE_MAP_STATE | (3 * nr)); + OUT_BATCH(enabled); + for (unit = 0; unit < I915_TEX_UNITS; unit++) { + if (enabled & (1 << unit)) { + struct i915_texture *texture = i915_texture(i915->fragment_sampler_views[unit]->texture); + struct i915_winsys_buffer *buf = texture->buffer; + unsigned offset = i915->current.texbuffer[unit][2]; + + assert(buf); + + count++; + + OUT_RELOC(buf, I915_USAGE_SAMPLER, offset); + OUT_BATCH(i915->current.texbuffer[unit][0]); /* MS3 */ + OUT_BATCH(i915->current.texbuffer[unit][1]); /* MS4 */ } } -#endif + assert(count == nr); + } +} -#if 01 - /* samplers */ - /* 2 + I915_TEX_UNITS*3 dwords, 0 relocs */ - if (i915->hardware_dirty 
& I915_HW_SAMPLER) - { - if (i915->current.sampler_enable_nr) { - int i; - - OUT_BATCH( _3DSTATE_SAMPLER_STATE | - (3 * i915->current.sampler_enable_nr) ); - - OUT_BATCH( i915->current.sampler_enable_flags ); - - for (i = 0; i < I915_TEX_UNITS; i++) { - if (i915->current.sampler_enable_flags & (1<<i)) { - OUT_BATCH( i915->current.sampler[i][0] ); - OUT_BATCH( i915->current.sampler[i][1] ); - OUT_BATCH( i915->current.sampler[i][2] ); - } +static void +validate_sampler(struct i915_context *i915, unsigned *batch_space) +{ + *batch_space = i915->current.sampler_enable_nr ? + 2 + 3*i915->current.sampler_enable_nr : 0; +} + +static void +emit_sampler(struct i915_context *i915) +{ + if (i915->current.sampler_enable_nr) { + int i; + + OUT_BATCH( _3DSTATE_SAMPLER_STATE | + (3 * i915->current.sampler_enable_nr) ); + + OUT_BATCH( i915->current.sampler_enable_flags ); + + for (i = 0; i < I915_TEX_UNITS; i++) { + if (i915->current.sampler_enable_flags & (1<<i)) { + OUT_BATCH( i915->current.sampler[i][0] ); + OUT_BATCH( i915->current.sampler[i][1] ); + OUT_BATCH( i915->current.sampler[i][2] ); } } } -#endif +} - /* constants */ - /* 2 + I915_MAX_CONSTANT*4 dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_PROGRAM) - { - /* Collate the user-defined constants with the fragment shader's - * immediates according to the constant_flags[] array. - */ - const uint nr = i915->fs->num_constants; - if (nr) { - uint i; - - OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); - OUT_BATCH( (1 << (nr - 1)) | ((1 << (nr - 1)) - 1) ); - - for (i = 0; i < nr; i++) { - const uint *c; - if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) { - /* grab user-defined constant */ - c = (uint *) i915->current.constants[PIPE_SHADER_FRAGMENT][i]; - } - else { - /* emit program constant */ - c = (uint *) i915->fs->constants[i]; - } +static void +validate_constants(struct i915_context *i915, unsigned *batch_space) +{ + int nr = i915->fs->num_constants ? 
+ 2 + 4*i915->fs->num_constants : 0; + + *batch_space = nr; +} + +static void +emit_constants(struct i915_context *i915) +{ + /* Collate the user-defined constants with the fragment shader's + * immediates according to the constant_flags[] array. + */ + const uint nr = i915->fs->num_constants; + + assert(nr < I915_MAX_CONSTANT); + if (nr) { + uint i; + + OUT_BATCH( _3DSTATE_PIXEL_SHADER_CONSTANTS | (nr * 4) ); + OUT_BATCH((1 << nr) - 1); + + for (i = 0; i < nr; i++) { + const uint *c; + if (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER) { + /* grab user-defined constant */ + c = (uint *) i915_buffer(i915->constants[PIPE_SHADER_FRAGMENT])->data; + c += 4 * i; + } + else { + /* emit program constant */ + c = (uint *) i915->fs->constants[i]; + } #if 0 /* debug */ - { - float *f = (float *) c; - printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3], - (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER - ? "user" : "immediate")); - } -#endif - OUT_BATCH(*c++); - OUT_BATCH(*c++); - OUT_BATCH(*c++); - OUT_BATCH(*c++); + { + float *f = (float *) c; + printf("Const %2d: %f %f %f %f %s\n", i, f[0], f[1], f[2], f[3], + (i915->fs->constant_flags[i] == I915_CONSTFLAG_USER + ? "user" : "immediate")); } +#endif + OUT_BATCH(*c++); + OUT_BATCH(*c++); + OUT_BATCH(*c++); + OUT_BATCH(*c++); } } +} + +static void +validate_program(struct i915_context *i915, unsigned *batch_space) +{ + uint additional_size = 0; + + additional_size += i915->current.target_fixup_format ? 
3 : 0; + + /* we need more batch space if we want to emulate rgba framebuffers */ + *batch_space = i915->fs->decl_len + i915->fs->program_len + additional_size; +} + +static void +emit_program(struct i915_context *i915) +{ + uint additional_size = 0; + uint i; + + /* count how much additional space we'll need */ + validate_program(i915, &additional_size); + additional_size -= i915->fs->decl_len + i915->fs->program_len; + + /* we should always have, at least, a pass-through program */ + assert(i915->fs->program_len > 0); - /* Fragment program */ - /* i915->current.program_len dwords, 0 relocs */ - if (i915->hardware_dirty & I915_HW_PROGRAM) + /* output the declarations */ { - uint i; - /* we should always have, at least, a pass-through program */ - assert(i915->fs->program_len > 0); - for (i = 0; i < i915->fs->program_len; i++) { - OUT_BATCH(i915->fs->program[i]); - } + /* first word has the size, we have to adjust that */ + uint size = (i915->fs->decl[0]); + size += additional_size; + OUT_BATCH(size); } - /* drawing surface size */ - /* 6 dwords, 0 relocs */ - { - uint w, h; - boolean k = framebuffer_size(&i915->framebuffer, &w, &h); - (void)k; - assert(k); + for (i = 1 ; i < i915->fs->decl_len; i++) + OUT_BATCH(i915->fs->decl[i]); - OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(((w - 1) & 0xffff) | ((h - 1) << 16)); - OUT_BATCH(0); + /* output the program */ + assert(i915->fs->program_len % 3 == 0); + for (i = 0 ; i < i915->fs->program_len; i+=3) { + OUT_BATCH(i915->fs->program[i]); + OUT_BATCH(i915->fs->program[i+1]); + OUT_BATCH(i915->fs->program[i+2]); + } + + /* we emit an additional mov with swizzle to fake RGBA framebuffers */ + if (i915->current.target_fixup_format) { + /* mov out_color, out_color.zyxw */ + OUT_BATCH(A0_MOV | + (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | + A0_DEST_CHANNEL_ALL | + (REG_TYPE_OC << A0_SRC0_TYPE_SHIFT) | + (T_DIFFUSE << A0_SRC0_NR_SHIFT)); + OUT_BATCH(i915->current.fixup_swizzle); OUT_BATCH(0); } +} + 
+static void +emit_draw_rect(struct i915_context *i915) +{ + if (i915->static_dirty & I915_DST_RECT) { + OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); + OUT_BATCH(DRAW_RECT_DIS_DEPTH_OFS); + OUT_BATCH(i915->current.draw_offset); + OUT_BATCH(i915->current.draw_size); + OUT_BATCH(i915->current.draw_offset); + } +} + +static boolean +i915_validate_state(struct i915_context *i915, unsigned *batch_space) +{ + unsigned tmp; + + i915->num_validation_buffers = 0; + if (i915->hardware_dirty & I915_HW_INVARIANT) + *batch_space = ARRAY_SIZE(invariant_state); + else + *batch_space = 0; + +#if 0 +static int counter_total = 0; +#define VALIDATE_ATOM(atom, hw_dirty) \ + if (i915->hardware_dirty & hw_dirty) { \ + static int counter_##atom = 0;\ + validate_##atom(i915, &tmp); \ + *batch_space += tmp;\ + counter_##atom += tmp;\ + counter_total += tmp;\ + printf("%s: \t%d/%d \t%2.2f\n",#atom, counter_##atom, counter_total, counter_##atom*100.f/counter_total);} +#else +#define VALIDATE_ATOM(atom, hw_dirty) \ + if (i915->hardware_dirty & hw_dirty) { \ + validate_##atom(i915, &tmp); \ + *batch_space += tmp; } +#endif + VALIDATE_ATOM(flush, I915_HW_FLUSH); + VALIDATE_ATOM(immediate, I915_HW_IMMEDIATE); + VALIDATE_ATOM(dynamic, I915_HW_DYNAMIC); + VALIDATE_ATOM(static, I915_HW_STATIC); + VALIDATE_ATOM(map, I915_HW_MAP); + VALIDATE_ATOM(sampler, I915_HW_SAMPLER); + VALIDATE_ATOM(constants, I915_HW_CONSTANTS); + VALIDATE_ATOM(program, I915_HW_PROGRAM); +#undef VALIDATE_ATOM + + if (i915->num_validation_buffers == 0) + return TRUE; + + if (!i915_winsys_validate_buffers(i915->batch, i915->validation_buffers, + i915->num_validation_buffers)) + return FALSE; + + return TRUE; +} + +/* Push the state into the sarea and/or texture memory. 
+ */ +void +i915_emit_hardware_state(struct i915_context *i915 ) +{ + unsigned batch_space; + uintptr_t save_ptr; + + assert(i915->dirty == 0); + + if (I915_DBG_ON(DBG_ATOMS)) + i915_dump_hardware_dirty(i915, __FUNCTION__); + + if (!i915_validate_state(i915, &batch_space)) { + FLUSH_BATCH(NULL, I915_FLUSH_ASYNC); + assert(i915_validate_state(i915, &batch_space)); + } + + if(!BEGIN_BATCH(batch_space)) { + FLUSH_BATCH(NULL, I915_FLUSH_ASYNC); + assert(i915_validate_state(i915, &batch_space)); + assert(BEGIN_BATCH(batch_space)); + } + save_ptr = (uintptr_t)i915->batch->ptr; + +#define EMIT_ATOM(atom, hw_dirty) \ + if (i915->hardware_dirty & hw_dirty) \ + emit_##atom(i915); + EMIT_ATOM(flush, I915_HW_FLUSH); + EMIT_ATOM(invariant, I915_HW_INVARIANT); + EMIT_ATOM(immediate, I915_HW_IMMEDIATE); + EMIT_ATOM(dynamic, I915_HW_DYNAMIC); + EMIT_ATOM(static, I915_HW_STATIC); + EMIT_ATOM(map, I915_HW_MAP); + EMIT_ATOM(sampler, I915_HW_SAMPLER); + EMIT_ATOM(constants, I915_HW_CONSTANTS); + EMIT_ATOM(program, I915_HW_PROGRAM); + EMIT_ATOM(draw_rect, I915_HW_STATIC); +#undef EMIT_ATOM + + I915_DBG(DBG_EMIT, "%s: used %d dwords, %d dwords reserved\n", __FUNCTION__, + ((uintptr_t)i915->batch->ptr - save_ptr) / 4, + batch_space); + assert(((uintptr_t)i915->batch->ptr - save_ptr) / 4 == batch_space); i915->hardware_dirty = 0; + i915->immediate_dirty = 0; + i915->dynamic_dirty = 0; + i915->static_dirty = 0; + i915->flush_dirty = 0; }