From: Chia-I Wu Date: Wed, 12 Dec 2012 21:28:42 +0000 (+0800) Subject: ilo: add 3D pipeline for GEN6 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=91ce766c35ebb3ed964e373253178b9b99040f03;hp=67233b56d627c3bfd948e7e8e7fef21368bbe70e;p=mesa.git ilo: add 3D pipeline for GEN6 The 3D pipeline is a high-level interface to emit 3D commands and states. It uses GEN6 GPE to do the real work. --- diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index f897166f763..6c7ebe31512 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -1,5 +1,8 @@ C_SOURCES := \ ilo_3d.c \ + ilo_3d_pipeline.c \ + ilo_3d_pipeline_dump.c \ + ilo_3d_pipeline_gen6.c \ ilo_blit.c \ ilo_context.c \ ilo_cp.c \ diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline.c b/src/gallium/drivers/ilo/ilo_3d_pipeline.c new file mode 100644 index 00000000000..089db062594 --- /dev/null +++ b/src/gallium/drivers/ilo/ilo_3d_pipeline.c @@ -0,0 +1,348 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2013 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#include "util/u_prim.h" +#include "intel_winsys.h" + +#include "ilo_context.h" +#include "ilo_cp.h" +#include "ilo_state.h" +#include "ilo_3d_pipeline_gen6.h" +#include "ilo_3d_pipeline.h" + +/* in U0.4 */ +struct sample_position { + uint8_t x, y; +}; + +/* \see gen6_get_sample_position() */ +static const struct sample_position sample_position_1x[1] = { + { 8, 8 }, +}; + +static const struct sample_position sample_position_4x[4] = { + { 6, 2 }, /* distance from the center is sqrt(40) */ + { 14, 6 }, /* distance from the center is sqrt(40) */ + { 2, 10 }, /* distance from the center is sqrt(40) */ + { 10, 14 }, /* distance from the center is sqrt(40) */ +}; + +static const struct sample_position sample_position_8x[8] = { + { 7, 9 }, /* distance from the center is sqrt(2) */ + { 9, 13 }, /* distance from the center is sqrt(26) */ + { 11, 3 }, /* distance from the center is sqrt(34) */ + { 13, 11 }, /* distance from the center is sqrt(34) */ + { 1, 7 }, /* distance from the center is sqrt(50) */ + { 5, 1 }, /* distance from the center is sqrt(58) */ + { 15, 5 }, /* distance from the center is sqrt(58) */ + { 3, 15 }, /* distance from the center is sqrt(74) */ +}; + +struct ilo_3d_pipeline * +ilo_3d_pipeline_create(struct ilo_cp *cp, int gen, int gt) +{ + struct ilo_3d_pipeline *p; + int i; + + p = CALLOC_STRUCT(ilo_3d_pipeline); + if (!p) + return NULL; + + p->cp = cp; + p->gen = gen; + + switch (p->gen) { + case ILO_GEN(6): + ilo_3d_pipeline_init_gen6(p); + break; + default: + assert(!"unsupported GEN"); + FREE(p); + return NULL; + break; + } + + p->gpe.gen = p->gen; + p->gpe.gt = gt; + + p->invalidate_flags = ILO_3D_PIPELINE_INVALIDATE_ALL; + + 
p->workaround_bo = p->cp->winsys->alloc_buffer(p->cp->winsys, + "PIPE_CONTROL workaround", 4096, 0); + if (!p->workaround_bo) { + ilo_warn("failed to allocate PIPE_CONTROL workaround bo\n"); + FREE(p); + return NULL; + } + + p->packed_sample_position_1x = + sample_position_1x[0].x << 4 | + sample_position_1x[0].y; + + /* pack into dwords */ + for (i = 0; i < 4; i++) { + p->packed_sample_position_4x |= + sample_position_4x[i].x << (8 * i + 4) | + sample_position_4x[i].y << (8 * i); + + p->packed_sample_position_8x[0] |= + sample_position_8x[i].x << (8 * i + 4) | + sample_position_8x[i].y << (8 * i); + + p->packed_sample_position_8x[1] |= + sample_position_8x[4 + i].x << (8 * i + 4) | + sample_position_8x[4 + i].y << (8 * i); + } + + return p; +} + +void +ilo_3d_pipeline_destroy(struct ilo_3d_pipeline *p) +{ + if (p->workaround_bo) + p->workaround_bo->unreference(p->workaround_bo); + + FREE(p); +} + +static void +handle_invalid_batch_bo(struct ilo_3d_pipeline *p, bool unset) +{ + if (p->invalidate_flags & ILO_3D_PIPELINE_INVALIDATE_BATCH_BO) { + if (p->gen == ILO_GEN(6)) + p->state.has_gen6_wa_pipe_control = false; + + if (unset) + p->invalidate_flags &= ~ILO_3D_PIPELINE_INVALIDATE_BATCH_BO; + } +} + +/* XXX move to u_prim.h */ +static unsigned +prim_count(unsigned prim, unsigned num_verts) +{ + unsigned num_prims; + + u_trim_pipe_prim(prim, &num_verts); + + switch (prim) { + case PIPE_PRIM_POINTS: + num_prims = num_verts; + break; + case PIPE_PRIM_LINES: + num_prims = num_verts / 2; + break; + case PIPE_PRIM_LINE_LOOP: + num_prims = num_verts; + break; + case PIPE_PRIM_LINE_STRIP: + num_prims = num_verts - 1; + break; + case PIPE_PRIM_TRIANGLES: + num_prims = num_verts / 3; + break; + case PIPE_PRIM_TRIANGLE_STRIP: + case PIPE_PRIM_TRIANGLE_FAN: + num_prims = num_verts - 2; + break; + case PIPE_PRIM_QUADS: + num_prims = (num_verts / 4) * 2; + break; + case PIPE_PRIM_QUAD_STRIP: + num_prims = (num_verts / 2 - 1) * 2; + break; + case PIPE_PRIM_POLYGON: + num_prims = 
num_verts - 2; + break; + case PIPE_PRIM_LINES_ADJACENCY: + num_prims = num_verts / 4; + break; + case PIPE_PRIM_LINE_STRIP_ADJACENCY: + num_prims = num_verts - 3; + break; + case PIPE_PRIM_TRIANGLES_ADJACENCY: + /* u_trim_pipe_prim is wrong? */ + num_verts += 1; + + num_prims = num_verts / 6; + break; + case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY: + /* u_trim_pipe_prim is wrong? */ + if (num_verts >= 6) + num_verts -= (num_verts % 2); + else + num_verts = 0; + + num_prims = (num_verts / 2 - 2); + break; + default: + assert(!"unknown pipe prim"); + num_prims = 0; + break; + } + + return num_prims; +} + +/** + * Emit context states and 3DPRIMITIVE. + */ +bool +ilo_3d_pipeline_emit_draw(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + const struct pipe_draw_info *info, + int *prim_generated, int *prim_emitted) +{ + bool success; + + /* + * We keep track of the SVBI in the driver, so that we can restore it when + * the HW context is invalidated (by another process). The value needs to + * be reset when the stream output targets are changed. + */ + if (ilo->dirty & ILO_DIRTY_STREAM_OUTPUT_TARGETS) + p->state.so_num_vertices = 0; + + while (true) { + struct ilo_cp_jmp_buf jmp; + int err; + + /* we will rewind if aperture check below fails */ + ilo_cp_setjmp(p->cp, &jmp); + + handle_invalid_batch_bo(p, false); + + /* draw! 
*/ + ilo_cp_assert_no_implicit_flush(p->cp, true); + p->emit_draw(p, ilo, info); + ilo_cp_assert_no_implicit_flush(p->cp, false); + + err = ilo->winsys->check_aperture_space(ilo->winsys, &p->cp->bo, 1); + if (!err) { + success = true; + break; + } + + /* rewind */ + ilo_cp_longjmp(p->cp, &jmp); + + if (ilo_cp_empty(p->cp)) { + success = false; + break; + } + else { + /* flush and try again */ + ilo_cp_flush(p->cp); + } + } + + if (success) { + const int num_verts = u_vertices_per_prim(u_reduced_prim(info->mode)); + const int max_emit = + (p->state.so_max_vertices - p->state.so_num_vertices) / num_verts; + const int generated = prim_count(info->mode, info->count); + const int emitted = MIN2(generated, max_emit); + + p->state.so_num_vertices += emitted * num_verts; + + if (prim_generated) + *prim_generated = generated; + + if (prim_emitted) + *prim_emitted = emitted; + } + + p->invalidate_flags = 0x0; + + return success; +} + +/** + * Emit PIPE_CONTROL to flush all caches. + */ +void +ilo_3d_pipeline_emit_flush(struct ilo_3d_pipeline *p) +{ + handle_invalid_batch_bo(p, true); + p->emit_flush(p); +} + +/** + * Emit PIPE_CONTROL with PIPE_CONTROL_WRITE_TIMESTAMP post-sync op. + */ +void +ilo_3d_pipeline_emit_write_timestamp(struct ilo_3d_pipeline *p, + struct intel_bo *bo, int index) +{ + handle_invalid_batch_bo(p, true); + p->emit_write_timestamp(p, bo, index); +} + +/** + * Emit PIPE_CONTROL with PIPE_CONTROL_WRITE_DEPTH_COUNT post-sync op. 
+ */ +void +ilo_3d_pipeline_emit_write_depth_count(struct ilo_3d_pipeline *p, + struct intel_bo *bo, int index) +{ + handle_invalid_batch_bo(p, true); + p->emit_write_depth_count(p, bo, index); +} + +void +ilo_3d_pipeline_get_sample_position(struct ilo_3d_pipeline *p, + unsigned sample_count, + unsigned sample_index, + float *x, float *y) +{ + const struct sample_position *pos; + + switch (sample_count) { + case 1: + assert(sample_index < Elements(sample_position_1x)); + pos = sample_position_1x; + break; + case 4: + assert(sample_index < Elements(sample_position_4x)); + pos = sample_position_4x; + break; + case 8: + assert(sample_index < Elements(sample_position_8x)); + pos = sample_position_8x; + break; + default: + assert(!"unknown sample count"); + *x = 0.5f; + *y = 0.5f; + return; + break; + } + + *x = (float) pos[sample_index].x / 16.0f; + *y = (float) pos[sample_index].y / 16.0f; +} diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline.h b/src/gallium/drivers/ilo/ilo_3d_pipeline.h new file mode 100644 index 00000000000..3ed33044005 --- /dev/null +++ b/src/gallium/drivers/ilo/ilo_3d_pipeline.h @@ -0,0 +1,242 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2013 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#ifndef ILO_3D_PIPELINE_H +#define ILO_3D_PIPELINE_H + +#include "ilo_common.h" +#include "ilo_context.h" +#include "ilo_gpe_gen6.h" + +struct pipe_draw_info; +struct intel_bo; +struct ilo_cp; +struct ilo_context; + +enum ilo_3d_pipeline_invalidate_flags { + ILO_3D_PIPELINE_INVALIDATE_HW = 1 << 0, + ILO_3D_PIPELINE_INVALIDATE_BATCH_BO = 1 << 1, + ILO_3D_PIPELINE_INVALIDATE_STATE_BO = 1 << 2, + ILO_3D_PIPELINE_INVALIDATE_KERNEL_BO = 1 << 3, + + ILO_3D_PIPELINE_INVALIDATE_ALL = 0xffffffff, +}; + +enum ilo_3d_pipeline_action { + ILO_3D_PIPELINE_DRAW, + ILO_3D_PIPELINE_FLUSH, + ILO_3D_PIPELINE_WRITE_TIMESTAMP, + ILO_3D_PIPELINE_WRITE_DEPTH_COUNT, +}; + +/** + * 3D pipeline. 
+ */ +struct ilo_3d_pipeline { + struct ilo_cp *cp; + int gen; + + struct ilo_gpe gpe; + + uint32_t invalidate_flags; + + struct intel_bo *workaround_bo; + + uint32_t packed_sample_position_1x; + uint32_t packed_sample_position_4x; + uint32_t packed_sample_position_8x[2]; + + int (*estimate_size)(struct ilo_3d_pipeline *pipeline, + enum ilo_3d_pipeline_action action, + const void *arg); + + void (*emit_draw)(struct ilo_3d_pipeline *pipeline, + const struct ilo_context *ilo, + const struct pipe_draw_info *info); + + void (*emit_flush)(struct ilo_3d_pipeline *pipeline); + + void (*emit_write_timestamp)(struct ilo_3d_pipeline *pipeline, + struct intel_bo *bo, int index); + + void (*emit_write_depth_count)(struct ilo_3d_pipeline *pipeline, + struct intel_bo *bo, int index); + + /** + * all GPE functions of all GENs + */ +#define GEN6_EMIT(name) ilo_gpe_gen6_ ## name gen6_ ## name + GEN6_EMIT(STATE_BASE_ADDRESS); + GEN6_EMIT(STATE_SIP); + GEN6_EMIT(PIPELINE_SELECT); + GEN6_EMIT(3DSTATE_BINDING_TABLE_POINTERS); + GEN6_EMIT(3DSTATE_SAMPLER_STATE_POINTERS); + GEN6_EMIT(3DSTATE_URB); + GEN6_EMIT(3DSTATE_VERTEX_BUFFERS); + GEN6_EMIT(3DSTATE_VERTEX_ELEMENTS); + GEN6_EMIT(3DSTATE_INDEX_BUFFER); + GEN6_EMIT(3DSTATE_VF_STATISTICS); + GEN6_EMIT(3DSTATE_VIEWPORT_STATE_POINTERS); + GEN6_EMIT(3DSTATE_CC_STATE_POINTERS); + GEN6_EMIT(3DSTATE_SCISSOR_STATE_POINTERS); + GEN6_EMIT(3DSTATE_VS); + GEN6_EMIT(3DSTATE_GS); + GEN6_EMIT(3DSTATE_CLIP); + GEN6_EMIT(3DSTATE_SF); + GEN6_EMIT(3DSTATE_WM); + GEN6_EMIT(3DSTATE_CONSTANT_VS); + GEN6_EMIT(3DSTATE_CONSTANT_GS); + GEN6_EMIT(3DSTATE_CONSTANT_PS); + GEN6_EMIT(3DSTATE_SAMPLE_MASK); + GEN6_EMIT(3DSTATE_DRAWING_RECTANGLE); + GEN6_EMIT(3DSTATE_DEPTH_BUFFER); + GEN6_EMIT(3DSTATE_POLY_STIPPLE_OFFSET); + GEN6_EMIT(3DSTATE_POLY_STIPPLE_PATTERN); + GEN6_EMIT(3DSTATE_LINE_STIPPLE); + GEN6_EMIT(3DSTATE_AA_LINE_PARAMETERS); + GEN6_EMIT(3DSTATE_GS_SVB_INDEX); + GEN6_EMIT(3DSTATE_MULTISAMPLE); + GEN6_EMIT(3DSTATE_STENCIL_BUFFER); + 
GEN6_EMIT(3DSTATE_HIER_DEPTH_BUFFER); + GEN6_EMIT(3DSTATE_CLEAR_PARAMS); + GEN6_EMIT(PIPE_CONTROL); + GEN6_EMIT(3DPRIMITIVE); + GEN6_EMIT(INTERFACE_DESCRIPTOR_DATA); + GEN6_EMIT(SF_VIEWPORT); + GEN6_EMIT(CLIP_VIEWPORT); + GEN6_EMIT(CC_VIEWPORT); + GEN6_EMIT(COLOR_CALC_STATE); + GEN6_EMIT(BLEND_STATE); + GEN6_EMIT(DEPTH_STENCIL_STATE); + GEN6_EMIT(SCISSOR_RECT); + GEN6_EMIT(BINDING_TABLE_STATE); + GEN6_EMIT(surf_SURFACE_STATE); + GEN6_EMIT(view_SURFACE_STATE); + GEN6_EMIT(cbuf_SURFACE_STATE); + GEN6_EMIT(so_SURFACE_STATE); + GEN6_EMIT(SAMPLER_STATE); + GEN6_EMIT(SAMPLER_BORDER_COLOR_STATE); + GEN6_EMIT(push_constant_buffer); +#undef GEN6_EMIT + + /** + * HW states. + */ + struct ilo_3d_pipeline_state { + bool has_gen6_wa_pipe_control; + + int reduced_prim; + int so_num_vertices, so_max_vertices; + + uint32_t SF_VIEWPORT; + uint32_t CLIP_VIEWPORT; + uint32_t CC_VIEWPORT; + + uint32_t COLOR_CALC_STATE; + uint32_t BLEND_STATE; + uint32_t DEPTH_STENCIL_STATE; + + uint32_t SCISSOR_RECT; + + struct { + uint32_t BINDING_TABLE_STATE; + int BINDING_TABLE_STATE_size; + uint32_t SURFACE_STATE[ILO_MAX_VS_SURFACES]; + uint32_t SAMPLER_STATE; + uint32_t SAMPLER_BORDER_COLOR_STATE[ILO_MAX_SAMPLERS]; + uint32_t PUSH_CONSTANT_BUFFER; + int PUSH_CONSTANT_BUFFER_size; + } vs; + + struct { + uint32_t BINDING_TABLE_STATE; + int BINDING_TABLE_STATE_size; + uint32_t SURFACE_STATE[ILO_MAX_GS_SURFACES]; + bool active; + } gs; + + struct { + uint32_t BINDING_TABLE_STATE; + int BINDING_TABLE_STATE_size; + uint32_t SURFACE_STATE[ILO_MAX_WM_SURFACES]; + uint32_t SAMPLER_STATE; + uint32_t SAMPLER_BORDER_COLOR_STATE[ILO_MAX_SAMPLERS]; + } wm; + } state; +}; + +struct ilo_3d_pipeline * +ilo_3d_pipeline_create(struct ilo_cp *cp, int gen, int gt); + +void +ilo_3d_pipeline_destroy(struct ilo_3d_pipeline *pipeline); + + +static inline void +ilo_3d_pipeline_invalidate(struct ilo_3d_pipeline *p, uint32_t flags) +{ + p->invalidate_flags |= flags; +} + +/** + * Estimate the size of an action. 
+ */ +static inline int +ilo_3d_pipeline_estimate_size(struct ilo_3d_pipeline *pipeline, + enum ilo_3d_pipeline_action action, + const void *arg) +{ + return pipeline->estimate_size(pipeline, action, arg); +} + +bool +ilo_3d_pipeline_emit_draw(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + const struct pipe_draw_info *info, + int *prim_generated, int *prim_emitted); + +void +ilo_3d_pipeline_emit_flush(struct ilo_3d_pipeline *p); + +void +ilo_3d_pipeline_emit_write_timestamp(struct ilo_3d_pipeline *p, + struct intel_bo *bo, int index); + +void +ilo_3d_pipeline_emit_write_depth_count(struct ilo_3d_pipeline *p, + struct intel_bo *bo, int index); + +void +ilo_3d_pipeline_get_sample_position(struct ilo_3d_pipeline *p, + unsigned sample_count, + unsigned sample_index, + float *x, float *y); + +void +ilo_3d_pipeline_dump(struct ilo_3d_pipeline *p); + +#endif /* ILO_3D_PIPELINE_H */ diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline_dump.c b/src/gallium/drivers/ilo/ilo_3d_pipeline_dump.c new file mode 100644 index 00000000000..7f788521aff --- /dev/null +++ b/src/gallium/drivers/ilo/ilo_3d_pipeline_dump.c @@ -0,0 +1,643 @@ +/* + * Copyright © 2007 Intel Corporation + * Copyright (C) 2012-2013 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt + * Chia-I Wu + */ + +#include "intel_winsys.h" + +#include "ilo_cp.h" +#include "ilo_3d_pipeline.h" + +#define PRINTFLIKE(f, a) _util_printf_format(f, a) +typedef short GLshort; +typedef int GLint; +typedef unsigned char GLubyte; +typedef unsigned int GLuint; +typedef float GLfloat; +#include <stdint.h> +#include <stdarg.h> +#include <stdio.h> +#include "brw_structs.h" +#include "brw_defines.h" + +struct intel_context { + int gen; + + struct { + struct { + void *virtual; + } *bo, bo_dst; + } batch; +}; + +struct brw_context { + struct intel_context intel; +}; + +static void +batch_out(struct brw_context *brw, const char *name, uint32_t offset, + int index, char *fmt, ...) PRINTFLIKE(5, 6); + +static void +batch_out(struct brw_context *brw, const char *name, uint32_t offset, + int index, char *fmt, ...) 
+{ + struct intel_context *intel = &brw->intel; + uint32_t *data = intel->batch.bo->virtual + offset; + va_list va; + + fprintf(stderr, "0x%08x: 0x%08x: %8s: ", + offset + index * 4, data[index], name); + va_start(va, fmt); + vfprintf(stderr, fmt, va); + va_end(va); +} + +static const char * +get_965_surfacetype(unsigned int surfacetype) +{ + switch (surfacetype) { + case 0: return "1D"; + case 1: return "2D"; + case 2: return "3D"; + case 3: return "CUBE"; + case 4: return "BUFFER"; + case 7: return "NULL"; + default: return "unknown"; + } +} + +static const char * +get_965_surface_format(unsigned int surface_format) +{ + switch (surface_format) { + case 0x000: return "r32g32b32a32_float"; + case 0x0c1: return "b8g8r8a8_unorm"; + case 0x100: return "b5g6r5_unorm"; + case 0x102: return "b5g5r5a1_unorm"; + case 0x104: return "b4g4r4a4_unorm"; + default: return "unknown"; + } +} + +static void dump_vs_state(struct brw_context *brw, uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "VS_STATE"; + struct brw_vs_unit_state *vs = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + vs->thread4.max_threads + 1); + batch_out(brw, name, offset, 5, "vs5\n"); + batch_out(brw, name, offset, 6, "vs6\n"); +} + +static void dump_gs_state(struct brw_context *brw, uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "GS_STATE"; + struct brw_gs_unit_state *gs = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + gs->thread4.max_threads + 1); + 
/* dwords 5 and 6 carry the unit's own prefix, as in the clip/sf/wm dumps */ + batch_out(brw, name, offset, 5, "gs5\n"); + batch_out(brw, name, offset, 6, "gs6\n"); +} + +static void dump_clip_state(struct brw_context *brw, uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "CLIP_STATE"; + struct brw_clip_unit_state *clip = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + clip->thread4.max_threads + 1); + batch_out(brw, name, offset, 5, "clip5\n"); + batch_out(brw, name, offset, 6, "clip6\n"); + batch_out(brw, name, offset, 7, "vp xmin %f\n", clip->viewport_xmin); + batch_out(brw, name, offset, 8, "vp xmax %f\n", clip->viewport_xmax); + batch_out(brw, name, offset, 9, "vp ymin %f\n", clip->viewport_ymin); + batch_out(brw, name, offset, 10, "vp ymax %f\n", clip->viewport_ymax); +} + +static void dump_sf_state(struct brw_context *brw, uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "SF_STATE"; + struct brw_sf_unit_state *sf = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, "thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "thread4: %d threads\n", + sf->thread4.max_threads + 1); + batch_out(brw, name, offset, 5, "sf5: viewport offset\n"); + batch_out(brw, name, offset, 6, "sf6\n"); + batch_out(brw, name, offset, 7, "sf7\n"); +} + +static void dump_wm_state(struct brw_context *brw, uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "WM_STATE"; + struct brw_wm_unit_state *wm = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "thread0\n"); + batch_out(brw, name, offset, 1, "thread1\n"); + batch_out(brw, name, offset, 2, 
"thread2\n"); + batch_out(brw, name, offset, 3, "thread3\n"); + batch_out(brw, name, offset, 4, "wm4\n"); + batch_out(brw, name, offset, 5, "wm5: %s%s%s%s%s%s, %d threads\n", + wm->wm5.enable_8_pix ? "8pix" : "", + wm->wm5.enable_16_pix ? "16pix" : "", + wm->wm5.program_uses_depth ? ", uses depth" : "", + wm->wm5.program_computes_depth ? ", computes depth" : "", + wm->wm5.program_uses_killpixel ? ", kills" : "", + wm->wm5.thread_dispatch_enable ? "" : ", no dispatch", + wm->wm5.max_threads + 1); + batch_out(brw, name, offset, 6, "depth offset constant %f\n", + wm->global_depth_offset_constant); + batch_out(brw, name, offset, 7, "depth offset scale %f\n", + wm->global_depth_offset_scale); + batch_out(brw, name, offset, 8, "wm8: kernel 1 (gen5+)\n"); + batch_out(brw, name, offset, 9, "wm9: kernel 2 (gen5+)\n"); + batch_out(brw, name, offset, 10, "wm10: kernel 3 (gen5+)\n"); +} + +static void dump_surface_state(struct brw_context *brw, uint32_t offset) +{ + const char *name = "SURF"; + uint32_t *surf = brw->intel.batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "%s %s\n", + get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), + get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT))); + batch_out(brw, name, offset, 1, "offset\n"); + batch_out(brw, name, offset, 2, "%dx%d size, %d mips\n", + GET_FIELD(surf[2], BRW_SURFACE_WIDTH) + 1, + GET_FIELD(surf[2], BRW_SURFACE_HEIGHT) + 1, + GET_FIELD(surf[2], BRW_SURFACE_LOD)); + batch_out(brw, name, offset, 3, "pitch %d, %s tiled\n", + GET_FIELD(surf[3], BRW_SURFACE_PITCH) + 1, + (surf[3] & BRW_SURFACE_TILED) ? + ((surf[3] & BRW_SURFACE_TILED_Y) ? 
"Y" : "X") : "not"); + batch_out(brw, name, offset, 4, "mip base %d\n", + GET_FIELD(surf[4], BRW_SURFACE_MIN_LOD)); + batch_out(brw, name, offset, 5, "x,y offset: %d,%d\n", + GET_FIELD(surf[5], BRW_SURFACE_X_OFFSET), + GET_FIELD(surf[5], BRW_SURFACE_Y_OFFSET)); +} + +static void dump_gen7_surface_state(struct brw_context *brw, uint32_t offset) +{ + const char *name = "SURF"; + uint32_t *surf = brw->intel.batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "%s %s\n", + get_965_surfacetype(GET_FIELD(surf[0], BRW_SURFACE_TYPE)), + get_965_surface_format(GET_FIELD(surf[0], BRW_SURFACE_FORMAT))); + batch_out(brw, name, offset, 1, "offset\n"); + batch_out(brw, name, offset, 2, "%dx%d size, %d mips\n", + GET_FIELD(surf[2], GEN7_SURFACE_WIDTH) + 1, + GET_FIELD(surf[2], GEN7_SURFACE_HEIGHT) + 1, + surf[5] & INTEL_MASK(3, 0)); + batch_out(brw, name, offset, 3, "pitch %d, %stiled\n", + (surf[3] & INTEL_MASK(17, 0)) + 1, + (surf[0] & (1 << 14)) ? "" : "not "); + batch_out(brw, name, offset, 4, "mip base %d\n", + GET_FIELD(surf[5], GEN7_SURFACE_MIN_LOD)); + batch_out(brw, name, offset, 5, "x,y offset: %d,%d\n", + GET_FIELD(surf[5], BRW_SURFACE_X_OFFSET), + GET_FIELD(surf[5], BRW_SURFACE_Y_OFFSET)); +} + +/** + * Dump the sampler default color found at \p offset in the batch bo; the + * layout printed depends on the GEN. + */ +static void +dump_sdc(struct brw_context *brw, uint32_t offset) +{ + const char *name = "SDC"; + struct intel_context *intel = &brw->intel; + + if (intel->gen >= 5 && intel->gen <= 6) { + struct gen5_sampler_default_color *sdc = (intel->batch.bo->virtual + + offset); + batch_out(brw, name, offset, 0, "unorm rgba\n"); + /* labels follow the r, g, b, a order used for the other GENs below */ + batch_out(brw, name, offset, 1, "r %f\n", sdc->f[0]); + batch_out(brw, name, offset, 2, "g %f\n", sdc->f[1]); + batch_out(brw, name, offset, 3, "b %f\n", sdc->f[2]); + batch_out(brw, name, offset, 4, "a %f\n", sdc->f[3]); + batch_out(brw, name, offset, 5, "half float rg\n"); + batch_out(brw, name, offset, 6, "half float ba\n"); + batch_out(brw, name, offset, 7, "u16 rg\n"); + batch_out(brw, name, offset, 8, "u16 ba\n"); + batch_out(brw, name, offset, 
9, "s16 rg\n"); + batch_out(brw, name, offset, 10, "s16 ba\n"); + batch_out(brw, name, offset, 11, "s8 rgba\n"); + } else { + struct brw_sampler_default_color *sdc = (intel->batch.bo->virtual + + offset); + batch_out(brw, name, offset, 0, "r %f\n", sdc->color[0]); + batch_out(brw, name, offset, 1, "g %f\n", sdc->color[1]); + batch_out(brw, name, offset, 2, "b %f\n", sdc->color[2]); + batch_out(brw, name, offset, 3, "a %f\n", sdc->color[3]); + } +} + +static void dump_sampler_state(struct brw_context *brw, + uint32_t offset, uint32_t size) +{ + struct intel_context *intel = &brw->intel; + int i; + struct brw_sampler_state *samp = intel->batch.bo->virtual + offset; + + assert(intel->gen < 7); + + for (i = 0; i < size / sizeof(*samp); i++) { + char name[20]; + + sprintf(name, "WM SAMP%d", i); + batch_out(brw, name, offset, 0, "filtering\n"); + batch_out(brw, name, offset, 1, "wrapping, lod\n"); + batch_out(brw, name, offset, 2, "default color pointer\n"); + batch_out(brw, name, offset, 3, "chroma key, aniso\n"); + + samp++; + offset += sizeof(*samp); + } +} + +static void dump_gen7_sampler_state(struct brw_context *brw, + uint32_t offset, uint32_t size) +{ + struct intel_context *intel = &brw->intel; + struct gen7_sampler_state *samp = intel->batch.bo->virtual + offset; + int i; + + assert(intel->gen >= 7); + + for (i = 0; i < size / sizeof(*samp); i++) { + char name[20]; + + sprintf(name, "WM SAMP%d", i); + batch_out(brw, name, offset, 0, "filtering\n"); + batch_out(brw, name, offset, 1, "wrapping, lod\n"); + batch_out(brw, name, offset, 2, "default color pointer\n"); + batch_out(brw, name, offset, 3, "chroma key, aniso\n"); + + samp++; + offset += sizeof(*samp); + } +} + + +static void dump_sf_viewport_state(struct brw_context *brw, + uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "SF VP"; + struct brw_sf_viewport *vp = intel->batch.bo->virtual + offset; + + assert(intel->gen < 7); + + batch_out(brw, name, offset, 0, "m00 = 
%f\n", vp->viewport.m00); + batch_out(brw, name, offset, 1, "m11 = %f\n", vp->viewport.m11); + batch_out(brw, name, offset, 2, "m22 = %f\n", vp->viewport.m22); + batch_out(brw, name, offset, 3, "m30 = %f\n", vp->viewport.m30); + batch_out(brw, name, offset, 4, "m31 = %f\n", vp->viewport.m31); + batch_out(brw, name, offset, 5, "m32 = %f\n", vp->viewport.m32); + + batch_out(brw, name, offset, 6, "top left = %d,%d\n", + vp->scissor.xmin, vp->scissor.ymin); + batch_out(brw, name, offset, 7, "bottom right = %d,%d\n", + vp->scissor.xmax, vp->scissor.ymax); +} + +static void dump_clip_viewport_state(struct brw_context *brw, + uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "CLIP VP"; + struct brw_clipper_viewport *vp = intel->batch.bo->virtual + offset; + + assert(intel->gen < 7); + + batch_out(brw, name, offset, 0, "xmin = %f\n", vp->xmin); + batch_out(brw, name, offset, 1, "xmax = %f\n", vp->xmax); + batch_out(brw, name, offset, 2, "ymin = %f\n", vp->ymin); + batch_out(brw, name, offset, 3, "ymax = %f\n", vp->ymax); +} + +static void dump_sf_clip_viewport_state(struct brw_context *brw, + uint32_t offset) +{ + struct intel_context *intel = &brw->intel; + const char *name = "SF_CLIP VP"; + struct gen7_sf_clip_viewport *vp = intel->batch.bo->virtual + offset; + + assert(intel->gen >= 7); + + batch_out(brw, name, offset, 0, "m00 = %f\n", vp->viewport.m00); + batch_out(brw, name, offset, 1, "m11 = %f\n", vp->viewport.m11); + batch_out(brw, name, offset, 2, "m22 = %f\n", vp->viewport.m22); + batch_out(brw, name, offset, 3, "m30 = %f\n", vp->viewport.m30); + batch_out(brw, name, offset, 4, "m31 = %f\n", vp->viewport.m31); + batch_out(brw, name, offset, 5, "m32 = %f\n", vp->viewport.m32); + batch_out(brw, name, offset, 6, "guardband xmin = %f\n", vp->guardband.xmin); + batch_out(brw, name, offset, 7, "guardband xmax = %f\n", vp->guardband.xmax); + batch_out(brw, name, offset, 8, "guardband ymin = %f\n", vp->guardband.ymin); + batch_out(brw, 
name, offset, 9, "guardband ymax = %f\n", vp->guardband.ymax); +} + + +static void dump_cc_viewport_state(struct brw_context *brw, uint32_t offset) +{ + const char *name = "CC VP"; + struct brw_cc_viewport *vp = brw->intel.batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "min_depth = %f\n", vp->min_depth); + batch_out(brw, name, offset, 1, "max_depth = %f\n", vp->max_depth); +} + +static void dump_depth_stencil_state(struct brw_context *brw, uint32_t offset) +{ + const char *name = "D_S"; + struct gen6_depth_stencil_state *ds = brw->intel.batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, + "stencil %sable, func %d, write %sable\n", + ds->ds0.stencil_enable ? "en" : "dis", + ds->ds0.stencil_func, + ds->ds0.stencil_write_enable ? "en" : "dis"); + batch_out(brw, name, offset, 1, + "stencil test mask 0x%x, write mask 0x%x\n", + ds->ds1.stencil_test_mask, ds->ds1.stencil_write_mask); + batch_out(brw, name, offset, 2, + "depth test %sable, func %d, write %sable\n", + ds->ds2.depth_test_enable ? "en" : "dis", + ds->ds2.depth_test_func, + ds->ds2.depth_write_enable ? "en" : "dis"); +} + +static void dump_cc_state_gen4(struct brw_context *brw, uint32_t offset) +{ + const char *name = "CC"; + + batch_out(brw, name, offset, 0, "cc0\n"); + batch_out(brw, name, offset, 1, "cc1\n"); + batch_out(brw, name, offset, 2, "cc2\n"); + batch_out(brw, name, offset, 3, "cc3\n"); + batch_out(brw, name, offset, 4, "cc4: viewport offset\n"); + batch_out(brw, name, offset, 5, "cc5\n"); + batch_out(brw, name, offset, 6, "cc6\n"); + batch_out(brw, name, offset, 7, "cc7\n"); +} + +static void dump_cc_state_gen6(struct brw_context *brw, uint32_t offset) +{ + const char *name = "CC"; + struct gen6_color_calc_state *cc = brw->intel.batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, + "alpha test format %s, round disable %d, stencil ref %d, " + "bf stencil ref %d\n", + cc->cc0.alpha_test_format ? 
"FLOAT32" : "UNORM8", + cc->cc0.round_disable, + cc->cc0.stencil_ref, + cc->cc0.bf_stencil_ref); + batch_out(brw, name, offset, 1, "\n"); + batch_out(brw, name, offset, 2, "constant red %f\n", cc->constant_r); + batch_out(brw, name, offset, 3, "constant green %f\n", cc->constant_g); + batch_out(brw, name, offset, 4, "constant blue %f\n", cc->constant_b); + batch_out(brw, name, offset, 5, "constant alpha %f\n", cc->constant_a); +} + +static void dump_blend_state(struct brw_context *brw, uint32_t offset) +{ + const char *name = "BLEND"; + + batch_out(brw, name, offset, 0, "\n"); + batch_out(brw, name, offset, 1, "\n"); +} + +static void +dump_scissor(struct brw_context *brw, uint32_t offset) +{ + const char *name = "SCISSOR"; + struct intel_context *intel = &brw->intel; + struct gen6_scissor_rect *scissor = intel->batch.bo->virtual + offset; + + batch_out(brw, name, offset, 0, "xmin %d, ymin %d\n", + scissor->xmin, scissor->ymin); + batch_out(brw, name, offset, 1, "xmax %d, ymax %d\n", + scissor->xmax, scissor->ymax); +} + +static void +dump_vs_constants(struct brw_context *brw, uint32_t offset, uint32_t size) +{ + const char *name = "VS_CONST"; + struct intel_context *intel = &brw->intel; + uint32_t *as_uint = intel->batch.bo->virtual + offset; + float *as_float = intel->batch.bo->virtual + offset; + int i; + + for (i = 0; i < size / 4; i += 4) { + batch_out(brw, name, offset, i, "%3d: (% f % f % f % f) (0x%08x 0x%08x 0x%08x 0x%08x)\n", + i / 4, + as_float[i], as_float[i + 1], as_float[i + 2], as_float[i + 3], + as_uint[i], as_uint[i + 1], as_uint[i + 2], as_uint[i + 3]); + } +} + +static void +dump_wm_constants(struct brw_context *brw, uint32_t offset, uint32_t size) +{ + const char *name = "WM_CONST"; + struct intel_context *intel = &brw->intel; + uint32_t *as_uint = intel->batch.bo->virtual + offset; + float *as_float = intel->batch.bo->virtual + offset; + int i; + + for (i = 0; i < size / 4; i += 4) { + batch_out(brw, name, offset, i, "%3d: (% f % f % f % f) 
(0x%08x 0x%08x 0x%08x 0x%08x)\n", + i / 4, + as_float[i], as_float[i + 1], as_float[i + 2], as_float[i + 3], + as_uint[i], as_uint[i + 1], as_uint[i + 2], as_uint[i + 3]); + } +} + +static void dump_binding_table(struct brw_context *brw, uint32_t offset, + uint32_t size) +{ + char name[20]; + int i; + uint32_t *data = brw->intel.batch.bo->virtual + offset; + + for (i = 0; i < size / 4; i++) { + if (data[i] == 0) + continue; + + sprintf(name, "BIND%d", i); + batch_out(brw, name, offset, i, "surface state address\n"); + } +} + +static void +init_brw(struct brw_context *brw, struct ilo_3d_pipeline *p) +{ + brw->intel.gen = ILO_GEN_GET_MAJOR(p->gen); + brw->intel.batch.bo_dst.virtual = p->cp->bo->get_virtual(p->cp->bo); + brw->intel.batch.bo = &brw->intel.batch.bo_dst; +} + +static void +dump_3d_state(struct ilo_3d_pipeline *p) +{ + struct brw_context brw; + int num_states, i; + + init_brw(&brw, p); + + if (brw.intel.gen >= 7) { + dump_cc_viewport_state(&brw, p->state.CC_VIEWPORT); + dump_sf_clip_viewport_state(&brw, p->state.SF_VIEWPORT); + } + else { + dump_clip_viewport_state(&brw, p->state.CLIP_VIEWPORT); + dump_sf_viewport_state(&brw, p->state.SF_VIEWPORT); + dump_cc_viewport_state(&brw, p->state.CC_VIEWPORT); + } + + dump_blend_state(&brw, p->state.BLEND_STATE); + dump_cc_state_gen6(&brw, p->state.COLOR_CALC_STATE); + dump_depth_stencil_state(&brw, p->state.DEPTH_STENCIL_STATE); + + /* VS */ + num_states = p->state.vs.BINDING_TABLE_STATE_size; + for (i = 0; i < num_states; i++) { + if (brw.intel.gen < 7) + dump_surface_state(&brw, p->state.vs.SURFACE_STATE[i]); + else + dump_gen7_surface_state(&brw, p->state.vs.SURFACE_STATE[i]); + } + dump_binding_table(&brw, p->state.vs.BINDING_TABLE_STATE, num_states * 4); + + num_states = 0; + for (i = 0; i < Elements(p->state.vs.SAMPLER_BORDER_COLOR_STATE); i++) { + if (!p->state.vs.SAMPLER_BORDER_COLOR_STATE[i]) + continue; + + dump_sdc(&brw, p->state.vs.SAMPLER_BORDER_COLOR_STATE[i]); + num_states++; + } + if 
(brw.intel.gen < 7) + dump_sampler_state(&brw, p->state.vs.SAMPLER_STATE, num_states * 16); + else + dump_gen7_sampler_state(&brw, p->state.vs.SAMPLER_STATE, num_states * 16); + + if (p->state.vs.PUSH_CONSTANT_BUFFER_size) { + dump_vs_constants(&brw, p->state.vs.PUSH_CONSTANT_BUFFER, + p->state.vs.PUSH_CONSTANT_BUFFER_size); + } + + /* GS */ + num_states = p->state.gs.BINDING_TABLE_STATE_size; + for (i = 0; i < num_states; i++) { + if (!p->state.gs.SURFACE_STATE[i]) + continue; + + if (brw.intel.gen < 7) + dump_surface_state(&brw, p->state.gs.SURFACE_STATE[i]); + else + dump_gen7_surface_state(&brw, p->state.gs.SURFACE_STATE[i]); + } + dump_binding_table(&brw, p->state.gs.BINDING_TABLE_STATE, num_states * 4); + + /* WM */ + num_states = p->state.wm.BINDING_TABLE_STATE_size; + for (i = 0; i < num_states; i++) { + if (!p->state.wm.SURFACE_STATE[i]) + continue; + + if (brw.intel.gen < 7) + dump_surface_state(&brw, p->state.wm.SURFACE_STATE[i]); + else + dump_gen7_surface_state(&brw, p->state.wm.SURFACE_STATE[i]); + } + dump_binding_table(&brw, p->state.wm.BINDING_TABLE_STATE, num_states * 4); + + num_states = 0; + for (i = 0; i < Elements(p->state.wm.SAMPLER_BORDER_COLOR_STATE); i++) { + if (!p->state.wm.SAMPLER_BORDER_COLOR_STATE[i]) + continue; + + dump_sdc(&brw, p->state.wm.SAMPLER_BORDER_COLOR_STATE[i]); + num_states++; + } + if (brw.intel.gen < 7) + dump_sampler_state(&brw, p->state.wm.SAMPLER_STATE, num_states * 16); + else + dump_gen7_sampler_state(&brw, p->state.wm.SAMPLER_STATE, num_states * 16); + + dump_scissor(&brw, p->state.SCISSOR_RECT); + + (void) dump_vs_state; + (void) dump_gs_state; + (void) dump_clip_state; + (void) dump_sf_state; + (void) dump_wm_state; + (void) dump_cc_state_gen4; + (void) dump_wm_constants; +} + +/** + * Dump the pipeline. 
+ */ +void +ilo_3d_pipeline_dump(struct ilo_3d_pipeline *p) +{ + int err; + + ilo_cp_dump(p->cp); + + err = p->cp->bo->map(p->cp->bo, false); + if (!err) { + dump_3d_state(p); + p->cp->bo->unmap(p->cp->bo); + } +} diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c new file mode 100644 index 00000000000..16bd422c40c --- /dev/null +++ b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c @@ -0,0 +1,1637 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2013 LunarG, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu + */ + +#include "util/u_dual_blend.h" +#include "util/u_prim.h" +#include "intel_reg.h" + +#include "ilo_context.h" +#include "ilo_cp.h" +#include "ilo_gpe_gen6.h" +#include "ilo_shader.h" +#include "ilo_state.h" +#include "ilo_3d_pipeline.h" +#include "ilo_3d_pipeline_gen6.h" + +/** + * This should be called before any depth stall flush (including those + * produced by non-pipelined state commands) or cache flush on GEN6. + * + * \see intel_emit_post_sync_nonzero_flush() + */ +static void +gen6_wa_pipe_control_post_sync(struct ilo_3d_pipeline *p, + bool caller_post_sync) +{ + assert(p->gen == ILO_GEN(6)); + + /* emit once */ + if (p->state.has_gen6_wa_pipe_control) + return; + + p->state.has_gen6_wa_pipe_control = true; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 60: + * + * "Pipe-control with CS-stall bit set must be sent BEFORE the + * pipe-control with a post-sync op and no write-cache flushes." + * + * The workaround below necessitates this workaround. + */ + p->gen6_PIPE_CONTROL(&p->gpe, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD, + NULL, 0, false, p->cp); + + /* the caller will emit the post-sync op */ + if (caller_post_sync) + return; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 60: + * + * "Before any depth stall flush (including those produced by + * non-pipelined state commands), software needs to first send a + * PIPE_CONTROL with no bits set except Post-Sync Operation != 0." + * + * "Before a PIPE_CONTROL with Write Cache Flush Enable =1, a + * PIPE_CONTROL with any non-zero post-sync-op is required." 
+ */ + p->gen6_PIPE_CONTROL(&p->gpe, + PIPE_CONTROL_WRITE_IMMEDIATE, + p->workaround_bo, 0, false, p->cp); +} + +static void +gen6_wa_pipe_control_wm_multisample_flush(struct ilo_3d_pipeline *p) +{ + assert(p->gen == ILO_GEN(6)); + + gen6_wa_pipe_control_post_sync(p, false); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 305: + * + * "Driver must guarentee that all the caches in the depth pipe are + * flushed before this command (3DSTATE_MULTISAMPLE) is parsed. This + * requires driver to send a PIPE_CONTROL with a CS stall along with a + * Depth Flush prior to this command." + */ + p->gen6_PIPE_CONTROL(&p->gpe, + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_CS_STALL, + 0, 0, false, p->cp); +} + +static void +gen6_wa_pipe_control_wm_depth_flush(struct ilo_3d_pipeline *p) +{ + assert(p->gen == ILO_GEN(6)); + + gen6_wa_pipe_control_post_sync(p, false); + + /* + * According to intel_emit_depth_stall_flushes() of classic i965, we need + * to emit a sequence of PIPE_CONTROLs prior to emitting depth related + * commands. 
+ */ + p->gen6_PIPE_CONTROL(&p->gpe, + PIPE_CONTROL_DEPTH_STALL, + NULL, 0, false, p->cp); + + p->gen6_PIPE_CONTROL(&p->gpe, + PIPE_CONTROL_DEPTH_CACHE_FLUSH, + NULL, 0, false, p->cp); + + p->gen6_PIPE_CONTROL(&p->gpe, + PIPE_CONTROL_DEPTH_STALL, + NULL, 0, false, p->cp); +} + +static void +gen6_wa_pipe_control_wm_max_threads_stall(struct ilo_3d_pipeline *p) +{ + assert(p->gen == ILO_GEN(6)); + + /* the post-sync workaround should cover this already */ + if (p->state.has_gen6_wa_pipe_control) + return; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 274: + * + * "A PIPE_CONTROL command, with only the Stall At Pixel Scoreboard + * field set (DW1 Bit 1), must be issued prior to any change to the + * value in this field (Maximum Number of Threads in 3DSTATE_WM)" + */ + p->gen6_PIPE_CONTROL(&p->gpe, + PIPE_CONTROL_STALL_AT_SCOREBOARD, + NULL, 0, false, p->cp); + +} + +static void +gen6_wa_pipe_control_vs_const_flush(struct ilo_3d_pipeline *p) +{ + assert(p->gen == ILO_GEN(6)); + + gen6_wa_pipe_control_post_sync(p, false); + + /* + * According to upload_vs_state() of classic i965, we need to emit + * PIPE_CONTROL after 3DSTATE_CONSTANT_VS so that the command is kept being + * buffered by VS FF, to the point that the FF dies. 
+ */ + p->gen6_PIPE_CONTROL(&p->gpe, + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_INSTRUCTION_FLUSH | + PIPE_CONTROL_STATE_CACHE_INVALIDATE, + NULL, 0, false, p->cp); +} + +#define DIRTY(state) (session->pipe_dirty & ILO_DIRTY_ ## state) + +void +gen6_pipeline_common_select(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + /* PIPELINE_SELECT */ + if (session->hw_ctx_changed) { + if (p->gen == ILO_GEN(6)) + gen6_wa_pipe_control_post_sync(p, false); + + p->gen6_PIPELINE_SELECT(&p->gpe, 0x0, p->cp); + } +} + +void +gen6_pipeline_common_sip(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + /* STATE_SIP */ + if (session->hw_ctx_changed) { + if (p->gen == ILO_GEN(6)) + gen6_wa_pipe_control_post_sync(p, false); + + p->gen6_STATE_SIP(&p->gpe, 0, p->cp); + } +} + +void +gen6_pipeline_common_base_address(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + /* STATE_BASE_ADDRESS */ + if (session->state_bo_changed || session->instruction_bo_changed) { + if (p->gen == ILO_GEN(6)) + gen6_wa_pipe_control_post_sync(p, false); + + p->gen6_STATE_BASE_ADDRESS(&p->gpe, + NULL, p->cp->bo, p->cp->bo, NULL, ilo->shader_cache->bo, + 0, 0, 0, 0, p->cp); + + /* + * From the Sandy Bridge PRM, volume 1 part 1, page 28: + * + * "The following commands must be reissued following any change to + * the base addresses: + * + * * 3DSTATE_BINDING_TABLE_POINTERS + * * 3DSTATE_SAMPLER_STATE_POINTERS + * * 3DSTATE_VIEWPORT_STATE_POINTERS + * * 3DSTATE_CC_POINTERS + * * MEDIA_STATE_POINTERS" + * + * 3DSTATE_SCISSOR_STATE_POINTERS is not on the list, but it is + * reasonable to also reissue the command. Same to PCB. 
+ */ + session->viewport_state_changed = true; + + session->cc_state_blend_changed = true; + session->cc_state_dsa_changed = true; + session->cc_state_cc_changed = true; + + session->scissor_state_changed = true; + + session->binding_table_vs_changed = true; + session->binding_table_gs_changed = true; + session->binding_table_fs_changed = true; + + session->sampler_state_vs_changed = true; + session->sampler_state_gs_changed = true; + session->sampler_state_fs_changed = true; + + session->pcb_state_vs_changed = true; + session->pcb_state_gs_changed = true; + session->pcb_state_fs_changed = true; + } +} + +static void +gen6_pipeline_common_urb(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + /* 3DSTATE_URB */ + if (DIRTY(VERTEX_ELEMENTS) || DIRTY(VS) || DIRTY(GS)) { + const struct ilo_shader *vs = (ilo->vs) ? ilo->vs->shader : NULL; + const struct ilo_shader *gs = (ilo->gs) ? ilo->gs->shader : NULL; + const bool gs_active = (gs || (vs && vs->stream_output)); + int vs_entry_size, gs_entry_size; + int vs_total_size, gs_total_size; + + vs_entry_size = (vs) ? vs->out.count : 0; + + /* + * As indicated by 2e712e41db0c0676e9f30fc73172c0e8de8d84d4, VF and VS + * share VUE handles. The VUE allocation size must be large enough to + * store either VF outputs (number of VERTEX_ELEMENTs) and VS outputs. + * + * I am not sure if the PRM explicitly states that VF and VS share VUE + * handles. But here is a citation that implies so: + * + * From the Sandy Bridge PRM, volume 2 part 1, page 44: + * + * "Once a FF stage that spawn threads has sufficient input to + * initiate a thread, it must guarantee that it is safe to request + * the thread initiation. For all these FF stages, this check is + * based on : + * + * - The availability of output URB entries: + * - VS: As the input URB entries are overwritten with the + * VS-generated output data, output URB availability isn't a + * factor." 
+ */ + if (vs_entry_size < ilo->vertex_elements->num_elements) + vs_entry_size = ilo->vertex_elements->num_elements; + + gs_entry_size = (gs) ? gs->out.count : + (vs && vs->stream_output) ? vs_entry_size : 0; + + /* in bytes */ + vs_entry_size *= sizeof(float) * 4; + gs_entry_size *= sizeof(float) * 4; + vs_total_size = ilo->urb.size * 1024; + + if (gs_active) { + vs_total_size /= 2; + gs_total_size = vs_total_size; + } + else { + gs_total_size = 0; + } + + p->gen6_3DSTATE_URB(&p->gpe, vs_total_size, gs_total_size, + vs_entry_size, gs_entry_size, p->cp); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 27: + * + * "Because of a urb corruption caused by allocating a previous + * gsunit's urb entry to vsunit software is required to send a + * "GS NULL Fence" (Send URB fence with VS URB size == 1 and GS URB + * size == 0) plus a dummy DRAW call before any case where VS will + * be taking over GS URB space." + */ + if (p->state.gs.active && !gs_active) + ilo_3d_pipeline_emit_flush_gen6(p); + + p->state.gs.active = gs_active; + } +} + +static void +gen6_pipeline_common_pointers_1(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + /* 3DSTATE_VIEWPORT_STATE_POINTERS */ + if (session->viewport_state_changed) { + p->gen6_3DSTATE_VIEWPORT_STATE_POINTERS(&p->gpe, + p->state.CLIP_VIEWPORT, + p->state.SF_VIEWPORT, + p->state.CC_VIEWPORT, p->cp); + } +} + +static void +gen6_pipeline_common_pointers_2(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + /* 3DSTATE_CC_STATE_POINTERS */ + if (session->cc_state_blend_changed || + session->cc_state_dsa_changed || + session->cc_state_cc_changed) { + p->gen6_3DSTATE_CC_STATE_POINTERS(&p->gpe, + p->state.BLEND_STATE, + p->state.DEPTH_STENCIL_STATE, + p->state.COLOR_CALC_STATE, p->cp); + } + + /* 3DSTATE_SAMPLER_STATE_POINTERS */ + if (session->sampler_state_vs_changed || + session->sampler_state_gs_changed || + 
session->sampler_state_fs_changed) { + p->gen6_3DSTATE_SAMPLER_STATE_POINTERS(&p->gpe, + p->state.vs.SAMPLER_STATE, + 0, + p->state.wm.SAMPLER_STATE, p->cp); + } +}
+
+/**
+ * Emit 3DSTATE_SCISSOR_STATE_POINTERS and 3DSTATE_BINDING_TABLE_POINTERS
+ * when the session has flagged the corresponding states as changed.
+ */
+static void
+gen6_pipeline_common_pointers_3(struct ilo_3d_pipeline *p,
+                                const struct ilo_context *ilo,
+                                struct gen6_pipeline_session *session)
+{
+   /* 3DSTATE_SCISSOR_STATE_POINTERS */
+   if (session->scissor_state_changed) {
+      p->gen6_3DSTATE_SCISSOR_STATE_POINTERS(&p->gpe,
+            p->state.SCISSOR_RECT, p->cp);
+   }
+
+   /* 3DSTATE_BINDING_TABLE_POINTERS */
+   if (session->binding_table_vs_changed ||
+       session->binding_table_gs_changed ||
+       session->binding_table_fs_changed) {
+      /* a single command carries the VS, GS and WM tables together */
+      p->gen6_3DSTATE_BINDING_TABLE_POINTERS(&p->gpe,
+            p->state.vs.BINDING_TABLE_STATE,
+            p->state.gs.BINDING_TABLE_STATE,
+            p->state.wm.BINDING_TABLE_STATE, p->cp);
+   }
+}
+
+/**
+ * Emit the vertex fetch commands: 3DSTATE_INDEX_BUFFER,
+ * 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS, each only when the
+ * state it depends on is dirty.
+ */
+void
+gen6_pipeline_vf(struct ilo_3d_pipeline *p,
+                 const struct ilo_context *ilo,
+                 struct gen6_pipeline_session *session)
+{
+   /* 3DSTATE_INDEX_BUFFER */
+   if (DIRTY(INDEX_BUFFER)) {
+      p->gen6_3DSTATE_INDEX_BUFFER(&p->gpe,
+            &ilo->index_buffer, false, p->cp);
+   }
+
+   /* 3DSTATE_VERTEX_BUFFERS */
+   if (DIRTY(VERTEX_BUFFERS)) {
+      const unsigned num_vbs = ilo->vertex_buffers.num_buffers;
+      /*
+       * One bit per bound vertex buffer.  Build the mask with unsigned
+       * arithmetic and saturate at 32 buffers: shifting a signed 1 by 31,
+       * or any 32-bit value by 32, is undefined behavior.
+       */
+      const uint32_t vb_mask = (num_vbs >= 32) ? ~0u : (1u << num_vbs) - 1;
+
+      p->gen6_3DSTATE_VERTEX_BUFFERS(&p->gpe,
+            ilo->vertex_buffers.buffers, NULL,
+            vb_mask, p->cp);
+   }
+
+   /* 3DSTATE_VERTEX_ELEMENTS */
+   if (DIRTY(VERTEX_ELEMENTS) || DIRTY(VS)) {
+      const struct ilo_vertex_element *ive = ilo->vertex_elements;
+      bool last_velement_edgeflag = false;
+      bool prepend_generate_ids = false;
+
+      if (ilo->vs) {
+         const struct ilo_shader_info *info = &ilo->vs->info;
+
+         if (info->edgeflag_in >= 0) {
+            /* we rely on the state tracker here */
+            assert(info->edgeflag_in == ive->num_elements - 1);
+            last_velement_edgeflag = true;
+         }
+
+         prepend_generate_ids = (info->has_instanceid || info->has_vertexid);
+      }
+
+      p->gen6_3DSTATE_VERTEX_ELEMENTS(&p->gpe,
+            ive->elements, ive->num_elements,
+            last_velement_edgeflag, prepend_generate_ids, p->cp);
+   }
+}
+
+void
+gen6_pipeline_vf_statistics(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + /* 3DSTATE_VF_STATISTICS */ + if (session->hw_ctx_changed) + p->gen6_3DSTATE_VF_STATISTICS(&p->gpe, false, p->cp); +} + +void +gen6_pipeline_vf_draw(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + /* 3DPRIMITIVE */ + p->gen6_3DPRIMITIVE(&p->gpe, session->info, false, p->cp); + p->state.has_gen6_wa_pipe_control = false; +} + +void +gen6_pipeline_vs(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + const bool emit_3dstate_vs = (DIRTY(VS) || DIRTY(VERTEX_SAMPLERS)); + const bool emit_3dstate_constant_vs = session->pcb_state_vs_changed; + + /* + * the classic i965 does this in upload_vs_state(), citing a spec that I + * cannot find + */ + if (emit_3dstate_vs && p->gen == ILO_GEN(6)) + gen6_wa_pipe_control_post_sync(p, false); + + /* 3DSTATE_CONSTANT_VS */ + if (emit_3dstate_constant_vs) { + p->gen6_3DSTATE_CONSTANT_VS(&p->gpe, + &p->state.vs.PUSH_CONSTANT_BUFFER, + &p->state.vs.PUSH_CONSTANT_BUFFER_size, + 1, p->cp); + } + + /* 3DSTATE_VS */ + if (emit_3dstate_vs) { + const struct ilo_shader *vs = (ilo->vs)? ilo->vs->shader : NULL; + const int num_samplers = ilo->samplers[PIPE_SHADER_VERTEX].num_samplers; + + p->gen6_3DSTATE_VS(&p->gpe, + vs, ilo->max_vs_threads, num_samplers, p->cp); + } + + if (emit_3dstate_constant_vs && p->gen == ILO_GEN(6)) + gen6_wa_pipe_control_vs_const_flush(p); +} + +static void +gen6_pipeline_gs(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + /* 3DSTATE_CONSTANT_GS */ + if (session->pcb_state_gs_changed) + p->gen6_3DSTATE_CONSTANT_GS(&p->gpe, NULL, NULL, 0, p->cp); + + /* 3DSTATE_GS */ + if (DIRTY(GS) || DIRTY(VS) || session->prim_changed) { + const struct ilo_shader *gs = (ilo->gs)? 
ilo->gs->shader : NULL; + const struct ilo_shader *vs = (ilo->vs)? ilo->vs->shader : NULL; + const int num_vertices = u_vertices_per_prim(session->reduced_prim); + + if (gs) + assert(!gs->pcb.clip_state_size); + + p->gen6_3DSTATE_GS(&p->gpe, + gs, ilo->max_gs_threads, vs, + (vs) ? vs->cache_offset + vs->gs_offsets[num_vertices - 1] : 0, + p->cp); + } +}
+
+/**
+ * Emit 3DSTATE_GS_SVB_INDEX when the VS, the GS or the stream output targets
+ * are dirty.
+ *
+ * The maximum SVBI is the smallest number of vertices that fits in any of
+ * the bound stream output buffers written by the active shader; it is also
+ * stored in p->state.so_max_vertices.
+ */
+static void
+gen6_pipeline_gs_svbi(struct ilo_3d_pipeline *p,
+                      const struct ilo_context *ilo,
+                      struct gen6_pipeline_session *session)
+{
+   /* 3DSTATE_GS_SVB_INDEX */
+   if (DIRTY(VS) || DIRTY(GS) || DIRTY(STREAM_OUTPUT_TARGETS)) {
+      const struct pipe_stream_output_info *so_info =
+         (ilo->gs) ? &ilo->gs->info.stream_output :
+         (ilo->vs) ? &ilo->vs->info.stream_output : NULL;
+      unsigned max_svbi = 0xffffffff;
+      int i;
+
+      /* get max_svbi */
+      for (i = 0; so_info && i < so_info->num_outputs; i++) {
+         const int output_buffer = so_info->output[i].output_buffer;
+         /*
+          * Targets at or past num_targets count as unbound, the same way
+          * the stream output SURFACE_STATEs are set up.
+          */
+         const struct pipe_stream_output_target *so =
+            (output_buffer < ilo->stream_output_targets.num_targets) ?
+            ilo->stream_output_targets.targets[output_buffer] : NULL;
+         const unsigned struct_size = so_info->stride[output_buffer] * 4;
+         const unsigned elem_size = so_info->output[i].num_components * 4;
+         const unsigned dst_offset = so_info->output[i].dst_offset * 4;
+         unsigned buf_size, count;
+
+         /* an unbound buffer or a zero stride gives no limit to compute */
+         if (!so || !struct_size)
+            continue;
+
+         /*
+          * Stay in unsigned arithmetic: a buffer that ends before
+          * dst_offset must yield zero vertices, not a wrapped-around count
+          * that would leave max_svbi unlimited.
+          */
+         buf_size = (so->buffer_size > dst_offset) ?
+            so->buffer_size - dst_offset : 0;
+
+         /* whole vertices, plus one more if this output still fits */
+         count = buf_size / struct_size;
+         if (buf_size % struct_size >= elem_size)
+            count++;
+
+         if (count < max_svbi)
+            max_svbi = count;
+      }
+
+      if (p->gen == ILO_GEN(6))
+         gen6_wa_pipe_control_post_sync(p, false);
+
+      p->gen6_3DSTATE_GS_SVB_INDEX(&p->gpe,
+            0, p->state.so_num_vertices, max_svbi,
+            false, p->cp);
+
+      if (session->hw_ctx_changed) {
+         /*
+          * From the Sandy Bridge PRM, volume 2 part 1, page 148:
+          *
+          *     "If a buffer is not enabled then the SVBI must be set to 0x0
+          *      in order to not cause overflow in that SVBI."
+          *
+          *     "If a buffer is not enabled then the MaxSVBI must be set to
+          *      0xFFFFFFFF in order to not cause overflow in that SVBI."
+          */
+         for (i = 1; i < 4; i++) {
+            p->gen6_3DSTATE_GS_SVB_INDEX(&p->gpe,
+                  i, 0, 0xffffffff, false, p->cp);
+         }
+      }
+
+      /* remember the state for calculating primitives emitted in software */
+      p->state.so_max_vertices = max_svbi;
+   }
+}
+
+/**
+ * Emit 3DSTATE_CLIP when the rasterizer, the FS, the viewport or the
+ * framebuffer is dirty.  The guard band test is requested only when the
+ * viewport rectangle covers the whole framebuffer.
+ */
+void
+gen6_pipeline_clip(struct ilo_3d_pipeline *p,
+                   const struct ilo_context *ilo,
+                   struct gen6_pipeline_session *session)
+{
+   /* 3DSTATE_CLIP */
+   if (DIRTY(RASTERIZER) || DIRTY(FS) ||
+       DIRTY(VIEWPORT) || DIRTY(FRAMEBUFFER)) {
+      bool enable_guardband;
+      float x1, x2, y1, y2;
+
+      /*
+       * We do not do 2D clipping yet.  Guard band test should only be enabled
+       * when the viewport is larger than the framebuffer.
+       */
+      x1 = fabs(ilo->viewport.scale[0]) * -1.0f + ilo->viewport.translate[0];
+      x2 = fabs(ilo->viewport.scale[0]) * 1.0f + ilo->viewport.translate[0];
+      y1 = fabs(ilo->viewport.scale[1]) * -1.0f + ilo->viewport.translate[1];
+      y2 = fabs(ilo->viewport.scale[1]) * 1.0f + ilo->viewport.translate[1];
+      enable_guardband =
+         (x1 <= 0.0f && x2 >= (float) ilo->framebuffer.width &&
+          y1 <= 0.0f && y2 >= (float) ilo->framebuffer.height);
+
+      p->gen6_3DSTATE_CLIP(&p->gpe,
+            ilo->rasterizer,
+            (ilo->fs && ilo->fs->shader->in.has_linear_interp),
+            enable_guardband, 1, p->cp);
+   }
+}
+
+static void +gen6_pipeline_sf(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + /* 3DSTATE_SF */ + if (DIRTY(RASTERIZER) || DIRTY(VS) || DIRTY(GS) || DIRTY(FS)) { + const struct ilo_shader *fs = (ilo->fs)? ilo->fs->shader : NULL; + const struct ilo_shader *last_sh = + (ilo->gs)? ilo->gs->shader : + (ilo->vs)?
ilo->vs->shader : NULL; + + p->gen6_3DSTATE_SF(&p->gpe, + ilo->rasterizer, fs, last_sh, p->cp); + } +} + +void +gen6_pipeline_sf_rect(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + /* 3DSTATE_DRAWING_RECTANGLE */ + if (DIRTY(FRAMEBUFFER)) { + if (p->gen == ILO_GEN(6)) + gen6_wa_pipe_control_post_sync(p, false); + + p->gen6_3DSTATE_DRAWING_RECTANGLE(&p->gpe, 0, 0, + ilo->framebuffer.width, ilo->framebuffer.height, p->cp); + } +} + +static void +gen6_pipeline_wm(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + /* 3DSTATE_CONSTANT_PS */ + if (session->pcb_state_fs_changed) + p->gen6_3DSTATE_CONSTANT_PS(&p->gpe, NULL, NULL, 0, p->cp); + + /* 3DSTATE_WM */ + if (DIRTY(FS) || DIRTY(FRAGMENT_SAMPLERS) || + DIRTY(BLEND) || DIRTY(DEPTH_STENCIL_ALPHA) || + DIRTY(RASTERIZER)) { + const struct ilo_shader *fs = (ilo->fs)? ilo->fs->shader : NULL; + const int num_samplers = + ilo->samplers[PIPE_SHADER_FRAGMENT].num_samplers; + const bool dual_blend = (!ilo->blend->logicop_enable && + ilo->blend->rt[0].blend_enable && + util_blend_state_is_dual(ilo->blend, 0)); + const bool cc_may_kill = (ilo->depth_stencil_alpha->alpha.enabled || + ilo->blend->alpha_to_coverage); + + if (fs) + assert(!fs->pcb.clip_state_size); + + if (p->gen == ILO_GEN(6) && session->hw_ctx_changed) + gen6_wa_pipe_control_wm_max_threads_stall(p); + + p->gen6_3DSTATE_WM(&p->gpe, + fs, ilo->max_wm_threads, num_samplers, + ilo->rasterizer, dual_blend, cc_may_kill, p->cp); + } +} + +static void +gen6_pipeline_wm_multisample(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + /* 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK */ + if (DIRTY(SAMPLE_MASK) || DIRTY(FRAMEBUFFER)) { + const uint32_t *packed_sample_pos; + int num_samples = 1; + + if (ilo->framebuffer.nr_cbufs) + num_samples = ilo->framebuffer.cbufs[0]->texture->nr_samples; + + packed_sample_pos 
= (num_samples > 1) ? + &p->packed_sample_position_4x : &p->packed_sample_position_1x; + + if (p->gen == ILO_GEN(6)) { + gen6_wa_pipe_control_post_sync(p, false); + gen6_wa_pipe_control_wm_multisample_flush(p); + } + + p->gen6_3DSTATE_MULTISAMPLE(&p->gpe, num_samples, packed_sample_pos, + ilo->rasterizer->half_pixel_center, p->cp); + + p->gen6_3DSTATE_SAMPLE_MASK(&p->gpe, + (num_samples > 1) ? ilo->sample_mask : 0x1, p->cp); + } +} + +static void +gen6_pipeline_wm_depth(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + /* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */ + if (DIRTY(FRAMEBUFFER)) { + if (p->gen == ILO_GEN(6)) { + gen6_wa_pipe_control_post_sync(p, false); + gen6_wa_pipe_control_wm_depth_flush(p); + } + + p->gen6_3DSTATE_DEPTH_BUFFER(&p->gpe, + ilo->framebuffer.zsbuf, false, p->cp); + + /* TODO */ + p->gen6_3DSTATE_CLEAR_PARAMS(&p->gpe, 0, p->cp); + } +} + +void +gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + /* 3DSTATE_POLY_STIPPLE_PATTERN and 3DSTATE_POLY_STIPPLE_OFFSET */ + if ((DIRTY(RASTERIZER) || DIRTY(POLY_STIPPLE)) && + ilo->rasterizer->poly_stipple_enable) { + if (p->gen == ILO_GEN(6)) + gen6_wa_pipe_control_post_sync(p, false); + + p->gen6_3DSTATE_POLY_STIPPLE_PATTERN(&p->gpe, + &ilo->poly_stipple, p->cp); + + p->gen6_3DSTATE_POLY_STIPPLE_OFFSET(&p->gpe, 0, 0, p->cp); + } + + /* 3DSTATE_LINE_STIPPLE */ + if (DIRTY(RASTERIZER) && ilo->rasterizer->line_stipple_enable) { + if (p->gen == ILO_GEN(6)) + gen6_wa_pipe_control_post_sync(p, false); + + p->gen6_3DSTATE_LINE_STIPPLE(&p->gpe, + ilo->rasterizer->line_stipple_pattern, + ilo->rasterizer->line_stipple_factor + 1, p->cp); + } + + /* 3DSTATE_AA_LINE_PARAMETERS */ + if (DIRTY(RASTERIZER) && ilo->rasterizer->line_smooth) { + if (p->gen == ILO_GEN(6)) + gen6_wa_pipe_control_post_sync(p, false); + + p->gen6_3DSTATE_AA_LINE_PARAMETERS(&p->gpe, p->cp); + } +} + 
+static void +gen6_pipeline_state_viewports(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + /* SF_VIEWPORT, CLIP_VIEWPORT, and CC_VIEWPORT */ + if (DIRTY(VIEWPORT)) { + p->state.CLIP_VIEWPORT = p->gen6_CLIP_VIEWPORT(&p->gpe, + &ilo->viewport, 1, p->cp); + + p->state.SF_VIEWPORT = p->gen6_SF_VIEWPORT(&p->gpe, + &ilo->viewport, 1, p->cp); + + p->state.CC_VIEWPORT = p->gen6_CC_VIEWPORT(&p->gpe, + &ilo->viewport, 1, p->cp); + + session->viewport_state_changed = true; + } +} + +static void +gen6_pipeline_state_cc(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + /* BLEND_STATE */ + if (DIRTY(BLEND) || DIRTY(FRAMEBUFFER) || DIRTY(DEPTH_STENCIL_ALPHA)) { + p->state.BLEND_STATE = p->gen6_BLEND_STATE(&p->gpe, + ilo->blend, &ilo->framebuffer, + &ilo->depth_stencil_alpha->alpha, p->cp); + + session->cc_state_blend_changed = true; + } + + /* COLOR_CALC_STATE */ + if (DIRTY(DEPTH_STENCIL_ALPHA) || DIRTY(STENCIL_REF) || DIRTY(BLEND_COLOR)) { + p->state.COLOR_CALC_STATE = p->gen6_COLOR_CALC_STATE(&p->gpe, + &ilo->stencil_ref, + ilo->depth_stencil_alpha->alpha.ref_value, + &ilo->blend_color, p->cp); + + session->cc_state_cc_changed = true; + } + + /* DEPTH_STENCIL_STATE */ + if (DIRTY(DEPTH_STENCIL_ALPHA)) { + p->state.DEPTH_STENCIL_STATE = + p->gen6_DEPTH_STENCIL_STATE(&p->gpe, + ilo->depth_stencil_alpha, p->cp); + + session->cc_state_dsa_changed = true; + } +} + +static void +gen6_pipeline_state_scissors(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + /* SCISSOR_RECT */ + if (DIRTY(SCISSOR)) { + p->state.SCISSOR_RECT = p->gen6_SCISSOR_RECT(&p->gpe, + &ilo->scissor, 1, p->cp); + + session->scissor_state_changed = true; + } +} + +static void +gen6_pipeline_state_surfaces_rt(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + /* SURFACE_STATEs for render 
targets */ + if (DIRTY(FRAMEBUFFER)) { + const int offset = ILO_WM_DRAW_SURFACE(0); + uint32_t *surface_state = &p->state.wm.SURFACE_STATE[offset]; + int i; + + for (i = 0; i < ilo->framebuffer.nr_cbufs; i++) { + const struct pipe_surface *surface = ilo->framebuffer.cbufs[i]; + + assert(surface); + surface_state[i] = + p->gen6_surf_SURFACE_STATE(&p->gpe, surface, p->cp); + } + + /* + * Upload at least one render target, as + * brw_update_renderbuffer_surfaces() does. I don't know why. + */ + if (i == 0) { + struct pipe_surface null_surface; + + memset(&null_surface, 0, sizeof(null_surface)); + null_surface.width = ilo->framebuffer.width; + null_surface.height = ilo->framebuffer.height; + + surface_state[i] = + p->gen6_surf_SURFACE_STATE(&p->gpe, &null_surface, p->cp); + + i++; + } + + memset(&surface_state[i], 0, (ILO_MAX_DRAW_BUFFERS - i) * 4); + + if (i && session->num_surfaces[PIPE_SHADER_FRAGMENT] < offset + i) + session->num_surfaces[PIPE_SHADER_FRAGMENT] = offset + i; + + session->binding_table_fs_changed = true; + } +} + +static void +gen6_pipeline_state_surfaces_so(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + const struct ilo_shader_state *vs = ilo->vs; + const struct ilo_shader_state *gs = ilo->gs; + const struct pipe_stream_output_target **so_targets = + (const struct pipe_stream_output_target **) + ilo->stream_output_targets.targets; + const int num_so_targets = ilo->stream_output_targets.num_targets; + + if (p->gen != ILO_GEN(6)) + return; + + /* SURFACE_STATEs for stream output targets */ + if (DIRTY(VS) || DIRTY(GS) || DIRTY(STREAM_OUTPUT_TARGETS)) { + const struct pipe_stream_output_info *so_info = + (gs) ? &gs->info.stream_output : + (vs) ? 
&vs->info.stream_output : NULL; + const int offset = ILO_GS_SO_SURFACE(0); + uint32_t *surface_state = &p->state.gs.SURFACE_STATE[offset]; + int i; + + for (i = 0; so_info && i < so_info->num_outputs; i++) { + const int target = so_info->output[i].output_buffer; + const struct pipe_stream_output_target *so_target = + (target < num_so_targets) ? so_targets[target] : NULL; + + if (so_target) { + surface_state[i] = p->gen6_so_SURFACE_STATE(&p->gpe, + so_target, so_info, i, p->cp); + } + else { + surface_state[i] = 0; + } + } + + memset(&surface_state[i], 0, (ILO_MAX_SO_BINDINGS - i) * 4); + + if (i && session->num_surfaces[PIPE_SHADER_GEOMETRY] < offset + i) + session->num_surfaces[PIPE_SHADER_GEOMETRY] = offset + i; + + session->binding_table_gs_changed = true; + } +} + +static void +gen6_pipeline_state_surfaces_view(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + int shader_type, + struct gen6_pipeline_session *session) +{ + const struct pipe_sampler_view **views = + (const struct pipe_sampler_view **) + ilo->sampler_views[shader_type].views; + const int num_views = ilo->sampler_views[shader_type].num_views; + uint32_t *surface_state; + int offset, i; + bool skip = false; + + /* SURFACE_STATEs for sampler views */ + switch (shader_type) { + case PIPE_SHADER_VERTEX: + if (DIRTY(VERTEX_SAMPLER_VIEWS)) { + offset = ILO_VS_TEXTURE_SURFACE(0); + surface_state = &p->state.vs.SURFACE_STATE[offset]; + + session->binding_table_vs_changed = true; + } + else { + skip = true; + } + break; + case PIPE_SHADER_FRAGMENT: + if (DIRTY(FRAGMENT_SAMPLER_VIEWS)) { + offset = ILO_WM_TEXTURE_SURFACE(0); + surface_state = &p->state.wm.SURFACE_STATE[offset]; + + session->binding_table_fs_changed = true; + } + else { + skip = true; + } + break; + default: + skip = true; + break; + } + + if (skip) + return; + + for (i = 0; i < num_views; i++) { + if (views[i]) { + surface_state[i] = + p->gen6_view_SURFACE_STATE(&p->gpe, views[i], p->cp); + } + else { + surface_state[i] = 0; + } 
+ } + + memset(&surface_state[i], 0, (ILO_MAX_SAMPLER_VIEWS - i) * 4); + + if (i && session->num_surfaces[shader_type] < offset + i) + session->num_surfaces[shader_type] = offset + i; +} + +static void +gen6_pipeline_state_surfaces_const(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + int shader_type, + struct gen6_pipeline_session *session) +{ + const struct pipe_constant_buffer *buffers = + ilo->constant_buffers[shader_type].buffers; + const int num_buffers = ilo->constant_buffers[shader_type].num_buffers; + uint32_t *surface_state; + int offset, i; + bool skip = false; + + /* SURFACE_STATEs for constant buffers */ + switch (shader_type) { + case PIPE_SHADER_VERTEX: + if (DIRTY(CONSTANT_BUFFER)) { + offset = ILO_VS_CONST_SURFACE(0); + surface_state = &p->state.vs.SURFACE_STATE[offset]; + + session->binding_table_vs_changed = true; + } + else { + skip = true; + } + break; + case PIPE_SHADER_FRAGMENT: + if (DIRTY(CONSTANT_BUFFER)) { + offset = ILO_WM_CONST_SURFACE(0); + surface_state = &p->state.wm.SURFACE_STATE[offset]; + + session->binding_table_fs_changed = true; + } + else { + skip = true; + } + break; + default: + skip = true; + break; + } + + if (skip) + return; + + for (i = 0; i < num_buffers; i++) { + if (buffers[i].buffer) { + surface_state[i] = + p->gen6_cbuf_SURFACE_STATE(&p->gpe, &buffers[i], p->cp); + } + else { + surface_state[i] = 0; + } + } + + memset(&surface_state[i], 0, (ILO_MAX_CONST_BUFFERS - i) * 4); + + if (i && session->num_surfaces[shader_type] < offset + i) + session->num_surfaces[shader_type] = offset + i; +} + +static void +gen6_pipeline_state_binding_tables(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + int shader_type, + struct gen6_pipeline_session *session) +{ + uint32_t *binding_table_state, *surface_state; + int *binding_table_state_size, size; + bool skip = false; + + /* BINDING_TABLE_STATE */ + switch (shader_type) { + case PIPE_SHADER_VERTEX: + surface_state = p->state.vs.SURFACE_STATE; + 
binding_table_state = &p->state.vs.BINDING_TABLE_STATE; + binding_table_state_size = &p->state.vs.BINDING_TABLE_STATE_size; + + skip = !session->binding_table_vs_changed; + break; + case PIPE_SHADER_GEOMETRY: + surface_state = p->state.gs.SURFACE_STATE; + binding_table_state = &p->state.gs.BINDING_TABLE_STATE; + binding_table_state_size = &p->state.gs.BINDING_TABLE_STATE_size; + + skip = !session->binding_table_gs_changed; + break; + case PIPE_SHADER_FRAGMENT: + surface_state = p->state.wm.SURFACE_STATE; + binding_table_state = &p->state.wm.BINDING_TABLE_STATE; + binding_table_state_size = &p->state.wm.BINDING_TABLE_STATE_size; + + skip = !session->binding_table_fs_changed; + break; + default: + skip = true; + break; + } + + if (skip) + return; + + /* + * If we have seemingly less SURFACE_STATEs than before, it could be that + * we did not touch those reside at the tail in this upload. Loop over + * them to figure out the real number of SURFACE_STATEs. + */ + for (size = *binding_table_state_size; + size > session->num_surfaces[shader_type]; size--) { + if (surface_state[size - 1]) + break; + } + if (size < session->num_surfaces[shader_type]) + size = session->num_surfaces[shader_type]; + + *binding_table_state = p->gen6_BINDING_TABLE_STATE(&p->gpe, + surface_state, size, p->cp); + *binding_table_state_size = size; +} + +static void +gen6_pipeline_state_samplers(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + int shader_type, + struct gen6_pipeline_session *session) +{ + const struct pipe_sampler_state **samplers = + (const struct pipe_sampler_state **) + ilo->samplers[shader_type].samplers; + const struct pipe_sampler_view **views = + (const struct pipe_sampler_view **) + ilo->sampler_views[shader_type].views; + const int num_samplers = ilo->samplers[shader_type].num_samplers; + const int num_views = ilo->sampler_views[shader_type].num_views; + uint32_t *sampler_state, *border_color_state; + bool emit_border_color = false; + bool skip = false; + + /* 
SAMPLER_BORDER_COLOR_STATE and SAMPLER_STATE */ + switch (shader_type) { + case PIPE_SHADER_VERTEX: + if (DIRTY(VERTEX_SAMPLERS) || DIRTY(VERTEX_SAMPLER_VIEWS)) { + sampler_state = &p->state.vs.SAMPLER_STATE; + border_color_state = p->state.vs.SAMPLER_BORDER_COLOR_STATE; + + if (DIRTY(VERTEX_SAMPLERS)) + emit_border_color = true; + + session->sampler_state_vs_changed = true; + } + else { + skip = true; + } + break; + case PIPE_SHADER_FRAGMENT: + if (DIRTY(FRAGMENT_SAMPLERS) || DIRTY(FRAGMENT_SAMPLER_VIEWS)) { + sampler_state = &p->state.wm.SAMPLER_STATE; + border_color_state = p->state.wm.SAMPLER_BORDER_COLOR_STATE; + + if (DIRTY(FRAGMENT_SAMPLERS)) + emit_border_color = true; + + session->sampler_state_fs_changed = true; + } + else { + skip = true; + } + break; + default: + skip = true; + break; + } + + if (skip) + return; + + if (emit_border_color) { + int i; + + for (i = 0; i < num_samplers; i++) { + border_color_state[i] = (samplers[i]) ? + p->gen6_SAMPLER_BORDER_COLOR_STATE(&p->gpe, + &samplers[i]->border_color, p->cp) : 0; + } + } + + /* should we take the minimum of num_samplers and num_views? */ + *sampler_state = p->gen6_SAMPLER_STATE(&p->gpe, + samplers, views, + border_color_state, + MIN2(num_samplers, num_views), p->cp); +} + +static void +gen6_pipeline_state_pcb(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + /* push constant buffer for VS */ + if (DIRTY(VS) || DIRTY(CLIP)) { + const struct ilo_shader *vs = (ilo->vs)? 
/**
 * Emit the 3D commands of a draw.
 *
 * Every emitter is called with the same (p, ilo, session) triple.  The
 * order is kept as close as possible to that of the classic i965 driver so
 * that the two command streams can be compared easily; do not reorder.
 */
static void
gen6_pipeline_commands(struct ilo_3d_pipeline *p,
                       const struct ilo_context *ilo,
                       struct gen6_pipeline_session *session)
{
#define EMIT(stage) gen6_pipeline_ ## stage(p, ilo, session)
   EMIT(common_select);
   EMIT(gs_svbi);
   EMIT(common_sip);
   EMIT(vf_statistics);
   EMIT(common_base_address);
   EMIT(common_pointers_1);
   EMIT(common_urb);
   EMIT(common_pointers_2);
   EMIT(wm_multisample);
   EMIT(vs);
   EMIT(gs);
   EMIT(clip);
   EMIT(sf);
   EMIT(wm);
   EMIT(common_pointers_3);
   EMIT(wm_depth);
   EMIT(wm_raster);
   EMIT(sf_rect);
   EMIT(vf);
   EMIT(vf_draw);
#undef EMIT
}
session); + gen6_pipeline_state_pcb(p, ilo, session); + + /* + * upload all SURAFCE_STATEs together so that we know there are minimal + * paddings + */ + gen6_pipeline_state_surfaces_rt(p, ilo, session); + gen6_pipeline_state_surfaces_so(p, ilo, session); + for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) { + gen6_pipeline_state_surfaces_view(p, ilo, shader_type, session); + gen6_pipeline_state_surfaces_const(p, ilo, shader_type, session); + } + + for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) { + gen6_pipeline_state_samplers(p, ilo, shader_type, session); + /* this must be called after all SURFACE_STATEs are uploaded */ + gen6_pipeline_state_binding_tables(p, ilo, shader_type, session); + } +} + +void +gen6_pipeline_prepare(const struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + const struct pipe_draw_info *info, + struct gen6_pipeline_session *session) +{ + memset(session, 0, sizeof(*session)); + session->info = info; + session->pipe_dirty = ilo->dirty; + session->reduced_prim = u_reduced_prim(info->mode); + + /* available space before the session */ + session->init_cp_space = ilo_cp_space(p->cp); + + session->hw_ctx_changed = + (p->invalidate_flags & ILO_3D_PIPELINE_INVALIDATE_HW); + + if (session->hw_ctx_changed) { + /* these should be enough to make everything uploaded */ + session->state_bo_changed = true; + session->instruction_bo_changed = true; + session->prim_changed = true; + } + else { + session->state_bo_changed = + (p->invalidate_flags & ILO_3D_PIPELINE_INVALIDATE_STATE_BO); + session->instruction_bo_changed = + (p->invalidate_flags & ILO_3D_PIPELINE_INVALIDATE_KERNEL_BO); + session->prim_changed = (p->state.reduced_prim != session->reduced_prim); + } +} + +void +gen6_pipeline_draw(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + /* force all states to be uploaded if the state bo changed */ + if (session->state_bo_changed) + 
session->pipe_dirty = ILO_DIRTY_ALL; + else + session->pipe_dirty = ilo->dirty; + + session->emit_draw_states(p, ilo, session); + + /* force all commands to be uploaded if the HW context changed */ + if (session->hw_ctx_changed) + session->pipe_dirty = ILO_DIRTY_ALL; + else + session->pipe_dirty = ilo->dirty; + + session->emit_draw_commands(p, ilo, session); +} + +void +gen6_pipeline_end(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session) +{ + int used, estimate; + + /* sanity check size estimation */ + used = session->init_cp_space - ilo_cp_space(p->cp); + estimate = ilo_3d_pipeline_estimate_size(p, ILO_3D_PIPELINE_DRAW, ilo); + assert(used <= estimate); + + p->state.reduced_prim = session->reduced_prim; +} + +static void +ilo_3d_pipeline_emit_draw_gen6(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + const struct pipe_draw_info *info) +{ + struct gen6_pipeline_session session; + + gen6_pipeline_prepare(p, ilo, info, &session); + + session.emit_draw_states = gen6_pipeline_states; + session.emit_draw_commands = gen6_pipeline_commands; + + gen6_pipeline_draw(p, ilo, &session); + gen6_pipeline_end(p, ilo, &session); +} + +void +ilo_3d_pipeline_emit_flush_gen6(struct ilo_3d_pipeline *p) +{ + if (p->gen == ILO_GEN(6)) + gen6_wa_pipe_control_post_sync(p, false); + + p->gen6_PIPE_CONTROL(&p->gpe, + PIPE_CONTROL_INSTRUCTION_FLUSH | + PIPE_CONTROL_WRITE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_VF_CACHE_INVALIDATE | + PIPE_CONTROL_TC_FLUSH | + PIPE_CONTROL_NO_WRITE | + PIPE_CONTROL_CS_STALL, + 0, 0, false, p->cp); +} + +void +ilo_3d_pipeline_emit_write_timestamp_gen6(struct ilo_3d_pipeline *p, + struct intel_bo *bo, int index) +{ + if (p->gen == ILO_GEN(6)) + gen6_wa_pipe_control_post_sync(p, true); + + p->gen6_PIPE_CONTROL(&p->gpe, + PIPE_CONTROL_WRITE_TIMESTAMP, + bo, index * sizeof(uint64_t) | PIPE_CONTROL_GLOBAL_GTT_WRITE, + true, p->cp); +} + +void 
+ilo_3d_pipeline_emit_write_depth_count_gen6(struct ilo_3d_pipeline *p, + struct intel_bo *bo, int index) +{ + if (p->gen == ILO_GEN(6)) + gen6_wa_pipe_control_post_sync(p, false); + + p->gen6_PIPE_CONTROL(&p->gpe, + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_WRITE_DEPTH_COUNT, + bo, index * sizeof(uint64_t) | PIPE_CONTROL_GLOBAL_GTT_WRITE, + true, p->cp); +} + +static int +gen6_pipeline_estimate_commands(const struct ilo_3d_pipeline *p, + const struct ilo_gpe_gen6 *gen6, + const struct ilo_context *ilo) +{ + static int size; + enum ilo_gpe_gen6_command cmd; + + if (size) + return size; + + for (cmd = 0; cmd < ILO_GPE_GEN6_COMMAND_COUNT; cmd++) { + int count; + + switch (cmd) { + case ILO_GPE_GEN6_PIPE_CONTROL: + /* for the workaround */ + count = 2; + /* another one after 3DSTATE_URB */ + count += 1; + /* and another one after 3DSTATE_CONSTANT_VS */ + count += 1; + break; + case ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX: + /* there are 4 SVBIs */ + count = 4; + break; + case ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS: + count = 33; + break; + case ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS: + count = 34; + break; + case ILO_GPE_GEN6_MEDIA_VFE_STATE: + case ILO_GPE_GEN6_MEDIA_CURBE_LOAD: + case ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD: + case ILO_GPE_GEN6_MEDIA_GATEWAY_STATE: + case ILO_GPE_GEN6_MEDIA_STATE_FLUSH: + case ILO_GPE_GEN6_MEDIA_OBJECT_WALKER: + /* media commands */ + count = 0; + break; + default: + count = 1; + break; + } + + if (count) + size += gen6->estimate_command_size(&p->gpe, cmd, count); + } + + return size; +} + +static int +gen6_pipeline_estimate_states(const struct ilo_3d_pipeline *p, + const struct ilo_gpe_gen6 *gen6, + const struct ilo_context *ilo) +{ + static int static_size; + int shader_type, count, size; + + if (!static_size) { + struct { + enum ilo_gpe_gen6_state state; + int count; + } static_states[] = { + /* viewports */ + { ILO_GPE_GEN6_SF_VIEWPORT, 1 }, + { ILO_GPE_GEN6_CLIP_VIEWPORT, 1 }, + { ILO_GPE_GEN6_CC_VIEWPORT, 1 }, + /* cc */ + { 
ILO_GPE_GEN6_COLOR_CALC_STATE, 1 }, + { ILO_GPE_GEN6_BLEND_STATE, ILO_MAX_DRAW_BUFFERS }, + { ILO_GPE_GEN6_DEPTH_STENCIL_STATE, 1 }, + /* scissors */ + { ILO_GPE_GEN6_SCISSOR_RECT, 1 }, + /* binding table (vs, gs, fs) */ + { ILO_GPE_GEN6_BINDING_TABLE_STATE, ILO_MAX_VS_SURFACES }, + { ILO_GPE_GEN6_BINDING_TABLE_STATE, ILO_MAX_GS_SURFACES }, + { ILO_GPE_GEN6_BINDING_TABLE_STATE, ILO_MAX_WM_SURFACES }, + }; + int i; + + for (i = 0; i < Elements(static_states); i++) { + static_size += gen6->estimate_state_size(&p->gpe, + static_states[i].state, + static_states[i].count); + } + } + + size = static_size; + + /* + * render targets (fs) + * stream outputs (gs) + * sampler views (vs, fs) + * constant buffers (vs, fs) + */ + count = ilo->framebuffer.nr_cbufs; + + if (ilo->gs) + count += ilo->gs->info.stream_output.num_outputs; + else if (ilo->vs) + count += ilo->vs->info.stream_output.num_outputs; + + for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) { + count += ilo->sampler_views[shader_type].num_views; + count += ilo->constant_buffers[shader_type].num_buffers; + } + + if (count) { + size += gen6->estimate_state_size(&p->gpe, + ILO_GPE_GEN6_SURFACE_STATE, count); + } + + /* samplers (vs, fs) */ + for (shader_type = 0; shader_type < PIPE_SHADER_TYPES; shader_type++) { + count = ilo->samplers[shader_type].num_samplers; + if (count) { + size += gen6->estimate_state_size(&p->gpe, + ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE, count); + size += gen6->estimate_state_size(&p->gpe, + ILO_GPE_GEN6_SAMPLER_STATE, count); + } + } + + /* pcb (vs) */ + if (ilo->vs && ilo->vs->shader->pcb.clip_state_size) { + const int pcb_size = ilo->vs->shader->pcb.clip_state_size; + + size += gen6->estimate_state_size(&p->gpe, + ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER, pcb_size); + } + + return size; +} + +static int +ilo_3d_pipeline_estimate_size_gen6(struct ilo_3d_pipeline *p, + enum ilo_3d_pipeline_action action, + const void *arg) +{ + const struct ilo_gpe_gen6 *gen6 = 
ilo_gpe_gen6_get(); + int size; + + switch (action) { + case ILO_3D_PIPELINE_DRAW: + { + const struct ilo_context *ilo = arg; + + size = gen6_pipeline_estimate_commands(p, gen6, ilo) + + gen6_pipeline_estimate_states(p, gen6, ilo); + } + break; + case ILO_3D_PIPELINE_FLUSH: + size = gen6->estimate_command_size(&p->gpe, + ILO_GPE_GEN6_PIPE_CONTROL, 1) * 3; + break; + case ILO_3D_PIPELINE_WRITE_TIMESTAMP: + size = gen6->estimate_command_size(&p->gpe, + ILO_GPE_GEN6_PIPE_CONTROL, 1) * 2; + break; + case ILO_3D_PIPELINE_WRITE_DEPTH_COUNT: + size = gen6->estimate_command_size(&p->gpe, + ILO_GPE_GEN6_PIPE_CONTROL, 1) * 3; + break; + default: + assert(!"unknown 3D pipeline action"); + size = 0; + break; + } + + return size; +} + +void +ilo_3d_pipeline_init_gen6(struct ilo_3d_pipeline *p) +{ + const struct ilo_gpe_gen6 *gen6 = ilo_gpe_gen6_get(); + + p->estimate_size = ilo_3d_pipeline_estimate_size_gen6; + p->emit_draw = ilo_3d_pipeline_emit_draw_gen6; + p->emit_flush = ilo_3d_pipeline_emit_flush_gen6; + p->emit_write_timestamp = ilo_3d_pipeline_emit_write_timestamp_gen6; + p->emit_write_depth_count = ilo_3d_pipeline_emit_write_depth_count_gen6; + +#define GEN6_USE(p, name, from) \ + p->gen6_ ## name = from->emit_ ## name + GEN6_USE(p, STATE_BASE_ADDRESS, gen6); + GEN6_USE(p, STATE_SIP, gen6); + GEN6_USE(p, PIPELINE_SELECT, gen6); + GEN6_USE(p, 3DSTATE_BINDING_TABLE_POINTERS, gen6); + GEN6_USE(p, 3DSTATE_SAMPLER_STATE_POINTERS, gen6); + GEN6_USE(p, 3DSTATE_URB, gen6); + GEN6_USE(p, 3DSTATE_VERTEX_BUFFERS, gen6); + GEN6_USE(p, 3DSTATE_VERTEX_ELEMENTS, gen6); + GEN6_USE(p, 3DSTATE_INDEX_BUFFER, gen6); + GEN6_USE(p, 3DSTATE_VF_STATISTICS, gen6); + GEN6_USE(p, 3DSTATE_VIEWPORT_STATE_POINTERS, gen6); + GEN6_USE(p, 3DSTATE_CC_STATE_POINTERS, gen6); + GEN6_USE(p, 3DSTATE_SCISSOR_STATE_POINTERS, gen6); + GEN6_USE(p, 3DSTATE_VS, gen6); + GEN6_USE(p, 3DSTATE_GS, gen6); + GEN6_USE(p, 3DSTATE_CLIP, gen6); + GEN6_USE(p, 3DSTATE_SF, gen6); + GEN6_USE(p, 3DSTATE_WM, gen6); + GEN6_USE(p, 
3DSTATE_CONSTANT_VS, gen6); + GEN6_USE(p, 3DSTATE_CONSTANT_GS, gen6); + GEN6_USE(p, 3DSTATE_CONSTANT_PS, gen6); + GEN6_USE(p, 3DSTATE_SAMPLE_MASK, gen6); + GEN6_USE(p, 3DSTATE_DRAWING_RECTANGLE, gen6); + GEN6_USE(p, 3DSTATE_DEPTH_BUFFER, gen6); + GEN6_USE(p, 3DSTATE_POLY_STIPPLE_OFFSET, gen6); + GEN6_USE(p, 3DSTATE_POLY_STIPPLE_PATTERN, gen6); + GEN6_USE(p, 3DSTATE_LINE_STIPPLE, gen6); + GEN6_USE(p, 3DSTATE_AA_LINE_PARAMETERS, gen6); + GEN6_USE(p, 3DSTATE_GS_SVB_INDEX, gen6); + GEN6_USE(p, 3DSTATE_MULTISAMPLE, gen6); + GEN6_USE(p, 3DSTATE_STENCIL_BUFFER, gen6); + GEN6_USE(p, 3DSTATE_HIER_DEPTH_BUFFER, gen6); + GEN6_USE(p, 3DSTATE_CLEAR_PARAMS, gen6); + GEN6_USE(p, PIPE_CONTROL, gen6); + GEN6_USE(p, 3DPRIMITIVE, gen6); + GEN6_USE(p, INTERFACE_DESCRIPTOR_DATA, gen6); + GEN6_USE(p, SF_VIEWPORT, gen6); + GEN6_USE(p, CLIP_VIEWPORT, gen6); + GEN6_USE(p, CC_VIEWPORT, gen6); + GEN6_USE(p, COLOR_CALC_STATE, gen6); + GEN6_USE(p, BLEND_STATE, gen6); + GEN6_USE(p, DEPTH_STENCIL_STATE, gen6); + GEN6_USE(p, SCISSOR_RECT, gen6); + GEN6_USE(p, BINDING_TABLE_STATE, gen6); + GEN6_USE(p, surf_SURFACE_STATE, gen6); + GEN6_USE(p, view_SURFACE_STATE, gen6); + GEN6_USE(p, cbuf_SURFACE_STATE, gen6); + GEN6_USE(p, so_SURFACE_STATE, gen6); + GEN6_USE(p, SAMPLER_STATE, gen6); + GEN6_USE(p, SAMPLER_BORDER_COLOR_STATE, gen6); + GEN6_USE(p, push_constant_buffer, gen6); +#undef GEN6_USE +} diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.h b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.h new file mode 100644 index 00000000000..8cc6ab20030 --- /dev/null +++ b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.h @@ -0,0 +1,161 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2012-2013 LunarG, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu + */ + +#ifndef ILO_3D_PIPELINE_GEN6_H +#define ILO_3D_PIPELINE_GEN6_H + +#include "ilo_common.h" + +struct ilo_3d_pipeline; +struct ilo_context; + +struct gen6_pipeline_session { + const struct pipe_draw_info *info; + + uint32_t pipe_dirty; + + int reduced_prim; + int init_cp_space; + + bool hw_ctx_changed; + bool state_bo_changed; + bool instruction_bo_changed; + bool prim_changed; + + void (*emit_draw_states)(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session); + + void (*emit_draw_commands)(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session); + + /* indirect states */ + bool viewport_state_changed; + bool cc_state_blend_changed; + bool cc_state_dsa_changed; + bool cc_state_cc_changed; + bool scissor_state_changed; + bool binding_table_vs_changed; + bool binding_table_gs_changed; + bool binding_table_fs_changed; + bool sampler_state_vs_changed; + bool sampler_state_gs_changed; + bool sampler_state_fs_changed; + bool pcb_state_vs_changed; + bool pcb_state_gs_changed; + bool pcb_state_fs_changed; + + int num_surfaces[PIPE_SHADER_TYPES]; +}; + +void +gen6_pipeline_prepare(const struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + const struct pipe_draw_info *info, + struct gen6_pipeline_session *session); + +void +gen6_pipeline_draw(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session); + +void +gen6_pipeline_end(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session); + +void +gen6_pipeline_common_select(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session); + +void +gen6_pipeline_common_sip(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session); + +void +gen6_pipeline_common_base_address(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, 
+ struct gen6_pipeline_session *session); + +void +gen6_pipeline_vf(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session); + +void +gen6_pipeline_vf_statistics(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session); + +void +gen6_pipeline_vf_draw(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session); + +void +gen6_pipeline_vs(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session); + +void +gen6_pipeline_clip(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session); + +void +gen6_pipeline_sf_rect(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session); + +void +gen6_pipeline_wm_raster(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session); + +void +gen6_pipeline_states(struct ilo_3d_pipeline *p, + const struct ilo_context *ilo, + struct gen6_pipeline_session *session); + +void +ilo_3d_pipeline_emit_flush_gen6(struct ilo_3d_pipeline *p); + +void +ilo_3d_pipeline_emit_write_timestamp_gen6(struct ilo_3d_pipeline *p, + struct intel_bo *bo, int index); + +void +ilo_3d_pipeline_emit_write_depth_count_gen6(struct ilo_3d_pipeline *p, + struct intel_bo *bo, int index); + +void +ilo_3d_pipeline_init_gen6(struct ilo_3d_pipeline *p); + +#endif /* ILO_3D_PIPELINE_GEN6_H */