From ea8e7a8d4a32ff8d3eea2dce871cfbd6b833cc87 Mon Sep 17 00:00:00 2001 From: Chia-I Wu Date: Fri, 12 Sep 2014 23:44:19 +0800 Subject: [PATCH] ilo: move 3D functions to ilo_builder_3d*.h Move functions for the 3D pipeline to the new headers. We artificially split the functions into top (vertex processing) and bottom (pixel processing), to keep the headers at reasonable sizes. --- src/gallium/drivers/ilo/Makefile.sources | 4 +- .../drivers/ilo/ilo_3d_pipeline_gen6.c | 3 +- .../drivers/ilo/ilo_3d_pipeline_gen7.c | 2 +- .../drivers/ilo/ilo_blitter_rectlist.c | 6 +- src/gallium/drivers/ilo/ilo_builder_3d.h | 125 ++ .../drivers/ilo/ilo_builder_3d_bottom.h | 1334 ++++++++++++ .../{ilo_gpe_gen7.h => ilo_builder_3d_top.h} | 1808 ++++++++++------ src/gallium/drivers/ilo/ilo_gpe_gen6.h | 1879 ----------------- src/gallium/drivers/ilo/ilo_gpe_gen7.c | 3 +- 9 files changed, 2620 insertions(+), 2544 deletions(-) create mode 100644 src/gallium/drivers/ilo/ilo_builder_3d.h create mode 100644 src/gallium/drivers/ilo/ilo_builder_3d_bottom.h rename src/gallium/drivers/ilo/{ilo_gpe_gen7.h => ilo_builder_3d_top.h} (51%) diff --git a/src/gallium/drivers/ilo/Makefile.sources b/src/gallium/drivers/ilo/Makefile.sources index d086025e248..0a631e809af 100644 --- a/src/gallium/drivers/ilo/Makefile.sources +++ b/src/gallium/drivers/ilo/Makefile.sources @@ -16,6 +16,9 @@ C_SOURCES := \ ilo_blitter_rectlist.c \ ilo_builder.c \ ilo_builder.h \ + ilo_builder_3d.h \ + ilo_builder_3d_bottom.h \ + ilo_builder_3d_top.h \ ilo_blitter_blt.h \ ilo_builder_decode.c \ ilo_builder_media.h \ @@ -31,7 +34,6 @@ C_SOURCES := \ ilo_gpe_gen6.c \ ilo_gpe_gen6.h \ ilo_gpe_gen7.c \ - ilo_gpe_gen7.h \ ilo_gpe.h \ ilo_gpgpu.c \ ilo_gpgpu.h \ diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c index 88dd3ae67fd..3e37c68f3f9 100644 --- a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c +++ b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen6.c @@ -31,12 +31,11 @@ #include
"ilo_3d.h" #include "ilo_blitter.h" +#include "ilo_builder_3d.h" #include "ilo_builder_mi.h" #include "ilo_builder_render.h" #include "ilo_context.h" #include "ilo_cp.h" -#include "ilo_gpe_gen6.h" -#include "ilo_gpe_gen7.h" #include "ilo_shader.h" #include "ilo_state.h" #include "ilo_3d_pipeline.h" diff --git a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c index fd1daf50d7d..14c0823df63 100644 --- a/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c +++ b/src/gallium/drivers/ilo/ilo_3d_pipeline_gen7.c @@ -29,10 +29,10 @@ #include "util/u_dual_blend.h" #include "ilo_blitter.h" +#include "ilo_builder_3d.h" #include "ilo_builder_render.h" #include "ilo_context.h" #include "ilo_cp.h" -#include "ilo_gpe_gen7.h" #include "ilo_shader.h" #include "ilo_state.h" #include "ilo_3d_pipeline.h" diff --git a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c index 009780a1cd9..5dd0b1ec5a9 100644 --- a/src/gallium/drivers/ilo/ilo_blitter_rectlist.c +++ b/src/gallium/drivers/ilo/ilo_blitter_rectlist.c @@ -28,13 +28,13 @@ #include "util/u_draw.h" #include "util/u_pack_color.h" -#include "ilo_blitter.h" #include "ilo_3d.h" #include "ilo_3d_pipeline.h" +#include "ilo_builder_3d_top.h" /* for ve_init_cso_with_components() */ +#include "ilo_gpe_gen6.h" /* for zs_align_surface() */ #include "ilo_blit.h" #include "ilo_gpe.h" -#include "ilo_gpe_gen6.h" /* for ve_init_cso_with_components and - zs_align_surface */ +#include "ilo_blitter.h" /** * Set the states that are invariant between all ops. diff --git a/src/gallium/drivers/ilo/ilo_builder_3d.h b/src/gallium/drivers/ilo/ilo_builder_3d.h new file mode 100644 index 00000000000..c94fd718ee3 --- /dev/null +++ b/src/gallium/drivers/ilo/ilo_builder_3d.h @@ -0,0 +1,125 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2014 LunarG, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Chia-I Wu + */ + +#ifndef ILO_BUILDER_3D_H +#define ILO_BUILDER_3D_H + +#include "genhw/genhw.h" + +#include "ilo_common.h" +#include "ilo_builder_3d_top.h" +#include "ilo_builder_3d_bottom.h" + +/** + * Translate a pipe primitive type to the matching hardware primitive type. 
+ */ +static inline int +ilo_gpe_gen6_translate_pipe_prim(unsigned prim) +{ + static const int prim_mapping[PIPE_PRIM_MAX] = { + [PIPE_PRIM_POINTS] = GEN6_3DPRIM_POINTLIST, + [PIPE_PRIM_LINES] = GEN6_3DPRIM_LINELIST, + [PIPE_PRIM_LINE_LOOP] = GEN6_3DPRIM_LINELOOP, + [PIPE_PRIM_LINE_STRIP] = GEN6_3DPRIM_LINESTRIP, + [PIPE_PRIM_TRIANGLES] = GEN6_3DPRIM_TRILIST, + [PIPE_PRIM_TRIANGLE_STRIP] = GEN6_3DPRIM_TRISTRIP, + [PIPE_PRIM_TRIANGLE_FAN] = GEN6_3DPRIM_TRIFAN, + [PIPE_PRIM_QUADS] = GEN6_3DPRIM_QUADLIST, + [PIPE_PRIM_QUAD_STRIP] = GEN6_3DPRIM_QUADSTRIP, + [PIPE_PRIM_POLYGON] = GEN6_3DPRIM_POLYGON, + [PIPE_PRIM_LINES_ADJACENCY] = GEN6_3DPRIM_LINELIST_ADJ, + [PIPE_PRIM_LINE_STRIP_ADJACENCY] = GEN6_3DPRIM_LINESTRIP_ADJ, + [PIPE_PRIM_TRIANGLES_ADJACENCY] = GEN6_3DPRIM_TRILIST_ADJ, + [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = GEN6_3DPRIM_TRISTRIP_ADJ, + }; + + assert(prim_mapping[prim]); + + return prim_mapping[prim]; +} + +static inline void +gen6_3DPRIMITIVE(struct ilo_builder *builder, + const struct pipe_draw_info *info, + const struct ilo_ib_state *ib, + bool rectlist) +{ + const uint8_t cmd_len = 6; + const int prim = (rectlist) ? + GEN6_3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode); + const int vb_access = (info->indexed) ? + GEN6_3DPRIM_DW0_ACCESS_RANDOM : GEN6_3DPRIM_DW0_ACCESS_SEQUENTIAL; + const uint32_t vb_start = info->start + + ((info->indexed) ? 
ib->draw_start_offset : 0); + uint32_t dw0, *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + dw0 = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | + vb_access | + prim << GEN6_3DPRIM_DW0_TYPE__SHIFT | + (cmd_len - 2); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = info->count; + dw[2] = vb_start; + dw[3] = info->instance_count; + dw[4] = info->start_instance; + dw[5] = info->index_bias; +} + +static inline void +gen7_3DPRIMITIVE(struct ilo_builder *builder, + const struct pipe_draw_info *info, + const struct ilo_ib_state *ib, + bool rectlist) +{ + const uint8_t cmd_len = 7; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | (cmd_len - 2); + const int prim = (rectlist) ? + GEN6_3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode); + const int vb_access = (info->indexed) ? + GEN7_3DPRIM_DW1_ACCESS_RANDOM : + GEN7_3DPRIM_DW1_ACCESS_SEQUENTIAL; + const uint32_t vb_start = info->start + + ((info->indexed) ? ib->draw_start_offset : 0); + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 7, 7.5); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = vb_access | prim; + dw[2] = info->count; + dw[3] = vb_start; + dw[4] = info->instance_count; + dw[5] = info->start_instance; + dw[6] = info->index_bias; +} + +#endif /* ILO_BUILDER_3D_H */ diff --git a/src/gallium/drivers/ilo/ilo_builder_3d_bottom.h b/src/gallium/drivers/ilo/ilo_builder_3d_bottom.h new file mode 100644 index 00000000000..6427228a64c --- /dev/null +++ b/src/gallium/drivers/ilo/ilo_builder_3d_bottom.h @@ -0,0 +1,1334 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 2014 LunarG, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Authors: + * Chia-I Wu + */ + +#ifndef ILO_BUILDER_3D_BOTTOM_H +#define ILO_BUILDER_3D_BOTTOM_H + +#include "genhw/genhw.h" +#include "intel_winsys.h" + +#include "ilo_common.h" +#include "ilo_format.h" +#include "ilo_shader.h" +#include "ilo_builder.h" +#include "ilo_builder_3d_top.h" + +static inline void +gen6_3DSTATE_CLIP(struct ilo_builder *builder, + const struct ilo_rasterizer_state *rasterizer, + const struct ilo_shader_state *fs, + bool enable_guardband, + int num_viewports) +{ + const uint8_t cmd_len = 4; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) | (cmd_len - 2); + uint32_t dw1, dw2, dw3, *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + if (rasterizer) { + int interps; + + dw1 = rasterizer->clip.payload[0]; + dw2 = rasterizer->clip.payload[1]; + dw3 = rasterizer->clip.payload[2]; + + if (enable_guardband && rasterizer->clip.can_enable_guardband) + dw2 |= GEN6_CLIP_DW2_GB_TEST_ENABLE; + + interps = (fs) ? ilo_shader_get_kernel_param(fs, + ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0; + + if (interps & (GEN6_INTERP_NONPERSPECTIVE_PIXEL | + GEN6_INTERP_NONPERSPECTIVE_CENTROID | + GEN6_INTERP_NONPERSPECTIVE_SAMPLE)) + dw2 |= GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE; + + dw3 |= GEN6_CLIP_DW3_RTAINDEX_FORCED_ZERO | + (num_viewports - 1); + } + else { + dw1 = 0; + dw2 = 0; + dw3 = 0; + } + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = dw1; + dw[2] = dw2; + dw[3] = dw3; +} + +/** + * Fill in DW2 to DW7 of 3DSTATE_SF. 
+ */ +static inline void +ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev, + const struct ilo_rasterizer_state *rasterizer, + int num_samples, + enum pipe_format depth_format, + uint32_t *payload, unsigned payload_len) +{ + assert(payload_len == Elements(rasterizer->sf.payload)); + + if (rasterizer) { + const struct ilo_rasterizer_sf *sf = &rasterizer->sf; + + memcpy(payload, sf->payload, sizeof(sf->payload)); + if (num_samples > 1) + payload[1] |= sf->dw_msaa; + } + else { + payload[0] = 0; + payload[1] = (num_samples > 1) ? GEN7_SF_DW2_MSRASTMODE_ON_PATTERN : 0; + payload[2] = 0; + payload[3] = 0; + payload[4] = 0; + payload[5] = 0; + } + + if (ilo_dev_gen(dev) >= ILO_GEN(7)) { + int format; + + /* separate stencil */ + switch (depth_format) { + case PIPE_FORMAT_Z16_UNORM: + format = GEN6_ZFORMAT_D16_UNORM; + break; + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + format = GEN6_ZFORMAT_D32_FLOAT; + break; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + format = GEN6_ZFORMAT_D24_UNORM_X8_UINT; + break; + default: + /* FLOAT surface is assumed when there is no depth buffer */ + format = GEN6_ZFORMAT_D32_FLOAT; + break; + } + + payload[0] |= format << GEN7_SF_DW1_DEPTH_FORMAT__SHIFT; + } +} + +/** + * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF. 
+ */ +static inline void +ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev, + const struct ilo_rasterizer_state *rasterizer, + const struct ilo_shader_state *fs, + uint32_t *dw, int num_dwords) +{ + int output_count, vue_offset, vue_len; + const struct ilo_kernel_routing *routing; + + ILO_DEV_ASSERT(dev, 6, 7.5); + assert(num_dwords == 13); + + if (!fs) { + memset(dw, 0, sizeof(dw[0]) * num_dwords); + dw[0] = 1 << GEN7_SBE_DW1_URB_READ_LEN__SHIFT; + return; + } + + output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT); + assert(output_count <= 32); + + routing = ilo_shader_get_kernel_routing(fs); + + vue_offset = routing->source_skip; + assert(vue_offset % 2 == 0); + vue_offset /= 2; + + vue_len = (routing->source_len + 1) / 2; + if (!vue_len) + vue_len = 1; + + dw[0] = output_count << GEN7_SBE_DW1_ATTR_COUNT__SHIFT | + vue_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT | + vue_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT; + if (routing->swizzle_enable) + dw[0] |= GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE; + + switch (rasterizer->state.sprite_coord_mode) { + case PIPE_SPRITE_COORD_UPPER_LEFT: + dw[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT; + break; + case PIPE_SPRITE_COORD_LOWER_LEFT: + dw[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_LOWERLEFT; + break; + } + + STATIC_ASSERT(Elements(routing->swizzles) >= 16); + memcpy(&dw[1], routing->swizzles, 2 * 16); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 268: + * + * "This field (Point Sprite Texture Coordinate Enable) must be + * programmed to 0 when non-point primitives are rendered." + * + * TODO We do not check that yet. 
+ */ + dw[9] = routing->point_sprite_enable; + + dw[10] = routing->const_interp_enable; + + /* WrapShortest enables */ + dw[11] = 0; + dw[12] = 0; +} + +static inline void +gen6_3DSTATE_SF(struct ilo_builder *builder, + const struct ilo_rasterizer_state *rasterizer, + const struct ilo_shader_state *fs) +{ + const uint8_t cmd_len = 20; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2); + uint32_t payload_raster[6], payload_sbe[13], *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + ilo_gpe_gen6_fill_3dstate_sf_raster(builder->dev, rasterizer, + 1, PIPE_FORMAT_NONE, payload_raster, Elements(payload_raster)); + ilo_gpe_gen6_fill_3dstate_sf_sbe(builder->dev, rasterizer, + fs, payload_sbe, Elements(payload_sbe)); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = payload_sbe[0]; + memcpy(&dw[2], payload_raster, sizeof(payload_raster)); + memcpy(&dw[8], &payload_sbe[1], sizeof(payload_sbe) - 4); +} + +static inline void +gen7_3DSTATE_SF(struct ilo_builder *builder, + const struct ilo_rasterizer_state *rasterizer, + enum pipe_format zs_format) +{ + const uint8_t cmd_len = 7; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2); + const int num_samples = 1; + uint32_t payload[6], *dw; + + ILO_DEV_ASSERT(builder->dev, 7, 7.5); + + ilo_gpe_gen6_fill_3dstate_sf_raster(builder->dev, + rasterizer, num_samples, zs_format, + payload, Elements(payload)); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + memcpy(&dw[1], payload, sizeof(payload)); +} + +static inline void +gen7_3DSTATE_SBE(struct ilo_builder *builder, + const struct ilo_rasterizer_state *rasterizer, + const struct ilo_shader_state *fs) +{ + const uint8_t cmd_len = 14; + const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (cmd_len - 2); + uint32_t payload[13], *dw; + + ILO_DEV_ASSERT(builder->dev, 7, 7.5); + + ilo_gpe_gen6_fill_3dstate_sf_sbe(builder->dev, + rasterizer, fs, payload, Elements(payload)); + + 
ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + memcpy(&dw[1], payload, sizeof(payload)); +} + +static inline void +gen6_3DSTATE_WM(struct ilo_builder *builder, + const struct ilo_shader_state *fs, + int num_samplers, + const struct ilo_rasterizer_state *rasterizer, + bool dual_blend, bool cc_may_kill, + uint32_t hiz_op) +{ + const uint8_t cmd_len = 9; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); + const int num_samples = 1; + const struct ilo_shader_cso *fs_cso; + uint32_t dw2, dw4, dw5, dw6, *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + if (!fs) { + /* see brwCreateContext() */ + const int max_threads = (builder->dev->gt == 2) ? 80 : 40; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = 0; + dw[2] = 0; + dw[3] = 0; + dw[4] = hiz_op; + /* honor the valid range even if dispatching is disabled */ + dw[5] = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT; + dw[6] = 0; + dw[7] = 0; + dw[8] = 0; + + return; + } + + fs_cso = ilo_shader_get_kernel_cso(fs); + dw2 = fs_cso->payload[0]; + dw4 = fs_cso->payload[1]; + dw5 = fs_cso->payload[2]; + dw6 = fs_cso->payload[3]; + + dw2 |= (num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 248: + * + * "This bit (Statistics Enable) must be disabled if either of these + * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer Resolve + * Enable or Depth Buffer Resolve Enable." 
+ */ + assert(!hiz_op); + dw4 |= GEN6_WM_DW4_STATISTICS; + + if (cc_may_kill) + dw5 |= GEN6_WM_DW5_PS_KILL | GEN6_WM_DW5_PS_ENABLE; + + if (dual_blend) + dw5 |= GEN6_WM_DW5_DUAL_SOURCE_BLEND; + + dw5 |= rasterizer->wm.payload[0]; + + dw6 |= rasterizer->wm.payload[1]; + + if (num_samples > 1) { + dw6 |= rasterizer->wm.dw_msaa_rast | + rasterizer->wm.dw_msaa_disp; + } + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = ilo_shader_get_kernel_offset(fs); + dw[2] = dw2; + dw[3] = 0; /* scratch */ + dw[4] = dw4; + dw[5] = dw5; + dw[6] = dw6; + dw[7] = 0; /* kernel 1 */ + dw[8] = 0; /* kernel 2 */ +} + +static inline void +gen7_3DSTATE_WM(struct ilo_builder *builder, + const struct ilo_shader_state *fs, + const struct ilo_rasterizer_state *rasterizer, + bool cc_may_kill, uint32_t hiz_op) +{ + const uint8_t cmd_len = 3; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); + const int num_samples = 1; + uint32_t dw1, dw2, *dw; + + ILO_DEV_ASSERT(builder->dev, 7, 7.5); + + /* see ilo_gpe_init_rasterizer_wm() */ + if (rasterizer) { + dw1 = rasterizer->wm.payload[0]; + dw2 = rasterizer->wm.payload[1]; + + assert(!hiz_op); + dw1 |= GEN7_WM_DW1_STATISTICS; + } + else { + dw1 = hiz_op; + dw2 = 0; + } + + if (fs) { + const struct ilo_shader_cso *fs_cso = ilo_shader_get_kernel_cso(fs); + + dw1 |= fs_cso->payload[3]; + } + + if (cc_may_kill) + dw1 |= GEN7_WM_DW1_PS_ENABLE | GEN7_WM_DW1_PS_KILL; + + if (num_samples > 1) { + dw1 |= rasterizer->wm.dw_msaa_rast; + dw2 |= rasterizer->wm.dw_msaa_disp; + } + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = dw1; + dw[2] = dw2; +} + +static inline void +gen7_3DSTATE_PS(struct ilo_builder *builder, + const struct ilo_shader_state *fs, + int num_samplers, bool dual_blend) +{ + const uint8_t cmd_len = 8; + const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2); + const struct ilo_shader_cso *cso; + uint32_t dw2, dw4, dw5, *dw; + + 
ILO_DEV_ASSERT(builder->dev, 7, 7.5); + + if (!fs) { + int max_threads; + + /* GPU hangs if none of the dispatch enable bits is set */ + dw4 = GEN7_PS_DW4_8_PIXEL_DISPATCH; + + /* see brwCreateContext() */ + switch (ilo_dev_gen(builder->dev)) { + case ILO_GEN(7.5): + max_threads = (builder->dev->gt == 3) ? 408 : + (builder->dev->gt == 2) ? 204 : 102; + dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT; + break; + case ILO_GEN(7): + default: + max_threads = (builder->dev->gt == 2) ? 172 : 48; + dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT; + break; + } + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = 0; + dw[2] = 0; + dw[3] = 0; + dw[4] = dw4; + dw[5] = 0; + dw[6] = 0; + dw[7] = 0; + + return; + } + + cso = ilo_shader_get_kernel_cso(fs); + dw2 = cso->payload[0]; + dw4 = cso->payload[1]; + dw5 = cso->payload[2]; + + dw2 |= (num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; + + if (dual_blend) + dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = ilo_shader_get_kernel_offset(fs); + dw[2] = dw2; + dw[3] = 0; /* scratch */ + dw[4] = dw4; + dw[5] = dw5; + dw[6] = 0; /* kernel 1 */ + dw[7] = 0; /* kernel 2 */ +} + +static inline void +gen6_3DSTATE_CONSTANT_PS(struct ilo_builder *builder, + const uint32_t *bufs, const int *sizes, + int num_bufs) +{ + const uint8_t cmd_len = 5; + uint32_t buf_dw[4], buf_enabled; + uint32_t dw0, *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + assert(num_bufs <= 4); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 287: + * + * "The sum of all four read length fields (each incremented to + * represent the actual read length) must be less than or equal to 64" + */ + buf_enabled = gen6_fill_3dstate_constant(builder->dev, + bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw)); + + dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_PS) | + buf_enabled << 12 | + (cmd_len - 2); + + ilo_builder_batch_pointer(builder, 
cmd_len, &dw); + dw[0] = dw0; + memcpy(&dw[1], buf_dw, sizeof(buf_dw)); +} + +static inline void +gen7_3DSTATE_CONSTANT_PS(struct ilo_builder *builder, + const uint32_t *bufs, const int *sizes, + int num_bufs) +{ + gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_PS, + bufs, sizes, num_bufs); +} + +static inline void +gen7_3DSTATE_BINDING_TABLE_POINTERS_PS(struct ilo_builder *builder, + uint32_t binding_table) +{ + gen7_3dstate_pointer(builder, + GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_PS, + binding_table); +} + +static inline void +gen7_3DSTATE_SAMPLER_STATE_POINTERS_PS(struct ilo_builder *builder, + uint32_t sampler_state) +{ + gen7_3dstate_pointer(builder, + GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_PS, + sampler_state); +} + +static inline void +gen6_3DSTATE_MULTISAMPLE(struct ilo_builder *builder, + int num_samples, + const uint32_t *packed_sample_pos, + bool pixel_location_center) +{ + const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 4 : 3; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_MULTISAMPLE) | + (cmd_len - 2); + uint32_t dw1, dw2, dw3, *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + dw1 = (pixel_location_center) ? 
+ GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER : GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER; + + switch (num_samples) { + case 0: + case 1: + dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1; + dw2 = 0; + dw3 = 0; + break; + case 4: + dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4; + dw2 = packed_sample_pos[0]; + dw3 = 0; + break; + case 8: + assert(ilo_dev_gen(builder->dev) >= ILO_GEN(7)); + dw1 |= GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8; + dw2 = packed_sample_pos[0]; + dw3 = packed_sample_pos[1]; + break; + default: + assert(!"unsupported sample count"); + dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1; + dw2 = 0; + dw3 = 0; + break; + } + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = dw1; + dw[2] = dw2; + if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) + dw[3] = dw3; /* Gen7+ only: the 4-dword form has a DW3; it takes dw3 (packed_sample_pos[1] for 8x, else 0) and must not clobber DW2 */ +} + +static inline void +gen6_3DSTATE_SAMPLE_MASK(struct ilo_builder *builder, + unsigned sample_mask) +{ + const uint8_t cmd_len = 2; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) | + (cmd_len - 2); + const unsigned valid_mask = 0xf; + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + sample_mask &= valid_mask; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = sample_mask; +} + +static inline void +gen7_3DSTATE_SAMPLE_MASK(struct ilo_builder *builder, + unsigned sample_mask, + int num_samples) +{ + const uint8_t cmd_len = 2; + const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) | + (cmd_len - 2); + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 7, 7.5); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 294: + * + * "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field + * (Sample Mask) must be zero. + * + * If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field + * must be zero."
+ */ + sample_mask &= valid_mask; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = sample_mask; +} + +static inline void +gen6_3DSTATE_DRAWING_RECTANGLE(struct ilo_builder *builder, + unsigned x, unsigned y, + unsigned width, unsigned height) +{ + const uint8_t cmd_len = 4; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_DRAWING_RECTANGLE) | + (cmd_len - 2); + unsigned xmax = x + width - 1; + unsigned ymax = y + height - 1; + int rect_limit; + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) { + rect_limit = 16383; + } + else { + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 230: + * + * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min) + * must be an even number" + */ + assert(y % 2 == 0); + + rect_limit = 8191; + } + + if (x > rect_limit) x = rect_limit; + if (y > rect_limit) y = rect_limit; + if (xmax > rect_limit) xmax = rect_limit; + if (ymax > rect_limit) ymax = rect_limit; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + + dw[0] = dw0; + dw[1] = y << 16 | x; + dw[2] = ymax << 16 | xmax; + + /* + * There is no need to set the origin. It is intended to support front + * buffer rendering. 
+ */ + dw[3] = 0; +} + +static inline void +gen6_3DSTATE_POLY_STIPPLE_OFFSET(struct ilo_builder *builder, + int x_offset, int y_offset) +{ + const uint8_t cmd_len = 2; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_OFFSET) | + (cmd_len - 2); + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + assert(x_offset >= 0 && x_offset <= 31); + assert(y_offset >= 0 && y_offset <= 31); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = x_offset << 8 | y_offset; +} + +static inline void +gen6_3DSTATE_POLY_STIPPLE_PATTERN(struct ilo_builder *builder, + const struct pipe_poly_stipple *pattern) +{ + const uint8_t cmd_len = 33; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_PATTERN) | + (cmd_len - 2); + uint32_t *dw; + int i; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + STATIC_ASSERT(Elements(pattern->stipple) == 32); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw++; + + for (i = 0; i < 32; i++) + dw[i] = pattern->stipple[i]; +} + +static inline void +gen6_3DSTATE_LINE_STIPPLE(struct ilo_builder *builder, + unsigned pattern, unsigned factor) +{ + const uint8_t cmd_len = 3; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_LINE_STIPPLE) | + (cmd_len - 2); + uint32_t *dw; + unsigned inverse; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + assert((pattern & 0xffff) == pattern); + assert(factor >= 1 && factor <= 256); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = pattern; + + if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) { + /* in U1.16 */ + inverse = (unsigned) (65536.0f / factor); + dw[2] = inverse << 15 | factor; + } + else { + /* in U1.13 */ + inverse = (unsigned) (8192.0f / factor); + dw[2] = inverse << 16 | factor; + } +} + +static inline void +gen6_3DSTATE_AA_LINE_PARAMETERS(struct ilo_builder *builder) +{ + const uint8_t cmd_len = 3; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_AA_LINE_PARAMETERS) | + (cmd_len - 2); + uint32_t *dw; + + 
ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = 0 << 16 | 0; + dw[2] = 0 << 16 | 0; +} + +static inline void +gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder, + const struct ilo_zs_surface *zs) +{ + const uint8_t cmd_len = 7; + unsigned pos; + uint32_t dw0, *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + dw0 = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? + GEN7_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER) : + GEN6_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER); + dw0 |= (cmd_len - 2); + + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = zs->payload[0]; + + if (zs->bo) { + ilo_builder_batch_reloc(builder, pos + 2, + zs->bo, zs->payload[1], INTEL_RELOC_WRITE); + } else { + dw[2] = 0; + } + + dw[3] = zs->payload[2]; + dw[4] = zs->payload[3]; + dw[5] = zs->payload[4]; + dw[6] = zs->payload[5]; +} + +static inline void +gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder, + const struct ilo_zs_surface *zs) +{ + const uint8_t cmd_len = 3; + uint32_t dw0, *dw; + unsigned pos; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + dw0 = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? + GEN7_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER) : + GEN6_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER); + dw0 |= (cmd_len - 2); + + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + /* see ilo_gpe_init_zs_surface() */ + dw[1] = zs->payload[6]; + + if (zs->separate_s8_bo) { + ilo_builder_batch_reloc(builder, pos + 2, + zs->separate_s8_bo, zs->payload[7], INTEL_RELOC_WRITE); + } else { + dw[2] = 0; + } +} + +static inline void +gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder, + const struct ilo_zs_surface *zs) +{ + const uint8_t cmd_len = 3; + uint32_t dw0, *dw; + unsigned pos; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + dw0 = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 
+ GEN7_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER) : + GEN6_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER); + dw0 |= (cmd_len - 2); + + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + /* see ilo_gpe_init_zs_surface() */ + dw[1] = zs->payload[8]; + + if (zs->hiz_bo) { + ilo_builder_batch_reloc(builder, pos + 2, + zs->hiz_bo, zs->payload[9], INTEL_RELOC_WRITE); + } else { + dw[2] = 0; + } +} + +static inline void +gen6_3DSTATE_CLEAR_PARAMS(struct ilo_builder *builder, + uint32_t clear_val) +{ + const uint8_t cmd_len = 2; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CLEAR_PARAMS) | + GEN6_CLEAR_PARAMS_DW0_VALID | + (cmd_len - 2); + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = clear_val; +} + +static inline void +gen7_3DSTATE_CLEAR_PARAMS(struct ilo_builder *builder, + uint32_t clear_val) +{ + const uint8_t cmd_len = 3; + const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_CLEAR_PARAMS) | + (cmd_len - 2); + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 7, 7.5); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = clear_val; + dw[2] = 1; +} + +static inline void +gen6_3DSTATE_VIEWPORT_STATE_POINTERS(struct ilo_builder *builder, + uint32_t clip_viewport, + uint32_t sf_viewport, + uint32_t cc_viewport) +{ + const uint8_t cmd_len = 4; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VIEWPORT_STATE_POINTERS) | + GEN6_PTR_VP_DW0_CLIP_CHANGED | + GEN6_PTR_VP_DW0_SF_CHANGED | + GEN6_PTR_VP_DW0_CC_CHANGED | + (cmd_len - 2); + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = clip_viewport; + dw[2] = sf_viewport; + dw[3] = cc_viewport; +} + +static inline void +gen6_3DSTATE_SCISSOR_STATE_POINTERS(struct ilo_builder *builder, + uint32_t scissor_rect) +{ + const uint8_t cmd_len = 2; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SCISSOR_STATE_POINTERS) | + 
(cmd_len - 2); + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = scissor_rect; +} + +static inline void +gen6_3DSTATE_CC_STATE_POINTERS(struct ilo_builder *builder, + uint32_t blend_state, + uint32_t depth_stencil_state, + uint32_t color_calc_state) +{ + const uint8_t cmd_len = 4; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CC_STATE_POINTERS) | + (cmd_len - 2); + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = blend_state | 1; + dw[2] = depth_stencil_state | 1; + dw[3] = color_calc_state | 1; +} + +static inline void +gen7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(struct ilo_builder *builder, + uint32_t sf_clip_viewport) +{ + gen7_3dstate_pointer(builder, + GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, + sf_clip_viewport); +} + +static inline void +gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC(struct ilo_builder *builder, + uint32_t cc_viewport) +{ + gen7_3dstate_pointer(builder, + GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_CC, + cc_viewport); +} + +static inline void +gen7_3DSTATE_CC_STATE_POINTERS(struct ilo_builder *builder, + uint32_t color_calc_state) +{ + gen7_3dstate_pointer(builder, + GEN6_RENDER_OPCODE_3DSTATE_CC_STATE_POINTERS, color_calc_state); +} + +static inline void +gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(struct ilo_builder *builder, + uint32_t depth_stencil_state) +{ + gen7_3dstate_pointer(builder, + GEN7_RENDER_OPCODE_3DSTATE_DEPTH_STENCIL_STATE_POINTERS, + depth_stencil_state); +} + +static inline void +gen7_3DSTATE_BLEND_STATE_POINTERS(struct ilo_builder *builder, + uint32_t blend_state) +{ + gen7_3dstate_pointer(builder, + GEN7_RENDER_OPCODE_3DSTATE_BLEND_STATE_POINTERS, + blend_state); +} + +static inline uint32_t +gen6_CLIP_VIEWPORT(struct ilo_builder *builder, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports) +{ + const int 
state_align = 32; + const int state_len = 4 * num_viewports; + uint32_t state_offset, *dw; + unsigned i; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 193: + * + * "The viewport-related state is stored as an array of up to 16 + * elements..." + */ + assert(num_viewports && num_viewports <= 16); + + state_offset = ilo_builder_state_pointer(builder, + ILO_BUILDER_ITEM_CLIP_VIEWPORT, state_align, state_len, &dw); + + for (i = 0; i < num_viewports; i++) { + const struct ilo_viewport_cso *vp = &viewports[i]; + + dw[0] = fui(vp->min_gbx); + dw[1] = fui(vp->max_gbx); + dw[2] = fui(vp->min_gby); + dw[3] = fui(vp->max_gby); + + dw += 4; + } + + return state_offset; +} + +static inline uint32_t +gen6_SF_VIEWPORT(struct ilo_builder *builder, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports) +{ + const int state_align = 32; + const int state_len = 8 * num_viewports; + uint32_t state_offset, *dw; + unsigned i; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 262: + * + * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is + * stored as an array of up to 16 elements..." 
+ */ + assert(num_viewports && num_viewports <= 16); + + state_offset = ilo_builder_state_pointer(builder, + ILO_BUILDER_ITEM_SF_VIEWPORT, state_align, state_len, &dw); + + for (i = 0; i < num_viewports; i++) { + const struct ilo_viewport_cso *vp = &viewports[i]; + + dw[0] = fui(vp->m00); + dw[1] = fui(vp->m11); + dw[2] = fui(vp->m22); + dw[3] = fui(vp->m30); + dw[4] = fui(vp->m31); + dw[5] = fui(vp->m32); + dw[6] = 0; + dw[7] = 0; + + dw += 8; + } + + return state_offset; +} + +static inline uint32_t +gen7_SF_CLIP_VIEWPORT(struct ilo_builder *builder, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports) +{ + const int state_align = 64; + const int state_len = 16 * num_viewports; + uint32_t state_offset, *dw; + unsigned i; + + ILO_DEV_ASSERT(builder->dev, 7, 7.5); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 270: + * + * "The viewport-specific state used by both the SF and CL units + * (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each + * of which contains the DWords described below. The start of each + * element is spaced 16 DWords apart. The location of first element of + * the array, as specified by both Pointer to SF_VIEWPORT and Pointer + * to CLIP_VIEWPORT, is aligned to a 64-byte boundary." 
+ */ + assert(num_viewports && num_viewports <= 16); + + state_offset = ilo_builder_state_pointer(builder, + ILO_BUILDER_ITEM_SF_VIEWPORT, state_align, state_len, &dw); + + for (i = 0; i < num_viewports; i++) { + const struct ilo_viewport_cso *vp = &viewports[i]; + + dw[0] = fui(vp->m00); + dw[1] = fui(vp->m11); + dw[2] = fui(vp->m22); + dw[3] = fui(vp->m30); + dw[4] = fui(vp->m31); + dw[5] = fui(vp->m32); + dw[6] = 0; + dw[7] = 0; + dw[8] = fui(vp->min_gbx); + dw[9] = fui(vp->max_gbx); + dw[10] = fui(vp->min_gby); + dw[11] = fui(vp->max_gby); + dw[12] = 0; + dw[13] = 0; + dw[14] = 0; + dw[15] = 0; + + dw += 16; + } + + return state_offset; +} + +static inline uint32_t +gen6_CC_VIEWPORT(struct ilo_builder *builder, + const struct ilo_viewport_cso *viewports, + unsigned num_viewports) +{ + const int state_align = 32; + const int state_len = 2 * num_viewports; + uint32_t state_offset, *dw; + unsigned i; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 385: + * + * "The viewport state is stored as an array of up to 16 elements..." + */ + assert(num_viewports && num_viewports <= 16); + + state_offset = ilo_builder_state_pointer(builder, + ILO_BUILDER_ITEM_CC_VIEWPORT, state_align, state_len, &dw); + + for (i = 0; i < num_viewports; i++) { + const struct ilo_viewport_cso *vp = &viewports[i]; + + dw[0] = fui(vp->min_z); + dw[1] = fui(vp->max_z); + + dw += 2; + } + + return state_offset; +} + +static inline uint32_t +gen6_SCISSOR_RECT(struct ilo_builder *builder, + const struct ilo_scissor_state *scissor, + unsigned num_viewports) +{ + const int state_align = 32; + const int state_len = 2 * num_viewports; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 263: + * + * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is + * stored as an array of up to 16 elements..." 
+ */ + assert(num_viewports && num_viewports <= 16); + assert(Elements(scissor->payload) >= state_len); + + return ilo_builder_state_write(builder, ILO_BUILDER_ITEM_SCISSOR_RECT, + state_align, state_len, scissor->payload); +} + +static inline uint32_t +gen6_COLOR_CALC_STATE(struct ilo_builder *builder, + const struct pipe_stencil_ref *stencil_ref, + ubyte alpha_ref, + const struct pipe_blend_color *blend_color) +{ + const int state_align = 64; + const int state_len = 6; + uint32_t state_offset, *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + state_offset = ilo_builder_state_pointer(builder, + ILO_BUILDER_ITEM_COLOR_CALC, state_align, state_len, &dw); + + dw[0] = stencil_ref->ref_value[0] << 24 | + stencil_ref->ref_value[1] << 16 | + GEN6_CC_DW0_ALPHATEST_UNORM8; + dw[1] = alpha_ref; + dw[2] = fui(blend_color->color[0]); + dw[3] = fui(blend_color->color[1]); + dw[4] = fui(blend_color->color[2]); + dw[5] = fui(blend_color->color[3]); + + return state_offset; +} + +static inline uint32_t +gen6_DEPTH_STENCIL_STATE(struct ilo_builder *builder, + const struct ilo_dsa_state *dsa) +{ + const int state_align = 64; + const int state_len = 3; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + STATIC_ASSERT(Elements(dsa->payload) >= state_len); + + return ilo_builder_state_write(builder, ILO_BUILDER_ITEM_DEPTH_STENCIL, + state_align, state_len, dsa->payload); +} + +static inline uint32_t +gen6_BLEND_STATE(struct ilo_builder *builder, + const struct ilo_blend_state *blend, + const struct ilo_fb_state *fb, + const struct ilo_dsa_state *dsa) +{ + const int state_align = 64; + int state_len; + uint32_t state_offset, *dw; + unsigned num_targets, i; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 376: + * + * "The blend state is stored as an array of up to 8 elements..." 
+ */ + num_targets = fb->state.nr_cbufs; + assert(num_targets <= 8); + + if (!num_targets) { + if (!dsa->dw_alpha) + return 0; + /* to be able to reference alpha func */ + num_targets = 1; + } + + state_len = 2 * num_targets; + + state_offset = ilo_builder_state_pointer(builder, + ILO_BUILDER_ITEM_BLEND, state_align, state_len, &dw); + + for (i = 0; i < num_targets; i++) { + const unsigned idx = (blend->independent_blend_enable) ? i : 0; + const struct ilo_blend_cso *cso = &blend->cso[idx]; + const int num_samples = fb->num_samples; + const struct util_format_description *format_desc = + (idx < fb->state.nr_cbufs && fb->state.cbufs[idx]) ? + util_format_description(fb->state.cbufs[idx]->format) : NULL; + bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one; + + rt_is_unorm = true; + rt_is_pure_integer = false; + rt_dst_alpha_forced_one = false; + + if (format_desc) { + int ch; + + switch (format_desc->format) { + case PIPE_FORMAT_B8G8R8X8_UNORM: + /* force alpha to one when the HW format has alpha */ + assert(ilo_translate_render_format(builder->dev, + PIPE_FORMAT_B8G8R8X8_UNORM) == + GEN6_FORMAT_B8G8R8A8_UNORM); + rt_dst_alpha_forced_one = true; + break; + default: + break; + } + + for (ch = 0; ch < 4; ch++) { + if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID) + continue; + + if (format_desc->channel[ch].pure_integer) { + rt_is_unorm = false; + rt_is_pure_integer = true; + break; + } + + if (!format_desc->channel[ch].normalized || + format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED) + rt_is_unorm = false; + } + } + + dw[0] = cso->payload[0]; + dw[1] = cso->payload[1]; + + if (!rt_is_pure_integer) { + if (rt_dst_alpha_forced_one) + dw[0] |= cso->dw_blend_dst_alpha_forced_one; + else + dw[0] |= cso->dw_blend; + } + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 365: + * + * "Logic Ops are only supported on *_UNORM surfaces (excluding + * _SRGB variants), otherwise Logic Ops must be DISABLED." 
+ * + * Since logicop is ignored for non-UNORM color buffers, no special care + * is needed. + */ + if (rt_is_unorm) + dw[1] |= cso->dw_logicop; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 356: + * + * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage + * Dither both must be disabled." + * + * There is no such limitation on GEN7, or for AlphaToOne. But GL + * requires that anyway. + */ + if (num_samples > 1) + dw[1] |= cso->dw_alpha_mod; + + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 382: + * + * "Alpha Test can only be enabled if Pixel Shader outputs a float + * alpha value." + */ + if (!rt_is_pure_integer) + dw[1] |= dsa->dw_alpha; + + dw += 2; + } + + return state_offset; +} + +#endif /* ILO_BUILDER_3D_BOTTOM_H */ diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen7.h b/src/gallium/drivers/ilo/ilo_builder_3d_top.h similarity index 51% rename from src/gallium/drivers/ilo/ilo_gpe_gen7.h rename to src/gallium/drivers/ilo/ilo_builder_3d_top.h index 9739665d753..e742f63d698 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen7.h +++ b/src/gallium/drivers/ilo/ilo_builder_3d_top.h @@ -1,7 +1,7 @@ /* * Mesa 3-D graphics library * - * Copyright (C) 2013 LunarG, Inc. + * Copyright (C) 2014 LunarG, Inc. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -25,328 +25,632 @@ * Chia-I Wu */ -#ifndef ILO_GPE_GEN7_H -#define ILO_GPE_GEN7_H +#ifndef ILO_BUILDER_3D_TOP_H +#define ILO_BUILDER_3D_TOP_H +#include "genhw/genhw.h" #include "intel_winsys.h" #include "ilo_common.h" -#include "ilo_cp.h" +#include "ilo_gpe.h" #include "ilo_resource.h" #include "ilo_shader.h" -#include "ilo_gpe_gen6.h" +#include "ilo_builder.h" static inline void -gen7_3DSTATE_CLEAR_PARAMS(struct ilo_builder *builder, - uint32_t clear_val) +gen6_3DSTATE_URB(struct ilo_builder *builder, + int vs_total_size, int gs_total_size, + int vs_entry_size, int gs_entry_size) { const uint8_t cmd_len = 3; - const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_CLEAR_PARAMS) | - (cmd_len - 2); + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_URB) | (cmd_len - 2); + const int row_size = 128; /* 1024 bits */ + int vs_alloc_size, gs_alloc_size; + int vs_num_entries, gs_num_entries; uint32_t *dw; - ILO_DEV_ASSERT(builder->dev, 7, 7.5); + ILO_DEV_ASSERT(builder->dev, 6, 6); + + /* in 1024-bit URB rows */ + vs_alloc_size = (vs_entry_size + row_size - 1) / row_size; + gs_alloc_size = (gs_entry_size + row_size - 1) / row_size; + + /* the valid range is [1, 5] */ + if (!vs_alloc_size) + vs_alloc_size = 1; + if (!gs_alloc_size) + gs_alloc_size = 1; + assert(vs_alloc_size <= 5 && gs_alloc_size <= 5); + + /* the valid range is [24, 256] in multiples of 4 */ + vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3; + if (vs_num_entries > 256) + vs_num_entries = 256; + assert(vs_num_entries >= 24); + + /* the valid range is [0, 256] in multiples of 4 */ + gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3; + if (gs_num_entries > 256) + gs_num_entries = 256; ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = dw0; - dw[1] = clear_val; - dw[2] = 1; + dw[1] = (vs_alloc_size - 1) << 
GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT | + vs_num_entries << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT; + dw[2] = gs_num_entries << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT | + (gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT; } static inline void -gen7_3DSTATE_VF(struct ilo_builder *builder, - bool enable_cut_index, - uint32_t cut_index) +gen7_3dstate_push_constant_alloc(struct ilo_builder *builder, + int subop, int offset, int size) { const uint8_t cmd_len = 2; - uint32_t dw0 = GEN75_RENDER_CMD(3D, 3DSTATE_VF) | (cmd_len - 2); + const uint32_t dw0 = GEN6_RENDER_TYPE_RENDER | + GEN6_RENDER_SUBTYPE_3D | + subop | (cmd_len - 2); uint32_t *dw; + int end; - ILO_DEV_ASSERT(builder->dev, 7.5, 7.5); + ILO_DEV_ASSERT(builder->dev, 7, 7.5); - if (enable_cut_index) - dw0 |= GEN75_VF_DW0_CUT_INDEX_ENABLE; + /* VS, HS, DS, GS, and PS variants */ + assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS && + subop <= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS); + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 68: + * + * "(A table that says the maximum size of each constant buffer is + * 16KB") + * + * From the Ivy Bridge PRM, volume 2 part 1, page 115: + * + * "The sum of the Constant Buffer Offset and the Constant Buffer Size + * may not exceed the maximum value of the Constant Buffer Size." + * + * Thus, the valid range of buffer end is [0KB, 16KB]. 
+ */ + end = (offset + size) / 1024; + if (end > 16) { + assert(!"invalid constant buffer end"); + end = 16; + } + + /* the valid range of buffer offset is [0KB, 15KB] */ + offset = (offset + 1023) / 1024; + if (offset > 15) { + assert(!"invalid constant buffer offset"); + offset = 15; + } + + if (offset > end) { + assert(!size); + offset = end; + } + + /* the valid range of buffer size is [0KB, 15KB] */ + size = end - offset; + if (size > 15) { + assert(!"invalid constant buffer size"); + size = 15; + } ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = dw0; - dw[1] = cut_index; + dw[1] = offset << GEN7_PCB_ALLOC_ANY_DW1_OFFSET__SHIFT | + size; } static inline void -gen7_3dstate_pointer(struct ilo_builder *builder, - int subop, uint32_t pointer) +gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(struct ilo_builder *builder, + int offset, int size) +{ + gen7_3dstate_push_constant_alloc(builder, + GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS, offset, size); +} + +static inline void +gen7_3DSTATE_PUSH_CONSTANT_ALLOC_HS(struct ilo_builder *builder, + int offset, int size) +{ + gen7_3dstate_push_constant_alloc(builder, + GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_HS, offset, size); +} + +static inline void +gen7_3DSTATE_PUSH_CONSTANT_ALLOC_DS(struct ilo_builder *builder, + int offset, int size) +{ + gen7_3dstate_push_constant_alloc(builder, + GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_DS, offset, size); +} + +static inline void +gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(struct ilo_builder *builder, + int offset, int size) +{ + gen7_3dstate_push_constant_alloc(builder, + GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_GS, offset, size); +} + +static inline void +gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(struct ilo_builder *builder, + int offset, int size) +{ + gen7_3dstate_push_constant_alloc(builder, + GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS, offset, size); +} + +static inline void +gen7_3dstate_urb(struct ilo_builder *builder, + int subop, int offset, int size, + 
int entry_size) { const uint8_t cmd_len = 2; const uint32_t dw0 = GEN6_RENDER_TYPE_RENDER | GEN6_RENDER_SUBTYPE_3D | subop | (cmd_len - 2); + const int row_size = 64; /* 512 bits */ + int alloc_size, num_entries, min_entries, max_entries; uint32_t *dw; ILO_DEV_ASSERT(builder->dev, 7, 7.5); + /* VS, HS, DS, and GS variants */ + assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_URB_VS && + subop <= GEN7_RENDER_OPCODE_3DSTATE_URB_GS); + + /* in multiples of 8KB */ + assert(offset % 8192 == 0); + offset /= 8192; + + /* in multiple of 512-bit rows */ + alloc_size = (entry_size + row_size - 1) / row_size; + if (!alloc_size) + alloc_size = 1; + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 34: + * + * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may + * cause performance to decrease due to banking in the URB. Element + * sizes of 16 to 20 should be programmed with six 512-bit URB rows." + */ + if (subop == GEN7_RENDER_OPCODE_3DSTATE_URB_VS && alloc_size == 5) + alloc_size = 6; + + /* in multiples of 8 */ + num_entries = (size / row_size / alloc_size) & ~7; + + switch (subop) { + case GEN7_RENDER_OPCODE_3DSTATE_URB_VS: + switch (ilo_dev_gen(builder->dev)) { + case ILO_GEN(7.5): + max_entries = (builder->dev->gt >= 2) ? 1664 : 640; + min_entries = (builder->dev->gt >= 2) ? 64 : 32; + break; + case ILO_GEN(7): + default: + max_entries = (builder->dev->gt == 2) ? 704 : 512; + min_entries = 32; + break; + } + + assert(num_entries >= min_entries); + if (num_entries > max_entries) + num_entries = max_entries; + break; + case GEN7_RENDER_OPCODE_3DSTATE_URB_HS: + max_entries = (builder->dev->gt == 2) ? 64 : 32; + if (num_entries > max_entries) + num_entries = max_entries; + break; + case GEN7_RENDER_OPCODE_3DSTATE_URB_DS: + if (num_entries) + assert(num_entries >= 138); + break; + case GEN7_RENDER_OPCODE_3DSTATE_URB_GS: + switch (ilo_dev_gen(builder->dev)) { + case ILO_GEN(7.5): + max_entries = (builder->dev->gt >= 2) ? 
640 : 256; + break; + case ILO_GEN(7): + default: + max_entries = (builder->dev->gt == 2) ? 320 : 192; + break; + } + + if (num_entries > max_entries) + num_entries = max_entries; + break; + default: + break; + } + ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = dw0; - dw[1] = pointer; + dw[1] = offset << GEN7_URB_ANY_DW1_OFFSET__SHIFT | + (alloc_size - 1) << GEN7_URB_ANY_DW1_ENTRY_SIZE__SHIFT | + num_entries; } static inline void -gen7_3DSTATE_CC_STATE_POINTERS(struct ilo_builder *builder, - uint32_t color_calc_state) +gen7_3DSTATE_URB_VS(struct ilo_builder *builder, + int offset, int size, int entry_size) { - gen7_3dstate_pointer(builder, - GEN6_RENDER_OPCODE_3DSTATE_CC_STATE_POINTERS, color_calc_state); + gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_VS, + offset, size, entry_size); } static inline void -gen7_3DSTATE_GS(struct ilo_builder *builder, - const struct ilo_shader_state *gs, - int num_samplers) +gen7_3DSTATE_URB_HS(struct ilo_builder *builder, + int offset, int size, int entry_size) { - const uint8_t cmd_len = 7; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); - const struct ilo_shader_cso *cso; - uint32_t dw2, dw4, dw5, *dw; + gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_HS, + offset, size, entry_size); +} - ILO_DEV_ASSERT(builder->dev, 7, 7.5); +static inline void +gen7_3DSTATE_URB_DS(struct ilo_builder *builder, + int offset, int size, int entry_size) +{ + gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_DS, + offset, size, entry_size); +} - if (!gs) { - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = 0; - dw[2] = 0; - dw[3] = 0; - dw[4] = 0; - dw[5] = GEN7_GS_DW5_STATISTICS; - dw[6] = 0; - return; - } +static inline void +gen7_3DSTATE_URB_GS(struct ilo_builder *builder, + int offset, int size, int entry_size) +{ + gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_GS, + offset, size, entry_size); +} - cso = ilo_shader_get_kernel_cso(gs); - dw2 = 
cso->payload[0]; - dw4 = cso->payload[1]; - dw5 = cso->payload[2]; +static inline void +gen7_3DSTATE_VF(struct ilo_builder *builder, + bool enable_cut_index, + uint32_t cut_index) +{ + const uint8_t cmd_len = 2; + uint32_t dw0 = GEN75_RENDER_CMD(3D, 3DSTATE_VF) | (cmd_len - 2); + uint32_t *dw; - dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; + ILO_DEV_ASSERT(builder->dev, 7.5, 7.5); + + if (enable_cut_index) + dw0 |= GEN75_VF_DW0_CUT_INDEX_ENABLE; ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = dw0; - dw[1] = ilo_shader_get_kernel_offset(gs); - dw[2] = dw2; - dw[3] = 0; /* scratch */ - dw[4] = dw4; - dw[5] = dw5; - dw[6] = 0; + dw[1] = cut_index; } static inline void -gen7_3DSTATE_SF(struct ilo_builder *builder, - const struct ilo_rasterizer_state *rasterizer, - enum pipe_format zs_format) +gen6_3DSTATE_VF_STATISTICS(struct ilo_builder *builder, + bool enable) { - const uint8_t cmd_len = 7; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2); - const int num_samples = 1; - uint32_t payload[6], *dw; - - ILO_DEV_ASSERT(builder->dev, 7, 7.5); + const uint8_t cmd_len = 1; + const uint32_t dw0 = GEN6_RENDER_CMD(SINGLE_DW, 3DSTATE_VF_STATISTICS) | + enable; - ilo_gpe_gen6_fill_3dstate_sf_raster(builder->dev, - rasterizer, num_samples, zs_format, - payload, Elements(payload)); + ILO_DEV_ASSERT(builder->dev, 6, 7.5); - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - memcpy(&dw[1], payload, sizeof(payload)); + ilo_builder_batch_write(builder, cmd_len, &dw0); } static inline void -gen7_3DSTATE_WM(struct ilo_builder *builder, - const struct ilo_shader_state *fs, - const struct ilo_rasterizer_state *rasterizer, - bool cc_may_kill, uint32_t hiz_op) +gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder, + const struct ilo_ve_state *ve, + const struct ilo_vb_state *vb) { - const uint8_t cmd_len = 3; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); - const int num_samples = 1; - 
uint32_t dw1, dw2, *dw; + uint8_t cmd_len; + uint32_t dw0, *dw; + unsigned hw_idx, pos; - ILO_DEV_ASSERT(builder->dev, 7, 7.5); + ILO_DEV_ASSERT(builder->dev, 6, 7.5); - /* see ilo_gpe_init_rasterizer_wm() */ - if (rasterizer) { - dw1 = rasterizer->wm.payload[0]; - dw2 = rasterizer->wm.payload[1]; + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 82: + * + * "From 1 to 33 VBs can be specified..." + */ + assert(ve->vb_count <= 33); - assert(!hiz_op); - dw1 |= GEN7_WM_DW1_STATISTICS; - } - else { - dw1 = hiz_op; - dw2 = 0; - } + if (!ve->vb_count) + return; - if (fs) { - const struct ilo_shader_cso *fs_cso = ilo_shader_get_kernel_cso(fs); + cmd_len = 1 + 4 * ve->vb_count; + dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_BUFFERS) | + (cmd_len - 2); - dw1 |= fs_cso->payload[3]; - } + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; - if (cc_may_kill) - dw1 |= GEN7_WM_DW1_PS_ENABLE | GEN7_WM_DW1_PS_KILL; + dw++; + pos++; + for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { + const unsigned instance_divisor = ve->instance_divisors[hw_idx]; + const unsigned pipe_idx = ve->vb_mapping[hw_idx]; + const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx]; - if (num_samples > 1) { - dw1 |= rasterizer->wm.dw_msaa_rast; - dw2 |= rasterizer->wm.dw_msaa_disp; + dw[0] = hw_idx << GEN6_VB_STATE_DW0_INDEX__SHIFT; + + if (instance_divisor) + dw[0] |= GEN6_VB_STATE_DW0_ACCESS_INSTANCEDATA; + else + dw[0] |= GEN6_VB_STATE_DW0_ACCESS_VERTEXDATA; + + if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) + dw[0] |= GEN7_VB_STATE_DW0_ADDR_MODIFIED; + + /* use null vb if there is no buffer or the stride is out of range */ + if (cso->buffer && cso->stride <= 2048) { + const struct ilo_buffer *buf = ilo_buffer(cso->buffer); + const uint32_t start_offset = cso->buffer_offset; + const uint32_t end_offset = buf->bo_size - 1; + + dw[0] |= cso->stride << GEN6_VB_STATE_DW0_PITCH__SHIFT; + ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0); + 
ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0); + } + else { + dw[0] |= 1 << 13; + dw[1] = 0; + dw[2] = 0; + } + + dw[3] = instance_divisor; + + dw += 4; + pos += 4; } +} - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = dw1; - dw[2] = dw2; +static inline void +ve_init_cso_with_components(const struct ilo_dev_info *dev, + int comp0, int comp1, int comp2, int comp3, + struct ilo_ve_cso *cso) +{ + ILO_DEV_ASSERT(dev, 6, 7.5); + + STATIC_ASSERT(Elements(cso->payload) >= 2); + cso->payload[0] = GEN6_VE_STATE_DW0_VALID; + cso->payload[1] = + comp0 << GEN6_VE_STATE_DW1_COMP0__SHIFT | + comp1 << GEN6_VE_STATE_DW1_COMP1__SHIFT | + comp2 << GEN6_VE_STATE_DW1_COMP2__SHIFT | + comp3 << GEN6_VE_STATE_DW1_COMP3__SHIFT; } static inline void -gen7_3dstate_constant(struct ilo_builder *builder, - int subop, - const uint32_t *bufs, const int *sizes, - int num_bufs) +ve_set_cso_edgeflag(const struct ilo_dev_info *dev, + struct ilo_ve_cso *cso) { - const uint8_t cmd_len = 7; - const uint32_t dw0 = GEN6_RENDER_TYPE_RENDER | - GEN6_RENDER_SUBTYPE_3D | - subop | (cmd_len - 2); - uint32_t payload[6], *dw; - int total_read_length, i; + int format; - ILO_DEV_ASSERT(builder->dev, 7, 7.5); + ILO_DEV_ASSERT(dev, 6, 7.5); - /* VS, HS, DS, GS, and PS variants */ - assert(subop >= GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS && - subop <= GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_DS && - subop != GEN6_RENDER_OPCODE_3DSTATE_SAMPLE_MASK); + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 94: + * + * "- This bit (Edge Flag Enable) must only be ENABLED on the last + * valid VERTEX_ELEMENT structure. + * + * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC, + * and Component 1-3 Control must be set to VFCOMP_NOSTORE. + * + * - The Source Element Format must be set to the UINT format. + * + * - [DevSNB]: Edge Flags are not supported for QUADLIST + * primitives. 
Software may elect to convert QUADLIST primitives + * to some set of corresponding edge-flag-supported primitive + * types (e.g., POLYGONs) prior to submission to the 3D pipeline." + */ - assert(num_bufs <= 4); + cso->payload[0] |= GEN6_VE_STATE_DW0_EDGE_FLAG_ENABLE; + cso->payload[1] = + GEN6_VFCOMP_STORE_SRC << GEN6_VE_STATE_DW1_COMP0__SHIFT | + GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP1__SHIFT | + GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP2__SHIFT | + GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP3__SHIFT; - payload[0] = 0; - payload[1] = 0; + /* + * Edge flags have format GEN6_FORMAT_R8_UINT when defined via + * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined + * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h. + * + * Since all the hardware cares about is whether the flags are zero or not, + * we can treat them as GEN6_FORMAT_R32_UINT in the latter case. + */ + format = (cso->payload[0] >> GEN6_VE_STATE_DW0_FORMAT__SHIFT) & 0x1ff; + if (format == GEN6_FORMAT_R32_FLOAT) { + STATIC_ASSERT(GEN6_FORMAT_R32_UINT == GEN6_FORMAT_R32_FLOAT - 1); + cso->payload[0] -= (1 << GEN6_VE_STATE_DW0_FORMAT__SHIFT); + } + else { + assert(format == GEN6_FORMAT_R8_UINT); + } +} - total_read_length = 0; - for (i = 0; i < 4; i++) { - int read_len; +static inline void +gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_builder *builder, + const struct ilo_ve_state *ve, + bool last_velement_edgeflag, + bool prepend_generated_ids) +{ + uint8_t cmd_len; + uint32_t dw0, *dw; + unsigned i; - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 112: - * - * "Constant buffers must be enabled in order from Constant Buffer 0 - * to Constant Buffer 3 within this command. For example, it is - * not allowed to enable Constant Buffer 1 by programming a - * non-zero value in the VS Constant Buffer 1 Read Length without a - * non-zero value in VS Constant Buffer 0 Read Length." 
- */ - if (i >= num_bufs || !sizes[i]) { - for (; i < 4; i++) { - assert(i >= num_bufs || !sizes[i]); - payload[2 + i] = 0; - } - break; - } + ILO_DEV_ASSERT(builder->dev, 6, 7.5); - /* read lengths are in 256-bit units */ - read_len = (sizes[i] + 31) / 32; - /* the lower 5 bits are used for memory object control state */ - assert(bufs[i] % 32 == 0); + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 93: + * + * "Up to 34 (DevSNB+) vertex elements are supported." + */ + assert(ve->count + prepend_generated_ids <= 34); - payload[i / 2] |= read_len << ((i % 2) ? 16 : 0); - payload[2 + i] = bufs[i]; + STATIC_ASSERT(Elements(ve->cso[0].payload) == 2); - total_read_length += read_len; + if (!ve->count && !prepend_generated_ids) { + struct ilo_ve_cso dummy; + + ve_init_cso_with_components(builder->dev, + GEN6_VFCOMP_STORE_0, + GEN6_VFCOMP_STORE_0, + GEN6_VFCOMP_STORE_0, + GEN6_VFCOMP_STORE_1_FP, + &dummy); + + cmd_len = 3; + dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | + (cmd_len - 2); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + memcpy(&dw[1], dummy.payload, sizeof(dummy.payload)); + + return; } - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 113: - * - * "The sum of all four read length fields must be less than or equal - * to the size of 64" - */ - assert(total_read_length <= 64); + cmd_len = 2 * (ve->count + prepend_generated_ids) + 1; + dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | + (cmd_len - 2); ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = dw0; - memcpy(&dw[1], payload, sizeof(payload)); -} + dw++; -static inline void -gen7_3DSTATE_CONSTANT_VS(struct ilo_builder *builder, - const uint32_t *bufs, const int *sizes, - int num_bufs) -{ - gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS, - bufs, sizes, num_bufs); -} + if (prepend_generated_ids) { + struct ilo_ve_cso gen_ids; -static inline void -gen7_3DSTATE_CONSTANT_GS(struct ilo_builder *builder, - const uint32_t *bufs, const 
int *sizes, - int num_bufs) -{ - gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_GS, - bufs, sizes, num_bufs); -} + ve_init_cso_with_components(builder->dev, + GEN6_VFCOMP_STORE_VID, + GEN6_VFCOMP_STORE_IID, + GEN6_VFCOMP_NOSTORE, + GEN6_VFCOMP_NOSTORE, + &gen_ids); -static inline void -gen7_3DSTATE_CONSTANT_PS(struct ilo_builder *builder, - const uint32_t *bufs, const int *sizes, - int num_bufs) -{ - gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_PS, - bufs, sizes, num_bufs); + memcpy(dw, gen_ids.payload, sizeof(gen_ids.payload)); + dw += 2; + } + + if (last_velement_edgeflag) { + struct ilo_ve_cso edgeflag; + + for (i = 0; i < ve->count - 1; i++) + memcpy(&dw[2 * i], ve->cso[i].payload, sizeof(ve->cso[i].payload)); + + edgeflag = ve->cso[i]; + ve_set_cso_edgeflag(builder->dev, &edgeflag); + memcpy(&dw[2 * i], edgeflag.payload, sizeof(edgeflag.payload)); + } + else { + for (i = 0; i < ve->count; i++) + memcpy(&dw[2 * i], ve->cso[i].payload, sizeof(ve->cso[i].payload)); + } } static inline void -gen7_3DSTATE_SAMPLE_MASK(struct ilo_builder *builder, - unsigned sample_mask, - int num_samples) +gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder, + const struct ilo_ib_state *ib, + bool enable_cut_index) { - const uint8_t cmd_len = 2; - const unsigned valid_mask = ((1 << num_samples) - 1) | 0x1; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) | - (cmd_len - 2); - uint32_t *dw; + const uint8_t cmd_len = 3; + struct ilo_buffer *buf = ilo_buffer(ib->hw_resource); + uint32_t start_offset, end_offset; + int format; + unsigned pos; + uint32_t dw0, *dw; - ILO_DEV_ASSERT(builder->dev, 7, 7.5); + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + if (!buf) + return; + + /* this is moved to the new 3DSTATE_VF */ + if (ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) + assert(!enable_cut_index); + + switch (ib->hw_index_size) { + case 4: + format = GEN6_IB_DW0_FORMAT_DWORD; + break; + case 2: + format = GEN6_IB_DW0_FORMAT_WORD; + break; + 
case 1: + format = GEN6_IB_DW0_FORMAT_BYTE; + break; + default: + assert(!"unknown index size"); + format = GEN6_IB_DW0_FORMAT_BYTE; + break; + } /* - * From the Ivy Bridge PRM, volume 2 part 1, page 294: - * - * "If Number of Multisamples is NUMSAMPLES_1, bits 7:1 of this field - * (Sample Mask) must be zero. - * - * If Number of Multisamples is NUMSAMPLES_4, bits 7:4 of this field - * must be zero." + * set start_offset to 0 here and adjust pipe_draw_info::start with + * ib->draw_start_offset in 3DPRIMITIVE */ - sample_mask &= valid_mask; + start_offset = 0; + end_offset = buf->bo_size; - ilo_builder_batch_pointer(builder, cmd_len, &dw); + /* end_offset must also be aligned and is inclusive */ + end_offset -= (end_offset % ib->hw_index_size); + end_offset--; + + dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) | + format | + (cmd_len - 2); + if (enable_cut_index) + dw0 |= GEN6_IB_DW0_CUT_INDEX_ENABLE; + + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = dw0; - dw[1] = sample_mask; + ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0); + ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0); } static inline void -gen7_3DSTATE_CONSTANT_HS(struct ilo_builder *builder, - const uint32_t *bufs, const int *sizes, - int num_bufs) +gen6_3DSTATE_VS(struct ilo_builder *builder, + const struct ilo_shader_state *vs, + int num_samplers) { - gen7_3dstate_constant(builder, GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_HS, - bufs, sizes, num_bufs); -} + const uint8_t cmd_len = 6; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2); + const struct ilo_shader_cso *cso; + uint32_t dw2, dw4, dw5, *dw; -static inline void -gen7_3DSTATE_CONSTANT_DS(struct ilo_builder *builder, - const uint32_t *bufs, const int *sizes, - int num_bufs) -{ - gen7_3dstate_constant(builder, GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_DS, - bufs, sizes, num_bufs); + ILO_DEV_ASSERT(builder->dev, 6, 7.5); + + if (!vs) { + ilo_builder_batch_pointer(builder, cmd_len, 
&dw); + dw[0] = dw0; + dw[1] = 0; + dw[2] = 0; + dw[3] = 0; + dw[4] = 0; + dw[5] = 0; + + return; + } + + cso = ilo_shader_get_kernel_cso(vs); + dw2 = cso->payload[0]; + dw4 = cso->payload[1]; + dw5 = cso->payload[2]; + + dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = ilo_shader_get_kernel_offset(vs); + dw[2] = dw2; + dw[3] = 0; /* scratch */ + dw[4] = dw4; + dw[5] = dw5; } static inline void @@ -410,6 +714,139 @@ gen7_3DSTATE_DS(struct ilo_builder *builder, dw[5] = 0; } +static inline void +gen6_3DSTATE_GS(struct ilo_builder *builder, + const struct ilo_shader_state *gs, + const struct ilo_shader_state *vs, + int verts_per_prim) +{ + const uint8_t cmd_len = 7; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); + uint32_t dw1, dw2, dw4, dw5, dw6, *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + if (gs) { + const struct ilo_shader_cso *cso; + + dw1 = ilo_shader_get_kernel_offset(gs); + + cso = ilo_shader_get_kernel_cso(gs); + dw2 = cso->payload[0]; + dw4 = cso->payload[1]; + dw5 = cso->payload[2]; + dw6 = cso->payload[3]; + } + else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) { + struct ilo_shader_cso cso; + enum ilo_kernel_param param; + + switch (verts_per_prim) { + case 1: + param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET; + break; + case 2: + param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET; + break; + default: + param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET; + break; + } + + dw1 = ilo_shader_get_kernel_offset(vs) + + ilo_shader_get_kernel_param(vs, param); + + /* cannot use VS's CSO */ + ilo_gpe_init_gs_cso_gen6(builder->dev, vs, &cso); + dw2 = cso.payload[0]; + dw4 = cso.payload[1]; + dw5 = cso.payload[2]; + dw6 = cso.payload[3]; + } + else { + dw1 = 0; + dw2 = 0; + dw4 = 1 << GEN6_GS_DW4_URB_READ_LEN__SHIFT; + dw5 = GEN6_GS_DW5_STATISTICS; + dw6 = 0; + } + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] 
= dw1; + dw[2] = dw2; + dw[3] = 0; + dw[4] = dw4; + dw[5] = dw5; + dw[6] = dw6; +} + +static inline void +gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder, + int index, unsigned svbi, + unsigned max_svbi, + bool load_vertex_count) +{ + const uint8_t cmd_len = 4; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS_SVB_INDEX) | + (cmd_len - 2); + uint32_t dw1, *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + assert(index >= 0 && index < 4); + + dw1 = index << GEN6_SVBI_DW1_INDEX__SHIFT; + if (load_vertex_count) + dw1 |= GEN6_SVBI_DW1_LOAD_INTERNAL_VERTEX_COUNT; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = dw1; + dw[2] = svbi; + dw[3] = max_svbi; +} + +static inline void +gen7_3DSTATE_GS(struct ilo_builder *builder, + const struct ilo_shader_state *gs, + int num_samplers) +{ + const uint8_t cmd_len = 7; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); + const struct ilo_shader_cso *cso; + uint32_t dw2, dw4, dw5, *dw; + + ILO_DEV_ASSERT(builder->dev, 7, 7.5); + + if (!gs) { + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = 0; + dw[2] = 0; + dw[3] = 0; + dw[4] = 0; + dw[5] = GEN7_GS_DW5_STATISTICS; + dw[6] = 0; + return; + } + + cso = ilo_shader_get_kernel_cso(gs); + dw2 = cso->payload[0]; + dw4 = cso->payload[1]; + dw5 = cso->payload[2]; + + dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = ilo_shader_get_kernel_offset(gs); + dw[2] = dw2; + dw[3] = 0; /* scratch */ + dw[4] = dw4; + dw[5] = dw5; + dw[6] = 0; +} + static inline void gen7_3DSTATE_STREAMOUT(struct ilo_builder *builder, unsigned buffer_mask, @@ -471,124 +908,204 @@ gen7_3DSTATE_STREAMOUT(struct ilo_builder *builder, } static inline void -gen7_3DSTATE_SBE(struct ilo_builder *builder, - const struct ilo_rasterizer_state *rasterizer, - const struct ilo_shader_state *fs) +gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder 
*builder, + const struct pipe_stream_output_info *so_info) { - const uint8_t cmd_len = 14; - const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (cmd_len - 2); - uint32_t payload[13], *dw; + uint16_t cmd_len; + uint32_t dw0, *dw; + int buffer_selects, num_entries, i; + uint16_t so_decls[128]; ILO_DEV_ASSERT(builder->dev, 7, 7.5); - ilo_gpe_gen6_fill_3dstate_sf_sbe(builder->dev, - rasterizer, fs, payload, Elements(payload)); + buffer_selects = 0; + num_entries = 0; - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - memcpy(&dw[1], payload, sizeof(payload)); -} + if (so_info) { + int buffer_offsets[PIPE_MAX_SO_BUFFERS]; -static inline void -gen7_3DSTATE_PS(struct ilo_builder *builder, - const struct ilo_shader_state *fs, - int num_samplers, bool dual_blend) -{ - const uint8_t cmd_len = 8; - const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2); - const struct ilo_shader_cso *cso; - uint32_t dw2, dw4, dw5, *dw; + memset(buffer_offsets, 0, sizeof(buffer_offsets)); - ILO_DEV_ASSERT(builder->dev, 7, 7.5); + for (i = 0; i < so_info->num_outputs; i++) { + unsigned decl, buf, reg, mask; + + buf = so_info->output[i].output_buffer; + + /* pad with holes */ + assert(buffer_offsets[buf] <= so_info->output[i].dst_offset); + while (buffer_offsets[buf] < so_info->output[i].dst_offset) { + int num_dwords; - if (!fs) { - int max_threads; + num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf]; + if (num_dwords > 4) + num_dwords = 4; - /* GPU hangs if none of the dispatch enable bits is set */ - dw4 = GEN7_PS_DW4_8_PIXEL_DISPATCH; + decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT | + GEN7_SO_DECL_HOLE_FLAG | + ((1 << num_dwords) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT; - /* see brwCreateContext() */ - switch (ilo_dev_gen(builder->dev)) { - case ILO_GEN(7.5): - max_threads = (builder->dev->gt == 3) ? 408 : - (builder->dev->gt == 2) ? 
204 : 102; - dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT; - break; - case ILO_GEN(7): - default: - max_threads = (builder->dev->gt == 2) ? 172 : 48; - dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT; - break; + so_decls[num_entries++] = decl; + buffer_offsets[buf] += num_dwords; + } + + reg = so_info->output[i].register_index; + mask = ((1 << so_info->output[i].num_components) - 1) << + so_info->output[i].start_component; + + decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT | + reg << GEN7_SO_DECL_REG_INDEX__SHIFT | + mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT; + + so_decls[num_entries++] = decl; + buffer_selects |= 1 << buf; + buffer_offsets[buf] += so_info->output[i].num_components; } + } + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 201: + * + * "Errata: All 128 decls for all four streams must be included + * whenever this command is issued. The "Num Entries [n]" fields still + * contain the actual numbers of valid decls." + * + * Also note that "DWord Length" has 9 bits for this command, and the type + * of cmd_len is thus uint16_t. 
+ */ + cmd_len = 2 * 128 + 3; + dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_SO_DECL_LIST) | (cmd_len - 2); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = 0 << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT | + 0 << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT | + 0 << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT | + buffer_selects << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT; + dw[2] = 0 << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT | + 0 << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT | + 0 << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT | + num_entries << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT; + dw += 3; + + for (i = 0; i < num_entries; i++) { + dw[0] = so_decls[i]; + dw[1] = 0; + dw += 2; + } + for (; i < 128; i++) { + dw[0] = 0; + dw[1] = 0; + dw += 2; + } +} + +static inline void +gen7_3DSTATE_SO_BUFFER(struct ilo_builder *builder, + int index, int base, int stride, + const struct pipe_stream_output_target *so_target) +{ + const uint8_t cmd_len = 4; + const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_SO_BUFFER) | + (cmd_len - 2); + struct ilo_buffer *buf; + int end; + unsigned pos; + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 7, 7.5); + if (!so_target || !so_target->buffer) { ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = dw0; - dw[1] = 0; + dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT; dw[2] = 0; dw[3] = 0; - dw[4] = dw4; - dw[5] = 0; - dw[6] = 0; - dw[7] = 0; return; } - cso = ilo_shader_get_kernel_cso(fs); - dw2 = cso->payload[0]; - dw4 = cso->payload[1]; - dw5 = cso->payload[2]; + buf = ilo_buffer(so_target->buffer); - dw2 |= (num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; + /* DWord-aligned */ + assert(stride % 4 == 0 && base % 4 == 0); + assert(so_target->buffer_offset % 4 == 0); - if (dual_blend) - dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND; + stride &= ~3; + base = (base + so_target->buffer_offset) & ~3; + end = (base + so_target->buffer_size) & ~3; - ilo_builder_batch_pointer(builder, 
cmd_len, &dw); + pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = dw0; - dw[1] = ilo_shader_get_kernel_offset(fs); - dw[2] = dw2; - dw[3] = 0; /* scratch */ - dw[4] = dw4; - dw[5] = dw5; - dw[6] = 0; /* kernel 1 */ - dw[7] = 0; /* kernel 2 */ -} + dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT | + stride; -static inline void -gen7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(struct ilo_builder *builder, - uint32_t sf_clip_viewport) -{ - gen7_3dstate_pointer(builder, - GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, - sf_clip_viewport); + ilo_builder_batch_reloc(builder, pos + 2, + buf->bo, base, INTEL_RELOC_WRITE); + ilo_builder_batch_reloc(builder, pos + 3, + buf->bo, end, INTEL_RELOC_WRITE); } static inline void -gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC(struct ilo_builder *builder, - uint32_t cc_viewport) +gen6_3DSTATE_BINDING_TABLE_POINTERS(struct ilo_builder *builder, + uint32_t vs_binding_table, + uint32_t gs_binding_table, + uint32_t ps_binding_table) { - gen7_3dstate_pointer(builder, - GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_CC, - cc_viewport); + const uint8_t cmd_len = 4; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_BINDING_TABLE_POINTERS) | + GEN6_PTR_BINDING_TABLE_DW0_VS_CHANGED | + GEN6_PTR_BINDING_TABLE_DW0_GS_CHANGED | + GEN6_PTR_BINDING_TABLE_DW0_PS_CHANGED | + (cmd_len - 2); + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = vs_binding_table; + dw[2] = gs_binding_table; + dw[3] = ps_binding_table; } static inline void -gen7_3DSTATE_BLEND_STATE_POINTERS(struct ilo_builder *builder, - uint32_t blend_state) +gen6_3DSTATE_SAMPLER_STATE_POINTERS(struct ilo_builder *builder, + uint32_t vs_sampler_state, + uint32_t gs_sampler_state, + uint32_t ps_sampler_state) { - gen7_3dstate_pointer(builder, - GEN7_RENDER_OPCODE_3DSTATE_BLEND_STATE_POINTERS, - blend_state); + const uint8_t cmd_len = 4; + const uint32_t dw0 = GEN6_RENDER_CMD(3D, 
3DSTATE_SAMPLER_STATE_POINTERS) | + GEN6_PTR_SAMPLER_DW0_VS_CHANGED | + GEN6_PTR_SAMPLER_DW0_GS_CHANGED | + GEN6_PTR_SAMPLER_DW0_PS_CHANGED | + (cmd_len - 2); + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 6, 6); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = vs_sampler_state; + dw[2] = gs_sampler_state; + dw[3] = ps_sampler_state; } static inline void -gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(struct ilo_builder *builder, - uint32_t depth_stencil_state) +gen7_3dstate_pointer(struct ilo_builder *builder, + int subop, uint32_t pointer) { - gen7_3dstate_pointer(builder, - GEN7_RENDER_OPCODE_3DSTATE_DEPTH_STENCIL_STATE_POINTERS, - depth_stencil_state); + const uint8_t cmd_len = 2; + const uint32_t dw0 = GEN6_RENDER_TYPE_RENDER | + GEN6_RENDER_SUBTYPE_3D | + subop | (cmd_len - 2); + uint32_t *dw; + + ILO_DEV_ASSERT(builder->dev, 7, 7.5); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + dw[1] = pointer; } static inline void @@ -627,15 +1144,6 @@ gen7_3DSTATE_BINDING_TABLE_POINTERS_GS(struct ilo_builder *builder, binding_table); } -static inline void -gen7_3DSTATE_BINDING_TABLE_POINTERS_PS(struct ilo_builder *builder, - uint32_t binding_table) -{ - gen7_3dstate_pointer(builder, - GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_PS, - binding_table); -} - static inline void gen7_3DSTATE_SAMPLER_STATE_POINTERS_VS(struct ilo_builder *builder, uint32_t sampler_state) @@ -672,459 +1180,445 @@ gen7_3DSTATE_SAMPLER_STATE_POINTERS_GS(struct ilo_builder *builder, sampler_state); } -static inline void -gen7_3DSTATE_SAMPLER_STATE_POINTERS_PS(struct ilo_builder *builder, - uint32_t sampler_state) +static inline unsigned +gen6_fill_3dstate_constant(const struct ilo_dev_info *dev, + const uint32_t *bufs, const int *sizes, + int num_bufs, int max_read_length, + uint32_t *dw, int num_dwords) { - gen7_3dstate_pointer(builder, - GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_PS, - sampler_state); -} + unsigned enabled = 0x0; + 
int total_read_length, i; -static inline void -gen7_3dstate_urb(struct ilo_builder *builder, - int subop, int offset, int size, - int entry_size) -{ - const uint8_t cmd_len = 2; - const uint32_t dw0 = GEN6_RENDER_TYPE_RENDER | - GEN6_RENDER_SUBTYPE_3D | - subop | (cmd_len - 2); - const int row_size = 64; /* 512 bits */ - int alloc_size, num_entries, min_entries, max_entries; - uint32_t *dw; + assert(num_dwords == 4); - ILO_DEV_ASSERT(builder->dev, 7, 7.5); + total_read_length = 0; + for (i = 0; i < 4; i++) { + if (i < num_bufs && sizes[i]) { + /* in 256-bit units minus one */ + const int read_len = (sizes[i] + 31) / 32 - 1; - /* VS, HS, DS, and GS variants */ - assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_URB_VS && - subop <= GEN7_RENDER_OPCODE_3DSTATE_URB_GS); + assert(bufs[i] % 32 == 0); + assert(read_len < 32); - /* in multiples of 8KB */ - assert(offset % 8192 == 0); - offset /= 8192; + enabled |= 1 << i; + dw[i] = bufs[i] | read_len; - /* in multiple of 512-bit rows */ - alloc_size = (entry_size + row_size - 1) / row_size; - if (!alloc_size) - alloc_size = 1; + total_read_length += read_len + 1; + } + else { + dw[i] = 0; + } + } - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 34: - * - * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may - * cause performance to decrease due to banking in the URB. Element - * sizes of 16 to 20 should be programmed with six 512-bit URB rows." - */ - if (subop == GEN7_RENDER_OPCODE_3DSTATE_URB_VS && alloc_size == 5) - alloc_size = 6; + assert(total_read_length <= max_read_length); - /* in multiples of 8 */ - num_entries = (size / row_size / alloc_size) & ~7; + return enabled; +} - switch (subop) { - case GEN7_RENDER_OPCODE_3DSTATE_URB_VS: - switch (ilo_dev_gen(builder->dev)) { - case ILO_GEN(7.5): - max_entries = (builder->dev->gt >= 2) ? 1664 : 640; - min_entries = (builder->dev->gt >= 2) ? 64 : 32; - break; - case ILO_GEN(7): - default: - max_entries = (builder->dev->gt == 2) ? 
704 : 512; - min_entries = 32; - break; - } +static inline void +gen6_3DSTATE_CONSTANT_VS(struct ilo_builder *builder, + const uint32_t *bufs, const int *sizes, + int num_bufs) +{ + const uint8_t cmd_len = 5; + uint32_t buf_dw[4], buf_enabled; + uint32_t dw0, *dw; - assert(num_entries >= min_entries); - if (num_entries > max_entries) - num_entries = max_entries; - break; - case GEN7_RENDER_OPCODE_3DSTATE_URB_HS: - max_entries = (builder->dev->gt == 2) ? 64 : 32; - if (num_entries > max_entries) - num_entries = max_entries; - break; - case GEN7_RENDER_OPCODE_3DSTATE_URB_DS: - if (num_entries) - assert(num_entries >= 138); - break; - case GEN7_RENDER_OPCODE_3DSTATE_URB_GS: - switch (ilo_dev_gen(builder->dev)) { - case ILO_GEN(7.5): - max_entries = (builder->dev->gt >= 2) ? 640 : 256; - break; - case ILO_GEN(7): - default: - max_entries = (builder->dev->gt == 2) ? 320 : 192; - break; - } + ILO_DEV_ASSERT(builder->dev, 6, 6); + assert(num_bufs <= 4); - if (num_entries > max_entries) - num_entries = max_entries; - break; - default: - break; - } + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 138: + * + * "The sum of all four read length fields (each incremented to + * represent the actual read length) must be less than or equal to 32" + */ + buf_enabled = gen6_fill_3dstate_constant(builder->dev, + bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw)); + + dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_VS) | + buf_enabled << 12 | + (cmd_len - 2); ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = dw0; - dw[1] = offset << GEN7_URB_ANY_DW1_OFFSET__SHIFT | - (alloc_size - 1) << GEN7_URB_ANY_DW1_ENTRY_SIZE__SHIFT | - num_entries; + memcpy(&dw[1], buf_dw, sizeof(buf_dw)); } static inline void -gen7_3DSTATE_URB_VS(struct ilo_builder *builder, - int offset, int size, int entry_size) +gen6_3DSTATE_CONSTANT_GS(struct ilo_builder *builder, + const uint32_t *bufs, const int *sizes, + int num_bufs) { - gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_VS, - 
offset, size, entry_size); -} + const uint8_t cmd_len = 5; + uint32_t buf_dw[4], buf_enabled; + uint32_t dw0, *dw; -static inline void -gen7_3DSTATE_URB_HS(struct ilo_builder *builder, - int offset, int size, int entry_size) -{ - gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_HS, - offset, size, entry_size); -} + ILO_DEV_ASSERT(builder->dev, 6, 6); + assert(num_bufs <= 4); -static inline void -gen7_3DSTATE_URB_DS(struct ilo_builder *builder, - int offset, int size, int entry_size) -{ - gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_DS, - offset, size, entry_size); -} + /* + * From the Sandy Bridge PRM, volume 2 part 1, page 161: + * + * "The sum of all four read length fields (each incremented to + * represent the actual read length) must be less than or equal to 64" + */ + buf_enabled = gen6_fill_3dstate_constant(builder->dev, + bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw)); -static inline void -gen7_3DSTATE_URB_GS(struct ilo_builder *builder, - int offset, int size, int entry_size) -{ - gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_GS, - offset, size, entry_size); + dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_GS) | + buf_enabled << 12 | + (cmd_len - 2); + + ilo_builder_batch_pointer(builder, cmd_len, &dw); + dw[0] = dw0; + memcpy(&dw[1], buf_dw, sizeof(buf_dw)); } static inline void -gen7_3dstate_push_constant_alloc(struct ilo_builder *builder, - int subop, int offset, int size) +gen7_3dstate_constant(struct ilo_builder *builder, + int subop, + const uint32_t *bufs, const int *sizes, + int num_bufs) { - const uint8_t cmd_len = 2; + const uint8_t cmd_len = 7; const uint32_t dw0 = GEN6_RENDER_TYPE_RENDER | GEN6_RENDER_SUBTYPE_3D | subop | (cmd_len - 2); - uint32_t *dw; - int end; + uint32_t payload[6], *dw; + int total_read_length, i; + + ILO_DEV_ASSERT(builder->dev, 7, 7.5); + + /* VS, HS, DS, GS, and PS variants */ + assert(subop >= GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS && + subop <= GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_DS && 
+ subop != GEN6_RENDER_OPCODE_3DSTATE_SAMPLE_MASK); + + assert(num_bufs <= 4); + + payload[0] = 0; + payload[1] = 0; + + total_read_length = 0; + for (i = 0; i < 4; i++) { + int read_len; + + /* + * From the Ivy Bridge PRM, volume 2 part 1, page 112: + * + * "Constant buffers must be enabled in order from Constant Buffer 0 + * to Constant Buffer 3 within this command. For example, it is + * not allowed to enable Constant Buffer 1 by programming a + * non-zero value in the VS Constant Buffer 1 Read Length without a + * non-zero value in VS Constant Buffer 0 Read Length." + */ + if (i >= num_bufs || !sizes[i]) { + for (; i < 4; i++) { + assert(i >= num_bufs || !sizes[i]); + payload[2 + i] = 0; + } + break; + } + + /* read lengths are in 256-bit units */ + read_len = (sizes[i] + 31) / 32; + /* the lower 5 bits are used for memory object control state */ + assert(bufs[i] % 32 == 0); - ILO_DEV_ASSERT(builder->dev, 7, 7.5); + payload[i / 2] |= read_len << ((i % 2) ? 16 : 0); + payload[2 + i] = bufs[i]; - /* VS, HS, DS, GS, and PS variants */ - assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS && - subop <= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS); + total_read_length += read_len; + } /* - * From the Ivy Bridge PRM, volume 2 part 1, page 68: - * - * "(A table that says the maximum size of each constant buffer is - * 16KB") - * - * From the Ivy Bridge PRM, volume 2 part 1, page 115: - * - * "The sum of the Constant Buffer Offset and the Constant Buffer Size - * may not exceed the maximum value of the Constant Buffer Size." + * From the Ivy Bridge PRM, volume 2 part 1, page 113: * - * Thus, the valid range of buffer end is [0KB, 16KB]. 
+ * "The sum of all four read length fields must be less than or equal + * to the size of 64" */ - end = (offset + size) / 1024; - if (end > 16) { - assert(!"invalid constant buffer end"); - end = 16; - } - - /* the valid range of buffer offset is [0KB, 15KB] */ - offset = (offset + 1023) / 1024; - if (offset > 15) { - assert(!"invalid constant buffer offset"); - offset = 15; - } - - if (offset > end) { - assert(!size); - offset = end; - } - - /* the valid range of buffer size is [0KB, 15KB] */ - size = end - offset; - if (size > 15) { - assert(!"invalid constant buffer size"); - size = 15; - } + assert(total_read_length <= 64); ilo_builder_batch_pointer(builder, cmd_len, &dw); dw[0] = dw0; - dw[1] = offset << GEN7_PCB_ALLOC_ANY_DW1_OFFSET__SHIFT | - size; -} - -static inline void -gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(struct ilo_builder *builder, - int offset, int size) -{ - gen7_3dstate_push_constant_alloc(builder, - GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS, offset, size); + memcpy(&dw[1], payload, sizeof(payload)); } static inline void -gen7_3DSTATE_PUSH_CONSTANT_ALLOC_HS(struct ilo_builder *builder, - int offset, int size) +gen7_3DSTATE_CONSTANT_VS(struct ilo_builder *builder, + const uint32_t *bufs, const int *sizes, + int num_bufs) { - gen7_3dstate_push_constant_alloc(builder, - GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_HS, offset, size); + gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS, + bufs, sizes, num_bufs); } static inline void -gen7_3DSTATE_PUSH_CONSTANT_ALLOC_DS(struct ilo_builder *builder, - int offset, int size) +gen7_3DSTATE_CONSTANT_HS(struct ilo_builder *builder, + const uint32_t *bufs, const int *sizes, + int num_bufs) { - gen7_3dstate_push_constant_alloc(builder, - GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_DS, offset, size); + gen7_3dstate_constant(builder, GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_HS, + bufs, sizes, num_bufs); } static inline void -gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(struct ilo_builder 
*builder, - int offset, int size) +gen7_3DSTATE_CONSTANT_DS(struct ilo_builder *builder, + const uint32_t *bufs, const int *sizes, + int num_bufs) { - gen7_3dstate_push_constant_alloc(builder, - GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_GS, offset, size); + gen7_3dstate_constant(builder, GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_DS, + bufs, sizes, num_bufs); } static inline void -gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(struct ilo_builder *builder, - int offset, int size) +gen7_3DSTATE_CONSTANT_GS(struct ilo_builder *builder, + const uint32_t *bufs, const int *sizes, + int num_bufs) { - gen7_3dstate_push_constant_alloc(builder, - GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS, offset, size); + gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_GS, + bufs, sizes, num_bufs); } -static inline void -gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder, - const struct pipe_stream_output_info *so_info) +static inline uint32_t +gen6_BINDING_TABLE_STATE(struct ilo_builder *builder, + uint32_t *surface_states, + int num_surface_states) { - uint16_t cmd_len; - uint32_t dw0, *dw; - int buffer_selects, num_entries, i; - uint16_t so_decls[128]; + const int state_align = 32; + const int state_len = num_surface_states; - ILO_DEV_ASSERT(builder->dev, 7, 7.5); + ILO_DEV_ASSERT(builder->dev, 6, 7.5); - buffer_selects = 0; - num_entries = 0; + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 69: + * + * "It is stored as an array of up to 256 elements..." 
+ */ + assert(num_surface_states <= 256); - if (so_info) { - int buffer_offsets[PIPE_MAX_SO_BUFFERS]; + if (!num_surface_states) + return 0; - memset(buffer_offsets, 0, sizeof(buffer_offsets)); + return ilo_builder_surface_write(builder, ILO_BUILDER_ITEM_BINDING_TABLE, + state_align, state_len, surface_states); +} - for (i = 0; i < so_info->num_outputs; i++) { - unsigned decl, buf, reg, mask; +static inline uint32_t +gen6_SURFACE_STATE(struct ilo_builder *builder, + const struct ilo_view_surface *surf, + bool for_render) +{ + const int state_align = 32; + const int state_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 8 : 6; + uint32_t state_offset; - buf = so_info->output[i].output_buffer; + ILO_DEV_ASSERT(builder->dev, 6, 7.5); - /* pad with holes */ - assert(buffer_offsets[buf] <= so_info->output[i].dst_offset); - while (buffer_offsets[buf] < so_info->output[i].dst_offset) { - int num_dwords; + state_offset = ilo_builder_surface_write(builder, ILO_BUILDER_ITEM_SURFACE, + state_align, state_len, surf->payload); - num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf]; - if (num_dwords > 4) - num_dwords = 4; + if (surf->bo) { + ilo_builder_surface_reloc(builder, state_offset, 1, surf->bo, + surf->payload[1], (for_render) ? 
INTEL_RELOC_WRITE : 0); + } - decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT | - GEN7_SO_DECL_HOLE_FLAG | - ((1 << num_dwords) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT; + return state_offset; +} - so_decls[num_entries++] = decl; - buffer_offsets[buf] += num_dwords; - } +static inline uint32_t +gen6_so_SURFACE_STATE(struct ilo_builder *builder, + const struct pipe_stream_output_target *so, + const struct pipe_stream_output_info *so_info, + int so_index) +{ + struct ilo_buffer *buf = ilo_buffer(so->buffer); + unsigned bo_offset, struct_size; + enum pipe_format elem_format; + struct ilo_view_surface surf; - reg = so_info->output[i].register_index; - mask = ((1 << so_info->output[i].num_components) - 1) << - so_info->output[i].start_component; + ILO_DEV_ASSERT(builder->dev, 6, 6); - decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT | - reg << GEN7_SO_DECL_REG_INDEX__SHIFT | - mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT; + bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4; + struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4; - so_decls[num_entries++] = decl; - buffer_selects |= 1 << buf; - buffer_offsets[buf] += so_info->output[i].num_components; - } + switch (so_info->output[so_index].num_components) { + case 1: + elem_format = PIPE_FORMAT_R32_FLOAT; + break; + case 2: + elem_format = PIPE_FORMAT_R32G32_FLOAT; + break; + case 3: + elem_format = PIPE_FORMAT_R32G32B32_FLOAT; + break; + case 4: + elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + break; + default: + assert(!"unexpected SO components length"); + elem_format = PIPE_FORMAT_R32_FLOAT; + break; } - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 201: - * - * "Errata: All 128 decls for all four streams must be included - * whenever this command is issued. The "Num Entries [n]" fields still - * contain the actual numbers of valid decls." - * - * Also note that "DWord Length" has 9 bits for this command, and the type - * of cmd_len is thus uint16_t. 
- */ - cmd_len = 2 * 128 + 3; - dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_SO_DECL_LIST) | (cmd_len - 2); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = 0 << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT | - 0 << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT | - 0 << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT | - buffer_selects << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT; - dw[2] = 0 << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT | - 0 << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT | - 0 << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT | - num_entries << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT; - dw += 3; + ilo_gpe_init_view_surface_for_buffer_gen6(builder->dev, buf, bo_offset, + so->buffer_size, struct_size, elem_format, false, true, &surf); - for (i = 0; i < num_entries; i++) { - dw[0] = so_decls[i]; - dw[1] = 0; - dw += 2; - } - for (; i < 128; i++) { - dw[0] = 0; - dw[1] = 0; - dw += 2; - } + return gen6_SURFACE_STATE(builder, &surf, false); } -static inline void -gen7_3DSTATE_SO_BUFFER(struct ilo_builder *builder, - int index, int base, int stride, - const struct pipe_stream_output_target *so_target) +static inline uint32_t +gen6_SAMPLER_STATE(struct ilo_builder *builder, + const struct ilo_sampler_cso * const *samplers, + const struct pipe_sampler_view * const *views, + const uint32_t *sampler_border_colors, + int num_samplers) { - const uint8_t cmd_len = 4; - const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_SO_BUFFER) | - (cmd_len - 2); - struct ilo_buffer *buf; - int end; - unsigned pos; - uint32_t *dw; + const int state_align = 32; + const int state_len = 4 * num_samplers; + uint32_t state_offset, *dw; + int i; - ILO_DEV_ASSERT(builder->dev, 7, 7.5); + ILO_DEV_ASSERT(builder->dev, 6, 7.5); - if (!so_target || !so_target->buffer) { - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT; - dw[2] = 0; - dw[3] = 0; + /* + * From the Sandy Bridge PRM, volume 4 part 
1, page 101: + * + * "The sampler state is stored as an array of up to 16 elements..." + */ + assert(num_samplers <= 16); - return; - } + if (!num_samplers) + return 0; - buf = ilo_buffer(so_target->buffer); + state_offset = ilo_builder_state_pointer(builder, + ILO_BUILDER_ITEM_SAMPLER, state_align, state_len, &dw); + + for (i = 0; i < num_samplers; i++) { + const struct ilo_sampler_cso *sampler = samplers[i]; + const struct pipe_sampler_view *view = views[i]; + const uint32_t border_color = sampler_border_colors[i]; + uint32_t dw_filter, dw_wrap; + + /* there may be holes */ + if (!sampler || !view) { + /* disabled sampler */ + dw[0] = 1 << 31; + dw[1] = 0; + dw[2] = 0; + dw[3] = 0; + dw += 4; + + continue; + } - /* DWord-aligned */ - assert(stride % 4 == 0 && base % 4 == 0); - assert(so_target->buffer_offset % 4 == 0); + /* determine filter and wrap modes */ + switch (view->texture->target) { + case PIPE_TEXTURE_1D: + dw_filter = (sampler->anisotropic) ? + sampler->dw_filter_aniso : sampler->dw_filter; + dw_wrap = sampler->dw_wrap_1d; + break; + case PIPE_TEXTURE_3D: + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 103: + * + * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for + * surfaces of type SURFTYPE_3D." + */ + dw_filter = sampler->dw_filter; + dw_wrap = sampler->dw_wrap; + break; + case PIPE_TEXTURE_CUBE: + dw_filter = (sampler->anisotropic) ? + sampler->dw_filter_aniso : sampler->dw_filter; + dw_wrap = sampler->dw_wrap_cube; + break; + default: + dw_filter = (sampler->anisotropic) ? 
+ sampler->dw_filter_aniso : sampler->dw_filter; + dw_wrap = sampler->dw_wrap; + break; + } - stride &= ~3; - base = (base + so_target->buffer_offset) & ~3; - end = (base + so_target->buffer_size) & ~3; + dw[0] = sampler->payload[0]; + dw[1] = sampler->payload[1]; + assert(!(border_color & 0x1f)); + dw[2] = border_color; + dw[3] = sampler->payload[2]; - pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT | - stride; + dw[0] |= dw_filter; - ilo_builder_batch_reloc(builder, pos + 2, - buf->bo, base, INTEL_RELOC_WRITE); - ilo_builder_batch_reloc(builder, pos + 3, - buf->bo, end, INTEL_RELOC_WRITE); + if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) { + dw[3] |= dw_wrap; + } + else { + /* + * From the Sandy Bridge PRM, volume 4 part 1, page 21: + * + * "[DevSNB] Errata: Incorrect behavior is observed in cases + * where the min and mag mode filters are different and + * SurfMinLOD is nonzero. The determination of MagMode uses the + * following equation instead of the one in the above + * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)" + * + * As a way to work around that, we set Base to + * view->u.tex.first_level. + */ + dw[0] |= view->u.tex.first_level << 22; + + dw[1] |= dw_wrap; + } + + dw += 4; + } + + return state_offset; } -static inline void -gen7_3DPRIMITIVE(struct ilo_builder *builder, - const struct pipe_draw_info *info, - const struct ilo_ib_state *ib, - bool rectlist) +static inline uint32_t +gen6_SAMPLER_BORDER_COLOR_STATE(struct ilo_builder *builder, + const struct ilo_sampler_cso *sampler) { - const uint8_t cmd_len = 7; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | (cmd_len - 2); - const int prim = (rectlist) ? - GEN6_3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode); - const int vb_access = (info->indexed) ? - GEN7_3DPRIM_DW1_ACCESS_RANDOM : - GEN7_3DPRIM_DW1_ACCESS_SEQUENTIAL; - const uint32_t vb_start = info->start + - ((info->indexed) ? 
ib->draw_start_offset : 0); - uint32_t *dw; + const int state_align = 32; + const int state_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 4 : 12; - ILO_DEV_ASSERT(builder->dev, 7, 7.5); + ILO_DEV_ASSERT(builder->dev, 6, 7.5); - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = vb_access | prim; - dw[2] = info->count; - dw[3] = vb_start; - dw[4] = info->instance_count; - dw[5] = info->start_instance; - dw[6] = info->index_bias; + assert(Elements(sampler->payload) >= 3 + state_len); + + /* see ilo_gpe_init_sampler_cso() */ + return ilo_builder_state_write(builder, ILO_BUILDER_ITEM_BLOB, + state_align, state_len, &sampler->payload[3]); } static inline uint32_t -gen7_SF_CLIP_VIEWPORT(struct ilo_builder *builder, - const struct ilo_viewport_cso *viewports, - unsigned num_viewports) +gen6_push_constant_buffer(struct ilo_builder *builder, + int size, void **pcb) { - const int state_align = 64; - const int state_len = 16 * num_viewports; - uint32_t state_offset, *dw; - unsigned i; - - ILO_DEV_ASSERT(builder->dev, 7, 7.5); - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 270: - * - * "The viewport-specific state used by both the SF and CL units - * (SF_CLIP_VIEWPORT) is stored as an array of up to 16 elements, each - * of which contains the DWords described below. The start of each - * element is spaced 16 DWords apart. The location of first element of - * the array, as specified by both Pointer to SF_VIEWPORT and Pointer - * to CLIP_VIEWPORT, is aligned to a 64-byte boundary." + * For all VS, GS, FS, and CS push constant buffers, they must be aligned + * to 32 bytes, and their sizes are specified in 256-bit units. 
*/ - assert(num_viewports && num_viewports <= 16); + const int state_align = 32; + const int state_len = align(size, 32) / 4; + uint32_t state_offset; + char *buf; + + ILO_DEV_ASSERT(builder->dev, 6, 7.5); state_offset = ilo_builder_state_pointer(builder, - ILO_BUILDER_ITEM_SF_VIEWPORT, state_align, state_len, &dw); + ILO_BUILDER_ITEM_BLOB, state_align, state_len, (uint32_t **) &buf); - for (i = 0; i < num_viewports; i++) { - const struct ilo_viewport_cso *vp = &viewports[i]; + /* zero out the unused range */ + if (size < state_len * 4) + memset(&buf[size], 0, state_len * 4 - size); - dw[0] = fui(vp->m00); - dw[1] = fui(vp->m11); - dw[2] = fui(vp->m22); - dw[3] = fui(vp->m30); - dw[4] = fui(vp->m31); - dw[5] = fui(vp->m32); - dw[6] = 0; - dw[7] = 0; - dw[8] = fui(vp->min_gbx); - dw[9] = fui(vp->max_gbx); - dw[10] = fui(vp->min_gby); - dw[11] = fui(vp->max_gby); - dw[12] = 0; - dw[13] = 0; - dw[14] = 0; - dw[15] = 0; - - dw += 16; - } + if (pcb) + *pcb = buf; return state_offset; } -#endif /* ILO_GPE_GEN7_H */ +#endif /* ILO_BUILDER_3D_TOP_H */ diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen6.h b/src/gallium/drivers/ilo/ilo_gpe_gen6.h index eb537f801b8..f417710237a 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen6.h +++ b/src/gallium/drivers/ilo/ilo_gpe_gen6.h @@ -32,10 +32,6 @@ #include "intel_winsys.h" #include "ilo_common.h" -#include "ilo_cp.h" -#include "ilo_format.h" -#include "ilo_resource.h" -#include "ilo_shader.h" #include "ilo_gpe.h" /** @@ -57,34 +53,6 @@ ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling) } } -/** - * Translate a pipe primitive type to the matching hardware primitive type. 
- */ -static inline int -ilo_gpe_gen6_translate_pipe_prim(unsigned prim) -{ - static const int prim_mapping[PIPE_PRIM_MAX] = { - [PIPE_PRIM_POINTS] = GEN6_3DPRIM_POINTLIST, - [PIPE_PRIM_LINES] = GEN6_3DPRIM_LINELIST, - [PIPE_PRIM_LINE_LOOP] = GEN6_3DPRIM_LINELOOP, - [PIPE_PRIM_LINE_STRIP] = GEN6_3DPRIM_LINESTRIP, - [PIPE_PRIM_TRIANGLES] = GEN6_3DPRIM_TRILIST, - [PIPE_PRIM_TRIANGLE_STRIP] = GEN6_3DPRIM_TRISTRIP, - [PIPE_PRIM_TRIANGLE_FAN] = GEN6_3DPRIM_TRIFAN, - [PIPE_PRIM_QUADS] = GEN6_3DPRIM_QUADLIST, - [PIPE_PRIM_QUAD_STRIP] = GEN6_3DPRIM_QUADSTRIP, - [PIPE_PRIM_POLYGON] = GEN6_3DPRIM_POLYGON, - [PIPE_PRIM_LINES_ADJACENCY] = GEN6_3DPRIM_LINELIST_ADJ, - [PIPE_PRIM_LINE_STRIP_ADJACENCY] = GEN6_3DPRIM_LINESTRIP_ADJ, - [PIPE_PRIM_TRIANGLES_ADJACENCY] = GEN6_3DPRIM_TRILIST_ADJ, - [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = GEN6_3DPRIM_TRISTRIP_ADJ, - }; - - assert(prim_mapping[prim]); - - return prim_mapping[prim]; -} - /** * Translate a pipe texture target to the matching hardware surface type. */ @@ -112,1014 +80,6 @@ ilo_gpe_gen6_translate_texture(enum pipe_texture_target target) } } -/** - * Fill in DW2 to DW7 of 3DSTATE_SF. - */ -static inline void -ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev, - const struct ilo_rasterizer_state *rasterizer, - int num_samples, - enum pipe_format depth_format, - uint32_t *payload, unsigned payload_len) -{ - assert(payload_len == Elements(rasterizer->sf.payload)); - - if (rasterizer) { - const struct ilo_rasterizer_sf *sf = &rasterizer->sf; - - memcpy(payload, sf->payload, sizeof(sf->payload)); - if (num_samples > 1) - payload[1] |= sf->dw_msaa; - } - else { - payload[0] = 0; - payload[1] = (num_samples > 1) ? 
GEN7_SF_DW2_MSRASTMODE_ON_PATTERN : 0; - payload[2] = 0; - payload[3] = 0; - payload[4] = 0; - payload[5] = 0; - } - - if (ilo_dev_gen(dev) >= ILO_GEN(7)) { - int format; - - /* separate stencil */ - switch (depth_format) { - case PIPE_FORMAT_Z16_UNORM: - format = GEN6_ZFORMAT_D16_UNORM; - break; - case PIPE_FORMAT_Z32_FLOAT: - case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: - format = GEN6_ZFORMAT_D32_FLOAT; - break; - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_Z24_UNORM_S8_UINT: - format = GEN6_ZFORMAT_D24_UNORM_X8_UINT; - break; - default: - /* FLOAT surface is assumed when there is no depth buffer */ - format = GEN6_ZFORMAT_D32_FLOAT; - break; - } - - payload[0] |= format << GEN7_SF_DW1_DEPTH_FORMAT__SHIFT; - } -} - -/** - * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF. - */ -static inline void -ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev, - const struct ilo_rasterizer_state *rasterizer, - const struct ilo_shader_state *fs, - uint32_t *dw, int num_dwords) -{ - int output_count, vue_offset, vue_len; - const struct ilo_kernel_routing *routing; - - ILO_DEV_ASSERT(dev, 6, 7.5); - assert(num_dwords == 13); - - if (!fs) { - memset(dw, 0, sizeof(dw[0]) * num_dwords); - dw[0] = 1 << GEN7_SBE_DW1_URB_READ_LEN__SHIFT; - return; - } - - output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT); - assert(output_count <= 32); - - routing = ilo_shader_get_kernel_routing(fs); - - vue_offset = routing->source_skip; - assert(vue_offset % 2 == 0); - vue_offset /= 2; - - vue_len = (routing->source_len + 1) / 2; - if (!vue_len) - vue_len = 1; - - dw[0] = output_count << GEN7_SBE_DW1_ATTR_COUNT__SHIFT | - vue_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT | - vue_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT; - if (routing->swizzle_enable) - dw[0] |= GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE; - - switch (rasterizer->state.sprite_coord_mode) { - case PIPE_SPRITE_COORD_UPPER_LEFT: - dw[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT; - break; - case 
PIPE_SPRITE_COORD_LOWER_LEFT: - dw[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_LOWERLEFT; - break; - } - - STATIC_ASSERT(Elements(routing->swizzles) >= 16); - memcpy(&dw[1], routing->swizzles, 2 * 16); - - /* - * From the Ivy Bridge PRM, volume 2 part 1, page 268: - * - * "This field (Point Sprite Texture Coordinate Enable) must be - * programmed to 0 when non-point primitives are rendered." - * - * TODO We do not check that yet. - */ - dw[9] = routing->point_sprite_enable; - - dw[10] = routing->const_interp_enable; - - /* WrapShortest enables */ - dw[11] = 0; - dw[12] = 0; -} - -static inline void -gen6_3DSTATE_VF_STATISTICS(struct ilo_builder *builder, - bool enable) -{ - const uint8_t cmd_len = 1; - const uint32_t dw0 = GEN6_RENDER_CMD(SINGLE_DW, 3DSTATE_VF_STATISTICS) | - enable; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - ilo_builder_batch_write(builder, cmd_len, &dw0); -} - -static inline void -gen6_3DSTATE_BINDING_TABLE_POINTERS(struct ilo_builder *builder, - uint32_t vs_binding_table, - uint32_t gs_binding_table, - uint32_t ps_binding_table) -{ - const uint8_t cmd_len = 4; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_BINDING_TABLE_POINTERS) | - GEN6_PTR_BINDING_TABLE_DW0_VS_CHANGED | - GEN6_PTR_BINDING_TABLE_DW0_GS_CHANGED | - GEN6_PTR_BINDING_TABLE_DW0_PS_CHANGED | - (cmd_len - 2); - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = vs_binding_table; - dw[2] = gs_binding_table; - dw[3] = ps_binding_table; -} - -static inline void -gen6_3DSTATE_SAMPLER_STATE_POINTERS(struct ilo_builder *builder, - uint32_t vs_sampler_state, - uint32_t gs_sampler_state, - uint32_t ps_sampler_state) -{ - const uint8_t cmd_len = 4; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLER_STATE_POINTERS) | - GEN6_PTR_SAMPLER_DW0_VS_CHANGED | - GEN6_PTR_SAMPLER_DW0_GS_CHANGED | - GEN6_PTR_SAMPLER_DW0_PS_CHANGED | - (cmd_len - 2); - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - 
ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = vs_sampler_state; - dw[2] = gs_sampler_state; - dw[3] = ps_sampler_state; -} - -static inline void -gen6_3DSTATE_URB(struct ilo_builder *builder, - int vs_total_size, int gs_total_size, - int vs_entry_size, int gs_entry_size) -{ - const uint8_t cmd_len = 3; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_URB) | (cmd_len - 2); - const int row_size = 128; /* 1024 bits */ - int vs_alloc_size, gs_alloc_size; - int vs_num_entries, gs_num_entries; - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - /* in 1024-bit URB rows */ - vs_alloc_size = (vs_entry_size + row_size - 1) / row_size; - gs_alloc_size = (gs_entry_size + row_size - 1) / row_size; - - /* the valid range is [1, 5] */ - if (!vs_alloc_size) - vs_alloc_size = 1; - if (!gs_alloc_size) - gs_alloc_size = 1; - assert(vs_alloc_size <= 5 && gs_alloc_size <= 5); - - /* the valid range is [24, 256] in multiples of 4 */ - vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3; - if (vs_num_entries > 256) - vs_num_entries = 256; - assert(vs_num_entries >= 24); - - /* the valid range is [0, 256] in multiples of 4 */ - gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3; - if (gs_num_entries > 256) - gs_num_entries = 256; - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT | - vs_num_entries << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT; - dw[2] = gs_num_entries << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT | - (gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT; -} - -static inline void -gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder, - const struct ilo_ve_state *ve, - const struct ilo_vb_state *vb) -{ - uint8_t cmd_len; - uint32_t dw0, *dw; - unsigned hw_idx, pos; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 82: - * - * "From 1 to 33 VBs can be specified..." 
- */ - assert(ve->vb_count <= 33); - - if (!ve->vb_count) - return; - - cmd_len = 1 + 4 * ve->vb_count; - dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_BUFFERS) | - (cmd_len - 2); - - pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - - dw++; - pos++; - for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) { - const unsigned instance_divisor = ve->instance_divisors[hw_idx]; - const unsigned pipe_idx = ve->vb_mapping[hw_idx]; - const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx]; - - dw[0] = hw_idx << GEN6_VB_STATE_DW0_INDEX__SHIFT; - - if (instance_divisor) - dw[0] |= GEN6_VB_STATE_DW0_ACCESS_INSTANCEDATA; - else - dw[0] |= GEN6_VB_STATE_DW0_ACCESS_VERTEXDATA; - - if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) - dw[0] |= GEN7_VB_STATE_DW0_ADDR_MODIFIED; - - /* use null vb if there is no buffer or the stride is out of range */ - if (cso->buffer && cso->stride <= 2048) { - const struct ilo_buffer *buf = ilo_buffer(cso->buffer); - const uint32_t start_offset = cso->buffer_offset; - const uint32_t end_offset = buf->bo_size - 1; - - dw[0] |= cso->stride << GEN6_VB_STATE_DW0_PITCH__SHIFT; - ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0); - ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0); - } - else { - dw[0] |= 1 << 13; - dw[1] = 0; - dw[2] = 0; - } - - dw[3] = instance_divisor; - - dw += 4; - pos += 4; - } -} - -static inline void -ve_init_cso_with_components(const struct ilo_dev_info *dev, - int comp0, int comp1, int comp2, int comp3, - struct ilo_ve_cso *cso) -{ - ILO_DEV_ASSERT(dev, 6, 7.5); - - STATIC_ASSERT(Elements(cso->payload) >= 2); - cso->payload[0] = GEN6_VE_STATE_DW0_VALID; - cso->payload[1] = - comp0 << GEN6_VE_STATE_DW1_COMP0__SHIFT | - comp1 << GEN6_VE_STATE_DW1_COMP1__SHIFT | - comp2 << GEN6_VE_STATE_DW1_COMP2__SHIFT | - comp3 << GEN6_VE_STATE_DW1_COMP3__SHIFT; -} - -static inline void -ve_set_cso_edgeflag(const struct ilo_dev_info *dev, - struct ilo_ve_cso *cso) -{ - int format; - - 
ILO_DEV_ASSERT(dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 94: - * - * "- This bit (Edge Flag Enable) must only be ENABLED on the last - * valid VERTEX_ELEMENT structure. - * - * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC, - * and Component 1-3 Control must be set to VFCOMP_NOSTORE. - * - * - The Source Element Format must be set to the UINT format. - * - * - [DevSNB]: Edge Flags are not supported for QUADLIST - * primitives. Software may elect to convert QUADLIST primitives - * to some set of corresponding edge-flag-supported primitive - * types (e.g., POLYGONs) prior to submission to the 3D pipeline." - */ - - cso->payload[0] |= GEN6_VE_STATE_DW0_EDGE_FLAG_ENABLE; - cso->payload[1] = - GEN6_VFCOMP_STORE_SRC << GEN6_VE_STATE_DW1_COMP0__SHIFT | - GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP1__SHIFT | - GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP2__SHIFT | - GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP3__SHIFT; - - /* - * Edge flags have format GEN6_FORMAT_R8_UINT when defined via - * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined - * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h. - * - * Since all the hardware cares about is whether the flags are zero or not, - * we can treat them as GEN6_FORMAT_R32_UINT in the latter case. 
- */ - format = (cso->payload[0] >> GEN6_VE_STATE_DW0_FORMAT__SHIFT) & 0x1ff; - if (format == GEN6_FORMAT_R32_FLOAT) { - STATIC_ASSERT(GEN6_FORMAT_R32_UINT == GEN6_FORMAT_R32_FLOAT - 1); - cso->payload[0] -= (1 << GEN6_VE_STATE_DW0_FORMAT__SHIFT); - } - else { - assert(format == GEN6_FORMAT_R8_UINT); - } -} - -static inline void -gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_builder *builder, - const struct ilo_ve_state *ve, - bool last_velement_edgeflag, - bool prepend_generated_ids) -{ - uint8_t cmd_len; - uint32_t dw0, *dw; - unsigned i; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 93: - * - * "Up to 34 (DevSNB+) vertex elements are supported." - */ - assert(ve->count + prepend_generated_ids <= 34); - - STATIC_ASSERT(Elements(ve->cso[0].payload) == 2); - - if (!ve->count && !prepend_generated_ids) { - struct ilo_ve_cso dummy; - - ve_init_cso_with_components(builder->dev, - GEN6_VFCOMP_STORE_0, - GEN6_VFCOMP_STORE_0, - GEN6_VFCOMP_STORE_0, - GEN6_VFCOMP_STORE_1_FP, - &dummy); - - cmd_len = 3; - dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | - (cmd_len - 2); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - memcpy(&dw[1], dummy.payload, sizeof(dummy.payload)); - - return; - } - - cmd_len = 2 * (ve->count + prepend_generated_ids) + 1; - dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | - (cmd_len - 2); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw++; - - if (prepend_generated_ids) { - struct ilo_ve_cso gen_ids; - - ve_init_cso_with_components(builder->dev, - GEN6_VFCOMP_STORE_VID, - GEN6_VFCOMP_STORE_IID, - GEN6_VFCOMP_NOSTORE, - GEN6_VFCOMP_NOSTORE, - &gen_ids); - - memcpy(dw, gen_ids.payload, sizeof(gen_ids.payload)); - dw += 2; - } - - if (last_velement_edgeflag) { - struct ilo_ve_cso edgeflag; - - for (i = 0; i < ve->count - 1; i++) - memcpy(&dw[2 * i], ve->cso[i].payload, sizeof(ve->cso[i].payload)); - - edgeflag = ve->cso[i]; - 
ve_set_cso_edgeflag(builder->dev, &edgeflag); - memcpy(&dw[2 * i], edgeflag.payload, sizeof(edgeflag.payload)); - } - else { - for (i = 0; i < ve->count; i++) - memcpy(&dw[2 * i], ve->cso[i].payload, sizeof(ve->cso[i].payload)); - } -} - -static inline void -gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder, - const struct ilo_ib_state *ib, - bool enable_cut_index) -{ - const uint8_t cmd_len = 3; - struct ilo_buffer *buf = ilo_buffer(ib->hw_resource); - uint32_t start_offset, end_offset; - int format; - unsigned pos; - uint32_t dw0, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - if (!buf) - return; - - /* this is moved to the new 3DSTATE_VF */ - if (ilo_dev_gen(builder->dev) >= ILO_GEN(7.5)) - assert(!enable_cut_index); - - switch (ib->hw_index_size) { - case 4: - format = GEN6_IB_DW0_FORMAT_DWORD; - break; - case 2: - format = GEN6_IB_DW0_FORMAT_WORD; - break; - case 1: - format = GEN6_IB_DW0_FORMAT_BYTE; - break; - default: - assert(!"unknown index size"); - format = GEN6_IB_DW0_FORMAT_BYTE; - break; - } - - /* - * set start_offset to 0 here and adjust pipe_draw_info::start with - * ib->draw_start_offset in 3DPRIMITIVE - */ - start_offset = 0; - end_offset = buf->bo_size; - - /* end_offset must also be aligned and is inclusive */ - end_offset -= (end_offset % ib->hw_index_size); - end_offset--; - - dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) | - format | - (cmd_len - 2); - if (enable_cut_index) - dw0 |= GEN6_IB_DW0_CUT_INDEX_ENABLE; - - pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0); - ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0); -} - -static inline void -gen6_3DSTATE_VIEWPORT_STATE_POINTERS(struct ilo_builder *builder, - uint32_t clip_viewport, - uint32_t sf_viewport, - uint32_t cc_viewport) -{ - const uint8_t cmd_len = 4; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VIEWPORT_STATE_POINTERS) | - GEN6_PTR_VP_DW0_CLIP_CHANGED | - 
GEN6_PTR_VP_DW0_SF_CHANGED | - GEN6_PTR_VP_DW0_CC_CHANGED | - (cmd_len - 2); - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = clip_viewport; - dw[2] = sf_viewport; - dw[3] = cc_viewport; -} - -static inline void -gen6_3DSTATE_CC_STATE_POINTERS(struct ilo_builder *builder, - uint32_t blend_state, - uint32_t depth_stencil_state, - uint32_t color_calc_state) -{ - const uint8_t cmd_len = 4; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CC_STATE_POINTERS) | - (cmd_len - 2); - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = blend_state | 1; - dw[2] = depth_stencil_state | 1; - dw[3] = color_calc_state | 1; -} - -static inline void -gen6_3DSTATE_SCISSOR_STATE_POINTERS(struct ilo_builder *builder, - uint32_t scissor_rect) -{ - const uint8_t cmd_len = 2; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SCISSOR_STATE_POINTERS) | - (cmd_len - 2); - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = scissor_rect; -} - -static inline void -gen6_3DSTATE_VS(struct ilo_builder *builder, - const struct ilo_shader_state *vs, - int num_samplers) -{ - const uint8_t cmd_len = 6; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2); - const struct ilo_shader_cso *cso; - uint32_t dw2, dw4, dw5, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - if (!vs) { - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = 0; - dw[2] = 0; - dw[3] = 0; - dw[4] = 0; - dw[5] = 0; - - return; - } - - cso = ilo_shader_get_kernel_cso(vs); - dw2 = cso->payload[0]; - dw4 = cso->payload[1]; - dw5 = cso->payload[2]; - - dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = ilo_shader_get_kernel_offset(vs); - dw[2] = 
dw2; - dw[3] = 0; /* scratch */ - dw[4] = dw4; - dw[5] = dw5; -} - -static inline void -gen6_3DSTATE_GS(struct ilo_builder *builder, - const struct ilo_shader_state *gs, - const struct ilo_shader_state *vs, - int verts_per_prim) -{ - const uint8_t cmd_len = 7; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); - uint32_t dw1, dw2, dw4, dw5, dw6, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - if (gs) { - const struct ilo_shader_cso *cso; - - dw1 = ilo_shader_get_kernel_offset(gs); - - cso = ilo_shader_get_kernel_cso(gs); - dw2 = cso->payload[0]; - dw4 = cso->payload[1]; - dw5 = cso->payload[2]; - dw6 = cso->payload[3]; - } - else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) { - struct ilo_shader_cso cso; - enum ilo_kernel_param param; - - switch (verts_per_prim) { - case 1: - param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET; - break; - case 2: - param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET; - break; - default: - param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET; - break; - } - - dw1 = ilo_shader_get_kernel_offset(vs) + - ilo_shader_get_kernel_param(vs, param); - - /* cannot use VS's CSO */ - ilo_gpe_init_gs_cso_gen6(builder->dev, vs, &cso); - dw2 = cso.payload[0]; - dw4 = cso.payload[1]; - dw5 = cso.payload[2]; - dw6 = cso.payload[3]; - } - else { - dw1 = 0; - dw2 = 0; - dw4 = 1 << GEN6_GS_DW4_URB_READ_LEN__SHIFT; - dw5 = GEN6_GS_DW5_STATISTICS; - dw6 = 0; - } - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = dw1; - dw[2] = dw2; - dw[3] = 0; - dw[4] = dw4; - dw[5] = dw5; - dw[6] = dw6; -} - -static inline void -gen6_3DSTATE_CLIP(struct ilo_builder *builder, - const struct ilo_rasterizer_state *rasterizer, - const struct ilo_shader_state *fs, - bool enable_guardband, - int num_viewports) -{ - const uint8_t cmd_len = 4; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) | (cmd_len - 2); - uint32_t dw1, dw2, dw3, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - if (rasterizer) { - int interps; - - dw1 = 
rasterizer->clip.payload[0]; - dw2 = rasterizer->clip.payload[1]; - dw3 = rasterizer->clip.payload[2]; - - if (enable_guardband && rasterizer->clip.can_enable_guardband) - dw2 |= GEN6_CLIP_DW2_GB_TEST_ENABLE; - - interps = (fs) ? ilo_shader_get_kernel_param(fs, - ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0; - - if (interps & (GEN6_INTERP_NONPERSPECTIVE_PIXEL | - GEN6_INTERP_NONPERSPECTIVE_CENTROID | - GEN6_INTERP_NONPERSPECTIVE_SAMPLE)) - dw2 |= GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE; - - dw3 |= GEN6_CLIP_DW3_RTAINDEX_FORCED_ZERO | - (num_viewports - 1); - } - else { - dw1 = 0; - dw2 = 0; - dw3 = 0; - } - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = dw1; - dw[2] = dw2; - dw[3] = dw3; -} - -static inline void -gen6_3DSTATE_SF(struct ilo_builder *builder, - const struct ilo_rasterizer_state *rasterizer, - const struct ilo_shader_state *fs) -{ - const uint8_t cmd_len = 20; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2); - uint32_t payload_raster[6], payload_sbe[13], *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - ilo_gpe_gen6_fill_3dstate_sf_raster(builder->dev, rasterizer, - 1, PIPE_FORMAT_NONE, payload_raster, Elements(payload_raster)); - ilo_gpe_gen6_fill_3dstate_sf_sbe(builder->dev, rasterizer, - fs, payload_sbe, Elements(payload_sbe)); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = payload_sbe[0]; - memcpy(&dw[2], payload_raster, sizeof(payload_raster)); - memcpy(&dw[8], &payload_sbe[1], sizeof(payload_sbe) - 4); -} - -static inline void -gen6_3DSTATE_WM(struct ilo_builder *builder, - const struct ilo_shader_state *fs, - int num_samplers, - const struct ilo_rasterizer_state *rasterizer, - bool dual_blend, bool cc_may_kill, - uint32_t hiz_op) -{ - const uint8_t cmd_len = 9; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); - const int num_samples = 1; - const struct ilo_shader_cso *fs_cso; - uint32_t dw2, dw4, dw5, dw6, *dw; - - 
ILO_DEV_ASSERT(builder->dev, 6, 6); - - if (!fs) { - /* see brwCreateContext() */ - const int max_threads = (builder->dev->gt == 2) ? 80 : 40; - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = 0; - dw[2] = 0; - dw[3] = 0; - dw[4] = hiz_op; - /* honor the valid range even if dispatching is disabled */ - dw[5] = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT; - dw[6] = 0; - dw[7] = 0; - dw[8] = 0; - - return; - } - - fs_cso = ilo_shader_get_kernel_cso(fs); - dw2 = fs_cso->payload[0]; - dw4 = fs_cso->payload[1]; - dw5 = fs_cso->payload[2]; - dw6 = fs_cso->payload[3]; - - dw2 |= (num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 248: - * - * "This bit (Statistics Enable) must be disabled if either of these - * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer Resolve - * Enable or Depth Buffer Resolve Enable." - */ - assert(!hiz_op); - dw4 |= GEN6_WM_DW4_STATISTICS; - - if (cc_may_kill) - dw5 |= GEN6_WM_DW5_PS_KILL | GEN6_WM_DW5_PS_ENABLE; - - if (dual_blend) - dw5 |= GEN6_WM_DW5_DUAL_SOURCE_BLEND; - - dw5 |= rasterizer->wm.payload[0]; - - dw6 |= rasterizer->wm.payload[1]; - - if (num_samples > 1) { - dw6 |= rasterizer->wm.dw_msaa_rast | - rasterizer->wm.dw_msaa_disp; - } - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = ilo_shader_get_kernel_offset(fs); - dw[2] = dw2; - dw[3] = 0; /* scratch */ - dw[4] = dw4; - dw[5] = dw5; - dw[6] = dw6; - dw[7] = 0; /* kernel 1 */ - dw[8] = 0; /* kernel 2 */ -} - -static inline unsigned -gen6_fill_3dstate_constant(const struct ilo_dev_info *dev, - const uint32_t *bufs, const int *sizes, - int num_bufs, int max_read_length, - uint32_t *dw, int num_dwords) -{ - unsigned enabled = 0x0; - int total_read_length, i; - - assert(num_dwords == 4); - - total_read_length = 0; - for (i = 0; i < 4; i++) { - if (i < num_bufs && sizes[i]) { - /* in 256-bit units minus one */ - const int read_len = 
(sizes[i] + 31) / 32 - 1; - - assert(bufs[i] % 32 == 0); - assert(read_len < 32); - - enabled |= 1 << i; - dw[i] = bufs[i] | read_len; - - total_read_length += read_len + 1; - } - else { - dw[i] = 0; - } - } - - assert(total_read_length <= max_read_length); - - return enabled; -} - -static inline void -gen6_3DSTATE_CONSTANT_VS(struct ilo_builder *builder, - const uint32_t *bufs, const int *sizes, - int num_bufs) -{ - const uint8_t cmd_len = 5; - uint32_t buf_dw[4], buf_enabled; - uint32_t dw0, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - assert(num_bufs <= 4); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 138: - * - * "The sum of all four read length fields (each incremented to - * represent the actual read length) must be less than or equal to 32" - */ - buf_enabled = gen6_fill_3dstate_constant(builder->dev, - bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw)); - - dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_VS) | - buf_enabled << 12 | - (cmd_len - 2); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - memcpy(&dw[1], buf_dw, sizeof(buf_dw)); -} - -static inline void -gen6_3DSTATE_CONSTANT_GS(struct ilo_builder *builder, - const uint32_t *bufs, const int *sizes, - int num_bufs) -{ - const uint8_t cmd_len = 5; - uint32_t buf_dw[4], buf_enabled; - uint32_t dw0, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - assert(num_bufs <= 4); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 161: - * - * "The sum of all four read length fields (each incremented to - * represent the actual read length) must be less than or equal to 64" - */ - buf_enabled = gen6_fill_3dstate_constant(builder->dev, - bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw)); - - dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_GS) | - buf_enabled << 12 | - (cmd_len - 2); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - memcpy(&dw[1], buf_dw, sizeof(buf_dw)); -} - -static inline void -gen6_3DSTATE_CONSTANT_PS(struct ilo_builder *builder, - 
const uint32_t *bufs, const int *sizes, - int num_bufs) -{ - const uint8_t cmd_len = 5; - uint32_t buf_dw[4], buf_enabled; - uint32_t dw0, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - assert(num_bufs <= 4); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 287: - * - * "The sum of all four read length fields (each incremented to - * represent the actual read length) must be less than or equal to 64" - */ - buf_enabled = gen6_fill_3dstate_constant(builder->dev, - bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw)); - - dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_PS) | - buf_enabled << 12 | - (cmd_len - 2); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - memcpy(&dw[1], buf_dw, sizeof(buf_dw)); -} - -static inline void -gen6_3DSTATE_SAMPLE_MASK(struct ilo_builder *builder, - unsigned sample_mask) -{ - const uint8_t cmd_len = 2; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) | - (cmd_len - 2); - const unsigned valid_mask = 0xf; - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - sample_mask &= valid_mask; - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = sample_mask; -} - -static inline void -gen6_3DSTATE_DRAWING_RECTANGLE(struct ilo_builder *builder, - unsigned x, unsigned y, - unsigned width, unsigned height) -{ - const uint8_t cmd_len = 4; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_DRAWING_RECTANGLE) | - (cmd_len - 2); - unsigned xmax = x + width - 1; - unsigned ymax = y + height - 1; - int rect_limit; - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) { - rect_limit = 16383; - } - else { - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 230: - * - * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min) - * must be an even number" - */ - assert(y % 2 == 0); - - rect_limit = 8191; - } - - if (x > rect_limit) x = rect_limit; - if (y > rect_limit) y = rect_limit; - if (xmax > rect_limit) xmax = 
rect_limit; - if (ymax > rect_limit) ymax = rect_limit; - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - - dw[0] = dw0; - dw[1] = y << 16 | x; - dw[2] = ymax << 16 | xmax; - - /* - * There is no need to set the origin. It is intended to support front - * buffer rendering. - */ - dw[3] = 0; -} - static inline void zs_align_surface(const struct ilo_dev_info *dev, unsigned align_w, unsigned align_h, @@ -1155,843 +115,4 @@ zs_align_surface(const struct ilo_dev_info *dev, zs->payload[2] = dw3; } -static inline void -gen6_3DSTATE_DEPTH_BUFFER(struct ilo_builder *builder, - const struct ilo_zs_surface *zs) -{ - const uint8_t cmd_len = 7; - unsigned pos; - uint32_t dw0, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - dw0 = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? - GEN7_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER) : - GEN6_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER); - dw0 |= (cmd_len - 2); - - pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = zs->payload[0]; - - if (zs->bo) { - ilo_builder_batch_reloc(builder, pos + 2, - zs->bo, zs->payload[1], INTEL_RELOC_WRITE); - } else { - dw[2] = 0; - } - - dw[3] = zs->payload[2]; - dw[4] = zs->payload[3]; - dw[5] = zs->payload[4]; - dw[6] = zs->payload[5]; -} - -static inline void -gen6_3DSTATE_POLY_STIPPLE_OFFSET(struct ilo_builder *builder, - int x_offset, int y_offset) -{ - const uint8_t cmd_len = 2; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_OFFSET) | - (cmd_len - 2); - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - assert(x_offset >= 0 && x_offset <= 31); - assert(y_offset >= 0 && y_offset <= 31); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = x_offset << 8 | y_offset; -} - -static inline void -gen6_3DSTATE_POLY_STIPPLE_PATTERN(struct ilo_builder *builder, - const struct pipe_poly_stipple *pattern) -{ - const uint8_t cmd_len = 33; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_PATTERN) | - (cmd_len - 2); - uint32_t *dw; 
- int i; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - STATIC_ASSERT(Elements(pattern->stipple) == 32); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw++; - - for (i = 0; i < 32; i++) - dw[i] = pattern->stipple[i]; -} - -static inline void -gen6_3DSTATE_LINE_STIPPLE(struct ilo_builder *builder, - unsigned pattern, unsigned factor) -{ - const uint8_t cmd_len = 3; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_LINE_STIPPLE) | - (cmd_len - 2); - uint32_t *dw; - unsigned inverse; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - assert((pattern & 0xffff) == pattern); - assert(factor >= 1 && factor <= 256); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = pattern; - - if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) { - /* in U1.16 */ - inverse = (unsigned) (65536.0f / factor); - dw[2] = inverse << 15 | factor; - } - else { - /* in U1.13 */ - inverse = (unsigned) (8192.0f / factor); - dw[2] = inverse << 16 | factor; - } -} - -static inline void -gen6_3DSTATE_AA_LINE_PARAMETERS(struct ilo_builder *builder) -{ - const uint8_t cmd_len = 3; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_AA_LINE_PARAMETERS) | - (cmd_len - 2); - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = 0 << 16 | 0; - dw[2] = 0 << 16 | 0; -} - -static inline void -gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder, - int index, unsigned svbi, - unsigned max_svbi, - bool load_vertex_count) -{ - const uint8_t cmd_len = 4; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS_SVB_INDEX) | - (cmd_len - 2); - uint32_t dw1, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - assert(index >= 0 && index < 4); - - dw1 = index << GEN6_SVBI_DW1_INDEX__SHIFT; - if (load_vertex_count) - dw1 |= GEN6_SVBI_DW1_LOAD_INTERNAL_VERTEX_COUNT; - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = dw1; - dw[2] = svbi; - dw[3] = max_svbi; -} - -static inline void 
-gen6_3DSTATE_MULTISAMPLE(struct ilo_builder *builder, - int num_samples, - const uint32_t *packed_sample_pos, - bool pixel_location_center) -{ - const uint8_t cmd_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 4 : 3; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_MULTISAMPLE) | - (cmd_len - 2); - uint32_t dw1, dw2, dw3, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - dw1 = (pixel_location_center) ? - GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER : GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER; - - switch (num_samples) { - case 0: - case 1: - dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1; - dw2 = 0; - dw3 = 0; - break; - case 4: - dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4; - dw2 = packed_sample_pos[0]; - dw3 = 0; - break; - case 8: - assert(ilo_dev_gen(builder->dev) >= ILO_GEN(7)); - dw1 |= GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8; - dw2 = packed_sample_pos[0]; - dw3 = packed_sample_pos[1]; - break; - default: - assert(!"unsupported sample count"); - dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1; - dw2 = 0; - dw3 = 0; - break; - } - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = dw1; - dw[2] = dw2; - if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) - dw[2] = dw3; -} - -static inline void -gen6_3DSTATE_STENCIL_BUFFER(struct ilo_builder *builder, - const struct ilo_zs_surface *zs) -{ - const uint8_t cmd_len = 3; - uint32_t dw0, *dw; - unsigned pos; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - dw0 = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 
- GEN7_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER) : - GEN6_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER); - dw0 |= (cmd_len - 2); - - pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - /* see ilo_gpe_init_zs_surface() */ - dw[1] = zs->payload[6]; - - if (zs->separate_s8_bo) { - ilo_builder_batch_reloc(builder, pos + 2, - zs->separate_s8_bo, zs->payload[7], INTEL_RELOC_WRITE); - } else { - dw[2] = 0; - } -} - -static inline void -gen6_3DSTATE_HIER_DEPTH_BUFFER(struct ilo_builder *builder, - const struct ilo_zs_surface *zs) -{ - const uint8_t cmd_len = 3; - uint32_t dw0, *dw; - unsigned pos; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - dw0 = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? - GEN7_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER) : - GEN6_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER); - dw0 |= (cmd_len - 2); - - pos = ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - /* see ilo_gpe_init_zs_surface() */ - dw[1] = zs->payload[8]; - - if (zs->hiz_bo) { - ilo_builder_batch_reloc(builder, pos + 2, - zs->hiz_bo, zs->payload[9], INTEL_RELOC_WRITE); - } else { - dw[2] = 0; - } -} - -static inline void -gen6_3DSTATE_CLEAR_PARAMS(struct ilo_builder *builder, - uint32_t clear_val) -{ - const uint8_t cmd_len = 2; - const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CLEAR_PARAMS) | - GEN6_CLEAR_PARAMS_DW0_VALID | - (cmd_len - 2); - uint32_t *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = clear_val; -} - -static inline void -gen6_3DPRIMITIVE(struct ilo_builder *builder, - const struct pipe_draw_info *info, - const struct ilo_ib_state *ib, - bool rectlist) -{ - const uint8_t cmd_len = 6; - const int prim = (rectlist) ? - GEN6_3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode); - const int vb_access = (info->indexed) ? - GEN6_3DPRIM_DW0_ACCESS_RANDOM : GEN6_3DPRIM_DW0_ACCESS_SEQUENTIAL; - const uint32_t vb_start = info->start + - ((info->indexed) ? 
ib->draw_start_offset : 0); - uint32_t dw0, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - dw0 = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | - vb_access | - prim << GEN6_3DPRIM_DW0_TYPE__SHIFT | - (cmd_len - 2); - - ilo_builder_batch_pointer(builder, cmd_len, &dw); - dw[0] = dw0; - dw[1] = info->count; - dw[2] = vb_start; - dw[3] = info->instance_count; - dw[4] = info->start_instance; - dw[5] = info->index_bias; -} - -static inline uint32_t -gen6_SF_VIEWPORT(struct ilo_builder *builder, - const struct ilo_viewport_cso *viewports, - unsigned num_viewports) -{ - const int state_align = 32; - const int state_len = 8 * num_viewports; - uint32_t state_offset, *dw; - unsigned i; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 262: - * - * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is - * stored as an array of up to 16 elements..." - */ - assert(num_viewports && num_viewports <= 16); - - state_offset = ilo_builder_state_pointer(builder, - ILO_BUILDER_ITEM_SF_VIEWPORT, state_align, state_len, &dw); - - for (i = 0; i < num_viewports; i++) { - const struct ilo_viewport_cso *vp = &viewports[i]; - - dw[0] = fui(vp->m00); - dw[1] = fui(vp->m11); - dw[2] = fui(vp->m22); - dw[3] = fui(vp->m30); - dw[4] = fui(vp->m31); - dw[5] = fui(vp->m32); - dw[6] = 0; - dw[7] = 0; - - dw += 8; - } - - return state_offset; -} - -static inline uint32_t -gen6_CLIP_VIEWPORT(struct ilo_builder *builder, - const struct ilo_viewport_cso *viewports, - unsigned num_viewports) -{ - const int state_align = 32; - const int state_len = 4 * num_viewports; - uint32_t state_offset, *dw; - unsigned i; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 193: - * - * "The viewport-related state is stored as an array of up to 16 - * elements..." 
- */ - assert(num_viewports && num_viewports <= 16); - - state_offset = ilo_builder_state_pointer(builder, - ILO_BUILDER_ITEM_CLIP_VIEWPORT, state_align, state_len, &dw); - - for (i = 0; i < num_viewports; i++) { - const struct ilo_viewport_cso *vp = &viewports[i]; - - dw[0] = fui(vp->min_gbx); - dw[1] = fui(vp->max_gbx); - dw[2] = fui(vp->min_gby); - dw[3] = fui(vp->max_gby); - - dw += 4; - } - - return state_offset; -} - -static inline uint32_t -gen6_CC_VIEWPORT(struct ilo_builder *builder, - const struct ilo_viewport_cso *viewports, - unsigned num_viewports) -{ - const int state_align = 32; - const int state_len = 2 * num_viewports; - uint32_t state_offset, *dw; - unsigned i; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 385: - * - * "The viewport state is stored as an array of up to 16 elements..." - */ - assert(num_viewports && num_viewports <= 16); - - state_offset = ilo_builder_state_pointer(builder, - ILO_BUILDER_ITEM_CC_VIEWPORT, state_align, state_len, &dw); - - for (i = 0; i < num_viewports; i++) { - const struct ilo_viewport_cso *vp = &viewports[i]; - - dw[0] = fui(vp->min_z); - dw[1] = fui(vp->max_z); - - dw += 2; - } - - return state_offset; -} - -static inline uint32_t -gen6_COLOR_CALC_STATE(struct ilo_builder *builder, - const struct pipe_stencil_ref *stencil_ref, - ubyte alpha_ref, - const struct pipe_blend_color *blend_color) -{ - const int state_align = 64; - const int state_len = 6; - uint32_t state_offset, *dw; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - state_offset = ilo_builder_state_pointer(builder, - ILO_BUILDER_ITEM_COLOR_CALC, state_align, state_len, &dw); - - dw[0] = stencil_ref->ref_value[0] << 24 | - stencil_ref->ref_value[1] << 16 | - GEN6_CC_DW0_ALPHATEST_UNORM8; - dw[1] = alpha_ref; - dw[2] = fui(blend_color->color[0]); - dw[3] = fui(blend_color->color[1]); - dw[4] = fui(blend_color->color[2]); - dw[5] = fui(blend_color->color[3]); - - return state_offset; -} - -static 
inline uint32_t -gen6_BLEND_STATE(struct ilo_builder *builder, - const struct ilo_blend_state *blend, - const struct ilo_fb_state *fb, - const struct ilo_dsa_state *dsa) -{ - const int state_align = 64; - int state_len; - uint32_t state_offset, *dw; - unsigned num_targets, i; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 376: - * - * "The blend state is stored as an array of up to 8 elements..." - */ - num_targets = fb->state.nr_cbufs; - assert(num_targets <= 8); - - if (!num_targets) { - if (!dsa->dw_alpha) - return 0; - /* to be able to reference alpha func */ - num_targets = 1; - } - - state_len = 2 * num_targets; - - state_offset = ilo_builder_state_pointer(builder, - ILO_BUILDER_ITEM_BLEND, state_align, state_len, &dw); - - for (i = 0; i < num_targets; i++) { - const unsigned idx = (blend->independent_blend_enable) ? i : 0; - const struct ilo_blend_cso *cso = &blend->cso[idx]; - const int num_samples = fb->num_samples; - const struct util_format_description *format_desc = - (idx < fb->state.nr_cbufs && fb->state.cbufs[idx]) ? 
- util_format_description(fb->state.cbufs[idx]->format) : NULL; - bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one; - - rt_is_unorm = true; - rt_is_pure_integer = false; - rt_dst_alpha_forced_one = false; - - if (format_desc) { - int ch; - - switch (format_desc->format) { - case PIPE_FORMAT_B8G8R8X8_UNORM: - /* force alpha to one when the HW format has alpha */ - assert(ilo_translate_render_format(builder->dev, - PIPE_FORMAT_B8G8R8X8_UNORM) == - GEN6_FORMAT_B8G8R8A8_UNORM); - rt_dst_alpha_forced_one = true; - break; - default: - break; - } - - for (ch = 0; ch < 4; ch++) { - if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID) - continue; - - if (format_desc->channel[ch].pure_integer) { - rt_is_unorm = false; - rt_is_pure_integer = true; - break; - } - - if (!format_desc->channel[ch].normalized || - format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED) - rt_is_unorm = false; - } - } - - dw[0] = cso->payload[0]; - dw[1] = cso->payload[1]; - - if (!rt_is_pure_integer) { - if (rt_dst_alpha_forced_one) - dw[0] |= cso->dw_blend_dst_alpha_forced_one; - else - dw[0] |= cso->dw_blend; - } - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 365: - * - * "Logic Ops are only supported on *_UNORM surfaces (excluding - * _SRGB variants), otherwise Logic Ops must be DISABLED." - * - * Since logicop is ignored for non-UNORM color buffers, no special care - * is needed. - */ - if (rt_is_unorm) - dw[1] |= cso->dw_logicop; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 356: - * - * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage - * Dither both must be disabled." - * - * There is no such limitation on GEN7, or for AlphaToOne. But GL - * requires that anyway. - */ - if (num_samples > 1) - dw[1] |= cso->dw_alpha_mod; - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 382: - * - * "Alpha Test can only be enabled if Pixel Shader outputs a float - * alpha value." 
- */ - if (!rt_is_pure_integer) - dw[1] |= dsa->dw_alpha; - - dw += 2; - } - - return state_offset; -} - -static inline uint32_t -gen6_DEPTH_STENCIL_STATE(struct ilo_builder *builder, - const struct ilo_dsa_state *dsa) -{ - const int state_align = 64; - const int state_len = 3; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - STATIC_ASSERT(Elements(dsa->payload) >= state_len); - - return ilo_builder_state_write(builder, ILO_BUILDER_ITEM_DEPTH_STENCIL, - state_align, state_len, dsa->payload); -} - -static inline uint32_t -gen6_SCISSOR_RECT(struct ilo_builder *builder, - const struct ilo_scissor_state *scissor, - unsigned num_viewports) -{ - const int state_align = 32; - const int state_len = 2 * num_viewports; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 2 part 1, page 263: - * - * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is - * stored as an array of up to 16 elements..." - */ - assert(num_viewports && num_viewports <= 16); - assert(Elements(scissor->payload) >= state_len); - - return ilo_builder_state_write(builder, ILO_BUILDER_ITEM_SCISSOR_RECT, - state_align, state_len, scissor->payload); -} - -static inline uint32_t -gen6_BINDING_TABLE_STATE(struct ilo_builder *builder, - uint32_t *surface_states, - int num_surface_states) -{ - const int state_align = 32; - const int state_len = num_surface_states; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 69: - * - * "It is stored as an array of up to 256 elements..." - */ - assert(num_surface_states <= 256); - - if (!num_surface_states) - return 0; - - return ilo_builder_surface_write(builder, ILO_BUILDER_ITEM_BINDING_TABLE, - state_align, state_len, surface_states); -} - -static inline uint32_t -gen6_SURFACE_STATE(struct ilo_builder *builder, - const struct ilo_view_surface *surf, - bool for_render) -{ - const int state_align = 32; - const int state_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 
8 : 6; - uint32_t state_offset; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - state_offset = ilo_builder_surface_write(builder, ILO_BUILDER_ITEM_SURFACE, - state_align, state_len, surf->payload); - - if (surf->bo) { - ilo_builder_surface_reloc(builder, state_offset, 1, surf->bo, - surf->payload[1], (for_render) ? INTEL_RELOC_WRITE : 0); - } - - return state_offset; -} - -static inline uint32_t -gen6_so_SURFACE_STATE(struct ilo_builder *builder, - const struct pipe_stream_output_target *so, - const struct pipe_stream_output_info *so_info, - int so_index) -{ - struct ilo_buffer *buf = ilo_buffer(so->buffer); - unsigned bo_offset, struct_size; - enum pipe_format elem_format; - struct ilo_view_surface surf; - - ILO_DEV_ASSERT(builder->dev, 6, 6); - - bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4; - struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4; - - switch (so_info->output[so_index].num_components) { - case 1: - elem_format = PIPE_FORMAT_R32_FLOAT; - break; - case 2: - elem_format = PIPE_FORMAT_R32G32_FLOAT; - break; - case 3: - elem_format = PIPE_FORMAT_R32G32B32_FLOAT; - break; - case 4: - elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - break; - default: - assert(!"unexpected SO components length"); - elem_format = PIPE_FORMAT_R32_FLOAT; - break; - } - - ilo_gpe_init_view_surface_for_buffer_gen6(builder->dev, buf, bo_offset, - so->buffer_size, struct_size, elem_format, false, true, &surf); - - return gen6_SURFACE_STATE(builder, &surf, false); -} - -static inline uint32_t -gen6_SAMPLER_STATE(struct ilo_builder *builder, - const struct ilo_sampler_cso * const *samplers, - const struct pipe_sampler_view * const *views, - const uint32_t *sampler_border_colors, - int num_samplers) -{ - const int state_align = 32; - const int state_len = 4 * num_samplers; - uint32_t state_offset, *dw; - int i; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 101: - * - * "The sampler 
state is stored as an array of up to 16 elements..." - */ - assert(num_samplers <= 16); - - if (!num_samplers) - return 0; - - state_offset = ilo_builder_state_pointer(builder, - ILO_BUILDER_ITEM_SAMPLER, state_align, state_len, &dw); - - for (i = 0; i < num_samplers; i++) { - const struct ilo_sampler_cso *sampler = samplers[i]; - const struct pipe_sampler_view *view = views[i]; - const uint32_t border_color = sampler_border_colors[i]; - uint32_t dw_filter, dw_wrap; - - /* there may be holes */ - if (!sampler || !view) { - /* disabled sampler */ - dw[0] = 1 << 31; - dw[1] = 0; - dw[2] = 0; - dw[3] = 0; - dw += 4; - - continue; - } - - /* determine filter and wrap modes */ - switch (view->texture->target) { - case PIPE_TEXTURE_1D: - dw_filter = (sampler->anisotropic) ? - sampler->dw_filter_aniso : sampler->dw_filter; - dw_wrap = sampler->dw_wrap_1d; - break; - case PIPE_TEXTURE_3D: - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 103: - * - * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for - * surfaces of type SURFTYPE_3D." - */ - dw_filter = sampler->dw_filter; - dw_wrap = sampler->dw_wrap; - break; - case PIPE_TEXTURE_CUBE: - dw_filter = (sampler->anisotropic) ? - sampler->dw_filter_aniso : sampler->dw_filter; - dw_wrap = sampler->dw_wrap_cube; - break; - default: - dw_filter = (sampler->anisotropic) ? - sampler->dw_filter_aniso : sampler->dw_filter; - dw_wrap = sampler->dw_wrap; - break; - } - - dw[0] = sampler->payload[0]; - dw[1] = sampler->payload[1]; - assert(!(border_color & 0x1f)); - dw[2] = border_color; - dw[3] = sampler->payload[2]; - - dw[0] |= dw_filter; - - if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) { - dw[3] |= dw_wrap; - } - else { - /* - * From the Sandy Bridge PRM, volume 4 part 1, page 21: - * - * "[DevSNB] Errata: Incorrect behavior is observed in cases - * where the min and mag mode filters are different and - * SurfMinLOD is nonzero. 
The determination of MagMode uses the - * following equation instead of the one in the above - * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)" - * - * As a way to work around that, we set Base to - * view->u.tex.first_level. - */ - dw[0] |= view->u.tex.first_level << 22; - - dw[1] |= dw_wrap; - } - - dw += 4; - } - - return state_offset; -} - -static inline uint32_t -gen6_SAMPLER_BORDER_COLOR_STATE(struct ilo_builder *builder, - const struct ilo_sampler_cso *sampler) -{ - const int state_align = 32; - const int state_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 4 : 12; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - assert(Elements(sampler->payload) >= 3 + state_len); - - /* see ilo_gpe_init_sampler_cso() */ - return ilo_builder_state_write(builder, ILO_BUILDER_ITEM_BLOB, - state_align, state_len, &sampler->payload[3]); -} - -static inline uint32_t -gen6_push_constant_buffer(struct ilo_builder *builder, - int size, void **pcb) -{ - /* - * For all VS, GS, FS, and CS push constant buffers, they must be aligned - * to 32 bytes, and their sizes are specified in 256-bit units. 
- */ - const int state_align = 32; - const int state_len = align(size, 32) / 4; - uint32_t state_offset; - char *buf; - - ILO_DEV_ASSERT(builder->dev, 6, 7.5); - - state_offset = ilo_builder_state_pointer(builder, - ILO_BUILDER_ITEM_BLOB, state_align, state_len, (uint32_t **) &buf); - - /* zero out the unused range */ - if (size < state_len * 4) - memset(&buf[size], 0, state_len * 4 - size); - - if (pcb) - *pcb = buf; - - return state_offset; -} - #endif /* ILO_GPE_GEN6_H */ diff --git a/src/gallium/drivers/ilo/ilo_gpe_gen7.c b/src/gallium/drivers/ilo/ilo_gpe_gen7.c index 0e5602c65a6..21b4bfd6101 100644 --- a/src/gallium/drivers/ilo/ilo_gpe_gen7.c +++ b/src/gallium/drivers/ilo/ilo_gpe_gen7.c @@ -31,7 +31,8 @@ #include "ilo_format.h" #include "ilo_resource.h" #include "ilo_shader.h" -#include "ilo_gpe_gen7.h" +#include "ilo_gpe_gen6.h" +#include "ilo_gpe.h" void ilo_gpe_init_gs_cso_gen7(const struct ilo_dev_info *dev, -- 2.30.2