ilo state structs (struct ilo_xxx_state) are moved as well.
core/ilo_fence.h \
core/ilo_image.c \
core/ilo_image.h \
+ core/ilo_state_3d.h \
+ core/ilo_state_3d_bottom.c \
+ core/ilo_state_3d_top.c \
core/intel_winsys.h \
ilo_blit.c \
ilo_blit.h \
ilo_shader.h \
ilo_state.c \
ilo_state.h \
- ilo_state_3d.h \
- ilo_state_3d_bottom.c \
- ilo_state_3d_top.c \
ilo_transfer.c \
ilo_transfer.h \
ilo_video.c \
--- /dev/null
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2014 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef ILO_STATE_3D_H
+#define ILO_STATE_3D_H
+
+#include "genhw/genhw.h"
+#include "pipe/p_state.h"
+
+#include "ilo_core.h"
+#include "ilo_dev.h"
+
+/**
+ * Driver-side limits.  Several of them size the state arrays declared in
+ * this header (for example ILO_MAX_DRAW_BUFFERS, ILO_MAX_CONST_BUFFERS,
+ * ILO_MAX_SAMPLER_VIEWS, ILO_MAX_SAMPLERS, ILO_MAX_SO_BUFFERS and
+ * ILO_MAX_VIEWPORTS).
+ *
+ * \see brw_context.h
+ */
+#define ILO_MAX_DRAW_BUFFERS 8
+#define ILO_MAX_CONST_BUFFERS (1 + 12)
+#define ILO_MAX_SAMPLER_VIEWS 16
+#define ILO_MAX_SAMPLERS 16
+#define ILO_MAX_SO_BINDINGS 64
+#define ILO_MAX_SO_BUFFERS 4
+#define ILO_MAX_VIEWPORTS 1
+
+#define ILO_MAX_SURFACES 256
+
+struct intel_bo;
+struct ilo_buffer;
+struct ilo_shader_state;
+struct ilo_texture;
+
+struct ilo_vb_state { /* vertex buffer bindings */
+ struct pipe_vertex_buffer states[PIPE_MAX_ATTRIBS]; /* one entry per binding slot */
+ uint32_t enabled_mask; /* NOTE(review): presumably one bit per states[] slot -- confirm where it is set */
+};
+
+struct ilo_ib_state { /* index buffer binding */
+ struct pipe_resource *buffer;
+ const void *user_buffer; /* NOTE(review): presumably used instead of buffer for user-memory indices -- confirm */
+ unsigned offset;
+ unsigned index_size;
+
+ /*
+ * these are not valid until the state is finalized (finalization is done
+ * outside this header)
+ */
+ struct pipe_resource *hw_resource;
+ unsigned hw_index_size;
+ /* an offset to be added to pipe_draw_info::start */
+ int64_t draw_start_offset;
+};
+
+struct ilo_ve_cso { /* one vertex element, stored as hardware dwords */
+ /* VERTEX_ELEMENT_STATE (two dwords) */
+ uint32_t payload[2];
+};
+
+struct ilo_ve_state { /* vertex element state, room for PIPE_MAX_ATTRIBS elements */
+ struct ilo_ve_cso cso[PIPE_MAX_ATTRIBS];
+ unsigned count; /* NOTE(review): presumably the number of valid cso[] entries -- confirm in ilo_gpe_init_ve() */
+
+ unsigned instance_divisors[PIPE_MAX_ATTRIBS];
+ unsigned vb_mapping[PIPE_MAX_ATTRIBS];
+ unsigned vb_count; /* NOTE(review): presumably the number of valid vb_mapping[] entries -- confirm */
+
+ /*
+ * these are not valid until the state is finalized (finalization is done
+ * outside this header)
+ */
+ struct ilo_ve_cso edgeflag_cso;
+ bool last_cso_edgeflag;
+
+ struct ilo_ve_cso nosrc_cso;
+ bool prepend_nosrc_cso;
+};
+
+struct ilo_so_state { /* stream output targets, room for ILO_MAX_SO_BUFFERS of them */
+ struct pipe_stream_output_target *states[ILO_MAX_SO_BUFFERS];
+ unsigned count; /* NOTE(review): presumably the number of valid states[] entries -- confirm */
+ unsigned append_bitmask; /* NOTE(review): presumably one bit per states[] entry -- confirm where it is set */
+
+ bool enabled;
+};
+
+struct ilo_viewport_cso { /* one viewport, kept in three float representations */
+ /*
+ * matrix form (NOTE(review): presumably the scale and translate terms of
+ * the viewport transform -- confirm in ilo_gpe_set_viewport_cso())
+ */
+ float m00, m11, m22, m30, m31, m32;
+
+ /* guardband in NDC space */
+ float min_gbx, min_gby, max_gbx, max_gby;
+
+ /* viewport in screen space */
+ float min_x, min_y, min_z;
+ float max_x, max_y, max_z;
+};
+
+struct ilo_viewport_state { /* viewport CSOs, room for ILO_MAX_VIEWPORTS of them */
+ struct ilo_viewport_cso cso[ILO_MAX_VIEWPORTS];
+ unsigned count; /* NOTE(review): presumably the number of valid cso[] entries -- confirm */
+
+ struct pipe_viewport_state viewport0; /* NOTE(review): presumably the gallium state of viewport 0 -- confirm */
+};
+
+struct ilo_scissor_state { /* scissor rectangles as hardware dwords */
+ /* SCISSOR_RECT, two dwords for each of the ILO_MAX_VIEWPORTS viewports */
+ uint32_t payload[ILO_MAX_VIEWPORTS * 2];
+
+ struct pipe_scissor_state scissor0; /* NOTE(review): presumably the gallium state of scissor 0 -- confirm */
+};
+
+struct ilo_rasterizer_clip { /* rasterizer-dependent part of the clipper state */
+ /* 3DSTATE_CLIP; rasterizer_init_clip() stores DW1..DW3 here, in that order */
+ uint32_t payload[3];
+
+ uint32_t can_enable_guardband; /* false for per-vertex or >1.0 point sizes and for smooth or >1.0 wide lines */
+};
+
+struct ilo_rasterizer_sf { /* rasterizer-dependent part of the SF (setup) state */
+ /* 3DSTATE_SF; the init functions store DW1..DW3 here, in that order */
+ uint32_t payload[3];
+ uint32_t dw_msaa; /* derived from pipe_rasterizer_state::multisample on Gen6-7.5; always 0 on Gen8 */
+
+ /*
+ * Global Depth Offset Constant/Scale/Clamp, as raw float bits (fui());
+ * the constant is offset_units scaled by 2.0f
+ */
+ uint32_t dw_depth_offset_const;
+ uint32_t dw_depth_offset_scale;
+ uint32_t dw_depth_offset_clamp;
+
+ /* Gen8+ 3DSTATE_RASTER; set to 0 by the Gen6-7.5 path */
+ uint32_t dw_raster;
+};
+
+struct ilo_rasterizer_wm { /* rasterizer-dependent part of the WM (windower) state */
+ /*
+ * 3DSTATE_WM: DW5 and DW6 on Gen6, DW1 and DW2 on Gen7/7.5; on Gen8 only
+ * payload[0] is set (DW1) and the rest of the struct is zeroed
+ */
+ uint32_t payload[2];
+ uint32_t dw_msaa_rast; /* MSRASTMODE_ON_PATTERN when multisample is set, else 0 */
+ uint32_t dw_msaa_disp; /* MSDISPMODE_PERPIXEL on Gen6-7.5 */
+};
+
+struct ilo_rasterizer_state { /* rasterizer CSO: gallium state plus precomputed hardware dwords */
+ struct pipe_rasterizer_state state; /* not written by ilo_gpe_init_rasterizer(); NOTE(review): presumably copied in by the caller -- confirm */
+
+ struct ilo_rasterizer_clip clip; /* the three members below are filled by ilo_gpe_init_rasterizer() */
+ struct ilo_rasterizer_sf sf;
+ struct ilo_rasterizer_wm wm;
+};
+
+struct ilo_dsa_state { /* depth/stencil/alpha CSO; NOTE(review): presumably filled by ilo_gpe_init_dsa() -- definition not in this excerpt */
+ /* DEPTH_STENCIL_STATE or Gen8+ 3DSTATE_WM_DEPTH_STENCIL */
+ uint32_t payload[3];
+
+ uint32_t dw_blend_alpha;
+ uint32_t dw_ps_blend_alpha;
+ ubyte alpha_ref;
+};
+
+struct ilo_blend_cso { /* blend state of a single draw buffer (see ilo_blend_state::cso) */
+ /* BLEND_STATE (two dwords) */
+ uint32_t payload[2];
+
+ uint32_t dw_blend;
+ uint32_t dw_blend_dst_alpha_forced_one; /* NOTE(review): presumably the variant of dw_blend used when destination alpha is forced to one -- confirm */
+};
+
+struct ilo_blend_state { /* blend CSO; NOTE(review): presumably filled by ilo_gpe_init_blend() -- definition not in this excerpt */
+ struct ilo_blend_cso cso[ILO_MAX_DRAW_BUFFERS]; /* one entry per draw buffer */
+
+ bool dual_blend;
+ bool alpha_to_coverage;
+
+ uint32_t dw_shared;
+ uint32_t dw_alpha_mod;
+ uint32_t dw_logicop;
+
+ /* a part of 3DSTATE_PS_BLEND */
+ uint32_t dw_ps_blend;
+ uint32_t dw_ps_blend_dst_alpha_forced_one;
+};
+
+struct ilo_sampler_cso { /* sampler CSO; NOTE(review): presumably filled by ilo_gpe_init_sampler_cso() -- definition not in this excerpt */
+ /* SAMPLER_STATE and SAMPLER_BORDER_COLOR_STATE (15 dwords in total) */
+ uint32_t payload[15];
+
+ uint32_t dw_filter;
+ uint32_t dw_filter_aniso;
+ uint32_t dw_wrap;
+ uint32_t dw_wrap_1d;
+ uint32_t dw_wrap_cube;
+
+ bool anisotropic;
+ bool saturate_r;
+ bool saturate_s;
+ bool saturate_t;
+};
+
+struct ilo_sampler_state { /* sampler bindings, room for ILO_MAX_SAMPLERS of them */
+ const struct ilo_sampler_cso *cso[ILO_MAX_SAMPLERS]; /* pointers to CSOs that are stored elsewhere */
+};
+
+struct ilo_view_surface { /* a surface as seen by the hardware; filled by the ilo_gpe_init_view_surface_*() functions declared below */
+ /* SURFACE_STATE (13 dwords) */
+ uint32_t payload[13];
+ struct intel_bo *bo; /* NOTE(review): presumably the buffer object backing the surface -- confirm ownership in the init functions */
+
+ uint32_t scanout;
+};
+
+struct ilo_view_cso { /* sampler view: gallium base object followed by its hardware surface */
+ struct pipe_sampler_view base; /* first member; NOTE(review): presumably so the CSO can be cast to and from pipe_sampler_view -- confirm */
+
+ struct ilo_view_surface surface;
+};
+
+struct ilo_view_state { /* sampler view bindings, room for ILO_MAX_SAMPLER_VIEWS of them */
+ struct pipe_sampler_view *states[ILO_MAX_SAMPLER_VIEWS];
+ unsigned count; /* NOTE(review): presumably the number of valid states[] entries -- confirm */
+};
+
+struct ilo_cbuf_cso { /* one constant buffer binding */
+ struct pipe_resource *resource;
+ struct ilo_view_surface surface;
+
+ /*
+ * this CSO is not so constant because user buffer needs to be uploaded in
+ * finalize_constant_buffers() (defined outside this header)
+ */
+ const void *user_buffer;
+ unsigned user_buffer_size; /* NOTE(review): presumably the size of user_buffer in bytes -- confirm */
+};
+
+struct ilo_cbuf_state { /* constant buffer bindings, room for ILO_MAX_CONST_BUFFERS of them */
+ struct ilo_cbuf_cso cso[ILO_MAX_CONST_BUFFERS];
+ uint32_t enabled_mask; /* NOTE(review): presumably one bit per cso[] slot -- confirm where it is set */
+};
+
+struct ilo_resource_state { /* shader resource bindings, room for PIPE_MAX_SHADER_RESOURCES of them */
+ struct pipe_surface *states[PIPE_MAX_SHADER_RESOURCES];
+ unsigned count; /* NOTE(review): presumably the number of valid states[] entries -- confirm */
+};
+
+struct ilo_surface_cso { /* gallium surface plus its hardware state, either a render target or a depth/stencil surface */
+ struct pipe_surface base;
+
+ bool is_rt; /* NOTE(review): presumably selects u.rt (true) or u.zs (false) -- confirm at the creation site */
+ union {
+ struct ilo_view_surface rt;
+ struct ilo_zs_surface { /* also used on its own, e.g. ilo_fb_state::null_zs and ilo_gpe_init_zs_surface() */
+ uint32_t payload[12];
+ uint32_t dw_aligned_8x4;
+
+ struct intel_bo *bo;
+ struct intel_bo *hiz_bo;
+ struct intel_bo *separate_s8_bo;
+ } zs;
+ } u;
+};
+
+struct ilo_fb_state { /* framebuffer state; NOTE(review): presumably filled by ilo_gpe_set_fb() -- definition not in this excerpt */
+ struct pipe_framebuffer_state state;
+
+ struct ilo_view_surface null_rt; /* NOTE(review): presumably stand-ins for missing color / depth-stencil attachments -- confirm */
+ struct ilo_zs_surface null_zs;
+
+ struct ilo_fb_blend_caps { /* per color buffer capability flags */
+ bool can_logicop;
+ bool can_blend;
+ bool can_alpha_test;
+ bool dst_alpha_forced_one;
+ } blend_caps[PIPE_MAX_COLOR_BUFS];
+
+ unsigned num_samples;
+};
+
+struct ilo_shader_cso { /* precomputed shader-dependent command dwords */
+ uint32_t payload[5]; /* the FS init functions for Gen6-7.5 fill payload[0..3]; meaning depends on stage and Gen */
+};
+
+/**
+ * Map a pipe texture target onto the GEN6 surface type.
+ *
+ * Array targets share the surface type of their non-array counterpart and
+ * rectangle textures are treated as 2D.  An unrecognized target trips an
+ * assertion and, when assertions are compiled out, yields
+ * GEN6_SURFTYPE_BUFFER.
+ */
+static inline int
+ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
+{
+   int surftype = GEN6_SURFTYPE_BUFFER;
+
+   switch (target) {
+   case PIPE_BUFFER:
+      break;
+   case PIPE_TEXTURE_1D:
+   case PIPE_TEXTURE_1D_ARRAY:
+      surftype = GEN6_SURFTYPE_1D;
+      break;
+   case PIPE_TEXTURE_2D:
+   case PIPE_TEXTURE_RECT:
+   case PIPE_TEXTURE_2D_ARRAY:
+      surftype = GEN6_SURFTYPE_2D;
+      break;
+   case PIPE_TEXTURE_3D:
+      surftype = GEN6_SURFTYPE_3D;
+      break;
+   case PIPE_TEXTURE_CUBE:
+   case PIPE_TEXTURE_CUBE_ARRAY:
+      surftype = GEN6_SURFTYPE_CUBE;
+      break;
+   default:
+      assert(!"unknown texture target");
+      break;
+   }
+
+   return surftype;
+}
+
+void
+ilo_gpe_init_ve(const struct ilo_dev *dev,
+ unsigned num_states,
+ const struct pipe_vertex_element *states,
+ struct ilo_ve_state *ve); /* NOTE(review): presumably fills *ve from states[0..num_states) -- defined outside this excerpt */
+
+void
+ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev,
+ struct ilo_ve_cso *cso); /* cso is the only non-const argument; NOTE(review): presumably modified in place -- confirm */
+
+void
+ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev,
+ int comp0, int comp1, int comp2, int comp3,
+ struct ilo_ve_cso *cso); /* NOTE(review): presumably fills *cso from the four component controls -- confirm */
+
+void
+ilo_gpe_set_viewport_cso(const struct ilo_dev *dev,
+ const struct pipe_viewport_state *state,
+ struct ilo_viewport_cso *vp); /* NOTE(review): presumably fills *vp from *state -- confirm */
+
+void
+ilo_gpe_set_scissor(const struct ilo_dev *dev,
+ unsigned start_slot,
+ unsigned num_states,
+ const struct pipe_scissor_state *states,
+ struct ilo_scissor_state *scissor); /* NOTE(review): presumably updates num_states slots starting at start_slot -- confirm */
+
+void
+ilo_gpe_set_scissor_null(const struct ilo_dev *dev,
+ struct ilo_scissor_state *scissor); /* scissor is the only non-const argument; NOTE(review): presumably reset in place -- confirm */
+
+void
+ilo_gpe_init_rasterizer(const struct ilo_dev *dev,
+ const struct pipe_rasterizer_state *state,
+ struct ilo_rasterizer_state *rasterizer); /* fills rasterizer->clip, ->sf and ->wm from *state; rasterizer->state is not written */
+void
+ilo_gpe_init_dsa(const struct ilo_dev *dev,
+ const struct pipe_depth_stencil_alpha_state *state,
+ struct ilo_dsa_state *dsa); /* NOTE(review): presumably fills *dsa from *state -- confirm */
+
+void
+ilo_gpe_init_blend(const struct ilo_dev *dev,
+ const struct pipe_blend_state *state,
+ struct ilo_blend_state *blend); /* NOTE(review): presumably fills *blend from *state -- confirm */
+
+void
+ilo_gpe_init_sampler_cso(const struct ilo_dev *dev,
+ const struct pipe_sampler_state *state,
+ struct ilo_sampler_cso *sampler); /* NOTE(review): presumably fills *sampler from *state -- confirm */
+
+void
+ilo_gpe_init_view_surface_null(const struct ilo_dev *dev,
+ unsigned width, unsigned height,
+ unsigned depth, unsigned level,
+ struct ilo_view_surface *surf); /* NOTE(review): presumably fills *surf with a null surface of the given size -- confirm */
+
+void
+ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev,
+ const struct ilo_buffer *buf,
+ unsigned offset, unsigned size,
+ unsigned struct_size,
+ enum pipe_format elem_format,
+ bool is_rt, bool render_cache_rw,
+ struct ilo_view_surface *surf); /* NOTE(review): presumably fills *surf for a range of *buf -- confirm units of offset/size */
+
+void
+ilo_gpe_init_view_surface_for_texture(const struct ilo_dev *dev,
+ const struct ilo_texture *tex,
+ enum pipe_format format,
+ unsigned first_level,
+ unsigned num_levels,
+ unsigned first_layer,
+ unsigned num_layers,
+ bool is_rt,
+ struct ilo_view_surface *surf); /* NOTE(review): presumably fills *surf for the given level/layer range of *tex -- confirm */
+
+void
+ilo_gpe_init_zs_surface(const struct ilo_dev *dev,
+ const struct ilo_texture *tex,
+ enum pipe_format format, unsigned level,
+ unsigned first_layer, unsigned num_layers,
+ struct ilo_zs_surface *zs); /* NOTE(review): presumably fills *zs for one level of *tex -- confirm */
+
+void
+ilo_gpe_init_vs_cso(const struct ilo_dev *dev,
+ const struct ilo_shader_state *vs,
+ struct ilo_shader_cso *cso); /* NOTE(review): presumably fills cso->payload for the vertex shader -- confirm */
+
+void
+ilo_gpe_init_gs_cso(const struct ilo_dev *dev,
+ const struct ilo_shader_state *gs,
+ struct ilo_shader_cso *cso); /* NOTE(review): presumably fills cso->payload for the geometry shader -- confirm */
+
+void
+ilo_gpe_init_fs_cso(const struct ilo_dev *dev,
+ const struct ilo_shader_state *fs,
+ struct ilo_shader_cso *cso); /* NOTE(review): presumably dispatches to the per-Gen fs_init_cso_*() helpers -- confirm */
+
+void
+ilo_gpe_set_fb(const struct ilo_dev *dev,
+ const struct pipe_framebuffer_state *state,
+ struct ilo_fb_state *fb); /* NOTE(review): presumably fills *fb from *state -- confirm */
+
+#endif /* ILO_STATE_3D_H */
--- /dev/null
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2014 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "genhw/genhw.h"
+#include "util/u_dual_blend.h"
+#include "util/u_framebuffer.h"
+#include "util/u_half.h"
+
+#include "ilo_format.h"
+#include "ilo_state_3d.h"
+#include "../ilo_resource.h"
+#include "../ilo_shader.h"
+
+/*
+ * Pre-compute the rasterizer-dependent dwords of 3DSTATE_CLIP (stored as
+ * DW1..DW3 in clip->payload) and decide whether the guard band test may be
+ * enabled with this rasterizer state.
+ */
+static void
+rasterizer_init_clip(const struct ilo_dev *dev,
+                     const struct pipe_rasterizer_state *state,
+                     struct ilo_rasterizer_clip *clip)
+{
+   uint32_t clip_dw1, clip_dw2, clip_dw3;
+   uint32_t cull_bits = 0;
+   int tri_provoke, line_provoke, trifan_provoke;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   clip_dw1 = GEN6_CLIP_DW1_STATISTICS;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      /*
+       * From the Ivy Bridge PRM, volume 2 part 1, page 219:
+       *
+       *     "Workaround : Due to Hardware issue "EarlyCull" needs to be
+       *      enabled only for the cases where the incoming primitive topology
+       *      into the clipper guaranteed to be Trilist."
+       *
+       * What does this mean?
+       */
+      clip_dw1 |= 0 << 19 |
+                  GEN7_CLIP_DW1_EARLY_CULL_ENABLE;
+   }
+
+   /* winding and cull mode are programmed here only on Gen7 and Gen7.5 */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7) && ilo_dev_gen(dev) < ILO_GEN(8)) {
+      if (state->front_ccw)
+         clip_dw1 |= GEN7_CLIP_DW1_FRONTWINDING_CCW;
+
+      switch (state->cull_face) {
+      case PIPE_FACE_NONE:
+         cull_bits = GEN7_CLIP_DW1_CULLMODE_NONE;
+         break;
+      case PIPE_FACE_FRONT:
+         cull_bits = GEN7_CLIP_DW1_CULLMODE_FRONT;
+         break;
+      case PIPE_FACE_BACK:
+         cull_bits = GEN7_CLIP_DW1_CULLMODE_BACK;
+         break;
+      case PIPE_FACE_FRONT_AND_BACK:
+         cull_bits = GEN7_CLIP_DW1_CULLMODE_BOTH;
+         break;
+      }
+
+      clip_dw1 |= cull_bits;
+   }
+
+   clip_dw2 = GEN6_CLIP_DW2_CLIP_ENABLE |
+              GEN6_CLIP_DW2_XY_TEST_ENABLE |
+              GEN6_CLIP_DW2_CLIPMODE_NORMAL |
+              (state->clip_plane_enable <<
+               GEN6_CLIP_DW2_UCP_CLIP_ENABLES__SHIFT);
+
+   clip_dw2 |= (state->clip_halfz) ?
+      GEN6_CLIP_DW2_APIMODE_D3D : GEN6_CLIP_DW2_APIMODE_OGL;
+
+   /* the Z test is not programmed in 3DSTATE_CLIP on Gen8 */
+   if (state->depth_clip && ilo_dev_gen(dev) < ILO_GEN(8))
+      clip_dw2 |= GEN6_CLIP_DW2_Z_TEST_ENABLE;
+
+   /* provoking vertex selection */
+   if (state->flatshade_first) {
+      tri_provoke = 0;
+      line_provoke = 0;
+      trifan_provoke = 1;
+   } else {
+      tri_provoke = 2;
+      line_provoke = 1;
+      trifan_provoke = 2;
+   }
+
+   clip_dw2 |= tri_provoke << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT |
+               line_provoke << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT |
+               trifan_provoke << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT;
+
+   clip_dw3 = 0x1 << GEN6_CLIP_DW3_MIN_POINT_WIDTH__SHIFT |
+              0x7ff << GEN6_CLIP_DW3_MAX_POINT_WIDTH__SHIFT;
+
+   clip->payload[0] = clip_dw1;
+   clip->payload[1] = clip_dw2;
+   clip->payload[2] = clip_dw3;
+
+   /*
+    * There are several reasons that guard band test should be disabled
+    *
+    *  - GL wide points (to avoid partially visible object)
+    *  - GL wide or AA lines (to avoid partially visible object)
+    */
+   clip->can_enable_guardband =
+      !(state->point_size_per_vertex || state->point_size > 1.0f ||
+        state->line_smooth || state->line_width > 1.0f);
+}
+
+/*
+ * Store the global depth offset constant, scale and clamp of the rasterizer
+ * state in \p sf as raw float bits.
+ */
+static void
+rasterizer_init_sf_depth_offset_gen6(const struct ilo_dev *dev,
+                                     const struct pipe_rasterizer_state *state,
+                                     struct ilo_rasterizer_sf *sf)
+{
+   float offset_const;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * Scale the constant term.  The minimum representable value used by the HW
+    * is not large enough to be the minimum resolvable difference.
+    */
+   offset_const = state->offset_units * 2.0f;
+
+   sf->dw_depth_offset_const = fui(offset_const);
+   sf->dw_depth_offset_scale = fui(state->offset_scale);
+   sf->dw_depth_offset_clamp = fui(state->offset_clamp);
+}
+
+/*
+ * Pre-compute the rasterizer-dependent parts of 3DSTATE_SF for Gen6 to
+ * Gen7.5: DW1..DW3 go to sf->payload, the multisample-only bits go to
+ * sf->dw_msaa, and the depth offset values are stored as well.
+ * sf->dw_raster is cleared because 3DSTATE_RASTER is Gen8+ only.
+ */
+static void
+rasterizer_init_sf_gen6(const struct ilo_dev *dev,
+                        const struct pipe_rasterizer_state *state,
+                        struct ilo_rasterizer_sf *sf)
+{
+   uint32_t sf_dw1, sf_dw2, sf_dw3, msaa_bits;
+   int line_w, point_w;
+
+   ILO_DEV_ASSERT(dev, 6, 7.5);
+
+   /*
+    * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
+    * pixels in the minor direction.  We have to make the lines slightly
+    * thicker, 0.5 pixel on both sides, so that every pixel they intersect is
+    * considered part of the line.
+    *
+    * Line width is in U3.7.
+    */
+   line_w = (int)
+      ((state->line_width + (float) state->line_smooth) * 128.0f + 0.5f);
+   line_w = CLAMP(line_w, 0, 1023);
+
+   /* use GIQ rules */
+   if (!state->line_smooth && line_w == 128)
+      line_w = 0;
+
+   /* point width is in U8.3 */
+   point_w = (int) (state->point_size * 8.0f + 0.5f);
+   point_w = CLAMP(point_w, 1, 2047);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 248:
+    *
+    *     "This bit (Statistics Enable) should be set whenever clipping is
+    *      enabled and the Statistics Enable bit is set in CLIP_STATE. It
+    *      should be cleared if clipping is disabled or Statistics Enable in
+    *      CLIP_STATE is clear."
+    */
+   sf_dw1 = GEN7_SF_DW1_STATISTICS |
+            GEN7_SF_DW1_VIEWPORT_ENABLE;
+
+   /* XXX GEN6 path seems to work fine for GEN7 */
+   if (false && ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      /*
+       * From the Ivy Bridge PRM, volume 2 part 1, page 258:
+       *
+       *     "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
+       *      Enable Solid , Global Depth Offset Enable Wireframe, and Global
+       *      Depth Offset Enable Point) should be set whenever non zero depth
+       *      bias (Slope, Bias) values are used. Setting this bit may have
+       *      some degradation of performance for some workloads."
+       */
+      if (state->offset_tri || state->offset_line || state->offset_point) {
+         /* XXX need to scale offset_const according to the depth format */
+         sf_dw1 |= GEN7_SF_DW1_LEGACY_DEPTH_OFFSET;
+
+         sf_dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID |
+                   GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME |
+                   GEN7_SF_DW1_DEPTH_OFFSET_POINT;
+      }
+   } else {
+      if (state->offset_tri)
+         sf_dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID;
+      if (state->offset_line)
+         sf_dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME;
+      if (state->offset_point)
+         sf_dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_POINT;
+   }
+
+   /* polygon mode of front-facing primitives */
+   switch (state->fill_front) {
+   case PIPE_POLYGON_MODE_FILL:
+      sf_dw1 |= GEN7_SF_DW1_FRONTFACE_SOLID;
+      break;
+   case PIPE_POLYGON_MODE_LINE:
+      sf_dw1 |= GEN7_SF_DW1_FRONTFACE_WIREFRAME;
+      break;
+   case PIPE_POLYGON_MODE_POINT:
+      sf_dw1 |= GEN7_SF_DW1_FRONTFACE_POINT;
+      break;
+   }
+
+   /* polygon mode of back-facing primitives */
+   switch (state->fill_back) {
+   case PIPE_POLYGON_MODE_FILL:
+      sf_dw1 |= GEN7_SF_DW1_BACKFACE_SOLID;
+      break;
+   case PIPE_POLYGON_MODE_LINE:
+      sf_dw1 |= GEN7_SF_DW1_BACKFACE_WIREFRAME;
+      break;
+   case PIPE_POLYGON_MODE_POINT:
+      sf_dw1 |= GEN7_SF_DW1_BACKFACE_POINT;
+      break;
+   }
+
+   if (state->front_ccw)
+      sf_dw1 |= GEN7_SF_DW1_FRONTWINDING_CCW;
+
+   sf_dw2 = line_w << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
+
+   if (state->line_smooth) {
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 251:
+       *
+       *     "This field (Anti-aliasing Enable) must be disabled if any of the
+       *      render targets have integer (UINT or SINT) surface format."
+       *
+       * From the Sandy Bridge PRM, volume 2 part 1, page 317:
+       *
+       *     "This field (Hierarchical Depth Buffer Enable) must be disabled
+       *      if Anti-aliasing Enable in 3DSTATE_SF is enabled."
+       *
+       * TODO We do not check those yet.
+       */
+      sf_dw2 |= GEN7_SF_DW2_AA_LINE_ENABLE |
+                GEN7_SF_DW2_AA_LINE_CAP_1_0;
+   }
+
+   switch (state->cull_face) {
+   case PIPE_FACE_NONE:
+      sf_dw2 |= GEN7_SF_DW2_CULLMODE_NONE;
+      break;
+   case PIPE_FACE_FRONT:
+      sf_dw2 |= GEN7_SF_DW2_CULLMODE_FRONT;
+      break;
+   case PIPE_FACE_BACK:
+      sf_dw2 |= GEN7_SF_DW2_CULLMODE_BACK;
+      break;
+   case PIPE_FACE_FRONT_AND_BACK:
+      sf_dw2 |= GEN7_SF_DW2_CULLMODE_BOTH;
+      break;
+   }
+
+   /* the line stipple enable is programmed here only on Gen7.5 */
+   if (state->line_stipple_enable && ilo_dev_gen(dev) == ILO_GEN(7.5))
+      sf_dw2 |= GEN75_SF_DW2_LINE_STIPPLE_ENABLE;
+
+   if (state->scissor)
+      sf_dw2 |= GEN7_SF_DW2_SCISSOR_ENABLE;
+
+   sf_dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE |
+            GEN7_SF_DW3_SUBPIXEL_8BITS |
+            point_w;
+
+   if (state->line_last_pixel)
+      sf_dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE;
+
+   /* provoking vertex selection */
+   if (state->flatshade_first) {
+      sf_dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
+                0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
+                1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
+   } else {
+      sf_dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
+                1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
+                2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
+   }
+
+   if (!state->point_size_per_vertex)
+      sf_dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH;
+
+   STATIC_ASSERT(Elements(sf->payload) >= 3);
+   sf->payload[0] = sf_dw1;
+   sf->payload[1] = sf_dw2;
+   sf->payload[2] = sf_dw3;
+
+   msaa_bits = 0;
+   if (state->multisample) {
+      msaa_bits = GEN7_SF_DW2_MSRASTMODE_ON_PATTERN;
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 251:
+       *
+       *     "Software must not program a value of 0.0 when running in
+       *      MSRASTMODE_ON_xxx modes - zero-width lines are not available
+       *      when multisampling rasterization is enabled."
+       */
+      if (line_w == 0) {
+         /* 128 is 1.0f in U3.7 */
+         msaa_bits |= 128 << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
+      }
+   }
+   sf->dw_msaa = msaa_bits;
+
+   rasterizer_init_sf_depth_offset_gen6(dev, state, sf);
+
+   /* 3DSTATE_RASTER is Gen8+ only */
+   sf->dw_raster = 0;
+}
+
+/*
+ * Build the GEN8_RASTER_DW1_* bits (Gen8 3DSTATE_RASTER) that follow from
+ * the rasterizer state and return them.
+ */
+static uint32_t
+rasterizer_get_sf_raster_gen8(const struct ilo_dev *dev,
+                              const struct pipe_rasterizer_state *state)
+{
+   uint32_t raster = 0;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   /* winding and culling */
+   if (state->front_ccw)
+      raster |= GEN8_RASTER_DW1_FRONTWINDING_CCW;
+
+   switch (state->cull_face) {
+   case PIPE_FACE_NONE:
+      raster |= GEN8_RASTER_DW1_CULLMODE_NONE;
+      break;
+   case PIPE_FACE_FRONT:
+      raster |= GEN8_RASTER_DW1_CULLMODE_FRONT;
+      break;
+   case PIPE_FACE_BACK:
+      raster |= GEN8_RASTER_DW1_CULLMODE_BACK;
+      break;
+   case PIPE_FACE_FRONT_AND_BACK:
+      raster |= GEN8_RASTER_DW1_CULLMODE_BOTH;
+      break;
+   }
+
+   /* simple one-bit enables */
+   if (state->point_smooth)
+      raster |= GEN8_RASTER_DW1_SMOOTH_POINT_ENABLE;
+   if (state->multisample)
+      raster |= GEN8_RASTER_DW1_API_MULTISAMPLE_ENABLE;
+   if (state->line_smooth)
+      raster |= GEN8_RASTER_DW1_AA_LINE_ENABLE;
+   if (state->scissor)
+      raster |= GEN8_RASTER_DW1_SCISSOR_ENABLE;
+   if (state->depth_clip)
+      raster |= GEN8_RASTER_DW1_Z_TEST_ENABLE;
+
+   /* depth offset enables, one per polygon mode */
+   if (state->offset_tri)
+      raster |= GEN8_RASTER_DW1_DEPTH_OFFSET_SOLID;
+   if (state->offset_line)
+      raster |= GEN8_RASTER_DW1_DEPTH_OFFSET_WIREFRAME;
+   if (state->offset_point)
+      raster |= GEN8_RASTER_DW1_DEPTH_OFFSET_POINT;
+
+   /* polygon mode of front-facing primitives */
+   switch (state->fill_front) {
+   case PIPE_POLYGON_MODE_FILL:
+      raster |= GEN8_RASTER_DW1_FRONTFACE_SOLID;
+      break;
+   case PIPE_POLYGON_MODE_LINE:
+      raster |= GEN8_RASTER_DW1_FRONTFACE_WIREFRAME;
+      break;
+   case PIPE_POLYGON_MODE_POINT:
+      raster |= GEN8_RASTER_DW1_FRONTFACE_POINT;
+      break;
+   }
+
+   /* polygon mode of back-facing primitives */
+   switch (state->fill_back) {
+   case PIPE_POLYGON_MODE_FILL:
+      raster |= GEN8_RASTER_DW1_BACKFACE_SOLID;
+      break;
+   case PIPE_POLYGON_MODE_LINE:
+      raster |= GEN8_RASTER_DW1_BACKFACE_WIREFRAME;
+      break;
+   case PIPE_POLYGON_MODE_POINT:
+      raster |= GEN8_RASTER_DW1_BACKFACE_POINT;
+      break;
+   }
+
+   return raster;
+}
+
+/*
+ * Pre-compute the rasterizer-dependent parts of 3DSTATE_SF for Gen8:
+ * DW1..DW3 go to sf->payload, the depth offset values are stored, and
+ * sf->dw_raster receives the 3DSTATE_RASTER bits.  sf->dw_msaa is always 0
+ * on this path.
+ */
+static void
+rasterizer_init_sf_gen8(const struct ilo_dev *dev,
+                        const struct pipe_rasterizer_state *state,
+                        struct ilo_rasterizer_sf *sf)
+{
+   uint32_t sf_dw1, sf_dw2, sf_dw3;
+   int line_w, point_w;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   /* line width is in U3.7; smooth lines are made one pixel wider */
+   line_w = (int)
+      ((state->line_width + (float) state->line_smooth) * 128.0f + 0.5f);
+   line_w = CLAMP(line_w, 0, 1023);
+
+   /* use GIQ rules */
+   if (!state->line_smooth && line_w == 128)
+      line_w = 0;
+
+   /* point width is in U8.3 */
+   point_w = (int) (state->point_size * 8.0f + 0.5f);
+   point_w = CLAMP(point_w, 1, 2047);
+
+   sf_dw1 = GEN7_SF_DW1_STATISTICS |
+            GEN7_SF_DW1_VIEWPORT_ENABLE;
+
+   sf_dw2 = line_w << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
+   if (state->line_smooth)
+      sf_dw2 |= GEN7_SF_DW2_AA_LINE_CAP_1_0;
+
+   sf_dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE |
+            GEN7_SF_DW3_SUBPIXEL_8BITS |
+            point_w;
+
+   if (state->line_last_pixel)
+      sf_dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE;
+
+   if (!state->point_size_per_vertex)
+      sf_dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH;
+
+   /* provoking vertex selection */
+   if (state->flatshade_first) {
+      sf_dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
+                0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
+                1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
+   } else {
+      sf_dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
+                1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
+                2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
+   }
+
+   STATIC_ASSERT(Elements(sf->payload) >= 3);
+   sf->payload[0] = sf_dw1;
+   sf->payload[1] = sf_dw2;
+   sf->payload[2] = sf_dw3;
+
+   rasterizer_init_sf_depth_offset_gen6(dev, state, sf);
+
+   sf->dw_msaa = 0;
+   sf->dw_raster = rasterizer_get_sf_raster_gen8(dev, state);
+}
+
+/*
+ * Pre-compute the rasterizer-dependent bits of the Gen6 3DSTATE_WM: DW5 and
+ * DW6 go to wm->payload, and the multisample rasterization and dispatch
+ * modes are kept separately in wm->dw_msaa_rast and wm->dw_msaa_disp.
+ */
+static void
+rasterizer_init_wm_gen6(const struct ilo_dev *dev,
+                        const struct pipe_rasterizer_state *state,
+                        struct ilo_rasterizer_wm *wm)
+{
+   uint32_t wm_dw5, wm_dw6;
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   /*
+    * assertion that makes sure
+    *
+    *   dw6 |= wm->dw_msaa_rast | wm->dw_msaa_disp;
+    *
+    * is valid
+    */
+   STATIC_ASSERT(GEN6_WM_DW6_MSRASTMODE_OFF_PIXEL == 0 &&
+                 GEN6_WM_DW6_MSDISPMODE_PERSAMPLE == 0);
+
+   /* only the FF unit states are set, as in GEN7 */
+   wm_dw5 = GEN6_WM_DW5_AA_LINE_WIDTH_2_0;
+   wm_dw6 = GEN6_WM_DW6_ZW_INTERP_PIXEL;
+
+   /* same value as in 3DSTATE_SF */
+   if (state->line_smooth)
+      wm_dw5 |= GEN6_WM_DW5_AA_LINE_CAP_1_0;
+
+   if (state->poly_stipple_enable)
+      wm_dw5 |= GEN6_WM_DW5_POLY_STIPPLE_ENABLE;
+   if (state->line_stipple_enable)
+      wm_dw5 |= GEN6_WM_DW5_LINE_STIPPLE_ENABLE;
+
+   if (state->bottom_edge_rule)
+      wm_dw6 |= GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT;
+
+   STATIC_ASSERT(Elements(wm->payload) >= 2);
+   wm->payload[0] = wm_dw5;
+   wm->payload[1] = wm_dw6;
+
+   if (state->multisample)
+      wm->dw_msaa_rast = GEN6_WM_DW6_MSRASTMODE_ON_PATTERN;
+   else
+      wm->dw_msaa_rast = 0;
+
+   wm->dw_msaa_disp = GEN6_WM_DW6_MSDISPMODE_PERPIXEL;
+}
+
+/*
+ * Pre-compute the rasterizer-dependent bits of the Gen7/Gen7.5 3DSTATE_WM:
+ * DW1 and DW2 go to wm->payload, and the multisample rasterization and
+ * dispatch modes are kept separately in wm->dw_msaa_rast and
+ * wm->dw_msaa_disp.
+ */
+static void
+rasterizer_init_wm_gen7(const struct ilo_dev *dev,
+                        const struct pipe_rasterizer_state *state,
+                        struct ilo_rasterizer_wm *wm)
+{
+   uint32_t wm_dw1;
+
+   ILO_DEV_ASSERT(dev, 7, 7.5);
+
+   /*
+    * assertion that makes sure
+    *
+    *   dw1 |= wm->dw_msaa_rast;
+    *   dw2 |= wm->dw_msaa_disp;
+    *
+    * is valid
+    */
+   STATIC_ASSERT(GEN7_WM_DW1_MSRASTMODE_OFF_PIXEL == 0 &&
+                 GEN7_WM_DW2_MSDISPMODE_PERSAMPLE == 0);
+
+   wm_dw1 = GEN7_WM_DW1_ZW_INTERP_PIXEL |
+            GEN7_WM_DW1_AA_LINE_WIDTH_2_0;
+
+   /* same value as in 3DSTATE_SF */
+   if (state->line_smooth)
+      wm_dw1 |= GEN7_WM_DW1_AA_LINE_CAP_1_0;
+
+   if (state->poly_stipple_enable)
+      wm_dw1 |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE;
+   if (state->line_stipple_enable)
+      wm_dw1 |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE;
+
+   if (state->bottom_edge_rule)
+      wm_dw1 |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT;
+
+   /* no rasterizer-dependent bit is set in DW2 */
+   STATIC_ASSERT(Elements(wm->payload) >= 2);
+   wm->payload[0] = wm_dw1;
+   wm->payload[1] = 0;
+
+   if (state->multisample)
+      wm->dw_msaa_rast = GEN7_WM_DW1_MSRASTMODE_ON_PATTERN;
+   else
+      wm->dw_msaa_rast = 0;
+
+   wm->dw_msaa_disp = GEN7_WM_DW2_MSDISPMODE_PERPIXEL;
+}
+
+/*
+ * Return the rasterizer-dependent bits of 3DSTATE_WM DW1 for Gen8.  The
+ * GEN7_WM_DW1_* definitions are used for them.
+ */
+static uint32_t
+rasterizer_get_wm_gen8(const struct ilo_dev *dev,
+                       const struct pipe_rasterizer_state *state)
+{
+   uint32_t wm_dw1 = GEN7_WM_DW1_ZW_INTERP_PIXEL |
+                     GEN7_WM_DW1_AA_LINE_WIDTH_2_0;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   /* same value as in 3DSTATE_SF */
+   if (state->line_smooth)
+      wm_dw1 |= GEN7_WM_DW1_AA_LINE_CAP_1_0;
+
+   /* stipple enables */
+   if (state->poly_stipple_enable)
+      wm_dw1 |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE;
+   if (state->line_stipple_enable)
+      wm_dw1 |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE;
+
+   if (state->bottom_edge_rule)
+      wm_dw1 |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT;
+
+   return wm_dw1;
+}
+
+/**
+ * Fill the clip, sf and wm members of \p rasterizer from the gallium
+ * rasterizer state, using the code path that matches the device Gen.
+ * rasterizer->state is not written here.
+ */
+void
+ilo_gpe_init_rasterizer(const struct ilo_dev *dev,
+                        const struct pipe_rasterizer_state *state,
+                        struct ilo_rasterizer_state *rasterizer)
+{
+   struct ilo_rasterizer_wm *wm = &rasterizer->wm;
+   struct ilo_rasterizer_sf *sf = &rasterizer->sf;
+
+   rasterizer_init_clip(dev, state, &rasterizer->clip);
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      /* on Gen8 only the first WM dword is used; everything else is zero */
+      memset(wm, 0, sizeof(*wm));
+      wm->payload[0] = rasterizer_get_wm_gen8(dev, state);
+
+      rasterizer_init_sf_gen8(dev, state, sf);
+      return;
+   }
+
+   /* Gen6 and Gen7/7.5 differ in the WM layout but share the SF path */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7))
+      rasterizer_init_wm_gen7(dev, state, wm);
+   else
+      rasterizer_init_wm_gen6(dev, state, wm);
+
+   rasterizer_init_sf_gen6(dev, state, sf);
+}
+
+/*
+ * Pre-compute the fragment-shader-dependent dwords of the Gen6 3DSTATE_WM.
+ * DW2, DW4, DW5 and DW6 are stored in cso->payload[0..3], in that order.
+ */
+static void
+fs_init_cso_gen6(const struct ilo_dev *dev,
+                 const struct ilo_shader_state *fs,
+                 struct ilo_shader_cso *cso)
+{
+   int grf_start, num_inputs, num_samplers, barycentric, thread_count;
+   uint32_t wm_dw2, wm_dw4, wm_dw5, wm_dw6;
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   grf_start = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
+   num_inputs = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
+   num_samplers = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
+   barycentric = ilo_shader_get_kernel_param(fs,
+         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
+
+   /* see brwCreateContext() */
+   thread_count = (dev->gt == 2) ? 80 : 40;
+
+   /* the alternative floating-point mode is never selected */
+   wm_dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
+   /* the sampler count is programmed in units of four, rounded up */
+   wm_dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
+
+   wm_dw4 = grf_start << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
+            0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
+            0 << GEN6_WM_DW4_URB_GRF_START2__SHIFT;
+
+   wm_dw5 = (thread_count - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
+    *
+    *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
+    *      PS kernel or color calculator has the ability to kill (discard)
+    *      pixels or samples, other than due to depth or stencil testing.
+    *      This bit is required to be ENABLED in the following situations:
+    *
+    *      The API pixel shader program contains "killpix" or "discard"
+    *      instructions, or other code in the pixel shader kernel that can
+    *      cause the final pixel mask to differ from the pixel mask received
+    *      on dispatch.
+    *
+    *      A sampler with chroma key enabled with kill pixel mode is used by
+    *      the pixel shader.
+    *
+    *      Any render target has Alpha Test Enable or AlphaToCoverage Enable
+    *      enabled.
+    *
+    *      The pixel shader kernel generates and outputs oMask.
+    *
+    *      Note: As ClipDistance clipping is fully supported in hardware and
+    *      therefore not via PS instructions, there should be no need to
+    *      ENABLE this bit due to ClipDistance clipping."
+    */
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
+      wm_dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 275:
+    *
+    *     "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
+    *      field must be set to disabled."
+    *
+    * TODO This is not checked yet.
+    */
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
+      wm_dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH;
+
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
+      wm_dw5 |= GEN6_WM_DW5_PS_USE_DEPTH;
+
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
+      wm_dw5 |= GEN6_WM_DW5_PS_USE_W;
+
+   /*
+    * Dispatch is currently always enabled.
+    *
+    * TODO set this bit only when
+    *
+    *  a) fs writes colors and color is not masked, or
+    *  b) fs writes depth, or
+    *  c) fs or cc kills
+    */
+   wm_dw5 |= GEN6_WM_DW5_PS_DISPATCH_ENABLE;
+
+   /* only 8-wide dispatch is programmed; a 16-wide kernel is not expected */
+   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
+   wm_dw5 |= GEN6_PS_DISPATCH_8 << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT;
+
+   wm_dw6 = num_inputs << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT |
+            GEN6_WM_DW6_PS_POSOFFSET_NONE |
+            barycentric << GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT;
+
+   STATIC_ASSERT(Elements(cso->payload) >= 4);
+   cso->payload[0] = wm_dw2;
+   cso->payload[1] = wm_dw4;
+   cso->payload[2] = wm_dw5;
+   cso->payload[3] = wm_dw6;
+}
+
+/*
+ * Return the fragment-shader-dependent bits of 3DSTATE_WM DW1 for Gen7 and
+ * Gen7.5.
+ */
+static uint32_t
+fs_get_wm_gen7(const struct ilo_dev *dev,
+               const struct ilo_shader_state *fs)
+{
+   uint32_t wm_dw1;
+
+   ILO_DEV_ASSERT(dev, 7, 7.5);
+
+   wm_dw1 = ilo_shader_get_kernel_param(fs,
+         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) <<
+      GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT;
+
+   /*
+    * Dispatch is currently always enabled.
+    *
+    * TODO set this bit only when
+    *
+    *  a) fs writes colors and color is not masked, or
+    *  b) fs writes depth, or
+    *  c) fs or cc kills
+    */
+   wm_dw1 |= GEN7_WM_DW1_PS_DISPATCH_ENABLE;
+
+   /*
+    * From the Ivy Bridge PRM, volume 2 part 1, page 278:
+    *
+    *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that
+    *      the PS kernel or color calculator has the ability to kill
+    *      (discard) pixels or samples, other than due to depth or stencil
+    *      testing. This bit is required to be ENABLED in the following
+    *      situations:
+    *
+    *      - The API pixel shader program contains "killpix" or "discard"
+    *        instructions, or other code in the pixel shader kernel that
+    *        can cause the final pixel mask to differ from the pixel mask
+    *        received on dispatch.
+    *
+    *      - A sampler with chroma key enabled with kill pixel mode is used
+    *        by the pixel shader.
+    *
+    *      - Any render target has Alpha Test Enable or AlphaToCoverage
+    *        Enable enabled.
+    *
+    *      - The pixel shader kernel generates and outputs oMask.
+    *
+    *      Note: As ClipDistance clipping is fully supported in hardware
+    *      and therefore not via PS instructions, there should be no need
+    *      to ENABLE this bit due to ClipDistance clipping."
+    */
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
+      wm_dw1 |= GEN7_WM_DW1_PS_KILL_PIXEL;
+
+   /* the kernel computes depth */
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
+      wm_dw1 |= GEN7_WM_DW1_PSCDEPTH_ON;
+
+   /* the kernel reads source depth and/or W */
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
+      wm_dw1 |= GEN7_WM_DW1_PS_USE_DEPTH;
+
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
+      wm_dw1 |= GEN7_WM_DW1_PS_USE_W;
+
+   return wm_dw1;
+}
+
+/**
+ * Initialize the fragment shader CSO payload for Gen7 and Gen7.5.
+ *
+ * The payload holds, in order, the values this function names dw2, dw4 and
+ * dw5 (built from GEN6_THREADDISP_* and GEN7_PS_DW4/DW5_* fields), followed
+ * by the shader-dependent WM bits from fs_get_wm_gen7().
+ */
+static void
+fs_init_cso_gen7(const struct ilo_dev *dev,
+                 const struct ilo_shader_state *fs,
+                 struct ilo_shader_cso *cso)
+{
+   int start_grf, sampler_count;
+   /*
+    * Kept unsigned on purpose: the max-threads field extends to the top of
+    * the dword, so shifting a signed int there would overflow (undefined
+    * behavior in C).
+    */
+   uint32_t max_threads;
+   uint32_t dw2, dw4, dw5;
+
+   ILO_DEV_ASSERT(dev, 7, 7.5);
+
+   start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
+   sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
+
+   /* the alternative floating-point mode is never selected */
+   dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
+   /* the sampler count is programmed in units of four samplers, rounded up */
+   dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
+
+   dw4 = GEN7_PS_DW4_POSOFFSET_NONE;
+
+   /* see brwCreateContext() */
+   switch (ilo_dev_gen(dev)) {
+   case ILO_GEN(7.5):
+      max_threads = (dev->gt == 3) ? 408 : (dev->gt == 2) ? 204 : 102;
+      dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
+      dw4 |= 1 << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
+      break;
+   case ILO_GEN(7):
+   default:
+      max_threads = (dev->gt == 2) ? 172 : 48;
+      dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
+      break;
+   }
+
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE))
+      dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE;
+
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT))
+      dw4 |= GEN7_PS_DW4_ATTR_ENABLE;
+
+   /* only the 8-wide dispatch mode is programmed */
+   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
+   dw4 |= GEN6_PS_DISPATCH_8 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
+
+   dw5 = start_grf << GEN7_PS_DW5_URB_GRF_START0__SHIFT |
+         0 << GEN7_PS_DW5_URB_GRF_START1__SHIFT |
+         0 << GEN7_PS_DW5_URB_GRF_START2__SHIFT;
+
+   STATIC_ASSERT(Elements(cso->payload) >= 4);
+   cso->payload[0] = dw2;
+   cso->payload[1] = dw4;
+   cso->payload[2] = dw5;
+   cso->payload[3] = fs_get_wm_gen7(dev, fs);
+}
+
+/**
+ * Return the GEN8_PSX_DW1_* bits derived from the fragment shader kernel
+ * parameters (Gen8 only).  Dispatch is always enabled.
+ */
+static uint32_t
+fs_get_psx_gen8(const struct ilo_dev *dev,
+                const struct ilo_shader_state *fs)
+{
+   uint32_t psx = GEN8_PSX_DW1_DISPATCH_ENABLE;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   psx |= ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL) ?
+      GEN8_PSX_DW1_KILL_PIXEL : 0;
+   psx |= ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z) ?
+      GEN8_PSX_DW1_PSCDEPTH_ON : 0;
+   psx |= ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z) ?
+      GEN8_PSX_DW1_USE_DEPTH : 0;
+   psx |= ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W) ?
+      GEN8_PSX_DW1_USE_W : 0;
+   psx |= ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT) ?
+      GEN8_PSX_DW1_ATTR_ENABLE : 0;
+
+   return psx;
+}
+
+/**
+ * Return the shader-dependent WM bits for Gen8: only the barycentric
+ * interpolation modes, placed with the GEN7_WM_DW1 shift.
+ */
+static uint32_t
+fs_get_wm_gen8(const struct ilo_dev *dev,
+               const struct ilo_shader_state *fs)
+{
+   uint32_t wm;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   wm = ilo_shader_get_kernel_param(fs,
+         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) <<
+      GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT;
+
+   return wm;
+}
+
+/**
+ * Initialize the fragment shader CSO payload for Gen8.
+ *
+ * The payload holds, in order, the values this function names dw3, dw6 and
+ * dw7, followed by the PSX bits from fs_get_psx_gen8() and the WM bits from
+ * fs_get_wm_gen8().
+ */
+static void
+fs_init_cso_gen8(const struct ilo_dev *dev,
+                 const struct ilo_shader_state *fs,
+                 struct ilo_shader_cso *cso)
+{
+   int grf_start, num_samplers;
+   uint32_t disp, ps6, ps7;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   grf_start = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
+   num_samplers = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
+
+   /* the alternative floating-point mode is never selected */
+   disp = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
+   /* sampler count is in units of four samplers, rounded up */
+   disp |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
+
+   /* always 64? */
+   ps6 = (64 - 2) << GEN8_PS_DW6_MAX_THREADS__SHIFT;
+   ps6 |= GEN8_PS_DW6_POSOFFSET_NONE;
+
+   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE))
+      ps6 |= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE;
+
+   /* only the 8-wide dispatch mode is programmed */
+   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
+   ps6 |= GEN6_PS_DISPATCH_8 << GEN8_PS_DW6_DISPATCH_MODE__SHIFT;
+
+   ps7 = grf_start << GEN8_PS_DW7_URB_GRF_START0__SHIFT |
+         0 << GEN8_PS_DW7_URB_GRF_START1__SHIFT |
+         0 << GEN8_PS_DW7_URB_GRF_START2__SHIFT;
+
+   STATIC_ASSERT(Elements(cso->payload) >= 5);
+   cso->payload[0] = disp;
+   cso->payload[1] = ps6;
+   cso->payload[2] = ps7;
+   cso->payload[3] = fs_get_psx_gen8(dev, fs);
+   cso->payload[4] = fs_get_wm_gen8(dev, fs);
+}
+
+/**
+ * Initialize a fragment shader CSO by dispatching to the initializer that
+ * matches the device generation (Gen8+, Gen7/7.5, or anything older).
+ */
+void
+ilo_gpe_init_fs_cso(const struct ilo_dev *dev,
+                    const struct ilo_shader_state *fs,
+                    struct ilo_shader_cso *cso)
+{
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      fs_init_cso_gen8(dev, fs, cso);
+      return;
+   }
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      fs_init_cso_gen7(dev, fs, cso);
+      return;
+   }
+
+   fs_init_cso_gen6(dev, fs, cso);
+}
+
+/**
+ * Intermediate description of a depth/stencil surface.  It is filled by
+ * zs_init_info() or zs_init_info_null() and consumed by
+ * ilo_gpe_init_zs_surface().
+ */
+struct ilo_zs_surface_info {
+   /* GEN6_SURFTYPE_x */
+   int surface_type;
+   /* GEN6_ZFORMAT_x */
+   int format;
+
+   /*
+    * Per-buffer parameters for the depth (zs), separate stencil, and HiZ
+    * buffers.  A NULL bo means the buffer is not present.
+    */
+   struct {
+      struct intel_bo *bo;
+      unsigned stride;
+      unsigned qpitch;
+      enum gen_surface_tiling tiling;
+      uint32_t offset;
+   } zs, stencil, hiz;
+
+   /* surface dimensions */
+   unsigned width;
+   unsigned height;
+   unsigned depth;
+
+   /* selected mip level and layer range */
+   unsigned lod;
+   unsigned first_layer;
+   unsigned num_layers;
+};
+
+/**
+ * Describe a null depth/stencil surface: no buffers, a 1x1x1 extent with a
+ * single layer, and a D32_FLOAT format.
+ */
+static void
+zs_init_info_null(const struct ilo_dev *dev,
+                  struct ilo_zs_surface_info *info)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* everything not set below, including all bo pointers, stays zero */
+   memset(info, 0, sizeof(*info));
+
+   info->width = 1;
+   info->height = 1;
+   info->depth = 1;
+   info->num_layers = 1;
+
+   info->format = GEN6_ZFORMAT_D32_FLOAT;
+   info->surface_type = GEN6_SURFTYPE_NULL;
+}
+
+/**
+ * Describe the depth/stencil surface for a view of \p tex.
+ *
+ * \param dev          device, Gen6 to Gen8
+ * \param tex          texture backing the depth/stencil surface
+ * \param format       pipe format of the view; an unsupported format asserts
+ *                     and yields the null surface description
+ * \param level        mip level of the view
+ * \param first_layer  first layer of the view
+ * \param num_layers   number of layers of the view
+ * \param info         filled with the surface description
+ */
+static void
+zs_init_info(const struct ilo_dev *dev,
+             const struct ilo_texture *tex,
+             enum pipe_format format, unsigned level,
+             unsigned first_layer, unsigned num_layers,
+             struct ilo_zs_surface_info *info)
+{
+   bool hiz_enabled, separate_stencil;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   memset(info, 0, sizeof(*info));
+
+   info->surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
+
+   if (info->surface_type == GEN6_SURFTYPE_CUBE) {
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
+       *
+       *     "For Other Surfaces (Cube Surfaces):
+       *      This field (Minimum Array Element) is ignored."
+       *
+       *     "For Other Surfaces (Cube Surfaces):
+       *      This field (Render Target View Extent) is ignored."
+       *
+       * As such, we cannot set first_layer and num_layers on cube surfaces.
+       * To work around that, treat it as a 2D surface.
+       */
+      info->surface_type = GEN6_SURFTYPE_2D;
+   }
+
+   /*
+    * Query HiZ once so that the separate stencil decision on GEN6 and the
+    * HiZ setup below are guaranteed to agree.
+    *
+    * NOTE(review): this assumes ilo_texture_can_enable_hiz() is a pure
+    * query with no side effects -- confirm.
+    */
+   hiz_enabled =
+      ilo_texture_can_enable_hiz(tex, level, first_layer, num_layers);
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      separate_stencil = true;
+   }
+   else {
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 317:
+       *
+       *     "This field (Separate Stencil Buffer Enable) must be set to the
+       *      same value (enabled or disabled) as Hierarchical Depth Buffer
+       *      Enable."
+       */
+      separate_stencil = hiz_enabled;
+   }
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 317:
+    *
+    *     "If this field (Hierarchical Depth Buffer Enable) is enabled, the
+    *      Surface Format of the depth buffer cannot be
+    *      D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
+    *      requires the separate stencil buffer."
+    *
+    * From the Ironlake PRM, volume 2 part 1, page 330:
+    *
+    *     "If this field (Separate Stencil Buffer Enable) is disabled, the
+    *      Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
+    *
+    * There is no similar restriction for GEN6.  But when D24_UNORM_X8_UINT
+    * is indeed used, the depth values output by the fragment shaders will
+    * be different when read back.
+    *
+    * As for GEN7+, separate_stencil is always true.
+    */
+   switch (format) {
+   case PIPE_FORMAT_Z16_UNORM:
+      info->format = GEN6_ZFORMAT_D16_UNORM;
+      break;
+   case PIPE_FORMAT_Z32_FLOAT:
+      info->format = GEN6_ZFORMAT_D32_FLOAT;
+      break;
+   case PIPE_FORMAT_Z24X8_UNORM:
+   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+      info->format = (separate_stencil) ?
+         GEN6_ZFORMAT_D24_UNORM_X8_UINT :
+         GEN6_ZFORMAT_D24_UNORM_S8_UINT;
+      break;
+   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+      info->format = (separate_stencil) ?
+         GEN6_ZFORMAT_D32_FLOAT :
+         GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT;
+      break;
+   case PIPE_FORMAT_S8_UINT:
+      /* stencil-only is possible only with a separate stencil buffer */
+      if (separate_stencil) {
+         info->format = GEN6_ZFORMAT_D32_FLOAT;
+         break;
+      }
+      /* fall through */
+   default:
+      assert(!"unsupported depth/stencil format");
+      zs_init_info_null(dev, info);
+      return;
+   }
+
+   /* depth buffer; absent for stencil-only views */
+   if (format != PIPE_FORMAT_S8_UINT) {
+      info->zs.bo = tex->image.bo;
+      info->zs.stride = tex->image.bo_stride;
+
+      assert(tex->image.layer_height % 4 == 0);
+      info->zs.qpitch = tex->image.layer_height / 4;
+
+      info->zs.tiling = tex->image.tiling;
+      info->zs.offset = 0;
+   }
+
+   /* stencil buffer: either the separate S8 texture or tex itself */
+   if (tex->separate_s8 || format == PIPE_FORMAT_S8_UINT) {
+      const struct ilo_texture *s8_tex =
+         (tex->separate_s8) ? tex->separate_s8 : tex;
+
+      info->stencil.bo = s8_tex->image.bo;
+
+      /*
+       * From the Sandy Bridge PRM, volume 2 part 1, page 329:
+       *
+       *     "The pitch must be set to 2x the value computed based on width,
+       *      as the stencil buffer is stored with two rows interleaved."
+       *
+       * For GEN7, we still double the stride because we did not double the
+       * slice widths when initializing the layout.
+       */
+      info->stencil.stride = s8_tex->image.bo_stride * 2;
+
+      assert(s8_tex->image.layer_height % 4 == 0);
+      info->stencil.qpitch = s8_tex->image.layer_height / 4;
+
+      info->stencil.tiling = s8_tex->image.tiling;
+
+      if (ilo_dev_gen(dev) == ILO_GEN(6)) {
+         unsigned x, y;
+
+         assert(s8_tex->image.walk == ILO_IMAGE_WALK_LOD);
+
+         /* offset to the level */
+         ilo_image_get_slice_pos(&s8_tex->image, level, 0, &x, &y);
+         ilo_image_pos_to_mem(&s8_tex->image, x, y, &x, &y);
+         info->stencil.offset = ilo_image_mem_to_raw(&s8_tex->image, x, y);
+      }
+   }
+
+   /* HiZ buffer */
+   if (hiz_enabled) {
+      info->hiz.bo = tex->image.aux_bo;
+      info->hiz.stride = tex->image.aux_stride;
+
+      assert(tex->image.aux_layer_height % 4 == 0);
+      info->hiz.qpitch = tex->image.aux_layer_height / 4;
+
+      info->hiz.tiling = GEN6_TILING_Y;
+
+      /* offset to the level */
+      if (ilo_dev_gen(dev) == ILO_GEN(6))
+         info->hiz.offset = tex->image.aux_offsets[level];
+   }
+
+   info->width = tex->image.width0;
+   info->height = tex->image.height0;
+   /* only 3D textures report their own depth; others use the layer count */
+   info->depth = (tex->base.target == PIPE_TEXTURE_3D) ?
+      tex->base.depth0 : num_layers;
+
+   info->lod = level;
+   info->first_layer = first_layer;
+   info->num_layers = num_layers;
+}
+
+/**
+ * Initialize a depth/stencil surface state.
+ *
+ * The twelve payload dwords are laid out as: [0..5] depth buffer
+ * (dw1..dw6), [6..8] separate stencil buffer, [9..11] HiZ buffer.  The bo
+ * pointers stored in \p zs are borrowed; no reference is taken.
+ *
+ * \param dev          device, Gen6 to Gen8
+ * \param tex          texture to view, or NULL for a null surface
+ * \param format       pipe format of the view (unused when tex is NULL)
+ * \param level        mip level of the view
+ * \param first_layer  first layer of the view
+ * \param num_layers   number of layers of the view
+ * \param zs           surface state to initialize
+ */
+void
+ilo_gpe_init_zs_surface(const struct ilo_dev *dev,
+                        const struct ilo_texture *tex,
+                        enum pipe_format format, unsigned level,
+                        unsigned first_layer, unsigned num_layers,
+                        struct ilo_zs_surface *zs)
+{
+   const int max_2d_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192;
+   const int max_array_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 2048 : 512;
+   struct ilo_zs_surface_info info;
+   uint32_t dw1, dw2, dw3, dw4, dw5, dw6;
+   int align_w = 8, align_h = 4;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   if (tex) {
+      zs_init_info(dev, tex, format, level, first_layer, num_layers, &info);
+
+      /* the 8x4 alignment unit shrinks with the sample count */
+      switch (tex->base.nr_samples) {
+      case 2:
+         align_w /= 2;
+         break;
+      case 4:
+         align_w /= 2;
+         align_h /= 2;
+         break;
+      case 8:
+         align_w /= 4;
+         align_h /= 2;
+         break;
+      case 16:
+         align_w /= 4;
+         align_h /= 4;
+         break;
+      default:
+         break;
+      }
+   } else {
+      zs_init_info_null(dev, &info);
+   }
+
+   /* sanity-check the extents against the per-type limits */
+   switch (info.surface_type) {
+   case GEN6_SURFTYPE_NULL:
+      break;
+   case GEN6_SURFTYPE_1D:
+      assert(info.width <= max_2d_size && info.height == 1 &&
+             info.depth <= max_array_size);
+      assert(info.first_layer < max_array_size - 1 &&
+             info.num_layers <= max_array_size);
+      break;
+   case GEN6_SURFTYPE_2D:
+      assert(info.width <= max_2d_size && info.height <= max_2d_size &&
+             info.depth <= max_array_size);
+      assert(info.first_layer < max_array_size - 1 &&
+             info.num_layers <= max_array_size);
+      break;
+   case GEN6_SURFTYPE_3D:
+      assert(info.width <= 2048 && info.height <= 2048 && info.depth <= 2048);
+      assert(info.first_layer < 2048 && info.num_layers <= max_array_size);
+      break;
+   case GEN6_SURFTYPE_CUBE:
+      assert(info.width <= max_2d_size && info.height <= max_2d_size &&
+             info.depth == 1);
+      assert(info.first_layer == 0 && info.num_layers == 1);
+      assert(info.width == info.height);
+      break;
+   default:
+      assert(!"unexpected depth surface type");
+      break;
+   }
+
+   /*
+    * surface_type and format are signed ints.  Convert them before shifting
+    * so that a type value landing in the top bits of the dword (the null
+    * surface type does) is not a signed-overflowing shift.
+    */
+   dw1 = (uint32_t) info.surface_type << GEN6_DEPTH_DW1_TYPE__SHIFT |
+         (uint32_t) info.format << GEN6_DEPTH_DW1_FORMAT__SHIFT;
+
+   if (info.zs.bo) {
+      /* required for GEN6+ */
+      assert(info.zs.tiling == GEN6_TILING_Y);
+      assert(info.zs.stride > 0 && info.zs.stride < 128 * 1024 &&
+            info.zs.stride % 128 == 0);
+      assert(info.width <= info.zs.stride);
+
+      dw1 |= (info.zs.stride - 1);
+      dw2 = info.zs.offset;
+   } else {
+      dw2 = 0;
+   }
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+      if (info.zs.bo)
+         dw1 |= GEN7_DEPTH_DW1_DEPTH_WRITE_ENABLE;
+
+      if (info.stencil.bo)
+         dw1 |= GEN7_DEPTH_DW1_STENCIL_WRITE_ENABLE;
+
+      if (info.hiz.bo)
+         dw1 |= GEN7_DEPTH_DW1_HIZ_ENABLE;
+
+      dw3 = (info.height - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT |
+            (info.width - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT |
+            info.lod << GEN7_DEPTH_DW3_LOD__SHIFT;
+
+      /* same as dw3, but with the extent padded to the alignment unit */
+      zs->dw_aligned_8x4 =
+         (align(info.height, align_h) - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT |
+         (align(info.width, align_w) - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT |
+         info.lod << GEN7_DEPTH_DW3_LOD__SHIFT;
+
+      dw4 = (info.depth - 1) << GEN7_DEPTH_DW4_DEPTH__SHIFT |
+            info.first_layer << GEN7_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT;
+
+      dw5 = 0;
+
+      dw6 = (info.num_layers - 1) << GEN7_DEPTH_DW6_RT_VIEW_EXTENT__SHIFT;
+
+      if (ilo_dev_gen(dev) >= ILO_GEN(8))
+         dw6 |= info.zs.qpitch;
+   } else {
+      /* always Y-tiled */
+      dw1 |= GEN6_TILING_Y << GEN6_DEPTH_DW1_TILING__SHIFT;
+
+      /* on GEN6, HiZ and separate stencil are enabled together */
+      if (info.hiz.bo) {
+         dw1 |= GEN6_DEPTH_DW1_HIZ_ENABLE |
+                GEN6_DEPTH_DW1_SEPARATE_STENCIL;
+      }
+
+      dw3 = (info.height - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT |
+            (info.width - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT |
+            info.lod << GEN6_DEPTH_DW3_LOD__SHIFT |
+            GEN6_DEPTH_DW3_MIPLAYOUT_BELOW;
+
+      /* same as dw3, but with the extent padded to the alignment unit */
+      zs->dw_aligned_8x4 =
+         (align(info.height, align_h) - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT |
+         (align(info.width, align_w) - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT |
+         info.lod << GEN6_DEPTH_DW3_LOD__SHIFT |
+         GEN6_DEPTH_DW3_MIPLAYOUT_BELOW;
+
+      dw4 = (info.depth - 1) << GEN6_DEPTH_DW4_DEPTH__SHIFT |
+            info.first_layer << GEN6_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT |
+            (info.num_layers - 1) << GEN6_DEPTH_DW4_RT_VIEW_EXTENT__SHIFT;
+
+      dw5 = 0;
+
+      dw6 = 0;
+   }
+
+   STATIC_ASSERT(Elements(zs->payload) >= 12);
+
+   zs->payload[0] = dw1;
+   zs->payload[1] = dw2;
+   zs->payload[2] = dw3;
+   zs->payload[3] = dw4;
+   zs->payload[4] = dw5;
+   zs->payload[5] = dw6;
+
+   /* do not increment reference count */
+   zs->bo = info.zs.bo;
+
+   /* separate stencil */
+   if (info.stencil.bo) {
+      assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 &&
+             info.stencil.stride % 128 == 0);
+
+      dw1 = (info.stencil.stride - 1) << GEN6_STENCIL_DW1_PITCH__SHIFT;
+      if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
+         dw1 |= GEN75_STENCIL_DW1_STENCIL_BUFFER_ENABLE;
+
+      dw2 = info.stencil.offset;
+      dw4 = info.stencil.qpitch;
+   } else {
+      dw1 = 0;
+      dw2 = 0;
+      dw4 = 0;
+   }
+
+   zs->payload[6] = dw1;
+   zs->payload[7] = dw2;
+   zs->payload[8] = dw4;
+   /* do not increment reference count */
+   zs->separate_s8_bo = info.stencil.bo;
+
+   /* hiz */
+   if (info.hiz.bo) {
+      dw1 = (info.hiz.stride - 1) << GEN6_HIZ_DW1_PITCH__SHIFT;
+      dw2 = info.hiz.offset;
+      dw4 = info.hiz.qpitch;
+   } else {
+      dw1 = 0;
+      dw2 = 0;
+      dw4 = 0;
+   }
+
+   zs->payload[9] = dw1;
+   zs->payload[10] = dw2;
+   zs->payload[11] = dw4;
+   /* do not increment reference count */
+   zs->hiz_bo = info.hiz.bo;
+}
+
+/**
+ * Compute the screen-space guardband rectangle for a viewport centered at
+ * (\p center_x, \p center_y).
+ *
+ * The result is an 8K-by-8K square around the center, shifted as needed so
+ * that it stays inside the supported extent [-max_extent, max_extent - 1].
+ *
+ * \param dev       device; selects the supported guardband extent
+ * \param center_x  viewport center, X
+ * \param center_y  viewport center, Y
+ * \param min_gbx   receives the left edge
+ * \param max_gbx   receives the right edge
+ * \param min_gby   receives the top edge
+ * \param max_gby   receives the bottom edge
+ */
+static void
+viewport_get_guardband(const struct ilo_dev *dev,
+                       int center_x, int center_y,
+                       int *min_gbx, int *max_gbx,
+                       int *min_gby, int *max_gby)
+{
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 234:
+    *
+    *     "Per-Device Guardband Extents
+    *
+    *       - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
+    *       - Maximum Post-Clamp Delta (X or Y): 16K"
+    *
+    *     "In addition, in order to be correctly rendered, objects must have a
+    *      screenspace bounding box not exceeding 8K in the X or Y direction.
+    *      This additional restriction must also be comprehended by software,
+    *      i.e., enforced by use of clipping."
+    *
+    * From the Ivy Bridge PRM, volume 2 part 1, page 248:
+    *
+    *     "Per-Device Guardband Extents
+    *
+    *       - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
+    *       - Maximum Post-Clamp Delta (X or Y): N/A"
+    *
+    *     "In addition, in order to be correctly rendered, objects must have a
+    *      screenspace bounding box not exceeding 8K in the X or Y direction.
+    *      This additional restriction must also be comprehended by software,
+    *      i.e., enforced by use of clipping."
+    *
+    * Combined, the bounding box of any object can not exceed 8K in both
+    * width and height.
+    *
+    * Below we set the guardband as a square of length 8K, centered at where
+    * the viewport is.  This makes sure all objects passing the GB test are
+    * valid to the renderer, and those failing the XY clipping have a
+    * better chance of passing the GB test.
+    */
+   const int max_extent = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 32768 : 16384;
+   const int half_len = 8192 / 2;
+
+   /*
+    * Make sure the guardband is within the valid range.  The upper bound is
+    * inclusive at (max_extent - 1), so the clamped center must leave the
+    * max edge exactly there rather than at max_extent.
+    */
+   if (center_x - half_len < -max_extent)
+      center_x = -max_extent + half_len;
+   else if (center_x + half_len > max_extent - 1)
+      center_x = max_extent - 1 - half_len;
+
+   if (center_y - half_len < -max_extent)
+      center_y = -max_extent + half_len;
+   else if (center_y + half_len > max_extent - 1)
+      center_y = max_extent - 1 - half_len;
+
+   *min_gbx = center_x - half_len;
+   *max_gbx = center_x + half_len;
+   *min_gby = center_y - half_len;
+   *max_gby = center_y + half_len;
+}
+
+/**
+ * Fill a viewport CSO from the pipe viewport state: the scale/translate
+ * matrix elements, the guardband expressed in NDC space, and the viewport
+ * bounds in screen space.
+ */
+void
+ilo_gpe_set_viewport_cso(const struct ilo_dev *dev,
+                         const struct pipe_viewport_state *state,
+                         struct ilo_viewport_cso *vp)
+{
+   /* the bounds below are computed from the magnitude of the scale */
+   const float abs_sx = fabs(state->scale[0]);
+   const float abs_sy = fabs(state->scale[1]);
+   const float abs_sz = fabs(state->scale[2]);
+   int gb_left, gb_right, gb_top, gb_bottom;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* the guardband is centered at the (truncated) viewport translation */
+   viewport_get_guardband(dev,
+         (int) state->translate[0],
+         (int) state->translate[1],
+         &gb_left, &gb_right, &gb_top, &gb_bottom);
+
+   /* matrix form: scale on the diagonal, translation in the last row */
+   vp->m00 = state->scale[0];
+   vp->m11 = state->scale[1];
+   vp->m22 = state->scale[2];
+   vp->m30 = state->translate[0];
+   vp->m31 = state->translate[1];
+   vp->m32 = state->translate[2];
+
+   /* guardband in NDC space: undo the viewport transform on each edge */
+   vp->min_gbx = ((float) gb_left - state->translate[0]) / abs_sx;
+   vp->max_gbx = ((float) gb_right - state->translate[0]) / abs_sx;
+   vp->min_gby = ((float) gb_top - state->translate[1]) / abs_sy;
+   vp->max_gby = ((float) gb_bottom - state->translate[1]) / abs_sy;
+
+   /* viewport in screen space: transform NDC -1 and +1 on each axis */
+   vp->min_x = abs_sx * -1.0f + state->translate[0];
+   vp->max_x = abs_sx * 1.0f + state->translate[0];
+   vp->min_y = abs_sy * -1.0f + state->translate[1];
+   vp->max_y = abs_sy * 1.0f + state->translate[1];
+   vp->min_z = abs_sz * -1.0f + state->translate[2];
+   vp->max_z = abs_sz * 1.0f + state->translate[2];
+}
+
+/**
+ * Map a PIPE_LOGICOP_x value to the corresponding GEN6_LOGICOP_x value.
+ * Unknown values assert and map to GEN6_LOGICOP_CLEAR.
+ */
+static int
+gen6_translate_pipe_logicop(unsigned logicop)
+{
+   int hw;
+
+   switch (logicop) {
+   case PIPE_LOGICOP_CLEAR:
+      hw = GEN6_LOGICOP_CLEAR;
+      break;
+   case PIPE_LOGICOP_NOR:
+      hw = GEN6_LOGICOP_NOR;
+      break;
+   case PIPE_LOGICOP_AND_INVERTED:
+      hw = GEN6_LOGICOP_AND_INVERTED;
+      break;
+   case PIPE_LOGICOP_COPY_INVERTED:
+      hw = GEN6_LOGICOP_COPY_INVERTED;
+      break;
+   case PIPE_LOGICOP_AND_REVERSE:
+      hw = GEN6_LOGICOP_AND_REVERSE;
+      break;
+   case PIPE_LOGICOP_INVERT:
+      hw = GEN6_LOGICOP_INVERT;
+      break;
+   case PIPE_LOGICOP_XOR:
+      hw = GEN6_LOGICOP_XOR;
+      break;
+   case PIPE_LOGICOP_NAND:
+      hw = GEN6_LOGICOP_NAND;
+      break;
+   case PIPE_LOGICOP_AND:
+      hw = GEN6_LOGICOP_AND;
+      break;
+   case PIPE_LOGICOP_EQUIV:
+      hw = GEN6_LOGICOP_EQUIV;
+      break;
+   case PIPE_LOGICOP_NOOP:
+      hw = GEN6_LOGICOP_NOOP;
+      break;
+   case PIPE_LOGICOP_OR_INVERTED:
+      hw = GEN6_LOGICOP_OR_INVERTED;
+      break;
+   case PIPE_LOGICOP_COPY:
+      hw = GEN6_LOGICOP_COPY;
+      break;
+   case PIPE_LOGICOP_OR_REVERSE:
+      hw = GEN6_LOGICOP_OR_REVERSE;
+      break;
+   case PIPE_LOGICOP_OR:
+      hw = GEN6_LOGICOP_OR;
+      break;
+   case PIPE_LOGICOP_SET:
+      hw = GEN6_LOGICOP_SET;
+      break;
+   default:
+      assert(!"unknown logicop function");
+      hw = GEN6_LOGICOP_CLEAR;
+      break;
+   }
+
+   return hw;
+}
+
+/**
+ * Map a PIPE_BLEND_x function to the corresponding GEN6_BLENDFUNCTION_x
+ * value.  Unknown values assert and map to GEN6_BLENDFUNCTION_ADD.
+ */
+static int
+gen6_translate_pipe_blend(unsigned blend)
+{
+   int hw;
+
+   switch (blend) {
+   case PIPE_BLEND_ADD:
+      hw = GEN6_BLENDFUNCTION_ADD;
+      break;
+   case PIPE_BLEND_SUBTRACT:
+      hw = GEN6_BLENDFUNCTION_SUBTRACT;
+      break;
+   case PIPE_BLEND_REVERSE_SUBTRACT:
+      hw = GEN6_BLENDFUNCTION_REVERSE_SUBTRACT;
+      break;
+   case PIPE_BLEND_MIN:
+      hw = GEN6_BLENDFUNCTION_MIN;
+      break;
+   case PIPE_BLEND_MAX:
+      hw = GEN6_BLENDFUNCTION_MAX;
+      break;
+   default:
+      assert(!"unknown blend function");
+      hw = GEN6_BLENDFUNCTION_ADD;
+      break;
+   }
+
+   return hw;
+}
+
+/**
+ * Map a PIPE_BLENDFACTOR_x value to the corresponding GEN6_BLENDFACTOR_x
+ * value.  Unknown values assert and map to GEN6_BLENDFACTOR_ONE.
+ */
+static int
+gen6_translate_pipe_blendfactor(unsigned blendfactor)
+{
+   int hw;
+
+   switch (blendfactor) {
+   case PIPE_BLENDFACTOR_ONE:
+      hw = GEN6_BLENDFACTOR_ONE;
+      break;
+   case PIPE_BLENDFACTOR_SRC_COLOR:
+      hw = GEN6_BLENDFACTOR_SRC_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA:
+      hw = GEN6_BLENDFACTOR_SRC_ALPHA;
+      break;
+   case PIPE_BLENDFACTOR_DST_ALPHA:
+      hw = GEN6_BLENDFACTOR_DST_ALPHA;
+      break;
+   case PIPE_BLENDFACTOR_DST_COLOR:
+      hw = GEN6_BLENDFACTOR_DST_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+      hw = GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE;
+      break;
+   case PIPE_BLENDFACTOR_CONST_COLOR:
+      hw = GEN6_BLENDFACTOR_CONST_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_CONST_ALPHA:
+      hw = GEN6_BLENDFACTOR_CONST_ALPHA;
+      break;
+   case PIPE_BLENDFACTOR_SRC1_COLOR:
+      hw = GEN6_BLENDFACTOR_SRC1_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_SRC1_ALPHA:
+      hw = GEN6_BLENDFACTOR_SRC1_ALPHA;
+      break;
+   case PIPE_BLENDFACTOR_ZERO:
+      hw = GEN6_BLENDFACTOR_ZERO;
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+      hw = GEN6_BLENDFACTOR_INV_SRC_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+      hw = GEN6_BLENDFACTOR_INV_SRC_ALPHA;
+      break;
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+      hw = GEN6_BLENDFACTOR_INV_DST_ALPHA;
+      break;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:
+      hw = GEN6_BLENDFACTOR_INV_DST_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+      hw = GEN6_BLENDFACTOR_INV_CONST_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+      hw = GEN6_BLENDFACTOR_INV_CONST_ALPHA;
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+      hw = GEN6_BLENDFACTOR_INV_SRC1_COLOR;
+      break;
+   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+      hw = GEN6_BLENDFACTOR_INV_SRC1_ALPHA;
+      break;
+   default:
+      assert(!"unknown blend factor");
+      hw = GEN6_BLENDFACTOR_ONE;
+      break;
+   }
+
+   return hw;
+}
+
+/**
+ * Map a PIPE_STENCIL_OP_x value to the corresponding GEN6_STENCILOP_x value.
+ * Unknown values assert and map to GEN6_STENCILOP_KEEP.
+ *
+ * Note the naming difference: the pipe INCR/DECR ops map to the hardware
+ * INCRSAT/DECRSAT ops, while the pipe INCR_WRAP/DECR_WRAP ops map to the
+ * hardware INCR/DECR ops.
+ */
+static int
+gen6_translate_pipe_stencil_op(unsigned stencil_op)
+{
+   int hw;
+
+   switch (stencil_op) {
+   case PIPE_STENCIL_OP_KEEP:
+      hw = GEN6_STENCILOP_KEEP;
+      break;
+   case PIPE_STENCIL_OP_ZERO:
+      hw = GEN6_STENCILOP_ZERO;
+      break;
+   case PIPE_STENCIL_OP_REPLACE:
+      hw = GEN6_STENCILOP_REPLACE;
+      break;
+   case PIPE_STENCIL_OP_INCR:
+      hw = GEN6_STENCILOP_INCRSAT;
+      break;
+   case PIPE_STENCIL_OP_DECR:
+      hw = GEN6_STENCILOP_DECRSAT;
+      break;
+   case PIPE_STENCIL_OP_INCR_WRAP:
+      hw = GEN6_STENCILOP_INCR;
+      break;
+   case PIPE_STENCIL_OP_DECR_WRAP:
+      hw = GEN6_STENCILOP_DECR;
+      break;
+   case PIPE_STENCIL_OP_INVERT:
+      hw = GEN6_STENCILOP_INVERT;
+      break;
+   default:
+      assert(!"unknown stencil op");
+      hw = GEN6_STENCILOP_KEEP;
+      break;
+   }
+
+   return hw;
+}
+
+/**
+ * Rewrite a hardware blend factor under the assumption that destination
+ * alpha is always one: DST_ALPHA becomes ONE, INV_DST_ALPHA and
+ * SRC_ALPHA_SATURATE become ZERO, and every other factor is unchanged.
+ */
+static int
+gen6_blend_factor_dst_alpha_forced_one(int factor)
+{
+   if (factor == GEN6_BLENDFACTOR_DST_ALPHA)
+      return GEN6_BLENDFACTOR_ONE;
+
+   if (factor == GEN6_BLENDFACTOR_INV_DST_ALPHA ||
+       factor == GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE)
+      return GEN6_BLENDFACTOR_ZERO;
+
+   return factor;
+}
+
+/**
+ * Return the per-render-target blend dword (GEN6_RT_DW0_x) for Gen6 to
+ * Gen7.5, or 0 when blending is disabled for the render target.
+ *
+ * When \p dst_alpha_forced_one is set, the factors are first rewritten with
+ * gen6_blend_factor_dst_alpha_forced_one().
+ */
+static uint32_t
+blend_get_rt_blend_enable_gen6(const struct ilo_dev *dev,
+                               const struct pipe_rt_blend_state *rt,
+                               bool dst_alpha_forced_one)
+{
+   int src_rgb, dst_rgb, src_a, dst_a;
+   uint32_t dw;
+
+   ILO_DEV_ASSERT(dev, 6, 7.5);
+
+   if (!rt->blend_enable)
+      return 0;
+
+   src_rgb = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
+   dst_rgb = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
+   src_a = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
+   dst_a = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
+
+   if (dst_alpha_forced_one) {
+      src_rgb = gen6_blend_factor_dst_alpha_forced_one(src_rgb);
+      dst_rgb = gen6_blend_factor_dst_alpha_forced_one(dst_rgb);
+      src_a = gen6_blend_factor_dst_alpha_forced_one(src_a);
+      dst_a = gen6_blend_factor_dst_alpha_forced_one(dst_a);
+   }
+
+   /* alpha function and factors go in the upper bits, color in the lower */
+   dw = GEN6_RT_DW0_BLEND_ENABLE;
+   dw |= gen6_translate_pipe_blend(rt->alpha_func) << 26;
+   dw |= src_a << 20;
+   dw |= dst_a << 15;
+   dw |= gen6_translate_pipe_blend(rt->rgb_func) << 11;
+   dw |= src_rgb << 5;
+   dw |= dst_rgb;
+
+   /* independent alpha is needed unless color and alpha blend identically */
+   if (!(rt->rgb_func == rt->alpha_func &&
+         src_rgb == src_a && dst_rgb == dst_a))
+      dw |= GEN6_RT_DW0_INDEPENDENT_ALPHA_ENABLE;
+
+   return dw;
+}
+
+/**
+ * Return the per-render-target blend dword (GEN8_RT_DW0_x) for Gen8, or 0
+ * when blending is disabled for the render target.
+ *
+ * \p independent_alpha is always written: it is set when the color and
+ * alpha blend functions or factors differ, and cleared when blending is
+ * disabled.
+ */
+static uint32_t
+blend_get_rt_blend_enable_gen8(const struct ilo_dev *dev,
+                               const struct pipe_rt_blend_state *rt,
+                               bool dst_alpha_forced_one,
+                               bool *independent_alpha)
+{
+   int src_rgb, dst_rgb, src_a, dst_a;
+   uint32_t dw;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   if (!rt->blend_enable) {
+      *independent_alpha = false;
+      return 0;
+   }
+
+   src_rgb = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
+   dst_rgb = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
+   src_a = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
+   dst_a = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
+
+   if (dst_alpha_forced_one) {
+      src_rgb = gen6_blend_factor_dst_alpha_forced_one(src_rgb);
+      dst_rgb = gen6_blend_factor_dst_alpha_forced_one(dst_rgb);
+      src_a = gen6_blend_factor_dst_alpha_forced_one(src_a);
+      dst_a = gen6_blend_factor_dst_alpha_forced_one(dst_a);
+   }
+
+   /* color function and factors go in the upper bits, alpha in the lower */
+   dw = GEN8_RT_DW0_BLEND_ENABLE;
+   dw |= src_rgb << 26;
+   dw |= dst_rgb << 21;
+   dw |= gen6_translate_pipe_blend(rt->rgb_func) << 18;
+   dw |= src_a << 13;
+   dw |= dst_a << 8;
+   dw |= gen6_translate_pipe_blend(rt->alpha_func) << 5;
+
+   *independent_alpha = !(rt->rgb_func == rt->alpha_func &&
+                          src_rgb == src_a &&
+                          dst_rgb == dst_a);
+
+   return dw;
+}
+
+/**
+ * Initialize blend->cso[index] from state->rt[index] for Gen6 to Gen7.5:
+ * the clamp and write-disable bits go to payload[1], and the blend dword is
+ * computed both with and without destination alpha forced to one.
+ */
+static void
+blend_init_cso_gen6(const struct ilo_dev *dev,
+                    const struct pipe_blend_state *state,
+                    struct ilo_blend_state *blend,
+                    unsigned index)
+{
+   const struct pipe_rt_blend_state *rt = &state->rt[index];
+   struct ilo_blend_cso *cso = &blend->cso[index];
+
+   ILO_DEV_ASSERT(dev, 6, 7.5);
+
+   cso->payload[0] = 0;
+   cso->payload[1] = GEN6_RT_DW1_COLORCLAMP_RTFORMAT |
+                     GEN6_RT_DW1_PRE_BLEND_CLAMP |
+                     GEN6_RT_DW1_POST_BLEND_CLAMP;
+
+   /* a channel missing from the colormask has its writes disabled */
+   cso->payload[1] |=
+      (rt->colormask & PIPE_MASK_A) ? 0 : GEN6_RT_DW1_WRITE_DISABLE_A;
+   cso->payload[1] |=
+      (rt->colormask & PIPE_MASK_R) ? 0 : GEN6_RT_DW1_WRITE_DISABLE_R;
+   cso->payload[1] |=
+      (rt->colormask & PIPE_MASK_G) ? 0 : GEN6_RT_DW1_WRITE_DISABLE_G;
+   cso->payload[1] |=
+      (rt->colormask & PIPE_MASK_B) ? 0 : GEN6_RT_DW1_WRITE_DISABLE_B;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 365:
+    *
+    *     "Color Buffer Blending and Logic Ops must not be enabled
+    *      simultaneously, or behavior is UNDEFINED."
+    *
+    * Since state->logicop_enable takes precedence over rt->blend_enable,
+    * no special care is needed.
+    */
+   if (state->logicop_enable) {
+      cso->dw_blend = 0;
+      cso->dw_blend_dst_alpha_forced_one = 0;
+      return;
+   }
+
+   cso->dw_blend = blend_get_rt_blend_enable_gen6(dev, rt, false);
+   cso->dw_blend_dst_alpha_forced_one =
+      blend_get_rt_blend_enable_gen6(dev, rt, true);
+}
+
+/**
+ * Initialize blend->cso[index] from state->rt[index] for Gen8: the
+ * write-disable bits go to payload[0], the clamp bits to payload[1], and
+ * the blend dword is computed both with and without destination alpha
+ * forced to one.
+ *
+ * \return true when either blend dword variant needs independent alpha
+ *         blending; always false when logic ops are enabled
+ */
+static bool
+blend_init_cso_gen8(const struct ilo_dev *dev,
+                    const struct pipe_blend_state *state,
+                    struct ilo_blend_state *blend,
+                    unsigned index)
+{
+   const struct pipe_rt_blend_state *rt = &state->rt[index];
+   struct ilo_blend_cso *cso = &blend->cso[index];
+   bool indep_normal, indep_forced;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   cso->payload[0] = 0;
+   cso->payload[1] = GEN8_RT_DW1_COLORCLAMP_RTFORMAT |
+                     GEN8_RT_DW1_PRE_BLEND_CLAMP |
+                     GEN8_RT_DW1_POST_BLEND_CLAMP;
+
+   /* a channel missing from the colormask has its writes disabled */
+   cso->payload[0] |=
+      (rt->colormask & PIPE_MASK_A) ? 0 : GEN8_RT_DW0_WRITE_DISABLE_A;
+   cso->payload[0] |=
+      (rt->colormask & PIPE_MASK_R) ? 0 : GEN8_RT_DW0_WRITE_DISABLE_R;
+   cso->payload[0] |=
+      (rt->colormask & PIPE_MASK_G) ? 0 : GEN8_RT_DW0_WRITE_DISABLE_G;
+   cso->payload[0] |=
+      (rt->colormask & PIPE_MASK_B) ? 0 : GEN8_RT_DW0_WRITE_DISABLE_B;
+
+   /* logic ops take precedence over blending */
+   if (state->logicop_enable) {
+      cso->dw_blend = 0;
+      cso->dw_blend_dst_alpha_forced_one = 0;
+      return false;
+   }
+
+   cso->dw_blend =
+      blend_get_rt_blend_enable_gen8(dev, rt, false, &indep_normal);
+   cso->dw_blend_dst_alpha_forced_one =
+      blend_get_rt_blend_enable_gen8(dev, rt, true, &indep_forced);
+
+   return (indep_normal || indep_forced);
+}
+
+/**
+ * Return the logic op bits (GEN6_RT_DW1_x) for Gen6 to Gen7.5, or 0 when
+ * logic ops are disabled.
+ */
+static uint32_t
+blend_get_logicop_enable_gen6(const struct ilo_dev *dev,
+                              const struct pipe_blend_state *state)
+{
+   uint32_t dw = 0;
+
+   ILO_DEV_ASSERT(dev, 6, 7.5);
+
+   if (state->logicop_enable) {
+      dw = GEN6_RT_DW1_LOGICOP_ENABLE |
+           gen6_translate_pipe_logicop(state->logicop_func) << 18;
+   }
+
+   return dw;
+}
+
+/**
+ * Return the logic op bits (GEN8_RT_DW1_x) for Gen8, or 0 when logic ops
+ * are disabled.
+ */
+static uint32_t
+blend_get_logicop_enable_gen8(const struct ilo_dev *dev,
+                              const struct pipe_blend_state *state)
+{
+   uint32_t dw = 0;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   if (state->logicop_enable) {
+      dw = GEN8_RT_DW1_LOGICOP_ENABLE |
+           gen6_translate_pipe_logicop(state->logicop_func) << 27;
+   }
+
+   return dw;
+}
+
+/**
+ * Return the alpha-to-coverage and alpha-to-one bits (GEN6_RT_DW1_x) for
+ * Gen6 to Gen7.5.  Coverage dithering is added on Gen7 and later.
+ */
+static uint32_t
+blend_get_alpha_mod_gen6(const struct ilo_dev *dev,
+                         const struct pipe_blend_state *state,
+                         bool dual_blend)
+{
+   uint32_t bits = 0;
+
+   ILO_DEV_ASSERT(dev, 6, 7.5);
+
+   if (state->alpha_to_coverage) {
+      bits |= GEN6_RT_DW1_ALPHA_TO_COVERAGE;
+      bits |= (ilo_dev_gen(dev) >= ILO_GEN(7)) ?
+         GEN6_RT_DW1_ALPHA_TO_COVERAGE_DITHER : 0;
+   }
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 378:
+    *
+    *     "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
+    *      must be disabled."
+    */
+   if (!dual_blend && state->alpha_to_one)
+      bits |= GEN6_RT_DW1_ALPHA_TO_ONE;
+
+   return bits;
+}
+
+/**
+ * Return the alpha-to-coverage (with dithering) and alpha-to-one bits
+ * (GEN8_BLEND_DW0_x) for Gen8.  Alpha-to-one is left off when dual source
+ * blending is in use.
+ */
+static uint32_t
+blend_get_alpha_mod_gen8(const struct ilo_dev *dev,
+                         const struct pipe_blend_state *state,
+                         bool dual_blend)
+{
+   uint32_t bits = 0;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   if (state->alpha_to_coverage) {
+      bits |= GEN8_BLEND_DW0_ALPHA_TO_COVERAGE;
+      bits |= GEN8_BLEND_DW0_ALPHA_TO_COVERAGE_DITHER;
+   }
+
+   if (!dual_blend && state->alpha_to_one)
+      bits |= GEN8_BLEND_DW0_ALPHA_TO_ONE;
+
+   return bits;
+}
+
+/**
+ * Derive the GEN8_PS_BLEND_DW1_x dword from a render target's blend dword
+ * (GEN8_RT_DW0_x): the four blend factors are copied over, and independent
+ * alpha is enabled when the color and alpha factors differ.  Returns 0 when
+ * \p rt_dw0 does not have blending enabled.
+ */
+static uint32_t
+blend_get_ps_blend_gen8(const struct ilo_dev *dev, uint32_t rt_dw0)
+{
+   int src_a, dst_a, src_rgb, dst_rgb;
+   uint32_t ps_blend;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   if (!(rt_dw0 & GEN8_RT_DW0_BLEND_ENABLE))
+      return 0;
+
+   /* pull the factors back out of the render target dword */
+   src_a = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_SRC_ALPHA_FACTOR);
+   dst_a = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_DST_ALPHA_FACTOR);
+   src_rgb = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_SRC_COLOR_FACTOR);
+   dst_rgb = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_DST_COLOR_FACTOR);
+
+   ps_blend = GEN8_PS_BLEND_DW1_BLEND_ENABLE;
+   ps_blend |= GEN_SHIFT32(src_a, GEN8_PS_BLEND_DW1_SRC_ALPHA_FACTOR);
+   ps_blend |= GEN_SHIFT32(dst_a, GEN8_PS_BLEND_DW1_DST_ALPHA_FACTOR);
+   ps_blend |= GEN_SHIFT32(src_rgb, GEN8_PS_BLEND_DW1_SRC_COLOR_FACTOR);
+   ps_blend |= GEN_SHIFT32(dst_rgb, GEN8_PS_BLEND_DW1_DST_COLOR_FACTOR);
+
+   if (!(src_a == src_rgb && dst_a == dst_rgb))
+      ps_blend |= GEN8_PS_BLEND_DW1_INDEPENDENT_ALPHA_ENABLE;
+
+   return ps_blend;
+}
+
+/**
+ * Initialize the blend state from a pipe blend state.
+ *
+ * Render target 0 is always initialized from state->rt[0].  The remaining
+ * render targets are initialized from their own pipe state when
+ * independent blending is enabled, and are copies of render target 0
+ * otherwise.
+ */
+void
+ilo_gpe_init_blend(const struct ilo_dev *dev,
+                   const struct pipe_blend_state *state,
+                   struct ilo_blend_state *blend)
+{
+   const bool is_gen8 = (ilo_dev_gen(dev) >= ILO_GEN(8));
+   unsigned rt;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /* dual source blending counts only when RT 0 actually blends */
+   blend->dual_blend = (util_blend_state_is_dual(state, 0) &&
+                        state->rt[0].blend_enable &&
+                        !state->logicop_enable);
+   blend->alpha_to_coverage = state->alpha_to_coverage;
+
+   /* dwords shared by all render targets, then render target 0 */
+   if (is_gen8) {
+      blend->dw_alpha_mod =
+         blend_get_alpha_mod_gen8(dev, state, blend->dual_blend);
+      blend->dw_logicop = blend_get_logicop_enable_gen8(dev, state);
+      blend->dw_shared = (state->dither) ? GEN8_BLEND_DW0_DITHER_ENABLE : 0;
+
+      if (blend_init_cso_gen8(dev, state, blend, 0))
+         blend->dw_shared |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE;
+
+      /* the PS blend dwords mirror render target 0 */
+      blend->dw_ps_blend = blend_get_ps_blend_gen8(dev,
+            blend->cso[0].dw_blend);
+      blend->dw_ps_blend_dst_alpha_forced_one = blend_get_ps_blend_gen8(dev,
+            blend->cso[0].dw_blend_dst_alpha_forced_one);
+   } else {
+      blend->dw_alpha_mod =
+         blend_get_alpha_mod_gen6(dev, state, blend->dual_blend);
+      blend->dw_logicop = blend_get_logicop_enable_gen6(dev, state);
+      blend->dw_shared = (state->dither) ? GEN6_RT_DW1_DITHER_ENABLE : 0;
+
+      blend->dw_ps_blend = 0;
+      blend->dw_ps_blend_dst_alpha_forced_one = 0;
+
+      blend_init_cso_gen6(dev, state, blend, 0);
+   }
+
+   /* remaining render targets */
+   for (rt = 1; rt < Elements(blend->cso); rt++) {
+      if (!state->independent_blend_enable) {
+         blend->cso[rt] = blend->cso[0];
+      } else if (is_gen8) {
+         if (blend_init_cso_gen8(dev, state, blend, rt))
+            blend->dw_shared |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE;
+      } else {
+         blend_init_cso_gen6(dev, state, blend, rt);
+      }
+   }
+}
+
+/**
+ * Map a PIPE_FUNC_x test function to the GEN6_COMPAREFUNCTION_x value of the
+ * same name.  An unknown \p func asserts and, in release builds, yields
+ * GEN6_COMPAREFUNCTION_NEVER.
+ */
+static int
+gen6_translate_dsa_func(unsigned func)
+{
+   int hw_func = GEN6_COMPAREFUNCTION_NEVER;
+
+   switch (func) {
+   case PIPE_FUNC_NEVER:
+      hw_func = GEN6_COMPAREFUNCTION_NEVER;
+      break;
+   case PIPE_FUNC_LESS:
+      hw_func = GEN6_COMPAREFUNCTION_LESS;
+      break;
+   case PIPE_FUNC_EQUAL:
+      hw_func = GEN6_COMPAREFUNCTION_EQUAL;
+      break;
+   case PIPE_FUNC_LEQUAL:
+      hw_func = GEN6_COMPAREFUNCTION_LEQUAL;
+      break;
+   case PIPE_FUNC_GREATER:
+      hw_func = GEN6_COMPAREFUNCTION_GREATER;
+      break;
+   case PIPE_FUNC_NOTEQUAL:
+      hw_func = GEN6_COMPAREFUNCTION_NOTEQUAL;
+      break;
+   case PIPE_FUNC_GEQUAL:
+      hw_func = GEN6_COMPAREFUNCTION_GEQUAL;
+      break;
+   case PIPE_FUNC_ALWAYS:
+      hw_func = GEN6_COMPAREFUNCTION_ALWAYS;
+      break;
+   default:
+      assert(!"unknown depth/stencil/alpha test function");
+      break;
+   }
+
+   return hw_func;
+}
+
+/**
+ * Build the GEN6_ZS_DW0 stencil dword (Gen6 to Gen7.5) from the two pipe
+ * stencil states.  Returns 0 when \p stencil0 is disabled; \p stencil1 is
+ * only looked at when \p stencil0 is enabled.
+ */
+static uint32_t
+dsa_get_stencil_enable_gen6(const struct ilo_dev *dev,
+                            const struct pipe_stencil_state *stencil0,
+                            const struct pipe_stencil_state *stencil1)
+{
+   uint32_t dw = 0;
+
+   ILO_DEV_ASSERT(dev, 6, 7.5);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 359:
+    *
+    *     "If the Depth Buffer is either undefined or does not have a surface
+    *      format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
+    *      stencil buffer is disabled, Stencil Test Enable must be DISABLED"
+    *
+    * From the Sandy Bridge PRM, volume 2 part 1, page 370:
+    *
+    *     "This field (Stencil Test Enable) cannot be enabled if
+    *      Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
+    *
+    * TODO We do not check these yet.
+    */
+   if (stencil0->enabled) {
+      dw |= GEN6_ZS_DW0_STENCIL_TEST_ENABLE;
+      dw |= gen6_translate_dsa_func(stencil0->func) << 28;
+      dw |= gen6_translate_pipe_stencil_op(stencil0->fail_op) << 25;
+      dw |= gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 22;
+      dw |= gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 19;
+
+      if (stencil1->enabled) {
+         dw |= GEN6_ZS_DW0_STENCIL1_ENABLE;
+         dw |= gen6_translate_dsa_func(stencil1->func) << 12;
+         dw |= gen6_translate_pipe_stencil_op(stencil1->fail_op) << 9;
+         dw |= gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 6;
+         dw |= gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 3;
+      }
+
+      /* a single write-enable bit covers both stencil states */
+      if (stencil0->writemask ||
+          (stencil1->enabled && stencil1->writemask))
+         dw |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE;
+   }
+
+   return dw;
+}
+
+/**
+ * Build the stencil part of the GEN8_ZS_DW1 dword from the two pipe stencil
+ * states.  Returns 0 when \p stencil0 is disabled; \p stencil1 is only
+ * looked at when \p stencil0 is enabled.
+ *
+ * The translated values are widened to uint32_t before being shifted.  The
+ * stencil0 fail op lands in bits 31:29, and shifting an int-typed value into
+ * the sign bit would be undefined behavior.
+ * NOTE(review): gen6_translate_pipe_stencil_op() is defined outside this
+ * chunk; the casts are harmless if it already returns an unsigned type.
+ */
+static uint32_t
+dsa_get_stencil_enable_gen8(const struct ilo_dev *dev,
+                            const struct pipe_stencil_state *stencil0,
+                            const struct pipe_stencil_state *stencil1)
+{
+   uint32_t dw;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   if (!stencil0->enabled)
+      return 0;
+
+   dw = (uint32_t) gen6_translate_pipe_stencil_op(stencil0->fail_op) << 29 |
+        (uint32_t) gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 26 |
+        (uint32_t) gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 23 |
+        (uint32_t) gen6_translate_dsa_func(stencil0->func) << 8 |
+        GEN8_ZS_DW1_STENCIL_TEST_ENABLE;
+   if (stencil0->writemask)
+      dw |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE;
+
+   if (stencil1->enabled) {
+      dw |= (uint32_t) gen6_translate_dsa_func(stencil1->func) << 20 |
+            (uint32_t) gen6_translate_pipe_stencil_op(stencil1->fail_op) << 17 |
+            (uint32_t) gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 14 |
+            (uint32_t) gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 11 |
+            GEN8_ZS_DW1_STENCIL1_ENABLE;
+      /* a single write-enable bit covers both stencil states */
+      if (stencil1->writemask)
+         dw |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE;
+   }
+
+   return dw;
+}
+
+/**
+ * Build the GEN6_ZS_DW2 depth dword (Gen6 to Gen7.5).  With the depth test
+ * disabled, the compare function is programmed to ALWAYS; depth writes are
+ * controlled independently by the write mask.
+ */
+static uint32_t
+dsa_get_depth_enable_gen6(const struct ilo_dev *dev,
+                          const struct pipe_depth_state *state)
+{
+   uint32_t dw;
+
+   ILO_DEV_ASSERT(dev, 6, 7.5);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 360:
+    *
+    *     "Enabling the Depth Test function without defining a Depth Buffer is
+    *      UNDEFINED."
+    *
+    * From the Sandy Bridge PRM, volume 2 part 1, page 375:
+    *
+    *     "A Depth Buffer must be defined before enabling writes to it, or
+    *      operation is UNDEFINED."
+    *
+    * TODO We do not check these yet.
+    */
+   if (!state->enabled) {
+      dw = GEN6_COMPAREFUNCTION_ALWAYS << 27;
+   } else {
+      dw = GEN6_ZS_DW2_DEPTH_TEST_ENABLE;
+      dw |= gen6_translate_dsa_func(state->func) << 27;
+   }
+
+   return (state->writemask) ? (dw | GEN6_ZS_DW2_DEPTH_WRITE_ENABLE) : dw;
+}
+
+/**
+ * Build the depth part of the GEN8_ZS_DW1 dword.  With the depth test
+ * disabled, the compare function is programmed to ALWAYS; depth writes are
+ * controlled independently by the write mask.
+ */
+static uint32_t
+dsa_get_depth_enable_gen8(const struct ilo_dev *dev,
+                          const struct pipe_depth_state *state)
+{
+   uint32_t dw;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   if (!state->enabled) {
+      dw = GEN6_COMPAREFUNCTION_ALWAYS << 5;
+   } else {
+      dw = GEN8_ZS_DW1_DEPTH_TEST_ENABLE;
+      dw |= gen6_translate_dsa_func(state->func) << 5;
+   }
+
+   return (state->writemask) ? (dw | GEN8_ZS_DW1_DEPTH_WRITE_ENABLE) : dw;
+}
+
+/**
+ * Return the alpha-test bits (enable plus compare function) for Gen6 to
+ * Gen7.5, or 0 when the alpha test is disabled.  The caller ORs the result
+ * into BLEND_STATE.
+ */
+static uint32_t
+dsa_get_alpha_enable_gen6(const struct ilo_dev *dev,
+                          const struct pipe_alpha_state *state)
+{
+   uint32_t dw = 0;
+
+   ILO_DEV_ASSERT(dev, 6, 7.5);
+
+   if (state->enabled) {
+      /* this will be ORed to BLEND_STATE */
+      dw = GEN6_RT_DW1_ALPHA_TEST_ENABLE |
+           gen6_translate_dsa_func(state->func) << 13;
+   }
+
+   return dw;
+}
+
+/**
+ * Return the alpha-test bits (enable plus compare function) for Gen8, or 0
+ * when the alpha test is disabled.  The caller ORs the result into
+ * BLEND_STATE.
+ */
+static uint32_t
+dsa_get_alpha_enable_gen8(const struct ilo_dev *dev,
+                          const struct pipe_alpha_state *state)
+{
+   uint32_t dw = 0;
+
+   ILO_DEV_ASSERT(dev, 8, 8);
+
+   if (state->enabled) {
+      /* this will be ORed to BLEND_STATE */
+      dw = GEN8_BLEND_DW0_ALPHA_TEST_ENABLE |
+           gen6_translate_dsa_func(state->func) << 24;
+   }
+
+   return dw;
+}
+
+/**
+ * Initialize \p dsa from a pipe depth/stencil/alpha state.
+ *
+ * On Gen8, the stencil and depth controls share payload[0]; payload[2] is
+ * not written on that path.  On Gen6 to Gen7.5, the stencil controls go to
+ * payload[0] and the depth controls to payload[2].  On all generations,
+ * payload[1] holds the four stencil masks, and the alpha-test bits and the
+ * 8-bit alpha reference are kept in separate members.
+ */
+void
+ilo_gpe_init_dsa(const struct ilo_dev *dev,
+                 const struct pipe_depth_stencil_alpha_state *state,
+                 struct ilo_dsa_state *dsa)
+{
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   STATIC_ASSERT(Elements(dsa->payload) >= 3);
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      const uint32_t dw_stencil = dsa_get_stencil_enable_gen8(dev,
+            &state->stencil[0], &state->stencil[1]);
+      const uint32_t dw_depth = dsa_get_depth_enable_gen8(dev, &state->depth);
+
+      /* the two helpers must produce disjoint bit fields */
+      assert(!(dw_stencil & dw_depth));
+      dsa->payload[0] = dw_stencil | dw_depth;
+
+      dsa->dw_blend_alpha = dsa_get_alpha_enable_gen8(dev, &state->alpha);
+      dsa->dw_ps_blend_alpha = (state->alpha.enabled) ?
+         GEN8_PS_BLEND_DW1_ALPHA_TEST_ENABLE : 0;
+   } else {
+      dsa->payload[0] = dsa_get_stencil_enable_gen6(dev,
+            &state->stencil[0], &state->stencil[1]);
+      dsa->payload[2] = dsa_get_depth_enable_gen6(dev, &state->depth);
+
+      dsa->dw_blend_alpha = dsa_get_alpha_enable_gen6(dev, &state->alpha);
+      dsa->dw_ps_blend_alpha = 0;
+   }
+
+   /*
+    * Widen the masks before shifting: they are promoted to int otherwise,
+    * and a value mask with its top bit set shifted left by 24 would overflow
+    * a signed int, which is undefined behavior.
+    */
+   dsa->payload[1] = (uint32_t) state->stencil[0].valuemask << 24 |
+                     (uint32_t) state->stencil[0].writemask << 16 |
+                     (uint32_t) state->stencil[1].valuemask << 8 |
+                     (uint32_t) state->stencil[1].writemask;
+
+   dsa->alpha_ref = float_to_ubyte(state->alpha.ref_value);
+}
+
+/**
+ * Store \p num_states pipe scissor rectangles into \p scissor, starting at
+ * slot \p start_slot.  Each slot takes two payload dwords: (min_y, min_x)
+ * and (max_y, max_x), with y in the upper 16 bits.
+ *
+ * An empty pipe rectangle is stored as min = 1, max = 0.  When slot 0 is
+ * updated, the pipe state is also copied to scissor->scissor0.
+ */
+void
+ilo_gpe_set_scissor(const struct ilo_dev *dev,
+                    unsigned start_slot,
+                    unsigned num_states,
+                    const struct pipe_scissor_state *states,
+                    struct ilo_scissor_state *scissor)
+{
+   unsigned i;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   for (i = 0; i < num_states; i++) {
+      uint16_t min_x, min_y, max_x, max_y;
+
+      /* both max and min are inclusive in SCISSOR_RECT */
+      if (states[i].minx < states[i].maxx &&
+          states[i].miny < states[i].maxy) {
+         min_x = states[i].minx;
+         min_y = states[i].miny;
+         max_x = states[i].maxx - 1;
+         max_y = states[i].maxy - 1;
+      }
+      else {
+         /* we have to make min greater than max */
+         min_x = 1;
+         min_y = 1;
+         max_x = 0;
+         max_y = 0;
+      }
+
+      /*
+       * Widen before shifting: a uint16_t is promoted to int, and shifting
+       * a value >= 0x8000 left by 16 would overflow a signed int.
+       */
+      scissor->payload[(start_slot + i) * 2 + 0] =
+         (uint32_t) min_y << 16 | min_x;
+      scissor->payload[(start_slot + i) * 2 + 1] =
+         (uint32_t) max_y << 16 | max_x;
+   }
+
+   if (!start_slot && num_states)
+      scissor->scissor0 = states[0];
+}
+
+/**
+ * Fill every slot of \p scissor with the empty rectangle min = (1, 1),
+ * max = (0, 0).  \p dev is not used.
+ */
+void
+ilo_gpe_set_scissor_null(const struct ilo_dev *dev,
+                         struct ilo_scissor_state *scissor)
+{
+   unsigned slot;
+
+   /* each slot is a (min, max) pair of dwords */
+   for (slot = 0; slot * 2 < Elements(scissor->payload); slot++) {
+      scissor->payload[slot * 2 + 0] = 1 << 16 | 1;
+      scissor->payload[slot * 2 + 1] = 0;
+   }
+}
+
+/**
+ * Fill \p caps with what the output merger can do on a render target of the
+ * given \p format: logic ops, blending, alpha test, and whether destination
+ * alpha is forced to one.  For PIPE_FORMAT_NONE and mixed formats, all
+ * capabilities are left cleared.
+ */
+static void
+fb_set_blend_caps(const struct ilo_dev *dev,
+                  enum pipe_format format,
+                  struct ilo_fb_blend_caps *caps)
+{
+   const struct util_format_description *desc =
+      util_format_description(format);
+   const int ch = util_format_get_first_non_void_channel(format);
+   bool is_pure_int;
+
+   memset(caps, 0, sizeof(*caps));
+
+   if (format == PIPE_FORMAT_NONE || desc->is_mixed)
+      return;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 365:
+    *
+    *     "Logic Ops are only supported on *_UNORM surfaces (excluding _SRGB
+    *      variants), otherwise Logic Ops must be DISABLED."
+    *
+    * According to the classic driver, this is lifted on Gen8+.
+    */
+   caps->can_logicop = (ilo_dev_gen(dev) >= ILO_GEN(8)) ||
+      (ch >= 0 && desc->channel[ch].normalized &&
+       desc->channel[ch].type == UTIL_FORMAT_TYPE_UNSIGNED &&
+       desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB);
+
+   is_pure_int = util_format_is_pure_integer(format);
+
+   /* no blending for pure integer formats */
+   caps->can_blend = !is_pure_int;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 382:
+    *
+    *     "Alpha Test can only be enabled if Pixel Shader outputs a float
+    *      alpha value."
+    */
+   caps->can_alpha_test = !is_pure_int;
+
+   /* the render format differs from the color format of the same name */
+   caps->dst_alpha_forced_one =
+      (ilo_format_translate_render(dev, format) !=
+       ilo_format_translate_color(dev, format));
+
+   /* sanity check: only the one known substitution is expected */
+   if (caps->dst_alpha_forced_one) {
+      const enum pipe_format render_format =
+         (format == PIPE_FORMAT_B8G8R8X8_UNORM) ?
+         PIPE_FORMAT_B8G8R8A8_UNORM : PIPE_FORMAT_NONE;
+
+      assert(ilo_format_translate_render(dev, format) ==
+             ilo_format_translate_color(dev, render_format));
+   }
+}
+
+/**
+ * Update \p fb from a pipe framebuffer state: copy the state, rebuild the
+ * null render target view with the framebuffer size (at least 1x1), refresh
+ * the blend caps of each color buffer slot, and derive the sample count
+ * from the first bound color buffer, or else from the depth/stencil buffer.
+ */
+void
+ilo_gpe_set_fb(const struct ilo_dev *dev,
+               const struct pipe_framebuffer_state *state,
+               struct ilo_fb_state *fb)
+{
+   const struct pipe_surface *first_surf = NULL;
+   int i;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   util_copy_framebuffer_state(&fb->state, state);
+
+   ilo_gpe_init_view_surface_null(dev,
+         (state->width) ? state->width : 1,
+         (state->height) ? state->height : 1,
+         1, 0, &fb->null_rt);
+
+   for (i = 0; i < state->nr_cbufs; i++) {
+      const struct pipe_surface *surf = state->cbufs[i];
+
+      /* an unbound slot gets the caps of PIPE_FORMAT_NONE */
+      fb_set_blend_caps(dev, (surf) ? surf->format : PIPE_FORMAT_NONE,
+            &fb->blend_caps[i]);
+
+      if (surf && !first_surf)
+         first_surf = surf;
+   }
+
+   /* fall back to the depth/stencil buffer when no color buffer is bound */
+   if (!first_surf && state->zsbuf)
+      first_surf = state->zsbuf;
+
+   /* a sample count of zero is treated as one */
+   fb->num_samples = (first_surf) ? first_surf->texture->nr_samples : 1;
+   if (!fb->num_samples)
+      fb->num_samples = 1;
+
+   /*
+    * The PRMs list several restrictions when the framebuffer has more than
+    * one surface.  It seems they are actually lifted on GEN6+.
+    */
+}
--- /dev/null
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2014 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "genhw/genhw.h"
+#include "util/u_dual_blend.h"
+#include "util/u_framebuffer.h"
+#include "util/u_half.h"
+#include "util/u_resource.h"
+
+#include "ilo_format.h"
+#include "ilo_state_3d.h"
+#include "../ilo_resource.h"
+#include "../ilo_shader.h"
+
+/**
+ * Initialize the two VERTEX_ELEMENT dwords of \p cso for a pipe vertex
+ * element that reads from hardware vertex buffer \p vb_index.
+ *
+ * Components present in the source format are stored as-is.  For formats
+ * with one to three components, the missing ones up to the third are stored
+ * as 0 and the fourth as 1 (integer or float, depending on the format).
+ */
+static void
+ve_init_cso(const struct ilo_dev *dev,
+            const struct pipe_vertex_element *state,
+            unsigned vb_index,
+            struct ilo_ve_cso *cso)
+{
+   int comp[4];
+   int format;
+   unsigned nr_comps, c;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   for (c = 0; c < 4; c++)
+      comp[c] = GEN6_VFCOMP_STORE_SRC;
+
+   nr_comps = util_format_get_nr_components(state->src_format);
+
+   /* only formats with 1, 2, or 3 components get defaults filled in */
+   if (nr_comps == 1)
+      comp[1] = GEN6_VFCOMP_STORE_0;
+   if (nr_comps == 1 || nr_comps == 2)
+      comp[2] = GEN6_VFCOMP_STORE_0;
+   if (nr_comps >= 1 && nr_comps <= 3) {
+      if (util_format_is_pure_integer(state->src_format))
+         comp[3] = GEN6_VFCOMP_STORE_1_INT;
+      else
+         comp[3] = GEN6_VFCOMP_STORE_1_FP;
+   }
+
+   format = ilo_format_translate_vertex(dev, state->src_format);
+
+   STATIC_ASSERT(Elements(cso->payload) >= 2);
+
+   cso->payload[0] =
+      vb_index << GEN6_VE_DW0_VB_INDEX__SHIFT |
+      GEN6_VE_DW0_VALID |
+      format << GEN6_VE_DW0_FORMAT__SHIFT |
+      state->src_offset << GEN6_VE_DW0_VB_OFFSET__SHIFT;
+
+   cso->payload[1] =
+      comp[0] << GEN6_VE_DW1_COMP0__SHIFT |
+      comp[1] << GEN6_VE_DW1_COMP1__SHIFT |
+      comp[2] << GEN6_VE_DW1_COMP2__SHIFT |
+      comp[3] << GEN6_VE_DW1_COMP3__SHIFT;
+}
+
+/**
+ * Initialize \p ve from \p num_states pipe vertex elements.
+ *
+ * Each distinct (pipe vertex buffer index, instance divisor) pair is
+ * assigned its own hardware vertex buffer slot, recorded in ve->vb_mapping
+ * and ve->instance_divisors.  Each element's CSO refers to the slot of its
+ * pair.
+ */
+void
+ilo_gpe_init_ve(const struct ilo_dev *dev,
+                unsigned num_states,
+                const struct pipe_vertex_element *states,
+                struct ilo_ve_state *ve)
+{
+   unsigned elem;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   ve->count = num_states;
+   ve->vb_count = 0;
+
+   for (elem = 0; elem < num_states; elem++) {
+      const unsigned pipe_vb = states[elem].vertex_buffer_index;
+      const unsigned divisor = states[elem].instance_divisor;
+      unsigned hw_vb = 0;
+
+      /*
+       * A hardware vb has a fixed instance divisor.  Look for one already
+       * mapped to this pipe vb with the same divisor.
+       */
+      while (hw_vb < ve->vb_count &&
+             !(ve->vb_mapping[hw_vb] == pipe_vb &&
+               ve->instance_divisors[hw_vb] == divisor))
+         hw_vb++;
+
+      /* none found: append a new hardware vb */
+      if (hw_vb >= ve->vb_count) {
+         hw_vb = ve->vb_count++;
+
+         ve->vb_mapping[hw_vb] = pipe_vb;
+         ve->instance_divisors[hw_vb] = divisor;
+      }
+
+      ve_init_cso(dev, &states[elem], hw_vb, &ve->cso[elem]);
+   }
+}
+
+/**
+ * Turn an already-initialized vertex element \p cso into an edge flag
+ * element: set the edge flag enable bit, replace the R32_FLOAT and
+ * R8_USCALED source formats with their _UINT counterparts, and store only
+ * component 0.
+ */
+void
+ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev,
+                        struct ilo_ve_cso *cso)
+{
+   int src_format, edge_format;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 94:
+    *
+    *     "- This bit (Edge Flag Enable) must only be ENABLED on the last
+    *        valid VERTEX_ELEMENT structure.
+    *
+    *      - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
+    *        and Component 1-3 Control must be set to VFCOMP_NOSTORE.
+    *
+    *      - The Source Element Format must be set to the UINT format.
+    *
+    *      - [DevSNB]: Edge Flags are not supported for QUADLIST
+    *        primitives.  Software may elect to convert QUADLIST primitives
+    *        to some set of corresponding edge-flag-supported primitive
+    *        types (e.g., POLYGONs) prior to submission to the 3D pipeline."
+    */
+   cso->payload[0] |= GEN6_VE_DW0_EDGE_FLAG_ENABLE;
+
+   /*
+    * Edge flags have format GEN6_FORMAT_R8_USCALED when defined via
+    * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined
+    * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
+    *
+    * Since all the hardware cares about is whether the flags are zero or not,
+    * we can treat them as the corresponding _UINT formats.
+    */
+   src_format = GEN_EXTRACT(cso->payload[0], GEN6_VE_DW0_FORMAT);
+
+   if (src_format == GEN6_FORMAT_R32_FLOAT)
+      edge_format = GEN6_FORMAT_R32_UINT;
+   else if (src_format == GEN6_FORMAT_R8_USCALED)
+      edge_format = GEN6_FORMAT_R8_UINT;
+   else
+      edge_format = src_format;
+
+   /* rewrite the format field in place */
+   cso->payload[0] &= ~GEN6_VE_DW0_FORMAT__MASK;
+   cso->payload[0] |= GEN_SHIFT32(edge_format, GEN6_VE_DW0_FORMAT);
+
+   cso->payload[1] =
+      GEN6_VFCOMP_STORE_SRC << GEN6_VE_DW1_COMP0__SHIFT |
+      GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP1__SHIFT |
+      GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP2__SHIFT |
+      GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP3__SHIFT;
+}
+
+/**
+ * Initialize \p cso as a valid vertex element that reads no source data:
+ * each of the four component controls is given by the caller, and none of
+ * them may be GEN6_VFCOMP_STORE_SRC.
+ */
+void
+ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev,
+                      int comp0, int comp1, int comp2, int comp3,
+                      struct ilo_ve_cso *cso)
+{
+   uint32_t dw1;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   STATIC_ASSERT(Elements(cso->payload) >= 2);
+
+   assert(comp0 != GEN6_VFCOMP_STORE_SRC &&
+          comp1 != GEN6_VFCOMP_STORE_SRC &&
+          comp2 != GEN6_VFCOMP_STORE_SRC &&
+          comp3 != GEN6_VFCOMP_STORE_SRC);
+
+   dw1 = comp0 << GEN6_VE_DW1_COMP0__SHIFT |
+         comp1 << GEN6_VE_DW1_COMP1__SHIFT |
+         comp2 << GEN6_VE_DW1_COMP2__SHIFT |
+         comp3 << GEN6_VE_DW1_COMP3__SHIFT;
+
+   cso->payload[0] = GEN6_VE_DW0_VALID;
+   cso->payload[1] = dw1;
+}
+
+/**
+ * Initialize the three 3DSTATE_VS dwords (DW2, DW4, DW5) kept in \p cso from
+ * the kernel parameters of the vertex shader \p vs.
+ */
+void
+ilo_gpe_init_vs_cso(const struct ilo_dev *dev,
+                    const struct ilo_shader_state *vs,
+                    struct ilo_shader_cso *cso)
+{
+   const bool alt_fp_mode = false;
+   int start_grf, vue_read_len, sampler_count, max_threads;
+   uint32_t dw2, dw4, dw5;
+
+   ILO_DEV_ASSERT(dev, 6, 8);
+
+   start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG);
+   vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT);
+   sampler_count = ilo_shader_get_kernel_param(vs, ILO_KERNEL_SAMPLER_COUNT);
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 135:
+    *
+    *     "(Vertex URB Entry Read Length) Specifies the number of pairs of
+    *      128-bit vertex elements to be passed into the payload for each
+    *      vertex."
+    *
+    *     "It is UNDEFINED to set this field to 0 indicating no Vertex URB
+    *      data to be read and passed to the thread."
+    */
+   vue_read_len = (vue_read_len + 1) / 2;
+   if (vue_read_len == 0)
+      vue_read_len = 1;
+
+   /* the thread count is doubled on Gen7.5 GT2 */
+   max_threads = dev->thread_count;
+   if (ilo_dev_gen(dev) == ILO_GEN(7.5) && dev->gt == 2)
+      max_threads *= 2;
+
+   /* the sampler count field is in units of four samplers, rounded up */
+   dw2 = (alt_fp_mode) ? GEN6_THREADDISP_FP_MODE_ALT : 0;
+   dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
+
+   dw4 = start_grf << GEN6_VS_DW4_URB_GRF_START__SHIFT |
+         vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
+         0 << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT;
+
+   dw5 = GEN6_VS_DW5_STATISTICS |
+         GEN6_VS_DW5_VS_ENABLE;
+
+   /* the max threads field sits at a different bit position on Gen7.5+ */
+   if (ilo_dev_gen(dev) < ILO_GEN(7.5))
+      dw5 |= (max_threads - 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT;
+   else
+      dw5 |= (max_threads - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT;
+
+   STATIC_ASSERT(Elements(cso->payload) >= 3);
+   cso->payload[0] = dw2;
+   cso->payload[1] = dw4;
+   cso->payload[2] = dw5;
+}
+
+/**
+ * Initialize the four Gen6 3DSTATE_GS dwords (DW2, DW4, DW5, DW6) kept in
+ * \p cso.  \p gs is either a geometry shader, or a shader of another type
+ * whose ILO_KERNEL_VS_GEN6_SO_START_REG and ILO_KERNEL_OUTPUT_COUNT
+ * parameters are used instead.
+ */
+static void
+gs_init_cso_gen6(const struct ilo_dev *dev,
+                 const struct ilo_shader_state *gs,
+                 struct ilo_shader_cso *cso)
+{
+   int start_grf, vue_read_len, max_threads;
+   uint32_t dw2, dw4, dw5, dw6;
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   if (ilo_shader_get_type(gs) != PIPE_SHADER_GEOMETRY) {
+      start_grf = ilo_shader_get_kernel_param(gs,
+            ILO_KERNEL_VS_GEN6_SO_START_REG);
+      vue_read_len = ilo_shader_get_kernel_param(gs,
+            ILO_KERNEL_OUTPUT_COUNT);
+   }
+   else {
+      start_grf = ilo_shader_get_kernel_param(gs,
+            ILO_KERNEL_URB_DATA_START_REG);
+      vue_read_len = ilo_shader_get_kernel_param(gs,
+            ILO_KERNEL_INPUT_COUNT);
+   }
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 153:
+    *
+    *     "Specifies the amount of URB data read and passed in the thread
+    *      payload for each Vertex URB entry, in 256-bit register increments.
+    *
+    *      It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
+    *      0 indicating no Vertex URB data to be read and passed to the
+    *      thread."
+    */
+   vue_read_len = (vue_read_len + 1) / 2;
+   if (vue_read_len == 0)
+      vue_read_len = 1;
+
+   /*
+    * From the Sandy Bridge PRM, volume 2 part 1, page 154:
+    *
+    *     "Maximum Number of Threads valid range is [0,27] when Rendering
+    *      Enabled bit is set."
+    *
+    * From the Sandy Bridge PRM, volume 2 part 1, page 173:
+    *
+    *     "Programming Note: If the GS stage is enabled, software must always
+    *      allocate at least one GS URB Entry. This is true even if the GS
+    *      thread never needs to output vertices to the pipeline, e.g., when
+    *      only performing stream output. This is an artifact of the need to
+    *      pass the GS thread an initial destination URB handle."
+    *
+    * As such, we always enable rendering, and limit the number of threads.
+    *
+    * GT2: maximum is 60, but limited to 28.
+    * Otherwise: maximum is 24, but limited to 21 (see brwCreateContext()).
+    */
+   max_threads = (dev->gt == 2) ? 28 : 21;
+
+   dw2 = GEN6_THREADDISP_SPF;
+
+   dw4 = vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT |
+         0 << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT |
+         start_grf << GEN6_GS_DW4_URB_GRF_START__SHIFT;
+
+   dw5 = (max_threads - 1) << GEN6_GS_DW5_MAX_THREADS__SHIFT |
+         GEN6_GS_DW5_STATISTICS |
+         GEN6_GS_DW5_SO_STATISTICS |
+         GEN6_GS_DW5_RENDER_ENABLE;
+
+   /*
+    * we cannot make use of GEN6_GS_REORDER because it will reorder
+    * triangle strips according to D3D rules (triangle 2N+1 uses vertices
+    * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
+    * (2N+2, 2N+1, 2N+3)).
+    */
+   dw6 = GEN6_GS_DW6_GS_ENABLE;
+
+   if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY))
+      dw6 |= GEN6_GS_DW6_DISCARD_ADJACENCY;
+
+   if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) {
+      const uint32_t svbi_post_inc =
+         ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC);
+
+      dw6 |= GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE;
+
+      /* post-increment is enabled only for a non-zero increment */
+      if (svbi_post_inc != 0) {
+         dw6 |= GEN6_GS_DW6_SVBI_POST_INC_ENABLE;
+         dw6 |= svbi_post_inc << GEN6_GS_DW6_SVBI_POST_INC_VAL__SHIFT;
+      }
+   }
+
+   STATIC_ASSERT(Elements(cso->payload) >= 4);
+   cso->payload[0] = dw2;
+   cso->payload[1] = dw4;
+   cso->payload[2] = dw5;
+   cso->payload[3] = dw6;
+}
+
+/**
+ * Initialize the three Gen7/Gen7.5 3DSTATE_GS dwords (DW2, DW4, DW5) kept in
+ * \p cso from the kernel parameters of the geometry shader \p gs.
+ */
+static void
+gs_init_cso_gen7(const struct ilo_dev *dev,
+                 const struct ilo_shader_state *gs,
+                 struct ilo_shader_cso *cso)
+{
+   const bool alt_fp_mode = false;
+   int start_grf, vue_read_len, sampler_count, max_threads;
+   uint32_t dw2, dw4, dw5;
+
+   ILO_DEV_ASSERT(dev, 7, 7.5);
+
+   start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG);
+   vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
+   sampler_count = ilo_shader_get_kernel_param(gs, ILO_KERNEL_SAMPLER_COUNT);
+
+   /* in pairs */
+   vue_read_len = (vue_read_len + 1) / 2;
+
+   /* the thread limit depends on the generation and the GT level */
+   if (ilo_dev_gen(dev) == ILO_GEN(7.5))
+      max_threads = (dev->gt >= 2) ? 256 : 70;
+   else if (ilo_dev_gen(dev) == ILO_GEN(7))
+      max_threads = (dev->gt == 2) ? 128 : 36;
+   else
+      max_threads = 1;
+
+   /* the sampler count field is in units of four samplers, rounded up */
+   dw2 = (alt_fp_mode) ? GEN6_THREADDISP_FP_MODE_ALT : 0;
+   dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
+
+   dw4 = vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT |
+         GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES |
+         0 << GEN7_GS_DW4_URB_READ_OFFSET__SHIFT |
+         start_grf << GEN7_GS_DW4_URB_GRF_START__SHIFT;
+
+   dw5 = (max_threads - 1) << GEN7_GS_DW5_MAX_THREADS__SHIFT |
+         GEN7_GS_DW5_STATISTICS |
+         GEN7_GS_DW5_GS_ENABLE;
+
+   STATIC_ASSERT(Elements(cso->payload) >= 3);
+   cso->payload[0] = dw2;
+   cso->payload[1] = dw4;
+   cso->payload[2] = dw5;
+}
+
+/**
+ * Initialize the geometry shader CSO, dispatching to the Gen6 helper on
+ * devices older than Gen7 and to the Gen7 helper otherwise.
+ */
+void
+ilo_gpe_init_gs_cso(const struct ilo_dev *dev,
+                    const struct ilo_shader_state *gs,
+                    struct ilo_shader_cso *cso)
+{
+   if (ilo_dev_gen(dev) < ILO_GEN(7)) {
+      gs_init_cso_gen6(dev, gs, cso);
+      return;
+   }
+
+   gs_init_cso_gen7(dev, gs, cso);
+}
+
+/**
+ * Initialize \p surf as a Gen6 null SURFACE_STATE of the given size and LOD.
+ * \p width, \p height, and \p depth must all be at least 1.
+ */
+static void
+view_init_null_gen6(const struct ilo_dev *dev,
+                    unsigned width, unsigned height,
+                    unsigned depth, unsigned level,
+                    struct ilo_view_surface *surf)
+{
+   uint32_t *payload;
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   assert(width >= 1 && height >= 1 && depth >= 1);
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 71:
+    *
+    *     "A null surface will be used in instances where an actual surface is
+    *      not bound. When a write message is generated to a null surface, no
+    *      actual surface is written to. When a read message (including any
+    *      sampling engine message) is generated to a null surface, the result
+    *      is all zeros. Note that a null surface type is allowed to be used
+    *      with all messages, even if it is not specificially indicated as
+    *      supported. All of the remaining fields in surface state are ignored
+    *      for null surfaces, with the following exceptions:
+    *
+    *        * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
+    *          depth buffer's corresponding state for all render target
+    *          surfaces, including null.
+    *        * Surface Format must be R8G8B8A8_UNORM."
+    *
+    * From the Sandy Bridge PRM, volume 4 part 1, page 82:
+    *
+    *     "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
+    *      true"
+    */
+
+   STATIC_ASSERT(Elements(surf->payload) >= 6);
+   payload = surf->payload;
+
+   /* type and format */
+   payload[0] = GEN6_SURFTYPE_NULL << GEN6_SURFACE_DW0_TYPE__SHIFT |
+                GEN6_FORMAT_B8G8R8A8_UNORM << GEN6_SURFACE_DW0_FORMAT__SHIFT;
+
+   payload[1] = 0;
+
+   /* the size fields hold the dimension minus one */
+   payload[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
+                (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT |
+                level << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT;
+
+   payload[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT |
+                GEN6_TILING_X;
+
+   payload[4] = 0;
+   payload[5] = 0;
+}
+
+/**
+ * Initialize \p surf as a Gen6 SURFTYPE_BUFFER SURFACE_STATE.
+ *
+ * The buffer range starts at \p offset and is \p size bytes long.  It is
+ * viewed as an array of structures of \p struct_size bytes, each providing
+ * one element of format \p elem_format.  A trailing partial structure still
+ * counts as an entry when it is large enough to hold one element.
+ *
+ * \p buf is not used here; only the offset is stored in DW1.
+ */
+static void
+view_init_for_buffer_gen6(const struct ilo_dev *dev,
+                          const struct ilo_buffer *buf,
+                          unsigned offset, unsigned size,
+                          unsigned struct_size,
+                          enum pipe_format elem_format,
+                          bool is_rt, bool render_cache_rw,
+                          struct ilo_view_surface *surf)
+{
+   const int elem_size = util_format_get_blocksize(elem_format);
+   int width, height, depth, pitch;
+   int surface_format, num_entries;
+   uint32_t *payload;
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   /*
+    * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
+    * structure in a buffer.
+    */
+
+   surface_format = ilo_format_translate_color(dev, elem_format);
+
+   /* whole structures, plus one if the remainder can hold another element */
+   num_entries = size / struct_size;
+   if (size % struct_size >= elem_size)
+      num_entries++;
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 76:
+    *
+    *     "For SURFTYPE_BUFFER render targets, this field (Surface Base
+    *      Address) specifies the base address of first element of the
+    *      surface. The surface is interpreted as a simple array of that
+    *      single element type. The address must be naturally-aligned to the
+    *      element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
+    *      must be 16-byte aligned).
+    *
+    *      For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
+    *      the base address of the first element of the surface, computed in
+    *      software by adding the surface base address to the byte offset of
+    *      the element in the buffer."
+    */
+   assert(!is_rt || offset % elem_size == 0);
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 77:
+    *
+    *     "For buffer surfaces, the number of entries in the buffer ranges
+    *      from 1 to 2^27."
+    */
+   assert(num_entries >= 1 && num_entries <= 1 << 27);
+
+   /*
+    * From the Sandy Bridge PRM, volume 4 part 1, page 81:
+    *
+    *     "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
+    *      indicates the size of the structure."
+    */
+   pitch = struct_size;
+
+   /* both fields are programmed as the value minus one */
+   pitch--;
+   num_entries--;
+
+   /* the entry count is split over the width, height, and depth fields */
+   width = (num_entries & 0x0000007f);          /* bits [6:0] */
+   height = (num_entries & 0x000fff80) >> 7;    /* bits [19:7] */
+   depth = (num_entries & 0x07f00000) >> 20;    /* bits [26:20] */
+
+   STATIC_ASSERT(Elements(surf->payload) >= 6);
+   payload = surf->payload;
+
+   payload[0] = GEN6_SURFTYPE_BUFFER << GEN6_SURFACE_DW0_TYPE__SHIFT |
+                surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT;
+   if (render_cache_rw)
+      payload[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW;
+
+   payload[1] = offset;
+
+   payload[2] = height << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
+                width << GEN6_SURFACE_DW2_WIDTH__SHIFT;
+
+   payload[3] = depth << GEN6_SURFACE_DW3_DEPTH__SHIFT |
+                pitch << GEN6_SURFACE_DW3_PITCH__SHIFT;
+
+   payload[4] = 0;
+   payload[5] = 0;
+}
+
+/**
+ * Initialize \p surf as a Gen6 SURFACE_STATE for a view of texture \p tex.
+ *
+ * The view covers \p num_levels mip levels starting at \p first_level and
+ * \p num_layers array layers starting at \p first_layer. When \p is_rt is
+ * true, the render format translation is used, exactly one level must be
+ * selected, and cube textures are described as 2D surfaces. DW1 is left as
+ * zero here.
+ */
+static void
+view_init_for_texture_gen6(const struct ilo_dev *dev,
+ const struct ilo_texture *tex,
+ enum pipe_format format,
+ unsigned first_level,
+ unsigned num_levels,
+ unsigned first_layer,
+ unsigned num_layers,
+ bool is_rt,
+ struct ilo_view_surface *surf)
+{
+ int surface_type, surface_format;
+ int width, height, depth, pitch, lod;
+ uint32_t *dw;
+
+ ILO_DEV_ASSERT(dev, 6, 6);
+
+ surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
+ assert(surface_type != GEN6_SURFTYPE_BUFFER);
+
+ /* with a separate stencil buffer, only the depth part is viewed */
+ if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
+ format = PIPE_FORMAT_Z32_FLOAT;
+
+ if (is_rt)
+ surface_format = ilo_format_translate_render(dev, format);
+ else
+ surface_format = ilo_format_translate_texture(dev, format);
+ assert(surface_format >= 0);
+
+ /* depth is the texture depth for 3D textures, the layer count otherwise */
+ width = tex->image.width0;
+ height = tex->image.height0;
+ depth = (tex->base.target == PIPE_TEXTURE_3D) ?
+ tex->base.depth0 : num_layers;
+ pitch = tex->image.bo_stride;
+
+ if (surface_type == GEN6_SURFTYPE_CUBE) {
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 81:
+ *
+ * "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
+ * range of this field (Depth) is [0,84], indicating the number of
+ * cube array elements (equal to the number of underlying 2D array
+ * elements divided by 6). For other surfaces, this field must be
+ * zero."
+ *
+ * When is_rt is true, we treat the texture as a 2D one to avoid the
+ * restriction.
+ */
+ if (is_rt) {
+ surface_type = GEN6_SURFTYPE_2D;
+ }
+ else {
+ assert(num_layers % 6 == 0);
+ depth = num_layers / 6;
+ }
+ }
+
+ /* sanity check the size */
+ assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
+ switch (surface_type) {
+ case GEN6_SURFTYPE_1D:
+ assert(width <= 8192 && height == 1 && depth <= 512);
+ assert(first_layer < 512 && num_layers <= 512);
+ break;
+ case GEN6_SURFTYPE_2D:
+ assert(width <= 8192 && height <= 8192 && depth <= 512);
+ assert(first_layer < 512 && num_layers <= 512);
+ break;
+ case GEN6_SURFTYPE_3D:
+ assert(width <= 2048 && height <= 2048 && depth <= 2048);
+ assert(first_layer < 2048 && num_layers <= 512);
+ if (!is_rt)
+ assert(first_layer == 0);
+ break;
+ case GEN6_SURFTYPE_CUBE:
+ assert(width <= 8192 && height <= 8192 && depth <= 85);
+ assert(width == height);
+ assert(first_layer < 512 && num_layers <= 512);
+ /* NOTE(review): render targets were switched to SURFTYPE_2D above, so
+ * is_rt is expected to be false here - confirm the intent */
+ if (is_rt)
+ assert(first_layer == 0);
+ break;
+ default:
+ assert(!"unexpected surface type");
+ break;
+ }
+
+ /* non-full array spacing is supported only on GEN7+ */
+ assert(tex->image.walk != ILO_IMAGE_WALK_LOD);
+ /* non-interleaved samples are supported only on GEN7+ */
+ if (tex->base.nr_samples > 1)
+ assert(tex->image.interleaved_samples);
+
+ /*
+ * The same DW2 field is written with the selected level for render
+ * targets and with the level count minus one otherwise.
+ */
+ if (is_rt) {
+ assert(num_levels == 1);
+ lod = first_level;
+ }
+ else {
+ lod = num_levels - 1;
+ }
+
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 76:
+ *
+ * "Linear render target surface base addresses must be element-size
+ * aligned, for non-YUV surface formats, or a multiple of 2
+ * element-sizes for YUV surface formats. Other linear surfaces have
+ * no alignment requirements (byte alignment is sufficient.)"
+ *
+ * From the Sandy Bridge PRM, volume 4 part 1, page 81:
+ *
+ * "For linear render target surfaces, the pitch must be a multiple
+ * of the element size for non-YUV surface formats. Pitch must be a
+ * multiple of 2 * element size for YUV surface formats."
+ *
+ * From the Sandy Bridge PRM, volume 4 part 1, page 86:
+ *
+ * "For linear surfaces, this field (X Offset) must be zero"
+ */
+ /* only the pitch requirement is checked here */
+ if (tex->image.tiling == GEN6_TILING_NONE) {
+ if (is_rt) {
+ const int elem_size = util_format_get_blocksize(format);
+ assert(pitch % elem_size == 0);
+ }
+ }
+
+ STATIC_ASSERT(Elements(surf->payload) >= 6);
+ dw = surf->payload;
+
+ dw[0] = surface_type << GEN6_SURFACE_DW0_TYPE__SHIFT |
+ surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT |
+ GEN6_SURFACE_DW0_MIPLAYOUT_BELOW;
+
+ /*
+ * Sampled cube maps enable all cube faces.
+ * NOTE(review): bit 9 has no named macro here - presumably a cube map
+ * mode bit of DW0; confirm against the PRM.
+ */
+ if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) {
+ dw[0] |= 1 << 9 |
+ GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
+ }
+
+ if (is_rt)
+ dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW;
+
+ dw[1] = 0;
+
+ /* the size fields hold the dimension minus one */
+ dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
+ (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT |
+ lod << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT;
+
+ /* the tiling value is ORed into DW3 unshifted */
+ assert(tex->image.tiling != GEN8_TILING_W);
+ dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT |
+ (pitch - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT |
+ tex->image.tiling;
+
+ /*
+ * NOTE(review): the shifts by 17 and 8 have no named macros here -
+ * presumably the first array element and the layer extent minus one;
+ * confirm against the PRM. Any sample count above one is programmed as
+ * MULTISAMPLECOUNT_4.
+ */
+ dw[4] = first_level << GEN6_SURFACE_DW4_MIN_LOD__SHIFT |
+ first_layer << 17 |
+ (num_layers - 1) << 8 |
+ ((tex->base.nr_samples > 1) ? GEN6_SURFACE_DW4_MULTISAMPLECOUNT_4 :
+ GEN6_SURFACE_DW4_MULTISAMPLECOUNT_1);
+
+ dw[5] = 0;
+
+ /* no bit is set for a vertical alignment of 2 */
+ assert(tex->image.align_j == 2 || tex->image.align_j == 4);
+ if (tex->image.align_j == 4)
+ dw[5] |= GEN6_SURFACE_DW5_VALIGN_4;
+}
+
+/**
+ * Build the Gen7/Gen8 SURFACE_STATE payload of a null surface.
+ *
+ * The surface type is SURFTYPE_NULL; only the dimensions and \p level are
+ * encoded besides the (required) tiled layout.  payload[0..7] are always
+ * written, and payload[8..12] are cleared as well on Gen8.
+ */
+static void
+view_init_null_gen7(const struct ilo_dev *dev,
+                    unsigned width, unsigned height,
+                    unsigned depth, unsigned level,
+                    struct ilo_view_surface *surf)
+{
+   uint32_t *payload;
+   int tiling_shift, i;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   assert(width >= 1 && height >= 1 && depth >= 1);
+
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 62:
+    *
+    *     "A null surface is used in instances where an actual surface is not
+    *      bound.  When a write message is generated to a null surface, no
+    *      actual surface is written to. When a read message (including any
+    *      sampling engine message) is generated to a null surface, the result
+    *      is all zeros.  Note that a null surface type is allowed to be used
+    *      with all messages, even if it is not specificially indicated as
+    *      supported. All of the remaining fields in surface state are ignored
+    *      for null surfaces, with the following exceptions:
+    *
+    *      * Width, Height, Depth, LOD, and Render Target View Extent fields
+    *        must match the depth buffer's corresponding state for all render
+    *        target surfaces, including null.
+    *      * All sampling engine and data port messages support null surfaces
+    *        with the above behavior, even if not mentioned as specifically
+    *        supported, except for the following:
+    *        * Data Port Media Block Read/Write messages.
+    *      * The Surface Type of a surface used as a render target (accessed
+    *        via the Data Port's Render Target Write message) must be the same
+    *        as the Surface Type of all other render targets and of the depth
+    *        buffer (defined in 3DSTATE_DEPTH_BUFFER), unless either the depth
+    *        buffer or render targets are SURFTYPE_NULL."
+    *
+    * From the Ivy Bridge PRM, volume 4 part 1, page 65:
+    *
+    *     "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
+    *      true"
+    */
+
+   STATIC_ASSERT(Elements(surf->payload) >= 13);
+   payload = surf->payload;
+
+   /* Gen8 has its own shift for the tiling field of DW0 */
+   tiling_shift = (ilo_dev_gen(dev) >= ILO_GEN(8)) ?
+      GEN8_SURFACE_DW0_TILING__SHIFT : GEN7_SURFACE_DW0_TILING__SHIFT;
+
+   payload[0] = GEN6_SURFTYPE_NULL << GEN7_SURFACE_DW0_TYPE__SHIFT |
+                GEN6_FORMAT_B8G8R8A8_UNORM << GEN7_SURFACE_DW0_FORMAT__SHIFT |
+                GEN6_TILING_X << tiling_shift;
+   payload[1] = 0;
+   payload[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) |
+                GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH);
+   payload[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH);
+   payload[4] = 0;
+   payload[5] = level;
+   payload[6] = 0;
+   payload[7] = 0;
+
+   /* the Gen8 state is longer; clear the extra dwords */
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      for (i = 8; i < 13; i++)
+         payload[i] = 0;
+   }
+}
+
+/**
+ * Build the Gen7/Gen8 SURFACE_STATE payload of a buffer view.
+ *
+ * \param dev              device; must be Gen7 to Gen8
+ * \param buf              not referenced by this function
+ * \param offset           byte offset of the view, stored in the address dword
+ *                         (DW1 on Gen7/7.5, DW8 on Gen8)
+ * \param size             size of the view in bytes
+ * \param struct_size      size of an entry in bytes (becomes the surface
+ *                         pitch); must be non-zero
+ * \param elem_format      element format; PIPE_FORMAT_NONE selects a raw
+ *                         buffer, or a structured one when struct_size > 1
+ * \param is_rt            when true, \p offset must be element-size aligned
+ * \param render_cache_rw  sets RENDER_CACHE_RW in DW0
+ * \param surf             payload[0..7] are written, plus payload[8..12] on
+ *                         Gen8
+ */
+static void
+view_init_for_buffer_gen7(const struct ilo_dev *dev,
+                          const struct ilo_buffer *buf,
+                          unsigned offset, unsigned size,
+                          unsigned struct_size,
+                          enum pipe_format elem_format,
+                          bool is_rt, bool render_cache_rw,
+                          struct ilo_view_surface *surf)
+{
+   const bool typed = (elem_format != PIPE_FORMAT_NONE);
+   const bool structured = (!typed && struct_size > 1);
+   const int elem_size = (typed) ?
+      util_format_get_blocksize(elem_format) : 1;
+   int width, height, depth, pitch;
+   int surface_type, surface_format, num_entries;
+   uint32_t *dw;
+
+   ILO_DEV_ASSERT(dev, 7, 8);
+
+   /* struct_size is used as a divisor below */
+   assert(struct_size >= 1);
+
+   surface_type = (structured) ? GEN7_SURFTYPE_STRBUF : GEN6_SURFTYPE_BUFFER;
+
+   surface_format = (typed) ?
+      ilo_format_translate_color(dev, elem_format) : GEN6_FORMAT_RAW;
+
+   num_entries = size / struct_size;
+   /* see if there is enough space to fit another element */
+   if (size % struct_size >= elem_size && !structured)
+      num_entries++;
+
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 67:
+    *
+    *     "For SURFTYPE_BUFFER render targets, this field (Surface Base
+    *      Address) specifies the base address of first element of the
+    *      surface. The surface is interpreted as a simple array of that
+    *      single element type. The address must be naturally-aligned to the
+    *      element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
+    *      must be 16-byte aligned)
+    *
+    *      For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
+    *      the base address of the first element of the surface, computed in
+    *      software by adding the surface base address to the byte offset of
+    *      the element in the buffer."
+    */
+   if (is_rt)
+      assert(offset % elem_size == 0);
+
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 68:
+    *
+    *     "For typed buffer and structured buffer surfaces, the number of
+    *      entries in the buffer ranges from 1 to 2^27.  For raw buffer
+    *      surfaces, the number of entries in the buffer is the number of
+    *      bytes which can range from 1 to 2^30."
+    */
+   assert(num_entries >= 1 &&
+          num_entries <= 1 << ((typed || structured) ? 27 : 30));
+
+   /*
+    * From the Ivy Bridge PRM, volume 4 part 1, page 69:
+    *
+    *     "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be
+    *      11 if the Surface Format is RAW (the size of the buffer must be a
+    *      multiple of 4 bytes)."
+    *
+    * From the Ivy Bridge PRM, volume 4 part 1, page 70:
+    *
+    *     "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this
+    *      field (Surface Pitch) indicates the size of the structure."
+    *
+    *     "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch
+    *      must be a multiple of 4 bytes."
+    */
+   if (structured)
+      assert(struct_size % 4 == 0);
+   else if (!typed)
+      assert(num_entries % 4 == 0);
+
+   pitch = struct_size;
+
+   /* the hardware fields hold "value - 1" */
+   pitch--;
+   num_entries--;
+
+   /* the entry count is split across the Width, Height, and Depth fields */
+   /* bits [6:0] */
+   width  = (num_entries & 0x0000007f);
+   /* bits [20:7] */
+   height = (num_entries & 0x001fff80) >> 7;
+   /* bits [30:21] */
+   depth  = (num_entries & 0x7fe00000) >> 21;
+   /* limit to [26:21] */
+   if (typed || structured)
+      depth &= 0x3f;
+
+   STATIC_ASSERT(Elements(surf->payload) >= 13);
+   dw = surf->payload;
+
+   dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
+           surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT;
+   if (render_cache_rw)
+      dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;
+
+   if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+      /*
+       * The address moved to DW8 on Gen8.  DW1 must still be written so that
+       * no stale data from the caller's storage ends up in the state.
+       */
+      dw[1] = 0;
+      dw[8] = offset;
+      memset(&dw[9], 0, sizeof(*dw) * (13 - 9));
+   } else {
+      dw[1] = offset;
+   }
+
+   dw[2] = GEN_SHIFT32(height, GEN7_SURFACE_DW2_HEIGHT) |
+           GEN_SHIFT32(width, GEN7_SURFACE_DW2_WIDTH);
+
+   dw[3] = GEN_SHIFT32(depth, GEN7_SURFACE_DW3_DEPTH) |
+           pitch;
+
+   dw[4] = 0;
+   dw[5] = 0;
+
+   dw[6] = 0;
+   dw[7] = 0;
+
+   /* Gen7.5+ has shader channel selects; program the identity swizzle */
+   if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
+      dw[7] |= GEN_SHIFT32(GEN75_SCS_RED,   GEN75_SURFACE_DW7_SCS_R) |
+               GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
+               GEN_SHIFT32(GEN75_SCS_BLUE,  GEN75_SURFACE_DW7_SCS_B) |
+               GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
+   }
+}
+
+/**
+ * Build the Gen7/Gen8 SURFACE_STATE payload of a texture view.
+ *
+ * \param format       view format; Z32_FLOAT_S8X24_UINT is demoted to
+ *                     Z32_FLOAT when the texture has a separate stencil
+ * \param first_level  first mip level of the view
+ * \param num_levels   number of mip levels; must be 1 when \p is_rt
+ * \param first_layer  first array layer of the view
+ * \param num_layers   number of array layers of the view
+ * \param is_rt        true for a render target view: the render format
+ *                     translation is used, cube maps are described as 2D
+ *                     surfaces, and RENDER_CACHE_RW is set
+ * \param surf         payload[0..7] are written, plus payload[8..12]
+ *                     (zeroed) on Gen8
+ */
+static void
+view_init_for_texture_gen7(const struct ilo_dev *dev,
+ const struct ilo_texture *tex,
+ enum pipe_format format,
+ unsigned first_level,
+ unsigned num_levels,
+ unsigned first_layer,
+ unsigned num_layers,
+ bool is_rt,
+ struct ilo_view_surface *surf)
+{
+ int surface_type, surface_format;
+ int width, height, depth, pitch, lod;
+ uint32_t *dw;
+
+ ILO_DEV_ASSERT(dev, 7, 8);
+
+ surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
+ assert(surface_type != GEN6_SURFTYPE_BUFFER);
+
+ if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
+ format = PIPE_FORMAT_Z32_FLOAT;
+
+ if (is_rt)
+ surface_format = ilo_format_translate_render(dev, format);
+ else
+ surface_format = ilo_format_translate_texture(dev, format);
+ assert(surface_format >= 0);
+
+ width = tex->image.width0;
+ height = tex->image.height0;
+ depth = (tex->base.target == PIPE_TEXTURE_3D) ?
+ tex->base.depth0 : num_layers;
+ pitch = tex->image.bo_stride;
+
+ if (surface_type == GEN6_SURFTYPE_CUBE) {
+ /*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 70:
+ *
+ * "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of
+ * this field is [0,340], indicating the number of cube array
+ * elements (equal to the number of underlying 2D array elements
+ * divided by 6). For other surfaces, this field must be zero."
+ *
+ * When is_rt is true, we treat the texture as a 2D one to avoid the
+ * restriction.
+ */
+ if (is_rt) {
+ surface_type = GEN6_SURFTYPE_2D;
+ }
+ else {
+ assert(num_layers % 6 == 0);
+ depth = num_layers / 6;
+ }
+ }
+
+ /* sanity check the size */
+ assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
+ assert(first_layer < 2048 && num_layers <= 2048);
+ switch (surface_type) {
+ case GEN6_SURFTYPE_1D:
+ assert(width <= 16384 && height == 1 && depth <= 2048);
+ break;
+ case GEN6_SURFTYPE_2D:
+ assert(width <= 16384 && height <= 16384 && depth <= 2048);
+ break;
+ case GEN6_SURFTYPE_3D:
+ assert(width <= 2048 && height <= 2048 && depth <= 2048);
+ if (!is_rt)
+ assert(first_layer == 0);
+ break;
+ case GEN6_SURFTYPE_CUBE:
+ assert(width <= 16384 && height <= 16384 && depth <= 86);
+ assert(width == height);
+ /* never true here: render targets were switched to SURFTYPE_2D above */
+ if (is_rt)
+ assert(first_layer == 0);
+ break;
+ default:
+ assert(!"unexpected surface type");
+ break;
+ }
+
+ /*
+ * The LOD field of DW5 holds the level to render to for render targets,
+ * and the number of levels minus one otherwise.
+ */
+ if (is_rt) {
+ assert(num_levels == 1);
+ lod = first_level;
+ }
+ else {
+ lod = num_levels - 1;
+ }
+
+ /*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 68:
+ *
+ * "The Base Address for linear render target surfaces and surfaces
+ * accessed with the typed surface read/write data port messages must
+ * be element-size aligned, for non-YUV surface formats, or a multiple
+ * of 2 element-sizes for YUV surface formats. Other linear surfaces
+ * have no alignment requirements (byte alignment is sufficient)."
+ *
+ * From the Ivy Bridge PRM, volume 4 part 1, page 70:
+ *
+ * "For linear render target surfaces and surfaces accessed with the
+ * typed data port messages, the pitch must be a multiple of the
+ * element size for non-YUV surface formats. Pitch must be a multiple
+ * of 2 * element size for YUV surface formats. For linear surfaces
+ * with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple
+ * of 4 bytes.For other linear surfaces, the pitch can be any multiple
+ * of bytes."
+ *
+ * From the Ivy Bridge PRM, volume 4 part 1, page 74:
+ *
+ * "For linear surfaces, this field (X Offset) must be zero."
+ */
+ if (tex->image.tiling == GEN6_TILING_NONE) {
+ if (is_rt) {
+ const int elem_size = util_format_get_blocksize(format);
+ assert(pitch % elem_size == 0);
+ }
+ }
+
+ STATIC_ASSERT(Elements(surf->payload) >= 13);
+ dw = surf->payload;
+
+ dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
+ surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT;
+
+ /*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 63:
+ *
+ * "If this field (Surface Array) is enabled, the Surface Type must be
+ * SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is
+ * disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or
+ * SURFTYPE_CUBE, the Depth field must be set to zero."
+ *
+ * For non-3D sampler surfaces, resinfo (the sampler message) always
+ * returns zero for the number of layers when this field is not set.
+ */
+ if (surface_type != GEN6_SURFTYPE_3D) {
+ if (util_resource_is_array_texture(&tex->base))
+ dw[0] |= GEN7_SURFACE_DW0_IS_ARRAY;
+ else
+ assert(depth == 1);
+ }
+
+ /* alignment, tiling, and array spacing are encoded differently on Gen8 */
+ if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+ switch (tex->image.align_j) {
+ case 4:
+ dw[0] |= GEN7_SURFACE_DW0_VALIGN_4;
+ break;
+ case 8:
+ dw[0] |= GEN8_SURFACE_DW0_VALIGN_8;
+ break;
+ case 16:
+ dw[0] |= GEN8_SURFACE_DW0_VALIGN_16;
+ break;
+ default:
+ assert(!"unsupported valign");
+ break;
+ }
+
+ switch (tex->image.align_i) {
+ case 4:
+ dw[0] |= GEN8_SURFACE_DW0_HALIGN_4;
+ break;
+ case 8:
+ dw[0] |= GEN8_SURFACE_DW0_HALIGN_8;
+ break;
+ case 16:
+ dw[0] |= GEN8_SURFACE_DW0_HALIGN_16;
+ break;
+ default:
+ assert(!"unsupported halign");
+ break;
+ }
+
+ dw[0] |= tex->image.tiling << GEN8_SURFACE_DW0_TILING__SHIFT;
+ } else {
+ assert(tex->image.align_i == 4 || tex->image.align_i == 8);
+ assert(tex->image.align_j == 2 || tex->image.align_j == 4);
+
+ if (tex->image.align_j == 4)
+ dw[0] |= GEN7_SURFACE_DW0_VALIGN_4;
+
+ if (tex->image.align_i == 8)
+ dw[0] |= GEN7_SURFACE_DW0_HALIGN_8;
+
+ assert(tex->image.tiling != GEN8_TILING_W);
+ dw[0] |= tex->image.tiling << GEN7_SURFACE_DW0_TILING__SHIFT;
+
+ if (tex->image.walk == ILO_IMAGE_WALK_LOD)
+ dw[0] |= GEN7_SURFACE_DW0_ARYSPC_LOD0;
+ else
+ dw[0] |= GEN7_SURFACE_DW0_ARYSPC_FULL;
+ }
+
+ if (is_rt)
+ dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;
+
+ if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt)
+ dw[0] |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
+
+ /* on Gen8, DW1 takes the layer height in units of 4 rows */
+ if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
+ assert(tex->image.layer_height % 4 == 0);
+ dw[1] = tex->image.layer_height / 4;
+ } else {
+ dw[1] = 0;
+ }
+
+ dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) |
+ GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH);
+
+ dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH) |
+ (pitch - 1);
+
+ dw[4] = first_layer << 18 |
+ (num_layers - 1) << 7;
+
+ /*
+ * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL
+ * means the samples are interleaved. The layouts are the same when the
+ * number of samples is 1.
+ */
+ if (tex->image.interleaved_samples && tex->base.nr_samples > 1) {
+ assert(!is_rt);
+ dw[4] |= GEN7_SURFACE_DW4_MSFMT_DEPTH_STENCIL;
+ }
+ else {
+ dw[4] |= GEN7_SURFACE_DW4_MSFMT_MSS;
+ }
+
+ /* any sample count not listed below is treated as single-sampled */
+ switch (tex->base.nr_samples) {
+ case 0:
+ case 1:
+ default:
+ dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_1;
+ break;
+ case 2:
+ dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_2;
+ break;
+ case 4:
+ dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_4;
+ break;
+ case 8:
+ dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_8;
+ break;
+ case 16:
+ dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_16;
+ break;
+ }
+
+ dw[5] = GEN_SHIFT32(first_level, GEN7_SURFACE_DW5_MIN_LOD) |
+ lod;
+
+ dw[6] = 0;
+ dw[7] = 0;
+
+ /* Gen7.5+ has shader channel selects; program the identity swizzle */
+ if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
+ dw[7] |= GEN_SHIFT32(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) |
+ GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
+ GEN_SHIFT32(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) |
+ GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
+ }
+
+ /* the Gen8 state is longer; clear the extra dwords */
+ if (ilo_dev_gen(dev) >= ILO_GEN(8))
+ memset(&dw[8], 0, sizeof(*dw) * (13 - 8));
+}
+
+/**
+ * Initialize \p surf as a null surface of the given size and level, using
+ * the payload layout that matches the device generation.  The resulting
+ * surface has no bo and is not a scanout.
+ */
+void
+ilo_gpe_init_view_surface_null(const struct ilo_dev *dev,
+                               unsigned width, unsigned height,
+                               unsigned depth, unsigned level,
+                               struct ilo_view_surface *surf)
+{
+   if (ilo_dev_gen(dev) < ILO_GEN(7))
+      view_init_null_gen6(dev, width, height, depth, level, surf);
+   else
+      view_init_null_gen7(dev, width, height, depth, level, surf);
+
+   surf->scanout = false;
+   surf->bo = NULL;
+}
+
+/**
+ * Initialize \p surf as a view of a range of \p buf, using the payload layout
+ * that matches the device generation.  The bo of \p buf is stored in \p surf
+ * without taking a reference.
+ */
+void
+ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev,
+                                     const struct ilo_buffer *buf,
+                                     unsigned offset, unsigned size,
+                                     unsigned struct_size,
+                                     enum pipe_format elem_format,
+                                     bool is_rt, bool render_cache_rw,
+                                     struct ilo_view_surface *surf)
+{
+   if (ilo_dev_gen(dev) < ILO_GEN(7)) {
+      view_init_for_buffer_gen6(dev, buf, offset, size, struct_size,
+                                elem_format, is_rt, render_cache_rw, surf);
+   } else {
+      view_init_for_buffer_gen7(dev, buf, offset, size, struct_size,
+                                elem_format, is_rt, render_cache_rw, surf);
+   }
+
+   /* buffers are never scanouts */
+   surf->scanout = false;
+
+   /* borrowed pointer: the reference count is left untouched */
+   surf->bo = buf->bo;
+}
+
+/**
+ * Initialize \p surf as a view of the given levels and layers of \p tex,
+ * using the payload layout that matches the device generation.  The bo of
+ * the texture image is stored in \p surf without taking a reference.
+ */
+void
+ilo_gpe_init_view_surface_for_texture(const struct ilo_dev *dev,
+                                      const struct ilo_texture *tex,
+                                      enum pipe_format format,
+                                      unsigned first_level,
+                                      unsigned num_levels,
+                                      unsigned first_layer,
+                                      unsigned num_layers,
+                                      bool is_rt,
+                                      struct ilo_view_surface *surf)
+{
+   bool bound_as_scanout, imported_rt;
+
+   if (ilo_dev_gen(dev) < ILO_GEN(7)) {
+      view_init_for_texture_gen6(dev, tex, format, first_level, num_levels,
+                                 first_layer, num_layers, is_rt, surf);
+   } else {
+      view_init_for_texture_gen7(dev, tex, format, first_level, num_levels,
+                                 first_layer, num_layers, is_rt, surf);
+   }
+
+   /* borrowed pointer: the reference count is left untouched */
+   surf->bo = tex->image.bo;
+
+   /* an imported render target is assumed to be a scanout as well */
+   bound_as_scanout = (tex->base.bind & PIPE_BIND_SCANOUT) != 0;
+   imported_rt = tex->imported &&
+                 (tex->base.bind & PIPE_BIND_RENDER_TARGET) != 0;
+
+   surf->scanout = (bound_as_scanout || imported_rt);
+}
+
+/**
+ * Fill in the 12-dword Gen6 border color state from a float border color.
+ *
+ * The same color is stored in every representation the state carries:
+ * UNORM8 (DW0), IEEE_FP (DW1-4), FLOAT_16 (DW5-6), UNORM16 (DW7-8),
+ * SNORM16 (DW9-10), and SNORM8 (DW11).
+ *
+ * Every channel is converted to an unsigned type of its exact width and
+ * widened to uint32_t before it is shifted and OR'ed.  Packing the signed
+ * channels directly as int16_t/int8_t would sign-extend negative values over
+ * the neighbouring channels, and shifting the promoted ints could overflow.
+ *
+ * \param dev         device; must be Gen6
+ * \param color       border color; only the float members are read
+ * \param dw          destination, at least 12 dwords
+ * \param num_dwords  number of dwords available at \p dw
+ */
+static void
+sampler_init_border_color_gen6(const struct ilo_dev *dev,
+                               const union pipe_color_union *color,
+                               uint32_t *dw, int num_dwords)
+{
+   float rgba[4] = {
+      color->f[0], color->f[1], color->f[2], color->f[3],
+   };
+
+   ILO_DEV_ASSERT(dev, 6, 6);
+
+   assert(num_dwords >= 12);
+
+   /*
+    * This state is not documented in the Sandy Bridge PRM, but in the
+    * Ironlake PRM.  SNORM8 seems to be in DW11 instead of DW1.
+    */
+
+   /* IEEE_FP */
+   dw[1] = fui(rgba[0]);
+   dw[2] = fui(rgba[1]);
+   dw[3] = fui(rgba[2]);
+   dw[4] = fui(rgba[3]);
+
+   /* FLOAT_16 */
+   dw[5] = (uint32_t) util_float_to_half(rgba[0]) |
+           (uint32_t) util_float_to_half(rgba[1]) << 16;
+   dw[6] = (uint32_t) util_float_to_half(rgba[2]) |
+           (uint32_t) util_float_to_half(rgba[3]) << 16;
+
+   /* clamp to [-1.0f, 1.0f] */
+   rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f);
+   rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f);
+   rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f);
+   rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f);
+
+   /* SNORM16: keep only the low 16 bits of each two's complement value */
+   dw[9] =  (uint32_t) (uint16_t) util_iround(rgba[0] * 32767.0f) |
+            (uint32_t) (uint16_t) util_iround(rgba[1] * 32767.0f) << 16;
+   dw[10] = (uint32_t) (uint16_t) util_iround(rgba[2] * 32767.0f) |
+            (uint32_t) (uint16_t) util_iround(rgba[3] * 32767.0f) << 16;
+
+   /* SNORM8: keep only the low 8 bits of each two's complement value */
+   dw[11] = (uint32_t) (uint8_t) util_iround(rgba[0] * 127.0f) |
+            (uint32_t) (uint8_t) util_iround(rgba[1] * 127.0f) << 8 |
+            (uint32_t) (uint8_t) util_iround(rgba[2] * 127.0f) << 16 |
+            (uint32_t) (uint8_t) util_iround(rgba[3] * 127.0f) << 24;
+
+   /* clamp to [0.0f, 1.0f] */
+   rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f);
+   rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f);
+   rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f);
+   rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f);
+
+   /* UNORM8 */
+   dw[0] = (uint32_t) (uint8_t) util_iround(rgba[0] * 255.0f) |
+           (uint32_t) (uint8_t) util_iround(rgba[1] * 255.0f) << 8 |
+           (uint32_t) (uint8_t) util_iround(rgba[2] * 255.0f) << 16 |
+           (uint32_t) (uint8_t) util_iround(rgba[3] * 255.0f) << 24;
+
+   /* UNORM16 */
+   dw[7] = (uint32_t) (uint16_t) util_iround(rgba[0] * 65535.0f) |
+           (uint32_t) (uint16_t) util_iround(rgba[1] * 65535.0f) << 16;
+   dw[8] = (uint32_t) (uint16_t) util_iround(rgba[2] * 65535.0f) |
+           (uint32_t) (uint16_t) util_iround(rgba[3] * 65535.0f) << 16;
+}
+
+/**
+ * Return the hardware mipfilter for a PIPE_TEX_MIPFILTER_x value.  An
+ * unrecognized value asserts and is treated as "no mipmapping".
+ */
+static int
+gen6_translate_tex_mipfilter(unsigned filter)
+{
+   if (filter == PIPE_TEX_MIPFILTER_NEAREST)
+      return GEN6_MIPFILTER_NEAREST;
+
+   if (filter == PIPE_TEX_MIPFILTER_LINEAR)
+      return GEN6_MIPFILTER_LINEAR;
+
+   if (filter != PIPE_TEX_MIPFILTER_NONE)
+      assert(!"unknown mipfilter");
+
+   return GEN6_MIPFILTER_NONE;
+}
+
+/**
+ * Return the hardware mapfilter for a PIPE_TEX_FILTER_x value.  An
+ * unrecognized value asserts and is treated as nearest filtering.
+ */
+static int
+gen6_translate_tex_filter(unsigned filter)
+{
+   if (filter == PIPE_TEX_FILTER_LINEAR)
+      return GEN6_MAPFILTER_LINEAR;
+
+   if (filter != PIPE_TEX_FILTER_NEAREST)
+      assert(!"unknown sampler filter");
+
+   return GEN6_MAPFILTER_NEAREST;
+}
+
+/**
+ * Return the hardware texture coordinate mode for a PIPE_TEX_WRAP_x value.
+ *
+ * PIPE_TEX_WRAP_CLAMP maps to the Gen8 half-border mode.  The mirror-clamp
+ * variants have no mapping here; they, and any unrecognized value, assert and
+ * are treated as plain wrapping.
+ */
+static int
+gen6_translate_tex_wrap(unsigned wrap)
+{
+   int mode;
+
+   switch (wrap) {
+   case PIPE_TEX_WRAP_CLAMP:
+      mode = GEN8_TEXCOORDMODE_HALF_BORDER;
+      break;
+   case PIPE_TEX_WRAP_REPEAT:
+      mode = GEN6_TEXCOORDMODE_WRAP;
+      break;
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+      mode = GEN6_TEXCOORDMODE_CLAMP;
+      break;
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+      mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
+      break;
+   case PIPE_TEX_WRAP_MIRROR_REPEAT:
+      mode = GEN6_TEXCOORDMODE_MIRROR;
+      break;
+   case PIPE_TEX_WRAP_MIRROR_CLAMP:
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+   default:
+      assert(!"unknown sampler wrap mode");
+      mode = GEN6_TEXCOORDMODE_WRAP;
+      break;
+   }
+
+   return mode;
+}
+
+/**
+ * Return the hardware shadow compare function for a PIPE_FUNC_x value.  An
+ * unrecognized value asserts and is treated as COMPAREFUNCTION_NEVER.
+ */
+static int
+gen6_translate_shadow_func(unsigned func)
+{
+   int hw_func;
+
+   /*
+    * The two conventions are opposite in both respects.  PIPE_FUNC_x puts
+    * the reference value on the left-hand side and returns 1.0 when the
+    * comparison is true.  GEN6_COMPAREFUNCTION_x puts the reference value on
+    * the right-hand side and returns 0.0 when the comparison is true.
+    */
+   switch (func) {
+   case PIPE_FUNC_NEVER:
+      hw_func = GEN6_COMPAREFUNCTION_ALWAYS;
+      break;
+   case PIPE_FUNC_LESS:
+      hw_func = GEN6_COMPAREFUNCTION_LEQUAL;
+      break;
+   case PIPE_FUNC_EQUAL:
+      hw_func = GEN6_COMPAREFUNCTION_NOTEQUAL;
+      break;
+   case PIPE_FUNC_LEQUAL:
+      hw_func = GEN6_COMPAREFUNCTION_LESS;
+      break;
+   case PIPE_FUNC_GREATER:
+      hw_func = GEN6_COMPAREFUNCTION_GEQUAL;
+      break;
+   case PIPE_FUNC_NOTEQUAL:
+      hw_func = GEN6_COMPAREFUNCTION_EQUAL;
+      break;
+   case PIPE_FUNC_GEQUAL:
+      hw_func = GEN6_COMPAREFUNCTION_GREATER;
+      break;
+   case PIPE_FUNC_ALWAYS:
+      hw_func = GEN6_COMPAREFUNCTION_NEVER;
+      break;
+   default:
+      assert(!"unknown shadow compare function");
+      hw_func = GEN6_COMPAREFUNCTION_NEVER;
+      break;
+   }
+
+   return hw_func;
+}
+
+/**
+ * Translate a pipe_sampler_state into the pre-packed sampler dwords of
+ * \p sampler.
+ *
+ * \p sampler is cleared first.  Then payload[0..2] receive DW0, DW1, and DW3
+ * of the sampler state, followed by the border color starting at payload[3]:
+ * the raw color values on Gen7+, or the 12-dword multi-format table on Gen6.
+ * The filter and wrap-mode dwords are kept separately in several variants
+ * (dw_filter, dw_filter_aniso, dw_wrap, dw_wrap_1d, dw_wrap_cube).
+ *
+ * Before Gen8, PIPE_TEX_WRAP_CLAMP is emulated and saturate_s/t/r are set
+ * when the texture coordinates additionally need to be clamped.
+ */
+void
+ilo_gpe_init_sampler_cso(const struct ilo_dev *dev,
+ const struct pipe_sampler_state *state,
+ struct ilo_sampler_cso *sampler)
+{
+ int mip_filter, min_filter, mag_filter, max_aniso;
+ int lod_bias, max_lod, min_lod;
+ int wrap_s, wrap_t, wrap_r, wrap_cube;
+ uint32_t dw0, dw1, dw3;
+
+ ILO_DEV_ASSERT(dev, 6, 8);
+
+ memset(sampler, 0, sizeof(*sampler));
+
+ mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter);
+ min_filter = gen6_translate_tex_filter(state->min_img_filter);
+ mag_filter = gen6_translate_tex_filter(state->mag_img_filter);
+
+ sampler->anisotropic = state->max_anisotropy;
+
+ /* map the ratio to the hardware encoding; out-of-range values are clamped */
+ if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16)
+ max_aniso = state->max_anisotropy / 2 - 1;
+ else if (state->max_anisotropy > 16)
+ max_aniso = GEN6_ANISORATIO_16;
+ else
+ max_aniso = GEN6_ANISORATIO_2;
+
+ /*
+ *
+ * Here is how the hardware calculates per-pixel LOD, from my reading of the
+ * PRMs:
+ *
+ * 1) LOD is set to log2(ratio of texels to pixels) if not specified in
+ * other ways. The number of texels is measured using level
+ * SurfMinLod.
+ * 2) Bias is added to LOD.
+ * 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
+ * compared with Base to determine whether magnification or
+ * minification is needed. (if preclamp is disabled, LOD is compared
+ * with Base before clamping)
+ * 4) If magnification is needed, or no mipmapping is requested, LOD is
+ * set to floor(MinLod).
+ * 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
+ *
+ * With Gallium interface, Base is always zero and
+ * pipe_sampler_view::u.tex.first_level specifies SurfMinLod.
+ */
+ if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+ const float scale = 256.0f;
+
+ /* [-16.0, 16.0) in S4.8 */
+ lod_bias = (int)
+ (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
+ lod_bias &= 0x1fff;
+
+ /* [0.0, 14.0] in U4.8 */
+ max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale);
+ min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale);
+ }
+ else {
+ const float scale = 64.0f;
+
+ /* [-16.0, 16.0) in S4.6 */
+ lod_bias = (int)
+ (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
+ lod_bias &= 0x7ff;
+
+ /* [0.0, 13.0] in U4.6 */
+ max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale);
+ min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale);
+ }
+
+ /*
+ * We want LOD to be clamped to determine magnification/minification, and
+ * get set to zero when it is magnification or when mipmapping is disabled.
+ * The hardware would set LOD to floor(MinLod) and that is a problem when
+ * MinLod is greater than or equal to 1.0f.
+ *
+ * With Base being zero, it is always minification when MinLod is non-zero.
+ * To achieve our goal, we just need to set MinLod to zero and set
+ * MagFilter to MinFilter when mipmapping is disabled.
+ */
+ if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) {
+ min_lod = 0;
+ mag_filter = min_filter;
+ }
+
+ /* determine wrap s/t/r */
+ wrap_s = gen6_translate_tex_wrap(state->wrap_s);
+ wrap_t = gen6_translate_tex_wrap(state->wrap_t);
+ wrap_r = gen6_translate_tex_wrap(state->wrap_r);
+ if (ilo_dev_gen(dev) < ILO_GEN(8)) {
+ /*
+ * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
+ * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering,
+ * PIPE_TEX_WRAP_CLAMP means PIPE_TEX_WRAP_CLAMP_TO_BORDER while
+ * additionally clamping the texture coordinates to [0.0, 1.0].
+ *
+ * PIPE_TEX_WRAP_CLAMP is not supported natively until Gen8. The
+ * clamping has to be taken care of in the shaders. There are two
+ * filters here, but let the minification one have a say.
+ */
+ const bool clamp_is_to_edge =
+ (state->min_img_filter == PIPE_TEX_FILTER_NEAREST);
+
+ if (clamp_is_to_edge) {
+ if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER)
+ wrap_s = GEN6_TEXCOORDMODE_CLAMP;
+ if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER)
+ wrap_t = GEN6_TEXCOORDMODE_CLAMP;
+ if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER)
+ wrap_r = GEN6_TEXCOORDMODE_CLAMP;
+ } else {
+ if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER) {
+ wrap_s = GEN6_TEXCOORDMODE_CLAMP_BORDER;
+ sampler->saturate_s = true;
+ }
+ if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER) {
+ wrap_t = GEN6_TEXCOORDMODE_CLAMP_BORDER;
+ sampler->saturate_t = true;
+ }
+ if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER) {
+ wrap_r = GEN6_TEXCOORDMODE_CLAMP_BORDER;
+ sampler->saturate_r = true;
+ }
+ }
+ }
+
+ /*
+ * From the Sandy Bridge PRM, volume 4 part 1, page 107:
+ *
+ * "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
+ * and TEXCOORDMODE_CUBE settings are valid, and each TC component
+ * must have the same Address Control mode."
+ *
+ * From the Ivy Bridge PRM, volume 4 part 1, page 96:
+ *
+ * "This field (Cube Surface Control Mode) must be set to
+ * CUBECTRLMODE_PROGRAMMED"
+ *
+ * Therefore, we cannot use "Cube Surface Control Mode" for seamless cube
+ * map filtering.
+ */
+ if (state->seamless_cube_map &&
+ (state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
+ state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) {
+ wrap_cube = GEN6_TEXCOORDMODE_CUBE;
+ }
+ else {
+ wrap_cube = GEN6_TEXCOORDMODE_CLAMP;
+ }
+
+ if (!state->normalized_coords) {
+ /*
+ * From the Ivy Bridge PRM, volume 4 part 1, page 98:
+ *
+ * "The following state must be set as indicated if this field
+ * (Non-normalized Coordinate Enable) is enabled:
+ *
+ * - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
+ * TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
+ * - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
+ * - Mag Mode Filter must be MAPFILTER_NEAREST or
+ * MAPFILTER_LINEAR.
+ * - Min Mode Filter must be MAPFILTER_NEAREST or
+ * MAPFILTER_LINEAR.
+ * - Mip Mode Filter must be MIPFILTER_NONE.
+ * - Min LOD must be 0.
+ * - Max LOD must be 0.
+ * - MIP Count must be 0.
+ * - Surface Min LOD must be 0.
+ * - Texture LOD Bias must be 0."
+ */
+ assert(wrap_s == GEN6_TEXCOORDMODE_CLAMP ||
+ wrap_s == GEN6_TEXCOORDMODE_CLAMP_BORDER);
+ assert(wrap_t == GEN6_TEXCOORDMODE_CLAMP ||
+ wrap_t == GEN6_TEXCOORDMODE_CLAMP_BORDER);
+ assert(wrap_r == GEN6_TEXCOORDMODE_CLAMP ||
+ wrap_r == GEN6_TEXCOORDMODE_CLAMP_BORDER);
+
+ assert(mag_filter == GEN6_MAPFILTER_NEAREST ||
+ mag_filter == GEN6_MAPFILTER_LINEAR);
+ assert(min_filter == GEN6_MAPFILTER_NEAREST ||
+ min_filter == GEN6_MAPFILTER_LINEAR);
+
+ /* work around a bug in util_blitter */
+ mip_filter = GEN6_MIPFILTER_NONE;
+
+ assert(mip_filter == GEN6_MIPFILTER_NONE);
+ }
+
+ /* the field positions within DW0/DW1/DW3 differ between Gen7+ and Gen6 */
+ if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
+ dw0 = 1 << 28 |
+ mip_filter << 20 |
+ lod_bias << 1;
+
+ sampler->dw_filter = mag_filter << 17 |
+ min_filter << 14;
+
+ sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 |
+ GEN6_MAPFILTER_ANISOTROPIC << 14 |
+ 1;
+
+ dw1 = min_lod << 20 |
+ max_lod << 8;
+
+ if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
+ dw1 |= gen6_translate_shadow_func(state->compare_func) << 1;
+
+ dw3 = max_aniso << 19;
+
+ /* round the coordinates for linear filtering */
+ if (min_filter != GEN6_MAPFILTER_NEAREST) {
+ dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND |
+ GEN6_SAMPLER_DW3_V_MIN_ROUND |
+ GEN6_SAMPLER_DW3_R_MIN_ROUND);
+ }
+ if (mag_filter != GEN6_MAPFILTER_NEAREST) {
+ dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND |
+ GEN6_SAMPLER_DW3_V_MAG_ROUND |
+ GEN6_SAMPLER_DW3_R_MAG_ROUND);
+ }
+
+ if (!state->normalized_coords)
+ dw3 |= 1 << 10;
+
+ sampler->dw_wrap = wrap_s << 6 |
+ wrap_t << 3 |
+ wrap_r;
+
+ /*
+ * As noted in the classic i965 driver, the HW may still reference
+ * wrap_t and wrap_r for 1D textures. We need to set them to a safe
+ * mode
+ */
+ sampler->dw_wrap_1d = wrap_s << 6 |
+ GEN6_TEXCOORDMODE_WRAP << 3 |
+ GEN6_TEXCOORDMODE_WRAP;
+
+ sampler->dw_wrap_cube = wrap_cube << 6 |
+ wrap_cube << 3 |
+ wrap_cube;
+
+ STATIC_ASSERT(Elements(sampler->payload) >= 7);
+
+ sampler->payload[0] = dw0;
+ sampler->payload[1] = dw1;
+ sampler->payload[2] = dw3;
+
+ /* the border color is stored as is on Gen7+ */
+ memcpy(&sampler->payload[3],
+ state->border_color.ui, sizeof(state->border_color.ui));
+ }
+ else {
+ dw0 = 1 << 28 |
+ mip_filter << 20 |
+ lod_bias << 3;
+
+ if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
+ dw0 |= gen6_translate_shadow_func(state->compare_func);
+
+ sampler->dw_filter = (min_filter != mag_filter) << 27 |
+ mag_filter << 17 |
+ min_filter << 14;
+
+ sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 |
+ GEN6_MAPFILTER_ANISOTROPIC << 14;
+
+ dw1 = min_lod << 22 |
+ max_lod << 12;
+
+ sampler->dw_wrap = wrap_s << 6 |
+ wrap_t << 3 |
+ wrap_r;
+
+ sampler->dw_wrap_1d = wrap_s << 6 |
+ GEN6_TEXCOORDMODE_WRAP << 3 |
+ GEN6_TEXCOORDMODE_WRAP;
+
+ sampler->dw_wrap_cube = wrap_cube << 6 |
+ wrap_cube << 3 |
+ wrap_cube;
+
+ dw3 = max_aniso << 19;
+
+ /* round the coordinates for linear filtering */
+ if (min_filter != GEN6_MAPFILTER_NEAREST) {
+ dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND |
+ GEN6_SAMPLER_DW3_V_MIN_ROUND |
+ GEN6_SAMPLER_DW3_R_MIN_ROUND);
+ }
+ if (mag_filter != GEN6_MAPFILTER_NEAREST) {
+ dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND |
+ GEN6_SAMPLER_DW3_V_MAG_ROUND |
+ GEN6_SAMPLER_DW3_R_MAG_ROUND);
+ }
+
+ if (!state->normalized_coords)
+ dw3 |= 1;
+
+ STATIC_ASSERT(Elements(sampler->payload) >= 15);
+
+ sampler->payload[0] = dw0;
+ sampler->payload[1] = dw1;
+ sampler->payload[2] = dw3;
+
+ /* Gen6 takes the border color pre-converted to several formats */
+ sampler_init_border_color_gen6(dev,
+ &state->border_color, &sampler->payload[3], 12);
+ }
+}
* Chia-I Wu <olv@lunarg.com>
*/
+#include "core/ilo_state_3d.h"
#include "util/u_draw.h"
#include "util/u_pack_color.h"
#include "ilo_draw.h"
#include "ilo_state.h"
-#include "ilo_state_3d.h"
#include "ilo_blit.h"
#include "ilo_blitter.h"
#define ILO_BUILDER_3D_TOP_H
#include "genhw/genhw.h"
+#include "core/ilo_state_3d.h"
#include "core/intel_winsys.h"
#include "ilo_common.h"
#include "ilo_resource.h"
#include "ilo_shader.h"
#include "ilo_state.h"
-#include "ilo_state_3d.h"
#include "ilo_builder.h"
static inline void
#include "genhw/genhw.h" /* for SBE setup */
#include "tgsi/tgsi_parse.h"
+#include "core/ilo_state_3d.h"
#include "core/intel_winsys.h"
#include "shader/ilo_shader_internal.h"
#include "ilo_builder.h"
#include "ilo_state.h"
-#include "ilo_state_3d.h"
#include "ilo_shader.h"
struct ilo_shader_cache {
* Chia-I Wu <olv@lunarg.com>
*/
+#include "core/ilo_state_3d.h"
#include "util/u_dynarray.h"
#include "util/u_helpers.h"
#include "util/u_upload_mgr.h"
#include "ilo_resource.h"
#include "ilo_shader.h"
#include "ilo_state.h"
-#include "ilo_state_3d.h"
static void
finalize_shader_states(struct ilo_state_vector *vec)
#ifndef ILO_STATE_H
#define ILO_STATE_H
+#include "core/ilo_state_3d.h"
#include "pipe/p_state.h"
#include "util/u_dynarray.h"
#include "ilo_common.h"
-/**
- * \see brw_context.h
- */
-#define ILO_MAX_DRAW_BUFFERS 8
-#define ILO_MAX_CONST_BUFFERS (1 + 12)
-#define ILO_MAX_SAMPLER_VIEWS 16
-#define ILO_MAX_SAMPLERS 16
-#define ILO_MAX_SO_BINDINGS 64
-#define ILO_MAX_SO_BUFFERS 4
-#define ILO_MAX_VIEWPORTS 1
-
-#define ILO_MAX_SURFACES 256
-
/**
* States that we track.
*
ILO_DIRTY_ALL = 0xffffffff,
};
-struct intel_bo;
-struct ilo_buffer;
struct ilo_context;
-struct ilo_shader_state;
-struct ilo_texture;
-
-struct ilo_vb_state {
- struct pipe_vertex_buffer states[PIPE_MAX_ATTRIBS];
- uint32_t enabled_mask;
-};
-
-struct ilo_ib_state {
- struct pipe_resource *buffer;
- const void *user_buffer;
- unsigned offset;
- unsigned index_size;
-
- /* these are not valid until the state is finalized */
- struct pipe_resource *hw_resource;
- unsigned hw_index_size;
- /* an offset to be added to pipe_draw_info::start */
- int64_t draw_start_offset;
-};
-
-struct ilo_ve_cso {
- /* VERTEX_ELEMENT_STATE */
- uint32_t payload[2];
-};
-
-struct ilo_ve_state {
- struct ilo_ve_cso cso[PIPE_MAX_ATTRIBS];
- unsigned count;
-
- unsigned instance_divisors[PIPE_MAX_ATTRIBS];
- unsigned vb_mapping[PIPE_MAX_ATTRIBS];
- unsigned vb_count;
-
- /* these are not valid until the state is finalized */
- struct ilo_ve_cso edgeflag_cso;
- bool last_cso_edgeflag;
-
- struct ilo_ve_cso nosrc_cso;
- bool prepend_nosrc_cso;
-};
-
-struct ilo_so_state {
- struct pipe_stream_output_target *states[ILO_MAX_SO_BUFFERS];
- unsigned count;
- unsigned append_bitmask;
-
- bool enabled;
-};
-
-struct ilo_viewport_cso {
- /* matrix form */
- float m00, m11, m22, m30, m31, m32;
-
- /* guardband in NDC space */
- float min_gbx, min_gby, max_gbx, max_gby;
-
- /* viewport in screen space */
- float min_x, min_y, min_z;
- float max_x, max_y, max_z;
-};
-
-struct ilo_viewport_state {
- struct ilo_viewport_cso cso[ILO_MAX_VIEWPORTS];
- unsigned count;
-
- struct pipe_viewport_state viewport0;
-};
-
-struct ilo_scissor_state {
- /* SCISSOR_RECT */
- uint32_t payload[ILO_MAX_VIEWPORTS * 2];
-
- struct pipe_scissor_state scissor0;
-};
-
-struct ilo_rasterizer_clip {
- /* 3DSTATE_CLIP */
- uint32_t payload[3];
-
- uint32_t can_enable_guardband;
-};
-
-struct ilo_rasterizer_sf {
- /* 3DSTATE_SF */
- uint32_t payload[3];
- uint32_t dw_msaa;
-
- /* Global Depth Offset Constant/Scale/Clamp */
- uint32_t dw_depth_offset_const;
- uint32_t dw_depth_offset_scale;
- uint32_t dw_depth_offset_clamp;
-
- /* Gen8+ 3DSTATE_RASTER */
- uint32_t dw_raster;
-};
-
-struct ilo_rasterizer_wm {
- /* 3DSTATE_WM */
- uint32_t payload[2];
- uint32_t dw_msaa_rast;
- uint32_t dw_msaa_disp;
-};
-
-struct ilo_rasterizer_state {
- struct pipe_rasterizer_state state;
-
- struct ilo_rasterizer_clip clip;
- struct ilo_rasterizer_sf sf;
- struct ilo_rasterizer_wm wm;
-};
-
-struct ilo_dsa_state {
- /* DEPTH_STENCIL_STATE or Gen8+ 3DSTATE_WM_DEPTH_STENCIL */
- uint32_t payload[3];
-
- uint32_t dw_blend_alpha;
- uint32_t dw_ps_blend_alpha;
- ubyte alpha_ref;
-};
-
-struct ilo_blend_cso {
- /* BLEND_STATE */
- uint32_t payload[2];
-
- uint32_t dw_blend;
- uint32_t dw_blend_dst_alpha_forced_one;
-};
-
-struct ilo_blend_state {
- struct ilo_blend_cso cso[ILO_MAX_DRAW_BUFFERS];
-
- bool dual_blend;
- bool alpha_to_coverage;
-
- uint32_t dw_shared;
- uint32_t dw_alpha_mod;
- uint32_t dw_logicop;
-
- /* a part of 3DSTATE_PS_BLEND */
- uint32_t dw_ps_blend;
- uint32_t dw_ps_blend_dst_alpha_forced_one;
-};
-
-struct ilo_sampler_cso {
- /* SAMPLER_STATE and SAMPLER_BORDER_COLOR_STATE */
- uint32_t payload[15];
-
- uint32_t dw_filter;
- uint32_t dw_filter_aniso;
- uint32_t dw_wrap;
- uint32_t dw_wrap_1d;
- uint32_t dw_wrap_cube;
-
- bool anisotropic;
- bool saturate_r;
- bool saturate_s;
- bool saturate_t;
-};
-
-struct ilo_sampler_state {
- const struct ilo_sampler_cso *cso[ILO_MAX_SAMPLERS];
-};
-
-struct ilo_view_surface {
- /* SURFACE_STATE */
- uint32_t payload[13];
- struct intel_bo *bo;
-
- uint32_t scanout;
-};
-
-struct ilo_view_cso {
- struct pipe_sampler_view base;
-
- struct ilo_view_surface surface;
-};
-
-struct ilo_view_state {
- struct pipe_sampler_view *states[ILO_MAX_SAMPLER_VIEWS];
- unsigned count;
-};
-
-struct ilo_cbuf_cso {
- struct pipe_resource *resource;
- struct ilo_view_surface surface;
-
- /*
- * this CSO is not so constant because user buffer needs to be uploaded in
- * finalize_constant_buffers()
- */
- const void *user_buffer;
- unsigned user_buffer_size;
-};
-
-struct ilo_cbuf_state {
- struct ilo_cbuf_cso cso[ILO_MAX_CONST_BUFFERS];
- uint32_t enabled_mask;
-};
-
-struct ilo_resource_state {
- struct pipe_surface *states[PIPE_MAX_SHADER_RESOURCES];
- unsigned count;
-};
-
-struct ilo_surface_cso {
- struct pipe_surface base;
-
- bool is_rt;
- union {
- struct ilo_view_surface rt;
- struct ilo_zs_surface {
- uint32_t payload[12];
- uint32_t dw_aligned_8x4;
-
- struct intel_bo *bo;
- struct intel_bo *hiz_bo;
- struct intel_bo *separate_s8_bo;
- } zs;
- } u;
-};
-
-struct ilo_fb_state {
- struct pipe_framebuffer_state state;
-
- struct ilo_view_surface null_rt;
- struct ilo_zs_surface null_zs;
-
- struct ilo_fb_blend_caps {
- bool can_logicop;
- bool can_blend;
- bool can_alpha_test;
- bool dst_alpha_forced_one;
- } blend_caps[PIPE_MAX_COLOR_BUFS];
-
- unsigned num_samples;
-};
struct ilo_global_binding_cso {
struct pipe_resource *resource;
unsigned count;
};
-struct ilo_shader_cso {
- uint32_t payload[5];
-};
-
struct ilo_state_vector {
const struct pipe_draw_info *draw;
+++ /dev/null
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2012-2014 LunarG, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Chia-I Wu <olv@lunarg.com>
- */
-
-#ifndef ILO_STATE_3D_H
-#define ILO_STATE_3D_H
-
-#include "genhw/genhw.h"
-#include "core/intel_winsys.h"
-
-#include "ilo_common.h"
-#include "ilo_state.h"
-
-/**
- * Translate a pipe texture target to the matching hardware surface type.
- */
-static inline int
-ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
-{
- switch (target) {
- case PIPE_BUFFER:
- return GEN6_SURFTYPE_BUFFER;
- case PIPE_TEXTURE_1D:
- case PIPE_TEXTURE_1D_ARRAY:
- return GEN6_SURFTYPE_1D;
- case PIPE_TEXTURE_2D:
- case PIPE_TEXTURE_RECT:
- case PIPE_TEXTURE_2D_ARRAY:
- return GEN6_SURFTYPE_2D;
- case PIPE_TEXTURE_3D:
- return GEN6_SURFTYPE_3D;
- case PIPE_TEXTURE_CUBE:
- case PIPE_TEXTURE_CUBE_ARRAY:
- return GEN6_SURFTYPE_CUBE;
- default:
- assert(!"unknown texture target");
- return GEN6_SURFTYPE_BUFFER;
- }
-}
-
-void
-ilo_gpe_init_ve(const struct ilo_dev *dev,
- unsigned num_states,
- const struct pipe_vertex_element *states,
- struct ilo_ve_state *ve);
-
-void
-ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev,
- struct ilo_ve_cso *cso);
-
-void
-ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev,
- int comp0, int comp1, int comp2, int comp3,
- struct ilo_ve_cso *cso);
-
-void
-ilo_gpe_set_viewport_cso(const struct ilo_dev *dev,
- const struct pipe_viewport_state *state,
- struct ilo_viewport_cso *vp);
-
-void
-ilo_gpe_set_scissor(const struct ilo_dev *dev,
- unsigned start_slot,
- unsigned num_states,
- const struct pipe_scissor_state *states,
- struct ilo_scissor_state *scissor);
-
-void
-ilo_gpe_set_scissor_null(const struct ilo_dev *dev,
- struct ilo_scissor_state *scissor);
-
-void
-ilo_gpe_init_rasterizer(const struct ilo_dev *dev,
- const struct pipe_rasterizer_state *state,
- struct ilo_rasterizer_state *rasterizer);
-void
-ilo_gpe_init_dsa(const struct ilo_dev *dev,
- const struct pipe_depth_stencil_alpha_state *state,
- struct ilo_dsa_state *dsa);
-
-void
-ilo_gpe_init_blend(const struct ilo_dev *dev,
- const struct pipe_blend_state *state,
- struct ilo_blend_state *blend);
-
-void
-ilo_gpe_init_sampler_cso(const struct ilo_dev *dev,
- const struct pipe_sampler_state *state,
- struct ilo_sampler_cso *sampler);
-
-void
-ilo_gpe_init_view_surface_null(const struct ilo_dev *dev,
- unsigned width, unsigned height,
- unsigned depth, unsigned level,
- struct ilo_view_surface *surf);
-
-void
-ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev,
- const struct ilo_buffer *buf,
- unsigned offset, unsigned size,
- unsigned struct_size,
- enum pipe_format elem_format,
- bool is_rt, bool render_cache_rw,
- struct ilo_view_surface *surf);
-
-void
-ilo_gpe_init_view_surface_for_texture(const struct ilo_dev *dev,
- const struct ilo_texture *tex,
- enum pipe_format format,
- unsigned first_level,
- unsigned num_levels,
- unsigned first_layer,
- unsigned num_layers,
- bool is_rt,
- struct ilo_view_surface *surf);
-
-void
-ilo_gpe_init_zs_surface(const struct ilo_dev *dev,
- const struct ilo_texture *tex,
- enum pipe_format format, unsigned level,
- unsigned first_layer, unsigned num_layers,
- struct ilo_zs_surface *zs);
-
-void
-ilo_gpe_init_vs_cso(const struct ilo_dev *dev,
- const struct ilo_shader_state *vs,
- struct ilo_shader_cso *cso);
-
-void
-ilo_gpe_init_gs_cso(const struct ilo_dev *dev,
- const struct ilo_shader_state *gs,
- struct ilo_shader_cso *cso);
-
-void
-ilo_gpe_init_fs_cso(const struct ilo_dev *dev,
- const struct ilo_shader_state *fs,
- struct ilo_shader_cso *cso);
-
-void
-ilo_gpe_set_fb(const struct ilo_dev *dev,
- const struct pipe_framebuffer_state *state,
- struct ilo_fb_state *fb);
-
-#endif /* ILO_STATE_3D_H */
+++ /dev/null
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2012-2014 LunarG, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Chia-I Wu <olv@lunarg.com>
- */
-
-#include "genhw/genhw.h"
-#include "core/ilo_format.h"
-#include "util/u_dual_blend.h"
-#include "util/u_framebuffer.h"
-#include "util/u_half.h"
-
-#include "ilo_context.h"
-#include "ilo_resource.h"
-#include "ilo_shader.h"
-#include "ilo_state.h"
-#include "ilo_state_3d.h"
-
-static void
-rasterizer_init_clip(const struct ilo_dev *dev,
- const struct pipe_rasterizer_state *state,
- struct ilo_rasterizer_clip *clip)
-{
- uint32_t dw1, dw2, dw3;
-
- ILO_DEV_ASSERT(dev, 6, 8);
-
- dw1 = GEN6_CLIP_DW1_STATISTICS;
-
- if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 219:
- *
- * "Workaround : Due to Hardware issue "EarlyCull" needs to be
- * enabled only for the cases where the incoming primitive topology
- * into the clipper guaranteed to be Trilist."
- *
- * What does this mean?
- */
- dw1 |= 0 << 19 |
- GEN7_CLIP_DW1_EARLY_CULL_ENABLE;
-
- if (ilo_dev_gen(dev) < ILO_GEN(8)) {
- if (state->front_ccw)
- dw1 |= GEN7_CLIP_DW1_FRONTWINDING_CCW;
-
- switch (state->cull_face) {
- case PIPE_FACE_NONE:
- dw1 |= GEN7_CLIP_DW1_CULLMODE_NONE;
- break;
- case PIPE_FACE_FRONT:
- dw1 |= GEN7_CLIP_DW1_CULLMODE_FRONT;
- break;
- case PIPE_FACE_BACK:
- dw1 |= GEN7_CLIP_DW1_CULLMODE_BACK;
- break;
- case PIPE_FACE_FRONT_AND_BACK:
- dw1 |= GEN7_CLIP_DW1_CULLMODE_BOTH;
- break;
- }
- }
- }
-
- dw2 = GEN6_CLIP_DW2_CLIP_ENABLE |
- GEN6_CLIP_DW2_XY_TEST_ENABLE |
- state->clip_plane_enable << GEN6_CLIP_DW2_UCP_CLIP_ENABLES__SHIFT |
- GEN6_CLIP_DW2_CLIPMODE_NORMAL;
-
- if (state->clip_halfz)
- dw2 |= GEN6_CLIP_DW2_APIMODE_D3D;
- else
- dw2 |= GEN6_CLIP_DW2_APIMODE_OGL;
-
- if (ilo_dev_gen(dev) < ILO_GEN(8) && state->depth_clip)
- dw2 |= GEN6_CLIP_DW2_Z_TEST_ENABLE;
-
- if (state->flatshade_first) {
- dw2 |= 0 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT |
- 0 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT |
- 1 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT;
- }
- else {
- dw2 |= 2 << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT |
- 1 << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT |
- 2 << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT;
- }
-
- dw3 = 0x1 << GEN6_CLIP_DW3_MIN_POINT_WIDTH__SHIFT |
- 0x7ff << GEN6_CLIP_DW3_MAX_POINT_WIDTH__SHIFT;
-
- clip->payload[0] = dw1;
- clip->payload[1] = dw2;
- clip->payload[2] = dw3;
-
- clip->can_enable_guardband = true;
-
- /*
- * There are several reasons that guard band test should be disabled
- *
- * - GL wide points (to avoid partially visible objects)
- * - GL wide or AA lines (to avoid partially visible objects)
- */
- if (state->point_size_per_vertex || state->point_size > 1.0f)
- clip->can_enable_guardband = false;
- if (state->line_smooth || state->line_width > 1.0f)
- clip->can_enable_guardband = false;
-}
-
-static void
-rasterizer_init_sf_depth_offset_gen6(const struct ilo_dev *dev,
- const struct pipe_rasterizer_state *state,
- struct ilo_rasterizer_sf *sf)
-{
- ILO_DEV_ASSERT(dev, 6, 8);
-
- /*
- * Scale the constant term. The minimum representable value used by the HW
- * is not large enough to be the minimum resolvable difference.
- */
- sf->dw_depth_offset_const = fui(state->offset_units * 2.0f);
- sf->dw_depth_offset_scale = fui(state->offset_scale);
- sf->dw_depth_offset_clamp = fui(state->offset_clamp);
-}
-
-static void
-rasterizer_init_sf_gen6(const struct ilo_dev *dev,
- const struct pipe_rasterizer_state *state,
- struct ilo_rasterizer_sf *sf)
-{
- int line_width, point_width;
- uint32_t dw1, dw2, dw3;
-
- ILO_DEV_ASSERT(dev, 6, 7.5);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 248:
- *
- * "This bit (Statistics Enable) should be set whenever clipping is
- * enabled and the Statistics Enable bit is set in CLIP_STATE. It
- * should be cleared if clipping is disabled or Statistics Enable in
- * CLIP_STATE is clear."
- */
- dw1 = GEN7_SF_DW1_STATISTICS |
- GEN7_SF_DW1_VIEWPORT_ENABLE;
-
- /* XXX GEN6 path seems to work fine for GEN7 */
- if (false && ilo_dev_gen(dev) >= ILO_GEN(7)) {
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 258:
- *
- * "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
- * Enable Solid , Global Depth Offset Enable Wireframe, and Global
- * Depth Offset Enable Point) should be set whenever non zero depth
- * bias (Slope, Bias) values are used. Setting this bit may have
- * some degradation of performance for some workloads."
- */
- if (state->offset_tri || state->offset_line || state->offset_point) {
- /* XXX need to scale offset_const according to the depth format */
- dw1 |= GEN7_SF_DW1_LEGACY_DEPTH_OFFSET;
-
- dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID |
- GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME |
- GEN7_SF_DW1_DEPTH_OFFSET_POINT;
- }
- } else {
- if (state->offset_tri)
- dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_SOLID;
- if (state->offset_line)
- dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME;
- if (state->offset_point)
- dw1 |= GEN7_SF_DW1_DEPTH_OFFSET_POINT;
- }
-
- switch (state->fill_front) {
- case PIPE_POLYGON_MODE_FILL:
- dw1 |= GEN7_SF_DW1_FRONTFACE_SOLID;
- break;
- case PIPE_POLYGON_MODE_LINE:
- dw1 |= GEN7_SF_DW1_FRONTFACE_WIREFRAME;
- break;
- case PIPE_POLYGON_MODE_POINT:
- dw1 |= GEN7_SF_DW1_FRONTFACE_POINT;
- break;
- }
-
- switch (state->fill_back) {
- case PIPE_POLYGON_MODE_FILL:
- dw1 |= GEN7_SF_DW1_BACKFACE_SOLID;
- break;
- case PIPE_POLYGON_MODE_LINE:
- dw1 |= GEN7_SF_DW1_BACKFACE_WIREFRAME;
- break;
- case PIPE_POLYGON_MODE_POINT:
- dw1 |= GEN7_SF_DW1_BACKFACE_POINT;
- break;
- }
-
- if (state->front_ccw)
- dw1 |= GEN7_SF_DW1_FRONTWINDING_CCW;
-
- dw2 = 0;
-
- if (state->line_smooth) {
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 251:
- *
- * "This field (Anti-aliasing Enable) must be disabled if any of the
- * render targets have integer (UINT or SINT) surface format."
- *
- * From the Sandy Bridge PRM, volume 2 part 1, page 317:
- *
- * "This field (Hierarchical Depth Buffer Enable) must be disabled
- * if Anti-aliasing Enable in 3DSTATE_SF is enabled."
- *
- * TODO We do not check those yet.
- */
- dw2 |= GEN7_SF_DW2_AA_LINE_ENABLE |
- GEN7_SF_DW2_AA_LINE_CAP_1_0;
- }
-
- switch (state->cull_face) {
- case PIPE_FACE_NONE:
- dw2 |= GEN7_SF_DW2_CULLMODE_NONE;
- break;
- case PIPE_FACE_FRONT:
- dw2 |= GEN7_SF_DW2_CULLMODE_FRONT;
- break;
- case PIPE_FACE_BACK:
- dw2 |= GEN7_SF_DW2_CULLMODE_BACK;
- break;
- case PIPE_FACE_FRONT_AND_BACK:
- dw2 |= GEN7_SF_DW2_CULLMODE_BOTH;
- break;
- }
-
- /*
- * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
- * pixels in the minor direction. We have to make the lines slightly
- * thicker, 0.5 pixel on both sides, so that they intersect that many
- * pixels and those pixels are considered part of the lines.
- *
- * Line width is in U3.7.
- */
- line_width = (int)
- ((state->line_width + (float) state->line_smooth) * 128.0f + 0.5f);
- line_width = CLAMP(line_width, 0, 1023);
-
- /* use GIQ rules */
- if (line_width == 128 && !state->line_smooth)
- line_width = 0;
-
- dw2 |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
-
- if (ilo_dev_gen(dev) == ILO_GEN(7.5) && state->line_stipple_enable)
- dw2 |= GEN75_SF_DW2_LINE_STIPPLE_ENABLE;
-
- if (state->scissor)
- dw2 |= GEN7_SF_DW2_SCISSOR_ENABLE;
-
- dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE |
- GEN7_SF_DW3_SUBPIXEL_8BITS;
-
- if (state->line_last_pixel)
- dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE;
-
- if (state->flatshade_first) {
- dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
- 0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
- 1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
- } else {
- dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
- 1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
- 2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
- }
-
- if (!state->point_size_per_vertex)
- dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH;
-
- /* in U8.3 */
- point_width = (int) (state->point_size * 8.0f + 0.5f);
- point_width = CLAMP(point_width, 1, 2047);
-
- dw3 |= point_width;
-
- STATIC_ASSERT(Elements(sf->payload) >= 3);
- sf->payload[0] = dw1;
- sf->payload[1] = dw2;
- sf->payload[2] = dw3;
-
- if (state->multisample) {
- sf->dw_msaa = GEN7_SF_DW2_MSRASTMODE_ON_PATTERN;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 251:
- *
- * "Software must not program a value of 0.0 when running in
- * MSRASTMODE_ON_xxx modes - zero-width lines are not available
- * when multisampling rasterization is enabled."
- */
- if (!line_width) {
- line_width = 128; /* 1.0f */
-
- sf->dw_msaa |= line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
- }
- } else {
- sf->dw_msaa = 0;
- }
-
- rasterizer_init_sf_depth_offset_gen6(dev, state, sf);
- /* 3DSTATE_RASTER is Gen8+ only */
- sf->dw_raster = 0;
-}
-
-static uint32_t
-rasterizer_get_sf_raster_gen8(const struct ilo_dev *dev,
- const struct pipe_rasterizer_state *state)
-{
- uint32_t dw = 0;
-
- ILO_DEV_ASSERT(dev, 8, 8);
-
- if (state->front_ccw)
- dw |= GEN8_RASTER_DW1_FRONTWINDING_CCW;
-
- switch (state->cull_face) {
- case PIPE_FACE_NONE:
- dw |= GEN8_RASTER_DW1_CULLMODE_NONE;
- break;
- case PIPE_FACE_FRONT:
- dw |= GEN8_RASTER_DW1_CULLMODE_FRONT;
- break;
- case PIPE_FACE_BACK:
- dw |= GEN8_RASTER_DW1_CULLMODE_BACK;
- break;
- case PIPE_FACE_FRONT_AND_BACK:
- dw |= GEN8_RASTER_DW1_CULLMODE_BOTH;
- break;
- }
-
- if (state->point_smooth)
- dw |= GEN8_RASTER_DW1_SMOOTH_POINT_ENABLE;
-
- if (state->multisample)
- dw |= GEN8_RASTER_DW1_API_MULTISAMPLE_ENABLE;
-
- if (state->offset_tri)
- dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_SOLID;
- if (state->offset_line)
- dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_WIREFRAME;
- if (state->offset_point)
- dw|= GEN8_RASTER_DW1_DEPTH_OFFSET_POINT;
-
- switch (state->fill_front) {
- case PIPE_POLYGON_MODE_FILL:
- dw |= GEN8_RASTER_DW1_FRONTFACE_SOLID;
- break;
- case PIPE_POLYGON_MODE_LINE:
- dw |= GEN8_RASTER_DW1_FRONTFACE_WIREFRAME;
- break;
- case PIPE_POLYGON_MODE_POINT:
- dw |= GEN8_RASTER_DW1_FRONTFACE_POINT;
- break;
- }
-
- switch (state->fill_back) {
- case PIPE_POLYGON_MODE_FILL:
- dw |= GEN8_RASTER_DW1_BACKFACE_SOLID;
- break;
- case PIPE_POLYGON_MODE_LINE:
- dw |= GEN8_RASTER_DW1_BACKFACE_WIREFRAME;
- break;
- case PIPE_POLYGON_MODE_POINT:
- dw |= GEN8_RASTER_DW1_BACKFACE_POINT;
- break;
- }
-
- if (state->line_smooth)
- dw |= GEN8_RASTER_DW1_AA_LINE_ENABLE;
-
- if (state->scissor)
- dw |= GEN8_RASTER_DW1_SCISSOR_ENABLE;
-
- if (state->depth_clip)
- dw |= GEN8_RASTER_DW1_Z_TEST_ENABLE;
-
- return dw;
-}
-
-static void
-rasterizer_init_sf_gen8(const struct ilo_dev *dev,
- const struct pipe_rasterizer_state *state,
- struct ilo_rasterizer_sf *sf)
-{
- int line_width, point_width;
- uint32_t dw1, dw2, dw3;
-
- ILO_DEV_ASSERT(dev, 8, 8);
-
- /* in U3.7 */
- line_width = (int)
- ((state->line_width + (float) state->line_smooth) * 128.0f + 0.5f);
- line_width = CLAMP(line_width, 0, 1023);
-
- /* use GIQ rules */
- if (line_width == 128 && !state->line_smooth)
- line_width = 0;
-
- /* in U8.3 */
- point_width = (int) (state->point_size * 8.0f + 0.5f);
- point_width = CLAMP(point_width, 1, 2047);
-
- dw1 = GEN7_SF_DW1_STATISTICS |
- GEN7_SF_DW1_VIEWPORT_ENABLE;
-
- dw2 = line_width << GEN7_SF_DW2_LINE_WIDTH__SHIFT;
- if (state->line_smooth)
- dw2 |= GEN7_SF_DW2_AA_LINE_CAP_1_0;
-
- dw3 = GEN7_SF_DW3_TRUE_AA_LINE_DISTANCE |
- GEN7_SF_DW3_SUBPIXEL_8BITS |
- point_width;
-
- if (state->line_last_pixel)
- dw3 |= GEN7_SF_DW3_LINE_LAST_PIXEL_ENABLE;
-
- if (state->flatshade_first) {
- dw3 |= 0 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
- 0 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
- 1 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
- } else {
- dw3 |= 2 << GEN7_SF_DW3_TRI_PROVOKE__SHIFT |
- 1 << GEN7_SF_DW3_LINE_PROVOKE__SHIFT |
- 2 << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT;
- }
-
- if (!state->point_size_per_vertex)
- dw3 |= GEN7_SF_DW3_USE_POINT_WIDTH;
-
- dw3 |= point_width;
-
- STATIC_ASSERT(Elements(sf->payload) >= 3);
- sf->payload[0] = dw1;
- sf->payload[1] = dw2;
- sf->payload[2] = dw3;
-
- rasterizer_init_sf_depth_offset_gen6(dev, state, sf);
-
- sf->dw_msaa = 0;
- sf->dw_raster = rasterizer_get_sf_raster_gen8(dev, state);
-}
-
-static void
-rasterizer_init_wm_gen6(const struct ilo_dev *dev,
- const struct pipe_rasterizer_state *state,
- struct ilo_rasterizer_wm *wm)
-{
- uint32_t dw5, dw6;
-
- ILO_DEV_ASSERT(dev, 6, 6);
-
- /* only the FF unit states are set, as in GEN7 */
-
- dw5 = GEN6_WM_DW5_AA_LINE_WIDTH_2_0;
-
- /* same value as in 3DSTATE_SF */
- if (state->line_smooth)
- dw5 |= GEN6_WM_DW5_AA_LINE_CAP_1_0;
-
- if (state->poly_stipple_enable)
- dw5 |= GEN6_WM_DW5_POLY_STIPPLE_ENABLE;
- if (state->line_stipple_enable)
- dw5 |= GEN6_WM_DW5_LINE_STIPPLE_ENABLE;
-
- /*
- * assertion that makes sure
- *
- * dw6 |= wm->dw_msaa_rast | wm->dw_msaa_disp;
- *
- * is valid
- */
- STATIC_ASSERT(GEN6_WM_DW6_MSRASTMODE_OFF_PIXEL == 0 &&
- GEN6_WM_DW6_MSDISPMODE_PERSAMPLE == 0);
- dw6 = GEN6_WM_DW6_ZW_INTERP_PIXEL;
-
- if (state->bottom_edge_rule)
- dw6 |= GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT;
-
- wm->dw_msaa_rast =
- (state->multisample) ? GEN6_WM_DW6_MSRASTMODE_ON_PATTERN : 0;
- wm->dw_msaa_disp = GEN6_WM_DW6_MSDISPMODE_PERPIXEL;
-
- STATIC_ASSERT(Elements(wm->payload) >= 2);
- wm->payload[0] = dw5;
- wm->payload[1] = dw6;
-}
-
-static void
-rasterizer_init_wm_gen7(const struct ilo_dev *dev,
- const struct pipe_rasterizer_state *state,
- struct ilo_rasterizer_wm *wm)
-{
- uint32_t dw1, dw2;
-
- ILO_DEV_ASSERT(dev, 7, 7.5);
-
- /*
- * assertion that makes sure
- *
- * dw1 |= wm->dw_msaa_rast;
- * dw2 |= wm->dw_msaa_disp;
- *
- * is valid
- */
- STATIC_ASSERT(GEN7_WM_DW1_MSRASTMODE_OFF_PIXEL == 0 &&
- GEN7_WM_DW2_MSDISPMODE_PERSAMPLE == 0);
- dw1 = GEN7_WM_DW1_ZW_INTERP_PIXEL |
- GEN7_WM_DW1_AA_LINE_WIDTH_2_0;
- dw2 = 0;
-
- /* same value as in 3DSTATE_SF */
- if (state->line_smooth)
- dw1 |= GEN7_WM_DW1_AA_LINE_CAP_1_0;
-
- if (state->poly_stipple_enable)
- dw1 |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE;
- if (state->line_stipple_enable)
- dw1 |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE;
-
- if (state->bottom_edge_rule)
- dw1 |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT;
-
- wm->dw_msaa_rast =
- (state->multisample) ? GEN7_WM_DW1_MSRASTMODE_ON_PATTERN : 0;
- wm->dw_msaa_disp = GEN7_WM_DW2_MSDISPMODE_PERPIXEL;
-
- STATIC_ASSERT(Elements(wm->payload) >= 2);
- wm->payload[0] = dw1;
- wm->payload[1] = dw2;
-}
-
-static uint32_t
-rasterizer_get_wm_gen8(const struct ilo_dev *dev,
- const struct pipe_rasterizer_state *state)
-{
- uint32_t dw;
-
- ILO_DEV_ASSERT(dev, 8, 8);
-
- dw = GEN7_WM_DW1_ZW_INTERP_PIXEL |
- GEN7_WM_DW1_AA_LINE_WIDTH_2_0;
-
- /* same value as in 3DSTATE_SF */
- if (state->line_smooth)
- dw |= GEN7_WM_DW1_AA_LINE_CAP_1_0;
-
- if (state->poly_stipple_enable)
- dw |= GEN7_WM_DW1_POLY_STIPPLE_ENABLE;
- if (state->line_stipple_enable)
- dw |= GEN7_WM_DW1_LINE_STIPPLE_ENABLE;
-
- if (state->bottom_edge_rule)
- dw |= GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT;
-
- return dw;
-}
-
-void
-ilo_gpe_init_rasterizer(const struct ilo_dev *dev,
- const struct pipe_rasterizer_state *state,
- struct ilo_rasterizer_state *rasterizer)
-{
- rasterizer_init_clip(dev, state, &rasterizer->clip);
-
- if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
- memset(&rasterizer->wm, 0, sizeof(rasterizer->wm));
- rasterizer->wm.payload[0] = rasterizer_get_wm_gen8(dev, state);
-
- rasterizer_init_sf_gen8(dev, state, &rasterizer->sf);
- } else if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
- rasterizer_init_wm_gen7(dev, state, &rasterizer->wm);
- rasterizer_init_sf_gen6(dev, state, &rasterizer->sf);
- } else {
- rasterizer_init_wm_gen6(dev, state, &rasterizer->wm);
- rasterizer_init_sf_gen6(dev, state, &rasterizer->sf);
- }
-}
-
-static void
-fs_init_cso_gen6(const struct ilo_dev *dev,
- const struct ilo_shader_state *fs,
- struct ilo_shader_cso *cso)
-{
- int start_grf, input_count, sampler_count, interps, max_threads;
- uint32_t dw2, dw4, dw5, dw6;
-
- ILO_DEV_ASSERT(dev, 6, 6);
-
- start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
- input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
- sampler_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
- interps = ilo_shader_get_kernel_param(fs,
- ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
-
- /* see brwCreateContext() */
- max_threads = (dev->gt == 2) ? 80 : 40;
-
- dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
- dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
- dw4 = start_grf << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
- 0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
- 0 << GEN6_WM_DW4_URB_GRF_START2__SHIFT;
-
- dw5 = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 275:
- *
- * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
- * PS kernel or color calculator has the ability to kill (discard)
- * pixels or samples, other than due to depth or stencil testing.
- * This bit is required to be ENABLED in the following situations:
- *
- * The API pixel shader program contains "killpix" or "discard"
- * instructions, or other code in the pixel shader kernel that can
- * cause the final pixel mask to differ from the pixel mask received
- * on dispatch.
- *
- * A sampler with chroma key enabled with kill pixel mode is used by
- * the pixel shader.
- *
- * Any render target has Alpha Test Enable or AlphaToCoverage Enable
- * enabled.
- *
- * The pixel shader kernel generates and outputs oMask.
- *
- * Note: As ClipDistance clipping is fully supported in hardware and
- * therefore not via PS instructions, there should be no need to
- * ENABLE this bit due to ClipDistance clipping."
- */
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
- dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 275:
- *
- * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
- * field must be set to disabled."
- *
- * TODO This is not checked yet.
- */
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
- dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH;
-
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
- dw5 |= GEN6_WM_DW5_PS_USE_DEPTH;
-
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
- dw5 |= GEN6_WM_DW5_PS_USE_W;
-
- /*
- * TODO set this bit only when
- *
- * a) fs writes colors and color is not masked, or
- * b) fs writes depth, or
- * c) fs or cc kills
- */
- if (true)
- dw5 |= GEN6_WM_DW5_PS_DISPATCH_ENABLE;
-
- assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
- dw5 |= GEN6_PS_DISPATCH_8 << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT;
-
- dw6 = input_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT |
- GEN6_WM_DW6_PS_POSOFFSET_NONE |
- interps << GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT;
-
- STATIC_ASSERT(Elements(cso->payload) >= 4);
- cso->payload[0] = dw2;
- cso->payload[1] = dw4;
- cso->payload[2] = dw5;
- cso->payload[3] = dw6;
-}
-
-static uint32_t
-fs_get_wm_gen7(const struct ilo_dev *dev,
- const struct ilo_shader_state *fs)
-{
- uint32_t dw;
-
- ILO_DEV_ASSERT(dev, 7, 7.5);
-
- dw = ilo_shader_get_kernel_param(fs,
- ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) <<
- GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT;
-
- /*
- * TODO set this bit only when
- *
- * a) fs writes colors and color is not masked, or
- * b) fs writes depth, or
- * c) fs or cc kills
- */
- dw |= GEN7_WM_DW1_PS_DISPATCH_ENABLE;
-
- /*
- * From the Ivy Bridge PRM, volume 2 part 1, page 278:
- *
- * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that
- * the PS kernel or color calculator has the ability to kill
- * (discard) pixels or samples, other than due to depth or stencil
- * testing. This bit is required to be ENABLED in the following
- * situations:
- *
- * - The API pixel shader program contains "killpix" or "discard"
- * instructions, or other code in the pixel shader kernel that
- * can cause the final pixel mask to differ from the pixel mask
- * received on dispatch.
- *
- * - A sampler with chroma key enabled with kill pixel mode is used
- * by the pixel shader.
- *
- * - Any render target has Alpha Test Enable or AlphaToCoverage
- * Enable enabled.
- *
- * - The pixel shader kernel generates and outputs oMask.
- *
- * Note: As ClipDistance clipping is fully supported in hardware
- * and therefore not via PS instructions, there should be no need
- * to ENABLE this bit due to ClipDistance clipping."
- */
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
- dw |= GEN7_WM_DW1_PS_KILL_PIXEL;
-
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
- dw |= GEN7_WM_DW1_PSCDEPTH_ON;
-
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
- dw |= GEN7_WM_DW1_PS_USE_DEPTH;
-
- if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
- dw |= GEN7_WM_DW1_PS_USE_W;
-
- return dw;
-}
-
-/**
- * Compute the gen7/gen7.5 pixel shader dwords for \p fs and store them in
- * \p cso->payload[0..3]: PS DW2, PS DW4, PS DW5, and the WM dword returned
- * by fs_get_wm_gen7().
- */
-static void
-fs_init_cso_gen7(const struct ilo_dev *dev,
-                 const struct ilo_shader_state *fs,
-                 struct ilo_shader_cso *cso)
-{
-   /* the alternate floating-point mode is never selected */
-   const bool alt_fp_mode = false;
-   int grf_start, num_samplers, thread_count;
-   uint32_t ps_dw2, ps_dw4, ps_dw5;
-
-   ILO_DEV_ASSERT(dev, 7, 7.5);
-
-   grf_start = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
-   num_samplers = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
-
-   /* the sampler count field holds the count divided by four, rounded up */
-   ps_dw2 = (alt_fp_mode) ? GEN6_THREADDISP_FP_MODE_ALT : 0;
-   ps_dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
-   ps_dw4 = GEN7_PS_DW4_POSOFFSET_NONE;
-
-   /* see brwCreateContext() */
-   if (ilo_dev_gen(dev) == ILO_GEN(7.5)) {
-      if (dev->gt == 3)
-         thread_count = 408;
-      else if (dev->gt == 2)
-         thread_count = 204;
-      else
-         thread_count = 102;
-
-      ps_dw4 |= (thread_count - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT;
-      ps_dw4 |= 1 << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
-   } else {
-      /* ILO_GEN(7), and the fallback for any other value */
-      thread_count = (dev->gt == 2) ? 172 : 48;
-      ps_dw4 |= (thread_count - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT;
-   }
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE))
-      ps_dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT))
-      ps_dw4 |= GEN7_PS_DW4_ATTR_ENABLE;
-
-   /* only 8-wide dispatch is programmed; a 16-wide kernel is not expected */
-   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
-   ps_dw4 |= GEN6_PS_DISPATCH_8 << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
-
-   /* only the first GRF start register is used */
-   ps_dw5 = grf_start << GEN7_PS_DW5_URB_GRF_START0__SHIFT |
-            0 << GEN7_PS_DW5_URB_GRF_START1__SHIFT |
-            0 << GEN7_PS_DW5_URB_GRF_START2__SHIFT;
-
-   STATIC_ASSERT(Elements(cso->payload) >= 4);
-   cso->payload[0] = ps_dw2;
-   cso->payload[1] = ps_dw4;
-   cso->payload[2] = ps_dw5;
-   cso->payload[3] = fs_get_wm_gen7(dev, fs);
-}
-
-/**
- * Return the gen8 PSX DW1 value for \p fs.  Dispatch is always enabled; each
- * remaining bit is set when the corresponding kernel parameter is non-zero.
- */
-static uint32_t
-fs_get_psx_gen8(const struct ilo_dev *dev,
-                const struct ilo_shader_state *fs)
-{
-   /* kernel parameter -> PSX DW1 bit, queried in this order */
-   static const struct {
-      int param;
-      uint32_t bit;
-   } psx_bits[] = {
-      { ILO_KERNEL_FS_USE_KILL, GEN8_PSX_DW1_KILL_PIXEL },
-      { ILO_KERNEL_FS_OUTPUT_Z, GEN8_PSX_DW1_PSCDEPTH_ON },
-      { ILO_KERNEL_FS_INPUT_Z,  GEN8_PSX_DW1_USE_DEPTH },
-      { ILO_KERNEL_FS_INPUT_W,  GEN8_PSX_DW1_USE_W },
-      { ILO_KERNEL_INPUT_COUNT, GEN8_PSX_DW1_ATTR_ENABLE },
-   };
-   uint32_t psx = GEN8_PSX_DW1_DISPATCH_ENABLE;
-   unsigned i;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   for (i = 0; i < Elements(psx_bits); i++) {
-      if (ilo_shader_get_kernel_param(fs, psx_bits[i].param))
-         psx |= psx_bits[i].bit;
-   }
-
-   return psx;
-}
-
-/**
- * Return the gen8 WM dword for \p fs.  It carries only the kernel's
- * barycentric interpolation parameter, shifted into its field.
- */
-static uint32_t
-fs_get_wm_gen8(const struct ilo_dev *dev,
-               const struct ilo_shader_state *fs)
-{
-   int interps;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   interps = ilo_shader_get_kernel_param(fs,
-         ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
-
-   /* the gen7 shift definition is reused here */
-   return interps << GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT;
-}
-
-/**
- * Compute the gen8 pixel shader dwords for \p fs and store them in
- * \p cso->payload[0..4]: PS DW3, PS DW6, PS DW7, the PSX dword from
- * fs_get_psx_gen8(), and the WM dword from fs_get_wm_gen8().
- */
-static void
-fs_init_cso_gen8(const struct ilo_dev *dev,
-                 const struct ilo_shader_state *fs,
-                 struct ilo_shader_cso *cso)
-{
-   /* the alternate floating-point mode is never selected */
-   const bool alt_fp_mode = false;
-   int grf_start, num_samplers;
-   uint32_t ps_dw3, ps_dw6, ps_dw7;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   grf_start = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
-   num_samplers = ilo_shader_get_kernel_param(fs, ILO_KERNEL_SAMPLER_COUNT);
-
-   /* the sampler count field holds the count divided by four, rounded up */
-   ps_dw3 = (alt_fp_mode) ? GEN6_THREADDISP_FP_MODE_ALT : 0;
-   ps_dw3 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
-   /* always 64? */
-   ps_dw6 = GEN8_PS_DW6_POSOFFSET_NONE;
-   ps_dw6 |= (64 - 2) << GEN8_PS_DW6_MAX_THREADS__SHIFT;
-
-   if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_PCB_CBUF0_SIZE))
-      ps_dw6 |= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE;
-
-   /* only 8-wide dispatch is programmed; a 16-wide kernel is not expected */
-   assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
-   ps_dw6 |= GEN6_PS_DISPATCH_8 << GEN8_PS_DW6_DISPATCH_MODE__SHIFT;
-
-   /* only the first GRF start register is used */
-   ps_dw7 = grf_start << GEN8_PS_DW7_URB_GRF_START0__SHIFT |
-            0 << GEN8_PS_DW7_URB_GRF_START1__SHIFT |
-            0 << GEN8_PS_DW7_URB_GRF_START2__SHIFT;
-
-   STATIC_ASSERT(Elements(cso->payload) >= 5);
-   cso->payload[0] = ps_dw3;
-   cso->payload[1] = ps_dw6;
-   cso->payload[2] = ps_dw7;
-   cso->payload[3] = fs_get_psx_gen8(dev, fs);
-   cso->payload[4] = fs_get_wm_gen8(dev, fs);
-}
-
-/**
- * Initialize the fragment shader CSO \p cso for \p fs by dispatching to the
- * gen6, gen7 or gen8 initializer according to the device generation.
- */
-void
-ilo_gpe_init_fs_cso(const struct ilo_dev *dev,
-                    const struct ilo_shader_state *fs,
-                    struct ilo_shader_cso *cso)
-{
-   const int gen = ilo_dev_gen(dev);
-
-   if (gen < ILO_GEN(7)) {
-      fs_init_cso_gen6(dev, fs, cso);
-      return;
-   }
-
-   if (gen < ILO_GEN(8))
-      fs_init_cso_gen7(dev, fs, cso);
-   else
-      fs_init_cso_gen8(dev, fs, cso);
-}
-
-/*
- * Intermediate description of a depth/stencil surface.  It is filled by
- * zs_init_info() or zs_init_info_null() and consumed by
- * ilo_gpe_init_zs_surface() when it packs the hardware dwords.
- */
-struct ilo_zs_surface_info {
- /* GEN6_SURFTYPE_x and GEN6_ZFORMAT_x values */
- int surface_type;
- int format;
-
- /*
-  * Per-buffer parameters for the depth buffer (zs), the separate stencil
-  * buffer and the HiZ buffer.  A NULL bo means the buffer is not used.
-  */
- struct {
- struct intel_bo *bo;
- unsigned stride;
- unsigned qpitch;
- enum gen_surface_tiling tiling;
- uint32_t offset;
- } zs, stencil, hiz;
-
- /* surface extent and the selected level / layer range */
- unsigned width, height, depth;
- unsigned lod, first_layer, num_layers;
-};
-
-/**
- * Fill \p info with the description of a null depth/stencil surface:
- * GEN6_SURFTYPE_NULL, D32_FLOAT, a 1x1x1 extent, one layer, and every other
- * field (bos, strides, offsets, lod, first layer) zeroed.
- */
-static void
-zs_init_info_null(const struct ilo_dev *dev,
-                  struct ilo_zs_surface_info *info)
-{
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   /* everything not set below stays zero / NULL */
-   memset(info, 0, sizeof(*info));
-
-   info->surface_type = GEN6_SURFTYPE_NULL;
-   info->format = GEN6_ZFORMAT_D32_FLOAT;
-
-   info->width = info->height = info->depth = 1;
-   info->num_layers = 1;
-}
-
-/**
- * Describe the depth/stencil view of \p tex in \p info.
- *
- * \param format       the pipe format the surface is viewed as
- * \param level        the mip level, stored as info->lod
- * \param first_layer  the first layer of the view
- * \param num_layers   the number of layers in the view
- *
- * The function picks the hardware depth format, then fills the zs, stencil
- * and hiz sub-structures for whichever buffers apply.  An unsupported
- * \p format asserts and falls back to zs_init_info_null().
- */
-static void
-zs_init_info(const struct ilo_dev *dev,
- const struct ilo_texture *tex,
- enum pipe_format format, unsigned level,
- unsigned first_layer, unsigned num_layers,
- struct ilo_zs_surface_info *info)
-{
- bool separate_stencil;
-
- ILO_DEV_ASSERT(dev, 6, 8);
-
- memset(info, 0, sizeof(*info));
-
- info->surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
-
- if (info->surface_type == GEN6_SURFTYPE_CUBE) {
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
- *
- * "For Other Surfaces (Cube Surfaces):
- * This field (Minimum Array Element) is ignored."
- *
- * "For Other Surfaces (Cube Surfaces):
- * This field (Render Target View Extent) is ignored."
- *
- * As such, we cannot set first_layer and num_layers on cube surfaces.
- * To work around that, treat it as a 2D surface.
- */
- info->surface_type = GEN6_SURFTYPE_2D;
- }
-
- if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
- separate_stencil = true;
- }
- else {
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 317:
- *
- * "This field (Separate Stencil Buffer Enable) must be set to the
- * same value (enabled or disabled) as Hierarchical Depth Buffer
- * Enable."
- */
- separate_stencil =
- ilo_texture_can_enable_hiz(tex, level, first_layer, num_layers);
- }
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 317:
- *
- * "If this field (Hierarchical Depth Buffer Enable) is enabled, the
- * Surface Format of the depth buffer cannot be
- * D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
- * requires the separate stencil buffer."
- *
- * From the Ironlake PRM, volume 2 part 1, page 330:
- *
- * "If this field (Separate Stencil Buffer Enable) is disabled, the
- * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
- *
- * There is no similar restriction for GEN6. But when D24_UNORM_X8_UINT
- * is indeed used, the depth values output by the fragment shaders will
- * be different when read back.
- *
- * As for GEN7+, separate_stencil is always true.
- */
- switch (format) {
- case PIPE_FORMAT_Z16_UNORM:
- info->format = GEN6_ZFORMAT_D16_UNORM;
- break;
- case PIPE_FORMAT_Z32_FLOAT:
- info->format = GEN6_ZFORMAT_D32_FLOAT;
- break;
- case PIPE_FORMAT_Z24X8_UNORM:
- case PIPE_FORMAT_Z24_UNORM_S8_UINT:
- info->format = (separate_stencil) ?
- GEN6_ZFORMAT_D24_UNORM_X8_UINT :
- GEN6_ZFORMAT_D24_UNORM_S8_UINT;
- break;
- case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
- info->format = (separate_stencil) ?
- GEN6_ZFORMAT_D32_FLOAT :
- GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT;
- break;
- case PIPE_FORMAT_S8_UINT:
- /* stencil-only is accepted only with a separate stencil buffer */
- if (separate_stencil) {
- info->format = GEN6_ZFORMAT_D32_FLOAT;
- break;
- }
- /* fall through */
- default:
- assert(!"unsupported depth/stencil format");
- zs_init_info_null(dev, info);
- return;
- break;
- }
-
- /* a stencil-only view has no depth buffer; info->zs stays zeroed */
- if (format != PIPE_FORMAT_S8_UINT) {
- info->zs.bo = tex->image.bo;
- info->zs.stride = tex->image.bo_stride;
-
- assert(tex->image.layer_height % 4 == 0);
- info->zs.qpitch = tex->image.layer_height / 4;
-
- info->zs.tiling = tex->image.tiling;
- info->zs.offset = 0;
- }
-
- /*
- * The stencil data comes from tex->separate_s8 when it is set, or from
- * tex itself for a stencil-only view.
- */
- if (tex->separate_s8 || format == PIPE_FORMAT_S8_UINT) {
- const struct ilo_texture *s8_tex =
- (tex->separate_s8) ? tex->separate_s8 : tex;
-
- info->stencil.bo = s8_tex->image.bo;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 329:
- *
- * "The pitch must be set to 2x the value computed based on width,
- * as the stencil buffer is stored with two rows interleaved."
- *
- * For GEN7, we still double the stride because we did not double the
- * slice widths when initializing the layout.
- */
- info->stencil.stride = s8_tex->image.bo_stride * 2;
-
- assert(s8_tex->image.layer_height % 4 == 0);
- info->stencil.qpitch = s8_tex->image.layer_height / 4;
-
- info->stencil.tiling = s8_tex->image.tiling;
-
- /* only GEN6 gets a non-zero stencil offset, pointing at the level */
- if (ilo_dev_gen(dev) == ILO_GEN(6)) {
- unsigned x, y;
-
- assert(s8_tex->image.walk == ILO_IMAGE_WALK_LOD);
-
- /* offset to the level */
- ilo_image_get_slice_pos(&s8_tex->image, level, 0, &x, &y);
- ilo_image_pos_to_mem(&s8_tex->image, x, y, &x, &y);
- info->stencil.offset = ilo_image_mem_to_raw(&s8_tex->image, x, y);
- }
- }
-
- if (ilo_texture_can_enable_hiz(tex, level, first_layer, num_layers)) {
- info->hiz.bo = tex->image.aux_bo;
- info->hiz.stride = tex->image.aux_stride;
-
- assert(tex->image.aux_layer_height % 4 == 0);
- info->hiz.qpitch = tex->image.aux_layer_height / 4;
-
- info->hiz.tiling = GEN6_TILING_Y;
-
- /* offset to the level */
- if (ilo_dev_gen(dev) == ILO_GEN(6))
- info->hiz.offset = tex->image.aux_offsets[level];
- }
-
- info->width = tex->image.width0;
- info->height = tex->image.height0;
- /* depth is the texture depth for 3D targets, the layer count otherwise */
- info->depth = (tex->base.target == PIPE_TEXTURE_3D) ?
- tex->base.depth0 : num_layers;
-
- info->lod = level;
- info->first_layer = first_layer;
- info->num_layers = num_layers;
-}
-
-/**
- * Initialize the depth/stencil surface state \p zs.
- *
- * When \p tex is NULL a null surface is described; otherwise the view of
- * \p tex given by \p format, \p level, \p first_layer and \p num_layers is
- * used.  On return:
- *
- *  - zs->payload[0..5] hold the depth buffer dw1..dw6
- *  - zs->payload[6..8] hold the separate stencil dw1, dw2 and dw4
- *  - zs->payload[9..11] hold the HiZ dw1, dw2 and dw4
- *  - zs->dw_aligned_8x4 is dw3 recomputed with the width and height rounded
- *    up to align_w x align_h (8x4, reduced according to the sample count)
- *  - zs->bo, zs->separate_s8_bo and zs->hiz_bo point at the bos in use,
- *    without taking references
- */
-void
-ilo_gpe_init_zs_surface(const struct ilo_dev *dev,
- const struct ilo_texture *tex,
- enum pipe_format format, unsigned level,
- unsigned first_layer, unsigned num_layers,
- struct ilo_zs_surface *zs)
-{
- const int max_2d_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 16384 : 8192;
- const int max_array_size = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 2048 : 512;
- struct ilo_zs_surface_info info;
- uint32_t dw1, dw2, dw3, dw4, dw5, dw6;
- int align_w = 8, align_h = 4;
-
- ILO_DEV_ASSERT(dev, 6, 8);
-
- if (tex) {
- zs_init_info(dev, tex, format, level, first_layer, num_layers, &info);
-
- /* shrink the 8x4 alignment according to the sample count */
- switch (tex->base.nr_samples) {
- case 2:
- align_w /= 2;
- break;
- case 4:
- align_w /= 2;
- align_h /= 2;
- break;
- case 8:
- align_w /= 4;
- align_h /= 2;
- break;
- case 16:
- align_w /= 4;
- align_h /= 4;
- break;
- default:
- break;
- }
- } else {
- zs_init_info_null(dev, &info);
- }
-
- /* sanity-check the extent and layer range against per-type limits */
- switch (info.surface_type) {
- case GEN6_SURFTYPE_NULL:
- break;
- case GEN6_SURFTYPE_1D:
- assert(info.width <= max_2d_size && info.height == 1 &&
- info.depth <= max_array_size);
- assert(info.first_layer < max_array_size - 1 &&
- info.num_layers <= max_array_size);
- break;
- case GEN6_SURFTYPE_2D:
- assert(info.width <= max_2d_size && info.height <= max_2d_size &&
- info.depth <= max_array_size);
- assert(info.first_layer < max_array_size - 1 &&
- info.num_layers <= max_array_size);
- break;
- case GEN6_SURFTYPE_3D:
- assert(info.width <= 2048 && info.height <= 2048 && info.depth <= 2048);
- assert(info.first_layer < 2048 && info.num_layers <= max_array_size);
- break;
- case GEN6_SURFTYPE_CUBE:
- assert(info.width <= max_2d_size && info.height <= max_2d_size &&
- info.depth == 1);
- assert(info.first_layer == 0 && info.num_layers == 1);
- assert(info.width == info.height);
- break;
- default:
- assert(!"unexpected depth surface type");
- break;
- }
-
- dw1 = info.surface_type << GEN6_DEPTH_DW1_TYPE__SHIFT |
- info.format << GEN6_DEPTH_DW1_FORMAT__SHIFT;
-
- if (info.zs.bo) {
- /* required for GEN6+ */
- assert(info.zs.tiling == GEN6_TILING_Y);
- assert(info.zs.stride > 0 && info.zs.stride < 128 * 1024 &&
- info.zs.stride % 128 == 0);
- assert(info.width <= info.zs.stride);
-
- /* the pitch field holds the stride minus one */
- dw1 |= (info.zs.stride - 1);
- dw2 = info.zs.offset;
- } else {
- dw2 = 0;
- }
-
- if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
- /* on GEN7+ each buffer in use sets its own enable bit in dw1 */
- if (info.zs.bo)
- dw1 |= GEN7_DEPTH_DW1_DEPTH_WRITE_ENABLE;
-
- if (info.stencil.bo)
- dw1 |= GEN7_DEPTH_DW1_STENCIL_WRITE_ENABLE;
-
- if (info.hiz.bo)
- dw1 |= GEN7_DEPTH_DW1_HIZ_ENABLE;
-
- dw3 = (info.height - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT |
- (info.width - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT |
- info.lod << GEN7_DEPTH_DW3_LOD__SHIFT;
-
- /* same layout as dw3, with the aligned width and height */
- zs->dw_aligned_8x4 =
- (align(info.height, align_h) - 1) << GEN7_DEPTH_DW3_HEIGHT__SHIFT |
- (align(info.width, align_w) - 1) << GEN7_DEPTH_DW3_WIDTH__SHIFT |
- info.lod << GEN7_DEPTH_DW3_LOD__SHIFT;
-
- dw4 = (info.depth - 1) << GEN7_DEPTH_DW4_DEPTH__SHIFT |
- info.first_layer << GEN7_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT;
-
- dw5 = 0;
-
- dw6 = (info.num_layers - 1) << GEN7_DEPTH_DW6_RT_VIEW_EXTENT__SHIFT;
-
- /* the depth qpitch is only written on GEN8+ */
- if (ilo_dev_gen(dev) >= ILO_GEN(8))
- dw6 |= info.zs.qpitch;
- } else {
- /* always Y-tiled */
- dw1 |= GEN6_TILING_Y << GEN6_DEPTH_DW1_TILING__SHIFT;
-
- /* on GEN6, HiZ and separate stencil are enabled together */
- if (info.hiz.bo) {
- dw1 |= GEN6_DEPTH_DW1_HIZ_ENABLE |
- GEN6_DEPTH_DW1_SEPARATE_STENCIL;
- }
-
- dw3 = (info.height - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT |
- (info.width - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT |
- info.lod << GEN6_DEPTH_DW3_LOD__SHIFT |
- GEN6_DEPTH_DW3_MIPLAYOUT_BELOW;
-
- /* same layout as dw3, with the aligned width and height */
- zs->dw_aligned_8x4 =
- (align(info.height, align_h) - 1) << GEN6_DEPTH_DW3_HEIGHT__SHIFT |
- (align(info.width, align_w) - 1) << GEN6_DEPTH_DW3_WIDTH__SHIFT |
- info.lod << GEN6_DEPTH_DW3_LOD__SHIFT |
- GEN6_DEPTH_DW3_MIPLAYOUT_BELOW;
-
- dw4 = (info.depth - 1) << GEN6_DEPTH_DW4_DEPTH__SHIFT |
- info.first_layer << GEN6_DEPTH_DW4_MIN_ARRAY_ELEMENT__SHIFT |
- (info.num_layers - 1) << GEN6_DEPTH_DW4_RT_VIEW_EXTENT__SHIFT;
-
- dw5 = 0;
-
- dw6 = 0;
- }
-
- STATIC_ASSERT(Elements(zs->payload) >= 12);
-
- zs->payload[0] = dw1;
- zs->payload[1] = dw2;
- zs->payload[2] = dw3;
- zs->payload[3] = dw4;
- zs->payload[4] = dw5;
- zs->payload[5] = dw6;
-
- /* do not increment reference count */
- zs->bo = info.zs.bo;
-
- /* separate stencil */
- /* dw1, dw2 and dw4 are reused below for the stencil and HiZ dwords */
- if (info.stencil.bo) {
- assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 &&
- info.stencil.stride % 128 == 0);
-
- dw1 = (info.stencil.stride - 1) << GEN6_STENCIL_DW1_PITCH__SHIFT;
- if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
- dw1 |= GEN75_STENCIL_DW1_STENCIL_BUFFER_ENABLE;
-
- dw2 = info.stencil.offset;
- dw4 = info.stencil.qpitch;
- } else {
- dw1 = 0;
- dw2 = 0;
- dw4 = 0;
- }
-
- zs->payload[6] = dw1;
- zs->payload[7] = dw2;
- zs->payload[8] = dw4;
- /* do not increment reference count */
- zs->separate_s8_bo = info.stencil.bo;
-
- /* hiz */
- if (info.hiz.bo) {
- dw1 = (info.hiz.stride - 1) << GEN6_HIZ_DW1_PITCH__SHIFT;
- dw2 = info.hiz.offset;
- dw4 = info.hiz.qpitch;
- } else {
- dw1 = 0;
- dw2 = 0;
- dw4 = 0;
- }
-
- zs->payload[9] = dw1;
- zs->payload[10] = dw2;
- zs->payload[11] = dw4;
- /* do not increment reference count */
- zs->hiz_bo = info.hiz.bo;
-}
-
-/**
- * Compute a square guardband of side 8K centered at (\p center_x,
- * \p center_y), moving the center where needed so that the whole square
- * lies inside the device's supported screen-space extent.
- *
- * \param center_x,center_y  the requested guardband center
- * \param min_gbx,max_gbx    returned horizontal bounds of the guardband
- * \param min_gby,max_gby    returned vertical bounds of the guardband
- */
-static void
-viewport_get_guardband(const struct ilo_dev *dev,
-                       int center_x, int center_y,
-                       int *min_gbx, int *max_gbx,
-                       int *min_gby, int *max_gby)
-{
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 234:
-    *
-    *     "Per-Device Guardband Extents
-    *
-    *       - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
-    *       - Maximum Post-Clamp Delta (X or Y): 16K"
-    *
-    *     "In addition, in order to be correctly rendered, objects must have a
-    *      screenspace bounding box not exceeding 8K in the X or Y direction.
-    *      This additional restriction must also be comprehended by software,
-    *      i.e., enforced by use of clipping."
-    *
-    * From the Ivy Bridge PRM, volume 2 part 1, page 248:
-    *
-    *     "Per-Device Guardband Extents
-    *
-    *       - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
-    *       - Maximum Post-Clamp Delta (X or Y): N/A"
-    *
-    *     "In addition, in order to be correctly rendered, objects must have a
-    *      screenspace bounding box not exceeding 8K in the X or Y direction.
-    *      This additional restriction must also be comprehended by software,
-    *      i.e., enforced by use of clipping."
-    *
-    * Combined, the bounding box of any object can not exceed 8K in both
-    * width and height.
-    *
-    * Below we set the guardband as a square of length 8K, centered at where
-    * the viewport is.  This makes sure all objects passing the GB test are
-    * valid to the renderer, and those failing the XY clipping have a
-    * better chance of passing the GB test.
-    */
-   const int max_extent = (ilo_dev_gen(dev) >= ILO_GEN(7)) ? 32768 : 16384;
-   const int half_len = 8192 / 2;
-   /* range of centers keeping the square inside [-max_extent, max_extent-1] */
-   const int min_center = -max_extent + half_len;
-   const int max_center = max_extent - 1 - half_len;
-
-   /*
-    * Make sure the guardband is within the valid range.  The center itself
-    * is compared against its limits, so no sum or difference is formed
-    * before clamping and extreme centers cannot overflow.
-    */
-   if (center_x < min_center)
-      center_x = min_center;
-   else if (center_x > max_center)
-      center_x = max_center;
-
-   if (center_y < min_center)
-      center_y = min_center;
-   else if (center_y > max_center)
-      center_y = max_center;
-
-   *min_gbx = center_x - half_len;
-   *max_gbx = center_x + half_len;
-   *min_gby = center_y - half_len;
-   *max_gby = center_y + half_len;
-}
-
-/**
- * Initialize the viewport CSO \p vp from the pipe viewport \p state: the
- * scale/translate matrix elements, the guardband expressed in NDC space,
- * and the viewport bounds in screen space.
- */
-void
-ilo_gpe_set_viewport_cso(const struct ilo_dev *dev,
-                         const struct pipe_viewport_state *state,
-                         struct ilo_viewport_cso *vp)
-{
-   /* magnitudes of the scale; its sign is kept only in the matrix form */
-   const float abs_sx = fabs(state->scale[0]);
-   const float abs_sy = fabs(state->scale[1]);
-   const float abs_sz = fabs(state->scale[2]);
-   int gb_x0, gb_x1, gb_y0, gb_y1;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   /* the guardband is centered at the integer part of the translation */
-   viewport_get_guardband(dev,
-         (int) state->translate[0],
-         (int) state->translate[1],
-         &gb_x0, &gb_x1, &gb_y0, &gb_y1);
-
-   /* matrix form */
-   vp->m00 = state->scale[0];
-   vp->m30 = state->translate[0];
-   vp->m11 = state->scale[1];
-   vp->m31 = state->translate[1];
-   vp->m22 = state->scale[2];
-   vp->m32 = state->translate[2];
-
-   /* guardband in NDC space */
-   vp->min_gbx = ((float) gb_x0 - state->translate[0]) / abs_sx;
-   vp->max_gbx = ((float) gb_x1 - state->translate[0]) / abs_sx;
-   vp->min_gby = ((float) gb_y0 - state->translate[1]) / abs_sy;
-   vp->max_gby = ((float) gb_y1 - state->translate[1]) / abs_sy;
-
-   /* viewport in screen space: NDC -1 and +1 mapped through the matrix */
-   vp->min_x = abs_sx * -1.0f + state->translate[0];
-   vp->min_y = abs_sy * -1.0f + state->translate[1];
-   vp->min_z = abs_sz * -1.0f + state->translate[2];
-   vp->max_x = abs_sx * 1.0f + state->translate[0];
-   vp->max_y = abs_sy * 1.0f + state->translate[1];
-   vp->max_z = abs_sz * 1.0f + state->translate[2];
-}
-
-/**
- * Map a PIPE_LOGICOP_x value to the corresponding GEN6_LOGICOP_x value.
- * An unknown value asserts and maps to GEN6_LOGICOP_CLEAR.
- */
-static int
-gen6_translate_pipe_logicop(unsigned logicop)
-{
-   int hw_op;
-
-   switch (logicop) {
-   case PIPE_LOGICOP_CLEAR:
-      hw_op = GEN6_LOGICOP_CLEAR;
-      break;
-   case PIPE_LOGICOP_NOR:
-      hw_op = GEN6_LOGICOP_NOR;
-      break;
-   case PIPE_LOGICOP_AND_INVERTED:
-      hw_op = GEN6_LOGICOP_AND_INVERTED;
-      break;
-   case PIPE_LOGICOP_COPY_INVERTED:
-      hw_op = GEN6_LOGICOP_COPY_INVERTED;
-      break;
-   case PIPE_LOGICOP_AND_REVERSE:
-      hw_op = GEN6_LOGICOP_AND_REVERSE;
-      break;
-   case PIPE_LOGICOP_INVERT:
-      hw_op = GEN6_LOGICOP_INVERT;
-      break;
-   case PIPE_LOGICOP_XOR:
-      hw_op = GEN6_LOGICOP_XOR;
-      break;
-   case PIPE_LOGICOP_NAND:
-      hw_op = GEN6_LOGICOP_NAND;
-      break;
-   case PIPE_LOGICOP_AND:
-      hw_op = GEN6_LOGICOP_AND;
-      break;
-   case PIPE_LOGICOP_EQUIV:
-      hw_op = GEN6_LOGICOP_EQUIV;
-      break;
-   case PIPE_LOGICOP_NOOP:
-      hw_op = GEN6_LOGICOP_NOOP;
-      break;
-   case PIPE_LOGICOP_OR_INVERTED:
-      hw_op = GEN6_LOGICOP_OR_INVERTED;
-      break;
-   case PIPE_LOGICOP_COPY:
-      hw_op = GEN6_LOGICOP_COPY;
-      break;
-   case PIPE_LOGICOP_OR_REVERSE:
-      hw_op = GEN6_LOGICOP_OR_REVERSE;
-      break;
-   case PIPE_LOGICOP_OR:
-      hw_op = GEN6_LOGICOP_OR;
-      break;
-   case PIPE_LOGICOP_SET:
-      hw_op = GEN6_LOGICOP_SET;
-      break;
-   default:
-      assert(!"unknown logicop function");
-      hw_op = GEN6_LOGICOP_CLEAR;
-      break;
-   }
-
-   return hw_op;
-}
-
-/**
- * Map a PIPE_BLEND_x function to the corresponding GEN6_BLENDFUNCTION_x
- * value.  An unknown value asserts and maps to GEN6_BLENDFUNCTION_ADD.
- */
-static int
-gen6_translate_pipe_blend(unsigned blend)
-{
-   int hw_func;
-
-   switch (blend) {
-   case PIPE_BLEND_ADD:
-      hw_func = GEN6_BLENDFUNCTION_ADD;
-      break;
-   case PIPE_BLEND_SUBTRACT:
-      hw_func = GEN6_BLENDFUNCTION_SUBTRACT;
-      break;
-   case PIPE_BLEND_REVERSE_SUBTRACT:
-      hw_func = GEN6_BLENDFUNCTION_REVERSE_SUBTRACT;
-      break;
-   case PIPE_BLEND_MIN:
-      hw_func = GEN6_BLENDFUNCTION_MIN;
-      break;
-   case PIPE_BLEND_MAX:
-      hw_func = GEN6_BLENDFUNCTION_MAX;
-      break;
-   default:
-      assert(!"unknown blend function");
-      hw_func = GEN6_BLENDFUNCTION_ADD;
-      break;
-   }
-
-   return hw_func;
-}
-
-/**
- * Map a PIPE_BLENDFACTOR_x value to the corresponding GEN6_BLENDFACTOR_x
- * value.  An unknown value asserts and maps to GEN6_BLENDFACTOR_ONE.
- */
-static int
-gen6_translate_pipe_blendfactor(unsigned blendfactor)
-{
-   int hw_factor;
-
-   switch (blendfactor) {
-   case PIPE_BLENDFACTOR_ONE:
-      hw_factor = GEN6_BLENDFACTOR_ONE;
-      break;
-   case PIPE_BLENDFACTOR_SRC_COLOR:
-      hw_factor = GEN6_BLENDFACTOR_SRC_COLOR;
-      break;
-   case PIPE_BLENDFACTOR_SRC_ALPHA:
-      hw_factor = GEN6_BLENDFACTOR_SRC_ALPHA;
-      break;
-   case PIPE_BLENDFACTOR_DST_ALPHA:
-      hw_factor = GEN6_BLENDFACTOR_DST_ALPHA;
-      break;
-   case PIPE_BLENDFACTOR_DST_COLOR:
-      hw_factor = GEN6_BLENDFACTOR_DST_COLOR;
-      break;
-   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-      hw_factor = GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE;
-      break;
-   case PIPE_BLENDFACTOR_CONST_COLOR:
-      hw_factor = GEN6_BLENDFACTOR_CONST_COLOR;
-      break;
-   case PIPE_BLENDFACTOR_CONST_ALPHA:
-      hw_factor = GEN6_BLENDFACTOR_CONST_ALPHA;
-      break;
-   case PIPE_BLENDFACTOR_SRC1_COLOR:
-      hw_factor = GEN6_BLENDFACTOR_SRC1_COLOR;
-      break;
-   case PIPE_BLENDFACTOR_SRC1_ALPHA:
-      hw_factor = GEN6_BLENDFACTOR_SRC1_ALPHA;
-      break;
-   case PIPE_BLENDFACTOR_ZERO:
-      hw_factor = GEN6_BLENDFACTOR_ZERO;
-      break;
-   case PIPE_BLENDFACTOR_INV_SRC_COLOR:
-      hw_factor = GEN6_BLENDFACTOR_INV_SRC_COLOR;
-      break;
-   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
-      hw_factor = GEN6_BLENDFACTOR_INV_SRC_ALPHA;
-      break;
-   case PIPE_BLENDFACTOR_INV_DST_ALPHA:
-      hw_factor = GEN6_BLENDFACTOR_INV_DST_ALPHA;
-      break;
-   case PIPE_BLENDFACTOR_INV_DST_COLOR:
-      hw_factor = GEN6_BLENDFACTOR_INV_DST_COLOR;
-      break;
-   case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-      hw_factor = GEN6_BLENDFACTOR_INV_CONST_COLOR;
-      break;
-   case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-      hw_factor = GEN6_BLENDFACTOR_INV_CONST_ALPHA;
-      break;
-   case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
-      hw_factor = GEN6_BLENDFACTOR_INV_SRC1_COLOR;
-      break;
-   case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
-      hw_factor = GEN6_BLENDFACTOR_INV_SRC1_ALPHA;
-      break;
-   default:
-      assert(!"unknown blend factor");
-      hw_factor = GEN6_BLENDFACTOR_ONE;
-      break;
-   }
-
-   return hw_factor;
-}
-
-/**
- * Map a PIPE_STENCIL_OP_x value to the corresponding GEN6_STENCILOP_x value.
- * An unknown value asserts and maps to GEN6_STENCILOP_KEEP.
- *
- * Note the naming difference: the pipe INCR/DECR ops map to the hardware
- * INCRSAT/DECRSAT ops, and the pipe INCR_WRAP/DECR_WRAP ops map to the
- * hardware INCR/DECR ops.
- */
-static int
-gen6_translate_pipe_stencil_op(unsigned stencil_op)
-{
-   int hw_op;
-
-   switch (stencil_op) {
-   case PIPE_STENCIL_OP_KEEP:
-      hw_op = GEN6_STENCILOP_KEEP;
-      break;
-   case PIPE_STENCIL_OP_ZERO:
-      hw_op = GEN6_STENCILOP_ZERO;
-      break;
-   case PIPE_STENCIL_OP_REPLACE:
-      hw_op = GEN6_STENCILOP_REPLACE;
-      break;
-   case PIPE_STENCIL_OP_INCR:
-      hw_op = GEN6_STENCILOP_INCRSAT;
-      break;
-   case PIPE_STENCIL_OP_DECR:
-      hw_op = GEN6_STENCILOP_DECRSAT;
-      break;
-   case PIPE_STENCIL_OP_INCR_WRAP:
-      hw_op = GEN6_STENCILOP_INCR;
-      break;
-   case PIPE_STENCIL_OP_DECR_WRAP:
-      hw_op = GEN6_STENCILOP_DECR;
-      break;
-   case PIPE_STENCIL_OP_INVERT:
-      hw_op = GEN6_STENCILOP_INVERT;
-      break;
-   default:
-      assert(!"unknown stencil op");
-      hw_op = GEN6_STENCILOP_KEEP;
-      break;
-   }
-
-   return hw_op;
-}
-
-/**
- * Return the hardware blend factor to use in place of \p factor when the
- * destination alpha is forced to one: DST_ALPHA becomes ONE, INV_DST_ALPHA
- * and SRC_ALPHA_SATURATE become ZERO, and every other factor is returned
- * unchanged.
- */
-static int
-gen6_blend_factor_dst_alpha_forced_one(int factor)
-{
-   if (factor == GEN6_BLENDFACTOR_DST_ALPHA)
-      return GEN6_BLENDFACTOR_ONE;
-
-   if (factor == GEN6_BLENDFACTOR_INV_DST_ALPHA ||
-       factor == GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE)
-      return GEN6_BLENDFACTOR_ZERO;
-
-   return factor;
-}
-
-/**
- * Return the gen6-gen7.5 blend bits (RT DW0) for render target state \p rt,
- * or 0 when blending is disabled for it.
- *
- * \param dst_alpha_forced_one  when true, the factors are first rewritten by
- *                              gen6_blend_factor_dst_alpha_forced_one()
- */
-static uint32_t
-blend_get_rt_blend_enable_gen6(const struct ilo_dev *dev,
-                               const struct pipe_rt_blend_state *rt,
-                               bool dst_alpha_forced_one)
-{
-   int src_rgb, dst_rgb, src_a, dst_a;
-   bool same_rgb_alpha;
-   uint32_t rt_dw0;
-
-   ILO_DEV_ASSERT(dev, 6, 7.5);
-
-   if (!rt->blend_enable)
-      return 0;
-
-   src_rgb = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
-   dst_rgb = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
-   src_a = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
-   dst_a = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
-
-   if (dst_alpha_forced_one) {
-      src_rgb = gen6_blend_factor_dst_alpha_forced_one(src_rgb);
-      dst_rgb = gen6_blend_factor_dst_alpha_forced_one(dst_rgb);
-      src_a = gen6_blend_factor_dst_alpha_forced_one(src_a);
-      dst_a = gen6_blend_factor_dst_alpha_forced_one(dst_a);
-   }
-
-   rt_dw0 = GEN6_RT_DW0_BLEND_ENABLE;
-   rt_dw0 |= gen6_translate_pipe_blend(rt->alpha_func) << 26;
-   rt_dw0 |= src_a << 20;
-   rt_dw0 |= dst_a << 15;
-   rt_dw0 |= gen6_translate_pipe_blend(rt->rgb_func) << 11;
-   rt_dw0 |= src_rgb << 5;
-   rt_dw0 |= dst_rgb;
-
-   /* independent alpha is needed unless RGB and alpha blend identically */
-   same_rgb_alpha = (rt->rgb_func == rt->alpha_func &&
-                     src_rgb == src_a &&
-                     dst_rgb == dst_a);
-   if (!same_rgb_alpha)
-      rt_dw0 |= GEN6_RT_DW0_INDEPENDENT_ALPHA_ENABLE;
-
-   return rt_dw0;
-}
-
-/**
- * Return the gen8 blend bits (RT DW0) for render target state \p rt, or 0
- * when blending is disabled for it.
- *
- * \param dst_alpha_forced_one  when true, the factors are first rewritten by
- *                              gen6_blend_factor_dst_alpha_forced_one()
- * \param independent_alpha     always written; set to true when the RGB and
- *                              alpha functions or factors differ, and to
- *                              false otherwise (including when blending is
- *                              disabled)
- */
-static uint32_t
-blend_get_rt_blend_enable_gen8(const struct ilo_dev *dev,
-                               const struct pipe_rt_blend_state *rt,
-                               bool dst_alpha_forced_one,
-                               bool *independent_alpha)
-{
-   int src_rgb, dst_rgb, src_a, dst_a;
-   uint32_t rt_dw0;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   if (!rt->blend_enable) {
-      *independent_alpha = false;
-      return 0;
-   }
-
-   src_rgb = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
-   dst_rgb = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
-   src_a = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
-   dst_a = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
-
-   if (dst_alpha_forced_one) {
-      src_rgb = gen6_blend_factor_dst_alpha_forced_one(src_rgb);
-      dst_rgb = gen6_blend_factor_dst_alpha_forced_one(dst_rgb);
-      src_a = gen6_blend_factor_dst_alpha_forced_one(src_a);
-      dst_a = gen6_blend_factor_dst_alpha_forced_one(dst_a);
-   }
-
-   rt_dw0 = GEN8_RT_DW0_BLEND_ENABLE;
-   rt_dw0 |= src_rgb << 26;
-   rt_dw0 |= dst_rgb << 21;
-   rt_dw0 |= gen6_translate_pipe_blend(rt->rgb_func) << 18;
-   rt_dw0 |= src_a << 13;
-   rt_dw0 |= dst_a << 8;
-   rt_dw0 |= gen6_translate_pipe_blend(rt->alpha_func) << 5;
-
-   /* the independent alpha bit lives elsewhere; only report the need */
-   *independent_alpha = !(rt->rgb_func == rt->alpha_func &&
-                          src_rgb == src_a &&
-                          dst_rgb == dst_a);
-
-   return rt_dw0;
-}
-
-/**
- * Initialize blend->cso[\p index] from state->rt[\p index] for gen6-gen7.5.
- *
- * payload[0] is cleared, payload[1] gets the color clamp bits and the
- * per-channel write-disable bits, and the two blend dwords (normal, and with
- * the destination alpha forced to one) are computed unless logic ops are
- * enabled.
- */
-static void
-blend_init_cso_gen6(const struct ilo_dev *dev,
-                    const struct pipe_blend_state *state,
-                    struct ilo_blend_state *blend,
-                    unsigned index)
-{
-   const struct pipe_rt_blend_state *rt = &state->rt[index];
-   struct ilo_blend_cso *cso = &blend->cso[index];
-   uint32_t rt_dw1;
-
-   ILO_DEV_ASSERT(dev, 6, 7.5);
-
-   rt_dw1 = GEN6_RT_DW1_COLORCLAMP_RTFORMAT |
-            GEN6_RT_DW1_PRE_BLEND_CLAMP |
-            GEN6_RT_DW1_POST_BLEND_CLAMP;
-
-   /* a channel missing from the colormask gets its write-disable bit */
-   if ((rt->colormask & PIPE_MASK_R) == 0)
-      rt_dw1 |= GEN6_RT_DW1_WRITE_DISABLE_R;
-   if ((rt->colormask & PIPE_MASK_G) == 0)
-      rt_dw1 |= GEN6_RT_DW1_WRITE_DISABLE_G;
-   if ((rt->colormask & PIPE_MASK_B) == 0)
-      rt_dw1 |= GEN6_RT_DW1_WRITE_DISABLE_B;
-   if ((rt->colormask & PIPE_MASK_A) == 0)
-      rt_dw1 |= GEN6_RT_DW1_WRITE_DISABLE_A;
-
-   cso->payload[0] = 0;
-   cso->payload[1] = rt_dw1;
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 365:
-    *
-    *     "Color Buffer Blending and Logic Ops must not be enabled
-    *      simultaneously, or behavior is UNDEFINED."
-    *
-    * Since state->logicop_enable takes precedence over rt->blend_enable,
-    * no special care is needed.
-    */
-   if (!state->logicop_enable) {
-      cso->dw_blend = blend_get_rt_blend_enable_gen6(dev, rt, false);
-      cso->dw_blend_dst_alpha_forced_one =
-         blend_get_rt_blend_enable_gen6(dev, rt, true);
-   } else {
-      cso->dw_blend = 0;
-      cso->dw_blend_dst_alpha_forced_one = 0;
-   }
-}
-
-/**
- * Initialize blend->cso[\p index] from state->rt[\p index] for gen8.
- *
- * payload[0] gets the per-channel write-disable bits, payload[1] gets the
- * color clamp bits, and the two blend dwords (normal, and with the
- * destination alpha forced to one) are computed unless logic ops are
- * enabled.
- *
- * \return true when either blend dword needs independent alpha blending
- */
-static bool
-blend_init_cso_gen8(const struct ilo_dev *dev,
-                    const struct pipe_blend_state *state,
-                    struct ilo_blend_state *blend,
-                    unsigned index)
-{
-   const struct pipe_rt_blend_state *rt = &state->rt[index];
-   struct ilo_blend_cso *cso = &blend->cso[index];
-   bool indep, indep_forced_one;
-   uint32_t rt_dw0 = 0;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   /* a channel missing from the colormask gets its write-disable bit */
-   if ((rt->colormask & PIPE_MASK_R) == 0)
-      rt_dw0 |= GEN8_RT_DW0_WRITE_DISABLE_R;
-   if ((rt->colormask & PIPE_MASK_G) == 0)
-      rt_dw0 |= GEN8_RT_DW0_WRITE_DISABLE_G;
-   if ((rt->colormask & PIPE_MASK_B) == 0)
-      rt_dw0 |= GEN8_RT_DW0_WRITE_DISABLE_B;
-   if ((rt->colormask & PIPE_MASK_A) == 0)
-      rt_dw0 |= GEN8_RT_DW0_WRITE_DISABLE_A;
-
-   cso->payload[0] = rt_dw0;
-   cso->payload[1] = GEN8_RT_DW1_COLORCLAMP_RTFORMAT |
-                     GEN8_RT_DW1_PRE_BLEND_CLAMP |
-                     GEN8_RT_DW1_POST_BLEND_CLAMP;
-
-   /* logic ops take precedence over blending */
-   if (state->logicop_enable) {
-      cso->dw_blend = 0;
-      cso->dw_blend_dst_alpha_forced_one = 0;
-      return false;
-   }
-
-   cso->dw_blend = blend_get_rt_blend_enable_gen8(dev, rt, false, &indep);
-   cso->dw_blend_dst_alpha_forced_one =
-      blend_get_rt_blend_enable_gen8(dev, rt, true, &indep_forced_one);
-
-   return (indep || indep_forced_one);
-}
-
-/**
- * Return the gen6-gen7.5 logic op bits (RT DW1): the enable bit plus the
- * translated logic op function, or 0 when logic ops are disabled.
- */
-static uint32_t
-blend_get_logicop_enable_gen6(const struct ilo_dev *dev,
-                              const struct pipe_blend_state *state)
-{
-   uint32_t rt_dw1 = 0;
-
-   ILO_DEV_ASSERT(dev, 6, 7.5);
-
-   if (state->logicop_enable) {
-      rt_dw1 = GEN6_RT_DW1_LOGICOP_ENABLE |
-               gen6_translate_pipe_logicop(state->logicop_func) << 18;
-   }
-
-   return rt_dw1;
-}
-
-/**
- * Return the gen8 logic op bits (RT DW1): the enable bit plus the translated
- * logic op function, or 0 when logic ops are disabled.
- */
-static uint32_t
-blend_get_logicop_enable_gen8(const struct ilo_dev *dev,
-                              const struct pipe_blend_state *state)
-{
-   uint32_t rt_dw1 = 0;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   if (state->logicop_enable) {
-      rt_dw1 = GEN8_RT_DW1_LOGICOP_ENABLE |
-               gen6_translate_pipe_logicop(state->logicop_func) << 27;
-   }
-
-   return rt_dw1;
-}
-
-/**
- * Return the gen6-gen7.5 alpha modifier bits (RT DW1) for \p state:
- * alpha-to-coverage, with dithering on gen7+, and alpha-to-one.
- *
- * \param dual_blend  whether dual source blending is in use; it suppresses
- *                    alpha-to-one
- */
-static uint32_t
-blend_get_alpha_mod_gen6(const struct ilo_dev *dev,
-                         const struct pipe_blend_state *state,
-                         bool dual_blend)
-{
-   const bool alpha_to_one = (state->alpha_to_one && !dual_blend);
-   uint32_t rt_dw1 = 0;
-
-   ILO_DEV_ASSERT(dev, 6, 7.5);
-
-   if (state->alpha_to_coverage) {
-      rt_dw1 |= GEN6_RT_DW1_ALPHA_TO_COVERAGE;
-
-      /* the dither bit is only set on gen7+ */
-      if (ilo_dev_gen(dev) >= ILO_GEN(7))
-         rt_dw1 |= GEN6_RT_DW1_ALPHA_TO_COVERAGE_DITHER;
-   }
-
-   /*
-    * From the Sandy Bridge PRM, volume 2 part 1, page 378:
-    *
-    *     "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
-    *      must be disabled."
-    */
-   if (alpha_to_one)
-      rt_dw1 |= GEN6_RT_DW1_ALPHA_TO_ONE;
-
-   return rt_dw1;
-}
-
-/**
- * Return the gen8 alpha modifier bits (BLEND DW0) for \p state:
- * alpha-to-coverage with dithering, and alpha-to-one.
- *
- * \param dual_blend  whether dual source blending is in use; it suppresses
- *                    alpha-to-one
- */
-static uint32_t
-blend_get_alpha_mod_gen8(const struct ilo_dev *dev,
-                         const struct pipe_blend_state *state,
-                         bool dual_blend)
-{
-   const bool alpha_to_one = (state->alpha_to_one && !dual_blend);
-   uint32_t blend_dw0 = 0;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   if (state->alpha_to_coverage) {
-      blend_dw0 |= GEN8_BLEND_DW0_ALPHA_TO_COVERAGE;
-      blend_dw0 |= GEN8_BLEND_DW0_ALPHA_TO_COVERAGE_DITHER;
-   }
-
-   if (alpha_to_one)
-      blend_dw0 |= GEN8_BLEND_DW0_ALPHA_TO_ONE;
-
-   return blend_dw0;
-}
-
-/**
- * Derive the gen8 PS_BLEND DW1 value from an RT DW0 value: the four blend
- * factors are extracted from \p rt_dw0 and repacked into the PS_BLEND
- * fields.  Returns 0 when \p rt_dw0 does not have blending enabled.
- */
-static uint32_t
-blend_get_ps_blend_gen8(const struct ilo_dev *dev, uint32_t rt_dw0)
-{
-   int src_rgb, dst_rgb, src_a, dst_a;
-   uint32_t ps_blend;
-
-   ILO_DEV_ASSERT(dev, 8, 8);
-
-   if ((rt_dw0 & GEN8_RT_DW0_BLEND_ENABLE) == 0)
-      return 0;
-
-   src_rgb = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_SRC_COLOR_FACTOR);
-   dst_rgb = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_DST_COLOR_FACTOR);
-   src_a = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_SRC_ALPHA_FACTOR);
-   dst_a = GEN_EXTRACT(rt_dw0, GEN8_RT_DW0_DST_ALPHA_FACTOR);
-
-   ps_blend = GEN8_PS_BLEND_DW1_BLEND_ENABLE |
-              GEN_SHIFT32(src_rgb, GEN8_PS_BLEND_DW1_SRC_COLOR_FACTOR) |
-              GEN_SHIFT32(dst_rgb, GEN8_PS_BLEND_DW1_DST_COLOR_FACTOR) |
-              GEN_SHIFT32(src_a, GEN8_PS_BLEND_DW1_SRC_ALPHA_FACTOR) |
-              GEN_SHIFT32(dst_a, GEN8_PS_BLEND_DW1_DST_ALPHA_FACTOR);
-
-   /* only the factors are compared; no blend function is packed here */
-   if (!(src_a == src_rgb && dst_a == dst_rgb))
-      ps_blend |= GEN8_PS_BLEND_DW1_INDEPENDENT_ALPHA_ENABLE;
-
-   return ps_blend;
-}
-
-/**
- * Initialize the blend state \p blend from the pipe blend \p state.
- *
- * The shared dwords (alpha modifiers, logic op, dither / independent alpha)
- * are computed first, then one CSO per render target.  When
- * state->independent_blend_enable is not set, the CSO of render target 0 is
- * copied to all the others.  The PS_BLEND dwords are only non-zero on gen8.
- */
-void
-ilo_gpe_init_blend(const struct ilo_dev *dev,
-                   const struct pipe_blend_state *state,
-                   struct ilo_blend_state *blend)
-{
-   const unsigned num_cso = Elements(blend->cso);
-   unsigned rt;
-
-   ILO_DEV_ASSERT(dev, 6, 8);
-
-   /* dual source blending only counts when RT0 blends and logic ops are off */
-   blend->dual_blend = (util_blend_state_is_dual(state, 0) &&
-                        state->rt[0].blend_enable &&
-                        !state->logicop_enable);
-   blend->alpha_to_coverage = state->alpha_to_coverage;
-
-   if (ilo_dev_gen(dev) < ILO_GEN(8)) {
-      blend->dw_alpha_mod =
-         blend_get_alpha_mod_gen6(dev, state, blend->dual_blend);
-      blend->dw_logicop = blend_get_logicop_enable_gen6(dev, state);
-
-      blend->dw_shared = 0;
-      if (state->dither)
-         blend->dw_shared |= GEN6_RT_DW1_DITHER_ENABLE;
-
-      /* there is no PS_BLEND before gen8 */
-      blend->dw_ps_blend = 0;
-      blend->dw_ps_blend_dst_alpha_forced_one = 0;
-
-      blend_init_cso_gen6(dev, state, blend, 0);
-
-      for (rt = 1; rt < num_cso; rt++) {
-         if (state->independent_blend_enable)
-            blend_init_cso_gen6(dev, state, blend, rt);
-         else
-            blend->cso[rt] = blend->cso[0];
-      }
-
-      return;
-   }
-
-   blend->dw_alpha_mod =
-      blend_get_alpha_mod_gen8(dev, state, blend->dual_blend);
-   blend->dw_logicop = blend_get_logicop_enable_gen8(dev, state);
-
-   blend->dw_shared = 0;
-   if (state->dither)
-      blend->dw_shared |= GEN8_BLEND_DW0_DITHER_ENABLE;
-
-   if (blend_init_cso_gen8(dev, state, blend, 0))
-      blend->dw_shared |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE;
-
-   /* PS_BLEND is derived from the blend dwords of render target 0 */
-   blend->dw_ps_blend =
-      blend_get_ps_blend_gen8(dev, blend->cso[0].dw_blend);
-   blend->dw_ps_blend_dst_alpha_forced_one =
-      blend_get_ps_blend_gen8(dev,
-            blend->cso[0].dw_blend_dst_alpha_forced_one);
-
-   for (rt = 1; rt < num_cso; rt++) {
-      if (!state->independent_blend_enable) {
-         blend->cso[rt] = blend->cso[0];
-         continue;
-      }
-
-      /* independent alpha is a shared bit: any render target can set it */
-      if (blend_init_cso_gen8(dev, state, blend, rt))
-         blend->dw_shared |= GEN8_BLEND_DW0_INDEPENDENT_ALPHA_ENABLE;
-   }
-}
-
-/**
- * Map a PIPE_FUNC_x depth/stencil/alpha test function to the corresponding
- * GEN6_COMPAREFUNCTION_x value.  An unknown value asserts and maps to
- * GEN6_COMPAREFUNCTION_NEVER.
- */
-static int
-gen6_translate_dsa_func(unsigned func)
-{
-   int hw_func;
-
-   switch (func) {
-   case PIPE_FUNC_NEVER:
-      hw_func = GEN6_COMPAREFUNCTION_NEVER;
-      break;
-   case PIPE_FUNC_LESS:
-      hw_func = GEN6_COMPAREFUNCTION_LESS;
-      break;
-   case PIPE_FUNC_EQUAL:
-      hw_func = GEN6_COMPAREFUNCTION_EQUAL;
-      break;
-   case PIPE_FUNC_LEQUAL:
-      hw_func = GEN6_COMPAREFUNCTION_LEQUAL;
-      break;
-   case PIPE_FUNC_GREATER:
-      hw_func = GEN6_COMPAREFUNCTION_GREATER;
-      break;
-   case PIPE_FUNC_NOTEQUAL:
-      hw_func = GEN6_COMPAREFUNCTION_NOTEQUAL;
-      break;
-   case PIPE_FUNC_GEQUAL:
-      hw_func = GEN6_COMPAREFUNCTION_GEQUAL;
-      break;
-   case PIPE_FUNC_ALWAYS:
-      hw_func = GEN6_COMPAREFUNCTION_ALWAYS;
-      break;
-   default:
-      assert(!"unknown depth/stencil/alpha test function");
-      hw_func = GEN6_COMPAREFUNCTION_NEVER;
-      break;
-   }
-
-   return hw_func;
-}
-
-static uint32_t
-dsa_get_stencil_enable_gen6(const struct ilo_dev *dev,
- const struct pipe_stencil_state *stencil0,
- const struct pipe_stencil_state *stencil1)
-{
- uint32_t dw;
-
- ILO_DEV_ASSERT(dev, 6, 7.5);
-
- if (!stencil0->enabled)
- return 0;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 359:
- *
- * "If the Depth Buffer is either undefined or does not have a surface
- * format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
- * stencil buffer is disabled, Stencil Test Enable must be DISABLED"
- *
- * From the Sandy Bridge PRM, volume 2 part 1, page 370:
- *
- * "This field (Stencil Test Enable) cannot be enabled if
- * Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
- *
- * TODO We do not check these yet.
- */
- dw = GEN6_ZS_DW0_STENCIL_TEST_ENABLE |
- gen6_translate_dsa_func(stencil0->func) << 28 |
- gen6_translate_pipe_stencil_op(stencil0->fail_op) << 25 |
- gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 22 |
- gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 19;
- if (stencil0->writemask)
- dw |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE;
-
- if (stencil1->enabled) {
- dw |= GEN6_ZS_DW0_STENCIL1_ENABLE |
- gen6_translate_dsa_func(stencil1->func) << 12 |
- gen6_translate_pipe_stencil_op(stencil1->fail_op) << 9 |
- gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 6 |
- gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 3;
- if (stencil1->writemask)
- dw |= GEN6_ZS_DW0_STENCIL_WRITE_ENABLE;
- }
-
- return dw;
-}
-
-static uint32_t
-dsa_get_stencil_enable_gen8(const struct ilo_dev *dev,
- const struct pipe_stencil_state *stencil0,
- const struct pipe_stencil_state *stencil1)
-{
- uint32_t dw;
-
- ILO_DEV_ASSERT(dev, 8, 8);
-
- if (!stencil0->enabled)
- return 0;
-
- dw = gen6_translate_pipe_stencil_op(stencil0->fail_op) << 29 |
- gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 26 |
- gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 23 |
- gen6_translate_dsa_func(stencil0->func) << 8 |
- GEN8_ZS_DW1_STENCIL_TEST_ENABLE;
- if (stencil0->writemask)
- dw |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE;
-
- if (stencil1->enabled) {
- dw |= gen6_translate_dsa_func(stencil1->func) << 20 |
- gen6_translate_pipe_stencil_op(stencil1->fail_op) << 17 |
- gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 14 |
- gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 11 |
- GEN8_ZS_DW1_STENCIL1_ENABLE;
- if (stencil1->writemask)
- dw |= GEN8_ZS_DW1_STENCIL_WRITE_ENABLE;
- }
-
- return dw;
-}
-
-static uint32_t
-dsa_get_depth_enable_gen6(const struct ilo_dev *dev,
- const struct pipe_depth_state *state)
-{
- uint32_t dw;
-
- ILO_DEV_ASSERT(dev, 6, 7.5);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 360:
- *
- * "Enabling the Depth Test function without defining a Depth Buffer is
- * UNDEFINED."
- *
- * From the Sandy Bridge PRM, volume 2 part 1, page 375:
- *
- * "A Depth Buffer must be defined before enabling writes to it, or
- * operation is UNDEFINED."
- *
- * TODO We do not check these yet.
- */
- if (state->enabled) {
- dw = GEN6_ZS_DW2_DEPTH_TEST_ENABLE |
- gen6_translate_dsa_func(state->func) << 27;
- } else {
- dw = GEN6_COMPAREFUNCTION_ALWAYS << 27;
- }
-
- if (state->writemask)
- dw |= GEN6_ZS_DW2_DEPTH_WRITE_ENABLE;
-
- return dw;
-}
-
-static uint32_t
-dsa_get_depth_enable_gen8(const struct ilo_dev *dev,
- const struct pipe_depth_state *state)
-{
- uint32_t dw;
-
- ILO_DEV_ASSERT(dev, 8, 8);
-
- if (state->enabled) {
- dw = GEN8_ZS_DW1_DEPTH_TEST_ENABLE |
- gen6_translate_dsa_func(state->func) << 5;
- } else {
- dw = GEN6_COMPAREFUNCTION_ALWAYS << 5;
- }
-
- if (state->writemask)
- dw |= GEN8_ZS_DW1_DEPTH_WRITE_ENABLE;
-
- return dw;
-}
-
-static uint32_t
-dsa_get_alpha_enable_gen6(const struct ilo_dev *dev,
- const struct pipe_alpha_state *state)
-{
- uint32_t dw;
-
- ILO_DEV_ASSERT(dev, 6, 7.5);
-
- if (!state->enabled)
- return 0;
-
- /* this will be ORed to BLEND_STATE */
- dw = GEN6_RT_DW1_ALPHA_TEST_ENABLE |
- gen6_translate_dsa_func(state->func) << 13;
-
- return dw;
-}
-
-static uint32_t
-dsa_get_alpha_enable_gen8(const struct ilo_dev *dev,
- const struct pipe_alpha_state *state)
-{
- uint32_t dw;
-
- ILO_DEV_ASSERT(dev, 8, 8);
-
- if (!state->enabled)
- return 0;
-
- /* this will be ORed to BLEND_STATE */
- dw = GEN8_BLEND_DW0_ALPHA_TEST_ENABLE |
- gen6_translate_dsa_func(state->func) << 24;
-
- return dw;
-}
-
-void
-ilo_gpe_init_dsa(const struct ilo_dev *dev,
- const struct pipe_depth_stencil_alpha_state *state,
- struct ilo_dsa_state *dsa)
-{
- ILO_DEV_ASSERT(dev, 6, 8);
-
- STATIC_ASSERT(Elements(dsa->payload) >= 3);
-
- if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
- const uint32_t dw_stencil = dsa_get_stencil_enable_gen8(dev,
- &state->stencil[0], &state->stencil[1]);
- const uint32_t dw_depth = dsa_get_depth_enable_gen8(dev, &state->depth);
-
- assert(!(dw_stencil & dw_depth));
- dsa->payload[0] = dw_stencil | dw_depth;
-
- dsa->dw_blend_alpha = dsa_get_alpha_enable_gen8(dev, &state->alpha);
- dsa->dw_ps_blend_alpha = (state->alpha.enabled) ?
- GEN8_PS_BLEND_DW1_ALPHA_TEST_ENABLE : 0;
- } else {
- dsa->payload[0] = dsa_get_stencil_enable_gen6(dev,
- &state->stencil[0], &state->stencil[1]);
- dsa->payload[2] = dsa_get_depth_enable_gen6(dev, &state->depth);
-
- dsa->dw_blend_alpha = dsa_get_alpha_enable_gen6(dev, &state->alpha);
- dsa->dw_ps_blend_alpha = 0;
- }
-
- dsa->payload[1] = state->stencil[0].valuemask << 24 |
- state->stencil[0].writemask << 16 |
- state->stencil[1].valuemask << 8 |
- state->stencil[1].writemask;
-
- dsa->alpha_ref = float_to_ubyte(state->alpha.ref_value);
-}
-
-void
-ilo_gpe_set_scissor(const struct ilo_dev *dev,
- unsigned start_slot,
- unsigned num_states,
- const struct pipe_scissor_state *states,
- struct ilo_scissor_state *scissor)
-{
- unsigned i;
-
- ILO_DEV_ASSERT(dev, 6, 8);
-
- for (i = 0; i < num_states; i++) {
- uint16_t min_x, min_y, max_x, max_y;
-
- /* both max and min are inclusive in SCISSOR_RECT */
- if (states[i].minx < states[i].maxx &&
- states[i].miny < states[i].maxy) {
- min_x = states[i].minx;
- min_y = states[i].miny;
- max_x = states[i].maxx - 1;
- max_y = states[i].maxy - 1;
- }
- else {
- /* we have to make min greater than max */
- min_x = 1;
- min_y = 1;
- max_x = 0;
- max_y = 0;
- }
-
- scissor->payload[(start_slot + i) * 2 + 0] = min_y << 16 | min_x;
- scissor->payload[(start_slot + i) * 2 + 1] = max_y << 16 | max_x;
- }
-
- if (!start_slot && num_states)
- scissor->scissor0 = states[0];
-}
-
-void
-ilo_gpe_set_scissor_null(const struct ilo_dev *dev,
- struct ilo_scissor_state *scissor)
-{
- unsigned i;
-
- for (i = 0; i < Elements(scissor->payload); i += 2) {
- scissor->payload[i + 0] = 1 << 16 | 1;
- scissor->payload[i + 1] = 0;
- }
-}
-
-static void
-fb_set_blend_caps(const struct ilo_dev *dev,
- enum pipe_format format,
- struct ilo_fb_blend_caps *caps)
-{
- const struct util_format_description *desc =
- util_format_description(format);
- const int ch = util_format_get_first_non_void_channel(format);
-
- memset(caps, 0, sizeof(*caps));
-
- if (format == PIPE_FORMAT_NONE || desc->is_mixed)
- return;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 365:
- *
- * "Logic Ops are only supported on *_UNORM surfaces (excluding _SRGB
- * variants), otherwise Logic Ops must be DISABLED."
- *
- * According to the classic driver, this is lifted on Gen8+.
- */
- if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
- caps->can_logicop = true;
- } else {
- caps->can_logicop = (ch >= 0 && desc->channel[ch].normalized &&
- desc->channel[ch].type == UTIL_FORMAT_TYPE_UNSIGNED &&
- desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB);
- }
-
- /* no blending for pure integer formats */
- caps->can_blend = !util_format_is_pure_integer(format);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 382:
- *
- * "Alpha Test can only be enabled if Pixel Shader outputs a float
- * alpha value."
- */
- caps->can_alpha_test = !util_format_is_pure_integer(format);
-
- caps->dst_alpha_forced_one =
- (ilo_format_translate_render(dev, format) !=
- ilo_format_translate_color(dev, format));
-
- /* sanity check */
- if (caps->dst_alpha_forced_one) {
- enum pipe_format render_format;
-
- switch (format) {
- case PIPE_FORMAT_B8G8R8X8_UNORM:
- render_format = PIPE_FORMAT_B8G8R8A8_UNORM;
- break;
- default:
- render_format = PIPE_FORMAT_NONE;
- break;
- }
-
- assert(ilo_format_translate_render(dev, format) ==
- ilo_format_translate_color(dev, render_format));
- }
-}
-
-void
-ilo_gpe_set_fb(const struct ilo_dev *dev,
- const struct pipe_framebuffer_state *state,
- struct ilo_fb_state *fb)
-{
- const struct pipe_surface *first_surf = NULL;
- int i;
-
- ILO_DEV_ASSERT(dev, 6, 8);
-
- util_copy_framebuffer_state(&fb->state, state);
-
- ilo_gpe_init_view_surface_null(dev,
- (state->width) ? state->width : 1,
- (state->height) ? state->height : 1,
- 1, 0, &fb->null_rt);
-
- for (i = 0; i < state->nr_cbufs; i++) {
- if (state->cbufs[i]) {
- fb_set_blend_caps(dev, state->cbufs[i]->format, &fb->blend_caps[i]);
-
- if (!first_surf)
- first_surf = state->cbufs[i];
- } else {
- fb_set_blend_caps(dev, PIPE_FORMAT_NONE, &fb->blend_caps[i]);
- }
- }
-
- if (!first_surf && state->zsbuf)
- first_surf = state->zsbuf;
-
- fb->num_samples = (first_surf) ? first_surf->texture->nr_samples : 1;
- if (!fb->num_samples)
- fb->num_samples = 1;
-
- /*
- * The PRMs list several restrictions when the framebuffer has more than
- * one surface. It seems they are actually lifted on GEN6+.
- */
-}
+++ /dev/null
-/*
- * Mesa 3-D graphics library
- *
- * Copyright (C) 2012-2014 LunarG, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- * Chia-I Wu <olv@lunarg.com>
- */
-
-#include "genhw/genhw.h"
-#include "core/ilo_format.h"
-#include "util/u_dual_blend.h"
-#include "util/u_framebuffer.h"
-#include "util/u_half.h"
-#include "util/u_resource.h"
-
-#include "ilo_context.h"
-#include "ilo_resource.h"
-#include "ilo_shader.h"
-#include "ilo_state.h"
-#include "ilo_state_3d.h"
-
-static void
-ve_init_cso(const struct ilo_dev *dev,
- const struct pipe_vertex_element *state,
- unsigned vb_index,
- struct ilo_ve_cso *cso)
-{
- int comp[4] = {
- GEN6_VFCOMP_STORE_SRC,
- GEN6_VFCOMP_STORE_SRC,
- GEN6_VFCOMP_STORE_SRC,
- GEN6_VFCOMP_STORE_SRC,
- };
- int format;
-
- ILO_DEV_ASSERT(dev, 6, 8);
-
- switch (util_format_get_nr_components(state->src_format)) {
- case 1: comp[1] = GEN6_VFCOMP_STORE_0;
- case 2: comp[2] = GEN6_VFCOMP_STORE_0;
- case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ?
- GEN6_VFCOMP_STORE_1_INT :
- GEN6_VFCOMP_STORE_1_FP;
- }
-
- format = ilo_format_translate_vertex(dev, state->src_format);
-
- STATIC_ASSERT(Elements(cso->payload) >= 2);
- cso->payload[0] =
- vb_index << GEN6_VE_DW0_VB_INDEX__SHIFT |
- GEN6_VE_DW0_VALID |
- format << GEN6_VE_DW0_FORMAT__SHIFT |
- state->src_offset << GEN6_VE_DW0_VB_OFFSET__SHIFT;
-
- cso->payload[1] =
- comp[0] << GEN6_VE_DW1_COMP0__SHIFT |
- comp[1] << GEN6_VE_DW1_COMP1__SHIFT |
- comp[2] << GEN6_VE_DW1_COMP2__SHIFT |
- comp[3] << GEN6_VE_DW1_COMP3__SHIFT;
-}
-
-void
-ilo_gpe_init_ve(const struct ilo_dev *dev,
- unsigned num_states,
- const struct pipe_vertex_element *states,
- struct ilo_ve_state *ve)
-{
- unsigned i;
-
- ILO_DEV_ASSERT(dev, 6, 8);
-
- ve->count = num_states;
- ve->vb_count = 0;
-
- for (i = 0; i < num_states; i++) {
- const unsigned pipe_idx = states[i].vertex_buffer_index;
- const unsigned instance_divisor = states[i].instance_divisor;
- unsigned hw_idx;
-
- /*
- * map the pipe vb to the hardware vb, which has a fixed instance
- * divisor
- */
- for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
- if (ve->vb_mapping[hw_idx] == pipe_idx &&
- ve->instance_divisors[hw_idx] == instance_divisor)
- break;
- }
-
- /* create one if there is no matching hardware vb */
- if (hw_idx >= ve->vb_count) {
- hw_idx = ve->vb_count++;
-
- ve->vb_mapping[hw_idx] = pipe_idx;
- ve->instance_divisors[hw_idx] = instance_divisor;
- }
-
- ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]);
- }
-}
-
-void
-ilo_gpe_set_ve_edgeflag(const struct ilo_dev *dev,
- struct ilo_ve_cso *cso)
-{
- int format;
-
- ILO_DEV_ASSERT(dev, 6, 8);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 94:
- *
- * "- This bit (Edge Flag Enable) must only be ENABLED on the last
- * valid VERTEX_ELEMENT structure.
- *
- * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
- * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
- *
- * - The Source Element Format must be set to the UINT format.
- *
- * - [DevSNB]: Edge Flags are not supported for QUADLIST
- * primitives. Software may elect to convert QUADLIST primitives
- * to some set of corresponding edge-flag-supported primitive
- * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
- */
- cso->payload[0] |= GEN6_VE_DW0_EDGE_FLAG_ENABLE;
-
- /*
- * Edge flags have format GEN6_FORMAT_R8_USCALED when defined via
- * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined
- * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
- *
- * Since all the hardware cares about is whether the flags are zero or not,
- * we can treat them as the corresponding _UINT formats.
- */
- format = GEN_EXTRACT(cso->payload[0], GEN6_VE_DW0_FORMAT);
- cso->payload[0] &= ~GEN6_VE_DW0_FORMAT__MASK;
-
- switch (format) {
- case GEN6_FORMAT_R32_FLOAT:
- format = GEN6_FORMAT_R32_UINT;
- break;
- case GEN6_FORMAT_R8_USCALED:
- format = GEN6_FORMAT_R8_UINT;
- break;
- default:
- break;
- }
-
- cso->payload[0] |= GEN_SHIFT32(format, GEN6_VE_DW0_FORMAT);
-
- cso->payload[1] =
- GEN6_VFCOMP_STORE_SRC << GEN6_VE_DW1_COMP0__SHIFT |
- GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP1__SHIFT |
- GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP2__SHIFT |
- GEN6_VFCOMP_NOSTORE << GEN6_VE_DW1_COMP3__SHIFT;
-}
-
-void
-ilo_gpe_init_ve_nosrc(const struct ilo_dev *dev,
- int comp0, int comp1, int comp2, int comp3,
- struct ilo_ve_cso *cso)
-{
- ILO_DEV_ASSERT(dev, 6, 8);
-
- STATIC_ASSERT(Elements(cso->payload) >= 2);
-
- assert(comp0 != GEN6_VFCOMP_STORE_SRC &&
- comp1 != GEN6_VFCOMP_STORE_SRC &&
- comp2 != GEN6_VFCOMP_STORE_SRC &&
- comp3 != GEN6_VFCOMP_STORE_SRC);
-
- cso->payload[0] = GEN6_VE_DW0_VALID;
- cso->payload[1] =
- comp0 << GEN6_VE_DW1_COMP0__SHIFT |
- comp1 << GEN6_VE_DW1_COMP1__SHIFT |
- comp2 << GEN6_VE_DW1_COMP2__SHIFT |
- comp3 << GEN6_VE_DW1_COMP3__SHIFT;
-}
-
-void
-ilo_gpe_init_vs_cso(const struct ilo_dev *dev,
- const struct ilo_shader_state *vs,
- struct ilo_shader_cso *cso)
-{
- int start_grf, vue_read_len, sampler_count, max_threads;
- uint32_t dw2, dw4, dw5;
-
- ILO_DEV_ASSERT(dev, 6, 8);
-
- start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG);
- vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT);
- sampler_count = ilo_shader_get_kernel_param(vs, ILO_KERNEL_SAMPLER_COUNT);
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 135:
- *
- * "(Vertex URB Entry Read Length) Specifies the number of pairs of
- * 128-bit vertex elements to be passed into the payload for each
- * vertex."
- *
- * "It is UNDEFINED to set this field to 0 indicating no Vertex URB
- * data to be read and passed to the thread."
- */
- vue_read_len = (vue_read_len + 1) / 2;
- if (!vue_read_len)
- vue_read_len = 1;
-
- max_threads = dev->thread_count;
- if (ilo_dev_gen(dev) == ILO_GEN(7.5) && dev->gt == 2)
- max_threads *= 2;
-
- dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
- dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
- dw4 = start_grf << GEN6_VS_DW4_URB_GRF_START__SHIFT |
- vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
- 0 << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT;
-
- dw5 = GEN6_VS_DW5_STATISTICS |
- GEN6_VS_DW5_VS_ENABLE;
-
- if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
- dw5 |= (max_threads - 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT;
- else
- dw5 |= (max_threads - 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT;
-
- STATIC_ASSERT(Elements(cso->payload) >= 3);
- cso->payload[0] = dw2;
- cso->payload[1] = dw4;
- cso->payload[2] = dw5;
-}
-
-static void
-gs_init_cso_gen6(const struct ilo_dev *dev,
- const struct ilo_shader_state *gs,
- struct ilo_shader_cso *cso)
-{
- int start_grf, vue_read_len, max_threads;
- uint32_t dw2, dw4, dw5, dw6;
-
- ILO_DEV_ASSERT(dev, 6, 6);
-
- if (ilo_shader_get_type(gs) == PIPE_SHADER_GEOMETRY) {
- start_grf = ilo_shader_get_kernel_param(gs,
- ILO_KERNEL_URB_DATA_START_REG);
-
- vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
- }
- else {
- start_grf = ilo_shader_get_kernel_param(gs,
- ILO_KERNEL_VS_GEN6_SO_START_REG);
-
- vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT);
- }
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 153:
- *
- * "Specifies the amount of URB data read and passed in the thread
- * payload for each Vertex URB entry, in 256-bit register increments.
- *
- * It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
- * 0 indicating no Vertex URB data to be read and passed to the
- * thread."
- */
- vue_read_len = (vue_read_len + 1) / 2;
- if (!vue_read_len)
- vue_read_len = 1;
-
- /*
- * From the Sandy Bridge PRM, volume 2 part 1, page 154:
- *
- * "Maximum Number of Threads valid range is [0,27] when Rendering
- * Enabled bit is set."
- *
- * From the Sandy Bridge PRM, volume 2 part 1, page 173:
- *
- * "Programming Note: If the GS stage is enabled, software must always
- * allocate at least one GS URB Entry. This is true even if the GS
- * thread never needs to output vertices to the pipeline, e.g., when
- * only performing stream output. This is an artifact of the need to
- * pass the GS thread an initial destination URB handle."
- *
- * As such, we always enable rendering, and limit the number of threads.
- */
- if (dev->gt == 2) {
- /* maximum is 60, but limited to 28 */
- max_threads = 28;
- }
- else {
- /* maximum is 24, but limited to 21 (see brwCreateContext()) */
- max_threads = 21;
- }
-
- dw2 = GEN6_THREADDISP_SPF;
-
- dw4 = vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT |
- 0 << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT |
- start_grf << GEN6_GS_DW4_URB_GRF_START__SHIFT;
-
- dw5 = (max_threads - 1) << GEN6_GS_DW5_MAX_THREADS__SHIFT |
- GEN6_GS_DW5_STATISTICS |
- GEN6_GS_DW5_SO_STATISTICS |
- GEN6_GS_DW5_RENDER_ENABLE;
-
- /*
- * we cannot make use of GEN6_GS_REORDER because it will reorder
- * triangle strips according to D3D rules (triangle 2N+1 uses vertices
- * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
- * (2N+2, 2N+1, 2N+3)).
- */
- dw6 = GEN6_GS_DW6_GS_ENABLE;
-
- if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY))
- dw6 |= GEN6_GS_DW6_DISCARD_ADJACENCY;
-
- if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) {
- const uint32_t svbi_post_inc =
- ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC);
-
- dw6 |= GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE;
- if (svbi_post_inc) {
- dw6 |= GEN6_GS_DW6_SVBI_POST_INC_ENABLE |
- svbi_post_inc << GEN6_GS_DW6_SVBI_POST_INC_VAL__SHIFT;
- }
- }
-
- STATIC_ASSERT(Elements(cso->payload) >= 4);
- cso->payload[0] = dw2;
- cso->payload[1] = dw4;
- cso->payload[2] = dw5;
- cso->payload[3] = dw6;
-}
-
-static void
-gs_init_cso_gen7(const struct ilo_dev *dev,
- const struct ilo_shader_state *gs,
- struct ilo_shader_cso *cso)
-{
- int start_grf, vue_read_len, sampler_count, max_threads;
- uint32_t dw2, dw4, dw5;
-
- ILO_DEV_ASSERT(dev, 7, 7.5);
-
- start_grf = ilo_shader_get_kernel_param(gs, ILO_KERNEL_URB_DATA_START_REG);
- vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
- sampler_count = ilo_shader_get_kernel_param(gs, ILO_KERNEL_SAMPLER_COUNT);
-
- /* in pairs */
- vue_read_len = (vue_read_len + 1) / 2;
-
- switch (ilo_dev_gen(dev)) {
- case ILO_GEN(7.5):
- max_threads = (dev->gt >= 2) ? 256 : 70;
- break;
- case ILO_GEN(7):
- max_threads = (dev->gt == 2) ? 128 : 36;
- break;
- default:
- max_threads = 1;
- break;
- }
-
- dw2 = (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT;
- dw2 |= ((sampler_count + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
-
- dw4 = vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT |
- GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES |
- 0 << GEN7_GS_DW4_URB_READ_OFFSET__SHIFT |
- start_grf << GEN7_GS_DW4_URB_GRF_START__SHIFT;
-
- dw5 = (max_threads - 1) << GEN7_GS_DW5_MAX_THREADS__SHIFT |
- GEN7_GS_DW5_STATISTICS |
- GEN7_GS_DW5_GS_ENABLE;
-
- STATIC_ASSERT(Elements(cso->payload) >= 3);
- cso->payload[0] = dw2;
- cso->payload[1] = dw4;
- cso->payload[2] = dw5;
-}
-
-void
-ilo_gpe_init_gs_cso(const struct ilo_dev *dev,
- const struct ilo_shader_state *gs,
- struct ilo_shader_cso *cso)
-{
- if (ilo_dev_gen(dev) >= ILO_GEN(7))
- gs_init_cso_gen7(dev, gs, cso);
- else
- gs_init_cso_gen6(dev, gs, cso);
-}
-
-static void
-view_init_null_gen6(const struct ilo_dev *dev,
- unsigned width, unsigned height,
- unsigned depth, unsigned level,
- struct ilo_view_surface *surf)
-{
- uint32_t *dw;
-
- ILO_DEV_ASSERT(dev, 6, 6);
-
- assert(width >= 1 && height >= 1 && depth >= 1);
-
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 71:
- *
- * "A null surface will be used in instances where an actual surface is
- * not bound. When a write message is generated to a null surface, no
- * actual surface is written to. When a read message (including any
- * sampling engine message) is generated to a null surface, the result
- * is all zeros. Note that a null surface type is allowed to be used
- * with all messages, even if it is not specificially indicated as
- * supported. All of the remaining fields in surface state are ignored
- * for null surfaces, with the following exceptions:
- *
- * * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
- * depth buffer's corresponding state for all render target
- * surfaces, including null.
- * * Surface Format must be R8G8B8A8_UNORM."
- *
- * From the Sandy Bridge PRM, volume 4 part 1, page 82:
- *
- * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
- * true"
- */
-
- STATIC_ASSERT(Elements(surf->payload) >= 6);
- dw = surf->payload;
-
- dw[0] = GEN6_SURFTYPE_NULL << GEN6_SURFACE_DW0_TYPE__SHIFT |
- GEN6_FORMAT_B8G8R8A8_UNORM << GEN6_SURFACE_DW0_FORMAT__SHIFT;
-
- dw[1] = 0;
-
- dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
- (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT |
- level << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT;
-
- dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT |
- GEN6_TILING_X;
-
- dw[4] = 0;
- dw[5] = 0;
-}
-
-static void
-view_init_for_buffer_gen6(const struct ilo_dev *dev,
- const struct ilo_buffer *buf,
- unsigned offset, unsigned size,
- unsigned struct_size,
- enum pipe_format elem_format,
- bool is_rt, bool render_cache_rw,
- struct ilo_view_surface *surf)
-{
- const int elem_size = util_format_get_blocksize(elem_format);
- int width, height, depth, pitch;
- int surface_format, num_entries;
- uint32_t *dw;
-
- ILO_DEV_ASSERT(dev, 6, 6);
-
- /*
- * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
- * structure in a buffer.
- */
-
- surface_format = ilo_format_translate_color(dev, elem_format);
-
- num_entries = size / struct_size;
- /* see if there is enough space to fit another element */
- if (size % struct_size >= elem_size)
- num_entries++;
-
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 76:
- *
- * "For SURFTYPE_BUFFER render targets, this field (Surface Base
- * Address) specifies the base address of first element of the
- * surface. The surface is interpreted as a simple array of that
- * single element type. The address must be naturally-aligned to the
- * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
- * must be 16-byte aligned).
- *
- * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
- * the base address of the first element of the surface, computed in
- * software by adding the surface base address to the byte offset of
- * the element in the buffer."
- */
- if (is_rt)
- assert(offset % elem_size == 0);
-
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 77:
- *
- * "For buffer surfaces, the number of entries in the buffer ranges
- * from 1 to 2^27."
- */
- assert(num_entries >= 1 && num_entries <= 1 << 27);
-
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 81:
- *
- * "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
- * indicates the size of the structure."
- */
- pitch = struct_size;
-
- pitch--;
- num_entries--;
- /* bits [6:0] */
- width = (num_entries & 0x0000007f);
- /* bits [19:7] */
- height = (num_entries & 0x000fff80) >> 7;
- /* bits [26:20] */
- depth = (num_entries & 0x07f00000) >> 20;
-
- STATIC_ASSERT(Elements(surf->payload) >= 6);
- dw = surf->payload;
-
- dw[0] = GEN6_SURFTYPE_BUFFER << GEN6_SURFACE_DW0_TYPE__SHIFT |
- surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT;
- if (render_cache_rw)
- dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW;
-
- dw[1] = offset;
-
- dw[2] = height << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
- width << GEN6_SURFACE_DW2_WIDTH__SHIFT;
-
- dw[3] = depth << GEN6_SURFACE_DW3_DEPTH__SHIFT |
- pitch << GEN6_SURFACE_DW3_PITCH__SHIFT;
-
- dw[4] = 0;
- dw[5] = 0;
-}
-
-static void
-view_init_for_texture_gen6(const struct ilo_dev *dev,
- const struct ilo_texture *tex,
- enum pipe_format format,
- unsigned first_level,
- unsigned num_levels,
- unsigned first_layer,
- unsigned num_layers,
- bool is_rt,
- struct ilo_view_surface *surf)
-{
- int surface_type, surface_format;
- int width, height, depth, pitch, lod;
- uint32_t *dw;
-
- ILO_DEV_ASSERT(dev, 6, 6);
-
- surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
- assert(surface_type != GEN6_SURFTYPE_BUFFER);
-
- if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
- format = PIPE_FORMAT_Z32_FLOAT;
-
- if (is_rt)
- surface_format = ilo_format_translate_render(dev, format);
- else
- surface_format = ilo_format_translate_texture(dev, format);
- assert(surface_format >= 0);
-
- width = tex->image.width0;
- height = tex->image.height0;
- depth = (tex->base.target == PIPE_TEXTURE_3D) ?
- tex->base.depth0 : num_layers;
- pitch = tex->image.bo_stride;
-
- if (surface_type == GEN6_SURFTYPE_CUBE) {
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 81:
- *
- * "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
- * range of this field (Depth) is [0,84], indicating the number of
- * cube array elements (equal to the number of underlying 2D array
- * elements divided by 6). For other surfaces, this field must be
- * zero."
- *
- * When is_rt is true, we treat the texture as a 2D one to avoid the
- * restriction.
- */
- if (is_rt) {
- surface_type = GEN6_SURFTYPE_2D;
- }
- else {
- assert(num_layers % 6 == 0);
- depth = num_layers / 6;
- }
- }
-
- /* sanity check the size */
- assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
- switch (surface_type) {
- case GEN6_SURFTYPE_1D:
- assert(width <= 8192 && height == 1 && depth <= 512);
- assert(first_layer < 512 && num_layers <= 512);
- break;
- case GEN6_SURFTYPE_2D:
- assert(width <= 8192 && height <= 8192 && depth <= 512);
- assert(first_layer < 512 && num_layers <= 512);
- break;
- case GEN6_SURFTYPE_3D:
- assert(width <= 2048 && height <= 2048 && depth <= 2048);
- assert(first_layer < 2048 && num_layers <= 512);
- if (!is_rt)
- assert(first_layer == 0);
- break;
- case GEN6_SURFTYPE_CUBE:
- assert(width <= 8192 && height <= 8192 && depth <= 85);
- assert(width == height);
- assert(first_layer < 512 && num_layers <= 512);
- if (is_rt)
- assert(first_layer == 0);
- break;
- default:
- assert(!"unexpected surface type");
- break;
- }
-
- /* non-full array spacing is supported only on GEN7+ */
- assert(tex->image.walk != ILO_IMAGE_WALK_LOD);
- /* non-interleaved samples are supported only on GEN7+ */
- if (tex->base.nr_samples > 1)
- assert(tex->image.interleaved_samples);
-
- if (is_rt) {
- assert(num_levels == 1);
- lod = first_level;
- }
- else {
- lod = num_levels - 1;
- }
-
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 76:
- *
- * "Linear render target surface base addresses must be element-size
- * aligned, for non-YUV surface formats, or a multiple of 2
- * element-sizes for YUV surface formats. Other linear surfaces have
- * no alignment requirements (byte alignment is sufficient.)"
- *
- * From the Sandy Bridge PRM, volume 4 part 1, page 81:
- *
- * "For linear render target surfaces, the pitch must be a multiple
- * of the element size for non-YUV surface formats. Pitch must be a
- * multiple of 2 * element size for YUV surface formats."
- *
- * From the Sandy Bridge PRM, volume 4 part 1, page 86:
- *
- * "For linear surfaces, this field (X Offset) must be zero"
- */
- if (tex->image.tiling == GEN6_TILING_NONE) {
- if (is_rt) {
- const int elem_size = util_format_get_blocksize(format);
- assert(pitch % elem_size == 0);
- }
- }
-
- STATIC_ASSERT(Elements(surf->payload) >= 6);
- dw = surf->payload;
-
- dw[0] = surface_type << GEN6_SURFACE_DW0_TYPE__SHIFT |
- surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT |
- GEN6_SURFACE_DW0_MIPLAYOUT_BELOW;
-
- if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) {
- dw[0] |= 1 << 9 |
- GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
- }
-
- if (is_rt)
- dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW;
-
- dw[1] = 0;
-
- dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
- (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT |
- lod << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT;
-
- assert(tex->image.tiling != GEN8_TILING_W);
- dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT |
- (pitch - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT |
- tex->image.tiling;
-
- dw[4] = first_level << GEN6_SURFACE_DW4_MIN_LOD__SHIFT |
- first_layer << 17 |
- (num_layers - 1) << 8 |
- ((tex->base.nr_samples > 1) ? GEN6_SURFACE_DW4_MULTISAMPLECOUNT_4 :
- GEN6_SURFACE_DW4_MULTISAMPLECOUNT_1);
-
- dw[5] = 0;
-
- assert(tex->image.align_j == 2 || tex->image.align_j == 4);
- if (tex->image.align_j == 4)
- dw[5] |= GEN6_SURFACE_DW5_VALIGN_4;
-}
-
-static void
-view_init_null_gen7(const struct ilo_dev *dev,
- unsigned width, unsigned height,
- unsigned depth, unsigned level,
- struct ilo_view_surface *surf)
-{
- uint32_t *dw;
-
- ILO_DEV_ASSERT(dev, 7, 8);
-
- assert(width >= 1 && height >= 1 && depth >= 1);
-
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 62:
- *
- * "A null surface is used in instances where an actual surface is not
- * bound. When a write message is generated to a null surface, no
- * actual surface is written to. When a read message (including any
- * sampling engine message) is generated to a null surface, the result
- * is all zeros. Note that a null surface type is allowed to be used
- * with all messages, even if it is not specificially indicated as
- * supported. All of the remaining fields in surface state are ignored
- * for null surfaces, with the following exceptions:
- *
- * * Width, Height, Depth, LOD, and Render Target View Extent fields
- * must match the depth buffer's corresponding state for all render
- * target surfaces, including null.
- * * All sampling engine and data port messages support null surfaces
- * with the above behavior, even if not mentioned as specifically
- * supported, except for the following:
- * * Data Port Media Block Read/Write messages.
- * * The Surface Type of a surface used as a render target (accessed
- * via the Data Port's Render Target Write message) must be the same
- * as the Surface Type of all other render targets and of the depth
- * buffer (defined in 3DSTATE_DEPTH_BUFFER), unless either the depth
- * buffer or render targets are SURFTYPE_NULL."
- *
- * From the Ivy Bridge PRM, volume 4 part 1, page 65:
- *
- * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
- * true"
- */
-
- STATIC_ASSERT(Elements(surf->payload) >= 13);
- dw = surf->payload;
-
- dw[0] = GEN6_SURFTYPE_NULL << GEN7_SURFACE_DW0_TYPE__SHIFT |
- GEN6_FORMAT_B8G8R8A8_UNORM << GEN7_SURFACE_DW0_FORMAT__SHIFT;
-
- if (ilo_dev_gen(dev) >= ILO_GEN(8))
- dw[0] |= GEN6_TILING_X << GEN8_SURFACE_DW0_TILING__SHIFT;
- else
- dw[0] |= GEN6_TILING_X << GEN7_SURFACE_DW0_TILING__SHIFT;
-
- dw[1] = 0;
-
- dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) |
- GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH);
-
- dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH);
-
- dw[4] = 0;
- dw[5] = level;
-
- dw[6] = 0;
- dw[7] = 0;
-
- if (ilo_dev_gen(dev) >= ILO_GEN(8))
- memset(&dw[8], 0, sizeof(*dw) * (13 - 8));
-}
-
-static void
-view_init_for_buffer_gen7(const struct ilo_dev *dev,
- const struct ilo_buffer *buf,
- unsigned offset, unsigned size,
- unsigned struct_size,
- enum pipe_format elem_format,
- bool is_rt, bool render_cache_rw,
- struct ilo_view_surface *surf)
-{
- const bool typed = (elem_format != PIPE_FORMAT_NONE);
- const bool structured = (!typed && struct_size > 1);
- const int elem_size = (typed) ?
- util_format_get_blocksize(elem_format) : 1;
- int width, height, depth, pitch;
- int surface_type, surface_format, num_entries;
- uint32_t *dw;
-
- ILO_DEV_ASSERT(dev, 7, 8);
-
- surface_type = (structured) ? GEN7_SURFTYPE_STRBUF : GEN6_SURFTYPE_BUFFER;
-
- surface_format = (typed) ?
- ilo_format_translate_color(dev, elem_format) : GEN6_FORMAT_RAW;
-
- num_entries = size / struct_size;
- /* see if there is enough space to fit another element */
- if (size % struct_size >= elem_size && !structured)
- num_entries++;
-
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 67:
- *
- * "For SURFTYPE_BUFFER render targets, this field (Surface Base
- * Address) specifies the base address of first element of the
- * surface. The surface is interpreted as a simple array of that
- * single element type. The address must be naturally-aligned to the
- * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
- * must be 16-byte aligned)
- *
- * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
- * the base address of the first element of the surface, computed in
- * software by adding the surface base address to the byte offset of
- * the element in the buffer."
- */
- if (is_rt)
- assert(offset % elem_size == 0);
-
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 68:
- *
- * "For typed buffer and structured buffer surfaces, the number of
- * entries in the buffer ranges from 1 to 2^27. For raw buffer
- * surfaces, the number of entries in the buffer is the number of
- * bytes which can range from 1 to 2^30."
- */
- assert(num_entries >= 1 &&
- num_entries <= 1 << ((typed || structured) ? 27 : 30));
-
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 69:
- *
- * "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be
- * 11 if the Surface Format is RAW (the size of the buffer must be a
- * multiple of 4 bytes)."
- *
- * From the Ivy Bridge PRM, volume 4 part 1, page 70:
- *
- * "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this
- * field (Surface Pitch) indicates the size of the structure."
- *
- * "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch
- * must be a multiple of 4 bytes."
- */
- if (structured)
- assert(struct_size % 4 == 0);
- else if (!typed)
- assert(num_entries % 4 == 0);
-
- pitch = struct_size;
-
- pitch--;
- num_entries--;
- /* bits [6:0] */
- width = (num_entries & 0x0000007f);
- /* bits [20:7] */
- height = (num_entries & 0x001fff80) >> 7;
- /* bits [30:21] */
- depth = (num_entries & 0x7fe00000) >> 21;
- /* limit to [26:21] */
- if (typed || structured)
- depth &= 0x3f;
-
- STATIC_ASSERT(Elements(surf->payload) >= 13);
- dw = surf->payload;
-
- dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
- surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT;
- if (render_cache_rw)
- dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;
-
- if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
- dw[8] = offset;
- memset(&dw[9], 0, sizeof(*dw) * (13 - 9));
- } else {
- dw[1] = offset;
- }
-
- dw[2] = GEN_SHIFT32(height, GEN7_SURFACE_DW2_HEIGHT) |
- GEN_SHIFT32(width, GEN7_SURFACE_DW2_WIDTH);
-
- dw[3] = GEN_SHIFT32(depth, GEN7_SURFACE_DW3_DEPTH) |
- pitch;
-
- dw[4] = 0;
- dw[5] = 0;
-
- dw[6] = 0;
- dw[7] = 0;
-
- if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
- dw[7] |= GEN_SHIFT32(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) |
- GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
- GEN_SHIFT32(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) |
- GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
- }
-}
-
-static void
-view_init_for_texture_gen7(const struct ilo_dev *dev,
- const struct ilo_texture *tex,
- enum pipe_format format,
- unsigned first_level,
- unsigned num_levels,
- unsigned first_layer,
- unsigned num_layers,
- bool is_rt,
- struct ilo_view_surface *surf)
-{
- int surface_type, surface_format;
- int width, height, depth, pitch, lod;
- uint32_t *dw;
-
- ILO_DEV_ASSERT(dev, 7, 8);
-
- surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
- assert(surface_type != GEN6_SURFTYPE_BUFFER);
-
- if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
- format = PIPE_FORMAT_Z32_FLOAT;
-
- if (is_rt)
- surface_format = ilo_format_translate_render(dev, format);
- else
- surface_format = ilo_format_translate_texture(dev, format);
- assert(surface_format >= 0);
-
- width = tex->image.width0;
- height = tex->image.height0;
- depth = (tex->base.target == PIPE_TEXTURE_3D) ?
- tex->base.depth0 : num_layers;
- pitch = tex->image.bo_stride;
-
- if (surface_type == GEN6_SURFTYPE_CUBE) {
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 70:
- *
- * "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of
- * this field is [0,340], indicating the number of cube array
- * elements (equal to the number of underlying 2D array elements
- * divided by 6). For other surfaces, this field must be zero."
- *
- * When is_rt is true, we treat the texture as a 2D one to avoid the
- * restriction.
- */
- if (is_rt) {
- surface_type = GEN6_SURFTYPE_2D;
- }
- else {
- assert(num_layers % 6 == 0);
- depth = num_layers / 6;
- }
- }
-
- /* sanity check the size */
- assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
- assert(first_layer < 2048 && num_layers <= 2048);
- switch (surface_type) {
- case GEN6_SURFTYPE_1D:
- assert(width <= 16384 && height == 1 && depth <= 2048);
- break;
- case GEN6_SURFTYPE_2D:
- assert(width <= 16384 && height <= 16384 && depth <= 2048);
- break;
- case GEN6_SURFTYPE_3D:
- assert(width <= 2048 && height <= 2048 && depth <= 2048);
- if (!is_rt)
- assert(first_layer == 0);
- break;
- case GEN6_SURFTYPE_CUBE:
- assert(width <= 16384 && height <= 16384 && depth <= 86);
- assert(width == height);
- if (is_rt)
- assert(first_layer == 0);
- break;
- default:
- assert(!"unexpected surface type");
- break;
- }
-
- if (is_rt) {
- assert(num_levels == 1);
- lod = first_level;
- }
- else {
- lod = num_levels - 1;
- }
-
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 68:
- *
- * "The Base Address for linear render target surfaces and surfaces
- * accessed with the typed surface read/write data port messages must
- * be element-size aligned, for non-YUV surface formats, or a multiple
- * of 2 element-sizes for YUV surface formats. Other linear surfaces
- * have no alignment requirements (byte alignment is sufficient)."
- *
- * From the Ivy Bridge PRM, volume 4 part 1, page 70:
- *
- * "For linear render target surfaces and surfaces accessed with the
- * typed data port messages, the pitch must be a multiple of the
- * element size for non-YUV surface formats. Pitch must be a multiple
- * of 2 * element size for YUV surface formats. For linear surfaces
- * with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple
- * of 4 bytes.For other linear surfaces, the pitch can be any multiple
- * of bytes."
- *
- * From the Ivy Bridge PRM, volume 4 part 1, page 74:
- *
- * "For linear surfaces, this field (X Offset) must be zero."
- */
- if (tex->image.tiling == GEN6_TILING_NONE) {
- if (is_rt) {
- const int elem_size = util_format_get_blocksize(format);
- assert(pitch % elem_size == 0);
- }
- }
-
- STATIC_ASSERT(Elements(surf->payload) >= 13);
- dw = surf->payload;
-
- dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
- surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT;
-
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 63:
- *
- * "If this field (Surface Array) is enabled, the Surface Type must be
- * SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is
- * disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or
- * SURFTYPE_CUBE, the Depth field must be set to zero."
- *
- * For non-3D sampler surfaces, resinfo (the sampler message) always
- * returns zero for the number of layers when this field is not set.
- */
- if (surface_type != GEN6_SURFTYPE_3D) {
- if (util_resource_is_array_texture(&tex->base))
- dw[0] |= GEN7_SURFACE_DW0_IS_ARRAY;
- else
- assert(depth == 1);
- }
-
- if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
- switch (tex->image.align_j) {
- case 4:
- dw[0] |= GEN7_SURFACE_DW0_VALIGN_4;
- break;
- case 8:
- dw[0] |= GEN8_SURFACE_DW0_VALIGN_8;
- break;
- case 16:
- dw[0] |= GEN8_SURFACE_DW0_VALIGN_16;
- break;
- default:
- assert(!"unsupported valign");
- break;
- }
-
- switch (tex->image.align_i) {
- case 4:
- dw[0] |= GEN8_SURFACE_DW0_HALIGN_4;
- break;
- case 8:
- dw[0] |= GEN8_SURFACE_DW0_HALIGN_8;
- break;
- case 16:
- dw[0] |= GEN8_SURFACE_DW0_HALIGN_16;
- break;
- default:
- assert(!"unsupported halign");
- break;
- }
-
- dw[0] |= tex->image.tiling << GEN8_SURFACE_DW0_TILING__SHIFT;
- } else {
- assert(tex->image.align_i == 4 || tex->image.align_i == 8);
- assert(tex->image.align_j == 2 || tex->image.align_j == 4);
-
- if (tex->image.align_j == 4)
- dw[0] |= GEN7_SURFACE_DW0_VALIGN_4;
-
- if (tex->image.align_i == 8)
- dw[0] |= GEN7_SURFACE_DW0_HALIGN_8;
-
- assert(tex->image.tiling != GEN8_TILING_W);
- dw[0] |= tex->image.tiling << GEN7_SURFACE_DW0_TILING__SHIFT;
-
- if (tex->image.walk == ILO_IMAGE_WALK_LOD)
- dw[0] |= GEN7_SURFACE_DW0_ARYSPC_LOD0;
- else
- dw[0] |= GEN7_SURFACE_DW0_ARYSPC_FULL;
- }
-
- if (is_rt)
- dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;
-
- if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt)
- dw[0] |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
-
- if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
- assert(tex->image.layer_height % 4 == 0);
- dw[1] = tex->image.layer_height / 4;
- } else {
- dw[1] = 0;
- }
-
- dw[2] = GEN_SHIFT32(height - 1, GEN7_SURFACE_DW2_HEIGHT) |
- GEN_SHIFT32(width - 1, GEN7_SURFACE_DW2_WIDTH);
-
- dw[3] = GEN_SHIFT32(depth - 1, GEN7_SURFACE_DW3_DEPTH) |
- (pitch - 1);
-
- dw[4] = first_layer << 18 |
- (num_layers - 1) << 7;
-
- /*
- * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL
- * means the samples are interleaved. The layouts are the same when the
- * number of samples is 1.
- */
- if (tex->image.interleaved_samples && tex->base.nr_samples > 1) {
- assert(!is_rt);
- dw[4] |= GEN7_SURFACE_DW4_MSFMT_DEPTH_STENCIL;
- }
- else {
- dw[4] |= GEN7_SURFACE_DW4_MSFMT_MSS;
- }
-
- switch (tex->base.nr_samples) {
- case 0:
- case 1:
- default:
- dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_1;
- break;
- case 2:
- dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_2;
- break;
- case 4:
- dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_4;
- break;
- case 8:
- dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_8;
- break;
- case 16:
- dw[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_16;
- break;
- }
-
- dw[5] = GEN_SHIFT32(first_level, GEN7_SURFACE_DW5_MIN_LOD) |
- lod;
-
- dw[6] = 0;
- dw[7] = 0;
-
- if (ilo_dev_gen(dev) >= ILO_GEN(7.5)) {
- dw[7] |= GEN_SHIFT32(GEN75_SCS_RED, GEN75_SURFACE_DW7_SCS_R) |
- GEN_SHIFT32(GEN75_SCS_GREEN, GEN75_SURFACE_DW7_SCS_G) |
- GEN_SHIFT32(GEN75_SCS_BLUE, GEN75_SURFACE_DW7_SCS_B) |
- GEN_SHIFT32(GEN75_SCS_ALPHA, GEN75_SURFACE_DW7_SCS_A);
- }
-
- if (ilo_dev_gen(dev) >= ILO_GEN(8))
- memset(&dw[8], 0, sizeof(*dw) * (13 - 8));
-}
-
-void
-ilo_gpe_init_view_surface_null(const struct ilo_dev *dev,
- unsigned width, unsigned height,
- unsigned depth, unsigned level,
- struct ilo_view_surface *surf)
-{
- if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
- view_init_null_gen7(dev,
- width, height, depth, level, surf);
- } else {
- view_init_null_gen6(dev,
- width, height, depth, level, surf);
- }
-
- surf->bo = NULL;
- surf->scanout = false;
-}
-
-void
-ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev *dev,
- const struct ilo_buffer *buf,
- unsigned offset, unsigned size,
- unsigned struct_size,
- enum pipe_format elem_format,
- bool is_rt, bool render_cache_rw,
- struct ilo_view_surface *surf)
-{
- if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
- view_init_for_buffer_gen7(dev, buf, offset, size,
- struct_size, elem_format, is_rt, render_cache_rw, surf);
- } else {
- view_init_for_buffer_gen6(dev, buf, offset, size,
- struct_size, elem_format, is_rt, render_cache_rw, surf);
- }
-
- /* do not increment reference count */
- surf->bo = buf->bo;
- surf->scanout = false;
-}
-
-void
-ilo_gpe_init_view_surface_for_texture(const struct ilo_dev *dev,
- const struct ilo_texture *tex,
- enum pipe_format format,
- unsigned first_level,
- unsigned num_levels,
- unsigned first_layer,
- unsigned num_layers,
- bool is_rt,
- struct ilo_view_surface *surf)
-{
- if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
- view_init_for_texture_gen7(dev, tex, format,
- first_level, num_levels, first_layer, num_layers,
- is_rt, surf);
- } else {
- view_init_for_texture_gen6(dev, tex, format,
- first_level, num_levels, first_layer, num_layers,
- is_rt, surf);
- }
-
- /* do not increment reference count */
- surf->bo = tex->image.bo;
-
- /* assume imported RTs are scanouts */
- surf->scanout = ((tex->base.bind & PIPE_BIND_SCANOUT) ||
- (tex->imported && (tex->base.bind & PIPE_BIND_RENDER_TARGET)));
-}
-
-static void
-sampler_init_border_color_gen6(const struct ilo_dev *dev,
- const union pipe_color_union *color,
- uint32_t *dw, int num_dwords)
-{
- float rgba[4] = {
- color->f[0], color->f[1], color->f[2], color->f[3],
- };
-
- ILO_DEV_ASSERT(dev, 6, 6);
-
- assert(num_dwords >= 12);
-
- /*
- * This state is not documented in the Sandy Bridge PRM, but in the
- * Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1.
- */
-
- /* IEEE_FP */
- dw[1] = fui(rgba[0]);
- dw[2] = fui(rgba[1]);
- dw[3] = fui(rgba[2]);
- dw[4] = fui(rgba[3]);
-
- /* FLOAT_16 */
- dw[5] = util_float_to_half(rgba[0]) |
- util_float_to_half(rgba[1]) << 16;
- dw[6] = util_float_to_half(rgba[2]) |
- util_float_to_half(rgba[3]) << 16;
-
- /* clamp to [-1.0f, 1.0f] */
- rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f);
- rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f);
- rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f);
- rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f);
-
- /* SNORM16 */
- dw[9] = (int16_t) util_iround(rgba[0] * 32767.0f) |
- (int16_t) util_iround(rgba[1] * 32767.0f) << 16;
- dw[10] = (int16_t) util_iround(rgba[2] * 32767.0f) |
- (int16_t) util_iround(rgba[3] * 32767.0f) << 16;
-
- /* SNORM8 */
- dw[11] = (int8_t) util_iround(rgba[0] * 127.0f) |
- (int8_t) util_iround(rgba[1] * 127.0f) << 8 |
- (int8_t) util_iround(rgba[2] * 127.0f) << 16 |
- (int8_t) util_iround(rgba[3] * 127.0f) << 24;
-
- /* clamp to [0.0f, 1.0f] */
- rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f);
- rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f);
- rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f);
- rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f);
-
- /* UNORM8 */
- dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) |
- (uint8_t) util_iround(rgba[1] * 255.0f) << 8 |
- (uint8_t) util_iround(rgba[2] * 255.0f) << 16 |
- (uint8_t) util_iround(rgba[3] * 255.0f) << 24;
-
- /* UNORM16 */
- dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) |
- (uint16_t) util_iround(rgba[1] * 65535.0f) << 16;
- dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) |
- (uint16_t) util_iround(rgba[3] * 65535.0f) << 16;
-}
-
-/**
- * Translate a pipe texture mipfilter to the matching hardware mipfilter.
- */
-static int
-gen6_translate_tex_mipfilter(unsigned filter)
-{
- switch (filter) {
- case PIPE_TEX_MIPFILTER_NEAREST: return GEN6_MIPFILTER_NEAREST;
- case PIPE_TEX_MIPFILTER_LINEAR: return GEN6_MIPFILTER_LINEAR;
- case PIPE_TEX_MIPFILTER_NONE: return GEN6_MIPFILTER_NONE;
- default:
- assert(!"unknown mipfilter");
- return GEN6_MIPFILTER_NONE;
- }
-}
-
-/**
- * Translate a pipe texture filter to the matching hardware mapfilter.
- */
-static int
-gen6_translate_tex_filter(unsigned filter)
-{
- switch (filter) {
- case PIPE_TEX_FILTER_NEAREST: return GEN6_MAPFILTER_NEAREST;
- case PIPE_TEX_FILTER_LINEAR: return GEN6_MAPFILTER_LINEAR;
- default:
- assert(!"unknown sampler filter");
- return GEN6_MAPFILTER_NEAREST;
- }
-}
-
-/**
- * Translate a pipe texture coordinate wrapping mode to the matching hardware
- * wrapping mode.
- */
-static int
-gen6_translate_tex_wrap(unsigned wrap)
-{
- switch (wrap) {
- case PIPE_TEX_WRAP_CLAMP: return GEN8_TEXCOORDMODE_HALF_BORDER;
- case PIPE_TEX_WRAP_REPEAT: return GEN6_TEXCOORDMODE_WRAP;
- case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return GEN6_TEXCOORDMODE_CLAMP;
- case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return GEN6_TEXCOORDMODE_CLAMP_BORDER;
- case PIPE_TEX_WRAP_MIRROR_REPEAT: return GEN6_TEXCOORDMODE_MIRROR;
- case PIPE_TEX_WRAP_MIRROR_CLAMP:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
- case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
- default:
- assert(!"unknown sampler wrap mode");
- return GEN6_TEXCOORDMODE_WRAP;
- }
-}
-
-/**
- * Translate a pipe shadow compare function to the matching hardware shadow
- * function.
- */
-static int
-gen6_translate_shadow_func(unsigned func)
-{
- /*
- * For PIPE_FUNC_x, the reference value is on the left-hand side of the
- * comparison, and 1.0 is returned when the comparison is true.
- *
- * For GEN6_COMPAREFUNCTION_x, the reference value is on the right-hand side of
- * the comparison, and 0.0 is returned when the comparison is true.
- */
- switch (func) {
- case PIPE_FUNC_NEVER: return GEN6_COMPAREFUNCTION_ALWAYS;
- case PIPE_FUNC_LESS: return GEN6_COMPAREFUNCTION_LEQUAL;
- case PIPE_FUNC_EQUAL: return GEN6_COMPAREFUNCTION_NOTEQUAL;
- case PIPE_FUNC_LEQUAL: return GEN6_COMPAREFUNCTION_LESS;
- case PIPE_FUNC_GREATER: return GEN6_COMPAREFUNCTION_GEQUAL;
- case PIPE_FUNC_NOTEQUAL: return GEN6_COMPAREFUNCTION_EQUAL;
- case PIPE_FUNC_GEQUAL: return GEN6_COMPAREFUNCTION_GREATER;
- case PIPE_FUNC_ALWAYS: return GEN6_COMPAREFUNCTION_NEVER;
- default:
- assert(!"unknown shadow compare function");
- return GEN6_COMPAREFUNCTION_NEVER;
- }
-}
-
-void
-ilo_gpe_init_sampler_cso(const struct ilo_dev *dev,
- const struct pipe_sampler_state *state,
- struct ilo_sampler_cso *sampler)
-{
- int mip_filter, min_filter, mag_filter, max_aniso;
- int lod_bias, max_lod, min_lod;
- int wrap_s, wrap_t, wrap_r, wrap_cube;
- uint32_t dw0, dw1, dw3;
-
- ILO_DEV_ASSERT(dev, 6, 8);
-
- memset(sampler, 0, sizeof(*sampler));
-
- mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter);
- min_filter = gen6_translate_tex_filter(state->min_img_filter);
- mag_filter = gen6_translate_tex_filter(state->mag_img_filter);
-
- sampler->anisotropic = state->max_anisotropy;
-
- if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16)
- max_aniso = state->max_anisotropy / 2 - 1;
- else if (state->max_anisotropy > 16)
- max_aniso = GEN6_ANISORATIO_16;
- else
- max_aniso = GEN6_ANISORATIO_2;
-
- /*
- *
- * Here is how the hardware calculate per-pixel LOD, from my reading of the
- * PRMs:
- *
- * 1) LOD is set to log2(ratio of texels to pixels) if not specified in
- * other ways. The number of texels is measured using level
- * SurfMinLod.
- * 2) Bias is added to LOD.
- * 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
- * compared with Base to determine whether magnification or
- * minification is needed. (if preclamp is disabled, LOD is compared
- * with Base before clamping)
- * 4) If magnification is needed, or no mipmapping is requested, LOD is
- * set to floor(MinLod).
- * 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
- *
- * With Gallium interface, Base is always zero and
- * pipe_sampler_view::u.tex.first_level specifies SurfMinLod.
- */
- if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
- const float scale = 256.0f;
-
- /* [-16.0, 16.0) in S4.8 */
- lod_bias = (int)
- (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
- lod_bias &= 0x1fff;
-
- /* [0.0, 14.0] in U4.8 */
- max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale);
- min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale);
- }
- else {
- const float scale = 64.0f;
-
- /* [-16.0, 16.0) in S4.6 */
- lod_bias = (int)
- (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
- lod_bias &= 0x7ff;
-
- /* [0.0, 13.0] in U4.6 */
- max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale);
- min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale);
- }
-
- /*
- * We want LOD to be clamped to determine magnification/minification, and
- * get set to zero when it is magnification or when mipmapping is disabled.
- * The hardware would set LOD to floor(MinLod) and that is a problem when
- * MinLod is greater than or equal to 1.0f.
- *
- * With Base being zero, it is always minification when MinLod is non-zero.
- * To achieve our goal, we just need to set MinLod to zero and set
- * MagFilter to MinFilter when mipmapping is disabled.
- */
- if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) {
- min_lod = 0;
- mag_filter = min_filter;
- }
-
- /* determine wrap s/t/r */
- wrap_s = gen6_translate_tex_wrap(state->wrap_s);
- wrap_t = gen6_translate_tex_wrap(state->wrap_t);
- wrap_r = gen6_translate_tex_wrap(state->wrap_r);
- if (ilo_dev_gen(dev) < ILO_GEN(8)) {
- /*
- * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
- * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering,
- * PIPE_TEX_WRAP_CLAMP means PIPE_TEX_WRAP_CLAMP_TO_BORDER while
- * additionally clamping the texture coordinates to [0.0, 1.0].
- *
- * PIPE_TEX_WRAP_CLAMP is not supported natively until Gen8. The
- * clamping has to be taken care of in the shaders. There are two
- * filters here, but let the minification one has a say.
- */
- const bool clamp_is_to_edge =
- (state->min_img_filter == PIPE_TEX_FILTER_NEAREST);
-
- if (clamp_is_to_edge) {
- if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER)
- wrap_s = GEN6_TEXCOORDMODE_CLAMP;
- if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER)
- wrap_t = GEN6_TEXCOORDMODE_CLAMP;
- if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER)
- wrap_r = GEN6_TEXCOORDMODE_CLAMP;
- } else {
- if (wrap_s == GEN8_TEXCOORDMODE_HALF_BORDER) {
- wrap_s = GEN6_TEXCOORDMODE_CLAMP_BORDER;
- sampler->saturate_s = true;
- }
- if (wrap_t == GEN8_TEXCOORDMODE_HALF_BORDER) {
- wrap_t = GEN6_TEXCOORDMODE_CLAMP_BORDER;
- sampler->saturate_t = true;
- }
- if (wrap_r == GEN8_TEXCOORDMODE_HALF_BORDER) {
- wrap_r = GEN6_TEXCOORDMODE_CLAMP_BORDER;
- sampler->saturate_r = true;
- }
- }
- }
-
- /*
- * From the Sandy Bridge PRM, volume 4 part 1, page 107:
- *
- * "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
- * and TEXCOORDMODE_CUBE settings are valid, and each TC component
- * must have the same Address Control mode."
- *
- * From the Ivy Bridge PRM, volume 4 part 1, page 96:
- *
- * "This field (Cube Surface Control Mode) must be set to
- * CUBECTRLMODE_PROGRAMMED"
- *
- * Therefore, we cannot use "Cube Surface Control Mode" for semless cube
- * map filtering.
- */
- if (state->seamless_cube_map &&
- (state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
- state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) {
- wrap_cube = GEN6_TEXCOORDMODE_CUBE;
- }
- else {
- wrap_cube = GEN6_TEXCOORDMODE_CLAMP;
- }
-
- if (!state->normalized_coords) {
- /*
- * From the Ivy Bridge PRM, volume 4 part 1, page 98:
- *
- * "The following state must be set as indicated if this field
- * (Non-normalized Coordinate Enable) is enabled:
- *
- * - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
- * TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
- * - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
- * - Mag Mode Filter must be MAPFILTER_NEAREST or
- * MAPFILTER_LINEAR.
- * - Min Mode Filter must be MAPFILTER_NEAREST or
- * MAPFILTER_LINEAR.
- * - Mip Mode Filter must be MIPFILTER_NONE.
- * - Min LOD must be 0.
- * - Max LOD must be 0.
- * - MIP Count must be 0.
- * - Surface Min LOD must be 0.
- * - Texture LOD Bias must be 0."
- */
- assert(wrap_s == GEN6_TEXCOORDMODE_CLAMP ||
- wrap_s == GEN6_TEXCOORDMODE_CLAMP_BORDER);
- assert(wrap_t == GEN6_TEXCOORDMODE_CLAMP ||
- wrap_t == GEN6_TEXCOORDMODE_CLAMP_BORDER);
- assert(wrap_r == GEN6_TEXCOORDMODE_CLAMP ||
- wrap_r == GEN6_TEXCOORDMODE_CLAMP_BORDER);
-
- assert(mag_filter == GEN6_MAPFILTER_NEAREST ||
- mag_filter == GEN6_MAPFILTER_LINEAR);
- assert(min_filter == GEN6_MAPFILTER_NEAREST ||
- min_filter == GEN6_MAPFILTER_LINEAR);
-
- /* work around a bug in util_blitter */
- mip_filter = GEN6_MIPFILTER_NONE;
-
- assert(mip_filter == GEN6_MIPFILTER_NONE);
- }
-
- if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
- dw0 = 1 << 28 |
- mip_filter << 20 |
- lod_bias << 1;
-
- sampler->dw_filter = mag_filter << 17 |
- min_filter << 14;
-
- sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 |
- GEN6_MAPFILTER_ANISOTROPIC << 14 |
- 1;
-
- dw1 = min_lod << 20 |
- max_lod << 8;
-
- if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
- dw1 |= gen6_translate_shadow_func(state->compare_func) << 1;
-
- dw3 = max_aniso << 19;
-
- /* round the coordinates for linear filtering */
- if (min_filter != GEN6_MAPFILTER_NEAREST) {
- dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND |
- GEN6_SAMPLER_DW3_V_MIN_ROUND |
- GEN6_SAMPLER_DW3_R_MIN_ROUND);
- }
- if (mag_filter != GEN6_MAPFILTER_NEAREST) {
- dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND |
- GEN6_SAMPLER_DW3_V_MAG_ROUND |
- GEN6_SAMPLER_DW3_R_MAG_ROUND);
- }
-
- if (!state->normalized_coords)
- dw3 |= 1 << 10;
-
- sampler->dw_wrap = wrap_s << 6 |
- wrap_t << 3 |
- wrap_r;
-
- /*
- * As noted in the classic i965 driver, the HW may still reference
- * wrap_t and wrap_r for 1D textures. We need to set them to a safe
- * mode
- */
- sampler->dw_wrap_1d = wrap_s << 6 |
- GEN6_TEXCOORDMODE_WRAP << 3 |
- GEN6_TEXCOORDMODE_WRAP;
-
- sampler->dw_wrap_cube = wrap_cube << 6 |
- wrap_cube << 3 |
- wrap_cube;
-
- STATIC_ASSERT(Elements(sampler->payload) >= 7);
-
- sampler->payload[0] = dw0;
- sampler->payload[1] = dw1;
- sampler->payload[2] = dw3;
-
- memcpy(&sampler->payload[3],
- state->border_color.ui, sizeof(state->border_color.ui));
- }
- else {
- dw0 = 1 << 28 |
- mip_filter << 20 |
- lod_bias << 3;
-
- if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
- dw0 |= gen6_translate_shadow_func(state->compare_func);
-
- sampler->dw_filter = (min_filter != mag_filter) << 27 |
- mag_filter << 17 |
- min_filter << 14;
-
- sampler->dw_filter_aniso = GEN6_MAPFILTER_ANISOTROPIC << 17 |
- GEN6_MAPFILTER_ANISOTROPIC << 14;
-
- dw1 = min_lod << 22 |
- max_lod << 12;
-
- sampler->dw_wrap = wrap_s << 6 |
- wrap_t << 3 |
- wrap_r;
-
- sampler->dw_wrap_1d = wrap_s << 6 |
- GEN6_TEXCOORDMODE_WRAP << 3 |
- GEN6_TEXCOORDMODE_WRAP;
-
- sampler->dw_wrap_cube = wrap_cube << 6 |
- wrap_cube << 3 |
- wrap_cube;
-
- dw3 = max_aniso << 19;
-
- /* round the coordinates for linear filtering */
- if (min_filter != GEN6_MAPFILTER_NEAREST) {
- dw3 |= (GEN6_SAMPLER_DW3_U_MIN_ROUND |
- GEN6_SAMPLER_DW3_V_MIN_ROUND |
- GEN6_SAMPLER_DW3_R_MIN_ROUND);
- }
- if (mag_filter != GEN6_MAPFILTER_NEAREST) {
- dw3 |= (GEN6_SAMPLER_DW3_U_MAG_ROUND |
- GEN6_SAMPLER_DW3_V_MAG_ROUND |
- GEN6_SAMPLER_DW3_R_MAG_ROUND);
- }
-
- if (!state->normalized_coords)
- dw3 |= 1;
-
- STATIC_ASSERT(Elements(sampler->payload) >= 15);
-
- sampler->payload[0] = dw0;
- sampler->payload[1] = dw1;
- sampler->payload[2] = dw3;
-
- sampler_init_border_color_gen6(dev,
- &state->border_color, &sampler->payload[3], 12);
- }
-}