iris: WM.
[mesa.git] / src / gallium / drivers / iris / iris_state.c
1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include <stdio.h>
24 #include <errno.h>
25
26 #ifdef HAVE_VALGRIND
27 #include <valgrind.h>
28 #include <memcheck.h>
29 #define VG(x) x
30 #define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))
31 #else
32 #define VG(x)
33 #endif
34
35 #include "pipe/p_defines.h"
36 #include "pipe/p_state.h"
37 #include "pipe/p_context.h"
38 #include "pipe/p_screen.h"
39 #include "util/u_inlines.h"
40 #include "util/u_transfer.h"
41 #include "intel/compiler/brw_compiler.h"
42 #include "intel/common/gen_sample_positions.h"
43 #include "iris_batch.h"
44 #include "iris_context.h"
45 #include "iris_pipe.h"
46 #include "iris_resource.h"
47
48 #define __gen_address_type struct iris_address
49 #define __gen_user_data struct iris_batch
50
51 static uint64_t
52 __gen_combine_address(struct iris_batch *batch, void *location,
53 struct iris_address addr, uint32_t delta)
54 {
55 if (addr.bo == NULL)
56 return addr.offset + delta;
57
58 return iris_batch_reloc(batch, location - batch->cmdbuf.map, addr.bo,
59 addr.offset + delta, addr.reloc_flags);
60 }
61
/* Token-pasting helpers: build the identifiers <packet>_length,
 * <packet>_length_bias, <packet>_header and <packet>_pack from a
 * command/state name.
 */
#define __genxml_cmd_length(packet)      packet ## _length
#define __genxml_cmd_length_bias(packet) packet ## _length_bias
#define __genxml_cmd_header(packet)      packet ## _header
#define __genxml_cmd_pack(packet)        packet ## _pack
66
67 static void *
68 get_command_space(struct iris_batch *batch, unsigned bytes)
69 {
70 iris_require_command_space(batch, bytes);
71 void *map = batch->cmdbuf.map_next;
72 batch->cmdbuf.map_next += bytes;
73 return map;
74 }
75
/* Block-style packer for commands.
 *
 * Declares `struct cmd name` initialized with the command's header, runs
 * the statement that follows the macro exactly once so the caller can fill
 * in fields, then calls the command's pack function to write the result to
 * dst.  The body is skipped entirely when dst is NULL.
 */
#define iris_pack_command(cmd, dst, name)                               \
   for (struct cmd name = { __genxml_cmd_header(cmd) },                 \
        *_dst = (void *)(dst);                                          \
        __builtin_expect(_dst != NULL, 1);                              \
        __genxml_cmd_pack(cmd)(NULL, (void *)_dst, &name), _dst = NULL)
82
/* Block-style packer for state structures.
 *
 * Declares an empty-initialized `struct cmd name`, runs the statement that
 * follows the macro exactly once, then calls the structure's pack function
 * to write the result to dst.  The body is skipped when dst is NULL.
 */
#define iris_pack_state(cmd, dst, name)                                 \
   for (struct cmd name = {}, *_dst = (void *)(dst);                    \
        __builtin_expect(_dst != NULL, 1);                              \
        __genxml_cmd_pack(cmd)(NULL, (void *)_dst, &name), _dst = NULL)
88
/* Pack a command straight into the batch: the destination is command space
 * obtained from get_command_space(), sized as 4 bytes per dword of the
 * command's generated length.
 */
#define iris_emit_cmd(batch, cmd, name)                                       \
   iris_pack_command(cmd,                                                     \
                     get_command_space(batch, 4 * __genxml_cmd_length(cmd)),  \
                     name)
91
/* Emit num_dwords dwords into the batch, each one the bitwise OR of the
 * corresponding dwords of two pre-packed arrays.
 *
 * The valgrind check takes a size in bytes, so the dword count is scaled by
 * 4 (it previously covered only a quarter of the emitted data).  The count
 * argument is parenthesized so that expressions can be passed safely.
 */
#define iris_emit_merge(batch, dwords0, dwords1, num_dwords)        \
   do {                                                             \
      uint32_t *dw = get_command_space(batch, 4 * (num_dwords));    \
      for (uint32_t i = 0; i < (num_dwords); i++) {                 \
         dw[i] = (dwords0)[i] | (dwords1)[i];                       \
      }                                                             \
      VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, 4 * (num_dwords)));      \
   } while (0)
99
/* Emit a pre-packed command whose 64-bit address field still needs to be
 * resolved.
 *
 * dwords     - array holding the pre-packed command
 * num_dw     - number of dwords to emit (must not exceed the array size)
 * addr_field - name passed to GENX(); the resulting bit position must be
 *              64-bit aligned
 * addr       - struct iris_address to combine into the field
 *
 * The two dwords covering the address field are produced the same way the
 * genxml pack functions do it: the pre-packed low dword is passed as the
 * delta to __gen_combine_address(), and the pre-packed high dword is OR'd
 * into the upper half of the result.  All other dwords are copied verbatim.
 *
 * Fixes relative to the previous version: the relocation result was assigned
 * to a pointer instead of being stored in the batch, the location was
 * computed by subtracting incompatible pointer types, and the trailing copy
 * loop overwrote the upper address dword.
 *
 * NOTE(review): assumes the pre-packed array was packed with a NULL bo so
 * the address dwords only carry the non-address bits - confirm with callers.
 */
#define iris_emit_with_addr(batch, dwords, num_dw, addr_field, addr)          \
   do {                                                                       \
      STATIC_ASSERT((GENX(addr_field) % 64) == 0);                            \
      assert((num_dw) <= ARRAY_SIZE(dwords));                                 \
      const uint32_t _addr_idx = GENX(addr_field) / 32;                       \
      uint32_t *_dw = get_command_space(batch, 4 * (num_dw));                 \
      for (uint32_t _i = 0; _i < _addr_idx; _i++) {                           \
         _dw[_i] = (dwords)[_i];                                              \
      }                                                                       \
      const uint64_t _addr64 =                                                \
         __gen_combine_address(batch, &_dw[_addr_idx], addr,                  \
                               (dwords)[_addr_idx]);                          \
      _dw[_addr_idx] = (uint32_t) _addr64;                                    \
      _dw[_addr_idx + 1] =                                                    \
         (uint32_t) (_addr64 >> 32) | (dwords)[_addr_idx + 1];                \
      for (uint32_t _i = _addr_idx + 2; _i < (num_dw); _i++) {                \
         _dw[_i] = (dwords)[_i];                                              \
      }                                                                       \
      VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dw, (num_dw) * 4));                   \
   } while (0)
118
119 #include "genxml/genX_pack.h"
120 #include "genxml/gen_macros.h"
121 #include "genxml/genX_bits.h"
122
/* MOCS value handed to isl_surf_fill_state() in this file.
 * NOTE(review): presumably selects write-back caching, going by the name -
 * confirm against the hardware documentation.
 */
#define MOCS_WB (2 << 1)
124
125 UNUSED static void pipe_asserts()
126 {
127 #define PIPE_ASSERT(x) STATIC_ASSERT((int)x)
128
129 /* pipe_logicop happens to match the hardware. */
130 PIPE_ASSERT(PIPE_LOGICOP_CLEAR == LOGICOP_CLEAR);
131 PIPE_ASSERT(PIPE_LOGICOP_NOR == LOGICOP_NOR);
132 PIPE_ASSERT(PIPE_LOGICOP_AND_INVERTED == LOGICOP_AND_INVERTED);
133 PIPE_ASSERT(PIPE_LOGICOP_COPY_INVERTED == LOGICOP_COPY_INVERTED);
134 PIPE_ASSERT(PIPE_LOGICOP_AND_REVERSE == LOGICOP_AND_REVERSE);
135 PIPE_ASSERT(PIPE_LOGICOP_INVERT == LOGICOP_INVERT);
136 PIPE_ASSERT(PIPE_LOGICOP_XOR == LOGICOP_XOR);
137 PIPE_ASSERT(PIPE_LOGICOP_NAND == LOGICOP_NAND);
138 PIPE_ASSERT(PIPE_LOGICOP_AND == LOGICOP_AND);
139 PIPE_ASSERT(PIPE_LOGICOP_EQUIV == LOGICOP_EQUIV);
140 PIPE_ASSERT(PIPE_LOGICOP_NOOP == LOGICOP_NOOP);
141 PIPE_ASSERT(PIPE_LOGICOP_OR_INVERTED == LOGICOP_OR_INVERTED);
142 PIPE_ASSERT(PIPE_LOGICOP_COPY == LOGICOP_COPY);
143 PIPE_ASSERT(PIPE_LOGICOP_OR_REVERSE == LOGICOP_OR_REVERSE);
144 PIPE_ASSERT(PIPE_LOGICOP_OR == LOGICOP_OR);
145 PIPE_ASSERT(PIPE_LOGICOP_SET == LOGICOP_SET);
146
147 /* pipe_blend_func happens to match the hardware. */
148 PIPE_ASSERT(PIPE_BLENDFACTOR_ONE == BLENDFACTOR_ONE);
149 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_COLOR == BLENDFACTOR_SRC_COLOR);
150 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_ALPHA == BLENDFACTOR_SRC_ALPHA);
151 PIPE_ASSERT(PIPE_BLENDFACTOR_DST_ALPHA == BLENDFACTOR_DST_ALPHA);
152 PIPE_ASSERT(PIPE_BLENDFACTOR_DST_COLOR == BLENDFACTOR_DST_COLOR);
153 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE == BLENDFACTOR_SRC_ALPHA_SATURATE);
154 PIPE_ASSERT(PIPE_BLENDFACTOR_CONST_COLOR == BLENDFACTOR_CONST_COLOR);
155 PIPE_ASSERT(PIPE_BLENDFACTOR_CONST_ALPHA == BLENDFACTOR_CONST_ALPHA);
156 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC1_COLOR == BLENDFACTOR_SRC1_COLOR);
157 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC1_ALPHA == BLENDFACTOR_SRC1_ALPHA);
158 PIPE_ASSERT(PIPE_BLENDFACTOR_ZERO == BLENDFACTOR_ZERO);
159 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC_COLOR == BLENDFACTOR_INV_SRC_COLOR);
160 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC_ALPHA == BLENDFACTOR_INV_SRC_ALPHA);
161 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_DST_ALPHA == BLENDFACTOR_INV_DST_ALPHA);
162 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_DST_COLOR == BLENDFACTOR_INV_DST_COLOR);
163 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_CONST_COLOR == BLENDFACTOR_INV_CONST_COLOR);
164 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_CONST_ALPHA == BLENDFACTOR_INV_CONST_ALPHA);
165 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC1_COLOR == BLENDFACTOR_INV_SRC1_COLOR);
166 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC1_ALPHA == BLENDFACTOR_INV_SRC1_ALPHA);
167
168 /* pipe_blend_func happens to match the hardware. */
169 PIPE_ASSERT(PIPE_BLEND_ADD == BLENDFUNCTION_ADD);
170 PIPE_ASSERT(PIPE_BLEND_SUBTRACT == BLENDFUNCTION_SUBTRACT);
171 PIPE_ASSERT(PIPE_BLEND_REVERSE_SUBTRACT == BLENDFUNCTION_REVERSE_SUBTRACT);
172 PIPE_ASSERT(PIPE_BLEND_MIN == BLENDFUNCTION_MIN);
173 PIPE_ASSERT(PIPE_BLEND_MAX == BLENDFUNCTION_MAX);
174
175 /* pipe_stencil_op happens to match the hardware. */
176 PIPE_ASSERT(PIPE_STENCIL_OP_KEEP == STENCILOP_KEEP);
177 PIPE_ASSERT(PIPE_STENCIL_OP_ZERO == STENCILOP_ZERO);
178 PIPE_ASSERT(PIPE_STENCIL_OP_REPLACE == STENCILOP_REPLACE);
179 PIPE_ASSERT(PIPE_STENCIL_OP_INCR == STENCILOP_INCRSAT);
180 PIPE_ASSERT(PIPE_STENCIL_OP_DECR == STENCILOP_DECRSAT);
181 PIPE_ASSERT(PIPE_STENCIL_OP_INCR_WRAP == STENCILOP_INCR);
182 PIPE_ASSERT(PIPE_STENCIL_OP_DECR_WRAP == STENCILOP_DECR);
183 PIPE_ASSERT(PIPE_STENCIL_OP_INVERT == STENCILOP_INVERT);
184 #undef PIPE_ASSERT
185 }
186
187 static unsigned
188 translate_prim_type(enum pipe_prim_type prim, uint8_t verts_per_patch)
189 {
190 static const unsigned map[] = {
191 [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST,
192 [PIPE_PRIM_LINES] = _3DPRIM_LINELIST,
193 [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP,
194 [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP,
195 [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST,
196 [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
197 [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
198 [PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST,
199 [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
200 [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON,
201 [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
202 [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
203 [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
204 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
205 [PIPE_PRIM_PATCHES] = _3DPRIM_PATCHLIST_1 - 1,
206 };
207
208 return map[prim] + (prim == PIPE_PRIM_PATCHES ? verts_per_patch : 0);
209 }
210
211 static unsigned
212 translate_compare_func(enum pipe_compare_func pipe_func)
213 {
214 static const unsigned map[] = {
215 [PIPE_FUNC_NEVER] = COMPAREFUNCTION_NEVER,
216 [PIPE_FUNC_LESS] = COMPAREFUNCTION_LESS,
217 [PIPE_FUNC_EQUAL] = COMPAREFUNCTION_EQUAL,
218 [PIPE_FUNC_LEQUAL] = COMPAREFUNCTION_LEQUAL,
219 [PIPE_FUNC_GREATER] = COMPAREFUNCTION_GREATER,
220 [PIPE_FUNC_NOTEQUAL] = COMPAREFUNCTION_NOTEQUAL,
221 [PIPE_FUNC_GEQUAL] = COMPAREFUNCTION_GEQUAL,
222 [PIPE_FUNC_ALWAYS] = COMPAREFUNCTION_ALWAYS,
223 };
224 return map[pipe_func];
225 }
226
227 static unsigned
228 translate_shadow_func(enum pipe_compare_func pipe_func)
229 {
230 /* Gallium specifies the result of shadow comparisons as:
231 *
232 * 1 if ref <op> texel,
233 * 0 otherwise.
234 *
235 * The hardware does:
236 *
237 * 0 if texel <op> ref,
238 * 1 otherwise.
239 *
240 * So we need to flip the operator and also negate.
241 */
242 static const unsigned map[] = {
243 [PIPE_FUNC_NEVER] = PREFILTEROPALWAYS,
244 [PIPE_FUNC_LESS] = PREFILTEROPLEQUAL,
245 [PIPE_FUNC_EQUAL] = PREFILTEROPNOTEQUAL,
246 [PIPE_FUNC_LEQUAL] = PREFILTEROPLESS,
247 [PIPE_FUNC_GREATER] = PREFILTEROPGEQUAL,
248 [PIPE_FUNC_NOTEQUAL] = PREFILTEROPEQUAL,
249 [PIPE_FUNC_GEQUAL] = PREFILTEROPGREATER,
250 [PIPE_FUNC_ALWAYS] = PREFILTEROPNEVER,
251 };
252 return map[pipe_func];
253 }
254
255 static unsigned
256 translate_cull_mode(unsigned pipe_face)
257 {
258 static const unsigned map[4] = {
259 [PIPE_FACE_NONE] = CULLMODE_NONE,
260 [PIPE_FACE_FRONT] = CULLMODE_FRONT,
261 [PIPE_FACE_BACK] = CULLMODE_BACK,
262 [PIPE_FACE_FRONT_AND_BACK] = CULLMODE_BOTH,
263 };
264 return map[pipe_face];
265 }
266
267 static unsigned
268 translate_fill_mode(unsigned pipe_polymode)
269 {
270 static const unsigned map[4] = {
271 [PIPE_POLYGON_MODE_FILL] = FILL_MODE_SOLID,
272 [PIPE_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME,
273 [PIPE_POLYGON_MODE_POINT] = FILL_MODE_POINT,
274 [PIPE_POLYGON_MODE_FILL_RECTANGLE] = FILL_MODE_SOLID,
275 };
276 return map[pipe_polymode];
277 }
278
279 static struct iris_address
280 ro_bo(struct iris_bo *bo, uint32_t offset)
281 {
282 return (struct iris_address) { .bo = bo, .offset = offset };
283 }
284
285 void
286 iris_upload_initial_gpu_state(struct iris_batch *batch)
287 {
288 iris_emit_cmd(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
289 rect.ClippedDrawingRectangleXMax = UINT16_MAX;
290 rect.ClippedDrawingRectangleYMax = UINT16_MAX;
291 }
292 iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_PATTERN), pat) {
293 GEN_SAMPLE_POS_1X(pat._1xSample);
294 GEN_SAMPLE_POS_2X(pat._2xSample);
295 GEN_SAMPLE_POS_4X(pat._4xSample);
296 GEN_SAMPLE_POS_8X(pat._8xSample);
297 GEN_SAMPLE_POS_16X(pat._16xSample);
298 }
299 iris_emit_cmd(batch, GENX(3DSTATE_AA_LINE_PARAMETERS), foo);
300 iris_emit_cmd(batch, GENX(3DSTATE_WM_CHROMAKEY), foo);
301 iris_emit_cmd(batch, GENX(3DSTATE_WM_HZ_OP), foo);
302 /* XXX: may need to set an offset for origin-UL framebuffers */
303 iris_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_OFFSET), foo);
304
305 /* Just assign a static partitioning. */
306 for (int i = 0; i <= MESA_SHADER_FRAGMENT; i++) {
307 iris_emit_cmd(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), alloc) {
308 alloc._3DCommandSubOpcode = 18 + i;
309 alloc.ConstantBufferOffset = 6 * i;
310 alloc.ConstantBufferSize = i == MESA_SHADER_FRAGMENT ? 8 : 6;
311 }
312 }
313 }
314
/** Compute grid launch hook.  Currently an empty stub: nothing is emitted. */
static void
iris_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *info)
{
   (void) ctx;
   (void) info;
}
319
320 static void
321 iris_set_blend_color(struct pipe_context *ctx,
322 const struct pipe_blend_color *state)
323 {
324 struct iris_context *ice = (struct iris_context *) ctx;
325
326 memcpy(&ice->state.blend_color, state, sizeof(struct pipe_blend_color));
327 ice->state.dirty |= IRIS_DIRTY_COLOR_CALC_STATE;
328 }
329
330 struct iris_blend_state {
331 uint32_t ps_blend[GENX(3DSTATE_PS_BLEND_length)];
332 uint32_t blend_state[GENX(BLEND_STATE_length)];
333 uint32_t blend_entries[BRW_MAX_DRAW_BUFFERS *
334 GENX(BLEND_STATE_ENTRY_length)];
335 };
336
337 static void *
338 iris_create_blend_state(struct pipe_context *ctx,
339 const struct pipe_blend_state *state)
340 {
341 struct iris_blend_state *cso = malloc(sizeof(struct iris_blend_state));
342
343 iris_pack_state(GENX(BLEND_STATE), cso->blend_state, bs) {
344 bs.AlphaToCoverageEnable = state->alpha_to_coverage;
345 bs.IndependentAlphaBlendEnable = state->independent_blend_enable;
346 bs.AlphaToOneEnable = state->alpha_to_one;
347 bs.AlphaToCoverageDitherEnable = state->alpha_to_coverage;
348 bs.ColorDitherEnable = state->dither;
349 //bs.AlphaTestEnable = <comes from alpha state> :(
350 //bs.AlphaTestFunction = <comes from alpha state> :(
351 }
352
353 iris_pack_command(GENX(3DSTATE_PS_BLEND), cso->ps_blend, pb) {
354 //pb.HasWriteableRT = <comes from somewhere> :(
355 //pb.AlphaTestEnable = <comes from alpha state> :(
356 pb.AlphaToCoverageEnable = state->alpha_to_coverage;
357 pb.IndependentAlphaBlendEnable = state->independent_blend_enable;
358
359 pb.ColorBufferBlendEnable = state->rt[0].blend_enable;
360
361 pb.SourceBlendFactor = state->rt[0].rgb_src_factor;
362 pb.SourceAlphaBlendFactor = state->rt[0].alpha_func;
363 pb.DestinationBlendFactor = state->rt[0].rgb_dst_factor;
364 pb.DestinationAlphaBlendFactor = state->rt[0].alpha_dst_factor;
365 }
366
367 for (int i = 0; i < BRW_MAX_DRAW_BUFFERS; i++) {
368 iris_pack_state(GENX(BLEND_STATE_ENTRY), &cso->blend_entries[i], be) {
369 be.LogicOpEnable = state->logicop_enable;
370 be.LogicOpFunction = state->logicop_func;
371
372 be.PreBlendSourceOnlyClampEnable = false;
373 be.ColorClampRange = COLORCLAMP_RTFORMAT;
374 be.PreBlendColorClampEnable = true;
375 be.PostBlendColorClampEnable = true;
376
377 be.ColorBufferBlendEnable = state->rt[i].blend_enable;
378
379 be.ColorBlendFunction = state->rt[i].rgb_func;
380 be.AlphaBlendFunction = state->rt[i].alpha_func;
381 be.SourceBlendFactor = state->rt[i].rgb_src_factor;
382 be.SourceAlphaBlendFactor = state->rt[i].alpha_func;
383 be.DestinationBlendFactor = state->rt[i].rgb_dst_factor;
384 be.DestinationAlphaBlendFactor = state->rt[i].alpha_dst_factor;
385
386 be.WriteDisableRed = state->rt[i].colormask & PIPE_MASK_R;
387 be.WriteDisableGreen = state->rt[i].colormask & PIPE_MASK_G;
388 be.WriteDisableBlue = state->rt[i].colormask & PIPE_MASK_B;
389 be.WriteDisableAlpha = state->rt[i].colormask & PIPE_MASK_A;
390 }
391 }
392
393 return cso;
394 }
395
396 static void
397 iris_bind_blend_state(struct pipe_context *ctx, void *state)
398 {
399 struct iris_context *ice = (struct iris_context *) ctx;
400 ice->state.cso_blend = state;
401 ice->state.dirty |= IRIS_DIRTY_CC_VIEWPORT;
402 ice->state.dirty |= IRIS_DIRTY_WM_DEPTH_STENCIL;
403 }
404
405 struct iris_depth_stencil_alpha_state {
406 uint32_t wmds[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];
407 uint32_t cc_vp[GENX(CC_VIEWPORT_length)];
408
409 struct pipe_alpha_state alpha; /* to BLEND_STATE, 3DSTATE_PS_BLEND */
410 };
411
412 static void *
413 iris_create_zsa_state(struct pipe_context *ctx,
414 const struct pipe_depth_stencil_alpha_state *state)
415 {
416 struct iris_depth_stencil_alpha_state *cso =
417 malloc(sizeof(struct iris_depth_stencil_alpha_state));
418
419 cso->alpha = state->alpha;
420
421 bool two_sided_stencil = state->stencil[1].enabled;
422
423 /* The state tracker needs to optimize away EQUAL writes for us. */
424 assert(!(state->depth.func == PIPE_FUNC_EQUAL && state->depth.writemask));
425
426 iris_pack_command(GENX(3DSTATE_WM_DEPTH_STENCIL), cso->wmds, wmds) {
427 wmds.StencilFailOp = state->stencil[0].fail_op;
428 wmds.StencilPassDepthFailOp = state->stencil[0].zfail_op;
429 wmds.StencilPassDepthPassOp = state->stencil[0].zpass_op;
430 wmds.StencilTestFunction =
431 translate_compare_func(state->stencil[0].func);
432 wmds.BackfaceStencilFailOp = state->stencil[1].fail_op;
433 wmds.BackfaceStencilPassDepthFailOp = state->stencil[1].zfail_op;
434 wmds.BackfaceStencilPassDepthPassOp = state->stencil[1].zpass_op;
435 wmds.BackfaceStencilTestFunction =
436 translate_compare_func(state->stencil[1].func);
437 wmds.DepthTestFunction = translate_compare_func(state->depth.func);
438 wmds.DoubleSidedStencilEnable = two_sided_stencil;
439 wmds.StencilTestEnable = state->stencil[0].enabled;
440 wmds.StencilBufferWriteEnable =
441 state->stencil[0].writemask != 0 ||
442 (two_sided_stencil && state->stencil[1].writemask != 0);
443 wmds.DepthTestEnable = state->depth.enabled;
444 wmds.DepthBufferWriteEnable = state->depth.writemask;
445 wmds.StencilTestMask = state->stencil[0].valuemask;
446 wmds.StencilWriteMask = state->stencil[0].writemask;
447 wmds.BackfaceStencilTestMask = state->stencil[1].valuemask;
448 wmds.BackfaceStencilWriteMask = state->stencil[1].writemask;
449 /* wmds.[Backface]StencilReferenceValue are merged later */
450 }
451
452 iris_pack_state(GENX(CC_VIEWPORT), cso->cc_vp, ccvp) {
453 ccvp.MinimumDepth = state->depth.bounds_min;
454 ccvp.MaximumDepth = state->depth.bounds_max;
455 }
456
457 return cso;
458 }
459
460 static void
461 iris_bind_zsa_state(struct pipe_context *ctx, void *state)
462 {
463 struct iris_context *ice = (struct iris_context *) ctx;
464 struct iris_depth_stencil_alpha_state *old_cso = ice->state.cso_zsa;
465 struct iris_depth_stencil_alpha_state *new_cso = state;
466
467 if (new_cso) {
468 if (!old_cso || old_cso->alpha.ref_value != new_cso->alpha.ref_value) {
469 ice->state.dirty |= IRIS_DIRTY_COLOR_CALC_STATE;
470 }
471 }
472
473 ice->state.cso_zsa = new_cso;
474 ice->state.dirty |= IRIS_DIRTY_CC_VIEWPORT;
475 ice->state.dirty |= IRIS_DIRTY_WM_DEPTH_STENCIL;
476 }
477
478 struct iris_rasterizer_state {
479 uint32_t sf[GENX(3DSTATE_SF_length)];
480 uint32_t clip[GENX(3DSTATE_CLIP_length)];
481 uint32_t raster[GENX(3DSTATE_RASTER_length)];
482 uint32_t wm[GENX(3DSTATE_WM_length)];
483 uint32_t line_stipple[GENX(3DSTATE_LINE_STIPPLE_length)];
484
485 bool flatshade; /* for shader state */
486 bool light_twoside; /* for shader state */
487 bool rasterizer_discard; /* for 3DSTATE_STREAMOUT */
488 bool half_pixel_center; /* for 3DSTATE_MULTISAMPLE */
489 enum pipe_sprite_coord_mode sprite_coord_mode; /* PIPE_SPRITE_* */
490 };
491
492 static void *
493 iris_create_rasterizer_state(struct pipe_context *ctx,
494 const struct pipe_rasterizer_state *state)
495 {
496 struct iris_rasterizer_state *cso =
497 malloc(sizeof(struct iris_rasterizer_state));
498
499 #if 0
500 sprite_coord_mode -> SBE PointSpriteTextureCoordinateOrigin
501 sprite_coord_enable -> SBE PointSpriteTextureCoordinateEnable
502 point_quad_rasterization -> SBE?
503
504 not necessary?
505 {
506 poly_smooth
507 force_persample_interp - ?
508 bottom_edge_rule
509
510 offset_units_unscaled - cap not exposed
511 }
512 #endif
513
514 cso->flatshade = state->flatshade;
515 cso->light_twoside = state->light_twoside;
516 cso->rasterizer_discard = state->rasterizer_discard;
517 cso->half_pixel_center = state->half_pixel_center;
518
519 iris_pack_command(GENX(3DSTATE_SF), cso->sf, sf) {
520 sf.StatisticsEnable = true;
521 sf.ViewportTransformEnable = true;
522 sf.AALineDistanceMode = AALINEDISTANCE_TRUE;
523 sf.LineEndCapAntialiasingRegionWidth =
524 state->line_smooth ? _10pixels : _05pixels;
525 sf.LastPixelEnable = state->line_last_pixel;
526 sf.LineWidth = state->line_width;
527 sf.SmoothPointEnable = state->point_smooth;
528 sf.PointWidthSource = state->point_size_per_vertex ? Vertex : State;
529 sf.PointWidth = state->point_size;
530
531 if (state->flatshade_first) {
532 sf.TriangleStripListProvokingVertexSelect = 2;
533 sf.TriangleFanProvokingVertexSelect = 2;
534 sf.LineStripListProvokingVertexSelect = 1;
535 } else {
536 sf.TriangleFanProvokingVertexSelect = 1;
537 }
538 }
539
540 /* COMPLETE! */
541 iris_pack_command(GENX(3DSTATE_RASTER), cso->raster, rr) {
542 rr.FrontWinding = state->front_ccw ? CounterClockwise : Clockwise;
543 rr.CullMode = translate_cull_mode(state->cull_face);
544 rr.FrontFaceFillMode = translate_fill_mode(state->fill_front);
545 rr.BackFaceFillMode = translate_fill_mode(state->fill_back);
546 rr.DXMultisampleRasterizationEnable = state->multisample;
547 rr.GlobalDepthOffsetEnableSolid = state->offset_tri;
548 rr.GlobalDepthOffsetEnableWireframe = state->offset_line;
549 rr.GlobalDepthOffsetEnablePoint = state->offset_point;
550 rr.GlobalDepthOffsetConstant = state->offset_units;
551 rr.GlobalDepthOffsetScale = state->offset_scale;
552 rr.GlobalDepthOffsetClamp = state->offset_clamp;
553 rr.SmoothPointEnable = state->point_smooth;
554 rr.AntialiasingEnable = state->line_smooth;
555 rr.ScissorRectangleEnable = state->scissor;
556 rr.ViewportZNearClipTestEnable = state->depth_clip_near;
557 rr.ViewportZFarClipTestEnable = state->depth_clip_far;
558 //rr.ConservativeRasterizationEnable = not yet supported by Gallium...
559 }
560
561 iris_pack_command(GENX(3DSTATE_CLIP), cso->clip, cl) {
562 cl.StatisticsEnable = true;
563 cl.EarlyCullEnable = true;
564 cl.UserClipDistanceClipTestEnableBitmask = state->clip_plane_enable;
565 cl.ForceUserClipDistanceClipTestEnableBitmask = true;
566 cl.APIMode = state->clip_halfz ? APIMODE_D3D : APIMODE_OGL;
567 cl.GuardbandClipTestEnable = true;
568 cl.ClipMode = CLIPMODE_NORMAL;
569 cl.ClipEnable = true;
570 cl.ViewportXYClipTestEnable = state->point_tri_clip;
571 cl.MinimumPointWidth = 0.125;
572 cl.MaximumPointWidth = 255.875;
573 //.NonPerspectiveBarycentricEnable = <comes from FS prog> :(
574 //.ForceZeroRTAIndexEnable = <comes from FB layers being 0>
575
576 if (state->flatshade_first) {
577 cl.TriangleStripListProvokingVertexSelect = 2;
578 cl.TriangleFanProvokingVertexSelect = 2;
579 cl.LineStripListProvokingVertexSelect = 1;
580 } else {
581 cl.TriangleFanProvokingVertexSelect = 1;
582 }
583 }
584
585 iris_pack_command(GENX(3DSTATE_WM), cso->wm, wm) {
586 wm.LineAntialiasingRegionWidth = _10pixels;
587 wm.LineEndCapAntialiasingRegionWidth = _05pixels;
588 wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
589 wm.StatisticsEnable = true;
590 wm.LineStippleEnable = state->line_stipple_enable;
591 wm.PolygonStippleEnable = state->poly_stipple_enable;
592 // wm.BarycentricInterpolationMode = <comes from FS program> :(
593 // wm.EarlyDepthStencilControl = <comes from FS program> :(
594 }
595
596 /* Remap from 0..255 back to 1..256 */
597 const unsigned line_stipple_factor = state->line_stipple_factor + 1;
598
599 iris_pack_command(GENX(3DSTATE_LINE_STIPPLE), cso->line_stipple, line) {
600 line.LineStipplePattern = state->line_stipple_pattern;
601 line.LineStippleInverseRepeatCount = 1.0f / line_stipple_factor;
602 line.LineStippleRepeatCount = line_stipple_factor;
603 }
604
605 return cso;
606 }
607
608 static void
609 iris_bind_rasterizer_state(struct pipe_context *ctx, void *state)
610 {
611 struct iris_context *ice = (struct iris_context *) ctx;
612 struct iris_rasterizer_state *old_cso = ice->state.cso_rast;
613 struct iris_rasterizer_state *new_cso = state;
614
615 if (new_cso) {
616 /* Try to avoid re-emitting 3DSTATE_LINE_STIPPLE, it's non-pipelined */
617 if (!old_cso || memcmp(old_cso->line_stipple, new_cso->line_stipple,
618 sizeof(old_cso->line_stipple)) != 0) {
619 ice->state.dirty |= IRIS_DIRTY_LINE_STIPPLE;
620 }
621
622 if (!old_cso ||
623 old_cso->half_pixel_center != new_cso->half_pixel_center) {
624 ice->state.dirty |= IRIS_DIRTY_MULTISAMPLE;
625 }
626 }
627
628 ice->state.cso_rast = new_cso;
629 ice->state.dirty |= IRIS_DIRTY_RASTER;
630 }
631
632 static uint32_t
633 translate_wrap(unsigned pipe_wrap)
634 {
635 static const unsigned map[] = {
636 [PIPE_TEX_WRAP_REPEAT] = TCM_WRAP,
637 [PIPE_TEX_WRAP_CLAMP] = TCM_HALF_BORDER,
638 [PIPE_TEX_WRAP_CLAMP_TO_EDGE] = TCM_CLAMP,
639 [PIPE_TEX_WRAP_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER,
640 [PIPE_TEX_WRAP_MIRROR_REPEAT] = TCM_MIRROR,
641 [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE,
642 [PIPE_TEX_WRAP_MIRROR_CLAMP] = -1, // XXX: ???
643 [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER] = -1, // XXX: ???
644 };
645 return map[pipe_wrap];
646 }
647
648 /**
649 * Return true if the given wrap mode requires the border color to exist.
650 */
651 static bool
652 wrap_mode_needs_border_color(unsigned wrap_mode)
653 {
654 return wrap_mode == TCM_CLAMP_BORDER || wrap_mode == TCM_HALF_BORDER;
655 }
656
657 static unsigned
658 translate_mip_filter(enum pipe_tex_mipfilter pipe_mip)
659 {
660 static const unsigned map[] = {
661 [PIPE_TEX_MIPFILTER_NEAREST] = MIPFILTER_NEAREST,
662 [PIPE_TEX_MIPFILTER_LINEAR] = MIPFILTER_LINEAR,
663 [PIPE_TEX_MIPFILTER_NONE] = MIPFILTER_NONE,
664 };
665 return map[pipe_mip];
666 }
667
668 struct iris_sampler_state {
669 struct pipe_sampler_state base;
670
671 bool needs_border_color;
672
673 uint32_t sampler_state[GENX(SAMPLER_STATE_length)];
674 };
675
676 static void *
677 iris_create_sampler_state(struct pipe_context *pctx,
678 const struct pipe_sampler_state *state)
679 {
680 struct iris_sampler_state *cso = CALLOC_STRUCT(iris_sampler_state);
681
682 if (!cso)
683 return NULL;
684
685 STATIC_ASSERT(PIPE_TEX_FILTER_NEAREST == MAPFILTER_NEAREST);
686 STATIC_ASSERT(PIPE_TEX_FILTER_LINEAR == MAPFILTER_LINEAR);
687
688 unsigned wrap_s = translate_wrap(state->wrap_s);
689 unsigned wrap_t = translate_wrap(state->wrap_t);
690 unsigned wrap_r = translate_wrap(state->wrap_r);
691
692 cso->needs_border_color = wrap_mode_needs_border_color(wrap_s) ||
693 wrap_mode_needs_border_color(wrap_t) ||
694 wrap_mode_needs_border_color(wrap_r);
695
696 iris_pack_state(GENX(SAMPLER_STATE), cso->sampler_state, samp) {
697 samp.TCXAddressControlMode = wrap_s;
698 samp.TCYAddressControlMode = wrap_t;
699 samp.TCZAddressControlMode = wrap_r;
700 samp.CubeSurfaceControlMode = state->seamless_cube_map;
701 samp.NonnormalizedCoordinateEnable = !state->normalized_coords;
702 samp.MinModeFilter = state->min_img_filter;
703 samp.MagModeFilter = state->mag_img_filter;
704 samp.MipModeFilter = translate_mip_filter(state->min_mip_filter);
705 samp.MaximumAnisotropy = RATIO21;
706
707 if (state->max_anisotropy >= 2) {
708 if (state->min_img_filter == PIPE_TEX_FILTER_LINEAR) {
709 samp.MinModeFilter = MAPFILTER_ANISOTROPIC;
710 samp.AnisotropicAlgorithm = EWAApproximation;
711 }
712
713 if (state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)
714 samp.MagModeFilter = MAPFILTER_ANISOTROPIC;
715
716 samp.MaximumAnisotropy =
717 MIN2((state->max_anisotropy - 2) / 2, RATIO161);
718 }
719
720 /* Set address rounding bits if not using nearest filtering. */
721 if (state->min_img_filter != PIPE_TEX_FILTER_NEAREST) {
722 samp.UAddressMinFilterRoundingEnable = true;
723 samp.VAddressMinFilterRoundingEnable = true;
724 samp.RAddressMinFilterRoundingEnable = true;
725 }
726
727 if (state->mag_img_filter != PIPE_TEX_FILTER_NEAREST) {
728 samp.UAddressMagFilterRoundingEnable = true;
729 samp.VAddressMagFilterRoundingEnable = true;
730 samp.RAddressMagFilterRoundingEnable = true;
731 }
732
733 if (state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
734 samp.ShadowFunction = translate_shadow_func(state->compare_func);
735
736 const float hw_max_lod = GEN_GEN >= 7 ? 14 : 13;
737
738 samp.LODPreClampMode = CLAMP_MODE_OGL;
739 samp.MinLOD = CLAMP(state->min_lod, 0, hw_max_lod);
740 samp.MaxLOD = CLAMP(state->max_lod, 0, hw_max_lod);
741 samp.TextureLODBias = CLAMP(state->lod_bias, -16, 15);
742
743 //samp.BorderColorPointer = <<comes from elsewhere>>
744 }
745
746 return cso;
747 }
748
749 static void
750 iris_bind_sampler_states(struct pipe_context *ctx,
751 enum pipe_shader_type p_stage,
752 unsigned start, unsigned count,
753 void **states)
754 {
755 struct iris_context *ice = (struct iris_context *) ctx;
756 gl_shader_stage stage = stage_from_pipe(p_stage);
757
758 assert(start + count <= IRIS_MAX_TEXTURE_SAMPLERS);
759
760 for (int i = 0; i < count; i++) {
761 ice->state.samplers[stage][start + i] = states[i];
762 }
763
764 ice->state.dirty |= IRIS_DIRTY_SAMPLER_STATES_VS << stage;
765 }
766
767 struct iris_sampler_view {
768 struct pipe_sampler_view pipe;
769 struct isl_view view;
770 uint32_t surface_state[GENX(RENDER_SURFACE_STATE_length)];
771 };
772
773 /**
774 * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
775 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
776 *
777 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
778 * 0 1 2 3 4 5
779 * 4 5 6 7 0 1
780 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
781 *
782 * which is simply adding 4 then modding by 8 (or anding with 7).
783 *
784 * We then may need to apply workarounds for textureGather hardware bugs.
785 */
786 static enum isl_channel_select
787 pipe_swizzle_to_isl_channel(enum pipe_swizzle swizzle)
788 {
789 return (swizzle + 4) & 7;
790 }
791
792 static struct pipe_sampler_view *
793 iris_create_sampler_view(struct pipe_context *ctx,
794 struct pipe_resource *tex,
795 const struct pipe_sampler_view *tmpl)
796 {
797 struct iris_screen *screen = (struct iris_screen *)ctx->screen;
798 struct iris_resource *itex = (struct iris_resource *) tex;
799 struct iris_sampler_view *isv = calloc(1, sizeof(struct iris_sampler_view));
800
801 if (!isv)
802 return NULL;
803
804 /* initialize base object */
805 isv->pipe = *tmpl;
806 isv->pipe.context = ctx;
807 isv->pipe.texture = NULL;
808 pipe_reference_init(&isv->pipe.reference, 1);
809 pipe_resource_reference(&isv->pipe.texture, tex);
810
811 /* XXX: do we need brw_get_texture_swizzle hacks here? */
812
813 isv->view = (struct isl_view) {
814 .format = iris_isl_format_for_pipe_format(tmpl->format),
815 .base_level = tmpl->u.tex.first_level,
816 .levels = tmpl->u.tex.last_level - tmpl->u.tex.first_level + 1,
817 .base_array_layer = tmpl->u.tex.first_layer,
818 .array_len = tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1,
819 .swizzle = (struct isl_swizzle) {
820 .r = pipe_swizzle_to_isl_channel(tmpl->swizzle_r),
821 .g = pipe_swizzle_to_isl_channel(tmpl->swizzle_g),
822 .b = pipe_swizzle_to_isl_channel(tmpl->swizzle_b),
823 .a = pipe_swizzle_to_isl_channel(tmpl->swizzle_a),
824 },
825 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
826 };
827
828 isl_surf_fill_state(&screen->isl_dev, isv->surface_state,
829 .surf = &itex->surf, .view = &isv->view,
830 .mocs = MOCS_WB);
831 // .address = ...
832 // .aux_surf =
833 // .clear_color = clear_color,
834
835 return &isv->pipe;
836 }
837
838 struct iris_surface {
839 struct pipe_surface pipe;
840 struct isl_view view;
841 uint32_t surface_state[GENX(RENDER_SURFACE_STATE_length)];
842 };
843
844 static struct pipe_surface *
845 iris_create_surface(struct pipe_context *ctx,
846 struct pipe_resource *tex,
847 const struct pipe_surface *tmpl)
848 {
849 struct iris_screen *screen = (struct iris_screen *)ctx->screen;
850 struct iris_surface *surf = calloc(1, sizeof(struct iris_surface));
851 struct pipe_surface *psurf = &surf->pipe;
852 struct iris_resource *itex = (struct iris_resource *) tex;
853
854 if (!surf)
855 return NULL;
856
857 pipe_reference_init(&psurf->reference, 1);
858 pipe_resource_reference(&psurf->texture, tex);
859 psurf->context = ctx;
860 psurf->format = tmpl->format;
861 psurf->width = tex->width0;
862 psurf->height = tex->height0;
863 psurf->texture = tex;
864 psurf->u.tex.first_layer = tmpl->u.tex.first_layer;
865 psurf->u.tex.last_layer = tmpl->u.tex.last_layer;
866 psurf->u.tex.level = tmpl->u.tex.level;
867
868 surf->view = (struct isl_view) {
869 .format = iris_isl_format_for_pipe_format(tmpl->format),
870 .base_level = tmpl->u.tex.level,
871 .levels = 1,
872 .base_array_layer = tmpl->u.tex.first_layer,
873 .array_len = tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1,
874 .swizzle = ISL_SWIZZLE_IDENTITY,
875 // XXX: DEPTH_BIt, STENCIL_BIT...CUBE_BIT? Other bits?!
876 .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
877 };
878
879 isl_surf_fill_state(&screen->isl_dev, surf->surface_state,
880 .surf = &itex->surf, .view = &surf->view,
881 .mocs = MOCS_WB);
882 // .address = ...
883 // .aux_surf =
884 // .clear_color = clear_color,
885
886 return psurf;
887 }
888
/**
 * Placeholder for binding sampler views to a shader stage.
 *
 * The body is empty, so every argument is currently ignored and no state
 * is recorded or flagged dirty.
 */
static void
iris_set_sampler_views(struct pipe_context *ctx,
                       enum pipe_shader_type shader,
                       unsigned start, unsigned count,
                       struct pipe_sampler_view **views)
{
}
896
/**
 * Placeholder for setting user clip state.
 *
 * The body is empty, so \p state is currently ignored.
 */
static void
iris_set_clip_state(struct pipe_context *ctx,
                    const struct pipe_clip_state *state)
{
}
902
903 static void
904 iris_set_polygon_stipple(struct pipe_context *ctx,
905 const struct pipe_poly_stipple *state)
906 {
907 struct iris_context *ice = (struct iris_context *) ctx;
908 memcpy(&ice->state.poly_stipple, state, sizeof(*state));
909 ice->state.dirty |= IRIS_DIRTY_POLYGON_STIPPLE;
910 }
911
912 static void
913 iris_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
914 {
915 struct iris_context *ice = (struct iris_context *) ctx;
916
917 ice->state.sample_mask = sample_mask;
918 ice->state.dirty |= IRIS_DIRTY_SAMPLE_MASK;
919 }
920
921 static void
922 iris_set_scissor_states(struct pipe_context *ctx,
923 unsigned start_slot,
924 unsigned num_scissors,
925 const struct pipe_scissor_state *states)
926 {
927 struct iris_context *ice = (struct iris_context *) ctx;
928
929 ice->state.num_scissors = num_scissors;
930
931 for (unsigned i = 0; i < num_scissors; i++) {
932 ice->state.scissors[start_slot + i] = states[i];
933 }
934
935 ice->state.dirty |= IRIS_DIRTY_SCISSOR_RECT;
936 }
937
938 static void
939 iris_set_stencil_ref(struct pipe_context *ctx,
940 const struct pipe_stencil_ref *state)
941 {
942 struct iris_context *ice = (struct iris_context *) ctx;
943 memcpy(&ice->state.stencil_ref, state, sizeof(*state));
944 ice->state.dirty |= IRIS_DIRTY_WM_DEPTH_STENCIL;
945 }
946
947
/**
 * Packed viewport state.
 *
 * Holds the dwords of a single SF_CLIP_VIEWPORT structure, i.e. room for
 * exactly one viewport slot.
 */
struct iris_viewport_state {
   uint32_t sf_cl_vp[GENX(SF_CLIP_VIEWPORT_length)];
};
951
952 static float
953 extent_from_matrix(const struct pipe_viewport_state *state, int axis)
954 {
955 return fabsf(state->scale[axis]) * state->translate[axis];
956 }
957
958 #if 0
959 static void
960 calculate_guardband_size(uint32_t fb_width, uint32_t fb_height,
961 float m00, float m11, float m30, float m31,
962 float *xmin, float *xmax,
963 float *ymin, float *ymax)
964 {
965 /* According to the "Vertex X,Y Clamping and Quantization" section of the
966 * Strips and Fans documentation:
967 *
968 * "The vertex X and Y screen-space coordinates are also /clamped/ to the
969 * fixed-point "guardband" range supported by the rasterization hardware"
970 *
971 * and
972 *
973 * "In almost all circumstances, if an object’s vertices are actually
974 * modified by this clamping (i.e., had X or Y coordinates outside of
975 * the guardband extent the rendered object will not match the intended
976 * result. Therefore software should take steps to ensure that this does
977 * not happen - e.g., by clipping objects such that they do not exceed
978 * these limits after the Drawing Rectangle is applied."
979 *
980 * I believe the fundamental restriction is that the rasterizer (in
981 * the SF/WM stages) have a limit on the number of pixels that can be
982 * rasterized. We need to ensure any coordinates beyond the rasterizer
983 * limit are handled by the clipper. So effectively that limit becomes
984 * the clipper's guardband size.
985 *
986 * It goes on to say:
987 *
988 * "In addition, in order to be correctly rendered, objects must have a
989 * screenspace bounding box not exceeding 8K in the X or Y direction.
990 * This additional restriction must also be comprehended by software,
991 * i.e., enforced by use of clipping."
992 *
993 * This makes no sense. Gen7+ hardware supports 16K render targets,
994 * and you definitely need to be able to draw polygons that fill the
995 * surface. Our assumption is that the rasterizer was limited to 8K
996 * on Sandybridge, which only supports 8K surfaces, and it was actually
997 * increased to 16K on Ivybridge and later.
998 *
999 * So, limit the guardband to 16K on Gen7+ and 8K on Sandybridge.
1000 */
1001 const float gb_size = GEN_GEN >= 7 ? 16384.0f : 8192.0f;
1002
1003 if (m00 != 0 && m11 != 0) {
1004 /* First, we compute the screen-space render area */
1005 const float ss_ra_xmin = MIN3( 0, m30 + m00, m30 - m00);
1006 const float ss_ra_xmax = MAX3( fb_width, m30 + m00, m30 - m00);
1007 const float ss_ra_ymin = MIN3( 0, m31 + m11, m31 - m11);
1008 const float ss_ra_ymax = MAX3(fb_height, m31 + m11, m31 - m11);
1009
1010 /* We want the guardband to be centered on that */
1011 const float ss_gb_xmin = (ss_ra_xmin + ss_ra_xmax) / 2 - gb_size;
1012 const float ss_gb_xmax = (ss_ra_xmin + ss_ra_xmax) / 2 + gb_size;
1013 const float ss_gb_ymin = (ss_ra_ymin + ss_ra_ymax) / 2 - gb_size;
1014 const float ss_gb_ymax = (ss_ra_ymin + ss_ra_ymax) / 2 + gb_size;
1015
1016 /* Now we need it in native device coordinates */
1017 const float ndc_gb_xmin = (ss_gb_xmin - m30) / m00;
1018 const float ndc_gb_xmax = (ss_gb_xmax - m30) / m00;
1019 const float ndc_gb_ymin = (ss_gb_ymin - m31) / m11;
1020 const float ndc_gb_ymax = (ss_gb_ymax - m31) / m11;
1021
1022 /* Thanks to Y-flipping and ORIGIN_UPPER_LEFT, the Y coordinates may be
1023 * flipped upside-down. X should be fine though.
1024 */
1025 assert(ndc_gb_xmin <= ndc_gb_xmax);
1026 *xmin = ndc_gb_xmin;
1027 *xmax = ndc_gb_xmax;
1028 *ymin = MIN2(ndc_gb_ymin, ndc_gb_ymax);
1029 *ymax = MAX2(ndc_gb_ymin, ndc_gb_ymax);
1030 } else {
1031 /* The viewport scales to 0, so nothing will be rendered. */
1032 *xmin = 0.0f;
1033 *xmax = 0.0f;
1034 *ymin = 0.0f;
1035 *ymax = 0.0f;
1036 }
1037 }
1038 #endif
1039
1040 static void
1041 iris_set_viewport_states(struct pipe_context *ctx,
1042 unsigned start_slot,
1043 unsigned num_viewports,
1044 const struct pipe_viewport_state *state)
1045 {
1046 struct iris_context *ice = (struct iris_context *) ctx;
1047 struct iris_viewport_state *cso =
1048 malloc(sizeof(struct iris_viewport_state));
1049
1050 // XXX: sf_cl_vp is only big enough for one slot, we don't iterate right
1051 for (unsigned i = start_slot; i < start_slot + num_viewports; i++) {
1052 float x_extent = extent_from_matrix(&state[i], 0);
1053 float y_extent = extent_from_matrix(&state[i], 1);
1054
1055 iris_pack_state(GENX(SF_CLIP_VIEWPORT), cso->sf_cl_vp, vp) {
1056 vp.ViewportMatrixElementm00 = state[i].scale[0];
1057 vp.ViewportMatrixElementm11 = state[i].scale[1];
1058 vp.ViewportMatrixElementm22 = state[i].scale[2];
1059 vp.ViewportMatrixElementm30 = state[i].translate[0];
1060 vp.ViewportMatrixElementm31 = state[i].translate[1];
1061 vp.ViewportMatrixElementm32 = state[i].translate[2];
1062 /* XXX: in i965 this is computed based on the drawbuffer size,
1063 * but we don't have that here...
1064 */
1065 vp.XMinClipGuardband = -1.0;
1066 vp.XMaxClipGuardband = 1.0;
1067 vp.YMinClipGuardband = -1.0;
1068 vp.YMaxClipGuardband = 1.0;
1069 vp.XMinViewPort = -x_extent;
1070 vp.XMaxViewPort = x_extent;
1071 vp.YMinViewPort = -y_extent;
1072 vp.YMaxViewPort = y_extent;
1073 }
1074 }
1075
1076 ice->state.cso_vp = cso;
1077 // XXX: start_slot
1078 ice->state.num_viewports = num_viewports;
1079 ice->state.dirty |= IRIS_DIRTY_SF_CL_VIEWPORT;
1080 }
1081
/**
 * Storage for the packed depth/HiZ/stencil buffer commands
 * (3DSTATE_DEPTH_BUFFER, 3DSTATE_HIER_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER),
 * one dword array per command.
 */
struct iris_depth_state
{
   uint32_t depth_buffer[GENX(3DSTATE_DEPTH_BUFFER_length)];
   uint32_t hier_depth_buffer[GENX(3DSTATE_HIER_DEPTH_BUFFER_length)];
   uint32_t stencil_buffer[GENX(3DSTATE_STENCIL_BUFFER_length)];
};
1088
1089 static void
1090 iris_set_framebuffer_state(struct pipe_context *ctx,
1091 const struct pipe_framebuffer_state *state)
1092 {
1093 struct iris_context *ice = (struct iris_context *) ctx;
1094 struct pipe_framebuffer_state *cso = &ice->state.framebuffer;
1095
1096 if (cso->samples != state->samples) {
1097 ice->state.dirty |= IRIS_DIRTY_MULTISAMPLE;
1098 }
1099
1100 cso->width = state->width;
1101 cso->height = state->height;
1102 cso->layers = state->layers;
1103 cso->samples = state->samples;
1104
1105 unsigned i;
1106 for (i = 0; i < state->nr_cbufs; i++)
1107 pipe_surface_reference(&cso->cbufs[i], state->cbufs[i]);
1108 for (; i < cso->nr_cbufs; i++)
1109 pipe_surface_reference(&cso->cbufs[i], NULL);
1110
1111 cso->nr_cbufs = state->nr_cbufs;
1112
1113 pipe_surface_reference(&cso->zsbuf, state->zsbuf);
1114
1115 struct isl_depth_stencil_hiz_emit_info info = {
1116 .mocs = MOCS_WB,
1117 };
1118
1119 // XXX: depth buffers
1120 }
1121
/**
 * Placeholder for binding a constant buffer to a shader stage.
 *
 * The body is empty, so \p cb is currently ignored.
 */
static void
iris_set_constant_buffer(struct pipe_context *ctx,
                         enum pipe_shader_type shader, uint index,
                         const struct pipe_constant_buffer *cb)
{
}
1128
1129
1130 static void
1131 iris_sampler_view_destroy(struct pipe_context *ctx,
1132 struct pipe_sampler_view *state)
1133 {
1134 pipe_resource_reference(&state->texture, NULL);
1135 free(state);
1136 }
1137
1138
1139 static void
1140 iris_surface_destroy(struct pipe_context *ctx, struct pipe_surface *surface)
1141 {
1142 pipe_resource_reference(&surface->texture, NULL);
1143 free(surface);
1144 }
1145
/**
 * Release a state object by passing it straight to free().
 *
 * \p ctx is unused.
 */
static void
iris_delete_state(struct pipe_context *ctx, void *state)
{
   free(state);
}
1151
/**
 * Packed vertex buffer state.
 *
 * vertex_buffers holds one header dword followed by room for up to 33
 * VERTEX_BUFFER_STATE entries; bos holds the matching buffer addresses
 * (each with a referenced BO), and num_buffers says how many entries of
 * both arrays are in use.
 */
struct iris_vertex_buffer_state {
   uint32_t vertex_buffers[1 + 33 * GENX(VERTEX_BUFFER_STATE_length)];
   struct iris_address bos[33];
   unsigned num_buffers;
};
1157
1158 static void
1159 iris_free_vertex_buffers(struct iris_vertex_buffer_state *cso)
1160 {
1161 if (cso) {
1162 for (unsigned i = 0; i < cso->num_buffers; i++)
1163 iris_bo_unreference(cso->bos[i].bo);
1164 free(cso);
1165 }
1166 }
1167
1168 static void
1169 iris_set_vertex_buffers(struct pipe_context *ctx,
1170 unsigned start_slot, unsigned count,
1171 const struct pipe_vertex_buffer *buffers)
1172 {
1173 struct iris_context *ice = (struct iris_context *) ctx;
1174 struct iris_vertex_buffer_state *cso =
1175 malloc(sizeof(struct iris_vertex_buffer_state));
1176
1177 /* If there are no buffers, do nothing. We can leave the stale
1178 * 3DSTATE_VERTEX_BUFFERS in place - as long as there are no vertex
1179 * elements that point to them, it should be fine.
1180 */
1181 if (!buffers)
1182 return;
1183
1184 iris_free_vertex_buffers(ice->state.cso_vertex_buffers);
1185
1186 cso->num_buffers = count;
1187
1188 iris_pack_command(GENX(3DSTATE_VERTEX_BUFFERS), cso->vertex_buffers, vb) {
1189 vb.DWordLength = 4 * cso->num_buffers - 1;
1190 }
1191
1192 uint32_t *vb_pack_dest = &cso->vertex_buffers[1];
1193
1194 for (unsigned i = 0; i < count; i++) {
1195 assert(!buffers[i].is_user_buffer);
1196
1197 struct iris_resource *res = (void *) buffers[i].buffer.resource;
1198 iris_bo_reference(res->bo);
1199 cso->bos[i] = ro_bo(res->bo, buffers[i].buffer_offset);
1200
1201 iris_pack_state(GENX(VERTEX_BUFFER_STATE), vb_pack_dest, vb) {
1202 vb.VertexBufferIndex = start_slot + i;
1203 vb.MOCS = MOCS_WB;
1204 vb.AddressModifyEnable = true;
1205 vb.BufferPitch = buffers[i].stride;
1206 vb.BufferSize = res->bo->size;
1207 /* vb.BufferStartingAddress is filled in at draw time */
1208 }
1209
1210 vb_pack_dest += GENX(VERTEX_BUFFER_STATE_length);
1211 }
1212
1213 ice->state.cso_vertex_buffers = cso;
1214 ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS;
1215 }
1216
1217 struct iris_vertex_element_state {
1218 uint32_t vertex_elements[1 + 33 * GENX(VERTEX_ELEMENT_STATE_length)];
1219 uint32_t vf_instancing[GENX(3DSTATE_VF_INSTANCING_length)][33];
1220 unsigned count;
1221 };
1222
1223 static void *
1224 iris_create_vertex_elements(struct pipe_context *ctx,
1225 unsigned count,
1226 const struct pipe_vertex_element *state)
1227 {
1228 struct iris_vertex_element_state *cso =
1229 malloc(sizeof(struct iris_vertex_element_state));
1230
1231 cso->count = count;
1232
1233 /* TODO:
1234 * - create edge flag one
1235 * - create SGV ones
1236 * - if those are necessary, use count + 1/2/3... OR in the length
1237 */
1238 iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), cso->vertex_elements, ve);
1239
1240 uint32_t *ve_pack_dest = &cso->vertex_elements[1];
1241
1242 for (int i = 0; i < count; i++) {
1243 iris_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {
1244 ve.VertexBufferIndex = state[i].vertex_buffer_index;
1245 ve.Valid = true;
1246 ve.SourceElementOffset = state[i].src_offset;
1247 ve.SourceElementFormat =
1248 iris_isl_format_for_pipe_format(state[i].src_format);
1249 }
1250
1251 iris_pack_command(GENX(3DSTATE_VF_INSTANCING), cso->vf_instancing[i], vi) {
1252 vi.VertexElementIndex = i;
1253 vi.InstancingEnable = state[i].instance_divisor > 0;
1254 vi.InstanceDataStepRate = state[i].instance_divisor;
1255 }
1256
1257 ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length);
1258 }
1259
1260 return cso;
1261 }
1262
1263 static void
1264 iris_bind_vertex_elements_state(struct pipe_context *ctx, void *state)
1265 {
1266 struct iris_context *ice = (struct iris_context *) ctx;
1267
1268 ice->state.cso_vertex_elements = state;
1269 ice->state.dirty |= IRIS_DIRTY_VERTEX_ELEMENTS;
1270 }
1271
/**
 * Stub compute state constructor.
 *
 * Ignores \p state and returns a 1-byte malloc() allocation as an opaque
 * placeholder (NULL if malloc fails).
 */
static void *
iris_create_compute_state(struct pipe_context *ctx,
                          const struct pipe_compute_state *state)
{
   return malloc(1);
}
1278
1279 static struct pipe_stream_output_target *
1280 iris_create_stream_output_target(struct pipe_context *ctx,
1281 struct pipe_resource *res,
1282 unsigned buffer_offset,
1283 unsigned buffer_size)
1284 {
1285 struct pipe_stream_output_target *t =
1286 CALLOC_STRUCT(pipe_stream_output_target);
1287 if (!t)
1288 return NULL;
1289
1290 pipe_reference_init(&t->reference, 1);
1291 pipe_resource_reference(&t->buffer, res);
1292 t->buffer_offset = buffer_offset;
1293 t->buffer_size = buffer_size;
1294 return t;
1295 }
1296
1297 static void
1298 iris_stream_output_target_destroy(struct pipe_context *ctx,
1299 struct pipe_stream_output_target *t)
1300 {
1301 pipe_resource_reference(&t->buffer, NULL);
1302 free(t);
1303 }
1304
/**
 * Placeholder for binding stream output targets.
 *
 * The body is empty, so \p targets and \p offsets are currently ignored.
 */
static void
iris_set_stream_output_targets(struct pipe_context *ctx,
                               unsigned num_targets,
                               struct pipe_stream_output_target **targets,
                               const unsigned *offsets)
{
}
1312
1313 void
1314 iris_setup_state_base_address(struct iris_context *ice,
1315 struct iris_batch *batch,
1316 struct iris_bo *instruction_bo)
1317 {
1318 if (!(ice->state.dirty & IRIS_DIRTY_STATE_BASE_ADDRESS))
1319 return;
1320
1321 //iris_batchbuffer_flush(...)
1322
1323 ice->state.dirty &= ~IRIS_DIRTY_STATE_BASE_ADDRESS;
1324
1325 /* XXX: PIPE_CONTROLs */
1326
1327 iris_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) {
1328 #if 0
1329 // XXX: MOCS is stupid for this.
1330 sba.GeneralStateMemoryObjectControlState = MOCS_WB;
1331 sba.StatelessDataPortAccessMemoryObjectControlState = MOCS_WB;
1332 sba.SurfaceStateMemoryObjectControlState = MOCS_WB;
1333 sba.DynamicStateMemoryObjectControlState = MOCS_WB;
1334 sba.IndirectObjectMemoryObjectControlState = MOCS_WB;
1335 sba.InstructionMemoryObjectControlState = MOCS_WB;
1336 sba.BindlessSurfaceStateMemoryObjectControlState = MOCS_WB;
1337 #endif
1338
1339 sba.GeneralStateBaseAddressModifyEnable = true;
1340 sba.SurfaceStateBaseAddressModifyEnable = true;
1341 sba.DynamicStateBaseAddressModifyEnable = true;
1342 sba.IndirectObjectBaseAddressModifyEnable = true;
1343 sba.InstructionBaseAddressModifyEnable = true;
1344 sba.GeneralStateBufferSizeModifyEnable = true;
1345 sba.DynamicStateBufferSizeModifyEnable = true;
1346 sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
1347 sba.IndirectObjectBufferSizeModifyEnable = true;
1348 sba.InstructionBuffersizeModifyEnable = true;
1349
1350 sba.SurfaceStateBaseAddress = ro_bo(batch->statebuf.bo, 0);
1351 sba.DynamicStateBaseAddress = ro_bo(batch->statebuf.bo, 0);
1352 sba.InstructionBaseAddress = ro_bo(instruction_bo, 0);
1353
1354 sba.GeneralStateBufferSize = 0xfffff000;
1355 sba.DynamicStateBufferSize = ALIGN(MAX_STATE_SIZE, 4096);
1356 sba.IndirectObjectBufferSize = 0xfffff000;
1357 sba.InstructionBufferSize = ALIGN(ice->shaders.cache.bo->size, 4096);
1358 sba.BindlessSurfaceStateSize = 0;
1359 }
1360 }
1361
1362 void
1363 iris_upload_render_state(struct iris_context *ice,
1364 struct iris_batch *batch,
1365 const struct pipe_draw_info *draw)
1366 {
1367 const uint64_t dirty = ice->state.dirty;
1368
1369 if (dirty & IRIS_DIRTY_CC_VIEWPORT) {
1370 struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa;
1371 iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) {
1372 ptr.CCViewportPointer =
1373 iris_emit_state(batch, cso->cc_vp, sizeof(cso->cc_vp), 32);
1374 }
1375 }
1376
1377 if (dirty & IRIS_DIRTY_SF_CL_VIEWPORT) {
1378 struct iris_viewport_state *cso = ice->state.cso_vp;
1379 iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) {
1380 ptr.SFClipViewportPointer =
1381 iris_emit_state(batch, cso->sf_cl_vp, sizeof(cso->sf_cl_vp), 64);
1382 }
1383 }
1384
1385 /* XXX: L3 State */
1386
1387 if (dirty & IRIS_DIRTY_URB) {
1388 /* XXX: URB */
1389 }
1390
1391 if (dirty & IRIS_DIRTY_BLEND_STATE) {
1392 //struct iris_blend_state *cso = ice->state.cso_blend;
1393 // XXX: 3DSTATE_BLEND_STATE_POINTERS - BLEND_STATE
1394 // -> from iris_blend_state (most) + iris_depth_stencil_alpha_state
1395 // (alpha test function/enable) + has writeable RT from ???????
1396 }
1397
1398 if (dirty & IRIS_DIRTY_COLOR_CALC_STATE) {
1399 struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa;
1400 uint32_t cc_offset;
1401 void *cc_map =
1402 iris_alloc_state(batch,
1403 sizeof(uint32_t) * GENX(COLOR_CALC_STATE_length),
1404 64, &cc_offset);
1405 iris_pack_state(GENX(COLOR_CALC_STATE), cc_map, cc) {
1406 cc.AlphaTestFormat = ALPHATEST_FLOAT32;
1407 cc.AlphaReferenceValueAsFLOAT32 = cso->alpha.ref_value;
1408 cc.BlendConstantColorRed = ice->state.blend_color.color[0];
1409 cc.BlendConstantColorGreen = ice->state.blend_color.color[1];
1410 cc.BlendConstantColorBlue = ice->state.blend_color.color[2];
1411 cc.BlendConstantColorAlpha = ice->state.blend_color.color[3];
1412 }
1413 iris_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
1414 ptr.ColorCalcStatePointer = cc_offset;
1415 ptr.ColorCalcStatePointerValid = true;
1416 }
1417 }
1418
1419 // XXX: 3DSTATE_CONSTANT_XS
1420 // XXX: 3DSTATE_BINDING_TABLE_POINTERS_XS
1421
1422 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
1423 if (!(dirty & (IRIS_DIRTY_SAMPLER_STATES_VS << stage)))
1424 continue;
1425
1426 // XXX: get sampler count from shader; don't emit them all...
1427 const int count = IRIS_MAX_TEXTURE_SAMPLERS;
1428
1429 uint32_t offset;
1430 uint32_t *map = iris_alloc_state(batch,
1431 count * 4 * GENX(SAMPLER_STATE_length),
1432 32, &offset);
1433
1434 for (int i = 0; i < count; i++) {
1435 // XXX: when we have a correct count, these better be bound
1436 if (!ice->state.samplers[stage][i])
1437 continue;
1438 memcpy(map, ice->state.samplers[stage][i]->sampler_state,
1439 4 * GENX(SAMPLER_STATE_length));
1440 map += GENX(SAMPLER_STATE_length);
1441 }
1442
1443 iris_emit_cmd(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ptr) {
1444 ptr._3DCommandSubOpcode = 43 + stage;
1445 ptr.PointertoVSSamplerState = offset;
1446 }
1447 }
1448
1449 if (dirty & IRIS_DIRTY_MULTISAMPLE) {
1450 iris_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms) {
1451 ms.PixelLocation =
1452 ice->state.cso_rast->half_pixel_center ? CENTER : UL_CORNER;
1453 if (ice->state.framebuffer.samples > 0)
1454 ms.NumberofMultisamples = ffs(ice->state.framebuffer.samples) - 1;
1455 }
1456 }
1457
1458 if (dirty & IRIS_DIRTY_SAMPLE_MASK) {
1459 iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), ms) {
1460 ms.SampleMask = ice->state.sample_mask;
1461 }
1462 }
1463
1464 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
1465 if (!(dirty & (IRIS_DIRTY_VS << stage)))
1466 continue;
1467
1468 if (ice->shaders.prog[stage]) {
1469 iris_batch_emit(batch, ice->shaders.prog[stage]->derived_data,
1470 iris_derived_program_state_size(stage));
1471 } else {
1472 if (stage == MESA_SHADER_TESS_EVAL) {
1473 iris_emit_cmd(batch, GENX(3DSTATE_HS), hs);
1474 iris_emit_cmd(batch, GENX(3DSTATE_TE), te);
1475 iris_emit_cmd(batch, GENX(3DSTATE_DS), ds);
1476 } else if (stage == MESA_SHADER_GEOMETRY) {
1477 iris_emit_cmd(batch, GENX(3DSTATE_GS), gs);
1478 }
1479 }
1480 }
1481
1482 // XXX: SOL and so on
1483
1484 if (dirty & IRIS_DIRTY_CLIP) {
1485 struct iris_rasterizer_state *cso = ice->state.cso_rast;
1486
1487 uint32_t dynamic_clip[GENX(3DSTATE_CLIP_length)];
1488 iris_pack_command(GENX(3DSTATE_CLIP), &dynamic_clip, cl) {
1489 //.NonPerspectiveBarycentricEnable = <comes from FS prog> :(
1490 //.ForceZeroRTAIndexEnable = <comes from FB layers being 0>
1491 // also userclip stuffs...
1492 }
1493 iris_emit_merge(batch, cso->clip, dynamic_clip, ARRAY_SIZE(cso->clip));
1494 }
1495
1496 if (dirty & IRIS_DIRTY_RASTER) {
1497 struct iris_rasterizer_state *cso = ice->state.cso_rast;
1498 iris_batch_emit(batch, cso->raster, sizeof(cso->raster));
1499 iris_batch_emit(batch, cso->sf, sizeof(cso->sf));
1500
1501 }
1502
1503 if (dirty & (IRIS_DIRTY_RASTER | IRIS_DIRTY_FS)) {
1504 struct iris_rasterizer_state *cso = ice->state.cso_rast;
1505 struct brw_wm_prog_data *wm_prog_data = (void *)
1506 ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;
1507 uint32_t dynamic_wm[GENX(3DSTATE_WM_length)];
1508
1509 iris_pack_command(GENX(3DSTATE_WM), &dynamic_wm, wm) {
1510 wm.BarycentricInterpolationMode =
1511 wm_prog_data->barycentric_interp_modes;
1512
1513 if (wm_prog_data->early_fragment_tests)
1514 wm.EarlyDepthStencilControl = EDSC_PREPS;
1515 else if (wm_prog_data->has_side_effects)
1516 wm.EarlyDepthStencilControl = EDSC_PSEXEC;
1517 }
1518 iris_emit_merge(batch, cso->wm, dynamic_wm, ARRAY_SIZE(cso->wm));
1519 }
1520
1521 // XXX: SBE, SBE_SWIZ
1522
1523 if (dirty & IRIS_DIRTY_PS_BLEND) {
1524 struct iris_blend_state *cso = ice->state.cso_blend;
1525 iris_batch_emit(batch, cso->ps_blend, sizeof(cso->ps_blend));
1526 }
1527
1528 if (dirty & IRIS_DIRTY_WM_DEPTH_STENCIL) {
1529 struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa;
1530 struct pipe_stencil_ref *p_stencil_refs = &ice->state.stencil_ref;
1531
1532 uint32_t stencil_refs[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];
1533 iris_pack_command(GENX(3DSTATE_WM_DEPTH_STENCIL), &stencil_refs, wmds) {
1534 wmds.StencilReferenceValue = p_stencil_refs->ref_value[0];
1535 wmds.BackfaceStencilReferenceValue = p_stencil_refs->ref_value[1];
1536 }
1537 iris_emit_merge(batch, cso->wmds, stencil_refs, ARRAY_SIZE(cso->wmds));
1538 }
1539
1540 if (dirty & IRIS_DIRTY_SCISSOR) {
1541 uint32_t scissor_offset =
1542 iris_emit_state(batch, ice->state.scissors,
1543 sizeof(struct pipe_scissor_state) *
1544 ice->state.num_scissors, 32);
1545
1546 iris_emit_cmd(batch, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) {
1547 ptr.ScissorRectPointer = scissor_offset;
1548 }
1549 }
1550
1551 // XXX: 3DSTATE_DEPTH_BUFFER and friends
1552
1553 if (dirty & IRIS_DIRTY_POLYGON_STIPPLE) {
1554 iris_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_PATTERN), poly) {
1555 for (int i = 0; i < 32; i++) {
1556 poly.PatternRow[i] = ice->state.poly_stipple.stipple[i];
1557 }
1558 }
1559 }
1560
1561 if (dirty & IRIS_DIRTY_LINE_STIPPLE) {
1562 struct iris_rasterizer_state *cso = ice->state.cso_rast;
1563 iris_batch_emit(batch, cso->line_stipple, sizeof(cso->line_stipple));
1564 }
1565
1566 if (1) {
1567 iris_emit_cmd(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
1568 topo.PrimitiveTopologyType =
1569 translate_prim_type(draw->mode, draw->vertices_per_patch);
1570 }
1571 }
1572
1573 if (draw->index_size > 0) {
1574 struct iris_resource *res = (struct iris_resource *)draw->index.resource;
1575
1576 assert(!draw->has_user_indices);
1577
1578 iris_emit_cmd(batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
1579 ib.IndexFormat = draw->index_size;
1580 ib.MOCS = MOCS_WB;
1581 ib.BufferSize = res->bo->size;
1582 // XXX: gah, addresses :( need two different combine address funcs
1583 // ib.BufferStartingAddress = res->bo;
1584 }
1585 }
1586
1587 if (dirty & IRIS_DIRTY_VERTEX_BUFFERS) {
1588 struct iris_vertex_buffer_state *cso = ice->state.cso_vertex_buffers;
1589
1590 STATIC_ASSERT(GENX(VERTEX_BUFFER_STATE_length) == 4);
1591 STATIC_ASSERT((GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_bits) % 32) == 0);
1592
1593 uint64_t *addr = batch->cmdbuf.map_next + sizeof(uint32_t) *
1594 (GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_bits) / 32);
1595 uint32_t *delta = cso->vertex_buffers +
1596 (1 + GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_bits) / 32);
1597
1598 iris_batch_emit(batch, cso->vertex_buffers,
1599 sizeof(uint32_t) * (1 + 4 * cso->num_buffers));
1600
1601 for (unsigned i = 0; i < cso->num_buffers; i++) {
1602 *addr = iris_batch_reloc(batch, (void *) addr - batch->cmdbuf.map,
1603 cso->bos[i].bo, cso->bos[i].offset +
1604 *delta, cso->bos[i].reloc_flags);
1605 addr = (void *) addr + 16;
1606 delta = (void *) delta + 16;
1607 }
1608 }
1609
1610 if (dirty & IRIS_DIRTY_VERTEX_ELEMENTS) {
1611 struct iris_vertex_element_state *cso = ice->state.cso_vertex_elements;
1612 iris_batch_emit(batch, cso->vertex_elements, sizeof(uint32_t) *
1613 (1 + cso->count * GENX(VERTEX_ELEMENT_STATE_length)));
1614 for (int i = 0; i < cso->count; i++) {
1615 iris_batch_emit(batch, cso->vf_instancing[i], sizeof(uint32_t) *
1616 (cso->count * GENX(3DSTATE_VF_INSTANCING_length)));
1617 }
1618 for (int i = 0; i < cso->count; i++) {
1619 /* TODO: vertexid, instanceid support */
1620 iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgvs);
1621 }
1622 }
1623
1624 if (1) {
1625 iris_emit_cmd(batch, GENX(3DSTATE_VF), vf) {
1626 if (draw->primitive_restart) {
1627 vf.IndexedDrawCutIndexEnable = true;
1628 vf.CutIndex = draw->restart_index;
1629 }
1630 }
1631 }
1632
1633 // XXX: Gen8 - PMA fix
1634
1635 assert(!draw->indirect); // XXX: indirect support
1636
1637 iris_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {
1638 prim.StartInstanceLocation = draw->start_instance;
1639 prim.InstanceCount = draw->instance_count;
1640 prim.VertexCountPerInstance = draw->count;
1641 prim.VertexAccessType = draw->index_size > 0 ? RANDOM : SEQUENTIAL;
1642
1643 // XXX: this is probably bonkers.
1644 prim.StartVertexLocation = draw->start;
1645
1646 if (draw->index_size) {
1647 prim.BaseVertexLocation += draw->index_bias;
1648 } else {
1649 prim.StartVertexLocation += draw->index_bias;
1650 }
1651
1652 //prim.BaseVertexLocation = ...;
1653 }
1654 #if 0
1655 l3 configuration
1656
1657 3DSTATE_URB_*
1658 -> TODO
1659
1660 3DSTATE_CONSTANT_* - push constants
1661 -> TODO
1662
1663 Surfaces:
1664 - pull constants
1665 - ubos/ssbos/abos
1666 - images
1667 - textures
1668 - render targets - write and read
1669 3DSTATE_BINDING_TABLE_POINTERS_*
1670 -> TODO
1671
1672 3DSTATE_STREAMOUT
1673 3DSTATE_SO_BUFFER
1674 3DSTATE_SO_DECL_LIST
1675
1676 3DSTATE_WM
1677 -> iris_raster_state + FS state (barycentric, EDSC)
1678 3DSTATE_SBE
1679 -> iris_raster_state (point sprite texture coordinate origin)
1680 -> bunch of shader state...
1681 3DSTATE_SBE_SWIZ
1682 -> FS state
1683
1684 3DSTATE_DEPTH_BUFFER
1685 3DSTATE_HIER_DEPTH_BUFFER
1686 3DSTATE_STENCIL_BUFFER
1687 3DSTATE_CLEAR_PARAMS
1688 -> iris_framebuffer_state?
1689 #endif
1690 }
1691
/**
 * Placeholder for binding a compute state object.
 *
 * The body is empty, so \p state is currently ignored.
 */
static void
iris_bind_compute_state(struct pipe_context *ctx, void *state)
{
}
1696
1697 //pkt.SamplerCount = \
1698 //DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \
1699 //pkt.PerThreadScratchSpace = prog_data->total_scratch == 0 ? 0 : \
1700 //ffs(stage_state->per_thread_scratch) - 11; \
1701
/**
 * Fill in the thread-dispatch fields shared by the 3DSTATE_{VS,HS,DS,GS}
 * packets.
 *
 * \param pkt     the packet being packed
 * \param prefix  token pasted in front of URBEntryReadLength/Offset
 *                (e.g. Vertex or Patch) to form the packet's field names
 *
 * Expands in the caller's scope and reads the locals `shader`, `prog_data`
 * and `vue_prog_data`, which must therefore exist under exactly those names.
 */
#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix)                          \
   pkt.KernelStartPointer = shader->prog_offset;                          \
   pkt.BindingTableEntryCount = prog_data->binding_table.size_bytes / 4;  \
   pkt.FloatingPointMode = prog_data->use_alt_mode;                       \
                                                                          \
   pkt.DispatchGRFStartRegisterForURBData =                               \
      prog_data->dispatch_grf_start_reg;                                  \
   pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length;       \
   pkt.prefix##URBEntryReadOffset = 0;                                    \
                                                                          \
   pkt.StatisticsEnable = true;                                           \
   pkt.Enable = true;
1714
1715 static void
1716 iris_set_vs_state(const struct gen_device_info *devinfo,
1717 struct iris_compiled_shader *shader)
1718 {
1719 struct brw_stage_prog_data *prog_data = shader->prog_data;
1720 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
1721
1722 iris_pack_command(GENX(3DSTATE_VS), shader->derived_data, vs) {
1723 INIT_THREAD_DISPATCH_FIELDS(vs, Vertex);
1724 vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;
1725 vs.SIMD8DispatchEnable = true;
1726 vs.UserClipDistanceCullTestEnableBitmask =
1727 vue_prog_data->cull_distance_mask;
1728 }
1729 }
1730
1731 static void
1732 iris_set_tcs_state(const struct gen_device_info *devinfo,
1733 struct iris_compiled_shader *shader)
1734 {
1735 struct brw_stage_prog_data *prog_data = shader->prog_data;
1736 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
1737 struct brw_tcs_prog_data *tcs_prog_data = (void *) prog_data;
1738
1739 iris_pack_command(GENX(3DSTATE_HS), shader->derived_data, hs) {
1740 INIT_THREAD_DISPATCH_FIELDS(hs, Vertex);
1741
1742 hs.InstanceCount = tcs_prog_data->instances - 1;
1743 hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
1744 hs.IncludeVertexHandles = true;
1745 }
1746 }
1747
1748 static void
1749 iris_set_tes_state(const struct gen_device_info *devinfo,
1750 struct iris_compiled_shader *shader)
1751 {
1752 struct brw_stage_prog_data *prog_data = shader->prog_data;
1753 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
1754 struct brw_tes_prog_data *tes_prog_data = (void *) prog_data;
1755
1756 uint32_t *te_state = shader->derived_data;
1757 uint32_t *ds_state = te_state + GENX(3DSTATE_TE_length);
1758
1759 iris_pack_command(GENX(3DSTATE_TE), te_state, te) {
1760 te.Partitioning = tes_prog_data->partitioning;
1761 te.OutputTopology = tes_prog_data->output_topology;
1762 te.TEDomain = tes_prog_data->domain;
1763 te.TEEnable = true;
1764 te.MaximumTessellationFactorOdd = 63.0;
1765 te.MaximumTessellationFactorNotOdd = 64.0;
1766 }
1767
1768 iris_pack_command(GENX(3DSTATE_DS), ds_state, ds) {
1769 INIT_THREAD_DISPATCH_FIELDS(ds, Patch);
1770
1771 ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH;
1772 ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;
1773 ds.ComputeWCoordinateEnable =
1774 tes_prog_data->domain == BRW_TESS_DOMAIN_TRI;
1775
1776 ds.UserClipDistanceCullTestEnableBitmask =
1777 vue_prog_data->cull_distance_mask;
1778 }
1779
1780 }
1781
1782 static void
1783 iris_set_gs_state(const struct gen_device_info *devinfo,
1784 struct iris_compiled_shader *shader)
1785 {
1786 struct brw_stage_prog_data *prog_data = shader->prog_data;
1787 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
1788 struct brw_gs_prog_data *gs_prog_data = (void *) prog_data;
1789
1790 iris_pack_command(GENX(3DSTATE_GS), shader->derived_data, gs) {
1791 INIT_THREAD_DISPATCH_FIELDS(gs, Vertex);
1792
1793 gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
1794 gs.OutputTopology = gs_prog_data->output_topology;
1795 gs.ControlDataHeaderSize =
1796 gs_prog_data->control_data_header_size_hwords;
1797 gs.InstanceControl = gs_prog_data->invocations - 1;
1798 gs.DispatchMode = SIMD8;
1799 gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;
1800 gs.ControlDataFormat = gs_prog_data->control_data_format;
1801 gs.ReorderMode = TRAILING;
1802 gs.ExpectedVertexCount = gs_prog_data->vertices_in;
1803 gs.MaximumNumberofThreads =
1804 GEN_GEN == 8 ? (devinfo->max_gs_threads / 2 - 1)
1805 : (devinfo->max_gs_threads - 1);
1806
1807 if (gs_prog_data->static_vertex_count != -1) {
1808 gs.StaticOutput = true;
1809 gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count;
1810 }
1811 gs.IncludeVertexHandles = vue_prog_data->include_vue_handles;
1812
1813 gs.UserClipDistanceCullTestEnableBitmask =
1814 vue_prog_data->cull_distance_mask;
1815
1816 const int urb_entry_write_offset = 1;
1817 const uint32_t urb_entry_output_length =
1818 DIV_ROUND_UP(vue_prog_data->vue_map.num_slots, 2) -
1819 urb_entry_write_offset;
1820
1821 gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset;
1822 gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1);
1823 }
1824 }
1825
1826 static void
1827 iris_set_fs_state(const struct gen_device_info *devinfo,
1828 struct iris_compiled_shader *shader)
1829 {
1830 struct brw_stage_prog_data *prog_data = shader->prog_data;
1831 struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data;
1832
1833 uint32_t *ps_state = (void *) shader->derived_data;
1834 uint32_t *psx_state = ps_state + GENX(3DSTATE_PS_length);
1835
1836 iris_pack_command(GENX(3DSTATE_PS), ps_state, ps) {
1837 ps.VectorMaskEnable = true;
1838 //ps.SamplerCount = ...
1839 ps.BindingTableEntryCount = prog_data->binding_table.size_bytes / 4;
1840 ps.FloatingPointMode = prog_data->use_alt_mode;
1841 ps.MaximumNumberofThreadsPerPSD = 64 - (GEN_GEN == 8 ? 2 : 1);
1842
1843 ps.PushConstantEnable = prog_data->nr_params > 0 ||
1844 prog_data->ubo_ranges[0].length > 0;
1845
1846 /* From the documentation for this packet:
1847 * "If the PS kernel does not need the Position XY Offsets to
1848 * compute a Position Value, then this field should be programmed
1849 * to POSOFFSET_NONE."
1850 *
1851 * "SW Recommendation: If the PS kernel needs the Position Offsets
1852 * to compute a Position XY value, this field should match Position
1853 * ZW Interpolation Mode to ensure a consistent position.xyzw
1854 * computation."
1855 *
1856 * We only require XY sample offsets. So, this recommendation doesn't
1857 * look useful at the moment. We might need this in future.
1858 */
1859 ps.PositionXYOffsetSelect =
1860 wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE;
1861 ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
1862 ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
1863 ps._32PixelDispatchEnable = wm_prog_data->dispatch_32;
1864
1865 // XXX: Disable SIMD32 with 16x MSAA
1866
1867 ps.DispatchGRFStartRegisterForConstantSetupData0 =
1868 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
1869 ps.DispatchGRFStartRegisterForConstantSetupData1 =
1870 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
1871 ps.DispatchGRFStartRegisterForConstantSetupData2 =
1872 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
1873
1874 ps.KernelStartPointer0 =
1875 shader->prog_offset + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
1876 ps.KernelStartPointer1 =
1877 shader->prog_offset + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
1878 ps.KernelStartPointer2 =
1879 shader->prog_offset + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
1880 }
1881
1882 iris_pack_command(GENX(3DSTATE_PS_EXTRA), psx_state, psx) {
1883 psx.PixelShaderValid = true;
1884 psx.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
1885 psx.PixelShaderKillsPixel = wm_prog_data->uses_kill;
1886 psx.AttributeEnable = wm_prog_data->num_varying_inputs != 0;
1887 psx.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
1888 psx.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
1889 psx.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
1890
1891 if (wm_prog_data->uses_sample_mask) {
1892 /* TODO: conservative rasterization */
1893 if (wm_prog_data->post_depth_coverage)
1894 psx.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
1895 else
1896 psx.InputCoverageMaskState = ICMS_NORMAL;
1897 }
1898
1899 psx.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
1900 psx.PixelShaderPullsBary = wm_prog_data->pulls_bary;
1901 psx.PixelShaderComputesStencil = wm_prog_data->computed_stencil;
1902
1903 // XXX: UAV bit
1904 }
1905 }
1906
1907 unsigned
1908 iris_derived_program_state_size(enum iris_program_cache_id cache_id)
1909 {
1910 assert(cache_id <= IRIS_CACHE_CS);
1911
1912 static const unsigned dwords[] = {
1913 [IRIS_CACHE_VS] = GENX(3DSTATE_VS_length),
1914 [IRIS_CACHE_TCS] = GENX(3DSTATE_HS_length),
1915 [IRIS_CACHE_TES] = GENX(3DSTATE_TE_length) + GENX(3DSTATE_DS_length),
1916 [IRIS_CACHE_GS] = GENX(3DSTATE_GS_length),
1917 [IRIS_CACHE_FS] =
1918 GENX(3DSTATE_PS_length) + GENX(3DSTATE_PS_EXTRA_length),
1919 [IRIS_CACHE_CS] = 0,
1920 [IRIS_CACHE_BLORP_BLIT] = 0,
1921 };
1922
1923 return sizeof(uint32_t) * dwords[cache_id];
1924 }
1925
1926 void
1927 iris_set_derived_program_state(const struct gen_device_info *devinfo,
1928 enum iris_program_cache_id cache_id,
1929 struct iris_compiled_shader *shader)
1930 {
1931 switch (cache_id) {
1932 case IRIS_CACHE_VS:
1933 iris_set_vs_state(devinfo, shader);
1934 break;
1935 case IRIS_CACHE_TCS:
1936 iris_set_tcs_state(devinfo, shader);
1937 break;
1938 case IRIS_CACHE_TES:
1939 iris_set_tes_state(devinfo, shader);
1940 break;
1941 case IRIS_CACHE_GS:
1942 iris_set_gs_state(devinfo, shader);
1943 break;
1944 case IRIS_CACHE_FS:
1945 iris_set_fs_state(devinfo, shader);
1946 break;
1947 case IRIS_CACHE_CS:
1948 break;
1949 default:
1950 break;
1951 }
1952 }
1953
1954 void
1955 iris_destroy_state(struct iris_context *ice)
1956 {
1957 // XXX: unreference resources/surfaces.
1958 for (unsigned i = 0; i < ice->state.framebuffer.nr_cbufs; i++) {
1959 pipe_surface_reference(&ice->state.framebuffer.cbufs[i], NULL);
1960 }
1961 pipe_surface_reference(&ice->state.framebuffer.zsbuf, NULL);
1962 }
1963
1964 void
1965 iris_init_state(struct iris_context *ice)
1966 {
1967 struct pipe_context *ctx = &ice->ctx;
1968
1969 ice->state.dirty = ~0ull;
1970
1971 ctx->create_blend_state = iris_create_blend_state;
1972 ctx->create_depth_stencil_alpha_state = iris_create_zsa_state;
1973 ctx->create_rasterizer_state = iris_create_rasterizer_state;
1974 ctx->create_sampler_state = iris_create_sampler_state;
1975 ctx->create_sampler_view = iris_create_sampler_view;
1976 ctx->create_surface = iris_create_surface;
1977 ctx->create_vertex_elements_state = iris_create_vertex_elements;
1978 ctx->create_compute_state = iris_create_compute_state;
1979 ctx->bind_blend_state = iris_bind_blend_state;
1980 ctx->bind_depth_stencil_alpha_state = iris_bind_zsa_state;
1981 ctx->bind_sampler_states = iris_bind_sampler_states;
1982 ctx->bind_rasterizer_state = iris_bind_rasterizer_state;
1983 ctx->bind_vertex_elements_state = iris_bind_vertex_elements_state;
1984 ctx->bind_compute_state = iris_bind_compute_state;
1985 ctx->delete_blend_state = iris_delete_state;
1986 ctx->delete_depth_stencil_alpha_state = iris_delete_state;
1987 ctx->delete_fs_state = iris_delete_state;
1988 ctx->delete_rasterizer_state = iris_delete_state;
1989 ctx->delete_sampler_state = iris_delete_state;
1990 ctx->delete_vertex_elements_state = iris_delete_state;
1991 ctx->delete_compute_state = iris_delete_state;
1992 ctx->delete_tcs_state = iris_delete_state;
1993 ctx->delete_tes_state = iris_delete_state;
1994 ctx->delete_gs_state = iris_delete_state;
1995 ctx->delete_vs_state = iris_delete_state;
1996 ctx->set_blend_color = iris_set_blend_color;
1997 ctx->set_clip_state = iris_set_clip_state;
1998 ctx->set_constant_buffer = iris_set_constant_buffer;
1999 ctx->set_sampler_views = iris_set_sampler_views;
2000 ctx->set_framebuffer_state = iris_set_framebuffer_state;
2001 ctx->set_polygon_stipple = iris_set_polygon_stipple;
2002 ctx->set_sample_mask = iris_set_sample_mask;
2003 ctx->set_scissor_states = iris_set_scissor_states;
2004 ctx->set_stencil_ref = iris_set_stencil_ref;
2005 ctx->set_vertex_buffers = iris_set_vertex_buffers;
2006 ctx->set_viewport_states = iris_set_viewport_states;
2007 ctx->sampler_view_destroy = iris_sampler_view_destroy;
2008 ctx->surface_destroy = iris_surface_destroy;
2009 ctx->draw_vbo = iris_draw_vbo;
2010 ctx->launch_grid = iris_launch_grid;
2011 ctx->create_stream_output_target = iris_create_stream_output_target;
2012 ctx->stream_output_target_destroy = iris_stream_output_target_destroy;
2013 ctx->set_stream_output_targets = iris_set_stream_output_targets;
2014 }