iris: better SBE
[mesa.git] / src / gallium / drivers / iris / iris_state.c
1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include <stdio.h>
24 #include <errno.h>
25
26 #ifdef HAVE_VALGRIND
27 #include <valgrind.h>
28 #include <memcheck.h>
29 #define VG(x) x
30 #define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))
31 #else
32 #define VG(x)
33 #endif
34
35 #include "pipe/p_defines.h"
36 #include "pipe/p_state.h"
37 #include "pipe/p_context.h"
38 #include "pipe/p_screen.h"
39 #include "util/u_inlines.h"
40 #include "util/u_transfer.h"
41 #include "util/u_upload_mgr.h"
42 #include "i915_drm.h"
43 #include "nir.h"
44 #include "intel/compiler/brw_compiler.h"
45 #include "intel/common/gen_l3_config.h"
46 #include "intel/common/gen_sample_positions.h"
47 #include "iris_batch.h"
48 #include "iris_context.h"
49 #include "iris_pipe.h"
50 #include "iris_resource.h"
51
52 #define __gen_address_type struct iris_address
53 #define __gen_user_data struct iris_batch
54
55 #define ARRAY_BYTES(x) (sizeof(uint32_t) * ARRAY_SIZE(x))
56
57 static uint64_t
58 __gen_combine_address(struct iris_batch *batch, void *location,
59 struct iris_address addr, uint32_t delta)
60 {
61 uint64_t result = addr.offset + delta;
62
63 if (addr.bo) {
64 iris_use_pinned_bo(batch, addr.bo, addr.write);
65 /* Assume this is a general address, not relative to a base. */
66 result += addr.bo->gtt_offset;
67 }
68
69 return result;
70 }
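/* Usage sketch (hedged; the exact generated shape varies by field): the
 * genxml-generated pack functions invoke __gen_combine_address() for each
 * address field they pack, roughly:
 *
 *    const uint64_t v = __gen_combine_address(batch, &dw[n], addr, bits);
 *    dw[n] = v;
 *    dw[n + 1] = v >> 32;
 *
 * so pinning the BO is a side effect of packing a command that points at it.
 */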
71
72 #define __genxml_cmd_length(cmd) cmd ## _length
73 #define __genxml_cmd_length_bias(cmd) cmd ## _length_bias
74 #define __genxml_cmd_header(cmd) cmd ## _header
75 #define __genxml_cmd_pack(cmd) cmd ## _pack
76
77 static void *
78 get_command_space(struct iris_batch *batch, unsigned bytes)
79 {
80 iris_require_command_space(batch, bytes);
81 void *map = batch->cmdbuf.map_next;
82 batch->cmdbuf.map_next += bytes;
83 return map;
84 }
85
86 #define _iris_pack_command(batch, cmd, dst, name) \
87 for (struct cmd name = { __genxml_cmd_header(cmd) }, \
88 *_dst = (void *)(dst); __builtin_expect(_dst != NULL, 1); \
89 ({ __genxml_cmd_pack(cmd)(batch, (void *)_dst, &name); \
90 _dst = NULL; \
91 }))
92
93 #define iris_pack_command(cmd, dst, name) \
94 _iris_pack_command(NULL, cmd, dst, name)
95
96 #define iris_pack_state(cmd, dst, name) \
97 for (struct cmd name = {}, \
98 *_dst = (void *)(dst); __builtin_expect(_dst != NULL, 1); \
99 __genxml_cmd_pack(cmd)(NULL, (void *)_dst, &name), \
100 _dst = NULL)
101
102 #define iris_emit_cmd(batch, cmd, name) \
103 _iris_pack_command(batch, cmd, get_command_space(batch, 4 * __genxml_cmd_length(cmd)), name)
104
105 #define iris_emit_merge(batch, dwords0, dwords1, num_dwords) \
106 do { \
107 uint32_t *dw = get_command_space(batch, 4 * num_dwords); \
108 for (uint32_t i = 0; i < num_dwords; i++) \
109 dw[i] = (dwords0)[i] | (dwords1)[i]; \
110 VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, num_dwords * 4)); \
111 } while (0)
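/* Usage sketch for the macros above (mirrors real uses later in this file):
 *
 *    iris_emit_cmd(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
 *       rect.ClippedDrawingRectangleXMax = UINT16_MAX;
 *    }
 *
 * The for-loop trick runs the body exactly once against a stack-allocated
 * GENX(...) struct, then the loop-increment expression packs it into space
 * reserved by get_command_space(). iris_emit_merge() instead ORs two
 * pre-packed dword streams together, e.g. a CSO's dwords with draw-time
 * dwords.
 */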
112
113 #include "genxml/genX_pack.h"
114 #include "genxml/gen_macros.h"
115 #include "genxml/genX_bits.h"
116
117 #define MOCS_WB (2 << 1)
118
119 UNUSED static void pipe_asserts(void)
120 {
121 #define PIPE_ASSERT(x) STATIC_ASSERT((int)x)
122
123 /* pipe_logicop happens to match the hardware. */
124 PIPE_ASSERT(PIPE_LOGICOP_CLEAR == LOGICOP_CLEAR);
125 PIPE_ASSERT(PIPE_LOGICOP_NOR == LOGICOP_NOR);
126 PIPE_ASSERT(PIPE_LOGICOP_AND_INVERTED == LOGICOP_AND_INVERTED);
127 PIPE_ASSERT(PIPE_LOGICOP_COPY_INVERTED == LOGICOP_COPY_INVERTED);
128 PIPE_ASSERT(PIPE_LOGICOP_AND_REVERSE == LOGICOP_AND_REVERSE);
129 PIPE_ASSERT(PIPE_LOGICOP_INVERT == LOGICOP_INVERT);
130 PIPE_ASSERT(PIPE_LOGICOP_XOR == LOGICOP_XOR);
131 PIPE_ASSERT(PIPE_LOGICOP_NAND == LOGICOP_NAND);
132 PIPE_ASSERT(PIPE_LOGICOP_AND == LOGICOP_AND);
133 PIPE_ASSERT(PIPE_LOGICOP_EQUIV == LOGICOP_EQUIV);
134 PIPE_ASSERT(PIPE_LOGICOP_NOOP == LOGICOP_NOOP);
135 PIPE_ASSERT(PIPE_LOGICOP_OR_INVERTED == LOGICOP_OR_INVERTED);
136 PIPE_ASSERT(PIPE_LOGICOP_COPY == LOGICOP_COPY);
137 PIPE_ASSERT(PIPE_LOGICOP_OR_REVERSE == LOGICOP_OR_REVERSE);
138 PIPE_ASSERT(PIPE_LOGICOP_OR == LOGICOP_OR);
139 PIPE_ASSERT(PIPE_LOGICOP_SET == LOGICOP_SET);
140
141 /* pipe_blendfactor happens to match the hardware. */
142 PIPE_ASSERT(PIPE_BLENDFACTOR_ONE == BLENDFACTOR_ONE);
143 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_COLOR == BLENDFACTOR_SRC_COLOR);
144 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_ALPHA == BLENDFACTOR_SRC_ALPHA);
145 PIPE_ASSERT(PIPE_BLENDFACTOR_DST_ALPHA == BLENDFACTOR_DST_ALPHA);
146 PIPE_ASSERT(PIPE_BLENDFACTOR_DST_COLOR == BLENDFACTOR_DST_COLOR);
147 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE == BLENDFACTOR_SRC_ALPHA_SATURATE);
148 PIPE_ASSERT(PIPE_BLENDFACTOR_CONST_COLOR == BLENDFACTOR_CONST_COLOR);
149 PIPE_ASSERT(PIPE_BLENDFACTOR_CONST_ALPHA == BLENDFACTOR_CONST_ALPHA);
150 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC1_COLOR == BLENDFACTOR_SRC1_COLOR);
151 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC1_ALPHA == BLENDFACTOR_SRC1_ALPHA);
152 PIPE_ASSERT(PIPE_BLENDFACTOR_ZERO == BLENDFACTOR_ZERO);
153 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC_COLOR == BLENDFACTOR_INV_SRC_COLOR);
154 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC_ALPHA == BLENDFACTOR_INV_SRC_ALPHA);
155 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_DST_ALPHA == BLENDFACTOR_INV_DST_ALPHA);
156 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_DST_COLOR == BLENDFACTOR_INV_DST_COLOR);
157 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_CONST_COLOR == BLENDFACTOR_INV_CONST_COLOR);
158 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_CONST_ALPHA == BLENDFACTOR_INV_CONST_ALPHA);
159 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC1_COLOR == BLENDFACTOR_INV_SRC1_COLOR);
160 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC1_ALPHA == BLENDFACTOR_INV_SRC1_ALPHA);
161
162 /* pipe_blend_func happens to match the hardware. */
163 PIPE_ASSERT(PIPE_BLEND_ADD == BLENDFUNCTION_ADD);
164 PIPE_ASSERT(PIPE_BLEND_SUBTRACT == BLENDFUNCTION_SUBTRACT);
165 PIPE_ASSERT(PIPE_BLEND_REVERSE_SUBTRACT == BLENDFUNCTION_REVERSE_SUBTRACT);
166 PIPE_ASSERT(PIPE_BLEND_MIN == BLENDFUNCTION_MIN);
167 PIPE_ASSERT(PIPE_BLEND_MAX == BLENDFUNCTION_MAX);
168
169 /* pipe_stencil_op happens to match the hardware. */
170 PIPE_ASSERT(PIPE_STENCIL_OP_KEEP == STENCILOP_KEEP);
171 PIPE_ASSERT(PIPE_STENCIL_OP_ZERO == STENCILOP_ZERO);
172 PIPE_ASSERT(PIPE_STENCIL_OP_REPLACE == STENCILOP_REPLACE);
173 PIPE_ASSERT(PIPE_STENCIL_OP_INCR == STENCILOP_INCRSAT);
174 PIPE_ASSERT(PIPE_STENCIL_OP_DECR == STENCILOP_DECRSAT);
175 PIPE_ASSERT(PIPE_STENCIL_OP_INCR_WRAP == STENCILOP_INCR);
176 PIPE_ASSERT(PIPE_STENCIL_OP_DECR_WRAP == STENCILOP_DECR);
177 PIPE_ASSERT(PIPE_STENCIL_OP_INVERT == STENCILOP_INVERT);
178
179 /* pipe_sprite_coord_mode happens to match 3DSTATE_SBE */
180 PIPE_ASSERT(PIPE_SPRITE_COORD_UPPER_LEFT == UPPERLEFT);
181 PIPE_ASSERT(PIPE_SPRITE_COORD_LOWER_LEFT == LOWERLEFT);
182 #undef PIPE_ASSERT
183 }
184
185 static unsigned
186 translate_prim_type(enum pipe_prim_type prim, uint8_t verts_per_patch)
187 {
188 static const unsigned map[] = {
189 [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST,
190 [PIPE_PRIM_LINES] = _3DPRIM_LINELIST,
191 [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP,
192 [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP,
193 [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST,
194 [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
195 [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
196 [PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST,
197 [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
198 [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON,
199 [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
200 [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
201 [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
202 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
203 [PIPE_PRIM_PATCHES] = _3DPRIM_PATCHLIST_1 - 1,
204 };
205
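/* The _3DPRIM_PATCHLIST_n values are consecutive, so for PIPE_PRIM_PATCHES
 * the base (_3DPRIM_PATCHLIST_1 - 1) plus e.g. verts_per_patch == 4 selects
 * _3DPRIM_PATCHLIST_4. */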
206 return map[prim] + (prim == PIPE_PRIM_PATCHES ? verts_per_patch : 0);
207 }
208
209 static unsigned
210 translate_compare_func(enum pipe_compare_func pipe_func)
211 {
212 static const unsigned map[] = {
213 [PIPE_FUNC_NEVER] = COMPAREFUNCTION_NEVER,
214 [PIPE_FUNC_LESS] = COMPAREFUNCTION_LESS,
215 [PIPE_FUNC_EQUAL] = COMPAREFUNCTION_EQUAL,
216 [PIPE_FUNC_LEQUAL] = COMPAREFUNCTION_LEQUAL,
217 [PIPE_FUNC_GREATER] = COMPAREFUNCTION_GREATER,
218 [PIPE_FUNC_NOTEQUAL] = COMPAREFUNCTION_NOTEQUAL,
219 [PIPE_FUNC_GEQUAL] = COMPAREFUNCTION_GEQUAL,
220 [PIPE_FUNC_ALWAYS] = COMPAREFUNCTION_ALWAYS,
221 };
222 return map[pipe_func];
223 }
224
225 static unsigned
226 translate_shadow_func(enum pipe_compare_func pipe_func)
227 {
228 /* Gallium specifies the result of shadow comparisons as:
229 *
230 * 1 if ref <op> texel,
231 * 0 otherwise.
232 *
233 * The hardware does:
234 *
235 * 0 if texel <op> ref,
236 * 1 otherwise.
237 *
238 * So we need to flip the operator and also negate.
239 */
240 static const unsigned map[] = {
241 [PIPE_FUNC_NEVER] = PREFILTEROPALWAYS,
242 [PIPE_FUNC_LESS] = PREFILTEROPLEQUAL,
243 [PIPE_FUNC_EQUAL] = PREFILTEROPNOTEQUAL,
244 [PIPE_FUNC_LEQUAL] = PREFILTEROPLESS,
245 [PIPE_FUNC_GREATER] = PREFILTEROPGEQUAL,
246 [PIPE_FUNC_NOTEQUAL] = PREFILTEROPEQUAL,
247 [PIPE_FUNC_GEQUAL] = PREFILTEROPGREATER,
248 [PIPE_FUNC_ALWAYS] = PREFILTEROPNEVER,
249 };
250 return map[pipe_func];
251 }
252
253 static unsigned
254 translate_cull_mode(unsigned pipe_face)
255 {
256 static const unsigned map[4] = {
257 [PIPE_FACE_NONE] = CULLMODE_NONE,
258 [PIPE_FACE_FRONT] = CULLMODE_FRONT,
259 [PIPE_FACE_BACK] = CULLMODE_BACK,
260 [PIPE_FACE_FRONT_AND_BACK] = CULLMODE_BOTH,
261 };
262 return map[pipe_face];
263 }
264
265 static unsigned
266 translate_fill_mode(unsigned pipe_polymode)
267 {
268 static const unsigned map[4] = {
269 [PIPE_POLYGON_MODE_FILL] = FILL_MODE_SOLID,
270 [PIPE_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME,
271 [PIPE_POLYGON_MODE_POINT] = FILL_MODE_POINT,
272 [PIPE_POLYGON_MODE_FILL_RECTANGLE] = FILL_MODE_SOLID,
273 };
274 return map[pipe_polymode];
275 }
276
277 static struct iris_address
278 ro_bo(struct iris_bo *bo, uint64_t offset)
279 {
280 /* Not for CSOs! */
281 return (struct iris_address) { .bo = bo, .offset = offset };
282 }
283
284 /**
285 * Returns the BO's address relative to the appropriate base address.
286 *
287 * All of our base addresses are programmed to the start of a 4GB region,
288 * so simply returning the bottom 32 bits of the BO address will give us
289 * the offset from whatever base address corresponds to that memory region.
290 */
291 static uint32_t
292 bo_offset_from_base_address(struct pipe_resource *res)
293 {
294 struct iris_bo *bo = ((struct iris_resource *) res)->bo;
295
296 /* This only works for buffers in the memory zones corresponding to a
297 * base address - the top, unbounded memory zone doesn't have a base.
298 */
299 assert(bo->gtt_offset < 3 * (1ull << 32));
300 return bo->gtt_offset;
301 }
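/* Worked example: with SurfaceStateBaseAddress programmed to 1ull << 32 (see
 * iris_init_render_context below), a surface-state BO placed at gtt_offset
 * 0x100002000 yields offset 0x2000 from that base - exactly the low 32 bits
 * returned here. */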
302
303 static uint32_t *
304 stream_state(struct iris_batch *batch,
305 struct u_upload_mgr *uploader,
306 unsigned size,
307 unsigned alignment,
308 uint32_t *out_offset)
309 {
310 struct pipe_resource *res = NULL;
311 void *ptr = NULL;
312
313 u_upload_alloc(uploader, 0, size, alignment, out_offset, &res, &ptr);
314
315 struct iris_bo *bo = ((struct iris_resource *) res)->bo;
316 iris_use_pinned_bo(batch, bo, false);
317
318 *out_offset += bo_offset_from_base_address(res);
319
320 pipe_resource_reference(&res, NULL);
321
322 return ptr;
323 }
324
325 static uint32_t
326 emit_state(struct iris_batch *batch,
327 struct u_upload_mgr *uploader,
328 const void *data,
329 unsigned size,
330 unsigned alignment)
331 {
332 unsigned offset = 0;
333 uint32_t *map = stream_state(batch, uploader, size, alignment, &offset);
334
335 if (map)
336 memcpy(map, data, size);
337
338 return offset;
339 }
340
341 static void
342 iris_init_render_context(struct iris_screen *screen,
343 struct iris_batch *batch,
344 struct pipe_debug_callback *dbg)
345 {
346 iris_init_batch(batch, screen, dbg, I915_EXEC_RENDER);
347
348 /* XXX: PIPE_CONTROLs */
349
350 iris_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) {
351 #if 0
352 // XXX: MOCS is stupid for this.
353 sba.GeneralStateMemoryObjectControlState = MOCS_WB;
354 sba.StatelessDataPortAccessMemoryObjectControlState = MOCS_WB;
355 sba.SurfaceStateMemoryObjectControlState = MOCS_WB;
356 sba.DynamicStateMemoryObjectControlState = MOCS_WB;
357 sba.IndirectObjectMemoryObjectControlState = MOCS_WB;
358 sba.InstructionMemoryObjectControlState = MOCS_WB;
359 sba.BindlessSurfaceStateMemoryObjectControlState = MOCS_WB;
360 #endif
361
362 sba.GeneralStateBaseAddressModifyEnable = true;
363 sba.SurfaceStateBaseAddressModifyEnable = true;
364 sba.DynamicStateBaseAddressModifyEnable = true;
365 sba.IndirectObjectBaseAddressModifyEnable = true;
366 sba.InstructionBaseAddressModifyEnable = true;
367 sba.GeneralStateBufferSizeModifyEnable = true;
368 sba.DynamicStateBufferSizeModifyEnable = true;
369 sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
370 sba.IndirectObjectBufferSizeModifyEnable = true;
371 sba.InstructionBuffersizeModifyEnable = true;
372
373 sba.SurfaceStateBaseAddress = ro_bo(NULL, 1ull << 32);
374 sba.DynamicStateBaseAddress = ro_bo(NULL, 2 * (1ull << 32));
375
376 sba.GeneralStateBufferSize = 0xfffff;
377 sba.IndirectObjectBufferSize = 0xfffff;
378 sba.InstructionBufferSize = 0xfffff;
379 sba.DynamicStateBufferSize = 0xfffff;
380 }
381
382 iris_emit_cmd(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
383 rect.ClippedDrawingRectangleXMax = UINT16_MAX;
384 rect.ClippedDrawingRectangleYMax = UINT16_MAX;
385 }
386 iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_PATTERN), pat) {
387 GEN_SAMPLE_POS_1X(pat._1xSample);
388 GEN_SAMPLE_POS_2X(pat._2xSample);
389 GEN_SAMPLE_POS_4X(pat._4xSample);
390 GEN_SAMPLE_POS_8X(pat._8xSample);
391 GEN_SAMPLE_POS_16X(pat._16xSample);
392 }
393 iris_emit_cmd(batch, GENX(3DSTATE_AA_LINE_PARAMETERS), foo);
394 iris_emit_cmd(batch, GENX(3DSTATE_WM_CHROMAKEY), foo);
395 iris_emit_cmd(batch, GENX(3DSTATE_WM_HZ_OP), foo);
396 /* XXX: may need to set an offset for origin-UL framebuffers */
397 iris_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_OFFSET), foo);
398
399 /* Just assign a static partitioning. */
400 for (int i = 0; i <= MESA_SHADER_FRAGMENT; i++) {
401 iris_emit_cmd(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), alloc) {
402 alloc._3DCommandSubOpcode = 18 + i;
403 alloc.ConstantBufferOffset = 6 * i;
404 alloc.ConstantBufferSize = i == MESA_SHADER_FRAGMENT ? 8 : 6;
405 }
406 }
407 }
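/* Summary of the partition above, in the allocation's own units: offsets
 * 0/6/12/18/24 with sizes 6/6/6/6/8, so VS/HS/DS/GS each get 6 units and the
 * FS gets 8, tiling [0, 32) with no gaps or overlap. */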
408
409 static void
410 iris_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *info)
411 {
412 }
413
414 static void
415 iris_set_blend_color(struct pipe_context *ctx,
416 const struct pipe_blend_color *state)
417 {
418 struct iris_context *ice = (struct iris_context *) ctx;
419
420 memcpy(&ice->state.blend_color, state, sizeof(struct pipe_blend_color));
421 ice->state.dirty |= IRIS_DIRTY_COLOR_CALC_STATE;
422 }
423
424 struct iris_blend_state {
425 uint32_t ps_blend[GENX(3DSTATE_PS_BLEND_length)];
426 uint32_t blend_state[GENX(BLEND_STATE_length) +
427 BRW_MAX_DRAW_BUFFERS * GENX(BLEND_STATE_ENTRY_length)];
428
429 bool alpha_to_coverage; /* for shader key */
430 };
431
432 static void *
433 iris_create_blend_state(struct pipe_context *ctx,
434 const struct pipe_blend_state *state)
435 {
436 struct iris_blend_state *cso = malloc(sizeof(struct iris_blend_state));
437 uint32_t *blend_state = cso->blend_state;
438
439 cso->alpha_to_coverage = state->alpha_to_coverage;
440
441 iris_pack_command(GENX(3DSTATE_PS_BLEND), cso->ps_blend, pb) {
442 /* pb.HasWriteableRT is filled in at draw time. */
443 /* pb.AlphaTestEnable is filled in at draw time. */
444 pb.AlphaToCoverageEnable = state->alpha_to_coverage;
445 pb.IndependentAlphaBlendEnable = state->independent_blend_enable;
446
447 pb.ColorBufferBlendEnable = state->rt[0].blend_enable;
448
449 pb.SourceBlendFactor = state->rt[0].rgb_src_factor;
450 pb.SourceAlphaBlendFactor = state->rt[0].alpha_src_factor;
451 pb.DestinationBlendFactor = state->rt[0].rgb_dst_factor;
452 pb.DestinationAlphaBlendFactor = state->rt[0].alpha_dst_factor;
453 }
454
455 iris_pack_state(GENX(BLEND_STATE), blend_state, bs) {
456 bs.AlphaToCoverageEnable = state->alpha_to_coverage;
457 bs.IndependentAlphaBlendEnable = state->independent_blend_enable;
458 bs.AlphaToOneEnable = state->alpha_to_one;
459 bs.AlphaToCoverageDitherEnable = state->alpha_to_coverage;
460 bs.ColorDitherEnable = state->dither;
461 /* bs.AlphaTestEnable and bs.AlphaTestFunction are filled in later. */
462 }
463
464 blend_state += GENX(BLEND_STATE_length);
465
466 for (int i = 0; i < BRW_MAX_DRAW_BUFFERS; i++) {
467 iris_pack_state(GENX(BLEND_STATE_ENTRY), blend_state, be) {
468 be.LogicOpEnable = state->logicop_enable;
469 be.LogicOpFunction = state->logicop_func;
470
471 be.PreBlendSourceOnlyClampEnable = false;
472 be.ColorClampRange = COLORCLAMP_RTFORMAT;
473 be.PreBlendColorClampEnable = true;
474 be.PostBlendColorClampEnable = true;
475
476 be.ColorBufferBlendEnable = state->rt[i].blend_enable;
477
478 be.ColorBlendFunction = state->rt[i].rgb_func;
479 be.AlphaBlendFunction = state->rt[i].alpha_func;
480 be.SourceBlendFactor = state->rt[i].rgb_src_factor;
481 be.SourceAlphaBlendFactor = state->rt[i].alpha_src_factor;
482 be.DestinationBlendFactor = state->rt[i].rgb_dst_factor;
483 be.DestinationAlphaBlendFactor = state->rt[i].alpha_dst_factor;
484
485 be.WriteDisableRed = !(state->rt[i].colormask & PIPE_MASK_R);
486 be.WriteDisableGreen = !(state->rt[i].colormask & PIPE_MASK_G);
487 be.WriteDisableBlue = !(state->rt[i].colormask & PIPE_MASK_B);
488 be.WriteDisableAlpha = !(state->rt[i].colormask & PIPE_MASK_A);
489 }
490 blend_state += GENX(BLEND_STATE_ENTRY_length);
491 }
492
493 return cso;
494 }
495
496 static void
497 iris_bind_blend_state(struct pipe_context *ctx, void *state)
498 {
499 struct iris_context *ice = (struct iris_context *) ctx;
500 ice->state.cso_blend = state;
501 ice->state.dirty |= IRIS_DIRTY_PS_BLEND;
502 ice->state.dirty |= IRIS_DIRTY_BLEND_STATE;
503 }
504
505 struct iris_depth_stencil_alpha_state {
506 uint32_t wmds[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];
507 uint32_t cc_vp[GENX(CC_VIEWPORT_length)];
508
509 struct pipe_alpha_state alpha; /* to BLEND_STATE, 3DSTATE_PS_BLEND */
510 };
511
512 static void *
513 iris_create_zsa_state(struct pipe_context *ctx,
514 const struct pipe_depth_stencil_alpha_state *state)
515 {
516 struct iris_depth_stencil_alpha_state *cso =
517 malloc(sizeof(struct iris_depth_stencil_alpha_state));
518
519 cso->alpha = state->alpha;
520
521 bool two_sided_stencil = state->stencil[1].enabled;
522
523 /* The state tracker needs to optimize away EQUAL writes for us. */
524 assert(!(state->depth.func == PIPE_FUNC_EQUAL && state->depth.writemask));
525
526 iris_pack_command(GENX(3DSTATE_WM_DEPTH_STENCIL), cso->wmds, wmds) {
527 wmds.StencilFailOp = state->stencil[0].fail_op;
528 wmds.StencilPassDepthFailOp = state->stencil[0].zfail_op;
529 wmds.StencilPassDepthPassOp = state->stencil[0].zpass_op;
530 wmds.StencilTestFunction =
531 translate_compare_func(state->stencil[0].func);
532 wmds.BackfaceStencilFailOp = state->stencil[1].fail_op;
533 wmds.BackfaceStencilPassDepthFailOp = state->stencil[1].zfail_op;
534 wmds.BackfaceStencilPassDepthPassOp = state->stencil[1].zpass_op;
535 wmds.BackfaceStencilTestFunction =
536 translate_compare_func(state->stencil[1].func);
537 wmds.DepthTestFunction = translate_compare_func(state->depth.func);
538 wmds.DoubleSidedStencilEnable = two_sided_stencil;
539 wmds.StencilTestEnable = state->stencil[0].enabled;
540 wmds.StencilBufferWriteEnable =
541 state->stencil[0].writemask != 0 ||
542 (two_sided_stencil && state->stencil[1].writemask != 0);
543 wmds.DepthTestEnable = state->depth.enabled;
544 wmds.DepthBufferWriteEnable = state->depth.writemask;
545 wmds.StencilTestMask = state->stencil[0].valuemask;
546 wmds.StencilWriteMask = state->stencil[0].writemask;
547 wmds.BackfaceStencilTestMask = state->stencil[1].valuemask;
548 wmds.BackfaceStencilWriteMask = state->stencil[1].writemask;
549 /* wmds.[Backface]StencilReferenceValue are merged later */
550 }
551
552 iris_pack_state(GENX(CC_VIEWPORT), cso->cc_vp, ccvp) {
553 ccvp.MinimumDepth = state->depth.bounds_min;
554 ccvp.MaximumDepth = state->depth.bounds_max;
555 }
556
557 return cso;
558 }
559
560 static void
561 iris_bind_zsa_state(struct pipe_context *ctx, void *state)
562 {
563 struct iris_context *ice = (struct iris_context *) ctx;
564 struct iris_depth_stencil_alpha_state *old_cso = ice->state.cso_zsa;
565 struct iris_depth_stencil_alpha_state *new_cso = state;
566
567 if (new_cso) {
568 if (!old_cso || old_cso->alpha.ref_value != new_cso->alpha.ref_value) {
569 ice->state.dirty |= IRIS_DIRTY_COLOR_CALC_STATE;
570 }
571 }
572
573 ice->state.cso_zsa = new_cso;
574 ice->state.dirty |= IRIS_DIRTY_CC_VIEWPORT;
575 ice->state.dirty |= IRIS_DIRTY_WM_DEPTH_STENCIL;
576 }
577
578 struct iris_rasterizer_state {
579 uint32_t sf[GENX(3DSTATE_SF_length)];
580 uint32_t clip[GENX(3DSTATE_CLIP_length)];
581 uint32_t raster[GENX(3DSTATE_RASTER_length)];
582 uint32_t wm[GENX(3DSTATE_WM_length)];
583 uint32_t line_stipple[GENX(3DSTATE_LINE_STIPPLE_length)];
584
585 bool flatshade; /* for shader state */
586 bool clamp_fragment_color; /* for shader state */
587 bool light_twoside; /* for shader state */
588 bool rasterizer_discard; /* for 3DSTATE_STREAMOUT */
589 bool half_pixel_center; /* for 3DSTATE_MULTISAMPLE */
590 enum pipe_sprite_coord_mode sprite_coord_mode; /* PIPE_SPRITE_* */
591 uint16_t sprite_coord_enable;
592 };
593
594 static void *
595 iris_create_rasterizer_state(struct pipe_context *ctx,
596 const struct pipe_rasterizer_state *state)
597 {
598 struct iris_rasterizer_state *cso =
599 malloc(sizeof(struct iris_rasterizer_state));
600
601 #if 0
602 point_quad_rasterization -> SBE?
603
604 not necessary?
605 {
606 poly_smooth
607 force_persample_interp - ?
608 bottom_edge_rule
609
610 offset_units_unscaled - cap not exposed
611 }
612 #endif
613
614 cso->flatshade = state->flatshade;
615 cso->clamp_fragment_color = state->clamp_fragment_color;
616 cso->light_twoside = state->light_twoside;
617 cso->rasterizer_discard = state->rasterizer_discard;
618 cso->half_pixel_center = state->half_pixel_center;
619 cso->sprite_coord_mode = state->sprite_coord_mode;
620 cso->sprite_coord_enable = state->sprite_coord_enable;
621
622 iris_pack_command(GENX(3DSTATE_SF), cso->sf, sf) {
623 sf.StatisticsEnable = true;
624 sf.ViewportTransformEnable = true;
625 sf.AALineDistanceMode = AALINEDISTANCE_TRUE;
626 sf.LineEndCapAntialiasingRegionWidth =
627 state->line_smooth ? _10pixels : _05pixels;
628 sf.LastPixelEnable = state->line_last_pixel;
629 sf.LineWidth = state->line_width;
630 sf.SmoothPointEnable = state->point_smooth;
631 sf.PointWidthSource = state->point_size_per_vertex ? Vertex : State;
632 sf.PointWidth = state->point_size;
633
634 if (state->flatshade_first) {
635 sf.TriangleStripListProvokingVertexSelect = 2;
636 sf.TriangleFanProvokingVertexSelect = 2;
637 sf.LineStripListProvokingVertexSelect = 1;
638 } else {
639 sf.TriangleFanProvokingVertexSelect = 1;
640 }
641 }
642
643 /* COMPLETE! */
644 iris_pack_command(GENX(3DSTATE_RASTER), cso->raster, rr) {
645 rr.FrontWinding = state->front_ccw ? CounterClockwise : Clockwise;
646 rr.CullMode = translate_cull_mode(state->cull_face);
647 rr.FrontFaceFillMode = translate_fill_mode(state->fill_front);
648 rr.BackFaceFillMode = translate_fill_mode(state->fill_back);
649 rr.DXMultisampleRasterizationEnable = state->multisample;
650 rr.GlobalDepthOffsetEnableSolid = state->offset_tri;
651 rr.GlobalDepthOffsetEnableWireframe = state->offset_line;
652 rr.GlobalDepthOffsetEnablePoint = state->offset_point;
653 rr.GlobalDepthOffsetConstant = state->offset_units;
654 rr.GlobalDepthOffsetScale = state->offset_scale;
655 rr.GlobalDepthOffsetClamp = state->offset_clamp;
656 rr.SmoothPointEnable = state->point_smooth;
657 rr.AntialiasingEnable = state->line_smooth;
658 rr.ScissorRectangleEnable = state->scissor;
659 rr.ViewportZNearClipTestEnable = state->depth_clip_near;
660 rr.ViewportZFarClipTestEnable = state->depth_clip_far;
661 //rr.ConservativeRasterizationEnable = not yet supported by Gallium...
662 }
663
664 iris_pack_command(GENX(3DSTATE_CLIP), cso->clip, cl) {
665 /* cl.NonPerspectiveBarycentricEnable is filled in at draw time from
666 * the FS program; cl.ForceZeroRTAIndexEnable is filled in from the FB.
667 */
668 cl.StatisticsEnable = true;
669 cl.EarlyCullEnable = true;
670 cl.UserClipDistanceClipTestEnableBitmask = state->clip_plane_enable;
671 cl.ForceUserClipDistanceClipTestEnableBitmask = true;
672 cl.APIMode = state->clip_halfz ? APIMODE_D3D : APIMODE_OGL;
673 cl.GuardbandClipTestEnable = true;
674 cl.ClipMode = CLIPMODE_NORMAL;
675 cl.ClipEnable = true;
676 cl.ViewportXYClipTestEnable = state->point_tri_clip;
677 cl.MinimumPointWidth = 0.125;
678 cl.MaximumPointWidth = 255.875;
679
680 if (state->flatshade_first) {
681 cl.TriangleStripListProvokingVertexSelect = 2;
682 cl.TriangleFanProvokingVertexSelect = 2;
683 cl.LineStripListProvokingVertexSelect = 1;
684 } else {
685 cl.TriangleFanProvokingVertexSelect = 1;
686 }
687 }
688
689 iris_pack_command(GENX(3DSTATE_WM), cso->wm, wm) {
690 /* wm.BarycentricInterpolationMode and wm.EarlyDepthStencilControl are
691 * filled in at draw time from the FS program.
692 */
693 wm.LineAntialiasingRegionWidth = _10pixels;
694 wm.LineEndCapAntialiasingRegionWidth = _05pixels;
695 wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
696 wm.StatisticsEnable = true;
697 wm.LineStippleEnable = state->line_stipple_enable;
698 wm.PolygonStippleEnable = state->poly_stipple_enable;
699 }
700
701 /* Remap from 0..255 back to 1..256 */
702 const unsigned line_stipple_factor = state->line_stipple_factor + 1;
703
704 iris_pack_command(GENX(3DSTATE_LINE_STIPPLE), cso->line_stipple, line) {
705 line.LineStipplePattern = state->line_stipple_pattern;
706 line.LineStippleInverseRepeatCount = 1.0f / line_stipple_factor;
707 line.LineStippleRepeatCount = line_stipple_factor;
708 }
709
710 return cso;
711 }
712
713 static void
714 iris_bind_rasterizer_state(struct pipe_context *ctx, void *state)
715 {
716 struct iris_context *ice = (struct iris_context *) ctx;
717 struct iris_rasterizer_state *old_cso = ice->state.cso_rast;
718 struct iris_rasterizer_state *new_cso = state;
719
720 if (new_cso) {
721 /* Try to avoid re-emitting 3DSTATE_LINE_STIPPLE, it's non-pipelined */
722 if (!old_cso || memcmp(old_cso->line_stipple, new_cso->line_stipple,
723 sizeof(old_cso->line_stipple)) != 0) {
724 ice->state.dirty |= IRIS_DIRTY_LINE_STIPPLE;
725 }
726
727 if (!old_cso ||
728 old_cso->half_pixel_center != new_cso->half_pixel_center) {
729 ice->state.dirty |= IRIS_DIRTY_MULTISAMPLE;
730 }
731 }
732
733 ice->state.cso_rast = new_cso;
734 ice->state.dirty |= IRIS_DIRTY_RASTER;
735 }
736
737 static uint32_t
738 translate_wrap(unsigned pipe_wrap)
739 {
740 static const unsigned map[] = {
741 [PIPE_TEX_WRAP_REPEAT] = TCM_WRAP,
742 [PIPE_TEX_WRAP_CLAMP] = TCM_HALF_BORDER,
743 [PIPE_TEX_WRAP_CLAMP_TO_EDGE] = TCM_CLAMP,
744 [PIPE_TEX_WRAP_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER,
745 [PIPE_TEX_WRAP_MIRROR_REPEAT] = TCM_MIRROR,
746 [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE,
747 [PIPE_TEX_WRAP_MIRROR_CLAMP] = -1, // XXX: ???
748 [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER] = -1, // XXX: ???
749 };
750 return map[pipe_wrap];
751 }
752
753 /**
754 * Return true if the given wrap mode requires the border color to exist.
755 */
756 static bool
757 wrap_mode_needs_border_color(unsigned wrap_mode)
758 {
759 return wrap_mode == TCM_CLAMP_BORDER || wrap_mode == TCM_HALF_BORDER;
760 }
761
762 static unsigned
763 translate_mip_filter(enum pipe_tex_mipfilter pipe_mip)
764 {
765 static const unsigned map[] = {
766 [PIPE_TEX_MIPFILTER_NEAREST] = MIPFILTER_NEAREST,
767 [PIPE_TEX_MIPFILTER_LINEAR] = MIPFILTER_LINEAR,
768 [PIPE_TEX_MIPFILTER_NONE] = MIPFILTER_NONE,
769 };
770 return map[pipe_mip];
771 }
772
773 struct iris_sampler_state {
774 struct pipe_sampler_state base;
775
776 bool needs_border_color;
777
778 uint32_t sampler_state[GENX(SAMPLER_STATE_length)];
779 };
780
781 static void *
782 iris_create_sampler_state(struct pipe_context *pctx,
783 const struct pipe_sampler_state *state)
784 {
785 struct iris_sampler_state *cso = CALLOC_STRUCT(iris_sampler_state);
786
787 if (!cso)
788 return NULL;
789
790 STATIC_ASSERT(PIPE_TEX_FILTER_NEAREST == MAPFILTER_NEAREST);
791 STATIC_ASSERT(PIPE_TEX_FILTER_LINEAR == MAPFILTER_LINEAR);
792
793 unsigned wrap_s = translate_wrap(state->wrap_s);
794 unsigned wrap_t = translate_wrap(state->wrap_t);
795 unsigned wrap_r = translate_wrap(state->wrap_r);
796
797 cso->needs_border_color = wrap_mode_needs_border_color(wrap_s) ||
798 wrap_mode_needs_border_color(wrap_t) ||
799 wrap_mode_needs_border_color(wrap_r);
800
801 iris_pack_state(GENX(SAMPLER_STATE), cso->sampler_state, samp) {
802 samp.TCXAddressControlMode = wrap_s;
803 samp.TCYAddressControlMode = wrap_t;
804 samp.TCZAddressControlMode = wrap_r;
805 samp.CubeSurfaceControlMode = state->seamless_cube_map;
806 samp.NonnormalizedCoordinateEnable = !state->normalized_coords;
807 samp.MinModeFilter = state->min_img_filter;
808 samp.MagModeFilter = state->mag_img_filter;
809 samp.MipModeFilter = translate_mip_filter(state->min_mip_filter);
810 samp.MaximumAnisotropy = RATIO21;
811
812 if (state->max_anisotropy >= 2) {
813 if (state->min_img_filter == PIPE_TEX_FILTER_LINEAR) {
814 samp.MinModeFilter = MAPFILTER_ANISOTROPIC;
815 samp.AnisotropicAlgorithm = EWAApproximation;
816 }
817
818 if (state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)
819 samp.MagModeFilter = MAPFILTER_ANISOTROPIC;
820
821 samp.MaximumAnisotropy =
822 MIN2((state->max_anisotropy - 2) / 2, RATIO161);
823 }
824
825 /* Set address rounding bits if not using nearest filtering. */
826 if (state->min_img_filter != PIPE_TEX_FILTER_NEAREST) {
827 samp.UAddressMinFilterRoundingEnable = true;
828 samp.VAddressMinFilterRoundingEnable = true;
829 samp.RAddressMinFilterRoundingEnable = true;
830 }
831
832 if (state->mag_img_filter != PIPE_TEX_FILTER_NEAREST) {
833 samp.UAddressMagFilterRoundingEnable = true;
834 samp.VAddressMagFilterRoundingEnable = true;
835 samp.RAddressMagFilterRoundingEnable = true;
836 }
837
838 if (state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
839 samp.ShadowFunction = translate_shadow_func(state->compare_func);
840
841 const float hw_max_lod = GEN_GEN >= 7 ? 14 : 13;
842
843 samp.LODPreClampMode = CLAMP_MODE_OGL;
844 samp.MinLOD = CLAMP(state->min_lod, 0, hw_max_lod);
845 samp.MaxLOD = CLAMP(state->max_lod, 0, hw_max_lod);
846 samp.TextureLODBias = CLAMP(state->lod_bias, -16, 15);
847
848 //samp.BorderColorPointer = <<comes from elsewhere>>
849 }
850
851 return cso;
852 }
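/* E.g., max_anisotropy == 16 gives MIN2((16 - 2) / 2, RATIO161) == 7, which
 * is RATIO161 assuming the genxml encodings run RATIO21 == 0 .. RATIO161 == 7,
 * one step per additional 2:1 of anisotropy. */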
853
854 static void
855 iris_bind_sampler_states(struct pipe_context *ctx,
856 enum pipe_shader_type p_stage,
857 unsigned start, unsigned count,
858 void **states)
859 {
860 struct iris_context *ice = (struct iris_context *) ctx;
861 gl_shader_stage stage = stage_from_pipe(p_stage);
862
863 assert(start + count <= IRIS_MAX_TEXTURE_SAMPLERS);
864
865 /* Assemble the SAMPLER_STATEs into a contiguous chunk of memory
866 * relative to Dynamic State Base Address.
867 */
868 void *map = NULL;
869 u_upload_alloc(ice->state.dynamic_uploader, 0,
870 count * 4 * GENX(SAMPLER_STATE_length), 32,
871 &ice->state.sampler_table_offset[stage],
872 &ice->state.sampler_table_resource[stage],
873 &map);
874 if (unlikely(!map))
875 return;
876
877 ice->state.sampler_table_offset[stage] +=
878 bo_offset_from_base_address(ice->state.sampler_table_resource[stage]);
879
880 for (int i = 0; i < count; i++) {
881 struct iris_sampler_state *state = states[i];
882
883 /* Save a pointer to the iris_sampler_state, a few fields need
884 * to inform draw-time decisions.
885 */
886 ice->state.samplers[stage][start + i] = state;
887
888 if (state)
889 memcpy(map, state->sampler_state, 4 * GENX(SAMPLER_STATE_length));
890
891 map += 4 * GENX(SAMPLER_STATE_length);
892 }
893
894 ice->state.num_samplers[stage] = count; /* XXX: table above is rebuilt from slot 0; assumes start == 0 */
895
896 ice->state.dirty |= IRIS_DIRTY_SAMPLER_STATES_VS << stage;
897 }
898
899 struct iris_sampler_view {
900 struct pipe_sampler_view pipe;
901 struct isl_view view;
902
903 /** The resource (BO) holding our SURFACE_STATE. */
904 struct pipe_resource *surface_state_resource;
905 unsigned surface_state_offset;
906
907 //uint32_t surface_state[GENX(RENDER_SURFACE_STATE_length)];
908 };
909
910 /**
911 * Convert a swizzle enumeration (e.g. PIPE_SWIZZLE_X) to one of the Gen7.5+
912 * "Shader Channel Select" enumerations (e.g. HSW_SCS_RED). The mappings are
913 *
914 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
915 * 0 1 2 3 4 5
916 * 4 5 6 7 0 1
917 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
918 *
919 * which is simply adding 4 then modding by 8 (or anding with 7).
920 *
921 * We then may need to apply workarounds for textureGather hardware bugs.
922 */
923 static enum isl_channel_select
924 pipe_swizzle_to_isl_channel(enum pipe_swizzle swizzle)
925 {
926 return (swizzle + 4) & 7;
927 }
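/* Spot check of the mapping: PIPE_SWIZZLE_X (0) -> 4 (SCS_RED), and
 * PIPE_SWIZZLE_ZERO (4) wraps to 0 (SCS_ZERO), matching the table above. */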
928
929 static struct pipe_sampler_view *
930 iris_create_sampler_view(struct pipe_context *ctx,
931 struct pipe_resource *tex,
932 const struct pipe_sampler_view *tmpl)
933 {
934 struct iris_context *ice = (struct iris_context *) ctx;
935 struct iris_screen *screen = (struct iris_screen *)ctx->screen;
936 struct iris_resource *itex = (struct iris_resource *) tex;
937 struct iris_sampler_view *isv = calloc(1, sizeof(struct iris_sampler_view));
938
939 if (!isv)
940 return NULL;
941
942 /* initialize base object */
943 isv->pipe = *tmpl;
944 isv->pipe.context = ctx;
945 isv->pipe.texture = NULL;
946 pipe_reference_init(&isv->pipe.reference, 1);
947 pipe_resource_reference(&isv->pipe.texture, tex);
948
949 /* XXX: do we need brw_get_texture_swizzle hacks here? */
950
951 isv->view = (struct isl_view) {
952 .format = iris_isl_format_for_pipe_format(tmpl->format),
953 .base_level = tmpl->u.tex.first_level,
954 .levels = tmpl->u.tex.last_level - tmpl->u.tex.first_level + 1,
955 .base_array_layer = tmpl->u.tex.first_layer,
956 .array_len = tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1,
957 .swizzle = (struct isl_swizzle) {
958 .r = pipe_swizzle_to_isl_channel(tmpl->swizzle_r),
959 .g = pipe_swizzle_to_isl_channel(tmpl->swizzle_g),
960 .b = pipe_swizzle_to_isl_channel(tmpl->swizzle_b),
961 .a = pipe_swizzle_to_isl_channel(tmpl->swizzle_a),
962 },
963 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
964 };
965
966 void *map = NULL;
967 u_upload_alloc(ice->state.surface_uploader, 0,
968 4 * GENX(RENDER_SURFACE_STATE_length), 64,
969 &isv->surface_state_offset,
970 &isv->surface_state_resource,
971 &map);
972 if (unlikely(!map))
973 return NULL;
974
975 isv->surface_state_offset +=
976 bo_offset_from_base_address(isv->surface_state_resource);
977
978 isl_surf_fill_state(&screen->isl_dev, map,
979 .surf = &itex->surf, .view = &isv->view,
980 .mocs = MOCS_WB,
981 .address = itex->bo->gtt_offset);
982 // .aux_surf =
983 // .clear_color = clear_color,
984
985 return &isv->pipe;
986 }
987
988 struct iris_surface {
989 struct pipe_surface pipe;
990 struct isl_view view;
991
992 /** The resource (BO) holding our SURFACE_STATE. */
993 struct pipe_resource *surface_state_resource;
994 unsigned surface_state_offset;
995
996 // uint32_t surface_state[GENX(RENDER_SURFACE_STATE_length)];
997 };
998
999 static struct pipe_surface *
1000 iris_create_surface(struct pipe_context *ctx,
1001 struct pipe_resource *tex,
1002 const struct pipe_surface *tmpl)
1003 {
1004 struct iris_context *ice = (struct iris_context *) ctx;
1005 struct iris_screen *screen = (struct iris_screen *)ctx->screen;
1006 struct iris_surface *surf = calloc(1, sizeof(struct iris_surface));
1007 struct pipe_surface *psurf = &surf->pipe;
1008 struct iris_resource *itex = (struct iris_resource *) tex;
1009
1010 if (!surf)
1011 return NULL;
1012
1013 pipe_reference_init(&psurf->reference, 1);
1014 pipe_resource_reference(&psurf->texture, tex);
1015 psurf->context = ctx;
1016 psurf->format = tmpl->format;
1017 psurf->width = tex->width0;
1018 psurf->height = tex->height0;
1019 psurf->texture = tex;
1020 psurf->u.tex.first_layer = tmpl->u.tex.first_layer;
1021 psurf->u.tex.last_layer = tmpl->u.tex.last_layer;
1022 psurf->u.tex.level = tmpl->u.tex.level;
1023
1024 surf->view = (struct isl_view) {
1025 .format = iris_isl_format_for_pipe_format(tmpl->format),
1026 .base_level = tmpl->u.tex.level,
1027 .levels = 1,
1028 .base_array_layer = tmpl->u.tex.first_layer,
1029 .array_len = tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1,
1030 .swizzle = ISL_SWIZZLE_IDENTITY,
1031 // XXX: DEPTH_BIT, STENCIL_BIT...CUBE_BIT? Other bits?!
1032 .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
1033 };
1034
1035 void *map = NULL;
1036 u_upload_alloc(ice->state.surface_uploader, 0,
1037 4 * GENX(RENDER_SURFACE_STATE_length), 64,
1038 &surf->surface_state_offset,
1039 &surf->surface_state_resource,
1040 &map);
1041 if (unlikely(!map))
1042 return NULL;
1043
1044 surf->surface_state_offset +=
1045 bo_offset_from_base_address(surf->surface_state_resource);
1046
1047 isl_surf_fill_state(&screen->isl_dev, map,
1048 .surf = &itex->surf, .view = &surf->view,
1049 .mocs = MOCS_WB,
1050 .address = itex->bo->gtt_offset);
1051 // .aux_surf =
1052 // .clear_color = clear_color,
1053
1054 return psurf;
1055 }
1056
1057 static void
1058 iris_set_sampler_views(struct pipe_context *ctx,
1059 enum pipe_shader_type p_stage,
1060 unsigned start, unsigned count,
1061 struct pipe_sampler_view **views)
1062 {
1063 struct iris_context *ice = (struct iris_context *) ctx;
1064 gl_shader_stage stage = stage_from_pipe(p_stage);
1065
1066 unsigned i;
1067 for (i = 0; i < count; i++) {
1068 pipe_sampler_view_reference((struct pipe_sampler_view **)
1069 &ice->state.textures[stage][start + i], views[i]);
1070 }
1071 for (i = start + count; i < ice->state.num_textures[stage]; i++) {
1072 pipe_sampler_view_reference((struct pipe_sampler_view **)
1073 &ice->state.textures[stage][i], NULL);
1074 }
1075
1076 ice->state.num_textures[stage] = start + count;
1077
1078 // XXX: ice->state.dirty |= (IRIS_DIRTY_BINDING_TABLE_VS << stage);
1079 }
1080
1081 static void
1082 iris_set_clip_state(struct pipe_context *ctx,
1083 const struct pipe_clip_state *state)
1084 {
1085 }
1086
1087 static void
1088 iris_set_polygon_stipple(struct pipe_context *ctx,
1089 const struct pipe_poly_stipple *state)
1090 {
1091 struct iris_context *ice = (struct iris_context *) ctx;
1092 memcpy(&ice->state.poly_stipple, state, sizeof(*state));
1093 ice->state.dirty |= IRIS_DIRTY_POLYGON_STIPPLE;
1094 }
1095
1096 static void
1097 iris_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
1098 {
1099 struct iris_context *ice = (struct iris_context *) ctx;
1100
1101 ice->state.sample_mask = sample_mask;
1102 ice->state.dirty |= IRIS_DIRTY_SAMPLE_MASK;
1103 }
1104
1105 static void
1106 iris_set_scissor_states(struct pipe_context *ctx,
1107 unsigned start_slot,
1108 unsigned num_scissors,
1109 const struct pipe_scissor_state *states)
1110 {
1111 struct iris_context *ice = (struct iris_context *) ctx;
1112
1113 ice->state.num_scissors = num_scissors;
1114
1115 for (unsigned i = 0; i < num_scissors; i++) {
1116 ice->state.scissors[start_slot + i] = states[i];
1117 }
1118
1119 ice->state.dirty |= IRIS_DIRTY_SCISSOR_RECT;
1120 }
1121
1122 static void
1123 iris_set_stencil_ref(struct pipe_context *ctx,
1124 const struct pipe_stencil_ref *state)
1125 {
1126 struct iris_context *ice = (struct iris_context *) ctx;
1127 memcpy(&ice->state.stencil_ref, state, sizeof(*state));
1128 ice->state.dirty |= IRIS_DIRTY_WM_DEPTH_STENCIL;
1129 }
1130
1131
1132 struct iris_viewport_state {
1133 uint32_t sf_cl_vp[GENX(SF_CLIP_VIEWPORT_length) * IRIS_MAX_VIEWPORTS];
1134 };
1135
1136 static float
1137 viewport_extent(const struct pipe_viewport_state *state, int axis, float sign)
1138 {
1139 return copysignf(state->scale[axis], sign) + state->translate[axis];
1140 }
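/* E.g., a viewport covering X in [0, 1024) has scale[0] == 512 and
 * translate[0] == 512, so viewport_extent(state, 0, -1.0f) == 0 and
 * viewport_extent(state, 0, 1.0f) == 1024. */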
1141
1142 #if 0
1143 static void
1144 calculate_guardband_size(uint32_t fb_width, uint32_t fb_height,
1145 float m00, float m11, float m30, float m31,
1146 float *xmin, float *xmax,
1147 float *ymin, float *ymax)
1148 {
1149 /* According to the "Vertex X,Y Clamping and Quantization" section of the
1150 * Strips and Fans documentation:
1151 *
1152 * "The vertex X and Y screen-space coordinates are also /clamped/ to the
1153 * fixed-point "guardband" range supported by the rasterization hardware"
1154 *
1155 * and
1156 *
1157 * "In almost all circumstances, if an object’s vertices are actually
1158 * modified by this clamping (i.e., had X or Y coordinates outside of
1159 * the guardband extent the rendered object will not match the intended
1160 * result. Therefore software should take steps to ensure that this does
1161 * not happen - e.g., by clipping objects such that they do not exceed
1162 * these limits after the Drawing Rectangle is applied."
1163 *
1164 * I believe the fundamental restriction is that the rasterizer (in
1165 * the SF/WM stages) have a limit on the number of pixels that can be
1166 * rasterized. We need to ensure any coordinates beyond the rasterizer
1167 * limit are handled by the clipper. So effectively that limit becomes
1168 * the clipper's guardband size.
1169 *
1170 * It goes on to say:
1171 *
1172 * "In addition, in order to be correctly rendered, objects must have a
1173 * screenspace bounding box not exceeding 8K in the X or Y direction.
1174 * This additional restriction must also be comprehended by software,
1175 * i.e., enforced by use of clipping."
1176 *
1177 * This makes no sense. Gen7+ hardware supports 16K render targets,
1178 * and you definitely need to be able to draw polygons that fill the
1179 * surface. Our assumption is that the rasterizer was limited to 8K
1180 * on Sandybridge, which only supports 8K surfaces, and it was actually
1181 * increased to 16K on Ivybridge and later.
1182 *
1183 * So, limit the guardband to 16K on Gen7+ and 8K on Sandybridge.
1184 */
1185 const float gb_size = GEN_GEN >= 7 ? 16384.0f : 8192.0f;
1186
1187 if (m00 != 0 && m11 != 0) {
1188 /* First, we compute the screen-space render area */
1189 const float ss_ra_xmin = MIN3( 0, m30 + m00, m30 - m00);
1190 const float ss_ra_xmax = MAX3( fb_width, m30 + m00, m30 - m00);
1191 const float ss_ra_ymin = MIN3( 0, m31 + m11, m31 - m11);
1192 const float ss_ra_ymax = MAX3(fb_height, m31 + m11, m31 - m11);
1193
1194 /* We want the guardband to be centered on that */
1195 const float ss_gb_xmin = (ss_ra_xmin + ss_ra_xmax) / 2 - gb_size;
1196 const float ss_gb_xmax = (ss_ra_xmin + ss_ra_xmax) / 2 + gb_size;
1197 const float ss_gb_ymin = (ss_ra_ymin + ss_ra_ymax) / 2 - gb_size;
1198 const float ss_gb_ymax = (ss_ra_ymin + ss_ra_ymax) / 2 + gb_size;
1199
1200 /* Now we need it in native device coordinates */
1201 const float ndc_gb_xmin = (ss_gb_xmin - m30) / m00;
1202 const float ndc_gb_xmax = (ss_gb_xmax - m30) / m00;
1203 const float ndc_gb_ymin = (ss_gb_ymin - m31) / m11;
1204 const float ndc_gb_ymax = (ss_gb_ymax - m31) / m11;
1205
1206 /* Thanks to Y-flipping and ORIGIN_UPPER_LEFT, the Y coordinates may be
1207 * flipped upside-down. X should be fine though.
1208 */
1209 assert(ndc_gb_xmin <= ndc_gb_xmax);
1210 *xmin = ndc_gb_xmin;
1211 *xmax = ndc_gb_xmax;
1212 *ymin = MIN2(ndc_gb_ymin, ndc_gb_ymax);
1213 *ymax = MAX2(ndc_gb_ymin, ndc_gb_ymax);
1214 } else {
1215 /* The viewport scales to 0, so nothing will be rendered. */
1216 *xmin = 0.0f;
1217 *xmax = 0.0f;
1218 *ymin = 0.0f;
1219 *ymax = 0.0f;
1220 }
1221 }
1222 #endif
1223
1224 static void
1225 iris_set_viewport_states(struct pipe_context *ctx,
1226 unsigned start_slot,
1227 unsigned num_viewports,
1228 const struct pipe_viewport_state *state)
1229 {
1230 struct iris_context *ice = (struct iris_context *) ctx;
1231 struct iris_viewport_state *cso =
1232 malloc(sizeof(struct iris_viewport_state));
1233 uint32_t *vp_map = &cso->sf_cl_vp[start_slot * GENX(SF_CLIP_VIEWPORT_length)];
1234
1235 // XXX: this replaces the whole CSO, dropping any slots outside
1235 // [start_slot, start_slot + num_viewports) and leaking the old CSO
1236 for (unsigned i = 0; i < num_viewports; i++) {
1237 iris_pack_state(GENX(SF_CLIP_VIEWPORT), vp_map, vp) {
1238 vp.ViewportMatrixElementm00 = state[i].scale[0];
1239 vp.ViewportMatrixElementm11 = state[i].scale[1];
1240 vp.ViewportMatrixElementm22 = state[i].scale[2];
1241 vp.ViewportMatrixElementm30 = state[i].translate[0];
1242 vp.ViewportMatrixElementm31 = state[i].translate[1];
1243 vp.ViewportMatrixElementm32 = state[i].translate[2];
1244 /* XXX: in i965 this is computed based on the drawbuffer size,
1245 * but we don't have that here...
1246 */
1247 vp.XMinClipGuardband = -1.0;
1248 vp.XMaxClipGuardband = 1.0;
1249 vp.YMinClipGuardband = -1.0;
1250 vp.YMaxClipGuardband = 1.0;
1251 vp.XMinViewPort = viewport_extent(&state[i], 0, -1.0f);
1252 vp.XMaxViewPort = viewport_extent(&state[i], 0, 1.0f) - 1;
1253 vp.YMinViewPort = viewport_extent(&state[i], 1, -1.0f);
1254 vp.YMaxViewPort = viewport_extent(&state[i], 1, 1.0f) - 1;
1255 }
1256
1257 vp_map += GENX(SF_CLIP_VIEWPORT_length);
1258 }
1259
1260 ice->state.cso_vp = cso;
1261 ice->state.num_viewports = num_viewports;
1262 ice->state.dirty |= IRIS_DIRTY_SF_CL_VIEWPORT;
1263 }
1264
1265 struct iris_depth_state
1266 {
1267 uint32_t depth_buffer[GENX(3DSTATE_DEPTH_BUFFER_length)];
1268 uint32_t hier_depth_buffer[GENX(3DSTATE_HIER_DEPTH_BUFFER_length)];
1269 uint32_t stencil_buffer[GENX(3DSTATE_STENCIL_BUFFER_length)];
1270 };
1271
1272 static void
1273 iris_set_framebuffer_state(struct pipe_context *ctx,
1274 const struct pipe_framebuffer_state *state)
1275 {
1276 struct iris_context *ice = (struct iris_context *) ctx;
1277 struct pipe_framebuffer_state *cso = &ice->state.framebuffer;
1278
1279 if (cso->samples != state->samples) {
1280 ice->state.dirty |= IRIS_DIRTY_MULTISAMPLE;
1281 }
1282
1283 if (cso->nr_cbufs != state->nr_cbufs) {
1284 ice->state.dirty |= IRIS_DIRTY_BLEND_STATE;
1285 }
1286
1287 cso->width = state->width;
1288 cso->height = state->height;
1289 cso->layers = state->layers;
1290 cso->samples = state->samples;
1291
1292 unsigned i;
1293 for (i = 0; i < state->nr_cbufs; i++)
1294 pipe_surface_reference(&cso->cbufs[i], state->cbufs[i]);
1295 for (; i < cso->nr_cbufs; i++)
1296 pipe_surface_reference(&cso->cbufs[i], NULL);
1297
1298 cso->nr_cbufs = state->nr_cbufs;
1299
1300 pipe_surface_reference(&cso->zsbuf, state->zsbuf);
1301
1302 //struct isl_depth_stencil_hiz_emit_info info = {
1303 //.mocs = MOCS_WB,
1304 //};
1305
1306 // XXX: depth buffers
1307 }
1308
1309 static void
1310 iris_set_constant_buffer(struct pipe_context *ctx,
1311 enum pipe_shader_type p_stage, unsigned index,
1312 const struct pipe_constant_buffer *cb)
1313 {
1314 struct iris_context *ice = (struct iris_context *) ctx;
1315 gl_shader_stage stage = stage_from_pipe(p_stage);
1316
1317 util_copy_constant_buffer(&ice->shaders.state[stage].constbuf[index], cb);
1318 }
1319
1320 static void
1321 iris_sampler_view_destroy(struct pipe_context *ctx,
1322 struct pipe_sampler_view *state)
1323 {
1324 struct iris_sampler_view *isv = (void *) state;
1325 pipe_resource_reference(&state->texture, NULL);
1326 pipe_resource_reference(&isv->surface_state_resource, NULL);
1327 free(isv);
1328 }
1329
1330
1331 static void
1332 iris_surface_destroy(struct pipe_context *ctx, struct pipe_surface *p_surf)
1333 {
1334 struct iris_surface *surf = (void *) p_surf;
1335 pipe_resource_reference(&p_surf->texture, NULL);
1336 pipe_resource_reference(&surf->surface_state_resource, NULL);
1337 free(surf);
1338 }
1339
1340 static void
1341 iris_delete_state(struct pipe_context *ctx, void *state)
1342 {
1343 free(state);
1344 }
1345
1346 struct iris_vertex_buffer_state {
1347 uint32_t vertex_buffers[1 + 33 * GENX(VERTEX_BUFFER_STATE_length)];
1348 struct iris_bo *bos[33];
1349 unsigned num_buffers;
1350 };
1351
1352 static void
1353 iris_free_vertex_buffers(struct iris_vertex_buffer_state *cso)
1354 {
1355 if (cso) {
1356 for (unsigned i = 0; i < cso->num_buffers; i++)
1357 iris_bo_unreference(cso->bos[i]);
1358 free(cso);
1359 }
1360 }
1361
1362 static void
1363 iris_set_vertex_buffers(struct pipe_context *ctx,
1364 unsigned start_slot, unsigned count,
1365 const struct pipe_vertex_buffer *buffers)
1366 {
1367 struct iris_context *ice = (struct iris_context *) ctx;
1368 /* If there are no buffers, do nothing. We can leave the stale
1369 * 3DSTATE_VERTEX_BUFFERS in place - as long as there are no vertex
1370 * elements that point to them, it should be fine.
1371 */
1372 if (!buffers)
1373 return;
1374
1375 struct iris_vertex_buffer_state *cso =
1376 malloc(sizeof(struct iris_vertex_buffer_state));
1377
1378 iris_free_vertex_buffers(ice->state.cso_vertex_buffers);
1379
1380 cso->num_buffers = count;
1381
1382 iris_pack_command(GENX(3DSTATE_VERTEX_BUFFERS), cso->vertex_buffers, vb) {
1383 vb.DWordLength = 4 * cso->num_buffers - 1;
1384 }
1385
1386 uint32_t *vb_pack_dest = &cso->vertex_buffers[1];
1387
1388 for (unsigned i = 0; i < count; i++) {
1389 assert(!buffers[i].is_user_buffer);
1390
1391 struct iris_resource *res = (void *) buffers[i].buffer.resource;
1392 iris_bo_reference(res->bo);
1393 cso->bos[i] = res->bo;
1394
1395 iris_pack_state(GENX(VERTEX_BUFFER_STATE), vb_pack_dest, vb) {
1396 vb.VertexBufferIndex = start_slot + i;
1397 vb.MOCS = MOCS_WB;
1398 vb.AddressModifyEnable = true;
1399 vb.BufferPitch = buffers[i].stride;
1400 vb.BufferSize = res->bo->size;
1401 vb.BufferStartingAddress =
1402 ro_bo(NULL, res->bo->gtt_offset + buffers[i].buffer_offset);
1403 }
1404
1405 vb_pack_dest += GENX(VERTEX_BUFFER_STATE_length);
1406 }
1407
1408 ice->state.cso_vertex_buffers = cso;
1409 ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS;
1410 }
1411
1412 struct iris_vertex_element_state {
1413 uint32_t vertex_elements[1 + 33 * GENX(VERTEX_ELEMENT_STATE_length)];
1414 uint32_t vf_instancing[33][GENX(3DSTATE_VF_INSTANCING_length)];
1415 unsigned count;
1416 };
1417
1418 static void *
1419 iris_create_vertex_elements(struct pipe_context *ctx,
1420 unsigned count,
1421 const struct pipe_vertex_element *state)
1422 {
1423 struct iris_vertex_element_state *cso =
1424 malloc(sizeof(struct iris_vertex_element_state));
1425
1426 cso->count = count;
1427
1428 /* TODO:
1429 * - create edge flag one
1430 * - create SGV ones
1431 * - if those are necessary, use count + 1/2/3... OR in the length
1432 */
1433 iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), cso->vertex_elements, ve) { ve.DWordLength = 1 + count * GENX(VERTEX_ELEMENT_STATE_length) - 2; }
1434
1435 uint32_t *ve_pack_dest = &cso->vertex_elements[1];
1436
1437 for (int i = 0; i < count; i++) {
1438 enum isl_format isl_format =
1439 iris_isl_format_for_pipe_format(state[i].src_format);
1440 unsigned comp[4] = { VFCOMP_STORE_SRC, VFCOMP_STORE_SRC,
1441 VFCOMP_STORE_SRC, VFCOMP_STORE_SRC };
1442
1443 switch (isl_format_get_num_channels(isl_format)) {
1444 case 0: comp[0] = VFCOMP_STORE_0; /* fallthrough */
1445 case 1: comp[1] = VFCOMP_STORE_0; /* fallthrough */
1446 case 2: comp[2] = VFCOMP_STORE_0; /* fallthrough */
1447 case 3:
1448 comp[3] = isl_format_has_int_channel(isl_format) ? VFCOMP_STORE_1_INT
1449 : VFCOMP_STORE_1_FP;
1450 break;
1451 }
1452 iris_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {
1453 ve.VertexBufferIndex = state[i].vertex_buffer_index;
1454 ve.Valid = true;
1455 ve.SourceElementOffset = state[i].src_offset;
1456 ve.SourceElementFormat = isl_format;
1457 ve.Component0Control = comp[0];
1458 ve.Component1Control = comp[1];
1459 ve.Component2Control = comp[2];
1460 ve.Component3Control = comp[3];
1461 }
1462
1463 iris_pack_command(GENX(3DSTATE_VF_INSTANCING), cso->vf_instancing[i], vi) {
1464 vi.VertexElementIndex = i;
1465 vi.InstancingEnable = state[i].instance_divisor > 0;
1466 vi.InstanceDataStepRate = state[i].instance_divisor;
1467 }
1468
1469 ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length);
1470 }
1471
1472 return cso;
1473 }
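/* E.g., an R32G32_FLOAT element has two channels, so the switch above falls
 * through from case 2: Z gets VFCOMP_STORE_0 and W gets VFCOMP_STORE_1_FP,
 * giving the shader the conventional (x, y, 0, 1). */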
1474
1475 static void
1476 iris_bind_vertex_elements_state(struct pipe_context *ctx, void *state)
1477 {
1478 struct iris_context *ice = (struct iris_context *) ctx;
1479
1480 ice->state.cso_vertex_elements = state;
1481 ice->state.dirty |= IRIS_DIRTY_VERTEX_ELEMENTS;
1482 }
1483
1484 static void *
1485 iris_create_compute_state(struct pipe_context *ctx,
1486 const struct pipe_compute_state *state)
1487 {
1488 return malloc(1);
1489 }
1490
1491 static struct pipe_stream_output_target *
1492 iris_create_stream_output_target(struct pipe_context *ctx,
1493 struct pipe_resource *res,
1494 unsigned buffer_offset,
1495 unsigned buffer_size)
1496 {
1497 struct pipe_stream_output_target *t =
1498 CALLOC_STRUCT(pipe_stream_output_target);
1499 if (!t)
1500 return NULL;
1501
1502 pipe_reference_init(&t->reference, 1);
1503 pipe_resource_reference(&t->buffer, res);
1504 t->buffer_offset = buffer_offset;
1505 t->buffer_size = buffer_size;
1506 return t;
1507 }
1508
1509 static void
1510 iris_stream_output_target_destroy(struct pipe_context *ctx,
1511 struct pipe_stream_output_target *t)
1512 {
1513 pipe_resource_reference(&t->buffer, NULL);
1514 free(t);
1515 }
1516
1517 static void
1518 iris_set_stream_output_targets(struct pipe_context *ctx,
1519 unsigned num_targets,
1520 struct pipe_stream_output_target **targets,
1521 const unsigned *offsets)
1522 {
1523 }
1524
1525 static void
1526 iris_compute_sbe_urb_read_interval(uint64_t fs_input_slots,
1527 const struct brw_vue_map *last_vue_map,
1528 bool two_sided_color,
1529 unsigned *out_offset,
1530 unsigned *out_length)
1531 {
1532 /* The compiler computes the first URB slot without considering COL/BFC
1533 * swizzling (because it doesn't know whether it's enabled), so we need
1534 * to do that here too. This may result in a smaller offset, which
1535 * should be safe.
1536 */
1537 const unsigned first_slot =
1538 brw_compute_first_urb_slot_required(fs_input_slots, last_vue_map);
1539
1540 /* This becomes the URB read offset (counted in pairs of slots). */
1541 assert(first_slot % 2 == 0);
1542 *out_offset = first_slot / 2;
1543
1544 /* We need to adjust the inputs read to account for front/back color
1545 * swizzling, as it can make the URB length longer.
1546 */
1547 for (int c = 0; c <= 1; c++) {
1548 if (fs_input_slots & (VARYING_BIT_COL0 << c)) {
1549 /* If two sided color is enabled, the fragment shader's gl_Color
1550 * (COL0) input comes from either the gl_FrontColor (COL0) or
1551 * gl_BackColor (BFC0) input varyings. Mark BFC as used, too.
1552 */
1553 if (two_sided_color)
1554 fs_input_slots |= (VARYING_BIT_BFC0 << c);
1555
1556 /* If front color isn't written, we opt to give them back color
1557 * instead of an undefined value. Switch from COL to BFC.
1558 */
1559 if (last_vue_map->varying_to_slot[VARYING_SLOT_COL0 + c] == -1) {
1560 fs_input_slots &= ~(VARYING_BIT_COL0 << c);
1561 fs_input_slots |= (VARYING_BIT_BFC0 << c);
1562 }
1563 }
1564 }
1565
1566 /* Compute the minimum URB Read Length necessary for the FS inputs.
1567 *
1568 * From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
1569 * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
1570 *
1571 * "This field should be set to the minimum length required to read the
1572 * maximum source attribute. The maximum source attribute is indicated
1573 * by the maximum value of the enabled Attribute # Source Attribute if
1574 * Attribute Swizzle Enable is set, Number of Output Attributes-1 if
1575 * enable is not set.
1576 * read_length = ceiling((max_source_attr + 1) / 2)
1577 *
1578 * [errata] Corruption/Hang possible if length programmed larger than
1579 * recommended"
1580 *
1581 * Similar text exists for Ivy Bridge.
1582 *
1583 * We find the last URB slot that's actually read by the FS.
1584 */
1585 unsigned last_read_slot = last_vue_map->num_slots - 1;
1586 while (last_read_slot > first_slot && !(fs_input_slots &
1587 (1ull << last_vue_map->slot_to_varying[last_read_slot])))
1588 --last_read_slot;
1589
1590 /* The URB read length is the difference of the two, counted in pairs. */
1591 *out_length = DIV_ROUND_UP(last_read_slot - first_slot + 1, 2);
1592 }
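/* Worked example (hypothetical VUE layout, for illustration only): if the
 * last VUE map is { 0: PSIZ, 1: POS, 2: COL0, 3: TEX0 } and the FS reads
 * COL0 and TEX0, then first_slot = 2, so *out_offset = 1 (in pairs);
 * last_read_slot = 3, so *out_length = DIV_ROUND_UP(3 - 2 + 1, 2) = 1 pair.
 */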
1593
1594 static void
1595 iris_emit_sbe(struct iris_batch *batch, const struct iris_context *ice)
1596 {
1597 const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;
1598 const struct brw_wm_prog_data *wm_prog_data = (void *)
1599 ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;
1600 struct pipe_shader_state *p_fs =
1601 (void *) ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
1602 assert(p_fs->type == PIPE_SHADER_IR_NIR);
1603 nir_shader *fs_nir = p_fs->ir.nir;
1604
1605 unsigned urb_read_offset, urb_read_length;
1606 iris_compute_sbe_urb_read_interval(fs_nir->info.inputs_read,
1607 ice->shaders.last_vue_map,
1608 cso_rast->light_twoside,
1609 &urb_read_offset, &urb_read_length);
1610
1611 iris_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) {
1612 sbe.AttributeSwizzleEnable = true;
1613 sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
1614 sbe.PointSpriteTextureCoordinateOrigin = cso_rast->sprite_coord_mode;
1615 sbe.VertexURBEntryReadOffset = urb_read_offset;
1616 sbe.VertexURBEntryReadLength = urb_read_length;
1617 sbe.ForceVertexURBEntryReadOffset = true;
1618 sbe.ForceVertexURBEntryReadLength = true;
1619 sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs;
1620
1621 for (int i = 0; i < 32; i++) {
1622 sbe.AttributeActiveComponentFormat[i] = ACTIVE_COMPONENT_XYZW;
1623 }
1624 }
1625 }
1626
1627 static void
1628 iris_bind_compute_state(struct pipe_context *ctx, void *state)
1629 {
1630 }
1631
1632 static void
1633 iris_populate_vs_key(const struct iris_context *ice,
1634 struct brw_vs_prog_key *key)
1635 {
1636 memset(key, 0, sizeof(*key));
1637 }
1638
1639 static void
1640 iris_populate_tcs_key(const struct iris_context *ice,
1641 struct brw_tcs_prog_key *key)
1642 {
1643 memset(key, 0, sizeof(*key));
1644 }
1645
1646 static void
1647 iris_populate_tes_key(const struct iris_context *ice,
1648 struct brw_tes_prog_key *key)
1649 {
1650 memset(key, 0, sizeof(*key));
1651 }
1652
1653 static void
1654 iris_populate_gs_key(const struct iris_context *ice,
1655 struct brw_gs_prog_key *key)
1656 {
1657 memset(key, 0, sizeof(*key));
1658 }
1659
1660 static void
1661 iris_populate_fs_key(const struct iris_context *ice,
1662 struct brw_wm_prog_key *key)
1663 {
1664 memset(key, 0, sizeof(*key));
1665
1666 /* XXX: dirty flags? */
1667 const struct pipe_framebuffer_state *fb = &ice->state.framebuffer;
1668 const struct iris_depth_stencil_alpha_state *zsa = ice->state.cso_zsa;
1669 const struct iris_rasterizer_state *rast = ice->state.cso_rast;
1670 const struct iris_blend_state *blend = ice->state.cso_blend;
1671
1672 key->nr_color_regions = fb->nr_cbufs;
1673
1674 key->clamp_fragment_color = rast->clamp_fragment_color;
1675
1676 key->replicate_alpha = fb->nr_cbufs > 1 &&
1677 (zsa->alpha.enabled || blend->alpha_to_coverage);
1678
1679 // key->force_dual_color_blend for unigine
1680 #if 0
1681 if (cso_rast->multisample) {
1682 key->persample_interp =
1683 ctx->Multisample.SampleShading &&
1684 (ctx->Multisample.MinSampleShadingValue *
1685 _mesa_geometric_samples(ctx->DrawBuffer) > 1);
1686
1687 key->multisample_fbo = fb->samples > 1;
1688 }
1689 #endif
1690
1691 key->coherent_fb_fetch = true;
1692 }
1693
1694 /* TODO for INIT_THREAD_DISPATCH_FIELDS:
1695  * pkt.SamplerCount = DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4);
1696  * pkt.PerThreadScratchSpace = prog_data->total_scratch == 0 ? 0
1697  *    : ffs(stage_state->per_thread_scratch) - 11;  */
1698
1699 static uint64_t
1700 KSP(const struct iris_compiled_shader *shader)
1701 {
1702 struct iris_resource *res = (void *) shader->buffer;
1703 return res->bo->gtt_offset + shader->offset;
1704 }
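/* Note: using the full GTT address as the KernelStartPointer assumes shader
 * BOs are pinned at stable addresses and that Instruction Base Address is
 * programmed to zero, matching the "general address" convention used
 * elsewhere in this file.
 */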
1705
1706 #define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \
1707 pkt.KernelStartPointer = KSP(shader); \
1708 pkt.BindingTableEntryCount = prog_data->binding_table.size_bytes / 4; \
1709 pkt.FloatingPointMode = prog_data->use_alt_mode; \
1710 \
1711 pkt.DispatchGRFStartRegisterForURBData = \
1712 prog_data->dispatch_grf_start_reg; \
1713 pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length; \
1714 pkt.prefix##URBEntryReadOffset = 0; \
1715 \
1716 pkt.StatisticsEnable = true; \
1717 pkt.Enable = true;
1718
1719 static void
1720 iris_set_vs_state(const struct gen_device_info *devinfo,
1721 struct iris_compiled_shader *shader)
1722 {
1723 struct brw_stage_prog_data *prog_data = shader->prog_data;
1724 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
1725
1726 iris_pack_command(GENX(3DSTATE_VS), shader->derived_data, vs) {
1727 INIT_THREAD_DISPATCH_FIELDS(vs, Vertex);
1728 vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;
1729 vs.SIMD8DispatchEnable = true;
1730 vs.UserClipDistanceCullTestEnableBitmask =
1731 vue_prog_data->cull_distance_mask;
1732 }
1733 }
1734
1735 static void
1736 iris_set_tcs_state(const struct gen_device_info *devinfo,
1737 struct iris_compiled_shader *shader)
1738 {
1739 struct brw_stage_prog_data *prog_data = shader->prog_data;
1740 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
1741 struct brw_tcs_prog_data *tcs_prog_data = (void *) prog_data;
1742
1743 iris_pack_command(GENX(3DSTATE_HS), shader->derived_data, hs) {
1744 INIT_THREAD_DISPATCH_FIELDS(hs, Vertex);
1745
1746 hs.InstanceCount = tcs_prog_data->instances - 1;
1747 hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
1748 hs.IncludeVertexHandles = true;
1749 }
1750 }
1751
1752 static void
1753 iris_set_tes_state(const struct gen_device_info *devinfo,
1754 struct iris_compiled_shader *shader)
1755 {
1756 struct brw_stage_prog_data *prog_data = shader->prog_data;
1757 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
1758 struct brw_tes_prog_data *tes_prog_data = (void *) prog_data;
1759
1760 uint32_t *te_state = (void *) shader->derived_data;
1761 uint32_t *ds_state = te_state + GENX(3DSTATE_TE_length);
1762
1763 iris_pack_command(GENX(3DSTATE_TE), te_state, te) {
1764 te.Partitioning = tes_prog_data->partitioning;
1765 te.OutputTopology = tes_prog_data->output_topology;
1766 te.TEDomain = tes_prog_data->domain;
1767 te.TEEnable = true;
1768 te.MaximumTessellationFactorOdd = 63.0;
1769 te.MaximumTessellationFactorNotOdd = 64.0;
1770 }
1771
1772 iris_pack_command(GENX(3DSTATE_DS), ds_state, ds) {
1773 INIT_THREAD_DISPATCH_FIELDS(ds, Patch);
1774
1775 ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH;
1776 ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;
1777 ds.ComputeWCoordinateEnable =
1778 tes_prog_data->domain == BRW_TESS_DOMAIN_TRI;
1779
1780 ds.UserClipDistanceCullTestEnableBitmask =
1781 vue_prog_data->cull_distance_mask;
1782 }
1784 }
1785
1786 static void
1787 iris_set_gs_state(const struct gen_device_info *devinfo,
1788 struct iris_compiled_shader *shader)
1789 {
1790 struct brw_stage_prog_data *prog_data = shader->prog_data;
1791 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
1792 struct brw_gs_prog_data *gs_prog_data = (void *) prog_data;
1793
1794 iris_pack_command(GENX(3DSTATE_GS), shader->derived_data, gs) {
1795 INIT_THREAD_DISPATCH_FIELDS(gs, Vertex);
1796
1797 gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
1798 gs.OutputTopology = gs_prog_data->output_topology;
1799 gs.ControlDataHeaderSize =
1800 gs_prog_data->control_data_header_size_hwords;
1801 gs.InstanceControl = gs_prog_data->invocations - 1;
1802 gs.DispatchMode = SIMD8;
1803 gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;
1804 gs.ControlDataFormat = gs_prog_data->control_data_format;
1805 gs.ReorderMode = TRAILING;
1806 gs.ExpectedVertexCount = gs_prog_data->vertices_in;
1807 gs.MaximumNumberofThreads =
1808 GEN_GEN == 8 ? (devinfo->max_gs_threads / 2 - 1)
1809 : (devinfo->max_gs_threads - 1);
1810
1811 if (gs_prog_data->static_vertex_count != -1) {
1812 gs.StaticOutput = true;
1813 gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count;
1814 }
1815 gs.IncludeVertexHandles = vue_prog_data->include_vue_handles;
1816
1817 gs.UserClipDistanceCullTestEnableBitmask =
1818 vue_prog_data->cull_distance_mask;
1819
1820 const int urb_entry_write_offset = 1;
1821 const uint32_t urb_entry_output_length =
1822 DIV_ROUND_UP(vue_prog_data->vue_map.num_slots, 2) -
1823 urb_entry_write_offset;
1824
1825 gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset;
1826 gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1);
1827 }
1828 }
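/* Worked example (hypothetical slot count): with 5 VUE slots written and a
 * write offset of 1 pair (skipping the header), urb_entry_output_length =
 * DIV_ROUND_UP(5, 2) - 1 = 2 pairs; the MAX2 above guards the degenerate
 * case where a tiny VUE map would otherwise produce a length of zero.
 */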
1829
1830 static void
1831 iris_set_fs_state(const struct gen_device_info *devinfo,
1832 struct iris_compiled_shader *shader)
1833 {
1834 struct brw_stage_prog_data *prog_data = shader->prog_data;
1835 struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data;
1836
1837 uint32_t *ps_state = (void *) shader->derived_data;
1838 uint32_t *psx_state = ps_state + GENX(3DSTATE_PS_length);
1839
1840 iris_pack_command(GENX(3DSTATE_PS), ps_state, ps) {
1841 ps.VectorMaskEnable = true;
1842 //ps.SamplerCount = ...
1843 ps.BindingTableEntryCount = prog_data->binding_table.size_bytes / 4;
1844 ps.FloatingPointMode = prog_data->use_alt_mode;
1845 ps.MaximumNumberofThreadsPerPSD = 64 - (GEN_GEN == 8 ? 2 : 1);
1846
1847 ps.PushConstantEnable = prog_data->nr_params > 0 ||
1848 prog_data->ubo_ranges[0].length > 0;
1849
1850 /* From the documentation for this packet:
1851 * "If the PS kernel does not need the Position XY Offsets to
1852 * compute a Position Value, then this field should be programmed
1853 * to POSOFFSET_NONE."
1854 *
1855 * "SW Recommendation: If the PS kernel needs the Position Offsets
1856 * to compute a Position XY value, this field should match Position
1857 * ZW Interpolation Mode to ensure a consistent position.xyzw
1858 * computation."
1859 *
1860 * We only require XY sample offsets, so this recommendation doesn't
1861 * apply to us at the moment. We may need it in the future.
1862 */
1863 ps.PositionXYOffsetSelect =
1864 wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE;
1865 ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
1866 ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
1867 ps._32PixelDispatchEnable = wm_prog_data->dispatch_32;
1868
1869 // XXX: Disable SIMD32 with 16x MSAA
1870
1871 ps.DispatchGRFStartRegisterForConstantSetupData0 =
1872 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
1873 ps.DispatchGRFStartRegisterForConstantSetupData1 =
1874 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
1875 ps.DispatchGRFStartRegisterForConstantSetupData2 =
1876 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
1877
1878 ps.KernelStartPointer0 =
1879 KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
1880 ps.KernelStartPointer1 =
1881 KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
1882 ps.KernelStartPointer2 =
1883 KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
1884 }
1885
1886 iris_pack_command(GENX(3DSTATE_PS_EXTRA), psx_state, psx) {
1887 psx.PixelShaderValid = true;
1888 psx.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
1889 psx.PixelShaderKillsPixel = wm_prog_data->uses_kill;
1890 psx.AttributeEnable = wm_prog_data->num_varying_inputs != 0;
1891 psx.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
1892 psx.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
1893 psx.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
1894
1895 if (wm_prog_data->uses_sample_mask) {
1896 /* TODO: conservative rasterization */
1897 if (wm_prog_data->post_depth_coverage)
1898 psx.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
1899 else
1900 psx.InputCoverageMaskState = ICMS_NORMAL;
1901 }
1902
1903 psx.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
1904 psx.PixelShaderPullsBary = wm_prog_data->pulls_bary;
1905 psx.PixelShaderComputesStencil = wm_prog_data->computed_stencil;
1906
1907 // XXX: UAV bit
1908 }
1909 }
1910
1911 static unsigned
1912 iris_derived_program_state_size(enum iris_program_cache_id cache_id)
1913 {
1914 assert(cache_id <= IRIS_CACHE_CS);
1915
1916 static const unsigned dwords[] = {
1917 [IRIS_CACHE_VS] = GENX(3DSTATE_VS_length),
1918 [IRIS_CACHE_TCS] = GENX(3DSTATE_HS_length),
1919 [IRIS_CACHE_TES] = GENX(3DSTATE_TE_length) + GENX(3DSTATE_DS_length),
1920 [IRIS_CACHE_GS] = GENX(3DSTATE_GS_length),
1921 [IRIS_CACHE_FS] =
1922 GENX(3DSTATE_PS_length) + GENX(3DSTATE_PS_EXTRA_length),
1923 [IRIS_CACHE_CS] = 0,
1924 [IRIS_CACHE_BLORP_BLIT] = 0,
1925 };
1926
1927 return sizeof(uint32_t) * dwords[cache_id];
1928 }
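/* For example, the IRIS_CACHE_TES entry covers two packets stored back to
 * back: iris_set_tes_state() packs 3DSTATE_TE at derived_data and 3DSTATE_DS
 * immediately after it, matching the combined length above.
 */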
1929
1930 static void
1931 iris_set_derived_program_state(const struct gen_device_info *devinfo,
1932 enum iris_program_cache_id cache_id,
1933 struct iris_compiled_shader *shader)
1934 {
1935 switch (cache_id) {
1936 case IRIS_CACHE_VS:
1937 iris_set_vs_state(devinfo, shader);
1938 break;
1939 case IRIS_CACHE_TCS:
1940 iris_set_tcs_state(devinfo, shader);
1941 break;
1942 case IRIS_CACHE_TES:
1943 iris_set_tes_state(devinfo, shader);
1944 break;
1945 case IRIS_CACHE_GS:
1946 iris_set_gs_state(devinfo, shader);
1947 break;
1948 case IRIS_CACHE_FS:
1949 iris_set_fs_state(devinfo, shader);
1950 break;
1951 case IRIS_CACHE_CS:
1952 break;
1953 default:
1954 break;
1955 }
1956 }
1957
1958 static void
1959 iris_upload_urb_config(struct iris_context *ice, struct iris_batch *batch)
1960 {
1961 const struct gen_device_info *devinfo = &batch->screen->devinfo;
1962 const unsigned push_size_kB = 32;
1963 unsigned entries[4];
1964 unsigned start[4];
1965 unsigned size[4];
1966
1967 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
1968 if (!ice->shaders.prog[i]) {
1969 size[i] = 1;
1970 } else {
1971 struct brw_vue_prog_data *vue_prog_data =
1972 (void *) ice->shaders.prog[i]->prog_data;
1973 size[i] = vue_prog_data->urb_entry_size;
1974 }
1975 assert(size[i] != 0);
1976 }
1977
1978 gen_get_urb_config(devinfo, 1024 * push_size_kB,
1979 1024 * ice->shaders.urb_size,
1980 ice->shaders.prog[MESA_SHADER_TESS_EVAL] != NULL,
1981 ice->shaders.prog[MESA_SHADER_GEOMETRY] != NULL,
1982 size, entries, start);
1983
1984 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
1985 iris_emit_cmd(batch, GENX(3DSTATE_URB_VS), urb) {
1986 urb._3DCommandSubOpcode += i;
1987 urb.VSURBStartingAddress = start[i];
1988 urb.VSURBEntryAllocationSize = size[i] - 1;
1989 urb.VSNumberofURBEntries = entries[i];
1990 }
1991 }
1992 }
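/* The URB loop above relies on 3DSTATE_URB_{VS,HS,DS,GS} having consecutive
 * subopcodes, in the same order as the gl_shader_stage enum, so adding the
 * stage index to 3DSTATE_URB_VS's subopcode selects the right packet.
 */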
1993
1994 static const uint32_t push_constant_opcodes[] = {
1995 [MESA_SHADER_VERTEX] = 21,
1996 [MESA_SHADER_TESS_CTRL] = 25, /* HS */
1997 [MESA_SHADER_TESS_EVAL] = 26, /* DS */
1998 [MESA_SHADER_GEOMETRY] = 22,
1999 [MESA_SHADER_FRAGMENT] = 23,
2000 [MESA_SHADER_COMPUTE] = 0,
2001 };
2002
2003 /**
2004 * Add a surface to the validation list, as well as the buffer containing
2005 * the corresponding SURFACE_STATE.
2006 *
2007 * Returns the binding table entry (offset to SURFACE_STATE).
2008 */
2009 static uint32_t
2010 use_surface(struct iris_batch *batch,
2011 struct pipe_surface *p_surf,
2012 bool writeable)
2013 {
2014 struct iris_surface *surf = (void *) p_surf;
2015 struct iris_resource *res = (void *) p_surf->texture;
2016 struct iris_resource *state_res = (void *) surf->surface_state_resource;
2017 iris_use_pinned_bo(batch, res->bo, writeable);
2018 iris_use_pinned_bo(batch, state_res->bo, false);
2019
2020 return surf->surface_state_offset;
2021 }
2022
2023 static uint32_t
2024 use_sampler_view(struct iris_batch *batch, struct iris_sampler_view *isv)
2025 {
2026 struct iris_resource *res = (void *) isv->pipe.texture;
2027 struct iris_resource *state_res = (void *) isv->surface_state_resource;
2028 iris_use_pinned_bo(batch, res->bo, false);
2029 iris_use_pinned_bo(batch, state_res->bo, false);
2030
2031 return isv->surface_state_offset;
2032 }
2033
2034 static void
2035 iris_upload_render_state(struct iris_context *ice,
2036 struct iris_batch *batch,
2037 const struct pipe_draw_info *draw)
2038 {
2039 const uint64_t dirty = ice->state.dirty;
2040
2041 struct brw_wm_prog_data *wm_prog_data = (void *)
2042 ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;
2043
2044 if (dirty & IRIS_DIRTY_CC_VIEWPORT) {
2045 struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa;
2046 iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) {
2047 ptr.CCViewportPointer =
2048 emit_state(batch, ice->state.dynamic_uploader,
2049 cso->cc_vp, sizeof(cso->cc_vp), 32);
2050 }
2051 }
2052
2053 if (dirty & IRIS_DIRTY_SF_CL_VIEWPORT) {
2054 struct iris_viewport_state *cso = ice->state.cso_vp;
2055 iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) {
2056 ptr.SFClipViewportPointer =
2057 emit_state(batch, ice->state.dynamic_uploader, cso->sf_cl_vp,
2058 4 * GENX(SF_CLIP_VIEWPORT_length) *
2059 ice->state.num_viewports, 64);
2060 }
2061 }
2062
2063 /* XXX: L3 State */
2064
2065 if (dirty & IRIS_DIRTY_URB) {
2066 iris_upload_urb_config(ice, batch);
2067 }
2068
2069 if (dirty & IRIS_DIRTY_BLEND_STATE) {
2070 struct iris_blend_state *cso_blend = ice->state.cso_blend;
2071 struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
2072 struct iris_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa;
2073 const int num_dwords = GENX(BLEND_STATE_length) +
2074 cso_fb->nr_cbufs * GENX(BLEND_STATE_ENTRY_length);
2075 uint32_t blend_offset;
2076 uint32_t *blend_map =
2077 stream_state(batch, ice->state.dynamic_uploader, 4 * num_dwords, 64,
2078 &blend_offset);
2079
2080 uint32_t blend_state_header;
2081 iris_pack_state(GENX(BLEND_STATE), &blend_state_header, bs) {
2082 bs.AlphaTestEnable = cso_zsa->alpha.enabled;
2083 bs.AlphaTestFunction = translate_compare_func(cso_zsa->alpha.func);
2084 }
2085
2086 blend_map[0] = blend_state_header | cso_blend->blend_state[0];
2087 memcpy(&blend_map[1], &cso_blend->blend_state[1],
2088 sizeof(cso_blend->blend_state) - sizeof(uint32_t));
2089
2090 iris_emit_cmd(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), ptr) {
2091 ptr.BlendStatePointer = blend_offset;
2092 ptr.BlendStatePointerValid = true;
2093 }
2094 }
2095
2096 if (dirty & IRIS_DIRTY_COLOR_CALC_STATE) {
2097 struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa;
2098 uint32_t cc_offset;
2099 void *cc_map =
2100 stream_state(batch, ice->state.dynamic_uploader,
2101 sizeof(uint32_t) * GENX(COLOR_CALC_STATE_length),
2102 64, &cc_offset);
2103 iris_pack_state(GENX(COLOR_CALC_STATE), cc_map, cc) {
2104 cc.AlphaTestFormat = ALPHATEST_FLOAT32;
2105 cc.AlphaReferenceValueAsFLOAT32 = cso->alpha.ref_value;
2106 cc.BlendConstantColorRed = ice->state.blend_color.color[0];
2107 cc.BlendConstantColorGreen = ice->state.blend_color.color[1];
2108 cc.BlendConstantColorBlue = ice->state.blend_color.color[2];
2109 cc.BlendConstantColorAlpha = ice->state.blend_color.color[3];
2110 }
2111 iris_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
2112 ptr.ColorCalcStatePointer = cc_offset;
2113 ptr.ColorCalcStatePointerValid = true;
2114 }
2115 }
2116
2117 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
2118 // XXX: wrong dirty tracking...
2119 if (!(dirty & (IRIS_DIRTY_CONSTANTS_VS << stage)))
2120 continue;
2121
2122 struct pipe_constant_buffer *cbuf0 =
2123 &ice->shaders.state[stage].constbuf[0];
2124
2125 if (!ice->shaders.prog[stage] || cbuf0->buffer || !cbuf0->buffer_size)
2126 continue;
2127
2128 struct iris_shader_state *shs = &ice->shaders.state[stage];
2129 shs->const_size = cbuf0->buffer_size;
2130 u_upload_data(ice->ctx.const_uploader, 0, shs->const_size, 32,
2131 cbuf0->user_buffer, &shs->const_offset,
2132 &shs->push_resource);
2133 }
2134
2135 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
2136 // XXX: wrong dirty tracking...
2137 if (!(dirty & (IRIS_DIRTY_CONSTANTS_VS << stage)))
2138 continue;
2139
2140 struct iris_shader_state *shs = &ice->shaders.state[stage];
2141 struct iris_resource *res = (void *) shs->push_resource;
2142
2143 iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_VS), pkt) {
2144 pkt._3DCommandSubOpcode = push_constant_opcodes[stage];
2145 if (res) {
2146 pkt.ConstantBody.ReadLength[3] = DIV_ROUND_UP(shs->const_size, 32);
2147 pkt.ConstantBody.Buffer[3] = ro_bo(res->bo, shs->const_offset);
2148 }
2149 }
2150 }
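/* Constant Buffer Read Length is counted in 256-bit (32-byte) units, hence
 * the DIV_ROUND_UP above: e.g. a 64-byte cbuf0 upload gives ReadLength[3] = 2.
 */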
2151
2152 // Surfaces:
2153 // - pull constants
2154 // - ubos/ssbos/abos
2155 // - images
2156 // - textures
2157 // - render targets - write and read
2158 // XXX: 3DSTATE_BINDING_TABLE_POINTERS_XS
2159
2160 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
2161 struct iris_compiled_shader *shader = ice->shaders.prog[stage];
2162 if (!shader) // XXX: dirty bits...also, emit a disable maybe?
2163 continue;
2164
2165 struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;
2166 uint32_t bt_offset = 0;
2167 uint32_t *bt_map = NULL;
2168 int s = 0;
2169
2170 if (prog_data->binding_table.size_bytes != 0) {
2171 iris_use_pinned_bo(batch, ice->state.binder.bo, false);
2172 bt_map = iris_binder_reserve(&ice->state.binder,
2173 prog_data->binding_table.size_bytes,
2174 &bt_offset);
2175 }
2176
2177 iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), ptr) {
2178 ptr._3DCommandSubOpcode = 38 + stage;
2179 ptr.PointertoVSBindingTable = bt_offset;
2180 }
2181
2182 if (stage == MESA_SHADER_FRAGMENT) {
2183 struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
2184 for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
2185 bt_map[s++] = use_surface(batch, cso_fb->cbufs[i], true);
2186 }
2187 }
2188
2189 assert(prog_data->binding_table.texture_start ==
2190 (ice->state.num_textures[stage] ? s : 0xd0d0d0d0));
2191
2192 for (int i = 0; i < ice->state.num_textures[stage]; i++) {
2193 struct iris_sampler_view *view = ice->state.textures[stage][i];
2194 bt_map[s++] = use_sampler_view(batch, view);
2195 }
2196
2197 #if 0
2198 // XXX: not implemented yet
2199 assert(prog_data->binding_table.pull_constants_start == 0xd0d0d0d0);
2200 assert(prog_data->binding_table.ubo_start == 0xd0d0d0d0);
2201 assert(prog_data->binding_table.ssbo_start == 0xd0d0d0d0);
2202 assert(prog_data->binding_table.image_start == 0xd0d0d0d0);
2203 assert(prog_data->binding_table.shader_time_start == 0xd0d0d0d0);
2204 //assert(prog_data->binding_table.plane_start[1] == 0xd0d0d0d0);
2205 //assert(prog_data->binding_table.plane_start[2] == 0xd0d0d0d0);
2206 #endif
2207 }
2208
2209 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
2210 if (!(dirty & (IRIS_DIRTY_SAMPLER_STATES_VS << stage)) ||
2211 !ice->shaders.prog[stage])
2212 continue;
2213
2214 iris_emit_cmd(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ptr) {
2215 ptr._3DCommandSubOpcode = 43 + stage;
2216 ptr.PointertoVSSamplerState = ice->state.sampler_table_offset[stage];
2217 }
2218 }
2219
2220 if (dirty & IRIS_DIRTY_MULTISAMPLE) {
2221 iris_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms) {
2222 ms.PixelLocation =
2223 ice->state.cso_rast->half_pixel_center ? CENTER : UL_CORNER;
2224 if (ice->state.framebuffer.samples > 0)
2225 ms.NumberofMultisamples = ffs(ice->state.framebuffer.samples) - 1;
2226 }
2227 }
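/* NumberofMultisamples is log2 of the sample count: ffs(4) - 1 = 2 for 4x
 * MSAA, ffs(8) - 1 = 3 for 8x, and so on.
 */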
2228
2229 if (dirty & IRIS_DIRTY_SAMPLE_MASK) {
2230 iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), ms) {
2231 ms.SampleMask = MAX2(ice->state.sample_mask, 1);
2232 }
2233 }
2234
2235 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
2236 if (!(dirty & (IRIS_DIRTY_VS << stage)))
2237 continue;
2238
2239 struct iris_compiled_shader *shader = ice->shaders.prog[stage];
2240
2241 if (shader) {
2242 struct iris_resource *cache = (void *) shader->buffer;
2243 iris_use_pinned_bo(batch, cache->bo, false);
2244 iris_batch_emit(batch, shader->derived_data,
2245 iris_derived_program_state_size(stage));
2246 } else {
2247 if (stage == MESA_SHADER_TESS_EVAL) {
2248 iris_emit_cmd(batch, GENX(3DSTATE_HS), hs);
2249 iris_emit_cmd(batch, GENX(3DSTATE_TE), te);
2250 iris_emit_cmd(batch, GENX(3DSTATE_DS), ds);
2251 } else if (stage == MESA_SHADER_GEOMETRY) {
2252 iris_emit_cmd(batch, GENX(3DSTATE_GS), gs);
2253 }
2254 }
2255 }
2256
2257 // XXX: SOL:
2258 // 3DSTATE_STREAMOUT
2259 // 3DSTATE_SO_BUFFER
2260 // 3DSTATE_SO_DECL_LIST
2261
2262 if (dirty & IRIS_DIRTY_CLIP) {
2263 struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;
2264 struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
2265
2266 uint32_t dynamic_clip[GENX(3DSTATE_CLIP_length)];
2267 iris_pack_command(GENX(3DSTATE_CLIP), &dynamic_clip, cl) {
2268 if (wm_prog_data->barycentric_interp_modes &
2269 BRW_BARYCENTRIC_NONPERSPECTIVE_BITS)
2270 cl.NonPerspectiveBarycentricEnable = true;
2271
2272 cl.ForceZeroRTAIndexEnable = cso_fb->layers == 0;
2273 }
2274 iris_emit_merge(batch, cso_rast->clip, dynamic_clip,
2275 ARRAY_SIZE(cso_rast->clip));
2276 }
2277
2278 if (dirty & IRIS_DIRTY_RASTER) {
2279 struct iris_rasterizer_state *cso = ice->state.cso_rast;
2280 iris_batch_emit(batch, cso->raster, sizeof(cso->raster));
2281 iris_batch_emit(batch, cso->sf, sizeof(cso->sf));
2283 }
2284
2285 if (dirty & (IRIS_DIRTY_RASTER | IRIS_DIRTY_FS)) {
2286 struct iris_rasterizer_state *cso = ice->state.cso_rast;
2287 uint32_t dynamic_wm[GENX(3DSTATE_WM_length)];
2288
2289 iris_pack_command(GENX(3DSTATE_WM), &dynamic_wm, wm) {
2290 wm.BarycentricInterpolationMode =
2291 wm_prog_data->barycentric_interp_modes;
2292
2293 if (wm_prog_data->early_fragment_tests)
2294 wm.EarlyDepthStencilControl = EDSC_PREPS;
2295 else if (wm_prog_data->has_side_effects)
2296 wm.EarlyDepthStencilControl = EDSC_PSEXEC;
2297 }
2298 iris_emit_merge(batch, cso->wm, dynamic_wm, ARRAY_SIZE(cso->wm));
2299 }
2300
2301 if (1) {
2302 // XXX: 3DSTATE_SBE, 3DSTATE_SBE_SWIZ
2303 // -> iris_raster_state (point sprite texture coordinate origin)
2304 // -> bunch of shader state...
2305 iris_emit_sbe(batch, ice);
2306 iris_emit_cmd(batch, GENX(3DSTATE_SBE_SWIZ), sbe) {
2307 }
2308 }
2309
2310 if (dirty & IRIS_DIRTY_PS_BLEND) {
2311 struct iris_blend_state *cso_blend = ice->state.cso_blend;
2312 struct iris_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa;
2313 uint32_t dynamic_pb[GENX(3DSTATE_PS_BLEND_length)];
2314 iris_pack_command(GENX(3DSTATE_PS_BLEND), &dynamic_pb, pb) {
2315 pb.HasWriteableRT = true; // XXX: comes from somewhere :(
2316 pb.AlphaTestEnable = cso_zsa->alpha.enabled;
2317 }
2318
2319 iris_emit_merge(batch, cso_blend->ps_blend, dynamic_pb,
2320 ARRAY_SIZE(cso_blend->ps_blend));
2321 }
2322
2323 if (dirty & IRIS_DIRTY_WM_DEPTH_STENCIL) {
2324 struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa;
2325 struct pipe_stencil_ref *p_stencil_refs = &ice->state.stencil_ref;
2326
2327 uint32_t stencil_refs[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];
2328 iris_pack_command(GENX(3DSTATE_WM_DEPTH_STENCIL), &stencil_refs, wmds) {
2329 wmds.StencilReferenceValue = p_stencil_refs->ref_value[0];
2330 wmds.BackfaceStencilReferenceValue = p_stencil_refs->ref_value[1];
2331 }
2332 iris_emit_merge(batch, cso->wmds, stencil_refs, ARRAY_SIZE(cso->wmds));
2333 }
2334
2335 if (dirty & IRIS_DIRTY_SCISSOR) {
2336 // XXX: allocate at set_scissor time?
2337 uint32_t scissor_offset = ice->state.num_scissors == 0 ? 0 :
2338 emit_state(batch, ice->state.dynamic_uploader, ice->state.scissors,
2339 sizeof(struct pipe_scissor_state) *
2340 ice->state.num_scissors, 32);
2341
2342 iris_emit_cmd(batch, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) {
2343 ptr.ScissorRectPointer = scissor_offset;
2344 }
2345 }
2346
2347 // XXX: 3DSTATE_DEPTH_BUFFER
2348 // XXX: 3DSTATE_HIER_DEPTH_BUFFER
2349 // XXX: 3DSTATE_STENCIL_BUFFER
2350 // XXX: 3DSTATE_CLEAR_PARAMS
2351
2352 if (dirty & IRIS_DIRTY_POLYGON_STIPPLE) {
2353 iris_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_PATTERN), poly) {
2354 for (int i = 0; i < 32; i++) {
2355 poly.PatternRow[i] = ice->state.poly_stipple.stipple[i];
2356 }
2357 }
2358 }
2359
2360 if (dirty & IRIS_DIRTY_LINE_STIPPLE) {
2361 struct iris_rasterizer_state *cso = ice->state.cso_rast;
2362 iris_batch_emit(batch, cso->line_stipple, sizeof(cso->line_stipple));
2363 }
2364
2365 if (1) {
2366 iris_emit_cmd(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
2367 topo.PrimitiveTopologyType =
2368 translate_prim_type(draw->mode, draw->vertices_per_patch);
2369 }
2370 }
2371
2372 if (draw->index_size > 0) {
2373 struct iris_resource *res = (struct iris_resource *)draw->index.resource;
2374
2375 assert(!draw->has_user_indices);
2376
2377 iris_emit_cmd(batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
2378 ib.IndexFormat = ffs(draw->index_size) - 1;
2379 ib.MOCS = MOCS_WB;
2380 ib.BufferSize = res->bo->size;
2381 ib.BufferStartingAddress = ro_bo(res->bo, 0);
2382 }
2383 }
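/* IndexFormat is an enum (INDEX_BYTE/INDEX_WORD/INDEX_DWORD = 0/1/2), not a
 * size in bytes; ffs(index_size) - 1 maps index sizes of 1/2/4 onto it.
 */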
2384
2385 if (dirty & IRIS_DIRTY_VERTEX_BUFFERS) {
2386 struct iris_vertex_buffer_state *cso = ice->state.cso_vertex_buffers;
2387
2388 STATIC_ASSERT(GENX(VERTEX_BUFFER_STATE_length) == 4);
2389 STATIC_ASSERT((GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_bits) % 32) == 0);
2390
2391 iris_batch_emit(batch, cso->vertex_buffers,
2392 sizeof(uint32_t) * (1 + 4 * cso->num_buffers));
2393
2394 for (unsigned i = 0; i < cso->num_buffers; i++) {
2395 iris_use_pinned_bo(batch, cso->bos[i], false);
2396 }
2397 }
2398
2399 if (dirty & IRIS_DIRTY_VERTEX_ELEMENTS) {
2400 struct iris_vertex_element_state *cso = ice->state.cso_vertex_elements;
2401 iris_batch_emit(batch, cso->vertex_elements, sizeof(uint32_t) *
2402 (1 + cso->count * GENX(VERTEX_ELEMENT_STATE_length)));
2403 for (int i = 0; i < cso->count; i++) {
2404 iris_batch_emit(batch, cso->vf_instancing[i], sizeof(uint32_t) *
2405 GENX(3DSTATE_VF_INSTANCING_length));
2406 }
2408 /* TODO: vertexid, instanceid support. (3DSTATE_VF_SGVS is a single global packet; emit it once, not per-element.) */
2409 iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgvs);
2411 }
2412
2413 if (1) {
2414 iris_emit_cmd(batch, GENX(3DSTATE_VF), vf) {
2415 if (draw->primitive_restart) {
2416 vf.IndexedDrawCutIndexEnable = true;
2417 vf.CutIndex = draw->restart_index;
2418 }
2419 }
2420 }
2421
2422 // XXX: Gen8 - PMA fix
2423
2424 assert(!draw->indirect); // XXX: indirect support
2425
2426 iris_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {
2427 prim.StartInstanceLocation = draw->start_instance;
2428 prim.InstanceCount = draw->instance_count;
2429 prim.VertexCountPerInstance = draw->count;
2430 prim.VertexAccessType = draw->index_size > 0 ? RANDOM : SEQUENTIAL;
2431
2432 // XXX: this is probably bonkers.
2433 prim.StartVertexLocation = draw->start;
2434
2435 if (draw->index_size) {
2436 prim.BaseVertexLocation += draw->index_bias;
2437 } else {
2438 prim.StartVertexLocation += draw->index_bias;
2439 }
2440
2441 //prim.BaseVertexLocation = ...;
2442 }
2443 }
2444
2445 static void
2446 iris_destroy_state(struct iris_context *ice)
2447 {
2448 // XXX: unreference resources/surfaces.
2449 for (unsigned i = 0; i < ice->state.framebuffer.nr_cbufs; i++) {
2450 pipe_surface_reference(&ice->state.framebuffer.cbufs[i], NULL);
2451 }
2452 pipe_surface_reference(&ice->state.framebuffer.zsbuf, NULL);
2453 }
2454
2455 static unsigned
2456 flags_to_post_sync_op(uint32_t flags)
2457 {
2458 if (flags & PIPE_CONTROL_WRITE_IMMEDIATE)
2459 return WriteImmediateData;
2460
2461 if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT)
2462 return WritePSDepthCount;
2463
2464 if (flags & PIPE_CONTROL_WRITE_TIMESTAMP)
2465 return WriteTimestamp;
2466
2467 return 0;
2468 }
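/* e.g. flags_to_post_sync_op(PIPE_CONTROL_WRITE_TIMESTAMP) == WriteTimestamp.
 * get_post_sync_flags() below asserts at most one post-sync bit is set, so
 * the ordering of the checks above never has to break a tie in practice.
 */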
2469
2470 /**
2471 * Do the given flags have a Post Sync or LRI Post Sync operation?
2472 */
2473 static enum pipe_control_flags
2474 get_post_sync_flags(enum pipe_control_flags flags)
2475 {
2476 flags &= PIPE_CONTROL_WRITE_IMMEDIATE |
2477 PIPE_CONTROL_WRITE_DEPTH_COUNT |
2478 PIPE_CONTROL_WRITE_TIMESTAMP |
2479 PIPE_CONTROL_LRI_POST_SYNC_OP;
2480
2481 /* Only one "Post Sync Op" is allowed, and it's mutually exclusive with
2482 * "LRI Post Sync Operation". So more than one bit set would be illegal.
2483 */
2484 assert(util_bitcount(flags) <= 1);
2485
2486 return flags;
2487 }
2488
2489 // XXX: compute support
2490 #define IS_COMPUTE_PIPELINE(batch) (batch->ring != I915_EXEC_RENDER)
2491
2492 /**
2493 * Emit a series of PIPE_CONTROL commands, taking into account any
2494 * workarounds necessary to actually accomplish the caller's request.
2495 *
2496 * Unless otherwise noted, spec quotations in this function come from:
2497 *
2498 * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming
2499 * Restrictions for PIPE_CONTROL.
2500 */
2501 static void
2502 iris_emit_raw_pipe_control(struct iris_batch *batch, uint32_t flags,
2503 struct iris_bo *bo, uint32_t offset, uint64_t imm)
2504 {
2505 UNUSED const struct gen_device_info *devinfo = &batch->screen->devinfo;
2506 enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags);
2507 enum pipe_control_flags non_lri_post_sync_flags =
2508 post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP;
2509
2510 /* Recursive PIPE_CONTROL workarounds --------------------------------
2511 * (http://knowyourmeme.com/memes/xzibit-yo-dawg)
2512 *
2513 * We do these first because we want to look at the original operation,
2514 * rather than any workarounds we set.
2515 */
2516 if (GEN_GEN == 9 && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
2517 /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
2518 * lists several workarounds:
2519 *
2520 * "Project: SKL, KBL, BXT
2521 *
2522 * If the VF Cache Invalidation Enable is set to a 1 in a
2523 * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
2524 * sets to 0, with the VF Cache Invalidation Enable set to 0
2525 * needs to be sent prior to the PIPE_CONTROL with VF Cache
2526 * Invalidation Enable set to a 1."
2527 */
2528 iris_emit_raw_pipe_control(batch, 0, NULL, 0, 0);
2529 }
2530
2531 if (GEN_GEN == 9 && IS_COMPUTE_PIPELINE(batch) && post_sync_flags) {
2532 /* Project: SKL / Argument: LRI Post Sync Operation [23]
2533 *
2534 * "PIPECONTROL command with “Command Streamer Stall Enable” must be
2535 * programmed prior to programming a PIPECONTROL command with "LRI
2536 * Post Sync Operation" in GPGPU mode of operation (i.e when
2537 * PIPELINE_SELECT command is set to GPGPU mode of operation)."
2538 *
2539 * The same text exists a few rows below for Post Sync Op.
2540 */
2541 iris_emit_raw_pipe_control(batch, PIPE_CONTROL_CS_STALL, bo, offset, imm);
2542 }
2543
2544 if (GEN_GEN == 10 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
2545 /* Cannonlake:
2546 * "Before sending a PIPE_CONTROL command with bit 12 set, SW must issue
2547 * another PIPE_CONTROL with Render Target Cache Flush Enable (bit 12)
2548 * = 0 and Pipe Control Flush Enable (bit 7) = 1"
2549 */
2550 iris_emit_raw_pipe_control(batch, PIPE_CONTROL_FLUSH_ENABLE, bo,
2551 offset, imm);
2552 }
2553
2554 /* "Flush Types" workarounds ---------------------------------------------
2555 * We do these now because they may add post-sync operations or CS stalls.
2556 */
2557
2558 if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) {
2559 /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate
2560 *
2561 * "'Post Sync Operation' must be enabled to 'Write Immediate Data' or
2562 * 'Write PS Depth Count' or 'Write Timestamp'."
2563 */
2564 if (!bo) {
2565 flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
2566 post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
2567 non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
2568 bo = batch->screen->workaround_bo;
2569 }
2570 }
2571
2572 /* #1130 from Gen10 workarounds page:
2573 *
2574 * "Enable Depth Stall on every Post Sync Op if Render target Cache
2575 * Flush is not enabled in same PIPE CONTROL and Enable Pixel score
2576 * board stall if Render target cache flush is enabled."
2577 *
2578 * Applicable to CNL B0 and C0 steppings only.
2579 *
2580 * The wording here is unclear, and this workaround doesn't look anything
2581 * like the internal bug report recommendations, but leave it be for now...
2582 */
2583 if (GEN_GEN == 10) {
2584 if (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) {
2585 flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
2586 } else if (flags & non_lri_post_sync_flags) {
2587 flags |= PIPE_CONTROL_DEPTH_STALL;
2588 }
2589 }
2590
2591 if (flags & PIPE_CONTROL_DEPTH_STALL) {
2592 /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable):
2593 *
2594 * "This bit must be DISABLED for operations other than writing
2595 * PS_DEPTH_COUNT."
2596 *
2597 * This seems like nonsense. An Ivybridge workaround requires us to
2598 * emit a PIPE_CONTROL with a depth stall and write immediate post-sync
2599 * operation. Gen8+ requires us to emit depth stalls and depth cache
2600 * flushes together. So, it's hard to imagine this means anything other
2601 * than "we originally intended this to be used for PS_DEPTH_COUNT".
2602 *
2603 * We ignore the supposed restriction and do nothing.
2604 */
2605 }
2606
2607 if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
2608 PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
2609 /* From the PIPE_CONTROL instruction table, bit 12 and bit 1:
2610 *
2611 * "This bit must be DISABLED for End-of-pipe (Read) fences,
2612 * PS_DEPTH_COUNT or TIMESTAMP queries."
2613 *
2614 * TODO: Implement end-of-pipe checking.
2615 */
2616 assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT |
2617 PIPE_CONTROL_WRITE_TIMESTAMP)));
2618 }
2619
2620 if (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD) {
2621 /* From the PIPE_CONTROL instruction table, bit 1:
2622 *
2623 * "This bit is ignored if Depth Stall Enable is set.
2624 * Further, the render cache is not flushed even if Write Cache
2625 * Flush Enable bit is set."
2626 *
2627 * We assert that the caller doesn't do this combination, to try and
2628 * prevent mistakes. It shouldn't hurt the GPU, though.
2629 */
2630 assert(!(flags & (PIPE_CONTROL_DEPTH_STALL |
2631 PIPE_CONTROL_RENDER_TARGET_FLUSH)));
2632 }
2633
2634 /* PIPE_CONTROL page workarounds ------------------------------------- */
2635
2636 if (GEN_GEN <= 8 && (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) {
2637 /* From the PIPE_CONTROL page itself:
2638 *
2639 * "IVB, HSW, BDW
2640 * Restriction: Pipe_control with CS-stall bit set must be issued
2641 * before a pipe-control command that has the State Cache
2642 * Invalidate bit set."
2643 */
2644 flags |= PIPE_CONTROL_CS_STALL;
2645 }
2646
2647 if (flags & PIPE_CONTROL_FLUSH_LLC) {
2648 /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC):
2649 *
2650 * "Project: ALL
2651 * SW must always program Post-Sync Operation to "Write Immediate
2652 * Data" when Flush LLC is set."
2653 *
2654 * For now, we just require the caller to do it.
2655 */
2656 assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE);
2657 }
2658
2659 /* "Post-Sync Operation" workarounds -------------------------------- */
2660
2661 /* Project: All / Argument: Global Snapshot Count Reset [19]
2662 *
2663 * "This bit must not be exercised on any product.
2664 * Requires stall bit ([20] of DW1) set."
2665 *
2666 * We don't use this, so we just assert that it isn't used. The
2667 * PIPE_CONTROL instruction page indicates that they intended this
2668 * as a debug feature and don't think it is useful in production,
2669 * but it may actually be usable, should we ever want to.
2670 */
2671 assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0);
2672
2673 if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR |
2674 PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) {
2675 /* Project: All / Arguments:
2676 *
2677 * - Generic Media State Clear [16]
2678 * - Indirect State Pointers Disable [16]
2679 *
2680 * "Requires stall bit ([20] of DW1) set."
2681 *
2682 * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media
2683 * State Clear) says:
2684 *
2685 * "PIPECONTROL command with “Command Streamer Stall Enable” must be
2686 * programmed prior to programming a PIPECONTROL command with "Media
2687 * State Clear" set in GPGPU mode of operation"
2688 *
2689 * This is a subset of the earlier rule, so there's nothing to do.
2690 */
2691 flags |= PIPE_CONTROL_CS_STALL;
2692 }
2693
2694 if (flags & PIPE_CONTROL_STORE_DATA_INDEX) {
2695 /* Project: All / Argument: Store Data Index
2696 *
2697 * "Post-Sync Operation ([15:14] of DW1) must be set to something other
2698 * than '0'."
2699 *
2700 * For now, we just assert that the caller does this. We might want to
2701 * automatically add a write to the workaround BO...
2702 */
2703 assert(non_lri_post_sync_flags != 0);
2704 }
2705
2706 if (flags & PIPE_CONTROL_SYNC_GFDT) {
2707 /* Project: All / Argument: Sync GFDT
2708 *
2709 * "Post-Sync Operation ([15:14] of DW1) must be set to something other
2710 * than '0' or 0x2520[13] must be set."
2711 *
2712 * For now, we just assert that the caller does this.
2713 */
2714 assert(non_lri_post_sync_flags != 0);
2715 }
2716
2717 if (flags & PIPE_CONTROL_TLB_INVALIDATE) {
2718 /* Project: IVB+ / Argument: TLB inv
2719 *
2720 * "Requires stall bit ([20] of DW1) set."
2721 *
2722 * Also, from the PIPE_CONTROL instruction table:
2723 *
2724 * "Project: SKL+
2725 * Post Sync Operation or CS stall must be set to ensure a TLB
2726 * invalidation occurs. Otherwise no cycle will occur to the TLB
2727 * cache to invalidate."
2728 *
2729 * This is not a subset of the earlier rule, so there's nothing to do.
2730 */
2731 flags |= PIPE_CONTROL_CS_STALL;
2732 }
2733
2734 if (GEN_GEN == 9 && devinfo->gt == 4) {
2735 /* TODO: The big Skylake GT4 post sync op workaround */
2736 }
2737
2738 /* "GPGPU specific workarounds" (both post-sync and flush) ------------ */
2739
2740 if (IS_COMPUTE_PIPELINE(batch)) {
2741 if (GEN_GEN >= 9 && (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE)) {
2742 /* Project: SKL+ / Argument: Tex Invalidate
2743 * "Requires stall bit ([20] of DW) set for all GPGPU Workloads."
2744 */
2745 flags |= PIPE_CONTROL_CS_STALL;
2746 }
2747
2748 if (GEN_GEN == 8 && (post_sync_flags ||
2749 (flags & (PIPE_CONTROL_NOTIFY_ENABLE |
2750 PIPE_CONTROL_DEPTH_STALL |
2751 PIPE_CONTROL_RENDER_TARGET_FLUSH |
2752 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
2753 PIPE_CONTROL_DATA_CACHE_FLUSH)))) {
2754 /* Project: BDW / Arguments:
2755 *
2756 * - LRI Post Sync Operation [23]
2757 * - Post Sync Op [15:14]
2758 * - Notify En [8]
2759 * - Depth Stall [13]
2760 * - Render Target Cache Flush [12]
2761 * - Depth Cache Flush [0]
2762 * - DC Flush Enable [5]
2763 *
2764 * "Requires stall bit ([20] of DW) set for all GPGPU and Media
2765 * Workloads."
2766 */
2767 flags |= PIPE_CONTROL_CS_STALL;
2768
2769 /* Also, from the PIPE_CONTROL instruction table, bit 20:
2770 *
2771 * "Project: BDW
2772 * This bit must be always set when PIPE_CONTROL command is
2773 * programmed by GPGPU and MEDIA workloads, except for the cases
2774 * when only Read Only Cache Invalidation bits are set (State
2775 * Cache Invalidation Enable, Instruction cache Invalidation
2776 * Enable, Texture Cache Invalidation Enable, Constant Cache
2777 * Invalidation Enable). This is to WA FFDOP CG issue, this WA
2778 * need not implemented when FF_DOP_CG is disable via "Fixed
2779 * Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register."
2780 *
2781 * It sounds like we could avoid CS stalls in some cases, but we
2782 * don't currently bother. This list isn't exactly the list above,
2783 * either...
2784 */
2785 }
2786 }
2787
2788 /* "Stall" workarounds ----------------------------------------------
2789 * These have to come after the earlier ones because we may have added
2790 * some additional CS stalls above.
2791 */
2792
2793 if (GEN_GEN < 9 && (flags & PIPE_CONTROL_CS_STALL)) {
2794 /* Project: PRE-SKL, VLV, CHV
2795 *
2796 * "[All Stepping][All SKUs]:
2797 *
2798 * One of the following must also be set:
2799 *
2800 * - Render Target Cache Flush Enable ([12] of DW1)
2801 * - Depth Cache Flush Enable ([0] of DW1)
2802 * - Stall at Pixel Scoreboard ([1] of DW1)
2803 * - Depth Stall ([13] of DW1)
2804 * - Post-Sync Operation ([13] of DW1)
2805 * - DC Flush Enable ([5] of DW1)"
2806 *
2807 * If we don't already have one of those bits set, we choose to add
2808 * "Stall at Pixel Scoreboard". Some of the other bits require a
2809 * CS stall as a workaround (see above), which would send us into
2810 * an infinite recursion of PIPE_CONTROLs. "Stall at Pixel Scoreboard"
2811 * appears to be safe, so we choose that.
2812 */
2813 const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
2814 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
2815 PIPE_CONTROL_WRITE_IMMEDIATE |
2816 PIPE_CONTROL_WRITE_DEPTH_COUNT |
2817 PIPE_CONTROL_WRITE_TIMESTAMP |
2818 PIPE_CONTROL_STALL_AT_SCOREBOARD |
2819 PIPE_CONTROL_DEPTH_STALL |
2820 PIPE_CONTROL_DATA_CACHE_FLUSH;
2821 if (!(flags & wa_bits))
2822 flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
2823 }
2824
2825 /* Emit --------------------------------------------------------------- */
2826
2827 iris_emit_cmd(batch, GENX(PIPE_CONTROL), pc) {
2828 pc.LRIPostSyncOperation = NoLRIOperation;
2829 pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE;
2830 pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH;
2831 pc.StoreDataIndex = 0;
2832 pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL;
2833 pc.GlobalSnapshotCountReset =
2834 flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET;
2835 pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE;
2836 pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR;
2837 pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD;
2838 pc.RenderTargetCacheFlushEnable =
2839 flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
2840 pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH;
2841 pc.StateCacheInvalidationEnable =
2842 flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE;
2843 pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
2844 pc.ConstantCacheInvalidationEnable =
2845 flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE;
2846 pc.PostSyncOperation = flags_to_post_sync_op(flags);
2847 pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL;
2848 pc.InstructionCacheInvalidateEnable =
2849 flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE;
2850 pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE;
2851 pc.IndirectStatePointersDisable =
2852 flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE;
2853 pc.TextureCacheInvalidationEnable =
2854 flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
2855 pc.Address = ro_bo(bo, offset);
2856 pc.ImmediateData = imm;
2857 }
2858 }
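#if 0
/* Illustrative sketch only (nothing calls this): a typical "flush render
 * and depth caches, stall, and write a fence value" request, using the
 * screen's workaround BO as a scratch write target.  The function above
 * layers in whatever extra workaround bits the flags require.
 */
iris_emit_raw_pipe_control(batch,
                           PIPE_CONTROL_RENDER_TARGET_FLUSH |
                           PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                           PIPE_CONTROL_CS_STALL |
                           PIPE_CONTROL_WRITE_IMMEDIATE,
                           batch->screen->workaround_bo, 0, 1);
#endif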
2859
2860 void
2861 genX(init_state)(struct iris_context *ice)
2862 {
2863 struct pipe_context *ctx = &ice->ctx;
2864
2865 ctx->create_blend_state = iris_create_blend_state;
2866 ctx->create_depth_stencil_alpha_state = iris_create_zsa_state;
2867 ctx->create_rasterizer_state = iris_create_rasterizer_state;
2868 ctx->create_sampler_state = iris_create_sampler_state;
2869 ctx->create_sampler_view = iris_create_sampler_view;
2870 ctx->create_surface = iris_create_surface;
2871 ctx->create_vertex_elements_state = iris_create_vertex_elements;
2872 ctx->create_compute_state = iris_create_compute_state;
2873 ctx->bind_blend_state = iris_bind_blend_state;
2874 ctx->bind_depth_stencil_alpha_state = iris_bind_zsa_state;
2875 ctx->bind_sampler_states = iris_bind_sampler_states;
2876 ctx->bind_rasterizer_state = iris_bind_rasterizer_state;
2877 ctx->bind_vertex_elements_state = iris_bind_vertex_elements_state;
2878 ctx->bind_compute_state = iris_bind_compute_state;
2879 ctx->delete_blend_state = iris_delete_state;
2880 ctx->delete_depth_stencil_alpha_state = iris_delete_state;
2881 ctx->delete_fs_state = iris_delete_state;
2882 ctx->delete_rasterizer_state = iris_delete_state;
2883 ctx->delete_sampler_state = iris_delete_state;
2884 ctx->delete_vertex_elements_state = iris_delete_state;
2885 ctx->delete_compute_state = iris_delete_state;
2886 ctx->delete_tcs_state = iris_delete_state;
2887 ctx->delete_tes_state = iris_delete_state;
2888 ctx->delete_gs_state = iris_delete_state;
2889 ctx->delete_vs_state = iris_delete_state;
2890 ctx->set_blend_color = iris_set_blend_color;
2891 ctx->set_clip_state = iris_set_clip_state;
2892 ctx->set_constant_buffer = iris_set_constant_buffer;
2893 ctx->set_sampler_views = iris_set_sampler_views;
2894 ctx->set_framebuffer_state = iris_set_framebuffer_state;
2895 ctx->set_polygon_stipple = iris_set_polygon_stipple;
2896 ctx->set_sample_mask = iris_set_sample_mask;
2897 ctx->set_scissor_states = iris_set_scissor_states;
2898 ctx->set_stencil_ref = iris_set_stencil_ref;
2899 ctx->set_vertex_buffers = iris_set_vertex_buffers;
2900 ctx->set_viewport_states = iris_set_viewport_states;
2901 ctx->sampler_view_destroy = iris_sampler_view_destroy;
2902 ctx->surface_destroy = iris_surface_destroy;
2903 ctx->draw_vbo = iris_draw_vbo;
2904 ctx->launch_grid = iris_launch_grid;
2905 ctx->create_stream_output_target = iris_create_stream_output_target;
2906 ctx->stream_output_target_destroy = iris_stream_output_target_destroy;
2907 ctx->set_stream_output_targets = iris_set_stream_output_targets;
2908
2909 ice->state.destroy_state = iris_destroy_state;
2910 ice->state.init_render_context = iris_init_render_context;
2911 ice->state.upload_render_state = iris_upload_render_state;
2912 ice->state.emit_raw_pipe_control = iris_emit_raw_pipe_control;
2913 ice->state.derived_program_state_size = iris_derived_program_state_size;
2914 ice->state.set_derived_program_state = iris_set_derived_program_state;
2915 ice->state.populate_vs_key = iris_populate_vs_key;
2916 ice->state.populate_tcs_key = iris_populate_tcs_key;
2917 ice->state.populate_tes_key = iris_populate_tes_key;
2918 ice->state.populate_gs_key = iris_populate_gs_key;
2919 ice->state.populate_fs_key = iris_populate_fs_key;
2920
2921 ice->state.dirty = ~0ull;
2922 }