iris: SO buffers
src/gallium/drivers/iris/iris_state.c
1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include <stdio.h>
24 #include <errno.h>
25
26 #if HAVE_VALGRIND
27 #include <valgrind.h>
28 #include <memcheck.h>
29 #define VG(x) x
30 #ifndef NDEBUG
31 #define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))
32 #endif
33 #else
34 #define VG(x)
35 #endif
36
37 #include "pipe/p_defines.h"
38 #include "pipe/p_state.h"
39 #include "pipe/p_context.h"
40 #include "pipe/p_screen.h"
41 #include "util/u_inlines.h"
42 #include "util/u_format.h"
43 #include "util/u_framebuffer.h"
44 #include "util/u_transfer.h"
45 #include "util/u_upload_mgr.h"
46 #include "i915_drm.h"
47 #include "nir.h"
48 #include "intel/compiler/brw_compiler.h"
49 #include "intel/common/gen_l3_config.h"
50 #include "intel/common/gen_sample_positions.h"
51 #include "iris_batch.h"
52 #include "iris_context.h"
53 #include "iris_pipe.h"
54 #include "iris_resource.h"
55
56 #define __gen_address_type struct iris_address
57 #define __gen_user_data struct iris_batch
58
59 #define ARRAY_BYTES(x) (sizeof(uint32_t) * ARRAY_SIZE(x))
60
61 static uint64_t
62 __gen_combine_address(struct iris_batch *batch, void *location,
63 struct iris_address addr, uint32_t delta)
64 {
65 uint64_t result = addr.offset + delta;
66
67 if (addr.bo) {
68 iris_use_pinned_bo(batch, addr.bo, addr.write);
69 /* Assume this is a general address, not relative to a base. */
70 result += addr.bo->gtt_offset;
71 }
72
73 return result;
74 }
75
76 #define __genxml_cmd_length(cmd) cmd ## _length
77 #define __genxml_cmd_length_bias(cmd) cmd ## _length_bias
78 #define __genxml_cmd_header(cmd) cmd ## _header
79 #define __genxml_cmd_pack(cmd) cmd ## _pack
80
81 #define _iris_pack_command(batch, cmd, dst, name) \
82 for (struct cmd name = { __genxml_cmd_header(cmd) }, \
83 *_dst = (void *)(dst); __builtin_expect(_dst != NULL, 1); \
84 ({ __genxml_cmd_pack(cmd)(batch, (void *)_dst, &name); \
85 _dst = NULL; \
86 }))
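/* Note: _iris_pack_command is a single-iteration for-loop trick. It
 * declares a template struct (`name`) that the caller's block fills in,
 * then the ({ ... }) expression runs exactly once to pack the template
 * into `dst`, terminating the loop by setting _dst to NULL.
 */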
87
88 #define iris_pack_command(cmd, dst, name) \
89 _iris_pack_command(NULL, cmd, dst, name)
90
91 #define iris_pack_state(cmd, dst, name) \
92 for (struct cmd name = {}, \
93 *_dst = (void *)(dst); __builtin_expect(_dst != NULL, 1); \
94 __genxml_cmd_pack(cmd)(NULL, (void *)_dst, &name), \
95 _dst = NULL)
96
97 #define iris_emit_cmd(batch, cmd, name) \
98 _iris_pack_command(batch, cmd, iris_get_command_space(batch, 4 * __genxml_cmd_length(cmd)), name)
99
100 #define iris_emit_merge(batch, dwords0, dwords1, num_dwords) \
101 do { \
102 uint32_t *dw = iris_get_command_space(batch, 4 * num_dwords); \
103 for (uint32_t i = 0; i < num_dwords; i++) \
104 dw[i] = (dwords0)[i] | (dwords1)[i]; \
105 VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, num_dwords)); \
106 } while (0)
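/* For illustration only: a minimal sketch of how the pack/emit helpers
 * above are used. The field values are arbitrary and `batch` is assumed
 * to be in scope; this is not driver code.
 */
#if 0
/* Pack a command into a local DWord array... */
uint32_t dws[GENX(3DSTATE_DRAWING_RECTANGLE_length)];
iris_pack_command(GENX(3DSTATE_DRAWING_RECTANGLE), dws, rect) {
   rect.ClippedDrawingRectangleXMax = 1023;
   rect.ClippedDrawingRectangleYMax = 767;
}

/* ...or allocate batch space and pack into it in one step. */
iris_emit_cmd(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
   rect.ClippedDrawingRectangleXMax = 1023;
   rect.ClippedDrawingRectangleYMax = 767;
}
#endif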
107
108 #include "genxml/genX_pack.h"
109 #include "genxml/gen_macros.h"
110 #include "genxml/genX_bits.h"
111
112 #define MOCS_WB (2 << 1)
113
114 UNUSED static void pipe_asserts(void)
115 {
116 #define PIPE_ASSERT(x) STATIC_ASSERT((int)x)
117
118 /* pipe_logicop happens to match the hardware. */
119 PIPE_ASSERT(PIPE_LOGICOP_CLEAR == LOGICOP_CLEAR);
120 PIPE_ASSERT(PIPE_LOGICOP_NOR == LOGICOP_NOR);
121 PIPE_ASSERT(PIPE_LOGICOP_AND_INVERTED == LOGICOP_AND_INVERTED);
122 PIPE_ASSERT(PIPE_LOGICOP_COPY_INVERTED == LOGICOP_COPY_INVERTED);
123 PIPE_ASSERT(PIPE_LOGICOP_AND_REVERSE == LOGICOP_AND_REVERSE);
124 PIPE_ASSERT(PIPE_LOGICOP_INVERT == LOGICOP_INVERT);
125 PIPE_ASSERT(PIPE_LOGICOP_XOR == LOGICOP_XOR);
126 PIPE_ASSERT(PIPE_LOGICOP_NAND == LOGICOP_NAND);
127 PIPE_ASSERT(PIPE_LOGICOP_AND == LOGICOP_AND);
128 PIPE_ASSERT(PIPE_LOGICOP_EQUIV == LOGICOP_EQUIV);
129 PIPE_ASSERT(PIPE_LOGICOP_NOOP == LOGICOP_NOOP);
130 PIPE_ASSERT(PIPE_LOGICOP_OR_INVERTED == LOGICOP_OR_INVERTED);
131 PIPE_ASSERT(PIPE_LOGICOP_COPY == LOGICOP_COPY);
132 PIPE_ASSERT(PIPE_LOGICOP_OR_REVERSE == LOGICOP_OR_REVERSE);
133 PIPE_ASSERT(PIPE_LOGICOP_OR == LOGICOP_OR);
134 PIPE_ASSERT(PIPE_LOGICOP_SET == LOGICOP_SET);
135
136 /* pipe_blendfactor happens to match the hardware. */
137 PIPE_ASSERT(PIPE_BLENDFACTOR_ONE == BLENDFACTOR_ONE);
138 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_COLOR == BLENDFACTOR_SRC_COLOR);
139 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_ALPHA == BLENDFACTOR_SRC_ALPHA);
140 PIPE_ASSERT(PIPE_BLENDFACTOR_DST_ALPHA == BLENDFACTOR_DST_ALPHA);
141 PIPE_ASSERT(PIPE_BLENDFACTOR_DST_COLOR == BLENDFACTOR_DST_COLOR);
142 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE == BLENDFACTOR_SRC_ALPHA_SATURATE);
143 PIPE_ASSERT(PIPE_BLENDFACTOR_CONST_COLOR == BLENDFACTOR_CONST_COLOR);
144 PIPE_ASSERT(PIPE_BLENDFACTOR_CONST_ALPHA == BLENDFACTOR_CONST_ALPHA);
145 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC1_COLOR == BLENDFACTOR_SRC1_COLOR);
146 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC1_ALPHA == BLENDFACTOR_SRC1_ALPHA);
147 PIPE_ASSERT(PIPE_BLENDFACTOR_ZERO == BLENDFACTOR_ZERO);
148 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC_COLOR == BLENDFACTOR_INV_SRC_COLOR);
149 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC_ALPHA == BLENDFACTOR_INV_SRC_ALPHA);
150 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_DST_ALPHA == BLENDFACTOR_INV_DST_ALPHA);
151 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_DST_COLOR == BLENDFACTOR_INV_DST_COLOR);
152 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_CONST_COLOR == BLENDFACTOR_INV_CONST_COLOR);
153 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_CONST_ALPHA == BLENDFACTOR_INV_CONST_ALPHA);
154 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC1_COLOR == BLENDFACTOR_INV_SRC1_COLOR);
155 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC1_ALPHA == BLENDFACTOR_INV_SRC1_ALPHA);
156
157 /* pipe_blend_func happens to match the hardware. */
158 PIPE_ASSERT(PIPE_BLEND_ADD == BLENDFUNCTION_ADD);
159 PIPE_ASSERT(PIPE_BLEND_SUBTRACT == BLENDFUNCTION_SUBTRACT);
160 PIPE_ASSERT(PIPE_BLEND_REVERSE_SUBTRACT == BLENDFUNCTION_REVERSE_SUBTRACT);
161 PIPE_ASSERT(PIPE_BLEND_MIN == BLENDFUNCTION_MIN);
162 PIPE_ASSERT(PIPE_BLEND_MAX == BLENDFUNCTION_MAX);
163
164 /* pipe_stencil_op happens to match the hardware. */
165 PIPE_ASSERT(PIPE_STENCIL_OP_KEEP == STENCILOP_KEEP);
166 PIPE_ASSERT(PIPE_STENCIL_OP_ZERO == STENCILOP_ZERO);
167 PIPE_ASSERT(PIPE_STENCIL_OP_REPLACE == STENCILOP_REPLACE);
168 PIPE_ASSERT(PIPE_STENCIL_OP_INCR == STENCILOP_INCRSAT);
169 PIPE_ASSERT(PIPE_STENCIL_OP_DECR == STENCILOP_DECRSAT);
170 PIPE_ASSERT(PIPE_STENCIL_OP_INCR_WRAP == STENCILOP_INCR);
171 PIPE_ASSERT(PIPE_STENCIL_OP_DECR_WRAP == STENCILOP_DECR);
172 PIPE_ASSERT(PIPE_STENCIL_OP_INVERT == STENCILOP_INVERT);
173
174 /* pipe_sprite_coord_mode happens to match 3DSTATE_SBE */
175 PIPE_ASSERT(PIPE_SPRITE_COORD_UPPER_LEFT == UPPERLEFT);
176 PIPE_ASSERT(PIPE_SPRITE_COORD_LOWER_LEFT == LOWERLEFT);
177 #undef PIPE_ASSERT
178 }
179
180 static unsigned
181 translate_prim_type(enum pipe_prim_type prim, uint8_t verts_per_patch)
182 {
183 static const unsigned map[] = {
184 [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST,
185 [PIPE_PRIM_LINES] = _3DPRIM_LINELIST,
186 [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP,
187 [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP,
188 [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST,
189 [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
190 [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
191 [PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST,
192 [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
193 [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON,
194 [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
195 [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
196 [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
197 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
198 [PIPE_PRIM_PATCHES] = _3DPRIM_PATCHLIST_1 - 1,
199 };
200
201 return map[prim] + (prim == PIPE_PRIM_PATCHES ? verts_per_patch : 0);
202 }
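/* Example: translate_prim_type(PIPE_PRIM_PATCHES, 4) yields
 * _3DPRIM_PATCHLIST_1 - 1 + 4 == _3DPRIM_PATCHLIST_4, relying on the
 * PATCHLIST_n topology enums being numbered consecutively.
 */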
203
204 static unsigned
205 translate_compare_func(enum pipe_compare_func pipe_func)
206 {
207 static const unsigned map[] = {
208 [PIPE_FUNC_NEVER] = COMPAREFUNCTION_NEVER,
209 [PIPE_FUNC_LESS] = COMPAREFUNCTION_LESS,
210 [PIPE_FUNC_EQUAL] = COMPAREFUNCTION_EQUAL,
211 [PIPE_FUNC_LEQUAL] = COMPAREFUNCTION_LEQUAL,
212 [PIPE_FUNC_GREATER] = COMPAREFUNCTION_GREATER,
213 [PIPE_FUNC_NOTEQUAL] = COMPAREFUNCTION_NOTEQUAL,
214 [PIPE_FUNC_GEQUAL] = COMPAREFUNCTION_GEQUAL,
215 [PIPE_FUNC_ALWAYS] = COMPAREFUNCTION_ALWAYS,
216 };
217 return map[pipe_func];
218 }
219
220 static unsigned
221 translate_shadow_func(enum pipe_compare_func pipe_func)
222 {
223 /* Gallium specifies the result of shadow comparisons as:
224 *
225 * 1 if ref <op> texel,
226 * 0 otherwise.
227 *
228 * The hardware does:
229 *
230 * 0 if texel <op> ref,
231 * 1 otherwise.
232 *
233 * So we need to flip the operator and also negate.
234 */
235 static const unsigned map[] = {
236 [PIPE_FUNC_NEVER] = PREFILTEROPALWAYS,
237 [PIPE_FUNC_LESS] = PREFILTEROPLEQUAL,
238 [PIPE_FUNC_EQUAL] = PREFILTEROPNOTEQUAL,
239 [PIPE_FUNC_LEQUAL] = PREFILTEROPLESS,
240 [PIPE_FUNC_GREATER] = PREFILTEROPGEQUAL,
241 [PIPE_FUNC_NOTEQUAL] = PREFILTEROPEQUAL,
242 [PIPE_FUNC_GEQUAL] = PREFILTEROPGREATER,
243 [PIPE_FUNC_ALWAYS] = PREFILTEROPNEVER,
244 };
245 return map[pipe_func];
246 }
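/* Example: PIPE_FUNC_LESS means "pass when ref < texel". The hardware
 * returns 0 when (texel op ref) holds, so PREFILTEROPLEQUAL
 * (texel <= ref) makes it return 1 exactly when texel > ref,
 * i.e. when ref < texel, matching Gallium's rule.
 */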
247
248 static unsigned
249 translate_cull_mode(unsigned pipe_face)
250 {
251 static const unsigned map[4] = {
252 [PIPE_FACE_NONE] = CULLMODE_NONE,
253 [PIPE_FACE_FRONT] = CULLMODE_FRONT,
254 [PIPE_FACE_BACK] = CULLMODE_BACK,
255 [PIPE_FACE_FRONT_AND_BACK] = CULLMODE_BOTH,
256 };
257 return map[pipe_face];
258 }
259
260 static unsigned
261 translate_fill_mode(unsigned pipe_polymode)
262 {
263 static const unsigned map[4] = {
264 [PIPE_POLYGON_MODE_FILL] = FILL_MODE_SOLID,
265 [PIPE_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME,
266 [PIPE_POLYGON_MODE_POINT] = FILL_MODE_POINT,
267 [PIPE_POLYGON_MODE_FILL_RECTANGLE] = FILL_MODE_SOLID,
268 };
269 return map[pipe_polymode];
270 }
271
272 static struct iris_address
273 ro_bo(struct iris_bo *bo, uint64_t offset)
274 {
275 /* Not for CSOs! */
276 return (struct iris_address) { .bo = bo, .offset = offset };
277 }
278
279 static struct iris_address
280 rw_bo(struct iris_bo *bo, uint64_t offset)
281 {
282 /* Not for CSOs! */
283 return (struct iris_address) { .bo = bo, .offset = offset, .write = true };
284 }
285
286 static void *
287 upload_state(struct u_upload_mgr *uploader,
288 struct iris_state_ref *ref,
289 unsigned size,
290 unsigned alignment)
291 {
292 void *p = NULL;
293 u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
294 return p;
295 }
296
297 static uint32_t *
298 stream_state(struct iris_batch *batch,
299 struct u_upload_mgr *uploader,
300 struct pipe_resource **out_res,
301 unsigned size,
302 unsigned alignment,
303 uint32_t *out_offset)
304 {
305 void *ptr = NULL;
306
307 u_upload_alloc(uploader, 0, size, alignment, out_offset, out_res, &ptr);
308
309 struct iris_bo *bo = iris_resource_bo(*out_res);
310 iris_use_pinned_bo(batch, bo, false);
311
312 *out_offset += iris_bo_offset_from_base_address(bo);
313
314 return ptr;
315 }
316
317 static uint32_t
318 emit_state(struct iris_batch *batch,
319 struct u_upload_mgr *uploader,
320 struct pipe_resource **out_res,
321 const void *data,
322 unsigned size,
323 unsigned alignment)
324 {
325 unsigned offset = 0;
326 uint32_t *map =
327 stream_state(batch, uploader, out_res, size, alignment, &offset);
328
329 if (map)
330 memcpy(map, data, size);
331
332 return offset;
333 }
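/* Sketch of streaming a small state block through the helpers above
 * (illustrative only; `batch`, `ice`, and the locals are assumed to be
 * in scope):
 */
#if 0
struct pipe_resource *res = NULL;
const float blend_constants[4] = { 1.0f, 0.0f, 0.0f, 1.0f };
uint32_t offset = emit_state(batch, ice->state.dynamic_uploader, &res,
                             blend_constants, sizeof(blend_constants), 32);
/* `offset` is now relative to the dynamic state base address. */
#endif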
334
335 #define cso_changed(x) (!old_cso || (old_cso->x != new_cso->x))
336 #define cso_changed_memcmp(x) \
337 (!old_cso || memcmp(old_cso->x, new_cso->x, sizeof(old_cso->x)) != 0)
338
339 static void
340 iris_init_render_context(struct iris_screen *screen,
341 struct iris_batch *batch,
342 struct iris_vtable *vtbl,
343 struct pipe_debug_callback *dbg)
344 {
345 iris_init_batch(batch, screen, vtbl, dbg, I915_EXEC_RENDER);
346
347 /* XXX: PIPE_CONTROLs */
348
349 iris_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) {
350 #if 0
351 // XXX: MOCS is stupid for this.
352 sba.GeneralStateMemoryObjectControlState = MOCS_WB;
353 sba.StatelessDataPortAccessMemoryObjectControlState = MOCS_WB;
354 sba.SurfaceStateMemoryObjectControlState = MOCS_WB;
355 sba.DynamicStateMemoryObjectControlState = MOCS_WB;
356 sba.IndirectObjectMemoryObjectControlState = MOCS_WB;
357 sba.InstructionMemoryObjectControlState = MOCS_WB;
358 sba.BindlessSurfaceStateMemoryObjectControlState = MOCS_WB;
359 #endif
360
361 sba.GeneralStateBaseAddressModifyEnable = true;
362 sba.SurfaceStateBaseAddressModifyEnable = true;
363 sba.DynamicStateBaseAddressModifyEnable = true;
364 sba.IndirectObjectBaseAddressModifyEnable = true;
365 sba.InstructionBaseAddressModifyEnable = true;
366 sba.GeneralStateBufferSizeModifyEnable = true;
367 sba.DynamicStateBufferSizeModifyEnable = true;
368 sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
369 sba.IndirectObjectBufferSizeModifyEnable = true;
370 sba.InstructionBuffersizeModifyEnable = true;
371
372 sba.InstructionBaseAddress = ro_bo(NULL, IRIS_MEMZONE_SHADER_START);
373 sba.SurfaceStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_SURFACE_START);
374 sba.DynamicStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_DYNAMIC_START);
375
376 sba.GeneralStateBufferSize = 0xfffff;
377 sba.IndirectObjectBufferSize = 0xfffff;
378 sba.InstructionBufferSize = 0xfffff;
379 sba.DynamicStateBufferSize = 0xfffff;
380 }
381
382 iris_emit_cmd(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
383 rect.ClippedDrawingRectangleXMax = UINT16_MAX;
384 rect.ClippedDrawingRectangleYMax = UINT16_MAX;
385 }
386 iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_PATTERN), pat) {
387 GEN_SAMPLE_POS_1X(pat._1xSample);
388 GEN_SAMPLE_POS_2X(pat._2xSample);
389 GEN_SAMPLE_POS_4X(pat._4xSample);
390 GEN_SAMPLE_POS_8X(pat._8xSample);
391 GEN_SAMPLE_POS_16X(pat._16xSample);
392 }
393 iris_emit_cmd(batch, GENX(3DSTATE_AA_LINE_PARAMETERS), foo);
394 iris_emit_cmd(batch, GENX(3DSTATE_WM_CHROMAKEY), foo);
395 iris_emit_cmd(batch, GENX(3DSTATE_WM_HZ_OP), foo);
396 /* XXX: may need to set an offset for origin-UL framebuffers */
397 iris_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_OFFSET), foo);
398
399 /* Just assign a static partitioning. */
400 for (int i = 0; i <= MESA_SHADER_FRAGMENT; i++) {
401 iris_emit_cmd(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), alloc) {
402 alloc._3DCommandSubOpcode = 18 + i;
403 alloc.ConstantBufferOffset = 6 * i;
404 alloc.ConstantBufferSize = i == MESA_SHADER_FRAGMENT ? 8 : 6;
405 }
406 }
407 }
408
409 struct iris_viewport_state {
410 uint32_t sf_cl_vp[GENX(SF_CLIP_VIEWPORT_length) * IRIS_MAX_VIEWPORTS];
411 };
412
413 struct iris_vertex_buffer_state {
414 uint32_t vertex_buffers[1 + 33 * GENX(VERTEX_BUFFER_STATE_length)];
415 struct pipe_resource *resources[33];
416 unsigned num_buffers;
417 };
418
419 struct iris_depth_buffer_state {
420 uint32_t packets[GENX(3DSTATE_DEPTH_BUFFER_length) +
421 GENX(3DSTATE_STENCIL_BUFFER_length) +
422 GENX(3DSTATE_HIER_DEPTH_BUFFER_length) +
423 GENX(3DSTATE_CLEAR_PARAMS_length)];
424 };
425
426 /**
427 * State that can't be stored directly in iris_context because the data
428 * layout varies per generation.
429 */
430 struct iris_genx_state {
431 struct iris_viewport_state viewport;
432 struct iris_vertex_buffer_state vertex_buffers;
433 struct iris_depth_buffer_state depth_buffer;
434
435 uint32_t so_buffers[4 * GENX(3DSTATE_SO_BUFFER_length)];
436 };
437
438 static void
439 iris_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *info)
440 {
441 }
442
443 static void
444 iris_set_blend_color(struct pipe_context *ctx,
445 const struct pipe_blend_color *state)
446 {
447 struct iris_context *ice = (struct iris_context *) ctx;
448
449 memcpy(&ice->state.blend_color, state, sizeof(struct pipe_blend_color));
450 ice->state.dirty |= IRIS_DIRTY_COLOR_CALC_STATE;
451 }
452
453 struct iris_blend_state {
454 /** Partial 3DSTATE_PS_BLEND */
455 uint32_t ps_blend[GENX(3DSTATE_PS_BLEND_length)];
456
457 /** Partial BLEND_STATE */
458 uint32_t blend_state[GENX(BLEND_STATE_length) +
459 BRW_MAX_DRAW_BUFFERS * GENX(BLEND_STATE_ENTRY_length)];
460
461 bool alpha_to_coverage; /* for shader key */
462 };
463
464 static void *
465 iris_create_blend_state(struct pipe_context *ctx,
466 const struct pipe_blend_state *state)
467 {
468 struct iris_blend_state *cso = malloc(sizeof(struct iris_blend_state));
469 uint32_t *blend_state = cso->blend_state;
470
471 cso->alpha_to_coverage = state->alpha_to_coverage;
472
473 iris_pack_command(GENX(3DSTATE_PS_BLEND), cso->ps_blend, pb) {
474 /* pb.HasWriteableRT is filled in at draw time. */
475 /* pb.AlphaTestEnable is filled in at draw time. */
476 pb.AlphaToCoverageEnable = state->alpha_to_coverage;
477 pb.IndependentAlphaBlendEnable = state->independent_blend_enable;
478
479 pb.ColorBufferBlendEnable = state->rt[0].blend_enable;
480
481 pb.SourceBlendFactor = state->rt[0].rgb_src_factor;
482 pb.SourceAlphaBlendFactor = state->rt[0].alpha_src_factor;
483 pb.DestinationBlendFactor = state->rt[0].rgb_dst_factor;
484 pb.DestinationAlphaBlendFactor = state->rt[0].alpha_dst_factor;
485 }
486
487 iris_pack_state(GENX(BLEND_STATE), blend_state, bs) {
488 bs.AlphaToCoverageEnable = state->alpha_to_coverage;
489 bs.IndependentAlphaBlendEnable = state->independent_blend_enable;
490 bs.AlphaToOneEnable = state->alpha_to_one;
491 bs.AlphaToCoverageDitherEnable = state->alpha_to_coverage;
492 bs.ColorDitherEnable = state->dither;
493 /* bl.AlphaTestEnable and bs.AlphaTestFunction are filled in later. */
494 }
495
496 blend_state += GENX(BLEND_STATE_length);
497
498 for (int i = 0; i < BRW_MAX_DRAW_BUFFERS; i++) {
499 iris_pack_state(GENX(BLEND_STATE_ENTRY), blend_state, be) {
500 be.LogicOpEnable = state->logicop_enable;
501 be.LogicOpFunction = state->logicop_func;
502
503 be.PreBlendSourceOnlyClampEnable = false;
504 be.ColorClampRange = COLORCLAMP_RTFORMAT;
505 be.PreBlendColorClampEnable = true;
506 be.PostBlendColorClampEnable = true;
507
508 be.ColorBufferBlendEnable = state->rt[i].blend_enable;
509
510 be.ColorBlendFunction = state->rt[i].rgb_func;
511 be.AlphaBlendFunction = state->rt[i].alpha_func;
512 be.SourceBlendFactor = state->rt[i].rgb_src_factor;
513 be.SourceAlphaBlendFactor = state->rt[i].alpha_src_factor;
514 be.DestinationBlendFactor = state->rt[i].rgb_dst_factor;
515 be.DestinationAlphaBlendFactor = state->rt[i].alpha_dst_factor;
516
517 be.WriteDisableRed = !(state->rt[i].colormask & PIPE_MASK_R);
518 be.WriteDisableGreen = !(state->rt[i].colormask & PIPE_MASK_G);
519 be.WriteDisableBlue = !(state->rt[i].colormask & PIPE_MASK_B);
520 be.WriteDisableAlpha = !(state->rt[i].colormask & PIPE_MASK_A);
521 }
522 blend_state += GENX(BLEND_STATE_ENTRY_length);
523 }
524
525 return cso;
526 }
527
528 static void
529 iris_bind_blend_state(struct pipe_context *ctx, void *state)
530 {
531 struct iris_context *ice = (struct iris_context *) ctx;
532 ice->state.cso_blend = state;
533 ice->state.dirty |= IRIS_DIRTY_PS_BLEND;
534 ice->state.dirty |= IRIS_DIRTY_BLEND_STATE;
535 }
536
537 struct iris_depth_stencil_alpha_state {
538 /** Partial 3DSTATE_WM_DEPTH_STENCIL */
539 uint32_t wmds[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];
540
541 /** Complete CC_VIEWPORT */
542 uint32_t cc_vp[GENX(CC_VIEWPORT_length)];
543
544 /** Outbound to BLEND_STATE, 3DSTATE_PS_BLEND, COLOR_CALC_STATE */
545 struct pipe_alpha_state alpha;
546 };
547
548 static void *
549 iris_create_zsa_state(struct pipe_context *ctx,
550 const struct pipe_depth_stencil_alpha_state *state)
551 {
552 struct iris_depth_stencil_alpha_state *cso =
553 malloc(sizeof(struct iris_depth_stencil_alpha_state));
554
555 cso->alpha = state->alpha;
556
557 bool two_sided_stencil = state->stencil[1].enabled;
558
559 /* The state tracker needs to optimize away EQUAL writes for us. */
560 assert(!(state->depth.func == PIPE_FUNC_EQUAL && state->depth.writemask));
561
562 iris_pack_command(GENX(3DSTATE_WM_DEPTH_STENCIL), cso->wmds, wmds) {
563 wmds.StencilFailOp = state->stencil[0].fail_op;
564 wmds.StencilPassDepthFailOp = state->stencil[0].zfail_op;
565 wmds.StencilPassDepthPassOp = state->stencil[0].zpass_op;
566 wmds.StencilTestFunction =
567 translate_compare_func(state->stencil[0].func);
568 wmds.BackfaceStencilFailOp = state->stencil[1].fail_op;
569 wmds.BackfaceStencilPassDepthFailOp = state->stencil[1].zfail_op;
570 wmds.BackfaceStencilPassDepthPassOp = state->stencil[1].zpass_op;
571 wmds.BackfaceStencilTestFunction =
572 translate_compare_func(state->stencil[1].func);
573 wmds.DepthTestFunction = translate_compare_func(state->depth.func);
574 wmds.DoubleSidedStencilEnable = two_sided_stencil;
575 wmds.StencilTestEnable = state->stencil[0].enabled;
576 wmds.StencilBufferWriteEnable =
577 state->stencil[0].writemask != 0 ||
578 (two_sided_stencil && state->stencil[1].writemask != 0);
579 wmds.DepthTestEnable = state->depth.enabled;
580 wmds.DepthBufferWriteEnable = state->depth.writemask;
581 wmds.StencilTestMask = state->stencil[0].valuemask;
582 wmds.StencilWriteMask = state->stencil[0].writemask;
583 wmds.BackfaceStencilTestMask = state->stencil[1].valuemask;
584 wmds.BackfaceStencilWriteMask = state->stencil[1].writemask;
585 /* wmds.[Backface]StencilReferenceValue are merged later */
586 }
587
588 iris_pack_state(GENX(CC_VIEWPORT), cso->cc_vp, ccvp) {
589 if (state->depth.bounds_test) {
590 ccvp.MinimumDepth = state->depth.bounds_min;
591 ccvp.MaximumDepth = state->depth.bounds_max;
592 } else {
593 ccvp.MinimumDepth = 0.0;
594 ccvp.MaximumDepth = 1.0;
595 }
596 }
597
598 return cso;
599 }
600
601 static void
602 iris_bind_zsa_state(struct pipe_context *ctx, void *state)
603 {
604 struct iris_context *ice = (struct iris_context *) ctx;
605 struct iris_depth_stencil_alpha_state *old_cso = ice->state.cso_zsa;
606 struct iris_depth_stencil_alpha_state *new_cso = state;
607
608 if (new_cso) {
609 if (cso_changed(alpha.ref_value))
610 ice->state.dirty |= IRIS_DIRTY_COLOR_CALC_STATE;
611
612 if (cso_changed(alpha.enabled))
613 ice->state.dirty |= IRIS_DIRTY_PS_BLEND | IRIS_DIRTY_BLEND_STATE;
614 }
615
616 ice->state.cso_zsa = new_cso;
617 ice->state.dirty |= IRIS_DIRTY_CC_VIEWPORT;
618 ice->state.dirty |= IRIS_DIRTY_WM_DEPTH_STENCIL;
619 }
620
621 struct iris_rasterizer_state {
622 uint32_t sf[GENX(3DSTATE_SF_length)];
623 uint32_t clip[GENX(3DSTATE_CLIP_length)];
624 uint32_t raster[GENX(3DSTATE_RASTER_length)];
625 uint32_t wm[GENX(3DSTATE_WM_length)];
626 uint32_t line_stipple[GENX(3DSTATE_LINE_STIPPLE_length)];
627
628 bool flatshade; /* for shader state */
629 bool clamp_fragment_color; /* for shader state */
630 bool light_twoside; /* for shader state */
631 bool rasterizer_discard; /* for 3DSTATE_STREAMOUT */
632 bool half_pixel_center; /* for 3DSTATE_MULTISAMPLE */
633 bool line_stipple_enable;
634 bool poly_stipple_enable;
635 enum pipe_sprite_coord_mode sprite_coord_mode; /* PIPE_SPRITE_* */
636 uint16_t sprite_coord_enable;
637 };
638
639 static void *
640 iris_create_rasterizer_state(struct pipe_context *ctx,
641 const struct pipe_rasterizer_state *state)
642 {
643 struct iris_rasterizer_state *cso =
644 malloc(sizeof(struct iris_rasterizer_state));
645
646 #if 0
647 point_quad_rasterization -> SBE?
648
649 not necessary?
650 {
651 poly_smooth
652 force_persample_interp - ?
653 bottom_edge_rule
654
655 offset_units_unscaled - cap not exposed
656 }
657 #endif
658
659 cso->flatshade = state->flatshade;
660 cso->clamp_fragment_color = state->clamp_fragment_color;
661 cso->light_twoside = state->light_twoside;
662 cso->rasterizer_discard = state->rasterizer_discard;
663 cso->half_pixel_center = state->half_pixel_center;
664 cso->sprite_coord_mode = state->sprite_coord_mode;
665 cso->sprite_coord_enable = state->sprite_coord_enable;
666 cso->line_stipple_enable = state->line_stipple_enable;
667 cso->poly_stipple_enable = state->poly_stipple_enable;
668
669 iris_pack_command(GENX(3DSTATE_SF), cso->sf, sf) {
670 sf.StatisticsEnable = true;
671 sf.ViewportTransformEnable = true;
672 sf.AALineDistanceMode = AALINEDISTANCE_TRUE;
673 sf.LineEndCapAntialiasingRegionWidth =
674 state->line_smooth ? _10pixels : _05pixels;
675 sf.LastPixelEnable = state->line_last_pixel;
676 sf.LineWidth = state->line_width;
677 sf.SmoothPointEnable = state->point_smooth;
678 sf.PointWidthSource = state->point_size_per_vertex ? Vertex : State;
679 sf.PointWidth = state->point_size;
680
681 if (state->flatshade_first) {
682 sf.TriangleFanProvokingVertexSelect = 1;
683 } else {
684 sf.TriangleStripListProvokingVertexSelect = 2;
685 sf.TriangleFanProvokingVertexSelect = 2;
686 sf.LineStripListProvokingVertexSelect = 1;
687 }
688 }
689
690 iris_pack_command(GENX(3DSTATE_RASTER), cso->raster, rr) {
691 rr.FrontWinding = state->front_ccw ? CounterClockwise : Clockwise;
692 rr.CullMode = translate_cull_mode(state->cull_face);
693 rr.FrontFaceFillMode = translate_fill_mode(state->fill_front);
694 rr.BackFaceFillMode = translate_fill_mode(state->fill_back);
695 rr.DXMultisampleRasterizationEnable = state->multisample;
696 rr.GlobalDepthOffsetEnableSolid = state->offset_tri;
697 rr.GlobalDepthOffsetEnableWireframe = state->offset_line;
698 rr.GlobalDepthOffsetEnablePoint = state->offset_point;
699 rr.GlobalDepthOffsetConstant = state->offset_units * 2;
700 rr.GlobalDepthOffsetScale = state->offset_scale;
701 rr.GlobalDepthOffsetClamp = state->offset_clamp;
702 rr.SmoothPointEnable = state->point_smooth;
703 rr.AntialiasingEnable = state->line_smooth;
704 rr.ScissorRectangleEnable = state->scissor;
705 rr.ViewportZNearClipTestEnable = state->depth_clip_near;
706 rr.ViewportZFarClipTestEnable = state->depth_clip_far;
707 //rr.ConservativeRasterizationEnable = not yet supported by Gallium...
708 }
709
710 iris_pack_command(GENX(3DSTATE_CLIP), cso->clip, cl) {
711 /* cl.NonPerspectiveBarycentricEnable is filled in at draw time from
712 * the FS program; cl.ForceZeroRTAIndexEnable is filled in from the FB.
713 */
714 cl.StatisticsEnable = true;
715 cl.EarlyCullEnable = true;
716 cl.UserClipDistanceClipTestEnableBitmask = state->clip_plane_enable;
717 cl.ForceUserClipDistanceClipTestEnableBitmask = true;
718 cl.APIMode = state->clip_halfz ? APIMODE_D3D : APIMODE_OGL;
719 cl.GuardbandClipTestEnable = true;
720 cl.ClipMode = CLIPMODE_NORMAL;
721 cl.ClipEnable = true;
722 cl.ViewportXYClipTestEnable = state->point_tri_clip;
723 cl.MinimumPointWidth = 0.125;
724 cl.MaximumPointWidth = 255.875;
725
726 if (state->flatshade_first) {
727 cl.TriangleFanProvokingVertexSelect = 1;
728 } else {
729 cl.TriangleStripListProvokingVertexSelect = 2;
730 cl.TriangleFanProvokingVertexSelect = 2;
731 cl.LineStripListProvokingVertexSelect = 1;
732 }
733 }
734
735 iris_pack_command(GENX(3DSTATE_WM), cso->wm, wm) {
736 /* wm.BarycentricInterpolationMode and wm.EarlyDepthStencilControl are
737 * filled in at draw time from the FS program.
738 */
739 wm.LineAntialiasingRegionWidth = _10pixels;
740 wm.LineEndCapAntialiasingRegionWidth = _05pixels;
741 wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
742 wm.StatisticsEnable = true;
743 wm.LineStippleEnable = state->line_stipple_enable;
744 wm.PolygonStippleEnable = state->poly_stipple_enable;
745 }
746
747 /* Remap from 0..255 back to 1..256 */
748 const unsigned line_stipple_factor = state->line_stipple_factor + 1;
749
750 iris_pack_command(GENX(3DSTATE_LINE_STIPPLE), cso->line_stipple, line) {
751 line.LineStipplePattern = state->line_stipple_pattern;
752 line.LineStippleInverseRepeatCount = 1.0f / line_stipple_factor;
753 line.LineStippleRepeatCount = line_stipple_factor;
754 }
755
756 return cso;
757 }
758
759 static void
760 iris_bind_rasterizer_state(struct pipe_context *ctx, void *state)
761 {
762 struct iris_context *ice = (struct iris_context *) ctx;
763 struct iris_rasterizer_state *old_cso = ice->state.cso_rast;
764 struct iris_rasterizer_state *new_cso = state;
765
766 if (new_cso) {
767 /* Try to avoid re-emitting 3DSTATE_LINE_STIPPLE, it's non-pipelined */
768 if (cso_changed_memcmp(line_stipple))
769 ice->state.dirty |= IRIS_DIRTY_LINE_STIPPLE;
770
771 if (cso_changed(half_pixel_center))
772 ice->state.dirty |= IRIS_DIRTY_MULTISAMPLE;
773
774 if (cso_changed(line_stipple_enable) || cso_changed(poly_stipple_enable))
775 ice->state.dirty |= IRIS_DIRTY_WM;
776
777 if (cso_changed(rasterizer_discard))
778 ice->state.dirty |= IRIS_DIRTY_STREAMOUT;
779 }
780
781 ice->state.cso_rast = new_cso;
782 ice->state.dirty |= IRIS_DIRTY_RASTER;
783 ice->state.dirty |= IRIS_DIRTY_CLIP;
784 }
785
786 static uint32_t
787 translate_wrap(unsigned pipe_wrap)
788 {
789 static const unsigned map[] = {
790 [PIPE_TEX_WRAP_REPEAT] = TCM_WRAP,
791 [PIPE_TEX_WRAP_CLAMP] = TCM_HALF_BORDER,
792 [PIPE_TEX_WRAP_CLAMP_TO_EDGE] = TCM_CLAMP,
793 [PIPE_TEX_WRAP_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER,
794 [PIPE_TEX_WRAP_MIRROR_REPEAT] = TCM_MIRROR,
795 [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE,
796
797 /* These are unsupported. */
798 [PIPE_TEX_WRAP_MIRROR_CLAMP] = -1,
799 [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER] = -1,
800 };
801 return map[pipe_wrap];
802 }
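/* Note: legacy PIPE_TEX_WRAP_CLAMP (GL_CLAMP) maps to TCM_HALF_BORDER
 * because GL_CLAMP blends in the border color once a linear filter
 * footprint crosses the texture edge, which is the half-border behavior.
 */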
803
804 /**
805 * Return true if the given wrap mode requires the border color to exist.
806 */
807 static bool
808 wrap_mode_needs_border_color(unsigned wrap_mode)
809 {
810 return wrap_mode == TCM_CLAMP_BORDER || wrap_mode == TCM_HALF_BORDER;
811 }
812
813 static unsigned
814 translate_mip_filter(enum pipe_tex_mipfilter pipe_mip)
815 {
816 static const unsigned map[] = {
817 [PIPE_TEX_MIPFILTER_NEAREST] = MIPFILTER_NEAREST,
818 [PIPE_TEX_MIPFILTER_LINEAR] = MIPFILTER_LINEAR,
819 [PIPE_TEX_MIPFILTER_NONE] = MIPFILTER_NONE,
820 };
821 return map[pipe_mip];
822 }
823
824 struct iris_sampler_state {
825 struct pipe_sampler_state base;
826
827 bool needs_border_color;
828
829 uint32_t sampler_state[GENX(SAMPLER_STATE_length)];
830 };
831
832 static void *
833 iris_create_sampler_state(struct pipe_context *ctx,
834 const struct pipe_sampler_state *state)
835 {
836 struct iris_sampler_state *cso = CALLOC_STRUCT(iris_sampler_state);
837
838 if (!cso)
839 return NULL;
840
841 memcpy(&cso->base, state, sizeof(*state));
842
843 STATIC_ASSERT(PIPE_TEX_FILTER_NEAREST == MAPFILTER_NEAREST);
844 STATIC_ASSERT(PIPE_TEX_FILTER_LINEAR == MAPFILTER_LINEAR);
845
846 unsigned wrap_s = translate_wrap(state->wrap_s);
847 unsigned wrap_t = translate_wrap(state->wrap_t);
848 unsigned wrap_r = translate_wrap(state->wrap_r);
849
850 cso->needs_border_color = wrap_mode_needs_border_color(wrap_s) ||
851 wrap_mode_needs_border_color(wrap_t) ||
852 wrap_mode_needs_border_color(wrap_r);
853
854 iris_pack_state(GENX(SAMPLER_STATE), cso->sampler_state, samp) {
855 samp.TCXAddressControlMode = wrap_s;
856 samp.TCYAddressControlMode = wrap_t;
857 samp.TCZAddressControlMode = wrap_r;
858 samp.CubeSurfaceControlMode = state->seamless_cube_map;
859 samp.NonnormalizedCoordinateEnable = !state->normalized_coords;
860 samp.MinModeFilter = state->min_img_filter;
861 samp.MagModeFilter = state->mag_img_filter;
862 samp.MipModeFilter = translate_mip_filter(state->min_mip_filter);
863 samp.MaximumAnisotropy = RATIO21;
864
865 if (state->max_anisotropy >= 2) {
866 if (state->min_img_filter == PIPE_TEX_FILTER_LINEAR) {
867 samp.MinModeFilter = MAPFILTER_ANISOTROPIC;
868 samp.AnisotropicAlgorithm = EWAApproximation;
869 }
870
871 if (state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)
872 samp.MagModeFilter = MAPFILTER_ANISOTROPIC;
873
874 samp.MaximumAnisotropy =
875 MIN2((state->max_anisotropy - 2) / 2, RATIO161);
876 }
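/* Example: max_anisotropy == 16 programs (16 - 2) / 2 == 7 == RATIO161;
 * the RATIO* enums count up from RATIO21 in steps of 2:1.
 */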
877
878 /* Set address rounding bits if not using nearest filtering. */
879 if (state->min_img_filter != PIPE_TEX_FILTER_NEAREST) {
880 samp.UAddressMinFilterRoundingEnable = true;
881 samp.VAddressMinFilterRoundingEnable = true;
882 samp.RAddressMinFilterRoundingEnable = true;
883 }
884
885 if (state->mag_img_filter != PIPE_TEX_FILTER_NEAREST) {
886 samp.UAddressMagFilterRoundingEnable = true;
887 samp.VAddressMagFilterRoundingEnable = true;
888 samp.RAddressMagFilterRoundingEnable = true;
889 }
890
891 if (state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
892 samp.ShadowFunction = translate_shadow_func(state->compare_func);
893
894 const float hw_max_lod = GEN_GEN >= 7 ? 14 : 13;
895
896 samp.LODPreClampMode = CLAMP_MODE_OGL;
897 samp.MinLOD = CLAMP(state->min_lod, 0, hw_max_lod);
898 samp.MaxLOD = CLAMP(state->max_lod, 0, hw_max_lod);
899 samp.TextureLODBias = CLAMP(state->lod_bias, -16, 15);
900
901 /* .BorderColorPointer is filled in by iris_bind_sampler_states. */
902 }
903
904 return cso;
905 }
906
907 static void
908 iris_bind_sampler_states(struct pipe_context *ctx,
909 enum pipe_shader_type p_stage,
910 unsigned start, unsigned count,
911 void **states)
912 {
913 struct iris_context *ice = (struct iris_context *) ctx;
914 gl_shader_stage stage = stage_from_pipe(p_stage);
915
916 assert(start + count <= IRIS_MAX_TEXTURE_SAMPLERS);
917 ice->state.num_samplers[stage] =
918 MAX2(ice->state.num_samplers[stage], start + count);
919
920 for (int i = 0; i < count; i++) {
921 ice->state.samplers[stage][start + i] = states[i];
922 }
923
924 /* Assemble the SAMPLER_STATEs into a contiguous table that lives
925 * in the dynamic state memory zone, so we can point to it via the
926 * 3DSTATE_SAMPLER_STATE_POINTERS_* commands.
927 */
928 void *map = upload_state(ice->state.dynamic_uploader,
929 &ice->state.sampler_table[stage],
930 count * 4 * GENX(SAMPLER_STATE_length), 32);
931 if (unlikely(!map))
932 return;
933
934 struct pipe_resource *res = ice->state.sampler_table[stage].res;
935 ice->state.sampler_table[stage].offset +=
936 iris_bo_offset_from_base_address(iris_resource_bo(res));
937
938 /* Make sure all land in the same BO */
939 iris_border_color_pool_reserve(ice, IRIS_MAX_TEXTURE_SAMPLERS);
940
941 for (int i = 0; i < count; i++) {
942 /* The iris_sampler_state pointer was saved above; a few of its
943 * fields are also needed to inform draw-time decisions.
944 */
945 struct iris_sampler_state *state = ice->state.samplers[stage][start + i];
948
949 if (!state) {
950 memset(map, 0, 4 * GENX(SAMPLER_STATE_length));
951 } else if (!state->needs_border_color) {
952 memcpy(map, state->sampler_state, 4 * GENX(SAMPLER_STATE_length));
953 } else {
954 ice->state.need_border_colors = true;
955
956 /* Stream out the border color and merge the pointer. */
957 uint32_t offset =
958 iris_upload_border_color(ice, &state->base.border_color);
959
960 uint32_t dynamic[GENX(SAMPLER_STATE_length)];
961 iris_pack_state(GENX(SAMPLER_STATE), dynamic, dyns) {
962 dyns.BorderColorPointer = offset;
963 }
964
965 for (uint32_t j = 0; j < GENX(SAMPLER_STATE_length); j++)
966 ((uint32_t *) map)[j] = state->sampler_state[j] | dynamic[j];
967 }
968
969 map += GENX(SAMPLER_STATE_length);
970 }
971
972 ice->state.dirty |= IRIS_DIRTY_SAMPLER_STATES_VS << stage;
973 }
974
975 struct iris_sampler_view {
976 struct pipe_sampler_view pipe;
977 struct isl_view view;
978
979 /** The resource (BO) holding our SURFACE_STATE. */
980 struct iris_state_ref surface_state;
981 };
982
983 /**
984 * Convert a swizzle enumeration (e.g. PIPE_SWIZZLE_X) to one of the Gen7.5+
985 * "Shader Channel Select" enumerations (e.g. HSW_SCS_RED). The mappings are:
986 *
987 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
988 * 0 1 2 3 4 5
989 * 4 5 6 7 0 1
990 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
991 *
992 * which is simply adding 4 then modding by 8 (or anding with 7).
993 *
994 * We then may need to apply workarounds for textureGather hardware bugs.
995 */
996 static enum isl_channel_select
997 pipe_swizzle_to_isl_channel(enum pipe_swizzle swizzle)
998 {
999 return (swizzle + 4) & 7;
1000 }
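/* Example: PIPE_SWIZZLE_X is 0, so (0 + 4) & 7 == 4 == SCS_RED, while
 * PIPE_SWIZZLE_ZERO is 4, so (4 + 4) & 7 == 0 == SCS_ZERO.
 */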
1001
1002 static struct pipe_sampler_view *
1003 iris_create_sampler_view(struct pipe_context *ctx,
1004 struct pipe_resource *tex,
1005 const struct pipe_sampler_view *tmpl)
1006 {
1007 struct iris_context *ice = (struct iris_context *) ctx;
1008 struct iris_screen *screen = (struct iris_screen *)ctx->screen;
1009 struct iris_resource *itex = (struct iris_resource *) tex;
1010 struct iris_sampler_view *isv = calloc(1, sizeof(struct iris_sampler_view));
1011
1012 if (!isv)
1013 return NULL;
1014
1015 /* initialize base object */
1016 isv->pipe = *tmpl;
1017 isv->pipe.context = ctx;
1018 isv->pipe.texture = NULL;
1019 pipe_reference_init(&isv->pipe.reference, 1);
1020 pipe_resource_reference(&isv->pipe.texture, tex);
1021
1022 /* XXX: do we need brw_get_texture_swizzle hacks here? */
1023
1024 isv->view = (struct isl_view) {
1025 .format = iris_isl_format_for_pipe_format(tmpl->format),
1026 .base_level = tmpl->u.tex.first_level,
1027 .levels = tmpl->u.tex.last_level - tmpl->u.tex.first_level + 1,
1028 .base_array_layer = tmpl->u.tex.first_layer,
1029 .array_len = tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1,
1030 .swizzle = (struct isl_swizzle) {
1031 .r = pipe_swizzle_to_isl_channel(tmpl->swizzle_r),
1032 .g = pipe_swizzle_to_isl_channel(tmpl->swizzle_g),
1033 .b = pipe_swizzle_to_isl_channel(tmpl->swizzle_b),
1034 .a = pipe_swizzle_to_isl_channel(tmpl->swizzle_a),
1035 },
1036 .usage = ISL_SURF_USAGE_TEXTURE_BIT |
1037 (itex->surf.usage & ISL_SURF_USAGE_CUBE_BIT),
1038 };
1039
1040 void *map = upload_state(ice->state.surface_uploader, &isv->surface_state,
1041 4 * GENX(RENDER_SURFACE_STATE_length), 64);
1042 if (unlikely(!map))
1043 return NULL;
1044
1045 struct iris_bo *state_bo = iris_resource_bo(isv->surface_state.res);
1046 isv->surface_state.offset += iris_bo_offset_from_base_address(state_bo);
1047
1048 isl_surf_fill_state(&screen->isl_dev, map,
1049 .surf = &itex->surf, .view = &isv->view,
1050 .mocs = MOCS_WB,
1051 .address = itex->bo->gtt_offset);
1052 // .aux_surf =
1053 // .clear_color = clear_color,
1054
1055 return &isv->pipe;
1056 }
1057
1058 static struct pipe_surface *
1059 iris_create_surface(struct pipe_context *ctx,
1060 struct pipe_resource *tex,
1061 const struct pipe_surface *tmpl)
1062 {
1063 struct iris_context *ice = (struct iris_context *) ctx;
1064 struct iris_screen *screen = (struct iris_screen *)ctx->screen;
1065 struct iris_surface *surf = calloc(1, sizeof(struct iris_surface));
1066 struct pipe_surface *psurf = &surf->pipe;
1067 struct iris_resource *res = (struct iris_resource *) tex;
1068
1069 if (!surf)
1070 return NULL;
1071
1072 pipe_reference_init(&psurf->reference, 1);
1073 pipe_resource_reference(&psurf->texture, tex);
1074 psurf->context = ctx;
1075 psurf->format = tmpl->format;
1076 psurf->width = tex->width0;
1077 psurf->height = tex->height0;
1078 psurf->texture = tex;
1079 psurf->u.tex.first_layer = tmpl->u.tex.first_layer;
1080 psurf->u.tex.last_layer = tmpl->u.tex.last_layer;
1081 psurf->u.tex.level = tmpl->u.tex.level;
1082
1083 unsigned usage = 0;
1084 if (tmpl->writable)
1085 usage = ISL_SURF_USAGE_STORAGE_BIT;
1086 else if (util_format_is_depth_or_stencil(tmpl->format))
1087 usage = ISL_SURF_USAGE_DEPTH_BIT;
1088 else
1089 usage = ISL_SURF_USAGE_RENDER_TARGET_BIT;
1090
1091 surf->view = (struct isl_view) {
1092 .format = iris_isl_format_for_pipe_format(tmpl->format),
1093 .base_level = tmpl->u.tex.level,
1094 .levels = 1,
1095 .base_array_layer = tmpl->u.tex.first_layer,
1096 .array_len = tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1,
1097 .swizzle = ISL_SWIZZLE_IDENTITY,
1098 .usage = usage,
1099 };
1100
1101 /* Bail early for depth/stencil */
1102 if (res->surf.usage & (ISL_SURF_USAGE_DEPTH_BIT |
1103 ISL_SURF_USAGE_STENCIL_BIT))
1104 return psurf;
1105
1106
1107 void *map = upload_state(ice->state.surface_uploader, &surf->surface_state,
1108 4 * GENX(RENDER_SURFACE_STATE_length), 64);
1109 if (unlikely(!map))
1110 return NULL;
1111
1112 struct iris_bo *state_bo = iris_resource_bo(surf->surface_state.res);
1113 surf->surface_state.offset += iris_bo_offset_from_base_address(state_bo);
1114
1115 isl_surf_fill_state(&screen->isl_dev, map,
1116 .surf = &res->surf, .view = &surf->view,
1117 .mocs = MOCS_WB,
1118 .address = res->bo->gtt_offset);
1119 // .aux_surf =
1120 // .clear_color = clear_color,
1121
1122 return psurf;
1123 }
1124
1125 static void
1126 iris_set_sampler_views(struct pipe_context *ctx,
1127 enum pipe_shader_type p_stage,
1128 unsigned start, unsigned count,
1129 struct pipe_sampler_view **views)
1130 {
1131 struct iris_context *ice = (struct iris_context *) ctx;
1132 gl_shader_stage stage = stage_from_pipe(p_stage);
1133
1134 unsigned i;
1135 for (i = 0; i < count; i++) {
1136 pipe_sampler_view_reference((struct pipe_sampler_view **)
1137 &ice->state.textures[stage][i], views[i]);
1138 }
1139 for (; i < ice->state.num_textures[stage]; i++) {
1140 pipe_sampler_view_reference((struct pipe_sampler_view **)
1141 &ice->state.textures[stage][i], NULL);
1142 }
1143
1144 ice->state.num_textures[stage] = count;
1145
1146 ice->state.dirty |= (IRIS_DIRTY_BINDINGS_VS << stage);
1147 }
1148
1149 static void
1150 iris_set_clip_state(struct pipe_context *ctx,
1151 const struct pipe_clip_state *state)
1152 {
1153 }
1154
1155 static void
1156 iris_set_polygon_stipple(struct pipe_context *ctx,
1157 const struct pipe_poly_stipple *state)
1158 {
1159 struct iris_context *ice = (struct iris_context *) ctx;
1160 memcpy(&ice->state.poly_stipple, state, sizeof(*state));
1161 ice->state.dirty |= IRIS_DIRTY_POLYGON_STIPPLE;
1162 }
1163
1164 static void
1165 iris_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
1166 {
1167 struct iris_context *ice = (struct iris_context *) ctx;
1168
1169 ice->state.sample_mask = sample_mask;
1170 ice->state.dirty |= IRIS_DIRTY_SAMPLE_MASK;
1171 }
1172
1173 static void
1174 iris_set_scissor_states(struct pipe_context *ctx,
1175 unsigned start_slot,
1176 unsigned num_scissors,
1177 const struct pipe_scissor_state *states)
1178 {
1179 struct iris_context *ice = (struct iris_context *) ctx;
1180
1181 for (unsigned i = 0; i < num_scissors; i++) {
1182 ice->state.scissors[start_slot + i] = states[i];
1183 }
1184
1185 ice->state.dirty |= IRIS_DIRTY_SCISSOR_RECT;
1186 }
1187
1188 static void
1189 iris_set_stencil_ref(struct pipe_context *ctx,
1190 const struct pipe_stencil_ref *state)
1191 {
1192 struct iris_context *ice = (struct iris_context *) ctx;
1193 memcpy(&ice->state.stencil_ref, state, sizeof(*state));
1194 ice->state.dirty |= IRIS_DIRTY_WM_DEPTH_STENCIL;
1195 }
1196
1197 static float
1198 viewport_extent(const struct pipe_viewport_state *state, int axis, float sign)
1199 {
1200 return copysignf(state->scale[axis], sign) + state->translate[axis];
1201 }
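/* Worked example: a 640-pixel-wide viewport has scale[0] == 320 and
 * translate[0] == 320, so viewport_extent(state, 0, -1.0f) == 0 and
 * viewport_extent(state, 0, 1.0f) == 640.
 */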
1202
1203 #if 0
1204 static void
1205 calculate_guardband_size(uint32_t fb_width, uint32_t fb_height,
1206 float m00, float m11, float m30, float m31,
1207 float *xmin, float *xmax,
1208 float *ymin, float *ymax)
1209 {
1210 /* According to the "Vertex X,Y Clamping and Quantization" section of the
1211 * Strips and Fans documentation:
1212 *
1213 * "The vertex X and Y screen-space coordinates are also /clamped/ to the
1214 * fixed-point "guardband" range supported by the rasterization hardware"
1215 *
1216 * and
1217 *
1218 * "In almost all circumstances, if an object’s vertices are actually
1219 * modified by this clamping (i.e., had X or Y coordinates outside of
1220 * the guardband extent), the rendered object will not match the intended
1221 * result. Therefore software should take steps to ensure that this does
1222 * not happen - e.g., by clipping objects such that they do not exceed
1223 * these limits after the Drawing Rectangle is applied."
1224 *
1225 * I believe the fundamental restriction is that the rasterizer (in
1226 * the SF/WM stages) have a limit on the number of pixels that can be
1227 * rasterized. We need to ensure any coordinates beyond the rasterizer
1228 * limit are handled by the clipper. So effectively that limit becomes
1229 * the clipper's guardband size.
1230 *
1231 * It goes on to say:
1232 *
1233 * "In addition, in order to be correctly rendered, objects must have a
1234 * screenspace bounding box not exceeding 8K in the X or Y direction.
1235 * This additional restriction must also be comprehended by software,
1236 * i.e., enforced by use of clipping."
1237 *
1238 * This makes no sense. Gen7+ hardware supports 16K render targets,
1239 * and you definitely need to be able to draw polygons that fill the
1240 * surface. Our assumption is that the rasterizer was limited to 8K
1241 * on Sandybridge, which only supports 8K surfaces, and it was actually
1242 * increased to 16K on Ivybridge and later.
1243 *
1244 * So, limit the guardband to 16K on Gen7+ and 8K on Sandybridge.
1245 */
1246 const float gb_size = GEN_GEN >= 7 ? 16384.0f : 8192.0f;
1247
1248 if (m00 != 0 && m11 != 0) {
1249 /* First, we compute the screen-space render area */
1250 const float ss_ra_xmin = MIN3( 0, m30 + m00, m30 - m00);
1251 const float ss_ra_xmax = MAX3( fb_width, m30 + m00, m30 - m00);
1252 const float ss_ra_ymin = MIN3( 0, m31 + m11, m31 - m11);
1253 const float ss_ra_ymax = MAX3(fb_height, m31 + m11, m31 - m11);
1254
1255 /* We want the guardband to be centered on that */
1256 const float ss_gb_xmin = (ss_ra_xmin + ss_ra_xmax) / 2 - gb_size;
1257 const float ss_gb_xmax = (ss_ra_xmin + ss_ra_xmax) / 2 + gb_size;
1258 const float ss_gb_ymin = (ss_ra_ymin + ss_ra_ymax) / 2 - gb_size;
1259 const float ss_gb_ymax = (ss_ra_ymin + ss_ra_ymax) / 2 + gb_size;
1260
1261 /* Now we need it in native device coordinates */
1262 const float ndc_gb_xmin = (ss_gb_xmin - m30) / m00;
1263 const float ndc_gb_xmax = (ss_gb_xmax - m30) / m00;
1264 const float ndc_gb_ymin = (ss_gb_ymin - m31) / m11;
1265 const float ndc_gb_ymax = (ss_gb_ymax - m31) / m11;
1266
1267 /* Thanks to Y-flipping and ORIGIN_UPPER_LEFT, the Y coordinates may be
1268 * flipped upside-down. X should be fine though.
1269 */
1270 assert(ndc_gb_xmin <= ndc_gb_xmax);
1271 *xmin = ndc_gb_xmin;
1272 *xmax = ndc_gb_xmax;
1273 *ymin = MIN2(ndc_gb_ymin, ndc_gb_ymax);
1274 *ymax = MAX2(ndc_gb_ymin, ndc_gb_ymax);
1275 } else {
1276 /* The viewport scales to 0, so nothing will be rendered. */
1277 *xmin = 0.0f;
1278 *xmax = 0.0f;
1279 *ymin = 0.0f;
1280 *ymax = 0.0f;
1281 }
1282 }
1283 #endif
1284
1285 static void
1286 iris_set_viewport_states(struct pipe_context *ctx,
1287 unsigned start_slot,
1288 unsigned count,
1289 const struct pipe_viewport_state *states)
1290 {
1291 struct iris_context *ice = (struct iris_context *) ctx;
1292 struct iris_viewport_state *cso = &ice->state.genx->viewport;
1293 uint32_t *vp_map = &cso->sf_cl_vp[start_slot * GENX(SF_CLIP_VIEWPORT_length)];
1294
1295 for (unsigned i = 0; i < count; i++) {
1296 const struct pipe_viewport_state *state = &states[i];
1298 iris_pack_state(GENX(SF_CLIP_VIEWPORT), vp_map, vp) {
1299 vp.ViewportMatrixElementm00 = state->scale[0];
1300 vp.ViewportMatrixElementm11 = state->scale[1];
1301 vp.ViewportMatrixElementm22 = state->scale[2];
1302 vp.ViewportMatrixElementm30 = state->translate[0];
1303 vp.ViewportMatrixElementm31 = state->translate[1];
1304 vp.ViewportMatrixElementm32 = state->translate[2];
1305 /* XXX: in i965 this is computed based on the drawbuffer size,
1306 * but we don't have that here...
1307 */
1308 vp.XMinClipGuardband = -1.0;
1309 vp.XMaxClipGuardband = 1.0;
1310 vp.YMinClipGuardband = -1.0;
1311 vp.YMaxClipGuardband = 1.0;
1312 vp.XMinViewPort = viewport_extent(state, 0, -1.0f);
1313 vp.XMaxViewPort = viewport_extent(state, 0, 1.0f) - 1;
1314 vp.YMinViewPort = viewport_extent(state, 1, -1.0f);
1315 vp.YMaxViewPort = viewport_extent(state, 1, 1.0f) - 1;
1316 }
1317
1318 vp_map += GENX(SF_CLIP_VIEWPORT_length);
1319 }
1320
1321 ice->state.dirty |= IRIS_DIRTY_SF_CL_VIEWPORT;
1322 }
1323
1324 static void
1325 iris_set_framebuffer_state(struct pipe_context *ctx,
1326 const struct pipe_framebuffer_state *state)
1327 {
1328 struct iris_context *ice = (struct iris_context *) ctx;
1329 struct iris_screen *screen = (struct iris_screen *)ctx->screen;
1330 struct isl_device *isl_dev = &screen->isl_dev;
1331 struct pipe_framebuffer_state *cso = &ice->state.framebuffer;
1332
1333 if (cso->samples != state->samples) {
1334 ice->state.dirty |= IRIS_DIRTY_MULTISAMPLE;
1335 }
1336
1337 if (cso->nr_cbufs != state->nr_cbufs) {
1338 ice->state.dirty |= IRIS_DIRTY_BLEND_STATE;
1339 }
1340
1341 if ((cso->layers == 0) != (state->layers == 0)) {
1342 ice->state.dirty |= IRIS_DIRTY_CLIP;
1343 }
1344
1345 util_copy_framebuffer_state(cso, state);
1346
1347 struct iris_depth_buffer_state *cso_z = &ice->state.genx->depth_buffer;
1348
1349 struct isl_view view = {
1350 .base_level = 0,
1351 .levels = 1,
1352 .base_array_layer = 0,
1353 .array_len = 1,
1354 .swizzle = ISL_SWIZZLE_IDENTITY,
1355 };
1356
1357 struct isl_depth_stencil_hiz_emit_info info = {
1358 .view = &view,
1359 .mocs = MOCS_WB,
1360 };
1361
1362 struct iris_resource *zres =
1363 (void *) (cso->zsbuf ? cso->zsbuf->texture : NULL);
1364
1365 if (zres) {
1366 view.usage |= ISL_SURF_USAGE_DEPTH_BIT;
1367
1368 info.depth_surf = &zres->surf;
1369 info.depth_address = zres->bo->gtt_offset;
1370
1371 view.format = zres->surf.format;
1372
1373 view.base_level = cso->zsbuf->u.tex.level;
1374 view.base_array_layer = cso->zsbuf->u.tex.first_layer;
1375 view.array_len =
1376 cso->zsbuf->u.tex.last_layer - cso->zsbuf->u.tex.first_layer + 1;
1377
1378 info.hiz_usage = ISL_AUX_USAGE_NONE;
1379 }
1380
1381 #if 0
1382 if (stencil_mt) {
1383 view.usage |= ISL_SURF_USAGE_STENCIL_BIT;
1384 info.stencil_surf = &stencil_mt->surf;
1385
1386 if (!depth_mt) {
1387 view.base_level = stencil_irb->mt_level - stencil_irb->mt->first_level;
1388 view.base_array_layer = stencil_irb->mt_layer;
1389 view.array_len = MAX2(stencil_irb->layer_count, 1);
1390 view.format = stencil_mt->surf.format;
1391 }
1392
1393 uint32_t stencil_offset = 0;
1394 info.stencil_address = stencil_mt->bo->gtt_offset + stencil_mt->offset;
1395 }
1396 #endif
1397
1398 isl_emit_depth_stencil_hiz_s(isl_dev, cso_z->packets, &info);
1399
1400 ice->state.dirty |= IRIS_DIRTY_DEPTH_BUFFER;
1401
1402 /* Render target change */
1403 ice->state.dirty |= IRIS_DIRTY_BINDINGS_FS;
1404 }
1405
1406 static void
1407 iris_set_constant_buffer(struct pipe_context *ctx,
1408 enum pipe_shader_type p_stage, unsigned index,
1409 const struct pipe_constant_buffer *input)
1410 {
1411 struct iris_context *ice = (struct iris_context *) ctx;
1412 struct iris_screen *screen = (struct iris_screen *)ctx->screen;
1413 gl_shader_stage stage = stage_from_pipe(p_stage);
1414 struct iris_shader_state *shs = &ice->shaders.state[stage];
1415 struct iris_const_buffer *cbuf = &shs->constbuf[index];
1416
1417 if (input && (input->buffer || input->user_buffer)) {
1418 if (input->user_buffer) {
1419 u_upload_data(ctx->const_uploader, 0, input->buffer_size, 32,
1420 input->user_buffer, &cbuf->data.offset,
1421 &cbuf->data.res);
1422 } else {
1423 pipe_resource_reference(&cbuf->data.res, input->buffer);
1424 }
1425
1426 // XXX: these are not retained forever, use a separate uploader?
1427 void *map =
1428 upload_state(ice->state.surface_uploader, &cbuf->surface_state,
1429 4 * GENX(RENDER_SURFACE_STATE_length), 64);
1430 if (unlikely(!map)) {
1431 pipe_resource_reference(&cbuf->data.res, NULL);
1432 return;
1433 }
1434
1435 struct iris_resource *res = (void *) cbuf->data.res;
1436 struct iris_bo *surf_bo = iris_resource_bo(cbuf->surface_state.res);
1437 cbuf->surface_state.offset += iris_bo_offset_from_base_address(surf_bo);
1438
1439 isl_buffer_fill_state(&screen->isl_dev, map,
1440 .address = res->bo->gtt_offset + cbuf->data.offset,
1441 .size_B = input->buffer_size,
1442 .format = ISL_FORMAT_R32G32B32A32_FLOAT,
1443 .stride_B = 1,
1444 .mocs = MOCS_WB);
1445 } else {
1446 pipe_resource_reference(&cbuf->data.res, NULL);
1447 pipe_resource_reference(&cbuf->surface_state.res, NULL);
1448 }
1449
1450 ice->state.dirty |= IRIS_DIRTY_CONSTANTS_VS << stage;
1451 // XXX: maybe not necessary all the time...?
1452 ice->state.dirty |= IRIS_DIRTY_BINDINGS_VS << stage;
1453 }
1454
1455 static void
1456 iris_sampler_view_destroy(struct pipe_context *ctx,
1457 struct pipe_sampler_view *state)
1458 {
1459 struct iris_sampler_view *isv = (void *) state;
1460 pipe_resource_reference(&state->texture, NULL);
1461 pipe_resource_reference(&isv->surface_state.res, NULL);
1462 free(isv);
1463 }
1464
1465
1466 static void
1467 iris_surface_destroy(struct pipe_context *ctx, struct pipe_surface *p_surf)
1468 {
1469 struct iris_surface *surf = (void *) p_surf;
1470 pipe_resource_reference(&p_surf->texture, NULL);
1471 pipe_resource_reference(&surf->surface_state.res, NULL);
1472 free(surf);
1473 }
1474
1475 static void
1476 iris_delete_state(struct pipe_context *ctx, void *state)
1477 {
1478 free(state);
1479 }
1480
1481 static void
1482 iris_free_vertex_buffers(struct iris_vertex_buffer_state *cso)
1483 {
1484 for (unsigned i = 0; i < cso->num_buffers; i++)
1485 pipe_resource_reference(&cso->resources[i], NULL);
1486 }
1487
1488 static void
1489 iris_set_vertex_buffers(struct pipe_context *ctx,
1490 unsigned start_slot, unsigned count,
1491 const struct pipe_vertex_buffer *buffers)
1492 {
1493 struct iris_context *ice = (struct iris_context *) ctx;
1494 struct iris_vertex_buffer_state *cso = &ice->state.genx->vertex_buffers;
1495
1496 iris_free_vertex_buffers(&ice->state.genx->vertex_buffers);
1497
1498 if (!buffers)
1499 count = 0;
1500
1501 cso->num_buffers = count;
1502
1503 iris_pack_command(GENX(3DSTATE_VERTEX_BUFFERS), cso->vertex_buffers, vb) {
1504 vb.DWordLength = 4 * MAX2(cso->num_buffers, 1) - 1;
1505 }
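/* Each VERTEX_BUFFER_STATE is 4 DWords and DWordLength excludes the
 * command's first two DWords, so e.g. two buffers give a total of
 * 1 + 4 * 2 == 9 DWords and a DWordLength of 9 - 2 == 7 == 4 * 2 - 1.
 */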
1506
1507 uint32_t *vb_pack_dest = &cso->vertex_buffers[1];
1508
1509 if (count == 0) {
1510 iris_pack_state(GENX(VERTEX_BUFFER_STATE), vb_pack_dest, vb) {
1511 vb.VertexBufferIndex = start_slot;
1512 vb.NullVertexBuffer = true;
1513 vb.AddressModifyEnable = true;
1514 }
1515 }
1516
1517 for (unsigned i = 0; i < count; i++) {
1518 assert(!buffers[i].is_user_buffer);
1519
1520 pipe_resource_reference(&cso->resources[i], buffers[i].buffer.resource);
1521 struct iris_resource *res = (void *) cso->resources[i];
1522
1523 iris_pack_state(GENX(VERTEX_BUFFER_STATE), vb_pack_dest, vb) {
1524 vb.VertexBufferIndex = start_slot + i;
1525 vb.MOCS = MOCS_WB;
1526 vb.AddressModifyEnable = true;
1527 vb.BufferPitch = buffers[i].stride;
1528 vb.BufferSize = res->bo->size;
1529 vb.BufferStartingAddress =
1530 ro_bo(NULL, res->bo->gtt_offset + buffers[i].buffer_offset);
1531 }
1532
1533 vb_pack_dest += GENX(VERTEX_BUFFER_STATE_length);
1534 }
1535
1536 ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS;
1537 }
1538
1539 struct iris_vertex_element_state {
1540 uint32_t vertex_elements[1 + 33 * GENX(VERTEX_ELEMENT_STATE_length)];
1541 uint32_t vf_instancing[33 * GENX(3DSTATE_VF_INSTANCING_length)];
1542 unsigned count;
1543 };
1544
1545 static void *
1546 iris_create_vertex_elements(struct pipe_context *ctx,
1547 unsigned count,
1548 const struct pipe_vertex_element *state)
1549 {
1550 struct iris_vertex_element_state *cso =
1551 malloc(sizeof(struct iris_vertex_element_state));
1552
1553 cso->count = MAX2(count, 1);
1554
1555 /* TODO:
1556 * - create edge flag one
1557 * - create SGV ones
1558 * - if those are necessary, use count + 1/2/3... OR in the length
1559 */
1560 iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), cso->vertex_elements, ve) {
1561 ve.DWordLength = 1 + GENX(VERTEX_ELEMENT_STATE_length) * cso->count - 2;
1562 }
1563
1564 uint32_t *ve_pack_dest = &cso->vertex_elements[1];
1565 uint32_t *vfi_pack_dest = cso->vf_instancing;
1566
1567 if (count == 0) {
1568 iris_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {
1569 ve.Valid = true;
1570 ve.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
1571 ve.Component0Control = VFCOMP_STORE_0;
1572 ve.Component1Control = VFCOMP_STORE_0;
1573 ve.Component2Control = VFCOMP_STORE_0;
1574 ve.Component3Control = VFCOMP_STORE_1_FP;
1575 }
1576
1577 iris_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) {
1578 }
1579 }
1580
1581 for (int i = 0; i < count; i++) {
1582 enum isl_format isl_format =
1583 iris_isl_format_for_pipe_format(state[i].src_format);
1584 unsigned comp[4] = { VFCOMP_STORE_SRC, VFCOMP_STORE_SRC,
1585 VFCOMP_STORE_SRC, VFCOMP_STORE_SRC };
1586
1587 switch (isl_format_get_num_channels(isl_format)) {
1588 case 0: comp[0] = VFCOMP_STORE_0; /* fallthrough */
1589 case 1: comp[1] = VFCOMP_STORE_0; /* fallthrough */
1590 case 2: comp[2] = VFCOMP_STORE_0; /* fallthrough */
1591 case 3:
1592 comp[3] = isl_format_has_int_channel(isl_format) ? VFCOMP_STORE_1_INT
1593 : VFCOMP_STORE_1_FP;
1594 break;
1595 }
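/* Worked example: a two-channel format such as R32G32_FLOAT takes the
 * case 2 path and falls through, yielding comp = { STORE_SRC, STORE_SRC,
 * STORE_0, STORE_1_FP }, i.e. the (0, 1) defaults GL expects for the
 * missing .zw components.
 */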
1596 iris_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {
1597 ve.VertexBufferIndex = state[i].vertex_buffer_index;
1598 ve.Valid = true;
1599 ve.SourceElementOffset = state[i].src_offset;
1600 ve.SourceElementFormat = isl_format;
1601 ve.Component0Control = comp[0];
1602 ve.Component1Control = comp[1];
1603 ve.Component2Control = comp[2];
1604 ve.Component3Control = comp[3];
1605 }
1606
1607 iris_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) {
1608 vi.VertexElementIndex = i;
1609 vi.InstancingEnable = state[i].instance_divisor > 0;
1610 vi.InstanceDataStepRate = state[i].instance_divisor;
1611 }
1612
1613 ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length);
1614 vfi_pack_dest += GENX(3DSTATE_VF_INSTANCING_length);
1615 }
1616
1617 return cso;
1618 }
1619
1620 static void
1621 iris_bind_vertex_elements_state(struct pipe_context *ctx, void *state)
1622 {
1623 struct iris_context *ice = (struct iris_context *) ctx;
1624
1625 ice->state.cso_vertex_elements = state;
1626 ice->state.dirty |= IRIS_DIRTY_VERTEX_ELEMENTS;
1627 }
1628
1629 static void *
1630 iris_create_compute_state(struct pipe_context *ctx,
1631 const struct pipe_compute_state *state)
1632 {
1633 return malloc(1); /* XXX: dummy CSO; compute shader support isn't wired up yet */
1634 }
1635
1636 struct iris_stream_output_target {
1637 struct pipe_stream_output_target base;
1638
1639 uint32_t so_buffer[GENX(3DSTATE_SO_BUFFER_length)];
1640
1641 struct iris_state_ref offset;
1642 };
1643
1644 static struct pipe_stream_output_target *
1645 iris_create_stream_output_target(struct pipe_context *ctx,
1646 struct pipe_resource *res,
1647 unsigned buffer_offset,
1648 unsigned buffer_size)
1649 {
1650 struct iris_stream_output_target *cso = calloc(1, sizeof(*cso));
1651 if (!cso)
1652 return NULL;
1653
1654 pipe_reference_init(&cso->base.reference, 1);
1655 pipe_resource_reference(&cso->base.buffer, res);
1656 cso->base.buffer_offset = buffer_offset;
1657 cso->base.buffer_size = buffer_size;
1658 cso->base.context = ctx;
1659
1660 upload_state(ctx->stream_uploader, &cso->offset, 4, 4);
1661
1662 iris_pack_command(GENX(3DSTATE_SO_BUFFER), cso->so_buffer, sob) {
1663 sob.SurfaceBaseAddress =
1664 rw_bo(NULL, iris_resource_bo(res)->gtt_offset + buffer_offset);
1665 sob.SOBufferEnable = true;
1666 sob.StreamOffsetWriteEnable = true;
1667 sob.StreamOutputBufferOffsetAddressEnable = true;
1668 sob.MOCS = MOCS_WB; // XXX: MOCS
1669
1670 sob.SurfaceSize = MAX2(buffer_size / 4, 1) - 1; /* in DWords, minus one */
1671 sob.StreamOutputBufferOffsetAddress =
1672 rw_bo(NULL, iris_resource_bo(cso->offset.res)->gtt_offset + cso->offset.offset);
1673
1674 /* .SOBufferIndex and .StreamOffset are filled in later */
1675 }
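/* The rest of the packet is ORed in at bind time: see
 * iris_set_stream_output_targets(), which packs SOBufferIndex and
 * StreamOffset separately and merges them DWord-by-DWord with this
 * template.  Leaving those fields zeroed here keeps that OR-merge safe.
 */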
1676
1677 return &cso->base;
1678 }
1679
1680 static void
1681 iris_stream_output_target_destroy(struct pipe_context *ctx,
1682 struct pipe_stream_output_target *state)
1683 {
1684 struct iris_stream_output_target *cso = (void *) state;
1685
1686 pipe_resource_reference(&cso->base.buffer, NULL);
1687 pipe_resource_reference(&cso->offset.res, NULL);
1688
1689 free(cso);
1690 }
1691
1692 static void
1693 iris_set_stream_output_targets(struct pipe_context *ctx,
1694 unsigned num_targets,
1695 struct pipe_stream_output_target **targets,
1696 const unsigned *offsets)
1697 {
1698 struct iris_context *ice = (struct iris_context *) ctx;
1699 uint32_t *so_buffers = ice->state.genx->so_buffers;
1700
1701 for (unsigned i = 0; i < 4; i++,
1702 so_buffers += GENX(3DSTATE_SO_BUFFER_length)) {
1703
1704 if (i >= num_targets || !targets[i]) {
1705 iris_pack_command(GENX(3DSTATE_SO_BUFFER), so_buffers, sob)
1706 sob.SOBufferIndex = i;
1707 continue;
1708 }
1709
1710 /* Note that offsets[i] will either be 0, causing us to zero
1711 * the value in the buffer, or 0xFFFFFFFF, which happens to mean
1712 * "continue appending at the existing offset."
1713 */
1714 assert(offsets[i] == 0 || offsets[i] == 0xFFFFFFFF);
1715
1716 uint32_t dynamic[GENX(3DSTATE_SO_BUFFER_length)];
1717 iris_pack_state(GENX(3DSTATE_SO_BUFFER), dynamic, dyns) {
1718 dyns.SOBufferIndex = i;
1719 dyns.StreamOffset = offsets[i];
1720 }
1721
1722 struct iris_stream_output_target *tgt = (void *) targets[i];
1723 for (uint32_t j = 0; j < GENX(3DSTATE_SO_BUFFER_length); j++) {
1724 so_buffers[j] = tgt->so_buffer[j] | dynamic[j];
1725 }
1726 }
1727
1728 ice->state.dirty |= IRIS_DIRTY_SO_BUFFERS;
1729 }
1730
1731 static uint32_t *
1732 iris_create_so_decl_list(const struct pipe_stream_output_info *info,
1733 const struct brw_vue_map *vue_map)
1734 {
1735 struct GENX(SO_DECL) so_decl[MAX_VERTEX_STREAMS][128];
1736 int buffer_mask[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
1737 int next_offset[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
1738 int decls[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
1739 int max_decls = 0;
1740 STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= MAX_PROGRAM_OUTPUTS);
1741
1742 if (info->num_outputs == 0)
1743 return NULL;
1744
1745 memset(so_decl, 0, sizeof(so_decl));
1746
1747 /* Construct the list of SO_DECLs to be emitted. The formatting of the
1748 * command feels strange -- each dword pair contains a SO_DECL per stream.
1749 */
1750 for (unsigned i = 0; i < info->num_outputs; i++) {
1751 const struct pipe_stream_output *output = &info->output[i];
1752 const int buffer = output->output_buffer;
1753 const int varying = output->register_index;
1754 const unsigned stream_id = output->stream;
1755 assert(stream_id < MAX_VERTEX_STREAMS);
1756
1757 buffer_mask[stream_id] |= 1 << buffer;
1758
1759 assert(vue_map->varying_to_slot[varying] >= 0);
1760
1761 /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
1762 * array. Instead, it simply increments DstOffset for the following
1763 * input by the number of components that should be skipped.
1764 *
1765 * Our hardware is unusual in that it requires us to program SO_DECLs
1766 * for fake "hole" components, rather than simply taking the offset
1767 * for each real varying. Each hole can have size 1, 2, 3, or 4; we
1768 * program as many size = 4 holes as we can, then a final hole to
1769 * accommodate the final 1, 2, or 3 remaining.
1770 */
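/* Worked example: a gl_SkipComponents7 gap becomes one size-4 hole
 * (ComponentMask 0xF) followed by one size-3 hole (ComponentMask 0x7).
 */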
1771 int skip_components = output->dst_offset - next_offset[buffer];
1772
1773 while (skip_components > 0) {
1774 so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
1775 .HoleFlag = 1,
1776 .OutputBufferSlot = output->output_buffer,
1777 .ComponentMask = (1 << MIN2(skip_components, 4)) - 1,
1778 };
1779 skip_components -= 4;
1780 }
1781
1782 next_offset[buffer] = output->dst_offset + output->num_components;
1783
1784 so_decl[stream_id][decls[stream_id]++] = (struct GENX(SO_DECL)) {
1785 .OutputBufferSlot = output->output_buffer,
1786 .RegisterIndex = vue_map->varying_to_slot[varying],
1787 .ComponentMask =
1788 ((1 << output->num_components) - 1) << output->start_component,
1789 };
1790
1791 if (decls[stream_id] > max_decls)
1792 max_decls = decls[stream_id];
1793 }
1794
1795 uint32_t *dw = ralloc_size(NULL, sizeof(uint32_t) * (3 + 2 * max_decls));
1796
1797 iris_pack_command(GENX(3DSTATE_SO_DECL_LIST), dw, list) {
1798 list.DWordLength = 3 + 2 * max_decls - 2;
1799 list.StreamtoBufferSelects0 = buffer_mask[0];
1800 list.StreamtoBufferSelects1 = buffer_mask[1];
1801 list.StreamtoBufferSelects2 = buffer_mask[2];
1802 list.StreamtoBufferSelects3 = buffer_mask[3];
1803 list.NumEntries0 = decls[0];
1804 list.NumEntries1 = decls[1];
1805 list.NumEntries2 = decls[2];
1806 list.NumEntries3 = decls[3];
1807 }
1808
1809 for (int i = 0; i < max_decls; i++) {
1810 iris_pack_state(GENX(SO_DECL_ENTRY), dw + 2 + i * 2, entry) {
1811 entry.Stream0Decl = so_decl[0][i];
1812 entry.Stream1Decl = so_decl[1][i];
1813 entry.Stream2Decl = so_decl[2][i];
1814 entry.Stream3Decl = so_decl[3][i];
1815 }
1816 }
1817
1818 return dw;
1819 }
1820
1821 static void
1822 iris_compute_sbe_urb_read_interval(uint64_t fs_input_slots,
1823 const struct brw_vue_map *last_vue_map,
1824 bool two_sided_color,
1825 unsigned *out_offset,
1826 unsigned *out_length)
1827 {
1828 /* The compiler computes the first URB slot without considering COL/BFC
1829 * swizzling (because it doesn't know whether it's enabled), so we need
1830 * to do that here too. This may result in a smaller offset, which
1831 * should be safe.
1832 */
1833 const unsigned first_slot =
1834 brw_compute_first_urb_slot_required(fs_input_slots, last_vue_map);
1835
1836 /* This becomes the URB read offset (counted in pairs of slots). */
1837 assert(first_slot % 2 == 0);
1838 *out_offset = first_slot / 2;
1839
1840 /* We need to adjust the inputs read to account for front/back color
1841 * swizzling, as it can make the URB length longer.
1842 */
1843 for (int c = 0; c <= 1; c++) {
1844 if (fs_input_slots & (VARYING_BIT_COL0 << c)) {
1845 /* If two sided color is enabled, the fragment shader's gl_Color
1846 * (COL0) input comes from either the gl_FrontColor (COL0) or
1847 * gl_BackColor (BFC0) input varyings. Mark BFC as used, too.
1848 */
1849 if (two_sided_color)
1850 fs_input_slots |= (VARYING_BIT_BFC0 << c);
1851
1852 /* If the front color isn't written, we give the fragment shader the
1853 * back color instead of an undefined value. Switch from COL to BFC.
1854 */
1855 if (last_vue_map->varying_to_slot[VARYING_SLOT_COL0 + c] == -1) {
1856 fs_input_slots &= ~(VARYING_BIT_COL0 << c);
1857 fs_input_slots |= (VARYING_BIT_BFC0 << c);
1858 }
1859 }
1860 }
1861
1862 /* Compute the minimum URB Read Length necessary for the FS inputs.
1863 *
1864 * From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
1865 * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
1866 *
1867 * "This field should be set to the minimum length required to read the
1868 * maximum source attribute. The maximum source attribute is indicated
1869 * by the maximum value of the enabled Attribute # Source Attribute if
1870 * Attribute Swizzle Enable is set, Number of Output Attributes-1 if
1871 * enable is not set.
1872 * read_length = ceiling((max_source_attr + 1) / 2)
1873 *
1874 * [errata] Corruption/Hang possible if length programmed larger than
1875 * recommended"
1876 *
1877 * Similar text exists for Ivy Bridge.
1878 *
1879 * We find the last URB slot that's actually read by the FS.
1880 */
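/* Illustrative numbers: with first_slot = 2 (URB read offset 1) and the
 * last FS-read slot at 9, we read slots 2..9, so the length computed
 * below is DIV_ROUND_UP(9 - 2 + 1, 2) = 4 pairs.
 */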
1881 unsigned last_read_slot = last_vue_map->num_slots - 1;
1882 while (last_read_slot > first_slot && !(fs_input_slots &
1883 (1ull << last_vue_map->slot_to_varying[last_read_slot])))
1884 --last_read_slot;
1885
1886 /* The URB read length is the difference of the two, counted in pairs. */
1887 *out_length = DIV_ROUND_UP(last_read_slot - first_slot + 1, 2);
1888 }
1889
1890 static void
1891 iris_emit_sbe_swiz(struct iris_batch *batch,
1892 const struct iris_context *ice,
1893 unsigned urb_read_offset)
1894 {
1895 struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attr_overrides[16] = {};
1896 const struct brw_wm_prog_data *wm_prog_data = (void *)
1897 ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;
1898 const struct brw_vue_map *vue_map = ice->shaders.last_vue_map;
1899 const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;
1900
1901 /* XXX: this should be generated when putting programs in place */
1902
1903 // XXX: raster->sprite_coord_enable
1904
1905 for (int fs_attr = 0; fs_attr < VARYING_SLOT_MAX; fs_attr++) {
1906 const int input_index = wm_prog_data->urb_setup[fs_attr];
1907 if (input_index < 0 || input_index >= 16)
1908 continue;
1909
1910 struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr =
1911 &attr_overrides[input_index];
1912
1913 /* Viewport and Layer are stored in the VUE header. We need to override
1914 * them to zero if earlier stages didn't write them, as GL requires that
1915 * they read back as zero when not explicitly set.
1916 */
1917 switch (fs_attr) {
1918 case VARYING_SLOT_VIEWPORT:
1919 case VARYING_SLOT_LAYER:
1920 attr->ComponentOverrideX = true;
1921 attr->ComponentOverrideW = true;
1922 attr->ConstantSource = CONST_0000;
1923
1924 if (!(vue_map->slots_valid & VARYING_BIT_LAYER))
1925 attr->ComponentOverrideY = true;
1926 if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT))
1927 attr->ComponentOverrideZ = true;
1928 continue;
1929
1930 case VARYING_SLOT_PRIMITIVE_ID:
1931 attr->ComponentOverrideX = true;
1932 attr->ComponentOverrideY = true;
1933 attr->ComponentOverrideZ = true;
1934 attr->ComponentOverrideW = true;
1935 attr->ConstantSource = PRIM_ID;
1936 continue;
1937
1938 default:
1939 break;
1940 }
1941
1942 int slot = vue_map->varying_to_slot[fs_attr];
1943
1944 /* If there was only a back color written but not front, use back
1945 * as the color instead of undefined.
1946 */
1947 if (slot == -1 && fs_attr == VARYING_SLOT_COL0)
1948 slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0];
1949 if (slot == -1 && fs_attr == VARYING_SLOT_COL1)
1950 slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1];
1951
1952 /* Not written by the previous stage - undefined. */
1953 if (slot == -1) {
1954 attr->ComponentOverrideX = true;
1955 attr->ComponentOverrideY = true;
1956 attr->ComponentOverrideZ = true;
1957 attr->ComponentOverrideW = true;
1958 attr->ConstantSource = CONST_0001_FLOAT;
1959 continue;
1960 }
1961
1962 /* Compute the location of the attribute relative to the read offset,
1963 * which is counted in 256-bit increments (two 128-bit VUE slots).
1964 */
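/* E.g. with urb_read_offset = 1, VUE slot 5 maps to source attribute
 * 5 - 2 * 1 = 3.
 */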
1965 const int source_attr = slot - 2 * urb_read_offset;
1966 assert(source_attr >= 0 && source_attr <= 32);
1967 attr->SourceAttribute = source_attr;
1968
1969 /* If we are doing two-sided color, and the VUE slot following this one
1970 * represents a back-facing color, then we need to instruct the SF unit
1971 * to do back-facing swizzling.
1972 */
1973 if (cso_rast->light_twoside &&
1974 ((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 &&
1975 vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) ||
1976 (vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 &&
1977 vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1)))
1978 attr->SwizzleSelect = INPUTATTR_FACING;
1979 }
1980
1981 iris_emit_cmd(batch, GENX(3DSTATE_SBE_SWIZ), sbes) {
1982 for (int i = 0; i < 16; i++)
1983 sbes.Attribute[i] = attr_overrides[i];
1984 }
1985 }
1986
1987 static void
1988 iris_emit_sbe(struct iris_batch *batch, const struct iris_context *ice)
1989 {
1990 const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;
1991 const struct brw_wm_prog_data *wm_prog_data = (void *)
1992 ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;
1993 struct pipe_shader_state *p_fs =
1994 (void *) ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
1995 assert(p_fs->type == PIPE_SHADER_IR_NIR);
1996 nir_shader *fs_nir = p_fs->ir.nir;
1997
1998 unsigned urb_read_offset, urb_read_length;
1999 iris_compute_sbe_urb_read_interval(fs_nir->info.inputs_read,
2000 ice->shaders.last_vue_map,
2001 cso_rast->light_twoside,
2002 &urb_read_offset, &urb_read_length);
2003
2004 iris_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) {
2005 sbe.AttributeSwizzleEnable = true;
2006 sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
2007 sbe.PointSpriteTextureCoordinateOrigin = cso_rast->sprite_coord_mode;
2008 sbe.VertexURBEntryReadOffset = urb_read_offset;
2009 sbe.VertexURBEntryReadLength = urb_read_length;
2010 sbe.ForceVertexURBEntryReadOffset = true;
2011 sbe.ForceVertexURBEntryReadLength = true;
2012 sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs;
2013
2014 for (int i = 0; i < 32; i++) {
2015 sbe.AttributeActiveComponentFormat[i] = ACTIVE_COMPONENT_XYZW;
2016 }
2017 }
2018
2019 iris_emit_sbe_swiz(batch, ice, urb_read_offset);
2020 }
2021
2022 static void
2023 iris_bind_compute_state(struct pipe_context *ctx, void *state)
2024 {
2025 }
2026
2027 static void
2028 iris_populate_sampler_key(const struct iris_context *ice,
2029 struct brw_sampler_prog_key_data *key)
2030 {
2031 for (int i = 0; i < MAX_SAMPLERS; i++) {
2032 key->swizzles[i] = 0x688; /* XYZW: (0 | 1<<3 | 2<<6 | 3<<9) */
2033 }
2034 }
2035
2036 static void
2037 iris_populate_vs_key(const struct iris_context *ice,
2038 struct brw_vs_prog_key *key)
2039 {
2040 memset(key, 0, sizeof(*key));
2041 iris_populate_sampler_key(ice, &key->tex);
2042 }
2043
2044 static void
2045 iris_populate_tcs_key(const struct iris_context *ice,
2046 struct brw_tcs_prog_key *key)
2047 {
2048 memset(key, 0, sizeof(*key));
2049 iris_populate_sampler_key(ice, &key->tex);
2050 }
2051
2052 static void
2053 iris_populate_tes_key(const struct iris_context *ice,
2054 struct brw_tes_prog_key *key)
2055 {
2056 memset(key, 0, sizeof(*key));
2057 iris_populate_sampler_key(ice, &key->tex);
2058 }
2059
2060 static void
2061 iris_populate_gs_key(const struct iris_context *ice,
2062 struct brw_gs_prog_key *key)
2063 {
2064 memset(key, 0, sizeof(*key));
2065 iris_populate_sampler_key(ice, &key->tex);
2066 }
2067
2068 static void
2069 iris_populate_fs_key(const struct iris_context *ice,
2070 struct brw_wm_prog_key *key)
2071 {
2072 memset(key, 0, sizeof(*key));
2073 iris_populate_sampler_key(ice, &key->tex);
2074
2075 /* XXX: dirty flags? */
2076 const struct pipe_framebuffer_state *fb = &ice->state.framebuffer;
2077 const struct iris_depth_stencil_alpha_state *zsa = ice->state.cso_zsa;
2078 const struct iris_rasterizer_state *rast = ice->state.cso_rast;
2079 const struct iris_blend_state *blend = ice->state.cso_blend;
2080
2081 key->nr_color_regions = fb->nr_cbufs;
2082
2083 key->clamp_fragment_color = rast->clamp_fragment_color;
2084
2085 key->replicate_alpha = fb->nr_cbufs > 1 &&
2086 (zsa->alpha.enabled || blend->alpha_to_coverage);
2087
2088 /* XXX: only bother if COL0/1 are read */
2089 key->flat_shade = rast->flatshade;
2090
2091 // key->force_dual_color_blend for unigine
2092 #if 0
2093 if (cso_rast->multisample) {
2094 key->persample_interp =
2095 ctx->Multisample.SampleShading &&
2096 (ctx->Multisample.MinSampleShadingValue *
2097 _mesa_geometric_samples(ctx->DrawBuffer) > 1);
2098
2099 key->multisample_fbo = fb->samples > 1;
2100 }
2101 #endif
2102
2103 key->coherent_fb_fetch = true;
2104 }
2105
2106 #if 0
2107 // XXX: these need to go in INIT_THREAD_DISPATCH_FIELDS
2108 pkt.SamplerCount = \
2109 DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \
2110 pkt.PerThreadScratchSpace = prog_data->total_scratch == 0 ? 0 : \
2111 ffs(stage_state->per_thread_scratch) - 11; \
2112
2113 #endif
2114
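/* Kernel Start Pointer fields are programmed relative to the Instruction
 * Base Address; shader assembly lives in a dedicated memory zone, so the
 * BO's offset from that zone's start is exactly what the hardware wants.
 */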
2115 static uint64_t
2116 KSP(const struct iris_compiled_shader *shader)
2117 {
2118 struct iris_resource *res = (void *) shader->assembly.res;
2119 return iris_bo_offset_from_base_address(res->bo) + shader->assembly.offset;
2120 }
2121
2122 #define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \
2123 pkt.KernelStartPointer = KSP(shader); \
2124 pkt.BindingTableEntryCount = prog_data->binding_table.size_bytes / 4; \
2125 pkt.FloatingPointMode = prog_data->use_alt_mode; \
2126 \
2127 pkt.DispatchGRFStartRegisterForURBData = \
2128 prog_data->dispatch_grf_start_reg; \
2129 pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length; \
2130 pkt.prefix##URBEntryReadOffset = 0; \
2131 \
2132 pkt.StatisticsEnable = true; \
2133 pkt.Enable = true;
2134
2135 static void
2136 iris_store_vs_state(const struct gen_device_info *devinfo,
2137 struct iris_compiled_shader *shader)
2138 {
2139 struct brw_stage_prog_data *prog_data = shader->prog_data;
2140 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
2141
2142 iris_pack_command(GENX(3DSTATE_VS), shader->derived_data, vs) {
2143 INIT_THREAD_DISPATCH_FIELDS(vs, Vertex);
2144 vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;
2145 vs.SIMD8DispatchEnable = true;
2146 vs.UserClipDistanceCullTestEnableBitmask =
2147 vue_prog_data->cull_distance_mask;
2148 }
2149 }
2150
2151 static void
2152 iris_store_tcs_state(const struct gen_device_info *devinfo,
2153 struct iris_compiled_shader *shader)
2154 {
2155 struct brw_stage_prog_data *prog_data = shader->prog_data;
2156 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
2157 struct brw_tcs_prog_data *tcs_prog_data = (void *) prog_data;
2158
2159 iris_pack_command(GENX(3DSTATE_HS), shader->derived_data, hs) {
2160 INIT_THREAD_DISPATCH_FIELDS(hs, Vertex);
2161
2162 hs.InstanceCount = tcs_prog_data->instances - 1;
2163 hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
2164 hs.IncludeVertexHandles = true;
2165 }
2166 }
2167
2168 static void
2169 iris_store_tes_state(const struct gen_device_info *devinfo,
2170 struct iris_compiled_shader *shader)
2171 {
2172 struct brw_stage_prog_data *prog_data = shader->prog_data;
2173 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
2174 struct brw_tes_prog_data *tes_prog_data = (void *) prog_data;
2175
2176 uint32_t *te_state = (void *) shader->derived_data;
2177 uint32_t *ds_state = te_state + GENX(3DSTATE_TE_length);
2178
2179 iris_pack_command(GENX(3DSTATE_TE), te_state, te) {
2180 te.Partitioning = tes_prog_data->partitioning;
2181 te.OutputTopology = tes_prog_data->output_topology;
2182 te.TEDomain = tes_prog_data->domain;
2183 te.TEEnable = true;
2184 te.MaximumTessellationFactorOdd = 63.0;
2185 te.MaximumTessellationFactorNotOdd = 64.0;
2186 }
2187
2188 iris_pack_command(GENX(3DSTATE_DS), ds_state, ds) {
2189 INIT_THREAD_DISPATCH_FIELDS(ds, Patch);
2190
2191 ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH;
2192 ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;
2193 ds.ComputeWCoordinateEnable =
2194 tes_prog_data->domain == BRW_TESS_DOMAIN_TRI;
2195
2196 ds.UserClipDistanceCullTestEnableBitmask =
2197 vue_prog_data->cull_distance_mask;
2198 }
2200 }
2201
2202 static void
2203 iris_store_gs_state(const struct gen_device_info *devinfo,
2204 struct iris_compiled_shader *shader)
2205 {
2206 struct brw_stage_prog_data *prog_data = shader->prog_data;
2207 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
2208 struct brw_gs_prog_data *gs_prog_data = (void *) prog_data;
2209
2210 iris_pack_command(GENX(3DSTATE_GS), shader->derived_data, gs) {
2211 INIT_THREAD_DISPATCH_FIELDS(gs, Vertex);
2212
2213 gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
2214 gs.OutputTopology = gs_prog_data->output_topology;
2215 gs.ControlDataHeaderSize =
2216 gs_prog_data->control_data_header_size_hwords;
2217 gs.InstanceControl = gs_prog_data->invocations - 1;
2218 gs.DispatchMode = DISPATCH_MODE_SIMD8;
2219 gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;
2220 gs.ControlDataFormat = gs_prog_data->control_data_format;
2221 gs.ReorderMode = TRAILING;
2222 gs.ExpectedVertexCount = gs_prog_data->vertices_in;
2223 gs.MaximumNumberofThreads =
2224 GEN_GEN == 8 ? (devinfo->max_gs_threads / 2 - 1)
2225 : (devinfo->max_gs_threads - 1);
2226
2227 if (gs_prog_data->static_vertex_count != -1) {
2228 gs.StaticOutput = true;
2229 gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count;
2230 }
2231 gs.IncludeVertexHandles = vue_prog_data->include_vue_handles;
2232
2233 gs.UserClipDistanceCullTestEnableBitmask =
2234 vue_prog_data->cull_distance_mask;
2235
2236 const int urb_entry_write_offset = 1;
2237 const uint32_t urb_entry_output_length =
2238 DIV_ROUND_UP(vue_prog_data->vue_map.num_slots, 2) -
2239 urb_entry_write_offset;
2240
2241 gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset;
2242 gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1);
2243 }
2244 }
2245
2246 static void
2247 iris_store_fs_state(const struct gen_device_info *devinfo,
2248 struct iris_compiled_shader *shader)
2249 {
2250 struct brw_stage_prog_data *prog_data = shader->prog_data;
2251 struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data;
2252
2253 uint32_t *ps_state = (void *) shader->derived_data;
2254 uint32_t *psx_state = ps_state + GENX(3DSTATE_PS_length);
2255
2256 iris_pack_command(GENX(3DSTATE_PS), ps_state, ps) {
2257 ps.VectorMaskEnable = true;
2258 //ps.SamplerCount = ...
2259 ps.BindingTableEntryCount = prog_data->binding_table.size_bytes / 4;
2260 ps.FloatingPointMode = prog_data->use_alt_mode;
2261 ps.MaximumNumberofThreadsPerPSD = 64 - (GEN_GEN == 8 ? 2 : 1);
2262
2263 ps.PushConstantEnable = prog_data->nr_params > 0 ||
2264 prog_data->ubo_ranges[0].length > 0;
2265
2266 /* From the documentation for this packet:
2267 * "If the PS kernel does not need the Position XY Offsets to
2268 * compute a Position Value, then this field should be programmed
2269 * to POSOFFSET_NONE."
2270 *
2271 * "SW Recommendation: If the PS kernel needs the Position Offsets
2272 * to compute a Position XY value, this field should match Position
2273 * ZW Interpolation Mode to ensure a consistent position.xyzw
2274 * computation."
2275 *
2276 * We only require XY sample offsets, so this recommendation doesn't
2277 * look useful at the moment. We might need it in the future.
2278 */
2279 ps.PositionXYOffsetSelect =
2280 wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE;
2281 ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
2282 ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
2283 ps._32PixelDispatchEnable = wm_prog_data->dispatch_32;
2284
2285 // XXX: Disable SIMD32 with 16x MSAA
2286
2287 ps.DispatchGRFStartRegisterForConstantSetupData0 =
2288 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
2289 ps.DispatchGRFStartRegisterForConstantSetupData1 =
2290 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
2291 ps.DispatchGRFStartRegisterForConstantSetupData2 =
2292 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
2293
2294 ps.KernelStartPointer0 =
2295 KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
2296 ps.KernelStartPointer1 =
2297 KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
2298 ps.KernelStartPointer2 =
2299 KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
2300 }
2301
2302 iris_pack_command(GENX(3DSTATE_PS_EXTRA), psx_state, psx) {
2303 psx.PixelShaderValid = true;
2304 psx.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
2305 psx.PixelShaderKillsPixel = wm_prog_data->uses_kill;
2306 psx.AttributeEnable = wm_prog_data->num_varying_inputs != 0;
2307 psx.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
2308 psx.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
2309 psx.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
2310
2311 if (wm_prog_data->uses_sample_mask) {
2312 /* TODO: conservative rasterization */
2313 if (wm_prog_data->post_depth_coverage)
2314 psx.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
2315 else
2316 psx.InputCoverageMaskState = ICMS_NORMAL;
2317 }
2318
2319 psx.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
2320 psx.PixelShaderPullsBary = wm_prog_data->pulls_bary;
2321 psx.PixelShaderComputesStencil = wm_prog_data->computed_stencil;
2322
2323 // XXX: UAV bit
2324 }
2325 }
2326
2327 static unsigned
2328 iris_derived_program_state_size(enum iris_program_cache_id cache_id)
2329 {
2330 assert(cache_id <= IRIS_CACHE_BLORP);
2331
2332 static const unsigned dwords[] = {
2333 [IRIS_CACHE_VS] = GENX(3DSTATE_VS_length),
2334 [IRIS_CACHE_TCS] = GENX(3DSTATE_HS_length),
2335 [IRIS_CACHE_TES] = GENX(3DSTATE_TE_length) + GENX(3DSTATE_DS_length),
2336 [IRIS_CACHE_GS] = GENX(3DSTATE_GS_length),
2337 [IRIS_CACHE_FS] =
2338 GENX(3DSTATE_PS_length) + GENX(3DSTATE_PS_EXTRA_length),
2339 [IRIS_CACHE_CS] = 0,
2340 [IRIS_CACHE_BLORP] = 0,
2341 };
2342
2343 return sizeof(uint32_t) * dwords[cache_id];
2344 }
2345
2346 static void
2347 iris_store_derived_program_state(const struct gen_device_info *devinfo,
2348 enum iris_program_cache_id cache_id,
2349 struct iris_compiled_shader *shader)
2350 {
2351 switch (cache_id) {
2352 case IRIS_CACHE_VS:
2353 iris_store_vs_state(devinfo, shader);
2354 break;
2355 case IRIS_CACHE_TCS:
2356 iris_store_tcs_state(devinfo, shader);
2357 break;
2358 case IRIS_CACHE_TES:
2359 iris_store_tes_state(devinfo, shader);
2360 break;
2361 case IRIS_CACHE_GS:
2362 iris_store_gs_state(devinfo, shader);
2363 break;
2364 case IRIS_CACHE_FS:
2365 iris_store_fs_state(devinfo, shader);
2366 break;
2367 case IRIS_CACHE_CS:
2368 case IRIS_CACHE_BLORP:
2369 break;
2370 default:
2371 break;
2372 }
2373 }
2374
2375 static void
2376 iris_upload_urb_config(struct iris_context *ice, struct iris_batch *batch)
2377 {
2378 const struct gen_device_info *devinfo = &batch->screen->devinfo;
2379 const unsigned push_size_kB = 32;
2380 unsigned entries[4];
2381 unsigned start[4];
2382 unsigned size[4];
2383
2384 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
2385 if (!ice->shaders.prog[i]) {
2386 size[i] = 1;
2387 } else {
2388 struct brw_vue_prog_data *vue_prog_data =
2389 (void *) ice->shaders.prog[i]->prog_data;
2390 size[i] = vue_prog_data->urb_entry_size;
2391 }
2392 assert(size[i] != 0);
2393 }
2394
2395 gen_get_urb_config(devinfo, 1024 * push_size_kB,
2396 1024 * ice->shaders.urb_size,
2397 ice->shaders.prog[MESA_SHADER_TESS_EVAL] != NULL,
2398 ice->shaders.prog[MESA_SHADER_GEOMETRY] != NULL,
2399 size, entries, start);
2400
2401 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
2402 iris_emit_cmd(batch, GENX(3DSTATE_URB_VS), urb) {
2403 urb._3DCommandSubOpcode += i; /* 3DSTATE_URB_{VS,HS,DS,GS} are consecutive */
2404 urb.VSURBStartingAddress = start[i];
2405 urb.VSURBEntryAllocationSize = size[i] - 1;
2406 urb.VSNumberofURBEntries = entries[i];
2407 }
2408 }
2409 }
2410
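/* The per-stage 3DSTATE_CONSTANT_* commands share a layout and differ
 * only in _3DCommandSubOpcode; this table maps gl_shader_stage to the
 * right sub-opcode so we can emit them generically below.
 */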
2411 static const uint32_t push_constant_opcodes[] = {
2412 [MESA_SHADER_VERTEX] = 21,
2413 [MESA_SHADER_TESS_CTRL] = 25, /* HS */
2414 [MESA_SHADER_TESS_EVAL] = 26, /* DS */
2415 [MESA_SHADER_GEOMETRY] = 22,
2416 [MESA_SHADER_FRAGMENT] = 23,
2417 [MESA_SHADER_COMPUTE] = 0,
2418 };
2419
2420 /**
2421 * Add a surface to the validation list, as well as the buffer containing
2422 * the corresponding SURFACE_STATE.
2423 *
2424 * Returns the binding table entry (offset to SURFACE_STATE).
2425 */
2426 static uint32_t
2427 use_surface(struct iris_batch *batch,
2428 struct pipe_surface *p_surf,
2429 bool writeable)
2430 {
2431 struct iris_surface *surf = (void *) p_surf;
2432
2433 iris_use_pinned_bo(batch, iris_resource_bo(p_surf->texture), writeable);
2434 iris_use_pinned_bo(batch, iris_resource_bo(surf->surface_state.res), false);
2435
2436 return surf->surface_state.offset;
2437 }
2438
2439 static uint32_t
2440 use_sampler_view(struct iris_batch *batch, struct iris_sampler_view *isv)
2441 {
2442 iris_use_pinned_bo(batch, iris_resource_bo(isv->pipe.texture), false);
2443 iris_use_pinned_bo(batch, iris_resource_bo(isv->surface_state.res), false);
2444
2445 return isv->surface_state.offset;
2446 }
2447
2448 static uint32_t
2449 use_const_buffer(struct iris_batch *batch, struct iris_const_buffer *cbuf)
2450 {
2451 iris_use_pinned_bo(batch, iris_resource_bo(cbuf->data.res), false);
2452 iris_use_pinned_bo(batch, iris_resource_bo(cbuf->surface_state.res), false);
2453
2454 return cbuf->surface_state.offset;
2455 }
2456
2457 static uint32_t
2458 use_null_surface(struct iris_batch *batch, struct iris_context *ice)
2459 {
2460 struct iris_bo *state_bo = iris_resource_bo(ice->state.unbound_tex.res);
2461
2462 iris_use_pinned_bo(batch, state_bo, false);
2463
2464 return ice->state.unbound_tex.offset;
2465 }
2466
2467 static void
2468 iris_populate_binding_table(struct iris_context *ice,
2469 struct iris_batch *batch,
2470 gl_shader_stage stage)
2471 {
2472 const struct iris_binder *binder = &batch->binder;
2473 struct iris_compiled_shader *shader = ice->shaders.prog[stage];
2474 if (!shader)
2475 return;
2476
2477 // Surfaces:
2478 // - pull constants
2479 // - ubos/ssbos/abos
2480 // - images
2481 // - textures
2482 // - render targets - write and read
2483
2484 //struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;
2485 uint32_t *bt_map = binder->map + binder->bt_offset[stage];
2486 int s = 0;
2487
2488 if (stage == MESA_SHADER_FRAGMENT) {
2489 struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
2490 for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
2491 bt_map[s++] = use_surface(batch, cso_fb->cbufs[i], true);
2492 }
2493 }
2494
2495 //assert(prog_data->binding_table.texture_start ==
2496 //(ice->state.num_textures[stage] ? s : 0xd0d0d0d0));
2497
2498 for (int i = 0; i < ice->state.num_textures[stage]; i++) {
2499 struct iris_sampler_view *view = ice->state.textures[stage][i];
2500 bt_map[s++] = view ? use_sampler_view(batch, view)
2501 : use_null_surface(batch, ice);
2502 }
2503
2504 // XXX: want the number of BTEs to shorten this loop
2505 struct iris_shader_state *shs = &ice->shaders.state[stage];
2506 for (int i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
2507 struct iris_const_buffer *cbuf = &shs->constbuf[i];
2508 if (!cbuf->surface_state.res)
2509 break;
2510
2511 bt_map[s++] = use_const_buffer(batch, cbuf);
2512 }
2513 #if 0
2514 // XXX: not implemented yet
2515 assert(prog_data->binding_table.pull_constants_start == 0xd0d0d0d0);
2516 assert(prog_data->binding_table.ubo_start == 0xd0d0d0d0);
2517 assert(prog_data->binding_table.ssbo_start == 0xd0d0d0d0);
2518 assert(prog_data->binding_table.image_start == 0xd0d0d0d0);
2519 assert(prog_data->binding_table.shader_time_start == 0xd0d0d0d0);
2520 //assert(prog_data->binding_table.plane_start[1] == 0xd0d0d0d0);
2521 //assert(prog_data->binding_table.plane_start[2] == 0xd0d0d0d0);
2522 #endif
2523 }
2524
2525 static void
2526 iris_use_optional_res(struct iris_batch *batch,
2527 struct pipe_resource *res,
2528 bool writeable)
2529 {
2530 if (res) {
2531 struct iris_bo *bo = iris_resource_bo(res);
2532 iris_use_pinned_bo(batch, bo, writeable);
2533 }
2534 }
2535
2536
2537 /**
2538 * Pin any BOs which were installed by a previous batch, and restored
2539 * via the hardware logical context mechanism.
2540 *
2541 * We don't need to re-emit all state every batch - the hardware context
2542 * mechanism will save and restore it for us. This includes pointers to
2543 * various BOs...which won't exist unless we ask the kernel to pin them
2544 * by adding them to the validation list.
2545 *
2546 * We can skip buffers if we've re-emitted those packets, as we're
2547 * overwriting those stale pointers with new ones, and don't actually
2548 * refer to the old BOs.
2549 */
2550 static void
2551 iris_restore_context_saved_bos(struct iris_context *ice,
2552 struct iris_batch *batch,
2553 const struct pipe_draw_info *draw)
2554 {
2555 // XXX: whack IRIS_SHADER_DIRTY_BINDING_TABLE on new batch
2556
2557 const uint64_t clean = ~ice->state.dirty;
2558
2559 if (clean & IRIS_DIRTY_CC_VIEWPORT) {
2560 iris_use_optional_res(batch, ice->state.last_res.cc_vp, false);
2561 }
2562
2563 if (clean & IRIS_DIRTY_SF_CL_VIEWPORT) {
2564 iris_use_optional_res(batch, ice->state.last_res.sf_cl_vp, false);
2565 }
2566
2567 if (clean & IRIS_DIRTY_BLEND_STATE) {
2568 iris_use_optional_res(batch, ice->state.last_res.blend, false);
2569 }
2570
2571 if (clean & IRIS_DIRTY_COLOR_CALC_STATE) {
2572 iris_use_optional_res(batch, ice->state.last_res.color_calc, false);
2573 }
2574
2575 if (clean & IRIS_DIRTY_SCISSOR_RECT) {
2576 iris_use_optional_res(batch, ice->state.last_res.scissor, false);
2577 }
2578
2579 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
2580 if (clean & (IRIS_DIRTY_CONSTANTS_VS << stage))
2581 continue;
2582
2583 struct iris_shader_state *shs = &ice->shaders.state[stage];
2584 struct iris_compiled_shader *shader = ice->shaders.prog[stage];
2585
2586 if (!shader)
2587 continue;
2588
2589 struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;
2590
2591 for (int i = 0; i < 4; i++) {
2592 const struct brw_ubo_range *range = &prog_data->ubo_ranges[i];
2593
2594 if (range->length == 0)
2595 continue;
2596
2597 struct iris_const_buffer *cbuf = &shs->constbuf[range->block];
2598 struct iris_resource *res = (void *) cbuf->data.res;
2599
2600 if (res)
2601 iris_use_pinned_bo(batch, res->bo, false);
2602 else
2603 iris_use_pinned_bo(batch, batch->screen->workaround_bo, false);
2604 }
2605 }
2606
2607 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
2608 struct pipe_resource *res = ice->state.sampler_table[stage].res;
2609 if (res)
2610 iris_use_pinned_bo(batch, iris_resource_bo(res), false);
2611 }
2612
2613 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
2614 if (clean & (IRIS_DIRTY_VS << stage)) {
2615 struct iris_compiled_shader *shader = ice->shaders.prog[stage];
2616 if (shader) {
2617 struct iris_bo *bo = iris_resource_bo(shader->assembly.res);
2618 iris_use_pinned_bo(batch, bo, false);
2619 }
2620
2621 // XXX: scratch buffer
2622 }
2623 }
2624
2625 if (clean & IRIS_DIRTY_DEPTH_BUFFER) {
2626 struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
2627
2628 if (cso_fb->zsbuf) {
2629 struct iris_resource *zres = (void *) cso_fb->zsbuf->texture;
2630 // XXX: depth might not be writable...
2631 iris_use_pinned_bo(batch, zres->bo, true);
2632 }
2633 }
2634
2635 if (draw->index_size > 0) {
2636 // XXX: index buffer
2637 }
2638
2639 if (clean & IRIS_DIRTY_VERTEX_BUFFERS) {
2640 struct iris_vertex_buffer_state *cso = &ice->state.genx->vertex_buffers;
2641 for (unsigned i = 0; i < cso->num_buffers; i++) {
2642 struct iris_resource *res = (void *) cso->resources[i];
2643 iris_use_pinned_bo(batch, res->bo, false);
2644 }
2645 }
2646 }
2647
2648 static void
2649 iris_upload_render_state(struct iris_context *ice,
2650 struct iris_batch *batch,
2651 const struct pipe_draw_info *draw)
2652 {
2653 const uint64_t dirty = ice->state.dirty;
2654
2655 struct iris_genx_state *genx = ice->state.genx;
2656 struct brw_wm_prog_data *wm_prog_data = (void *)
2657 ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;
2658
2659 if (dirty & IRIS_DIRTY_CC_VIEWPORT) {
2660 struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa;
2661 iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) {
2662 ptr.CCViewportPointer =
2663 emit_state(batch, ice->state.dynamic_uploader,
2664 &ice->state.last_res.cc_vp,
2665 cso->cc_vp, sizeof(cso->cc_vp), 32);
2666 }
2667 }
2668
2669 if (dirty & IRIS_DIRTY_SF_CL_VIEWPORT) {
2670 struct iris_viewport_state *cso = &ice->state.genx->viewport;
2671 iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) {
2672 ptr.SFClipViewportPointer =
2673 emit_state(batch, ice->state.dynamic_uploader,
2674 &ice->state.last_res.sf_cl_vp,
2675 cso->sf_cl_vp, 4 * GENX(SF_CLIP_VIEWPORT_length) *
2676 ice->state.num_viewports, 64);
2677 }
2678 }
2679
2680 /* XXX: L3 State */
2681
2682 // XXX: this is only flagged at setup; we assume a static configuration
2683 if (dirty & IRIS_DIRTY_URB) {
2684 iris_upload_urb_config(ice, batch);
2685 }
2686
2687 if (dirty & IRIS_DIRTY_BLEND_STATE) {
2688 struct iris_blend_state *cso_blend = ice->state.cso_blend;
2689 struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
2690 struct iris_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa;
2691 const int num_dwords = GENX(BLEND_STATE_length) +
2692 cso_fb->nr_cbufs * GENX(BLEND_STATE_ENTRY_length);
2693 uint32_t blend_offset;
2694 uint32_t *blend_map =
2695 stream_state(batch, ice->state.dynamic_uploader,
2696 &ice->state.last_res.blend,
2697 4 * num_dwords, 64, &blend_offset);
2698
2699 uint32_t blend_state_header;
2700 iris_pack_state(GENX(BLEND_STATE), &blend_state_header, bs) {
2701 bs.AlphaTestEnable = cso_zsa->alpha.enabled;
2702 bs.AlphaTestFunction = translate_compare_func(cso_zsa->alpha.func);
2703 }
2704
2705 blend_map[0] = blend_state_header | cso_blend->blend_state[0];
2706 memcpy(&blend_map[1], &cso_blend->blend_state[1],
2707 (num_dwords - 1) * sizeof(uint32_t)); /* just the bound RT entries */
2708
2709 iris_emit_cmd(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), ptr) {
2710 ptr.BlendStatePointer = blend_offset;
2711 ptr.BlendStatePointerValid = true;
2712 }
2713 }
2714
2715 if (dirty & IRIS_DIRTY_COLOR_CALC_STATE) {
2716 struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa;
2717 uint32_t cc_offset;
2718 void *cc_map =
2719 stream_state(batch, ice->state.dynamic_uploader,
2720 &ice->state.last_res.color_calc,
2721 sizeof(uint32_t) * GENX(COLOR_CALC_STATE_length),
2722 64, &cc_offset);
2723 iris_pack_state(GENX(COLOR_CALC_STATE), cc_map, cc) {
2724 cc.AlphaTestFormat = ALPHATEST_FLOAT32;
2725 cc.AlphaReferenceValueAsFLOAT32 = cso->alpha.ref_value;
2726 cc.BlendConstantColorRed = ice->state.blend_color.color[0];
2727 cc.BlendConstantColorGreen = ice->state.blend_color.color[1];
2728 cc.BlendConstantColorBlue = ice->state.blend_color.color[2];
2729 cc.BlendConstantColorAlpha = ice->state.blend_color.color[3];
2730 }
2731 iris_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
2732 ptr.ColorCalcStatePointer = cc_offset;
2733 ptr.ColorCalcStatePointerValid = true;
2734 }
2735 }
2736
2737 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
2738 // XXX: wrong dirty tracking...
2739 if (!(dirty & (IRIS_DIRTY_CONSTANTS_VS << stage)))
2740 continue;
2741
2742 struct iris_shader_state *shs = &ice->shaders.state[stage];
2743 struct iris_compiled_shader *shader = ice->shaders.prog[stage];
2744
2745 if (!shader)
2746 continue;
2747
2748 struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;
2749
2750 iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_VS), pkt) {
2751 pkt._3DCommandSubOpcode = push_constant_opcodes[stage];
2752 if (prog_data) {
2753 /* The Skylake PRM contains the following restriction:
2754 *
2755 * "The driver must ensure The following case does not occur
2756 * without a flush to the 3D engine: 3DSTATE_CONSTANT_* with
2757 * buffer 3 read length equal to zero committed followed by a
2758 * 3DSTATE_CONSTANT_* with buffer 0 read length not equal to
2759 * zero committed."
2760 *
2761 * To avoid this, we program the buffers in the highest slots.
2762 * This way, slot 0 is only used if slot 3 is also used.
2763 */
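/* Concretely: with two active UBO ranges, they occupy slots 3 and 2,
 * and slots 1 and 0 are left unprogrammed.
 */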
2764 int n = 3;
2765
2766 for (int i = 3; i >= 0; i--) {
2767 const struct brw_ubo_range *range = &prog_data->ubo_ranges[i];
2768
2769 if (range->length == 0)
2770 continue;
2771
2772 // XXX: is range->block a constbuf index? it would be nice
2773 struct iris_const_buffer *cbuf = &shs->constbuf[range->block];
2774 struct iris_resource *res = (void *) cbuf->data.res;
2775
2776 assert(cbuf->data.offset % 32 == 0);
2777
2778 pkt.ConstantBody.ReadLength[n] = range->length;
2779 pkt.ConstantBody.Buffer[n] =
2780 res ? ro_bo(res->bo, range->start * 32 + cbuf->data.offset)
2781 : ro_bo(batch->screen->workaround_bo, 0);
2782 n--;
2783 }
2784 }
2785 }
2786 }
2787
2788 struct iris_binder *binder = &batch->binder;
2789
2790 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
2791 if (dirty & (IRIS_DIRTY_BINDINGS_VS << stage)) {
2792 iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), ptr) {
2793 ptr._3DCommandSubOpcode = 38 + stage;
2794 ptr.PointertoVSBindingTable = binder->bt_offset[stage];
2795 }
2796 }
2797 }
2798
2799 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
2800 if (dirty & (IRIS_DIRTY_BINDINGS_VS << stage)) {
2801 iris_populate_binding_table(ice, batch, stage);
2802 }
2803 }
2804
2805 if (ice->state.need_border_colors)
2806 iris_use_pinned_bo(batch, ice->state.border_color_pool.bo, false);
2807
2808 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
2809 if (!(dirty & (IRIS_DIRTY_SAMPLER_STATES_VS << stage)) ||
2810 !ice->shaders.prog[stage])
2811 continue;
2812
2813 struct pipe_resource *res = ice->state.sampler_table[stage].res;
2814 if (res)
2815 iris_use_pinned_bo(batch, iris_resource_bo(res), false);
2816
2817 iris_emit_cmd(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ptr) {
2818 ptr._3DCommandSubOpcode = 43 + stage;
2819 ptr.PointertoVSSamplerState = ice->state.sampler_table[stage].offset;
2820 }
2821 }
2822
2823 if (dirty & IRIS_DIRTY_MULTISAMPLE) {
2824 iris_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms) {
2825 ms.PixelLocation =
2826 ice->state.cso_rast->half_pixel_center ? CENTER : UL_CORNER;
2827 if (ice->state.framebuffer.samples > 0)
2828 ms.NumberofMultisamples = ffs(ice->state.framebuffer.samples) - 1;
2829 }
2830 }
2831
2832 if (dirty & IRIS_DIRTY_SAMPLE_MASK) {
2833 iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), ms) {
2834 ms.SampleMask = MAX2(ice->state.sample_mask, 1);
2835 }
2836 }
2837
2838 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
2839 if (!(dirty & (IRIS_DIRTY_VS << stage)))
2840 continue;
2841
2842 struct iris_compiled_shader *shader = ice->shaders.prog[stage];
2843
2844 if (shader) {
2845 struct iris_resource *cache = (void *) shader->assembly.res;
2846 iris_use_pinned_bo(batch, cache->bo, false);
2847 iris_batch_emit(batch, shader->derived_data,
2848 iris_derived_program_state_size(stage));
2849 } else {
2850 if (stage == MESA_SHADER_TESS_EVAL) {
2851 iris_emit_cmd(batch, GENX(3DSTATE_HS), hs);
2852 iris_emit_cmd(batch, GENX(3DSTATE_TE), te);
2853 iris_emit_cmd(batch, GENX(3DSTATE_DS), ds);
2854 } else if (stage == MESA_SHADER_GEOMETRY) {
2855 iris_emit_cmd(batch, GENX(3DSTATE_GS), gs);
2856 }
2857 }
2858 }
2859
2860 if (dirty & IRIS_DIRTY_SO_BUFFERS) {
2861 iris_batch_emit(batch, genx->so_buffers,
2862 4 * 4 * GENX(3DSTATE_SO_BUFFER_length));
2863 }
2864
2865 if ((dirty & IRIS_DIRTY_SO_DECL_LIST) && ice->state.so_decl_list) {
2866 iris_batch_emit(batch, ice->state.so_decl_list,
2867 4 * ((ice->state.so_decl_list[0] & 0xff) + 2));
2868 }
2869
2870 // XXX: SOL:
2871 // 3DSTATE_STREAMOUT
2872
2873 if (dirty & IRIS_DIRTY_CLIP) {
2874 struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;
2875 struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
2876
2877 uint32_t dynamic_clip[GENX(3DSTATE_CLIP_length)];
2878 iris_pack_command(GENX(3DSTATE_CLIP), &dynamic_clip, cl) {
2879 if (wm_prog_data->barycentric_interp_modes &
2880 BRW_BARYCENTRIC_NONPERSPECTIVE_BITS)
2881 cl.NonPerspectiveBarycentricEnable = true;
2882
2883 cl.ForceZeroRTAIndexEnable = cso_fb->layers == 0;
2884 cl.MaximumVPIndex = ice->state.num_viewports - 1;
2885 }
2886 iris_emit_merge(batch, cso_rast->clip, dynamic_clip,
2887 ARRAY_SIZE(cso_rast->clip));
2888 }
2889
2890 if (dirty & IRIS_DIRTY_RASTER) {
2891 struct iris_rasterizer_state *cso = ice->state.cso_rast;
2892 iris_batch_emit(batch, cso->raster, sizeof(cso->raster));
2893 iris_batch_emit(batch, cso->sf, sizeof(cso->sf));
2895 }
2896
2897 /* XXX: FS program updates needs to flag IRIS_DIRTY_WM */
2898 if (dirty & IRIS_DIRTY_WM) {
2899 struct iris_rasterizer_state *cso = ice->state.cso_rast;
2900 uint32_t dynamic_wm[GENX(3DSTATE_WM_length)];
2901
2902 iris_pack_command(GENX(3DSTATE_WM), &dynamic_wm, wm) {
2903 wm.BarycentricInterpolationMode =
2904 wm_prog_data->barycentric_interp_modes;
2905
2906 if (wm_prog_data->early_fragment_tests)
2907 wm.EarlyDepthStencilControl = EDSC_PREPS;
2908 else if (wm_prog_data->has_side_effects)
2909 wm.EarlyDepthStencilControl = EDSC_PSEXEC;
2910 }
2911 iris_emit_merge(batch, cso->wm, dynamic_wm, ARRAY_SIZE(cso->wm));
2912 }
2913
2914 if (1) {
2915 // XXX: 3DSTATE_SBE, 3DSTATE_SBE_SWIZ
2916 // -> iris_raster_state (point sprite texture coordinate origin)
2917 // -> bunch of shader state...
2918 iris_emit_sbe(batch, ice);
2919 }
2920
2921 if (dirty & IRIS_DIRTY_PS_BLEND) {
2922 struct iris_blend_state *cso_blend = ice->state.cso_blend;
2923 struct iris_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa;
2924 uint32_t dynamic_pb[GENX(3DSTATE_PS_BLEND_length)];
2925 iris_pack_command(GENX(3DSTATE_PS_BLEND), &dynamic_pb, pb) {
2926 pb.HasWriteableRT = true; // XXX: comes from somewhere :(
2927 pb.AlphaTestEnable = cso_zsa->alpha.enabled;
2928 }
2929
2930 iris_emit_merge(batch, cso_blend->ps_blend, dynamic_pb,
2931 ARRAY_SIZE(cso_blend->ps_blend));
2932 }
2933
2934 if (dirty & IRIS_DIRTY_WM_DEPTH_STENCIL) {
2935 struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa;
2936 struct pipe_stencil_ref *p_stencil_refs = &ice->state.stencil_ref;
2937
2938 uint32_t stencil_refs[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];
2939 iris_pack_command(GENX(3DSTATE_WM_DEPTH_STENCIL), &stencil_refs, wmds) {
2940 wmds.StencilReferenceValue = p_stencil_refs->ref_value[0];
2941 wmds.BackfaceStencilReferenceValue = p_stencil_refs->ref_value[1];
2942 }
2943 iris_emit_merge(batch, cso->wmds, stencil_refs, ARRAY_SIZE(cso->wmds));
2944 }
2945
2946 if (dirty & IRIS_DIRTY_SCISSOR_RECT) {
2947 uint32_t scissor_offset =
2948 emit_state(batch, ice->state.dynamic_uploader,
2949 &ice->state.last_res.scissor,
2950 ice->state.scissors,
2951 sizeof(struct pipe_scissor_state) *
2952 ice->state.num_viewports, 32);
2953
2954 iris_emit_cmd(batch, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) {
2955 ptr.ScissorRectPointer = scissor_offset;
2956 }
2957 }
2958
2959 if (dirty & IRIS_DIRTY_DEPTH_BUFFER) {
2960 struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
2961 struct iris_depth_buffer_state *cso_z = &ice->state.genx->depth_buffer;
2962
2963 iris_batch_emit(batch, cso_z->packets, sizeof(cso_z->packets));
2964
2965 if (cso_fb->zsbuf) {
2966 struct iris_resource *zres = (void *) cso_fb->zsbuf->texture;
2967 // XXX: depth might not be writable...
2968 iris_use_pinned_bo(batch, zres->bo, true);
2969 }
2970 }
2971
2972 if (dirty & IRIS_DIRTY_POLYGON_STIPPLE) {
2973 iris_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_PATTERN), poly) {
2974 for (int i = 0; i < 32; i++) {
2975 poly.PatternRow[i] = ice->state.poly_stipple.stipple[i];
2976 }
2977 }
2978 }
2979
2980 if (dirty & IRIS_DIRTY_LINE_STIPPLE) {
2981 struct iris_rasterizer_state *cso = ice->state.cso_rast;
2982 iris_batch_emit(batch, cso->line_stipple, sizeof(cso->line_stipple));
2983 }
2984
2985 if (1) {
2986 iris_emit_cmd(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
2987 topo.PrimitiveTopologyType =
2988 translate_prim_type(draw->mode, draw->vertices_per_patch);
2989 }
2990 }
2991
2992 if (draw->index_size > 0) {
2993 struct iris_resource *res = NULL;
2994 unsigned offset;
2995
2996 if (draw->has_user_indices) {
2997 u_upload_data(ice->ctx.stream_uploader, 0,
2998 draw->count * draw->index_size, 4, draw->index.user,
2999 &offset, (struct pipe_resource **) &res);
3000 } else {
3001 res = (struct iris_resource *) draw->index.resource;
3002 offset = 0;
3003 }
3004
3005 iris_emit_cmd(batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
3006 ib.IndexFormat = draw->index_size >> 1; /* maps 1/2/4 bytes to 0/1/2 (BYTE/WORD/DWORD) */
3007 ib.MOCS = MOCS_WB;
3008 ib.BufferSize = res->bo->size;
3009 ib.BufferStartingAddress = ro_bo(res->bo, offset);
3010 }
3011 }
3012
3013 if (dirty & IRIS_DIRTY_VERTEX_BUFFERS) {
3014 struct iris_vertex_buffer_state *cso = &ice->state.genx->vertex_buffers;
3015 const unsigned vb_dwords = GENX(VERTEX_BUFFER_STATE_length);
3016
3017 if (cso->num_buffers > 0) {
3018 iris_batch_emit(batch, cso->vertex_buffers, sizeof(uint32_t) *
3019 (1 + vb_dwords * cso->num_buffers));
3020
3021 for (unsigned i = 0; i < cso->num_buffers; i++) {
3022 struct iris_resource *res = (void *) cso->resources[i];
3023 iris_use_pinned_bo(batch, res->bo, false);
3024 }
3025 }
3026 }
3027
3028 if (dirty & IRIS_DIRTY_VERTEX_ELEMENTS) {
3029 struct iris_vertex_element_state *cso = ice->state.cso_vertex_elements;
3030 iris_batch_emit(batch, cso->vertex_elements, sizeof(uint32_t) *
3031 (1 + cso->count * GENX(VERTEX_ELEMENT_STATE_length)));
3032 iris_batch_emit(batch, cso->vf_instancing, sizeof(uint32_t) *
3033 cso->count * GENX(3DSTATE_VF_INSTANCING_length));
3034 for (unsigned i = 0; i < cso->count; i++) {
3035 /* TODO: vertexid, instanceid support */
3036 iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgvs);
3037 }
3038 }
3039
3040 if (1) {
3041 iris_emit_cmd(batch, GENX(3DSTATE_VF), vf) {
3042 if (draw->primitive_restart) {
3043 vf.IndexedDrawCutIndexEnable = true;
3044 vf.CutIndex = draw->restart_index;
3045 }
3046 }
3047 }
3048
3049 // XXX: Gen8 - PMA fix
3050
3051 assert(!draw->indirect); // XXX: indirect support
3052
3053 iris_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {
3054 prim.StartInstanceLocation = draw->start_instance;
3055 prim.InstanceCount = draw->instance_count;
3056 prim.VertexCountPerInstance = draw->count;
3057 prim.VertexAccessType = draw->index_size > 0 ? RANDOM : SEQUENTIAL;
3058
3059 // XXX: this is probably bonkers.
3060 prim.StartVertexLocation = draw->start;
3061
3062 if (draw->index_size) {
3063 prim.BaseVertexLocation += draw->index_bias;
3064 } else {
3065 prim.StartVertexLocation += draw->index_bias;
3066 }
3067
3068 //prim.BaseVertexLocation = ...;
3069 }
3070
3071 if (!batch->contains_draw) {
3072 iris_restore_context_saved_bos(ice, batch, draw);
3073 batch->contains_draw = true;
3074 }
3075 }
3076
3077 /**
3078 * State module teardown.
3079 */
3080 static void
3081 iris_destroy_state(struct iris_context *ice)
3082 {
3083 iris_free_vertex_buffers(&ice->state.genx->vertex_buffers);
3084
3085 // XXX: unreference resources/surfaces.
3086 for (unsigned i = 0; i < ice->state.framebuffer.nr_cbufs; i++) {
3087 pipe_surface_reference(&ice->state.framebuffer.cbufs[i], NULL);
3088 }
3089 pipe_surface_reference(&ice->state.framebuffer.zsbuf, NULL);
3090
3091 for (int stage = 0; stage < MESA_SHADER_STAGES; stage++) {
3092 pipe_resource_reference(&ice->state.sampler_table[stage].res, NULL);
3093 }
3094 free(ice->state.genx);
3095
3096 pipe_resource_reference(&ice->state.last_res.cc_vp, NULL);
3097 pipe_resource_reference(&ice->state.last_res.sf_cl_vp, NULL);
3098 pipe_resource_reference(&ice->state.last_res.color_calc, NULL);
3099 pipe_resource_reference(&ice->state.last_res.scissor, NULL);
3100 pipe_resource_reference(&ice->state.last_res.blend, NULL);
3101 }
3102
3103 static unsigned
3104 flags_to_post_sync_op(uint32_t flags)
3105 {
3106 if (flags & PIPE_CONTROL_WRITE_IMMEDIATE)
3107 return WriteImmediateData;
3108
3109 if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT)
3110 return WritePSDepthCount;
3111
3112 if (flags & PIPE_CONTROL_WRITE_TIMESTAMP)
3113 return WriteTimestamp;
3114
3115 return 0;
3116 }
3117
3118 /**
3119 * Do the given flags have a Post Sync or LRI Post Sync operation?
3120 */
3121 static enum pipe_control_flags
3122 get_post_sync_flags(enum pipe_control_flags flags)
3123 {
3124 flags &= PIPE_CONTROL_WRITE_IMMEDIATE |
3125 PIPE_CONTROL_WRITE_DEPTH_COUNT |
3126 PIPE_CONTROL_WRITE_TIMESTAMP |
3127 PIPE_CONTROL_LRI_POST_SYNC_OP;
3128
3129 /* Only one "Post Sync Op" is allowed, and it's mutually exclusive with
3130 * "LRI Post Sync Operation". So more than one bit set would be illegal.
3131 */
3132 assert(util_bitcount(flags) <= 1);
3133
3134 return flags;
3135 }
3136
3137 // XXX: compute support
3138 #define IS_COMPUTE_PIPELINE(batch) (batch->ring != I915_EXEC_RENDER)
3139
3140 /**
3141 * Emit a series of PIPE_CONTROL commands, taking into account any
3142 * workarounds necessary to actually accomplish the caller's request.
3143 *
3144 * Unless otherwise noted, spec quotations in this function come from:
3145 *
3146 * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming
3147 * Restrictions for PIPE_CONTROL.
3148 */
3149 static void
3150 iris_emit_raw_pipe_control(struct iris_batch *batch, uint32_t flags,
3151 struct iris_bo *bo, uint32_t offset, uint64_t imm)
3152 {
3153 UNUSED const struct gen_device_info *devinfo = &batch->screen->devinfo;
3154 enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags);
3155 enum pipe_control_flags non_lri_post_sync_flags =
3156 post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP;
3157
3158 /* Recursive PIPE_CONTROL workarounds --------------------------------
3159 * (http://knowyourmeme.com/memes/xzibit-yo-dawg)
3160 *
3161 * We do these first because we want to look at the original operation,
3162 * rather than any workarounds we set.
3163 */
3164 if (GEN_GEN == 9 && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
3165 /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description
3166 * lists several workarounds:
3167 *
3168 * "Project: SKL, KBL, BXT
3169 *
3170 * If the VF Cache Invalidation Enable is set to a 1 in a
3171 * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields
3172 * sets to 0, with the VF Cache Invalidation Enable set to 0
3173 * needs to be sent prior to the PIPE_CONTROL with VF Cache
3174 * Invalidation Enable set to a 1."
3175 */
3176 iris_emit_raw_pipe_control(batch, 0, NULL, 0, 0);
3177 }

   if (GEN_GEN == 9 && IS_COMPUTE_PIPELINE(batch) && post_sync_flags) {
      /* Project: SKL / Argument: LRI Post Sync Operation [23]
       *
       *    "PIPECONTROL command with “Command Streamer Stall Enable” must be
       *     programmed prior to programming a PIPECONTROL command with "LRI
       *     Post Sync Operation" in GPGPU mode of operation (i.e when
       *     PIPELINE_SELECT command is set to GPGPU mode of operation)."
       *
       * The same text exists a few rows below for Post Sync Op.
       */
      iris_emit_raw_pipe_control(batch, PIPE_CONTROL_CS_STALL, bo, offset, imm);
   }

   if (GEN_GEN == 10 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
      /* Cannonlake:
       *    "Before sending a PIPE_CONTROL command with bit 12 set, SW must issue
       *     another PIPE_CONTROL with Render Target Cache Flush Enable (bit 12)
       *     = 0 and Pipe Control Flush Enable (bit 7) = 1"
       */
      iris_emit_raw_pipe_control(batch, PIPE_CONTROL_FLUSH_ENABLE, bo,
                                 offset, imm);
   }

   /* "Flush Types" workarounds ---------------------------------------------
    * We do these now because they may add post-sync operations or CS stalls.
    */

   if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) {
      /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate
       *
       *    "'Post Sync Operation' must be enabled to 'Write Immediate Data' or
       *     'Write PS Depth Count' or 'Write Timestamp'."
       */
      if (!bo) {
         flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         bo = batch->screen->workaround_bo;
      }
   }
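
/* The immediate write added above lands in batch->screen->workaround_bo,
 * a scratch page that exists only to give such dummy post-sync writes
 * somewhere harmless to go; nothing ever reads it back.
 */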

   /* #1130 from Gen10 workarounds page:
    *
    *    "Enable Depth Stall on every Post Sync Op if Render target Cache
    *     Flush is not enabled in same PIPE CONTROL and Enable Pixel score
    *     board stall if Render target cache flush is enabled."
    *
    * Applicable to CNL B0 and C0 steppings only.
    *
    * The wording here is unclear, and this workaround doesn't look anything
    * like the internal bug report recommendations, but leave it be for now...
    */
   if (GEN_GEN == 10) {
      if (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) {
         flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
      } else if (flags & non_lri_post_sync_flags) {
         flags |= PIPE_CONTROL_DEPTH_STALL;
      }
   }

   if (flags & PIPE_CONTROL_DEPTH_STALL) {
      /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable):
       *
       *    "This bit must be DISABLED for operations other than writing
       *     PS_DEPTH_COUNT."
       *
       * This seems like nonsense.  An Ivybridge workaround requires us to
       * emit a PIPE_CONTROL with a depth stall and write immediate post-sync
       * operation.  Gen8+ requires us to emit depth stalls and depth cache
       * flushes together.  So, it's hard to imagine this means anything other
       * than "we originally intended this to be used for PS_DEPTH_COUNT".
       *
       * We ignore the supposed restriction and do nothing.
       */
   }

   if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH |
                PIPE_CONTROL_STALL_AT_SCOREBOARD)) {
      /* From the PIPE_CONTROL instruction table, bit 12 and bit 1:
       *
       *    "This bit must be DISABLED for End-of-pipe (Read) fences,
       *     PS_DEPTH_COUNT or TIMESTAMP queries."
       *
       * TODO: Implement end-of-pipe checking.
       */
      assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT |
                                  PIPE_CONTROL_WRITE_TIMESTAMP)));
   }

   if (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD) {
      /* From the PIPE_CONTROL instruction table, bit 1:
       *
       *    "This bit is ignored if Depth Stall Enable is set.
       *     Further, the render cache is not flushed even if Write Cache
       *     Flush Enable bit is set."
       *
       * We assert that the caller doesn't do this combination, to try and
       * prevent mistakes.  It shouldn't hurt the GPU, though.
       */
      assert(!(flags & (PIPE_CONTROL_DEPTH_STALL |
                        PIPE_CONTROL_RENDER_TARGET_FLUSH)));
   }

   /* PIPE_CONTROL page workarounds ------------------------------------- */

   if (GEN_GEN <= 8 && (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) {
      /* From the PIPE_CONTROL page itself:
       *
       *    "IVB, HSW, BDW
       *     Restriction: Pipe_control with CS-stall bit set must be issued
       *     before a pipe-control command that has the State Cache
       *     Invalidate bit set."
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }

   if (flags & PIPE_CONTROL_FLUSH_LLC) {
      /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC):
       *
       *    "Project: ALL
       *     SW must always program Post-Sync Operation to "Write Immediate
       *     Data" when Flush LLC is set."
       *
       * For now, we just require the caller to do it.
       */
      assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE);
   }

   /* "Post-Sync Operation" workarounds -------------------------------- */

   /* Project: All / Argument: Global Snapshot Count Reset [19]
    *
    *    "This bit must not be exercised on any product.
    *     Requires stall bit ([20] of DW1) set."
    *
    * We don't use this, so we just assert that it isn't used.  The
    * PIPE_CONTROL instruction page indicates that they intended this
    * as a debug feature and don't think it is useful in production,
    * but it may actually be usable, should we ever want to.
    */
   assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0);

   if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR |
                PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) {
      /* Project: All / Arguments:
       *
       * - Generic Media State Clear [16]
       * - Indirect State Pointers Disable [16]
       *
       *    "Requires stall bit ([20] of DW1) set."
       *
       * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media
       * State Clear) says:
       *
       *    "PIPECONTROL command with “Command Streamer Stall Enable” must be
       *     programmed prior to programming a PIPECONTROL command with "Media
       *     State Clear" set in GPGPU mode of operation"
       *
       * This is a subset of the earlier rule, so the CS stall we set here
       * covers both.
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }

   if (flags & PIPE_CONTROL_STORE_DATA_INDEX) {
      /* Project: All / Argument: Store Data Index
       *
       *    "Post-Sync Operation ([15:14] of DW1) must be set to something other
       *     than '0'."
       *
       * For now, we just assert that the caller does this.  We might want to
       * automatically add a write to the workaround BO...
       */
      assert(non_lri_post_sync_flags != 0);
   }

   if (flags & PIPE_CONTROL_SYNC_GFDT) {
      /* Project: All / Argument: Sync GFDT
       *
       *    "Post-Sync Operation ([15:14] of DW1) must be set to something other
       *     than '0' or 0x2520[13] must be set."
       *
       * For now, we just assert that the caller does this.
       */
      assert(non_lri_post_sync_flags != 0);
   }

   if (flags & PIPE_CONTROL_TLB_INVALIDATE) {
      /* Project: IVB+ / Argument: TLB inv
       *
       *    "Requires stall bit ([20] of DW1) set."
       *
       * Also, from the PIPE_CONTROL instruction table:
       *
       *    "Project: SKL+
       *     Post Sync Operation or CS stall must be set to ensure a TLB
       *     invalidation occurs.  Otherwise no cycle will occur to the TLB
       *     cache to invalidate."
       *
       * The CS stall we set for the first rule satisfies the SKL+ rule as
       * well, so there's nothing more to do for it.
       */
      flags |= PIPE_CONTROL_CS_STALL;
   }

   if (GEN_GEN == 9 && devinfo->gt == 4) {
      /* TODO: The big Skylake GT4 post sync op workaround */
   }

   /* "GPGPU specific workarounds" (both post-sync and flush) ------------ */

   if (IS_COMPUTE_PIPELINE(batch)) {
      if (GEN_GEN >= 9 && (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE)) {
         /* Project: SKL+ / Argument: Tex Invalidate
          *
          *    "Requires stall bit ([20] of DW) set for all GPGPU Workloads."
          */
         flags |= PIPE_CONTROL_CS_STALL;
      }

      if (GEN_GEN == 8 && (post_sync_flags ||
                           (flags & (PIPE_CONTROL_NOTIFY_ENABLE |
                                     PIPE_CONTROL_DEPTH_STALL |
                                     PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                     PIPE_CONTROL_DATA_CACHE_FLUSH)))) {
         /* Project: BDW / Arguments:
          *
          * - LRI Post Sync Operation   [23]
          * - Post Sync Op              [15:14]
          * - Notify En                 [8]
          * - Depth Stall               [13]
          * - Render Target Cache Flush [12]
          * - Depth Cache Flush         [0]
          * - DC Flush Enable           [5]
          *
          *    "Requires stall bit ([20] of DW) set for all GPGPU and Media
          *     Workloads."
          */
         flags |= PIPE_CONTROL_CS_STALL;

         /* Also, from the PIPE_CONTROL instruction table, bit 20:
          *
          *    "Project: BDW
          *     This bit must be always set when PIPE_CONTROL command is
          *     programmed by GPGPU and MEDIA workloads, except for the cases
          *     when only Read Only Cache Invalidation bits are set (State
          *     Cache Invalidation Enable, Instruction cache Invalidation
          *     Enable, Texture Cache Invalidation Enable, Constant Cache
          *     Invalidation Enable).  This is to WA FFDOP CG issue, this WA
          *     need not implemented when FF_DOP_CG is disable via "Fixed
          *     Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register."
          *
          * It sounds like we could avoid CS stalls in some cases, but we
          * don't currently bother.  This list isn't exactly the list above,
          * either...
          */
      }
   }

   /* "Stall" workarounds ----------------------------------------------
    * These have to come after the earlier ones because we may have added
    * some additional CS stalls above.
    */

   if (GEN_GEN < 9 && (flags & PIPE_CONTROL_CS_STALL)) {
      /* Project: PRE-SKL, VLV, CHV
       *
       *    "[All Stepping][All SKUs]:
       *
       *     One of the following must also be set:
       *
       *     - Render Target Cache Flush Enable ([12] of DW1)
       *     - Depth Cache Flush Enable ([0] of DW1)
       *     - Stall at Pixel Scoreboard ([1] of DW1)
       *     - Depth Stall ([13] of DW1)
       *     - Post-Sync Operation ([13] of DW1)
       *     - DC Flush Enable ([5] of DW1)"
       *
       * If we don't already have one of those bits set, we choose to add
       * "Stall at Pixel Scoreboard".  Some of the other bits require a
       * CS stall as a workaround (see above), which would send us into
       * an infinite recursion of PIPE_CONTROLs.  "Stall at Pixel Scoreboard"
       * appears to be safe, so we choose that.
       */
      const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH |
                               PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                               PIPE_CONTROL_WRITE_IMMEDIATE |
                               PIPE_CONTROL_WRITE_DEPTH_COUNT |
                               PIPE_CONTROL_WRITE_TIMESTAMP |
                               PIPE_CONTROL_STALL_AT_SCOREBOARD |
                               PIPE_CONTROL_DEPTH_STALL |
                               PIPE_CONTROL_DATA_CACHE_FLUSH;
      if (!(flags & wa_bits))
         flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
   }

   /* Emit --------------------------------------------------------------- */

   iris_emit_cmd(batch, GENX(PIPE_CONTROL), pc) {
      pc.LRIPostSyncOperation = NoLRIOperation;
      pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE;
      pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH;
      pc.StoreDataIndex = 0;
      pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL;
      pc.GlobalSnapshotCountReset =
         flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET;
      pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE;
      pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR;
      pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD;
      pc.RenderTargetCacheFlushEnable =
         flags & PIPE_CONTROL_RENDER_TARGET_FLUSH;
      pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH;
      pc.StateCacheInvalidationEnable =
         flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE;
      pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
      pc.ConstantCacheInvalidationEnable =
         flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE;
      pc.PostSyncOperation = flags_to_post_sync_op(flags);
      pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL;
      pc.InstructionCacheInvalidateEnable =
         flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE;
      pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE;
      pc.IndirectStatePointersDisable =
         flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE;
      pc.TextureCacheInvalidationEnable =
         flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
      pc.Address = ro_bo(bo, offset);
      pc.ImmediateData = imm;
   }
}
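
/* A minimal usage sketch (fence_bo / fence_value are hypothetical caller
 * names): a fence-style immediate write, with the workaround logic above
 * adding whatever extra bits this generation requires, could look like:
 *
 *    iris_emit_raw_pipe_control(batch,
 *                               PIPE_CONTROL_CS_STALL |
 *                               PIPE_CONTROL_WRITE_IMMEDIATE,
 *                               fence_bo, 0, fence_value);
 *
 * Most code reaches this function through the ice->vtbl hook installed in
 * genX(init_state) below rather than calling it directly.
 */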

/**
 * Set up the pipe_context state hooks and the iris vtbl entries for this
 * hardware generation, and allocate generation-specific context state.
 */
void
genX(init_state)(struct iris_context *ice)
{
   struct pipe_context *ctx = &ice->ctx;
   struct iris_screen *screen = (struct iris_screen *)ctx->screen;

   ctx->create_blend_state = iris_create_blend_state;
   ctx->create_depth_stencil_alpha_state = iris_create_zsa_state;
   ctx->create_rasterizer_state = iris_create_rasterizer_state;
   ctx->create_sampler_state = iris_create_sampler_state;
   ctx->create_sampler_view = iris_create_sampler_view;
   ctx->create_surface = iris_create_surface;
   ctx->create_vertex_elements_state = iris_create_vertex_elements;
   ctx->create_compute_state = iris_create_compute_state;
   ctx->bind_blend_state = iris_bind_blend_state;
   ctx->bind_depth_stencil_alpha_state = iris_bind_zsa_state;
   ctx->bind_sampler_states = iris_bind_sampler_states;
   ctx->bind_rasterizer_state = iris_bind_rasterizer_state;
   ctx->bind_vertex_elements_state = iris_bind_vertex_elements_state;
   ctx->bind_compute_state = iris_bind_compute_state;
   ctx->delete_blend_state = iris_delete_state;
   ctx->delete_depth_stencil_alpha_state = iris_delete_state;
   ctx->delete_fs_state = iris_delete_state;
   ctx->delete_rasterizer_state = iris_delete_state;
   ctx->delete_sampler_state = iris_delete_state;
   ctx->delete_vertex_elements_state = iris_delete_state;
   ctx->delete_compute_state = iris_delete_state;
   ctx->delete_tcs_state = iris_delete_state;
   ctx->delete_tes_state = iris_delete_state;
   ctx->delete_gs_state = iris_delete_state;
   ctx->delete_vs_state = iris_delete_state;
   ctx->set_blend_color = iris_set_blend_color;
   ctx->set_clip_state = iris_set_clip_state;
   ctx->set_constant_buffer = iris_set_constant_buffer;
   ctx->set_sampler_views = iris_set_sampler_views;
   ctx->set_framebuffer_state = iris_set_framebuffer_state;
   ctx->set_polygon_stipple = iris_set_polygon_stipple;
   ctx->set_sample_mask = iris_set_sample_mask;
   ctx->set_scissor_states = iris_set_scissor_states;
   ctx->set_stencil_ref = iris_set_stencil_ref;
   ctx->set_vertex_buffers = iris_set_vertex_buffers;
   ctx->set_viewport_states = iris_set_viewport_states;
   ctx->sampler_view_destroy = iris_sampler_view_destroy;
   ctx->surface_destroy = iris_surface_destroy;
   ctx->draw_vbo = iris_draw_vbo;
   ctx->launch_grid = iris_launch_grid;
   ctx->create_stream_output_target = iris_create_stream_output_target;
   ctx->stream_output_target_destroy = iris_stream_output_target_destroy;
   ctx->set_stream_output_targets = iris_set_stream_output_targets;

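   /* The vtbl entries below let generation-independent parts of the driver
    * call into this genX-compiled code without knowing which GEN it is.
    */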
   ice->vtbl.destroy_state = iris_destroy_state;
   ice->vtbl.init_render_context = iris_init_render_context;
   ice->vtbl.upload_render_state = iris_upload_render_state;
   ice->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control;
   ice->vtbl.derived_program_state_size = iris_derived_program_state_size;
   ice->vtbl.store_derived_program_state = iris_store_derived_program_state;
   ice->vtbl.create_so_decl_list = iris_create_so_decl_list;
   ice->vtbl.populate_vs_key = iris_populate_vs_key;
   ice->vtbl.populate_tcs_key = iris_populate_tcs_key;
   ice->vtbl.populate_tes_key = iris_populate_tes_key;
   ice->vtbl.populate_gs_key = iris_populate_gs_key;
   ice->vtbl.populate_fs_key = iris_populate_fs_key;

   /* Mark everything dirty so the first draw re-emits all state. */
   ice->state.dirty = ~0ull;

   ice->state.num_viewports = 1;
   ice->state.genx = calloc(1, sizeof(struct iris_genx_state));

   /* Make a 1x1x1 null surface for unbound textures */
   void *null_surf_map =
      upload_state(ice->state.surface_uploader, &ice->state.unbound_tex,
                   4 * GENX(RENDER_SURFACE_STATE_length), 64);
   isl_null_fill_state(&screen->isl_dev, null_surf_map, isl_extent3d(1, 1, 1));
}