iris: assemble SAMPLER_STATE table at bind time
src/gallium/drivers/iris/iris_state.c
1 /*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23 #include <stdio.h>
24 #include <errno.h>
25
26 #ifdef HAVE_VALGRIND
27 #include <valgrind.h>
28 #include <memcheck.h>
29 #define VG(x) x
30 #define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))
31 #else
32 #define VG(x)
33 #endif
34
35 #include "pipe/p_defines.h"
36 #include "pipe/p_state.h"
37 #include "pipe/p_context.h"
38 #include "pipe/p_screen.h"
39 #include "util/u_inlines.h"
40 #include "util/u_transfer.h"
41 #include "util/u_upload_mgr.h"
42 #include "i915_drm.h"
43 #include "intel/compiler/brw_compiler.h"
44 #include "intel/common/gen_l3_config.h"
45 #include "intel/common/gen_sample_positions.h"
46 #include "iris_batch.h"
47 #include "iris_context.h"
48 #include "iris_pipe.h"
49 #include "iris_resource.h"
50
51 #define __gen_address_type struct iris_address
52 #define __gen_user_data struct iris_batch
53
54 #define ARRAY_BYTES(x) (sizeof(uint32_t) * ARRAY_SIZE(x))
55
56 static uint64_t
57 __gen_combine_address(struct iris_batch *batch, void *location,
58 struct iris_address addr, uint32_t delta)
59 {
60 uint64_t result = addr.offset + delta;
61
62 if (addr.bo) {
63 iris_use_pinned_bo(batch, addr.bo, addr.write);
64 /* Assume this is a general address, not relative to a base. */
65 result += addr.bo->gtt_offset;
66 }
67
68 return result;
69 }
70
71 #define __genxml_cmd_length(cmd) cmd ## _length
72 #define __genxml_cmd_length_bias(cmd) cmd ## _length_bias
73 #define __genxml_cmd_header(cmd) cmd ## _header
74 #define __genxml_cmd_pack(cmd) cmd ## _pack
75
76 static void *
77 get_command_space(struct iris_batch *batch, unsigned bytes)
78 {
79 iris_require_command_space(batch, bytes);
80 void *map = batch->cmdbuf.map_next;
81 batch->cmdbuf.map_next += bytes;
82 return map;
83 }
84
85 #define _iris_pack_command(batch, cmd, dst, name) \
86 for (struct cmd name = { __genxml_cmd_header(cmd) }, \
87 *_dst = (void *)(dst); __builtin_expect(_dst != NULL, 1); \
88 ({ __genxml_cmd_pack(cmd)(batch, (void *)_dst, &name); \
89 _dst = NULL; \
90 }))
91
92 #define iris_pack_command(cmd, dst, name) \
93 _iris_pack_command(NULL, cmd, dst, name)
94
95 #define iris_pack_state(cmd, dst, name) \
96 for (struct cmd name = {}, \
97 *_dst = (void *)(dst); __builtin_expect(_dst != NULL, 1); \
98 __genxml_cmd_pack(cmd)(NULL, (void *)_dst, &name), \
99 _dst = NULL)
100
101 #define iris_emit_cmd(batch, cmd, name) \
102 _iris_pack_command(batch, cmd, get_command_space(batch, 4 * __genxml_cmd_length(cmd)), name)
103
104 #define iris_emit_merge(batch, dwords0, dwords1, num_dwords) \
105 do { \
106 uint32_t *dw = get_command_space(batch, 4 * num_dwords); \
107 for (uint32_t i = 0; i < num_dwords; i++) \
108 dw[i] = (dwords0)[i] | (dwords1)[i]; \
109 VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, 4 * num_dwords)); \
110 } while (0)
111
112 #include "genxml/genX_pack.h"
113 #include "genxml/gen_macros.h"
114 #include "genxml/genX_bits.h"
115
116 #define MOCS_WB (2 << 1)
117
118 UNUSED static void pipe_asserts(void)
119 {
120 #define PIPE_ASSERT(x) STATIC_ASSERT((int)x)
121
122 /* pipe_logicop happens to match the hardware. */
123 PIPE_ASSERT(PIPE_LOGICOP_CLEAR == LOGICOP_CLEAR);
124 PIPE_ASSERT(PIPE_LOGICOP_NOR == LOGICOP_NOR);
125 PIPE_ASSERT(PIPE_LOGICOP_AND_INVERTED == LOGICOP_AND_INVERTED);
126 PIPE_ASSERT(PIPE_LOGICOP_COPY_INVERTED == LOGICOP_COPY_INVERTED);
127 PIPE_ASSERT(PIPE_LOGICOP_AND_REVERSE == LOGICOP_AND_REVERSE);
128 PIPE_ASSERT(PIPE_LOGICOP_INVERT == LOGICOP_INVERT);
129 PIPE_ASSERT(PIPE_LOGICOP_XOR == LOGICOP_XOR);
130 PIPE_ASSERT(PIPE_LOGICOP_NAND == LOGICOP_NAND);
131 PIPE_ASSERT(PIPE_LOGICOP_AND == LOGICOP_AND);
132 PIPE_ASSERT(PIPE_LOGICOP_EQUIV == LOGICOP_EQUIV);
133 PIPE_ASSERT(PIPE_LOGICOP_NOOP == LOGICOP_NOOP);
134 PIPE_ASSERT(PIPE_LOGICOP_OR_INVERTED == LOGICOP_OR_INVERTED);
135 PIPE_ASSERT(PIPE_LOGICOP_COPY == LOGICOP_COPY);
136 PIPE_ASSERT(PIPE_LOGICOP_OR_REVERSE == LOGICOP_OR_REVERSE);
137 PIPE_ASSERT(PIPE_LOGICOP_OR == LOGICOP_OR);
138 PIPE_ASSERT(PIPE_LOGICOP_SET == LOGICOP_SET);
139
140 /* pipe_blendfactor happens to match the hardware. */
141 PIPE_ASSERT(PIPE_BLENDFACTOR_ONE == BLENDFACTOR_ONE);
142 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_COLOR == BLENDFACTOR_SRC_COLOR);
143 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_ALPHA == BLENDFACTOR_SRC_ALPHA);
144 PIPE_ASSERT(PIPE_BLENDFACTOR_DST_ALPHA == BLENDFACTOR_DST_ALPHA);
145 PIPE_ASSERT(PIPE_BLENDFACTOR_DST_COLOR == BLENDFACTOR_DST_COLOR);
146 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE == BLENDFACTOR_SRC_ALPHA_SATURATE);
147 PIPE_ASSERT(PIPE_BLENDFACTOR_CONST_COLOR == BLENDFACTOR_CONST_COLOR);
148 PIPE_ASSERT(PIPE_BLENDFACTOR_CONST_ALPHA == BLENDFACTOR_CONST_ALPHA);
149 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC1_COLOR == BLENDFACTOR_SRC1_COLOR);
150 PIPE_ASSERT(PIPE_BLENDFACTOR_SRC1_ALPHA == BLENDFACTOR_SRC1_ALPHA);
151 PIPE_ASSERT(PIPE_BLENDFACTOR_ZERO == BLENDFACTOR_ZERO);
152 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC_COLOR == BLENDFACTOR_INV_SRC_COLOR);
153 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC_ALPHA == BLENDFACTOR_INV_SRC_ALPHA);
154 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_DST_ALPHA == BLENDFACTOR_INV_DST_ALPHA);
155 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_DST_COLOR == BLENDFACTOR_INV_DST_COLOR);
156 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_CONST_COLOR == BLENDFACTOR_INV_CONST_COLOR);
157 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_CONST_ALPHA == BLENDFACTOR_INV_CONST_ALPHA);
158 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC1_COLOR == BLENDFACTOR_INV_SRC1_COLOR);
159 PIPE_ASSERT(PIPE_BLENDFACTOR_INV_SRC1_ALPHA == BLENDFACTOR_INV_SRC1_ALPHA);
160
161 /* pipe_blend_func happens to match the hardware. */
162 PIPE_ASSERT(PIPE_BLEND_ADD == BLENDFUNCTION_ADD);
163 PIPE_ASSERT(PIPE_BLEND_SUBTRACT == BLENDFUNCTION_SUBTRACT);
164 PIPE_ASSERT(PIPE_BLEND_REVERSE_SUBTRACT == BLENDFUNCTION_REVERSE_SUBTRACT);
165 PIPE_ASSERT(PIPE_BLEND_MIN == BLENDFUNCTION_MIN);
166 PIPE_ASSERT(PIPE_BLEND_MAX == BLENDFUNCTION_MAX);
167
168 /* pipe_stencil_op happens to match the hardware. */
169 PIPE_ASSERT(PIPE_STENCIL_OP_KEEP == STENCILOP_KEEP);
170 PIPE_ASSERT(PIPE_STENCIL_OP_ZERO == STENCILOP_ZERO);
171 PIPE_ASSERT(PIPE_STENCIL_OP_REPLACE == STENCILOP_REPLACE);
172 PIPE_ASSERT(PIPE_STENCIL_OP_INCR == STENCILOP_INCRSAT);
173 PIPE_ASSERT(PIPE_STENCIL_OP_DECR == STENCILOP_DECRSAT);
174 PIPE_ASSERT(PIPE_STENCIL_OP_INCR_WRAP == STENCILOP_INCR);
175 PIPE_ASSERT(PIPE_STENCIL_OP_DECR_WRAP == STENCILOP_DECR);
176 PIPE_ASSERT(PIPE_STENCIL_OP_INVERT == STENCILOP_INVERT);
177
178 /* pipe_sprite_coord_mode happens to match 3DSTATE_SBE */
179 PIPE_ASSERT(PIPE_SPRITE_COORD_UPPER_LEFT == UPPERLEFT);
180 PIPE_ASSERT(PIPE_SPRITE_COORD_LOWER_LEFT == LOWERLEFT);
181 #undef PIPE_ASSERT
182 }
183
184 static unsigned
185 translate_prim_type(enum pipe_prim_type prim, uint8_t verts_per_patch)
186 {
187 static const unsigned map[] = {
188 [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST,
189 [PIPE_PRIM_LINES] = _3DPRIM_LINELIST,
190 [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP,
191 [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP,
192 [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST,
193 [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
194 [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
195 [PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST,
196 [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
197 [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON,
198 [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
199 [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
200 [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
201 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
202 [PIPE_PRIM_PATCHES] = _3DPRIM_PATCHLIST_1 - 1,
203 };
204
205 return map[prim] + (prim == PIPE_PRIM_PATCHES ? verts_per_patch : 0);
206 }
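/* Illustration (added note, not from the original source): the PIPE_PRIM_PATCHES
 * entry above relies on the _3DPRIM_PATCHLIST_n values being consecutive, so
 * e.g. verts_per_patch == 3 yields (_3DPRIM_PATCHLIST_1 - 1) + 3, i.e.
 * _3DPRIM_PATCHLIST_3.
 */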
207
208 static unsigned
209 translate_compare_func(enum pipe_compare_func pipe_func)
210 {
211 static const unsigned map[] = {
212 [PIPE_FUNC_NEVER] = COMPAREFUNCTION_NEVER,
213 [PIPE_FUNC_LESS] = COMPAREFUNCTION_LESS,
214 [PIPE_FUNC_EQUAL] = COMPAREFUNCTION_EQUAL,
215 [PIPE_FUNC_LEQUAL] = COMPAREFUNCTION_LEQUAL,
216 [PIPE_FUNC_GREATER] = COMPAREFUNCTION_GREATER,
217 [PIPE_FUNC_NOTEQUAL] = COMPAREFUNCTION_NOTEQUAL,
218 [PIPE_FUNC_GEQUAL] = COMPAREFUNCTION_GEQUAL,
219 [PIPE_FUNC_ALWAYS] = COMPAREFUNCTION_ALWAYS,
220 };
221 return map[pipe_func];
222 }
223
224 static unsigned
225 translate_shadow_func(enum pipe_compare_func pipe_func)
226 {
227 /* Gallium specifies the result of shadow comparisons as:
228 *
229 * 1 if ref <op> texel,
230 * 0 otherwise.
231 *
232 * The hardware does:
233 *
234 * 0 if texel <op> ref,
235 * 1 otherwise.
236 *
237 * So we need to flip the operator and also negate.
238 */
239 static const unsigned map[] = {
240 [PIPE_FUNC_NEVER] = PREFILTEROPALWAYS,
241 [PIPE_FUNC_LESS] = PREFILTEROPLEQUAL,
242 [PIPE_FUNC_EQUAL] = PREFILTEROPNOTEQUAL,
243 [PIPE_FUNC_LEQUAL] = PREFILTEROPLESS,
244 [PIPE_FUNC_GREATER] = PREFILTEROPGEQUAL,
245 [PIPE_FUNC_NOTEQUAL] = PREFILTEROPEQUAL,
246 [PIPE_FUNC_GEQUAL] = PREFILTEROPGREATER,
247 [PIPE_FUNC_ALWAYS] = PREFILTEROPNEVER,
248 };
249 return map[pipe_func];
250 }
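/* Worked example (added for clarity): for PIPE_FUNC_LEQUAL, Gallium wants
 * 1 when ref <= texel. The prefilter op yields 0 when (texel op ref) holds
 * and 1 otherwise, so PREFILTEROPLESS gives 0 when texel < ref and 1 when
 * texel >= ref -- which is exactly 1 when ref <= texel, as required.
 */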
251
252 static unsigned
253 translate_cull_mode(unsigned pipe_face)
254 {
255 static const unsigned map[4] = {
256 [PIPE_FACE_NONE] = CULLMODE_NONE,
257 [PIPE_FACE_FRONT] = CULLMODE_FRONT,
258 [PIPE_FACE_BACK] = CULLMODE_BACK,
259 [PIPE_FACE_FRONT_AND_BACK] = CULLMODE_BOTH,
260 };
261 return map[pipe_face];
262 }
263
264 static unsigned
265 translate_fill_mode(unsigned pipe_polymode)
266 {
267 static const unsigned map[4] = {
268 [PIPE_POLYGON_MODE_FILL] = FILL_MODE_SOLID,
269 [PIPE_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME,
270 [PIPE_POLYGON_MODE_POINT] = FILL_MODE_POINT,
271 [PIPE_POLYGON_MODE_FILL_RECTANGLE] = FILL_MODE_SOLID,
272 };
273 return map[pipe_polymode];
274 }
275
276 static struct iris_address
277 bo_addr(struct iris_bo *bo)
278 {
279 return (struct iris_address) { .offset = bo->gtt_offset };
280 }
281
282
283 static struct iris_address
284 ro_bo(struct iris_bo *bo, uint64_t offset)
285 {
286 return (struct iris_address) { .bo = bo, .offset = offset };
287 }
288
289 /**
290 * Returns the BO's address relative to the appropriate base address.
291 *
292 * All of our base addresses are programmed to the start of a 4GB region,
293 * so simply returning the bottom 32 bits of the BO address will give us
294 * the offset from whatever base address corresponds to that memory region.
295 */
296 static uint32_t
297 bo_offset_from_base_address(struct pipe_resource *res)
298 {
299 struct iris_bo *bo = ((struct iris_resource *) res)->bo;
300
301 /* This only works for buffers in the memory zones corresponding to a
302 * base address - the top, unbounded memory zone doesn't have a base.
303 */
304 assert(bo->gtt_offset < 3 * (1ull << 32));
305 return bo->gtt_offset;
306 }
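/* Sketch of the zone assumption (added note): iris_emit_state_base_address()
 * below programs SurfaceStateBaseAddress to 1ull << 32 and
 * DynamicStateBaseAddress to 2 * (1ull << 32). A BO placed at, say,
 * gtt_offset 0x100002000 therefore lives in the surface-state zone, and
 * truncating to the low 32 bits (0x2000) is exactly its offset from that base.
 */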
307
308 static uint32_t *
309 stream_state(struct iris_batch *batch,
310 struct u_upload_mgr *uploader,
311 unsigned size,
312 unsigned alignment,
313 uint32_t *out_offset)
314 {
315 struct pipe_resource *res = NULL;
316 void *ptr = NULL;
317
318 u_upload_alloc(uploader, 0, size, alignment, out_offset, &res, &ptr);
319
320 struct iris_bo *bo = ((struct iris_resource *) res)->bo;
321 iris_use_pinned_bo(batch, bo, false);
322
323 *out_offset += bo_offset_from_base_address(res);
324
325 pipe_resource_reference(&res, NULL);
326
327 return ptr;
328 }
329
330 static uint32_t
331 emit_state(struct iris_batch *batch,
332 struct u_upload_mgr *uploader,
333 const void *data,
334 unsigned size,
335 unsigned alignment)
336 {
337 unsigned offset = 0;
338 uint32_t *map = stream_state(batch, uploader, size, alignment, &offset);
339
340 if (map)
341 memcpy(map, data, size);
342
343 return offset;
344 }
345
346 static void
347 iris_emit_state_base_address(struct iris_batch *batch)
348 {
349 /* XXX: PIPE_CONTROLs */
350
351 iris_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) {
352 #if 0
353 // XXX: MOCS is stupid for this.
354 sba.GeneralStateMemoryObjectControlState = MOCS_WB;
355 sba.StatelessDataPortAccessMemoryObjectControlState = MOCS_WB;
356 sba.SurfaceStateMemoryObjectControlState = MOCS_WB;
357 sba.DynamicStateMemoryObjectControlState = MOCS_WB;
358 sba.IndirectObjectMemoryObjectControlState = MOCS_WB;
359 sba.InstructionMemoryObjectControlState = MOCS_WB;
360 sba.BindlessSurfaceStateMemoryObjectControlState = MOCS_WB;
361 #endif
362
363 sba.GeneralStateBaseAddressModifyEnable = true;
364 sba.SurfaceStateBaseAddressModifyEnable = true;
365 sba.DynamicStateBaseAddressModifyEnable = true;
366 sba.IndirectObjectBaseAddressModifyEnable = true;
367 sba.InstructionBaseAddressModifyEnable = true;
368 sba.GeneralStateBufferSizeModifyEnable = true;
369 sba.DynamicStateBufferSizeModifyEnable = true;
370 sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
371 sba.IndirectObjectBufferSizeModifyEnable = true;
372 sba.InstructionBuffersizeModifyEnable = true;
373
374 sba.SurfaceStateBaseAddress = ro_bo(NULL, 1ull << 32);
375 sba.DynamicStateBaseAddress = ro_bo(NULL, 2 * (1ull << 32));
376
377 sba.GeneralStateBufferSize = 0xfffff;
378 sba.IndirectObjectBufferSize = 0xfffff;
379 sba.InstructionBufferSize = 0xfffff;
380 sba.DynamicStateBufferSize = 0xfffff;
381 }
382 }
383
384 static void
385 iris_init_render_context(struct iris_screen *screen,
386 struct iris_batch *batch,
387 struct pipe_debug_callback *dbg)
388 {
389 batch->emit_state_base_address = iris_emit_state_base_address;
390 iris_init_batch(batch, screen, dbg, I915_EXEC_RENDER);
391
392 iris_emit_cmd(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
393 rect.ClippedDrawingRectangleXMax = UINT16_MAX;
394 rect.ClippedDrawingRectangleYMax = UINT16_MAX;
395 }
396 iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_PATTERN), pat) {
397 GEN_SAMPLE_POS_1X(pat._1xSample);
398 GEN_SAMPLE_POS_2X(pat._2xSample);
399 GEN_SAMPLE_POS_4X(pat._4xSample);
400 GEN_SAMPLE_POS_8X(pat._8xSample);
401 GEN_SAMPLE_POS_16X(pat._16xSample);
402 }
403 iris_emit_cmd(batch, GENX(3DSTATE_AA_LINE_PARAMETERS), foo);
404 iris_emit_cmd(batch, GENX(3DSTATE_WM_CHROMAKEY), foo);
405 iris_emit_cmd(batch, GENX(3DSTATE_WM_HZ_OP), foo);
406 /* XXX: may need to set an offset for origin-UL framebuffers */
407 iris_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_OFFSET), foo);
408
409 /* Just assign a static partitioning. */
410 for (int i = 0; i <= MESA_SHADER_FRAGMENT; i++) {
411 iris_emit_cmd(batch, GENX(3DSTATE_PUSH_CONSTANT_ALLOC_VS), alloc) {
412 alloc._3DCommandSubOpcode = 18 + i;
413 alloc.ConstantBufferOffset = 6 * i;
414 alloc.ConstantBufferSize = i == MESA_SHADER_FRAGMENT ? 8 : 6;
415 }
416 }
417 }
418
419 static void
420 iris_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *info)
421 {
422 }
423
424 static void
425 iris_set_blend_color(struct pipe_context *ctx,
426 const struct pipe_blend_color *state)
427 {
428 struct iris_context *ice = (struct iris_context *) ctx;
429
430 memcpy(&ice->state.blend_color, state, sizeof(struct pipe_blend_color));
431 ice->state.dirty |= IRIS_DIRTY_COLOR_CALC_STATE;
432 }
433
434 struct iris_blend_state {
435 uint32_t ps_blend[GENX(3DSTATE_PS_BLEND_length)];
436 uint32_t blend_state[GENX(BLEND_STATE_length) +
437 BRW_MAX_DRAW_BUFFERS * GENX(BLEND_STATE_ENTRY_length)];
438
439 bool alpha_to_coverage; /* for shader key */
440 };
441
442 static void *
443 iris_create_blend_state(struct pipe_context *ctx,
444 const struct pipe_blend_state *state)
445 {
446 struct iris_blend_state *cso = malloc(sizeof(struct iris_blend_state));
447 uint32_t *blend_state = cso->blend_state;
448
449 cso->alpha_to_coverage = state->alpha_to_coverage;
450
451 iris_pack_command(GENX(3DSTATE_PS_BLEND), cso->ps_blend, pb) {
452 /* pb.HasWriteableRT is filled in at draw time. */
453 /* pb.AlphaTestEnable is filled in at draw time. */
454 pb.AlphaToCoverageEnable = state->alpha_to_coverage;
455 pb.IndependentAlphaBlendEnable = state->independent_blend_enable;
456
457 pb.ColorBufferBlendEnable = state->rt[0].blend_enable;
458
459 pb.SourceBlendFactor = state->rt[0].rgb_src_factor;
460 pb.SourceAlphaBlendFactor = state->rt[0].alpha_src_factor;
461 pb.DestinationBlendFactor = state->rt[0].rgb_dst_factor;
462 pb.DestinationAlphaBlendFactor = state->rt[0].alpha_dst_factor;
463 }
464
465 iris_pack_state(GENX(BLEND_STATE), blend_state, bs) {
466 bs.AlphaToCoverageEnable = state->alpha_to_coverage;
467 bs.IndependentAlphaBlendEnable = state->independent_blend_enable;
468 bs.AlphaToOneEnable = state->alpha_to_one;
469 bs.AlphaToCoverageDitherEnable = state->alpha_to_coverage;
470 bs.ColorDitherEnable = state->dither;
471 /* bs.AlphaTestEnable and bs.AlphaTestFunction are filled in later. */
472 }
473
474 blend_state += GENX(BLEND_STATE_length);
475
476 for (int i = 0; i < BRW_MAX_DRAW_BUFFERS; i++) {
477 iris_pack_state(GENX(BLEND_STATE_ENTRY), blend_state, be) {
478 be.LogicOpEnable = state->logicop_enable;
479 be.LogicOpFunction = state->logicop_func;
480
481 be.PreBlendSourceOnlyClampEnable = false;
482 be.ColorClampRange = COLORCLAMP_RTFORMAT;
483 be.PreBlendColorClampEnable = true;
484 be.PostBlendColorClampEnable = true;
485
486 be.ColorBufferBlendEnable = state->rt[i].blend_enable;
487
488 be.ColorBlendFunction = state->rt[i].rgb_func;
489 be.AlphaBlendFunction = state->rt[i].alpha_func;
490 be.SourceBlendFactor = state->rt[i].rgb_src_factor;
491 be.SourceAlphaBlendFactor = state->rt[i].alpha_src_factor;
492 be.DestinationBlendFactor = state->rt[i].rgb_dst_factor;
493 be.DestinationAlphaBlendFactor = state->rt[i].alpha_dst_factor;
494
495 be.WriteDisableRed = !(state->rt[i].colormask & PIPE_MASK_R);
496 be.WriteDisableGreen = !(state->rt[i].colormask & PIPE_MASK_G);
497 be.WriteDisableBlue = !(state->rt[i].colormask & PIPE_MASK_B);
498 be.WriteDisableAlpha = !(state->rt[i].colormask & PIPE_MASK_A);
499 }
500 blend_state += GENX(BLEND_STATE_ENTRY_length);
501 }
502
503 return cso;
504 }
505
506 static void
507 iris_bind_blend_state(struct pipe_context *ctx, void *state)
508 {
509 struct iris_context *ice = (struct iris_context *) ctx;
510 ice->state.cso_blend = state;
511 ice->state.dirty |= IRIS_DIRTY_CC_VIEWPORT;
512 ice->state.dirty |= IRIS_DIRTY_WM_DEPTH_STENCIL;
513 }
514
515 struct iris_depth_stencil_alpha_state {
516 uint32_t wmds[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];
517 uint32_t cc_vp[GENX(CC_VIEWPORT_length)];
518
519 struct pipe_alpha_state alpha; /* to BLEND_STATE, 3DSTATE_PS_BLEND */
520 };
521
522 static void *
523 iris_create_zsa_state(struct pipe_context *ctx,
524 const struct pipe_depth_stencil_alpha_state *state)
525 {
526 struct iris_depth_stencil_alpha_state *cso =
527 malloc(sizeof(struct iris_depth_stencil_alpha_state));
528
529 cso->alpha = state->alpha;
530
531 bool two_sided_stencil = state->stencil[1].enabled;
532
533 /* The state tracker needs to optimize away EQUAL writes for us. */
534 assert(!(state->depth.func == PIPE_FUNC_EQUAL && state->depth.writemask));
535
536 iris_pack_command(GENX(3DSTATE_WM_DEPTH_STENCIL), cso->wmds, wmds) {
537 wmds.StencilFailOp = state->stencil[0].fail_op;
538 wmds.StencilPassDepthFailOp = state->stencil[0].zfail_op;
539 wmds.StencilPassDepthPassOp = state->stencil[0].zpass_op;
540 wmds.StencilTestFunction =
541 translate_compare_func(state->stencil[0].func);
542 wmds.BackfaceStencilFailOp = state->stencil[1].fail_op;
543 wmds.BackfaceStencilPassDepthFailOp = state->stencil[1].zfail_op;
544 wmds.BackfaceStencilPassDepthPassOp = state->stencil[1].zpass_op;
545 wmds.BackfaceStencilTestFunction =
546 translate_compare_func(state->stencil[1].func);
547 wmds.DepthTestFunction = translate_compare_func(state->depth.func);
548 wmds.DoubleSidedStencilEnable = two_sided_stencil;
549 wmds.StencilTestEnable = state->stencil[0].enabled;
550 wmds.StencilBufferWriteEnable =
551 state->stencil[0].writemask != 0 ||
552 (two_sided_stencil && state->stencil[1].writemask != 0);
553 wmds.DepthTestEnable = state->depth.enabled;
554 wmds.DepthBufferWriteEnable = state->depth.writemask;
555 wmds.StencilTestMask = state->stencil[0].valuemask;
556 wmds.StencilWriteMask = state->stencil[0].writemask;
557 wmds.BackfaceStencilTestMask = state->stencil[1].valuemask;
558 wmds.BackfaceStencilWriteMask = state->stencil[1].writemask;
559 /* wmds.[Backface]StencilReferenceValue are merged later */
560 }
561
562 iris_pack_state(GENX(CC_VIEWPORT), cso->cc_vp, ccvp) {
563 ccvp.MinimumDepth = state->depth.bounds_min;
564 ccvp.MaximumDepth = state->depth.bounds_max;
565 }
566
567 return cso;
568 }
569
570 static void
571 iris_bind_zsa_state(struct pipe_context *ctx, void *state)
572 {
573 struct iris_context *ice = (struct iris_context *) ctx;
574 struct iris_depth_stencil_alpha_state *old_cso = ice->state.cso_zsa;
575 struct iris_depth_stencil_alpha_state *new_cso = state;
576
577 if (new_cso) {
578 if (!old_cso || old_cso->alpha.ref_value != new_cso->alpha.ref_value) {
579 ice->state.dirty |= IRIS_DIRTY_COLOR_CALC_STATE;
580 }
581 }
582
583 ice->state.cso_zsa = new_cso;
584 ice->state.dirty |= IRIS_DIRTY_CC_VIEWPORT;
585 ice->state.dirty |= IRIS_DIRTY_WM_DEPTH_STENCIL;
586 }
587
588 struct iris_rasterizer_state {
589 uint32_t sf[GENX(3DSTATE_SF_length)];
590 uint32_t clip[GENX(3DSTATE_CLIP_length)];
591 uint32_t raster[GENX(3DSTATE_RASTER_length)];
592 uint32_t wm[GENX(3DSTATE_WM_length)];
593 uint32_t line_stipple[GENX(3DSTATE_LINE_STIPPLE_length)];
594
595 bool flatshade; /* for shader state */
596 bool clamp_fragment_color; /* for shader state */
597 bool light_twoside; /* for shader state */
598 bool rasterizer_discard; /* for 3DSTATE_STREAMOUT */
599 bool half_pixel_center; /* for 3DSTATE_MULTISAMPLE */
600 enum pipe_sprite_coord_mode sprite_coord_mode; /* PIPE_SPRITE_* */
601 uint16_t sprite_coord_enable;
602 };
603
604 static void *
605 iris_create_rasterizer_state(struct pipe_context *ctx,
606 const struct pipe_rasterizer_state *state)
607 {
608 struct iris_rasterizer_state *cso =
609 malloc(sizeof(struct iris_rasterizer_state));
610
611 #if 0
612 point_quad_rasterization -> SBE?
613
614 not necessary?
615 {
616 poly_smooth
617 force_persample_interp - ?
618 bottom_edge_rule
619
620 offset_units_unscaled - cap not exposed
621 }
622 #endif
623
624 cso->flatshade = state->flatshade;
625 cso->clamp_fragment_color = state->clamp_fragment_color;
626 cso->light_twoside = state->light_twoside;
627 cso->rasterizer_discard = state->rasterizer_discard;
628 cso->half_pixel_center = state->half_pixel_center;
629 cso->sprite_coord_mode = state->sprite_coord_mode;
630 cso->sprite_coord_enable = state->sprite_coord_enable;
631
632 iris_pack_command(GENX(3DSTATE_SF), cso->sf, sf) {
633 sf.StatisticsEnable = true;
634 sf.ViewportTransformEnable = true;
635 sf.AALineDistanceMode = AALINEDISTANCE_TRUE;
636 sf.LineEndCapAntialiasingRegionWidth =
637 state->line_smooth ? _10pixels : _05pixels;
638 sf.LastPixelEnable = state->line_last_pixel;
639 sf.LineWidth = state->line_width;
640 sf.SmoothPointEnable = state->point_smooth;
641 sf.PointWidthSource = state->point_size_per_vertex ? Vertex : State;
642 sf.PointWidth = state->point_size;
643
644 if (state->flatshade_first) {
645 sf.TriangleStripListProvokingVertexSelect = 2;
646 sf.TriangleFanProvokingVertexSelect = 2;
647 sf.LineStripListProvokingVertexSelect = 1;
648 } else {
649 sf.TriangleFanProvokingVertexSelect = 1;
650 }
651 }
652
653 /* COMPLETE! */
654 iris_pack_command(GENX(3DSTATE_RASTER), cso->raster, rr) {
655 rr.FrontWinding = state->front_ccw ? CounterClockwise : Clockwise;
656 rr.CullMode = translate_cull_mode(state->cull_face);
657 rr.FrontFaceFillMode = translate_fill_mode(state->fill_front);
658 rr.BackFaceFillMode = translate_fill_mode(state->fill_back);
659 rr.DXMultisampleRasterizationEnable = state->multisample;
660 rr.GlobalDepthOffsetEnableSolid = state->offset_tri;
661 rr.GlobalDepthOffsetEnableWireframe = state->offset_line;
662 rr.GlobalDepthOffsetEnablePoint = state->offset_point;
663 rr.GlobalDepthOffsetConstant = state->offset_units;
664 rr.GlobalDepthOffsetScale = state->offset_scale;
665 rr.GlobalDepthOffsetClamp = state->offset_clamp;
666 rr.SmoothPointEnable = state->point_smooth;
667 rr.AntialiasingEnable = state->line_smooth;
668 rr.ScissorRectangleEnable = state->scissor;
669 rr.ViewportZNearClipTestEnable = state->depth_clip_near;
670 rr.ViewportZFarClipTestEnable = state->depth_clip_far;
671 //rr.ConservativeRasterizationEnable = not yet supported by Gallium...
672 }
673
674 iris_pack_command(GENX(3DSTATE_CLIP), cso->clip, cl) {
675 /* cl.NonPerspectiveBarycentricEnable is filled in at draw time from
676 * the FS program; cl.ForceZeroRTAIndexEnable is filled in from the FB.
677 */
678 cl.StatisticsEnable = true;
679 cl.EarlyCullEnable = true;
680 cl.UserClipDistanceClipTestEnableBitmask = state->clip_plane_enable;
681 cl.ForceUserClipDistanceClipTestEnableBitmask = true;
682 cl.APIMode = state->clip_halfz ? APIMODE_D3D : APIMODE_OGL;
683 cl.GuardbandClipTestEnable = true;
684 cl.ClipMode = CLIPMODE_NORMAL;
685 cl.ClipEnable = true;
686 cl.ViewportXYClipTestEnable = state->point_tri_clip;
687 cl.MinimumPointWidth = 0.125;
688 cl.MaximumPointWidth = 255.875;
689
690 if (state->flatshade_first) {
691 cl.TriangleStripListProvokingVertexSelect = 2;
692 cl.TriangleFanProvokingVertexSelect = 2;
693 cl.LineStripListProvokingVertexSelect = 1;
694 } else {
695 cl.TriangleFanProvokingVertexSelect = 1;
696 }
697 }
698
699 iris_pack_command(GENX(3DSTATE_WM), cso->wm, wm) {
700 /* wm.BarycentricInterpolationMode and wm.EarlyDepthStencilControl are
701 * filled in at draw time from the FS program.
702 */
703 wm.LineAntialiasingRegionWidth = _10pixels;
704 wm.LineEndCapAntialiasingRegionWidth = _05pixels;
705 wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
706 wm.StatisticsEnable = true;
707 wm.LineStippleEnable = state->line_stipple_enable;
708 wm.PolygonStippleEnable = state->poly_stipple_enable;
709 }
710
711 /* Remap from 0..255 back to 1..256 */
712 const unsigned line_stipple_factor = state->line_stipple_factor + 1;
713
714 iris_pack_command(GENX(3DSTATE_LINE_STIPPLE), cso->line_stipple, line) {
715 line.LineStipplePattern = state->line_stipple_pattern;
716 line.LineStippleInverseRepeatCount = 1.0f / line_stipple_factor;
717 line.LineStippleRepeatCount = line_stipple_factor;
718 }
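/* Example (added note): a Gallium line_stipple_factor of 0 (GL factor 1,
 * each pattern bit used once) becomes a hardware LineStippleRepeatCount of 1
 * and a LineStippleInverseRepeatCount of 1.0.
 */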
719
720 return cso;
721 }
722
723 static void
724 iris_bind_rasterizer_state(struct pipe_context *ctx, void *state)
725 {
726 struct iris_context *ice = (struct iris_context *) ctx;
727 struct iris_rasterizer_state *old_cso = ice->state.cso_rast;
728 struct iris_rasterizer_state *new_cso = state;
729
730 if (new_cso) {
731 /* Try to avoid re-emitting 3DSTATE_LINE_STIPPLE, it's non-pipelined */
732 if (!old_cso || memcmp(old_cso->line_stipple, new_cso->line_stipple,
733 sizeof(old_cso->line_stipple)) != 0) {
734 ice->state.dirty |= IRIS_DIRTY_LINE_STIPPLE;
735 }
736
737 if (!old_cso ||
738 old_cso->half_pixel_center != new_cso->half_pixel_center) {
739 ice->state.dirty |= IRIS_DIRTY_MULTISAMPLE;
740 }
741 }
742
743 ice->state.cso_rast = new_cso;
744 ice->state.dirty |= IRIS_DIRTY_RASTER;
745 }
746
747 static uint32_t
748 translate_wrap(unsigned pipe_wrap)
749 {
750 static const unsigned map[] = {
751 [PIPE_TEX_WRAP_REPEAT] = TCM_WRAP,
752 [PIPE_TEX_WRAP_CLAMP] = TCM_HALF_BORDER,
753 [PIPE_TEX_WRAP_CLAMP_TO_EDGE] = TCM_CLAMP,
754 [PIPE_TEX_WRAP_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER,
755 [PIPE_TEX_WRAP_MIRROR_REPEAT] = TCM_MIRROR,
756 [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE,
757 [PIPE_TEX_WRAP_MIRROR_CLAMP] = -1, // XXX: ???
758 [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER] = -1, // XXX: ???
759 };
760 return map[pipe_wrap];
761 }
762
763 /**
764 * Return true if the given wrap mode requires the border color to exist.
765 */
766 static bool
767 wrap_mode_needs_border_color(unsigned wrap_mode)
768 {
769 return wrap_mode == TCM_CLAMP_BORDER || wrap_mode == TCM_HALF_BORDER;
770 }
771
772 static unsigned
773 translate_mip_filter(enum pipe_tex_mipfilter pipe_mip)
774 {
775 static const unsigned map[] = {
776 [PIPE_TEX_MIPFILTER_NEAREST] = MIPFILTER_NEAREST,
777 [PIPE_TEX_MIPFILTER_LINEAR] = MIPFILTER_LINEAR,
778 [PIPE_TEX_MIPFILTER_NONE] = MIPFILTER_NONE,
779 };
780 return map[pipe_mip];
781 }
782
783 struct iris_sampler_state {
784 struct pipe_sampler_state base;
785
786 bool needs_border_color;
787
788 uint32_t sampler_state[GENX(SAMPLER_STATE_length)];
789 };
790
791 static void *
792 iris_create_sampler_state(struct pipe_context *pctx,
793 const struct pipe_sampler_state *state)
794 {
795 struct iris_sampler_state *cso = CALLOC_STRUCT(iris_sampler_state);
796
797 if (!cso)
798 return NULL;
799
800 STATIC_ASSERT(PIPE_TEX_FILTER_NEAREST == MAPFILTER_NEAREST);
801 STATIC_ASSERT(PIPE_TEX_FILTER_LINEAR == MAPFILTER_LINEAR);
802
803 unsigned wrap_s = translate_wrap(state->wrap_s);
804 unsigned wrap_t = translate_wrap(state->wrap_t);
805 unsigned wrap_r = translate_wrap(state->wrap_r);
806
807 cso->needs_border_color = wrap_mode_needs_border_color(wrap_s) ||
808 wrap_mode_needs_border_color(wrap_t) ||
809 wrap_mode_needs_border_color(wrap_r);
810
811 iris_pack_state(GENX(SAMPLER_STATE), cso->sampler_state, samp) {
812 samp.TCXAddressControlMode = wrap_s;
813 samp.TCYAddressControlMode = wrap_t;
814 samp.TCZAddressControlMode = wrap_r;
815 samp.CubeSurfaceControlMode = state->seamless_cube_map;
816 samp.NonnormalizedCoordinateEnable = !state->normalized_coords;
817 samp.MinModeFilter = state->min_img_filter;
818 samp.MagModeFilter = state->mag_img_filter;
819 samp.MipModeFilter = translate_mip_filter(state->min_mip_filter);
820 samp.MaximumAnisotropy = RATIO21;
821
822 if (state->max_anisotropy >= 2) {
823 if (state->min_img_filter == PIPE_TEX_FILTER_LINEAR) {
824 samp.MinModeFilter = MAPFILTER_ANISOTROPIC;
825 samp.AnisotropicAlgorithm = EWAApproximation;
826 }
827
828 if (state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)
829 samp.MagModeFilter = MAPFILTER_ANISOTROPIC;
830
831 samp.MaximumAnisotropy =
832 MIN2((state->max_anisotropy - 2) / 2, RATIO161);
833 }
834
835 /* Set address rounding bits if not using nearest filtering. */
836 if (state->min_img_filter != PIPE_TEX_FILTER_NEAREST) {
837 samp.UAddressMinFilterRoundingEnable = true;
838 samp.VAddressMinFilterRoundingEnable = true;
839 samp.RAddressMinFilterRoundingEnable = true;
840 }
841
842 if (state->mag_img_filter != PIPE_TEX_FILTER_NEAREST) {
843 samp.UAddressMagFilterRoundingEnable = true;
844 samp.VAddressMagFilterRoundingEnable = true;
845 samp.RAddressMagFilterRoundingEnable = true;
846 }
847
848 if (state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
849 samp.ShadowFunction = translate_shadow_func(state->compare_func);
850
851 const float hw_max_lod = GEN_GEN >= 7 ? 14 : 13;
852
853 samp.LODPreClampMode = CLAMP_MODE_OGL;
854 samp.MinLOD = CLAMP(state->min_lod, 0, hw_max_lod);
855 samp.MaxLOD = CLAMP(state->max_lod, 0, hw_max_lod);
856 samp.TextureLODBias = CLAMP(state->lod_bias, -16, 15);
857
858 //samp.BorderColorPointer = <<comes from elsewhere>>
859 }
860
861 return cso;
862 }
863
864 static void
865 iris_bind_sampler_states(struct pipe_context *ctx,
866 enum pipe_shader_type p_stage,
867 unsigned start, unsigned count,
868 void **states)
869 {
870 struct iris_context *ice = (struct iris_context *) ctx;
871 gl_shader_stage stage = stage_from_pipe(p_stage);
872
873 assert(start + count <= IRIS_MAX_TEXTURE_SAMPLERS);
874
875 /* Assemble the SAMPLER_STATEs into a contiguous chunk of memory
876 * relative to Dynamic State Base Address.
877 */
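/* Layout sketch (added note): the table below is 32-byte aligned and holds
 * one 4 * GENX(SAMPLER_STATE_length)-byte entry per sampler, with entry i at
 * byte offset i * 4 * GENX(SAMPLER_STATE_length). sampler_table_offset[stage]
 * ends up relative to Dynamic State Base Address -- presumably so it can be
 * handed to 3DSTATE_SAMPLER_STATE_POINTERS_* at draw time.
 */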
878 void *map = NULL;
879 u_upload_alloc(ice->state.dynamic_uploader, 0,
880 count * 4 * GENX(SAMPLER_STATE_length), 32,
881 &ice->state.sampler_table_offset[stage],
882 &ice->state.sampler_table_resource[stage],
883 &map);
884 if (unlikely(!map))
885 return;
886
887 ice->state.sampler_table_offset[stage] +=
888 bo_offset_from_base_address(ice->state.sampler_table_resource[stage]);
889
890 for (int i = 0; i < count; i++) {
891 struct iris_sampler_state *state = states[i];
892
893 /* Save a pointer to the iris_sampler_state; a few of its fields are
894 * needed to inform draw-time decisions.
895 */
896 ice->state.samplers[stage][start + i] = state;
897
898 if (state)
899 memcpy(map, state->sampler_state, 4 * GENX(SAMPLER_STATE_length));
900
901 map += 4 * GENX(SAMPLER_STATE_length);
902 }
903
904 ice->state.num_samplers = count;
905
906 ice->state.dirty |= IRIS_DIRTY_SAMPLER_STATES_VS << stage;
907 }
908
909 struct iris_sampler_view {
910 struct pipe_sampler_view pipe;
911 struct isl_view view;
912
913 /** The resource (BO) holding our SURFACE_STATE. */
914 struct pipe_resource *surface_state_resource;
915 unsigned surface_state_offset;
916
917 //uint32_t surface_state[GENX(RENDER_SURFACE_STATE_length)];
918 };
919
920 /**
921 * Convert a swizzle enumeration (i.e. PIPE_SWIZZLE_X) to one of the Gen7.5+
922 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
923 *
924 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
925 * 0 1 2 3 4 5
926 * 4 5 6 7 0 1
927 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
928 *
929 * which is simply adding 4 then modding by 8 (or anding with 7).
930 *
931 * We then may need to apply workarounds for textureGather hardware bugs.
932 */
933 static enum isl_channel_select
934 pipe_swizzle_to_isl_channel(enum pipe_swizzle swizzle)
935 {
936 return (swizzle + 4) & 7;
937 }
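/* Example (added for clarity), per the mapping table above:
 * PIPE_SWIZZLE_X (0) -> (0 + 4) & 7 == 4 == SCS_RED, and
 * PIPE_SWIZZLE_ZERO (4) -> (4 + 4) & 7 == 0 == SCS_ZERO.
 */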
938
939 static struct pipe_sampler_view *
940 iris_create_sampler_view(struct pipe_context *ctx,
941 struct pipe_resource *tex,
942 const struct pipe_sampler_view *tmpl)
943 {
944 struct iris_context *ice = (struct iris_context *) ctx;
945 struct iris_screen *screen = (struct iris_screen *)ctx->screen;
946 struct iris_resource *itex = (struct iris_resource *) tex;
947 struct iris_sampler_view *isv = calloc(1, sizeof(struct iris_sampler_view));
948
949 if (!isv)
950 return NULL;
951
952 /* initialize base object */
953 isv->pipe = *tmpl;
954 isv->pipe.context = ctx;
955 isv->pipe.texture = NULL;
956 pipe_reference_init(&isv->pipe.reference, 1);
957 pipe_resource_reference(&isv->pipe.texture, tex);
958
959 /* XXX: do we need brw_get_texture_swizzle hacks here? */
960
961 isv->view = (struct isl_view) {
962 .format = iris_isl_format_for_pipe_format(tmpl->format),
963 .base_level = tmpl->u.tex.first_level,
964 .levels = tmpl->u.tex.last_level - tmpl->u.tex.first_level + 1,
965 .base_array_layer = tmpl->u.tex.first_layer,
966 .array_len = tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1,
967 .swizzle = (struct isl_swizzle) {
968 .r = pipe_swizzle_to_isl_channel(tmpl->swizzle_r),
969 .g = pipe_swizzle_to_isl_channel(tmpl->swizzle_g),
970 .b = pipe_swizzle_to_isl_channel(tmpl->swizzle_b),
971 .a = pipe_swizzle_to_isl_channel(tmpl->swizzle_a),
972 },
973 .usage = ISL_SURF_USAGE_TEXTURE_BIT,
974 };
975
976 void *map = NULL;
977 u_upload_alloc(ice->state.surface_uploader, 0,
978 4 * GENX(RENDER_SURFACE_STATE_length), 64,
979 &isv->surface_state_offset,
980 &isv->surface_state_resource,
981 &map);
982 if (unlikely(!map))
983 return NULL;
984
985 isv->surface_state_offset +=
986 bo_offset_from_base_address(isv->surface_state_resource);
987
988 isl_surf_fill_state(&screen->isl_dev, map,
989 .surf = &itex->surf, .view = &isv->view,
990 .mocs = MOCS_WB,
991 .address = itex->bo->gtt_offset);
992 // .aux_surf =
993 // .clear_color = clear_color,
994
995 return &isv->pipe;
996 }
997
998 struct iris_surface {
999 struct pipe_surface pipe;
1000 struct isl_view view;
1001
1002 /** The resource (BO) holding our SURFACE_STATE. */
1003 struct pipe_resource *surface_state_resource;
1004 unsigned surface_state_offset;
1005
1006 // uint32_t surface_state[GENX(RENDER_SURFACE_STATE_length)];
1007 };
1008
1009 static struct pipe_surface *
1010 iris_create_surface(struct pipe_context *ctx,
1011 struct pipe_resource *tex,
1012 const struct pipe_surface *tmpl)
1013 {
1014 struct iris_context *ice = (struct iris_context *) ctx;
1015 struct iris_screen *screen = (struct iris_screen *)ctx->screen;
1016 struct iris_surface *surf = calloc(1, sizeof(struct iris_surface));
1017 struct pipe_surface *psurf = &surf->pipe;
1018 struct iris_resource *itex = (struct iris_resource *) tex;
1019
1020 if (!surf)
1021 return NULL;
1022
1023 pipe_reference_init(&psurf->reference, 1);
1024 pipe_resource_reference(&psurf->texture, tex);
1025 psurf->context = ctx;
1026 psurf->format = tmpl->format;
1027 psurf->width = tex->width0;
1028 psurf->height = tex->height0;
1029 psurf->texture = tex;
1030 psurf->u.tex.first_layer = tmpl->u.tex.first_layer;
1031 psurf->u.tex.last_layer = tmpl->u.tex.last_layer;
1032 psurf->u.tex.level = tmpl->u.tex.level;
1033
1034 surf->view = (struct isl_view) {
1035 .format = iris_isl_format_for_pipe_format(tmpl->format),
1036 .base_level = tmpl->u.tex.level,
1037 .levels = 1,
1038 .base_array_layer = tmpl->u.tex.first_layer,
1039 .array_len = tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1,
1040 .swizzle = ISL_SWIZZLE_IDENTITY,
1041 // XXX: DEPTH_BIT, STENCIL_BIT...CUBE_BIT? Other bits?!
1042 .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT,
1043 };
1044
1045 void *map = NULL;
1046 u_upload_alloc(ice->state.surface_uploader, 0,
1047 4 * GENX(RENDER_SURFACE_STATE_length), 64,
1048 &surf->surface_state_offset,
1049 &surf->surface_state_resource,
1050 &map);
1051 if (unlikely(!map))
1052 return NULL;
1053
1054 surf->surface_state_offset +=
1055 bo_offset_from_base_address(surf->surface_state_resource);
1056
1057 isl_surf_fill_state(&screen->isl_dev, map,
1058 .surf = &itex->surf, .view = &surf->view,
1059 .mocs = MOCS_WB,
1060 .address = itex->bo->gtt_offset);
1061 // .aux_surf =
1062 // .clear_color = clear_color,
1063
1064 return psurf;
1065 }
1066
1067 static void
1068 iris_set_sampler_views(struct pipe_context *ctx,
1069 enum pipe_shader_type shader,
1070 unsigned start, unsigned count,
1071 struct pipe_sampler_view **views)
1072 {
1073 }
1074
1075 static void
1076 iris_set_clip_state(struct pipe_context *ctx,
1077 const struct pipe_clip_state *state)
1078 {
1079 }
1080
1081 static void
1082 iris_set_polygon_stipple(struct pipe_context *ctx,
1083 const struct pipe_poly_stipple *state)
1084 {
1085 struct iris_context *ice = (struct iris_context *) ctx;
1086 memcpy(&ice->state.poly_stipple, state, sizeof(*state));
1087 ice->state.dirty |= IRIS_DIRTY_POLYGON_STIPPLE;
1088 }
1089
1090 static void
1091 iris_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask)
1092 {
1093 struct iris_context *ice = (struct iris_context *) ctx;
1094
1095 ice->state.sample_mask = sample_mask;
1096 ice->state.dirty |= IRIS_DIRTY_SAMPLE_MASK;
1097 }
1098
1099 static void
1100 iris_set_scissor_states(struct pipe_context *ctx,
1101 unsigned start_slot,
1102 unsigned num_scissors,
1103 const struct pipe_scissor_state *states)
1104 {
1105 struct iris_context *ice = (struct iris_context *) ctx;
1106
1107 ice->state.num_scissors = num_scissors;
1108
1109 for (unsigned i = 0; i < num_scissors; i++) {
1110 ice->state.scissors[start_slot + i] = states[i];
1111 }
1112
1113 ice->state.dirty |= IRIS_DIRTY_SCISSOR_RECT;
1114 }
1115
1116 static void
1117 iris_set_stencil_ref(struct pipe_context *ctx,
1118 const struct pipe_stencil_ref *state)
1119 {
1120 struct iris_context *ice = (struct iris_context *) ctx;
1121 memcpy(&ice->state.stencil_ref, state, sizeof(*state));
1122 ice->state.dirty |= IRIS_DIRTY_WM_DEPTH_STENCIL;
1123 }
1124
1125
1126 struct iris_viewport_state {
1127 uint32_t sf_cl_vp[GENX(SF_CLIP_VIEWPORT_length) * IRIS_MAX_VIEWPORTS];
1128 };
1129
1130 static float
1131 viewport_extent(const struct pipe_viewport_state *state, int axis, float sign)
1132 {
1133 return copysignf(state->scale[axis], sign) + state->translate[axis];
1134 }
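/* Worked example (added note, assuming the usual GL-style viewport transform
 * where scale[0] == width / 2 and translate[0] == x + width / 2):
 * viewport_extent(state, 0, -1.0f) == x and
 * viewport_extent(state, 0, 1.0f) == x + width. copysignf() keeps this
 * correct even when the scale is negative (e.g. a Y-flipped viewport).
 */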
1135
1136 #if 0
1137 static void
1138 calculate_guardband_size(uint32_t fb_width, uint32_t fb_height,
1139 float m00, float m11, float m30, float m31,
1140 float *xmin, float *xmax,
1141 float *ymin, float *ymax)
1142 {
1143 /* According to the "Vertex X,Y Clamping and Quantization" section of the
1144 * Strips and Fans documentation:
1145 *
1146 * "The vertex X and Y screen-space coordinates are also /clamped/ to the
1147 * fixed-point "guardband" range supported by the rasterization hardware"
1148 *
1149 * and
1150 *
1151 * "In almost all circumstances, if an object’s vertices are actually
1152 * modified by this clamping (i.e., had X or Y coordinates outside of
1153 * the guardband extent) the rendered object will not match the intended
1154 * result. Therefore software should take steps to ensure that this does
1155 * not happen - e.g., by clipping objects such that they do not exceed
1156 * these limits after the Drawing Rectangle is applied."
1157 *
1158 * I believe the fundamental restriction is that the rasterizer (in
1159 * the SF/WM stages) have a limit on the number of pixels that can be
1160 * rasterized. We need to ensure any coordinates beyond the rasterizer
1161 * limit are handled by the clipper. So effectively that limit becomes
1162 * the clipper's guardband size.
1163 *
1164 * It goes on to say:
1165 *
1166 * "In addition, in order to be correctly rendered, objects must have a
1167 * screenspace bounding box not exceeding 8K in the X or Y direction.
1168 * This additional restriction must also be comprehended by software,
1169 * i.e., enforced by use of clipping."
1170 *
1171 * This makes no sense. Gen7+ hardware supports 16K render targets,
1172 * and you definitely need to be able to draw polygons that fill the
1173 * surface. Our assumption is that the rasterizer was limited to 8K
1174 * on Sandybridge, which only supports 8K surfaces, and it was actually
1175 * increased to 16K on Ivybridge and later.
1176 *
1177 * So, limit the guardband to 16K on Gen7+ and 8K on Sandybridge.
1178 */
1179 const float gb_size = GEN_GEN >= 7 ? 16384.0f : 8192.0f;
1180
1181 if (m00 != 0 && m11 != 0) {
1182 /* First, we compute the screen-space render area */
1183 const float ss_ra_xmin = MIN3( 0, m30 + m00, m30 - m00);
1184 const float ss_ra_xmax = MAX3( fb_width, m30 + m00, m30 - m00);
1185 const float ss_ra_ymin = MIN3( 0, m31 + m11, m31 - m11);
1186 const float ss_ra_ymax = MAX3(fb_height, m31 + m11, m31 - m11);
1187
1188 /* We want the guardband to be centered on that */
1189 const float ss_gb_xmin = (ss_ra_xmin + ss_ra_xmax) / 2 - gb_size;
1190 const float ss_gb_xmax = (ss_ra_xmin + ss_ra_xmax) / 2 + gb_size;
1191 const float ss_gb_ymin = (ss_ra_ymin + ss_ra_ymax) / 2 - gb_size;
1192 const float ss_gb_ymax = (ss_ra_ymin + ss_ra_ymax) / 2 + gb_size;
1193
1194 /* Now we need it in native device coordinates */
1195 const float ndc_gb_xmin = (ss_gb_xmin - m30) / m00;
1196 const float ndc_gb_xmax = (ss_gb_xmax - m30) / m00;
1197 const float ndc_gb_ymin = (ss_gb_ymin - m31) / m11;
1198 const float ndc_gb_ymax = (ss_gb_ymax - m31) / m11;
1199
1200 /* Thanks to Y-flipping and ORIGIN_UPPER_LEFT, the Y coordinates may be
1201 * flipped upside-down. X should be fine though.
1202 */
1203 assert(ndc_gb_xmin <= ndc_gb_xmax);
1204 *xmin = ndc_gb_xmin;
1205 *xmax = ndc_gb_xmax;
1206 *ymin = MIN2(ndc_gb_ymin, ndc_gb_ymax);
1207 *ymax = MAX2(ndc_gb_ymin, ndc_gb_ymax);
1208 } else {
1209 /* The viewport scales to 0, so nothing will be rendered. */
1210 *xmin = 0.0f;
1211 *xmax = 0.0f;
1212 *ymin = 0.0f;
1213 *ymax = 0.0f;
1214 }
1215 }
1216 #endif
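/* Worked example of the disabled helper above (added note): for a 1920x1080
 * framebuffer with m00 == m30 == 960, the screen-space render area is
 * [0, 1920], so with gb_size == 16384 the guardband spans
 * [960 - 16384, 960 + 16384] in screen space, which maps back to roughly
 * [-17.07, +17.07] in NDC -- comfortably wider than the [-1, 1] clip volume.
 */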
1217
1218 static void
1219 iris_set_viewport_states(struct pipe_context *ctx,
1220 unsigned start_slot,
1221 unsigned num_viewports,
1222 const struct pipe_viewport_state *state)
1223 {
1224 struct iris_context *ice = (struct iris_context *) ctx;
1225 struct iris_viewport_state *cso =
1226 malloc(sizeof(struct iris_viewport_state));
1227 uint32_t *vp_map = &cso->sf_cl_vp[start_slot * GENX(SF_CLIP_VIEWPORT_length)];
1228
1229 // XXX: a fresh cso is malloc'd each call, so viewports set by earlier calls are discarded
1230 for (unsigned i = 0; i < num_viewports; i++) {
1231 iris_pack_state(GENX(SF_CLIP_VIEWPORT), vp_map, vp) {
1232 vp.ViewportMatrixElementm00 = state[i].scale[0];
1233 vp.ViewportMatrixElementm11 = state[i].scale[1];
1234 vp.ViewportMatrixElementm22 = state[i].scale[2];
1235 vp.ViewportMatrixElementm30 = state[i].translate[0];
1236 vp.ViewportMatrixElementm31 = state[i].translate[1];
1237 vp.ViewportMatrixElementm32 = state[i].translate[2];
1238 /* XXX: in i965 this is computed based on the drawbuffer size,
1239 * but we don't have that here...
1240 */
1241 vp.XMinClipGuardband = -1.0;
1242 vp.XMaxClipGuardband = 1.0;
1243 vp.YMinClipGuardband = -1.0;
1244 vp.YMaxClipGuardband = 1.0;
1245 vp.XMinViewPort = viewport_extent(&state[i], 0, -1.0f);
1246 vp.XMaxViewPort = viewport_extent(&state[i], 0, 1.0f) - 1;
1247 vp.YMinViewPort = viewport_extent(&state[i], 1, -1.0f);
1248 vp.YMaxViewPort = viewport_extent(&state[i], 1, 1.0f) - 1;
1249 }
1250
1251 vp_map += GENX(SF_CLIP_VIEWPORT_length);
1252 }
1253
1254 ice->state.cso_vp = cso;
1255 ice->state.num_viewports = num_viewports;
1256 ice->state.dirty |= IRIS_DIRTY_SF_CL_VIEWPORT;
1257 }
1258
1259 struct iris_depth_state
1260 {
1261 uint32_t depth_buffer[GENX(3DSTATE_DEPTH_BUFFER_length)];
1262 uint32_t hier_depth_buffer[GENX(3DSTATE_HIER_DEPTH_BUFFER_length)];
1263 uint32_t stencil_buffer[GENX(3DSTATE_STENCIL_BUFFER_length)];
1264 };
1265
1266 static void
1267 iris_set_framebuffer_state(struct pipe_context *ctx,
1268 const struct pipe_framebuffer_state *state)
1269 {
1270 struct iris_context *ice = (struct iris_context *) ctx;
1271 struct pipe_framebuffer_state *cso = &ice->state.framebuffer;
1272
1273 if (cso->samples != state->samples) {
1274 ice->state.dirty |= IRIS_DIRTY_MULTISAMPLE;
1275 }
1276
1277 if (cso->nr_cbufs != state->nr_cbufs) {
1278 ice->state.dirty |= IRIS_DIRTY_BLEND_STATE;
1279 }
1280
1281 cso->width = state->width;
1282 cso->height = state->height;
1283 cso->layers = state->layers;
1284 cso->samples = state->samples;
1285
1286 unsigned i;
1287 for (i = 0; i < state->nr_cbufs; i++)
1288 pipe_surface_reference(&cso->cbufs[i], state->cbufs[i]);
1289 for (; i < cso->nr_cbufs; i++)
1290 pipe_surface_reference(&cso->cbufs[i], NULL);
1291
1292 cso->nr_cbufs = state->nr_cbufs;
1293
1294 pipe_surface_reference(&cso->zsbuf, state->zsbuf);
1295
1296 struct isl_depth_stencil_hiz_emit_info info = {
1297 .mocs = MOCS_WB,
1298 };
1299
1300 // XXX: depth buffers
1301 }
1302
1303 static void
1304 iris_set_constant_buffer(struct pipe_context *ctx,
1305 enum pipe_shader_type p_stage, unsigned index,
1306 const struct pipe_constant_buffer *cb)
1307 {
1308 struct iris_context *ice = (struct iris_context *) ctx;
1309 gl_shader_stage stage = stage_from_pipe(p_stage);
1310
1311 util_copy_constant_buffer(&ice->shaders.state[stage].constbuf[index], cb);
1312 }
1313
1314 static void
1315 iris_sampler_view_destroy(struct pipe_context *ctx,
1316 struct pipe_sampler_view *state)
1317 {
1318 pipe_resource_reference(&state->texture, NULL);
1319 free(state);
1320 }
1321
1322
1323 static void
1324 iris_surface_destroy(struct pipe_context *ctx, struct pipe_surface *p_surf)
1325 {
1326 struct iris_surface *surf = (void *) p_surf;
1327 pipe_resource_reference(&p_surf->texture, NULL);
1328 pipe_resource_reference(&surf->surface_state_resource, NULL);
1329 free(surf);
1330 }
1331
1332 static void
1333 iris_delete_state(struct pipe_context *ctx, void *state)
1334 {
1335 free(state);
1336 }
1337
1338 struct iris_vertex_buffer_state {
1339 uint32_t vertex_buffers[1 + 33 * GENX(VERTEX_BUFFER_STATE_length)];
1340 struct iris_bo *bos[33];
1341 unsigned num_buffers;
1342 };
1343
1344 static void
1345 iris_free_vertex_buffers(struct iris_vertex_buffer_state *cso)
1346 {
1347 if (cso) {
1348 for (unsigned i = 0; i < cso->num_buffers; i++)
1349 iris_bo_unreference(cso->bos[i]);
1350 free(cso);
1351 }
1352 }
1353
1354 static void
1355 iris_set_vertex_buffers(struct pipe_context *ctx,
1356 unsigned start_slot, unsigned count,
1357 const struct pipe_vertex_buffer *buffers)
1358 {
1359 struct iris_context *ice = (struct iris_context *) ctx;
1360 struct iris_vertex_buffer_state *cso =
1361 malloc(sizeof(struct iris_vertex_buffer_state));
1362
1363 /* If there are no buffers, do nothing. We can leave the stale
1364 * 3DSTATE_VERTEX_BUFFERS in place - as long as there are no vertex
1365 * elements that point to them, it should be fine.
1366 */
1367 if (!buffers)
1368 return;
1369
1370 iris_free_vertex_buffers(ice->state.cso_vertex_buffers);
1371
1372 cso->num_buffers = count;
1373
1374 iris_pack_command(GENX(3DSTATE_VERTEX_BUFFERS), cso->vertex_buffers, vb) {
1375 vb.DWordLength = 4 * cso->num_buffers - 1;
1376 }
1377
1378 uint32_t *vb_pack_dest = &cso->vertex_buffers[1];
1379
1380 for (unsigned i = 0; i < count; i++) {
1381 assert(!buffers[i].is_user_buffer);
1382
1383 struct iris_resource *res = (void *) buffers[i].buffer.resource;
1384 iris_bo_reference(res->bo);
1385 cso->bos[i] = res->bo;
1386
1387 iris_pack_state(GENX(VERTEX_BUFFER_STATE), vb_pack_dest, vb) {
1388 vb.VertexBufferIndex = start_slot + i;
1389 vb.MOCS = MOCS_WB;
1390 vb.AddressModifyEnable = true;
1391 vb.BufferPitch = buffers[i].stride;
1392 vb.BufferSize = res->bo->size;
1393 vb.BufferStartingAddress =
1394 ro_bo(NULL, res->bo->gtt_offset + buffers[i].buffer_offset);
1395 }
1396
1397 vb_pack_dest += GENX(VERTEX_BUFFER_STATE_length);
1398 }
1399
1400 ice->state.cso_vertex_buffers = cso;
1401 ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS;
1402 }
1403
1404 struct iris_vertex_element_state {
1405 uint32_t vertex_elements[1 + 33 * GENX(VERTEX_ELEMENT_STATE_length)];
1406 uint32_t vf_instancing[33][GENX(3DSTATE_VF_INSTANCING_length)];
1407 unsigned count;
1408 };
1409
1410 static void *
1411 iris_create_vertex_elements(struct pipe_context *ctx,
1412 unsigned count,
1413 const struct pipe_vertex_element *state)
1414 {
1415 struct iris_vertex_element_state *cso =
1416 malloc(sizeof(struct iris_vertex_element_state));
1417
1418 cso->count = count;
1419
1420 /* TODO:
1421 * - create edge flag one
1422 * - create SGV ones
1423 * - if those are necessary, use count + 1/2/3... OR in the length
1424 */
1425 iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), cso->vertex_elements, ve);
1426
1427 uint32_t *ve_pack_dest = &cso->vertex_elements[1];
1428
1429 for (int i = 0; i < count; i++) {
1430 enum isl_format isl_format =
1431 iris_isl_format_for_pipe_format(state[i].src_format);
1432 unsigned comp[4] = { VFCOMP_STORE_SRC, VFCOMP_STORE_SRC,
1433 VFCOMP_STORE_SRC, VFCOMP_STORE_SRC };
1434
1435 switch (isl_format_get_num_channels(isl_format)) {
1436 case 0: comp[0] = VFCOMP_STORE_0; /* fallthrough */
1437 case 1: comp[1] = VFCOMP_STORE_0; /* fallthrough */
1438 case 2: comp[2] = VFCOMP_STORE_0; /* fallthrough */
1439 case 3:
1440 comp[3] = isl_format_has_int_channel(isl_format) ? VFCOMP_STORE_1_INT
1441 : VFCOMP_STORE_1_FP;
1442 break;
1443 }
1444 iris_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) {
1445 ve.VertexBufferIndex = state[i].vertex_buffer_index;
1446 ve.Valid = true;
1447 ve.SourceElementOffset = state[i].src_offset;
1448 ve.SourceElementFormat = isl_format;
1449 ve.Component0Control = comp[0];
1450 ve.Component1Control = comp[1];
1451 ve.Component2Control = comp[2];
1452 ve.Component3Control = comp[3];
1453 }
1454
1455 iris_pack_command(GENX(3DSTATE_VF_INSTANCING), cso->vf_instancing[i], vi) {
1456 vi.VertexElementIndex = i;
1457 vi.InstancingEnable = state[i].instance_divisor > 0;
1458 vi.InstanceDataStepRate = state[i].instance_divisor;
1459 }
1460
1461 ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length);
1462 }
1463
1464 return cso;
1465 }
1466
1467 static void
1468 iris_bind_vertex_elements_state(struct pipe_context *ctx, void *state)
1469 {
1470 struct iris_context *ice = (struct iris_context *) ctx;
1471
1472 ice->state.cso_vertex_elements = state;
1473 ice->state.dirty |= IRIS_DIRTY_VERTEX_ELEMENTS;
1474 }
1475
1476 static void *
1477 iris_create_compute_state(struct pipe_context *ctx,
1478 const struct pipe_compute_state *state)
1479 {
1480 return malloc(1);
1481 }
1482
1483 static struct pipe_stream_output_target *
1484 iris_create_stream_output_target(struct pipe_context *ctx,
1485 struct pipe_resource *res,
1486 unsigned buffer_offset,
1487 unsigned buffer_size)
1488 {
1489 struct pipe_stream_output_target *t =
1490 CALLOC_STRUCT(pipe_stream_output_target);
1491 if (!t)
1492 return NULL;
1493
1494 pipe_reference_init(&t->reference, 1);
1495 pipe_resource_reference(&t->buffer, res);
1496 t->buffer_offset = buffer_offset;
1497 t->buffer_size = buffer_size;
1498 return t;
1499 }
1500
1501 static void
1502 iris_stream_output_target_destroy(struct pipe_context *ctx,
1503 struct pipe_stream_output_target *t)
1504 {
1505 pipe_resource_reference(&t->buffer, NULL);
1506 free(t);
1507 }
1508
1509 static void
1510 iris_set_stream_output_targets(struct pipe_context *ctx,
1511 unsigned num_targets,
1512 struct pipe_stream_output_target **targets,
1513 const unsigned *offsets)
1514 {
1515 }
1516
1517 #if 0
1518 static void
1519 iris_compute_sbe(const struct iris_context *ice,
1520 const struct brw_wm_prog_data *wm_prog_data)
1521 {
1522 uint32_t sbe_map[GENX(3DSTATE_SBE_length)];
1523 struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;
1524
1525 unsigned urb_read_offset, urb_read_length;
1526 brw_compute_sbe_urb_slot_interval(fp->info.inputs_read,
1527 ice->shaders.last_vue_map,
1528 &urb_read_offset, &urb_read_length);
1529
1530 iris_pack_command(GENX(3DSTATE_SBE), sbe_map, sbe) {
1531 sbe.AttributeSwizzleEnable = true;
1532 sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
1533 sbe.PointSpriteTextureCoordinateOrigin = cso_rast->sprite_coord_mode;
1534 sbe.VertexURBEntryReadOffset = urb_read_offset;
1535 sbe.VertexURBEntryReadLength = urb_read_length;
1536 sbe.ForceVertexURBEntryReadOffset = true;
1537 sbe.ForceVertexURBEntryReadLength = true;
1538 sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs;
1539
1540 for (int i = 0; i < urb_read_length * 2; i++) {
1541 sbe.AttributeActiveComponentFormat[i] = ACTIVE_COMPONENT_XYZW;
1542 }
1543 }
1544 }
1545 #endif
1546
1547 static void
1548 iris_bind_compute_state(struct pipe_context *ctx, void *state)
1549 {
1550 }
1551
1552 static void
1553 iris_populate_vs_key(const struct iris_context *ice,
1554 struct brw_vs_prog_key *key)
1555 {
1556 memset(key, 0, sizeof(*key));
1557 }
1558
1559 static void
1560 iris_populate_tcs_key(const struct iris_context *ice,
1561 struct brw_tcs_prog_key *key)
1562 {
1563 memset(key, 0, sizeof(*key));
1564 }
1565
1566 static void
1567 iris_populate_tes_key(const struct iris_context *ice,
1568 struct brw_tes_prog_key *key)
1569 {
1570 memset(key, 0, sizeof(*key));
1571 }
1572
1573 static void
1574 iris_populate_gs_key(const struct iris_context *ice,
1575 struct brw_gs_prog_key *key)
1576 {
1577 memset(key, 0, sizeof(*key));
1578 }
1579
1580 static void
1581 iris_populate_fs_key(const struct iris_context *ice,
1582 struct brw_wm_prog_key *key)
1583 {
1584 memset(key, 0, sizeof(*key));
1585
1586 /* XXX: dirty flags? */
1587 const struct pipe_framebuffer_state *fb = &ice->state.framebuffer;
1588 const struct iris_depth_stencil_alpha_state *zsa = ice->state.cso_zsa;
1589 const struct iris_rasterizer_state *rast = ice->state.cso_rast;
1590 const struct iris_blend_state *blend = ice->state.cso_blend;
1591
1592 key->nr_color_regions = fb->nr_cbufs;
1593
1594 key->clamp_fragment_color = rast->clamp_fragment_color;
1595
1596 key->replicate_alpha = fb->nr_cbufs > 1 &&
1597 (zsa->alpha.enabled || blend->alpha_to_coverage);
1598
1599 // key->force_dual_color_blend for unigine
1600 #if 0
1601 if (cso_rast->multisample) {
1602 key->persample_interp =
1603 ctx->Multisample.SampleShading &&
1604 (ctx->Multisample.MinSampleShadingValue *
1605 _mesa_geometric_samples(ctx->DrawBuffer) > 1);
1606
1607 key->multisample_fbo = fb->samples > 1;
1608 }
1609 #endif
1610
1611 key->coherent_fb_fetch = true;
1612 }
1613
1614 //pkt.SamplerCount = \
1615 //DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \
1616 //pkt.PerThreadScratchSpace = prog_data->total_scratch == 0 ? 0 : \
1617 //ffs(stage_state->per_thread_scratch) - 11; \
1618
1619 static uint64_t
1620 KSP(const struct iris_compiled_shader *shader)
1621 {
1622 struct iris_resource *res = (void *) shader->buffer;
1623 return res->bo->gtt_offset + shader->offset;
1624 }
1625
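/* Fill out the thread dispatch fields common to 3DSTATE_VS/HS/DS/GS.
 * Assumes `shader`, `prog_data`, and `vue_prog_data` are in scope at the
 * point of use.
 */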
1626 #define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \
1627 pkt.KernelStartPointer = KSP(shader); \
1628 pkt.BindingTableEntryCount = prog_data->binding_table.size_bytes / 4; \
1629 pkt.FloatingPointMode = prog_data->use_alt_mode; \
1630 \
1631 pkt.DispatchGRFStartRegisterForURBData = \
1632 prog_data->dispatch_grf_start_reg; \
1633 pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length; \
1634 pkt.prefix##URBEntryReadOffset = 0; \
1635 \
1636 pkt.StatisticsEnable = true; \
1637 pkt.Enable = true;
1638
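/**
 * The iris_set_*_state() helpers pack each stage's 3DSTATE packets into
 * the variant's derived_data buffer; iris_upload_render_state() later
 * copies that data into the batch verbatim.
 */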
1639 static void
1640 iris_set_vs_state(const struct gen_device_info *devinfo,
1641 struct iris_compiled_shader *shader)
1642 {
1643 struct brw_stage_prog_data *prog_data = shader->prog_data;
1644 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
1645
1646 iris_pack_command(GENX(3DSTATE_VS), shader->derived_data, vs) {
1647 INIT_THREAD_DISPATCH_FIELDS(vs, Vertex);
1648 vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;
1649 vs.SIMD8DispatchEnable = true;
1650 vs.UserClipDistanceCullTestEnableBitmask =
1651 vue_prog_data->cull_distance_mask;
1652 }
1653 }
1654
1655 static void
1656 iris_set_tcs_state(const struct gen_device_info *devinfo,
1657 struct iris_compiled_shader *shader)
1658 {
1659 struct brw_stage_prog_data *prog_data = shader->prog_data;
1660 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
1661 struct brw_tcs_prog_data *tcs_prog_data = (void *) prog_data;
1662
1663 iris_pack_command(GENX(3DSTATE_HS), shader->derived_data, hs) {
1664 INIT_THREAD_DISPATCH_FIELDS(hs, Vertex);
1665
1666 hs.InstanceCount = tcs_prog_data->instances - 1;
1667 hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
1668 hs.IncludeVertexHandles = true;
1669 }
1670 }
1671
1672 static void
1673 iris_set_tes_state(const struct gen_device_info *devinfo,
1674 struct iris_compiled_shader *shader)
1675 {
1676 struct brw_stage_prog_data *prog_data = shader->prog_data;
1677 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
1678 struct brw_tes_prog_data *tes_prog_data = (void *) prog_data;
1679
1680 uint32_t *te_state = (void *) shader->derived_data;
1681 uint32_t *ds_state = te_state + GENX(3DSTATE_TE_length);
1682
1683 iris_pack_command(GENX(3DSTATE_TE), te_state, te) {
1684 te.Partitioning = tes_prog_data->partitioning;
1685 te.OutputTopology = tes_prog_data->output_topology;
1686 te.TEDomain = tes_prog_data->domain;
1687 te.TEEnable = true;
1688 te.MaximumTessellationFactorOdd = 63.0;
1689 te.MaximumTessellationFactorNotOdd = 64.0;
1690 }
1691
1692 iris_pack_command(GENX(3DSTATE_DS), ds_state, ds) {
1693 INIT_THREAD_DISPATCH_FIELDS(ds, Patch);
1694
1695 ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH;
1696 ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;
1697 ds.ComputeWCoordinateEnable =
1698 tes_prog_data->domain == BRW_TESS_DOMAIN_TRI;
1699
1700 ds.UserClipDistanceCullTestEnableBitmask =
1701 vue_prog_data->cull_distance_mask;
1702 }
1703
1704 }
1705
1706 static void
1707 iris_set_gs_state(const struct gen_device_info *devinfo,
1708 struct iris_compiled_shader *shader)
1709 {
1710 struct brw_stage_prog_data *prog_data = shader->prog_data;
1711 struct brw_vue_prog_data *vue_prog_data = (void *) prog_data;
1712 struct brw_gs_prog_data *gs_prog_data = (void *) prog_data;
1713
1714 iris_pack_command(GENX(3DSTATE_GS), shader->derived_data, gs) {
1715 INIT_THREAD_DISPATCH_FIELDS(gs, Vertex);
1716
1717 gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
1718 gs.OutputTopology = gs_prog_data->output_topology;
1719 gs.ControlDataHeaderSize =
1720 gs_prog_data->control_data_header_size_hwords;
1721 gs.InstanceControl = gs_prog_data->invocations - 1;
1722 gs.DispatchMode = SIMD8;
1723 gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;
1724 gs.ControlDataFormat = gs_prog_data->control_data_format;
1725 gs.ReorderMode = TRAILING;
1726 gs.ExpectedVertexCount = gs_prog_data->vertices_in;
1727 gs.MaximumNumberofThreads =
1728 GEN_GEN == 8 ? (devinfo->max_gs_threads / 2 - 1)
1729 : (devinfo->max_gs_threads - 1);
1730
1731 if (gs_prog_data->static_vertex_count != -1) {
1732 gs.StaticOutput = true;
1733 gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count;
1734 }
1735 gs.IncludeVertexHandles = vue_prog_data->include_vue_handles;
1736
1737 gs.UserClipDistanceCullTestEnableBitmask =
1738 vue_prog_data->cull_distance_mask;
1739
1740 const int urb_entry_write_offset = 1;
1741 const uint32_t urb_entry_output_length =
1742 DIV_ROUND_UP(vue_prog_data->vue_map.num_slots, 2) -
1743 urb_entry_write_offset;
1744
1745 gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset;
1746 gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1);
1747 }
1748 }
1749
1750 static void
1751 iris_set_fs_state(const struct gen_device_info *devinfo,
1752 struct iris_compiled_shader *shader)
1753 {
1754 struct brw_stage_prog_data *prog_data = shader->prog_data;
1755 struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data;
1756
1757 uint32_t *ps_state = (void *) shader->derived_data;
1758 uint32_t *psx_state = ps_state + GENX(3DSTATE_PS_length);
1759
1760 iris_pack_command(GENX(3DSTATE_PS), ps_state, ps) {
1761 ps.VectorMaskEnable = true;
1762 //ps.SamplerCount = ...
1763 ps.BindingTableEntryCount = prog_data->binding_table.size_bytes / 4;
1764 ps.FloatingPointMode = prog_data->use_alt_mode;
1765 ps.MaximumNumberofThreadsPerPSD = 64 - (GEN_GEN == 8 ? 2 : 1);
1766
1767 ps.PushConstantEnable = prog_data->nr_params > 0 ||
1768 prog_data->ubo_ranges[0].length > 0;
1769
1770 /* From the documentation for this packet:
1771 * "If the PS kernel does not need the Position XY Offsets to
1772 * compute a Position Value, then this field should be programmed
1773 * to POSOFFSET_NONE."
1774 *
1775 * "SW Recommendation: If the PS kernel needs the Position Offsets
1776 * to compute a Position XY value, this field should match Position
1777 * ZW Interpolation Mode to ensure a consistent position.xyzw
1778 * computation."
1779 *
1780        * We only require XY sample offsets, so this recommendation doesn't
1781        * seem useful at the moment.  We may need it in the future.
1782 */
1783 ps.PositionXYOffsetSelect =
1784 wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE;
1785 ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
1786 ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
1787 ps._32PixelDispatchEnable = wm_prog_data->dispatch_32;
1788
1789 // XXX: Disable SIMD32 with 16x MSAA
1790
1791 ps.DispatchGRFStartRegisterForConstantSetupData0 =
1792 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
1793 ps.DispatchGRFStartRegisterForConstantSetupData1 =
1794 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
1795 ps.DispatchGRFStartRegisterForConstantSetupData2 =
1796 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
1797
1798 ps.KernelStartPointer0 =
1799 KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
1800 ps.KernelStartPointer1 =
1801 KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
1802 ps.KernelStartPointer2 =
1803 KSP(shader) + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
1804 }
1805
1806 iris_pack_command(GENX(3DSTATE_PS_EXTRA), psx_state, psx) {
1807 psx.PixelShaderValid = true;
1808 psx.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
1809 psx.PixelShaderKillsPixel = wm_prog_data->uses_kill;
1810 psx.AttributeEnable = wm_prog_data->num_varying_inputs != 0;
1811 psx.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
1812 psx.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
1813 psx.PixelShaderIsPerSample = wm_prog_data->persample_dispatch;
1814
1815 if (wm_prog_data->uses_sample_mask) {
1816 /* TODO: conservative rasterization */
1817 if (wm_prog_data->post_depth_coverage)
1818 psx.InputCoverageMaskState = ICMS_DEPTH_COVERAGE;
1819 else
1820 psx.InputCoverageMaskState = ICMS_NORMAL;
1821 }
1822
1823 psx.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
1824 psx.PixelShaderPullsBary = wm_prog_data->pulls_bary;
1825 psx.PixelShaderComputesStencil = wm_prog_data->computed_stencil;
1826
1827 // XXX: UAV bit
1828 }
1829 }
1830
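/**
 * Return the size, in bytes, of the pre-packed 3DSTATE data stored in
 * derived_data for shaders of the given cache ID.
 */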
1831 static unsigned
1832 iris_derived_program_state_size(enum iris_program_cache_id cache_id)
1833 {
1834 assert(cache_id <= IRIS_CACHE_CS);
1835
1836 static const unsigned dwords[] = {
1837 [IRIS_CACHE_VS] = GENX(3DSTATE_VS_length),
1838 [IRIS_CACHE_TCS] = GENX(3DSTATE_HS_length),
1839 [IRIS_CACHE_TES] = GENX(3DSTATE_TE_length) + GENX(3DSTATE_DS_length),
1840 [IRIS_CACHE_GS] = GENX(3DSTATE_GS_length),
1841 [IRIS_CACHE_FS] =
1842 GENX(3DSTATE_PS_length) + GENX(3DSTATE_PS_EXTRA_length),
1843 [IRIS_CACHE_CS] = 0,
1844 [IRIS_CACHE_BLORP_BLIT] = 0,
1845 };
1846
1847 return sizeof(uint32_t) * dwords[cache_id];
1848 }
1849
1850 static void
1851 iris_set_derived_program_state(const struct gen_device_info *devinfo,
1852 enum iris_program_cache_id cache_id,
1853 struct iris_compiled_shader *shader)
1854 {
1855 switch (cache_id) {
1856 case IRIS_CACHE_VS:
1857 iris_set_vs_state(devinfo, shader);
1858 break;
1859 case IRIS_CACHE_TCS:
1860 iris_set_tcs_state(devinfo, shader);
1861 break;
1862 case IRIS_CACHE_TES:
1863 iris_set_tes_state(devinfo, shader);
1864 break;
1865 case IRIS_CACHE_GS:
1866 iris_set_gs_state(devinfo, shader);
1867 break;
1868 case IRIS_CACHE_FS:
1869 iris_set_fs_state(devinfo, shader);
1870 break;
1871 case IRIS_CACHE_CS:
1872 break;
1873 default:
1874 break;
1875 }
1876 }
1877
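/**
 * Partition the URB among the VS/HS/DS/GS stages and emit the
 * 3DSTATE_URB_* commands.  Each active stage contributes its URB entry
 * size; stages without a program use an entry size of 1.
 */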
1878 static void
1879 iris_upload_urb_config(struct iris_context *ice, struct iris_batch *batch)
1880 {
1881 const struct gen_device_info *devinfo = &batch->screen->devinfo;
1882 const unsigned push_size_kB = 32;
1883 unsigned entries[4];
1884 unsigned start[4];
1885 unsigned size[4];
1886
1887 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
1888 if (!ice->shaders.prog[i]) {
1889 size[i] = 1;
1890 } else {
1891 struct brw_vue_prog_data *vue_prog_data =
1892 (void *) ice->shaders.prog[i]->prog_data;
1893 size[i] = vue_prog_data->urb_entry_size;
1894 }
1895 assert(size[i] != 0);
1896 }
1897
1898 gen_get_urb_config(devinfo, 1024 * push_size_kB,
1899 1024 * ice->shaders.urb_size,
1900 ice->shaders.prog[MESA_SHADER_TESS_EVAL] != NULL,
1901 ice->shaders.prog[MESA_SHADER_GEOMETRY] != NULL,
1902 size, entries, start);
1903
1904 for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
1905 iris_emit_cmd(batch, GENX(3DSTATE_URB_VS), urb) {
1906 urb._3DCommandSubOpcode += i;
1907 urb.VSURBStartingAddress = start[i];
1908 urb.VSURBEntryAllocationSize = size[i] - 1;
1909 urb.VSNumberofURBEntries = entries[i];
1910 }
1911 }
1912 }
1913
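/* _3DCommandSubOpcode values for the 3DSTATE_CONSTANT_* commands, indexed
 * by gl_shader_stage.
 */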
1914 static const uint32_t push_constant_opcodes[] = {
1915 [MESA_SHADER_VERTEX] = 21,
1916 [MESA_SHADER_TESS_CTRL] = 25, /* HS */
1917 [MESA_SHADER_TESS_EVAL] = 26, /* DS */
1918 [MESA_SHADER_GEOMETRY] = 22,
1919 [MESA_SHADER_FRAGMENT] = 23,
1920 [MESA_SHADER_COMPUTE] = 0,
1921 };
1922
1923 /**
1924 * Add a surface to the validation list, as well as the buffer containing
1925 * the corresponding SURFACE_STATE.
1926 *
1927 * Returns the binding table entry (offset to SURFACE_STATE).
1928 */
1929 static uint32_t
1930 use_surface(struct iris_batch *batch,
1931 struct pipe_surface *p_surf,
1932 bool writeable)
1933 {
1934 struct iris_surface *surf = (void *) p_surf;
1935 struct iris_resource *res = (void *) surf->pipe.texture;
1936 struct iris_resource *state_res = (void *) surf->surface_state_resource;
1937 iris_use_pinned_bo(batch, res->bo, writeable);
1938 iris_use_pinned_bo(batch, state_res->bo, false);
1939
1940 return surf->surface_state_offset;
1941 }
1942
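/**
 * Emit the render state required for the given draw, followed by the
 * 3DPRIMITIVE command itself.
 */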
1943 static void
1944 iris_upload_render_state(struct iris_context *ice,
1945 struct iris_batch *batch,
1946 const struct pipe_draw_info *draw)
1947 {
1948 const uint64_t dirty = ice->state.dirty;
1949
1950 struct brw_wm_prog_data *wm_prog_data = (void *)
1951 ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data;
1952
1953 if (dirty & IRIS_DIRTY_CC_VIEWPORT) {
1954 struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa;
1955 iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) {
1956 ptr.CCViewportPointer =
1957 emit_state(batch, ice->state.dynamic_uploader,
1958 cso->cc_vp, sizeof(cso->cc_vp), 32);
1959 }
1960 }
1961
1962 if (dirty & IRIS_DIRTY_SF_CL_VIEWPORT) {
1963 struct iris_viewport_state *cso = ice->state.cso_vp;
1964 iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) {
1965 ptr.SFClipViewportPointer =
1966 emit_state(batch, ice->state.dynamic_uploader, cso->sf_cl_vp,
1967 4 * GENX(SF_CLIP_VIEWPORT_length) *
1968 ice->state.num_viewports, 64);
1969 }
1970 }
1971
1972 /* XXX: L3 State */
1973
1974 if (dirty & IRIS_DIRTY_URB) {
1975 iris_upload_urb_config(ice, batch);
1976 }
1977
1978 if (dirty & IRIS_DIRTY_BLEND_STATE) {
1979 struct iris_blend_state *cso_blend = ice->state.cso_blend;
1980 struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
1981 struct iris_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa;
1982       const int num_dwords = GENX(BLEND_STATE_length) +
1983          cso_fb->nr_cbufs * GENX(BLEND_STATE_ENTRY_length);
1984 uint32_t blend_offset;
1985 uint32_t *blend_map =
1986 stream_state(batch, ice->state.dynamic_uploader, 4 * num_dwords, 64,
1987 &blend_offset);
1988
1989 uint32_t blend_state_header;
1990 iris_pack_state(GENX(BLEND_STATE), &blend_state_header, bs) {
1991 bs.AlphaTestEnable = cso_zsa->alpha.enabled;
1992 bs.AlphaTestFunction = translate_compare_func(cso_zsa->alpha.func);
1993 }
1994
1995 blend_map[0] = blend_state_header | cso_blend->blend_state[0];
1996 memcpy(&blend_map[1], &cso_blend->blend_state[1],
1997 sizeof(cso_blend->blend_state) - sizeof(uint32_t));
1998
1999 iris_emit_cmd(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), ptr) {
2000 ptr.BlendStatePointer = blend_offset;
2001 ptr.BlendStatePointerValid = true;
2002 }
2003 }
2004
2005 if (dirty & IRIS_DIRTY_COLOR_CALC_STATE) {
2006 struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa;
2007 uint32_t cc_offset;
2008 void *cc_map =
2009 stream_state(batch, ice->state.dynamic_uploader,
2010 sizeof(uint32_t) * GENX(COLOR_CALC_STATE_length),
2011 64, &cc_offset);
2012 iris_pack_state(GENX(COLOR_CALC_STATE), cc_map, cc) {
2013 cc.AlphaTestFormat = ALPHATEST_FLOAT32;
2014 cc.AlphaReferenceValueAsFLOAT32 = cso->alpha.ref_value;
2015 cc.BlendConstantColorRed = ice->state.blend_color.color[0];
2016 cc.BlendConstantColorGreen = ice->state.blend_color.color[1];
2017 cc.BlendConstantColorBlue = ice->state.blend_color.color[2];
2018 cc.BlendConstantColorAlpha = ice->state.blend_color.color[3];
2019 }
2020 iris_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
2021 ptr.ColorCalcStatePointer = cc_offset;
2022 ptr.ColorCalcStatePointerValid = true;
2023 }
2024 }
2025
2026 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
2027 // XXX: wrong dirty tracking...
2028 if (!(dirty & (IRIS_DIRTY_CONSTANTS_VS << stage)))
2029 continue;
2030
2031 struct pipe_constant_buffer *cbuf0 =
2032 &ice->shaders.state[stage].constbuf[0];
2033
2034 if (!ice->shaders.prog[stage] || cbuf0->buffer || !cbuf0->buffer_size)
2035 continue;
2036
2037 struct iris_shader_state *shs = &ice->shaders.state[stage];
2038 shs->const_size = cbuf0->buffer_size;
2039 u_upload_data(ice->ctx.const_uploader, 0, shs->const_size, 32,
2040 cbuf0->user_buffer, &shs->const_offset,
2041 &shs->push_resource);
2042 }
2043
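   /* Emit 3DSTATE_CONSTANT_<stage> pointing at the push constants uploaded
    * above.  Only constant buffer slot 3 is used; the remaining slots stay
    * zeroed, i.e. disabled.
    */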
2044 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
2045 // XXX: wrong dirty tracking...
2046 if (!(dirty & (IRIS_DIRTY_CONSTANTS_VS << stage)))
2047 continue;
2048
2049 struct iris_shader_state *shs = &ice->shaders.state[stage];
2050 struct iris_resource *res = (void *) shs->push_resource;
2051
2052 iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_VS), pkt) {
2053 pkt._3DCommandSubOpcode = push_constant_opcodes[stage];
2054 if (res) {
2055             pkt.ConstantBody.ReadLength[3] = DIV_ROUND_UP(shs->const_size, 32);
2056 pkt.ConstantBody.Buffer[3] = ro_bo(res->bo, shs->const_offset);
2057 }
2058 }
2059 }
2060
2061 // Surfaces:
2062 // - pull constants
2063 // - ubos/ssbos/abos
2064 // - images
2065 // - textures
2066 // - render targets - write and read
2067 // XXX: 3DSTATE_BINDING_TABLE_POINTERS_XS
2068
2069 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
2070 struct iris_compiled_shader *shader = ice->shaders.prog[stage];
2071 if (!shader) // XXX: dirty bits...also, emit a disable maybe?
2072 continue;
2073
2074 struct brw_stage_prog_data *prog_data = (void *) shader->prog_data;
2075 uint32_t bt_offset = 0;
2076 uint32_t *bt_map = NULL;
2077
2078 if (prog_data->binding_table.size_bytes != 0) {
2079 iris_use_pinned_bo(batch, ice->state.binder.bo, false);
2080 bt_map = iris_binder_reserve(&ice->state.binder,
2081 prog_data->binding_table.size_bytes,
2082 &bt_offset);
2083 }
2084
2085 iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), ptr) {
2086 ptr._3DCommandSubOpcode = 38 + stage;
2087 ptr.PointertoVSBindingTable = bt_offset;
2088 }
2089
2090 if (stage == MESA_SHADER_FRAGMENT) {
2091 struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
2092 for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
2093 *bt_map++ = use_surface(batch, cso_fb->cbufs[i], true);
2094 }
2095 }
2096
2097 #if 0
2098 for (int i = 0; i < ice->state.num_samplers; i++) {
2099 struct iris_sampler_view *view = SOMEWHERE;
2100 struct iris_resource *res = (void *) view->pipe.texture;
2101          *bt_map++ = use_surface(batch, view, true);
2102 }
2103
2104 // XXX: not implemented yet
2105 assert(prog_data->binding_table.pull_constants_start == 0xd0d0d0d0);
2106 assert(prog_data->binding_table.ubo_start == 0xd0d0d0d0);
2107 assert(prog_data->binding_table.ssbo_start == 0xd0d0d0d0);
2108 assert(prog_data->binding_table.image_start == 0xd0d0d0d0);
2109 assert(prog_data->binding_table.shader_time_start == 0xd0d0d0d0);
2110 //assert(prog_data->binding_table.plane_start[1] == 0xd0d0d0d0);
2111 //assert(prog_data->binding_table.plane_start[2] == 0xd0d0d0d0);
2112 #endif
2113 }
2114
2115 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
2116 if (!(dirty & (IRIS_DIRTY_SAMPLER_STATES_VS << stage)) ||
2117 !ice->shaders.prog[stage])
2118 continue;
2119
2120 iris_emit_cmd(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ptr) {
2121 ptr._3DCommandSubOpcode = 43 + stage;
2122 ptr.PointertoVSSamplerState = ice->state.sampler_table_offset[stage];
2123 }
2124 }
2125
2126 if (dirty & IRIS_DIRTY_MULTISAMPLE) {
2127 iris_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms) {
2128 ms.PixelLocation =
2129 ice->state.cso_rast->half_pixel_center ? CENTER : UL_CORNER;
2130 if (ice->state.framebuffer.samples > 0)
2131 ms.NumberofMultisamples = ffs(ice->state.framebuffer.samples) - 1;
2132 }
2133 }
2134
2135 if (dirty & IRIS_DIRTY_SAMPLE_MASK) {
2136 iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), ms) {
2137 ms.SampleMask = MAX2(ice->state.sample_mask, 1);
2138 }
2139 }
2140
2141 for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
2142 if (!(dirty & (IRIS_DIRTY_VS << stage)))
2143 continue;
2144
2145 struct iris_compiled_shader *shader = ice->shaders.prog[stage];
2146
2147 if (shader) {
2148 struct iris_resource *cache = (void *) shader->buffer;
2149 iris_use_pinned_bo(batch, cache->bo, false);
2150 iris_batch_emit(batch, shader->derived_data,
2151 iris_derived_program_state_size(stage));
2152 } else {
2153 if (stage == MESA_SHADER_TESS_EVAL) {
2154 iris_emit_cmd(batch, GENX(3DSTATE_HS), hs);
2155 iris_emit_cmd(batch, GENX(3DSTATE_TE), te);
2156 iris_emit_cmd(batch, GENX(3DSTATE_DS), ds);
2157 } else if (stage == MESA_SHADER_GEOMETRY) {
2158 iris_emit_cmd(batch, GENX(3DSTATE_GS), gs);
2159 }
2160 }
2161 }
2162
2163 // XXX: SOL:
2164 // 3DSTATE_STREAMOUT
2165 // 3DSTATE_SO_BUFFER
2166 // 3DSTATE_SO_DECL_LIST
2167
2168 if (dirty & IRIS_DIRTY_CLIP) {
2169 struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;
2170 struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
2171
2172 uint32_t dynamic_clip[GENX(3DSTATE_CLIP_length)];
2173 iris_pack_command(GENX(3DSTATE_CLIP), &dynamic_clip, cl) {
2174 if (wm_prog_data->barycentric_interp_modes &
2175 BRW_BARYCENTRIC_NONPERSPECTIVE_BITS)
2176 cl.NonPerspectiveBarycentricEnable = true;
2177
2178 cl.ForceZeroRTAIndexEnable = cso_fb->layers == 0;
2179 }
2180 iris_emit_merge(batch, cso_rast->clip, dynamic_clip,
2181 ARRAY_SIZE(cso_rast->clip));
2182 }
2183
2184 if (dirty & IRIS_DIRTY_RASTER) {
2185 struct iris_rasterizer_state *cso = ice->state.cso_rast;
2186 iris_batch_emit(batch, cso->raster, sizeof(cso->raster));
2187 iris_batch_emit(batch, cso->sf, sizeof(cso->sf));
2188
2189 }
2190
2191 if (dirty & (IRIS_DIRTY_RASTER | IRIS_DIRTY_FS)) {
2192 struct iris_rasterizer_state *cso = ice->state.cso_rast;
2193 uint32_t dynamic_wm[GENX(3DSTATE_WM_length)];
2194
2195 iris_pack_command(GENX(3DSTATE_WM), &dynamic_wm, wm) {
2196 wm.BarycentricInterpolationMode =
2197 wm_prog_data->barycentric_interp_modes;
2198
2199 if (wm_prog_data->early_fragment_tests)
2200 wm.EarlyDepthStencilControl = EDSC_PREPS;
2201 else if (wm_prog_data->has_side_effects)
2202 wm.EarlyDepthStencilControl = EDSC_PSEXEC;
2203 }
2204 iris_emit_merge(batch, cso->wm, dynamic_wm, ARRAY_SIZE(cso->wm));
2205 }
2206
2207 if (1) {
2208 // XXX: 3DSTATE_SBE, 3DSTATE_SBE_SWIZ
2209 // -> iris_raster_state (point sprite texture coordinate origin)
2210 // -> bunch of shader state...
2211
2212 iris_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) {
2213 sbe.AttributeSwizzleEnable = true;
2214 sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
2215 sbe.VertexURBEntryReadOffset = 1;
2216 sbe.VertexURBEntryReadLength = 1;
2217 sbe.ForceVertexURBEntryReadOffset = true;
2218 sbe.ForceVertexURBEntryReadLength = true;
2219 sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs;
2220
2221 for (int i = 0; i < 2; i++) {
2222 sbe.AttributeActiveComponentFormat[i] = ACTIVE_COMPONENT_XYZW;
2223 }
2224 }
2225
2226 iris_emit_cmd(batch, GENX(3DSTATE_SBE_SWIZ), sbe) {
2227 }
2228 }
2229
2230 if (dirty & IRIS_DIRTY_PS_BLEND) {
2231 struct iris_blend_state *cso_blend = ice->state.cso_blend;
2232 struct iris_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa;
2233 uint32_t dynamic_pb[GENX(3DSTATE_PS_BLEND_length)];
2234 iris_pack_command(GENX(3DSTATE_PS_BLEND), &dynamic_pb, pb) {
2235 pb.HasWriteableRT = true; // XXX: comes from somewhere :(
2236 pb.AlphaTestEnable = cso_zsa->alpha.enabled;
2237 }
2238
2239 iris_emit_merge(batch, cso_blend->ps_blend, dynamic_pb,
2240 ARRAY_SIZE(cso_blend->ps_blend));
2241 }
2242
2243 if (dirty & IRIS_DIRTY_WM_DEPTH_STENCIL) {
2244 struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa;
2245 struct pipe_stencil_ref *p_stencil_refs = &ice->state.stencil_ref;
2246
2247 uint32_t stencil_refs[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];
2248 iris_pack_command(GENX(3DSTATE_WM_DEPTH_STENCIL), &stencil_refs, wmds) {
2249 wmds.StencilReferenceValue = p_stencil_refs->ref_value[0];
2250 wmds.BackfaceStencilReferenceValue = p_stencil_refs->ref_value[1];
2251 }
2252 iris_emit_merge(batch, cso->wmds, stencil_refs, ARRAY_SIZE(cso->wmds));
2253 }
2254
2255 if (dirty & IRIS_DIRTY_SCISSOR) {
2256 uint32_t scissor_offset =
2257 emit_state(batch, ice->state.dynamic_uploader, ice->state.scissors,
2258 sizeof(struct pipe_scissor_state) *
2259 ice->state.num_scissors, 32);
2260
2261 iris_emit_cmd(batch, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) {
2262 ptr.ScissorRectPointer = scissor_offset;
2263 }
2264 }
2265
2266 // XXX: 3DSTATE_DEPTH_BUFFER
2267 // XXX: 3DSTATE_HIER_DEPTH_BUFFER
2268 // XXX: 3DSTATE_STENCIL_BUFFER
2269 // XXX: 3DSTATE_CLEAR_PARAMS
2270
2271 if (dirty & IRIS_DIRTY_POLYGON_STIPPLE) {
2272 iris_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_PATTERN), poly) {
2273 for (int i = 0; i < 32; i++) {
2274 poly.PatternRow[i] = ice->state.poly_stipple.stipple[i];
2275 }
2276 }
2277 }
2278
2279 if (dirty & IRIS_DIRTY_LINE_STIPPLE) {
2280 struct iris_rasterizer_state *cso = ice->state.cso_rast;
2281 iris_batch_emit(batch, cso->line_stipple, sizeof(cso->line_stipple));
2282 }
2283
2284 if (1) {
2285 iris_emit_cmd(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
2286 topo.PrimitiveTopologyType =
2287 translate_prim_type(draw->mode, draw->vertices_per_patch);
2288 }
2289 }
2290
2291 if (draw->index_size > 0) {
2292 struct iris_resource *res = (struct iris_resource *)draw->index.resource;
2293
2294 assert(!draw->has_user_indices);
2295
2296 iris_emit_cmd(batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
2297          ib.IndexFormat = draw->index_size >> 1; /* 1/2/4 bytes -> BYTE/WORD/DWORD */
2298 ib.MOCS = MOCS_WB;
2299 ib.BufferSize = res->bo->size;
2300 ib.BufferStartingAddress = ro_bo(res->bo, 0);
2301 }
2302 }
2303
2304 if (dirty & IRIS_DIRTY_VERTEX_BUFFERS) {
2305 struct iris_vertex_buffer_state *cso = ice->state.cso_vertex_buffers;
2306
2307 STATIC_ASSERT(GENX(VERTEX_BUFFER_STATE_length) == 4);
2308 STATIC_ASSERT((GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_bits) % 32) == 0);
2309
2310 iris_batch_emit(batch, cso->vertex_buffers,
2311 sizeof(uint32_t) * (1 + 4 * cso->num_buffers));
2312
2313 for (unsigned i = 0; i < cso->num_buffers; i++) {
2314 iris_use_pinned_bo(batch, cso->bos[i], false);
2315 }
2316 }
2317
2318 if (dirty & IRIS_DIRTY_VERTEX_ELEMENTS) {
2319 struct iris_vertex_element_state *cso = ice->state.cso_vertex_elements;
2320 iris_batch_emit(batch, cso->vertex_elements, sizeof(uint32_t) *
2321 (1 + cso->count * GENX(VERTEX_ELEMENT_STATE_length)));
2322 for (int i = 0; i < cso->count; i++) {
2323 iris_batch_emit(batch, cso->vf_instancing[i], sizeof(uint32_t) *
2324             GENX(3DSTATE_VF_INSTANCING_length));
2325 }
2326 for (int i = 0; i < cso->count; i++) {
2327 /* TODO: vertexid, instanceid support */
2328 iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgvs);
2329 }
2330 }
2331
2332 if (1) {
2333 iris_emit_cmd(batch, GENX(3DSTATE_VF), vf) {
2334 if (draw->primitive_restart) {
2335 vf.IndexedDrawCutIndexEnable = true;
2336 vf.CutIndex = draw->restart_index;
2337 }
2338 }
2339 }
2340
2341 // XXX: Gen8 - PMA fix
2342
2343 assert(!draw->indirect); // XXX: indirect support
2344
2345 iris_emit_cmd(batch, GENX(3DPRIMITIVE), prim) {
2346 prim.StartInstanceLocation = draw->start_instance;
2347 prim.InstanceCount = draw->instance_count;
2348 prim.VertexCountPerInstance = draw->count;
2349 prim.VertexAccessType = draw->index_size > 0 ? RANDOM : SEQUENTIAL;
2350
2351 // XXX: this is probably bonkers.
2352 prim.StartVertexLocation = draw->start;
2353
2354 if (draw->index_size) {
2355 prim.BaseVertexLocation += draw->index_bias;
2356 } else {
2357 prim.StartVertexLocation += draw->index_bias;
2358 }
2359
2360 //prim.BaseVertexLocation = ...;
2361 }
2362 }
2363
2364 static void
2365 iris_destroy_state(struct iris_context *ice)
2366 {
2367 // XXX: unreference resources/surfaces.
2368 for (unsigned i = 0; i < ice->state.framebuffer.nr_cbufs; i++) {
2369 pipe_surface_reference(&ice->state.framebuffer.cbufs[i], NULL);
2370 }
2371 pipe_surface_reference(&ice->state.framebuffer.zsbuf, NULL);
2372 }
2373
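/**
 * Hook up this file's implementations of the pipe_context state functions
 * and the iris-specific state vtable entries.
 */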
2374 void
2375 genX(init_state)(struct iris_context *ice)
2376 {
2377 struct pipe_context *ctx = &ice->ctx;
2378
2379 ctx->create_blend_state = iris_create_blend_state;
2380 ctx->create_depth_stencil_alpha_state = iris_create_zsa_state;
2381 ctx->create_rasterizer_state = iris_create_rasterizer_state;
2382 ctx->create_sampler_state = iris_create_sampler_state;
2383 ctx->create_sampler_view = iris_create_sampler_view;
2384 ctx->create_surface = iris_create_surface;
2385 ctx->create_vertex_elements_state = iris_create_vertex_elements;
2386 ctx->create_compute_state = iris_create_compute_state;
2387 ctx->bind_blend_state = iris_bind_blend_state;
2388 ctx->bind_depth_stencil_alpha_state = iris_bind_zsa_state;
2389 ctx->bind_sampler_states = iris_bind_sampler_states;
2390 ctx->bind_rasterizer_state = iris_bind_rasterizer_state;
2391 ctx->bind_vertex_elements_state = iris_bind_vertex_elements_state;
2392 ctx->bind_compute_state = iris_bind_compute_state;
2393 ctx->delete_blend_state = iris_delete_state;
2394 ctx->delete_depth_stencil_alpha_state = iris_delete_state;
2395 ctx->delete_fs_state = iris_delete_state;
2396 ctx->delete_rasterizer_state = iris_delete_state;
2397 ctx->delete_sampler_state = iris_delete_state;
2398 ctx->delete_vertex_elements_state = iris_delete_state;
2399 ctx->delete_compute_state = iris_delete_state;
2400 ctx->delete_tcs_state = iris_delete_state;
2401 ctx->delete_tes_state = iris_delete_state;
2402 ctx->delete_gs_state = iris_delete_state;
2403 ctx->delete_vs_state = iris_delete_state;
2404 ctx->set_blend_color = iris_set_blend_color;
2405 ctx->set_clip_state = iris_set_clip_state;
2406 ctx->set_constant_buffer = iris_set_constant_buffer;
2407 ctx->set_sampler_views = iris_set_sampler_views;
2408 ctx->set_framebuffer_state = iris_set_framebuffer_state;
2409 ctx->set_polygon_stipple = iris_set_polygon_stipple;
2410 ctx->set_sample_mask = iris_set_sample_mask;
2411 ctx->set_scissor_states = iris_set_scissor_states;
2412 ctx->set_stencil_ref = iris_set_stencil_ref;
2413 ctx->set_vertex_buffers = iris_set_vertex_buffers;
2414 ctx->set_viewport_states = iris_set_viewport_states;
2415 ctx->sampler_view_destroy = iris_sampler_view_destroy;
2416 ctx->surface_destroy = iris_surface_destroy;
2417 ctx->draw_vbo = iris_draw_vbo;
2418 ctx->launch_grid = iris_launch_grid;
2419 ctx->create_stream_output_target = iris_create_stream_output_target;
2420 ctx->stream_output_target_destroy = iris_stream_output_target_destroy;
2421 ctx->set_stream_output_targets = iris_set_stream_output_targets;
2422
2423 ice->state.destroy_state = iris_destroy_state;
2424 ice->state.init_render_context = iris_init_render_context;
2425 ice->state.upload_render_state = iris_upload_render_state;
2426 ice->state.derived_program_state_size = iris_derived_program_state_size;
2427 ice->state.set_derived_program_state = iris_set_derived_program_state;
2428 ice->state.populate_vs_key = iris_populate_vs_key;
2429 ice->state.populate_tcs_key = iris_populate_tcs_key;
2430 ice->state.populate_tes_key = iris_populate_tes_key;
2431 ice->state.populate_gs_key = iris_populate_gs_key;
2432 ice->state.populate_fs_key = iris_populate_fs_key;
2433
2434
2435 ice->state.dirty = ~0ull;
2436 }