ilo: make ilo_cp based on ilo_builder
[mesa.git] / src / gallium / drivers / ilo / ilo_gpe_gen6.h
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #ifndef ILO_GPE_GEN6_H
29 #define ILO_GPE_GEN6_H
30
31 #include "genhw/genhw.h"
32 #include "intel_winsys.h"
33
34 #include "ilo_common.h"
35 #include "ilo_cp.h"
36 #include "ilo_format.h"
37 #include "ilo_resource.h"
38 #include "ilo_shader.h"
39 #include "ilo_gpe.h"
40
/*
 * Assert that the generation of \p dev lies within the inclusive range
 * [min_gen, max_gen].  Both bounds are passed through ILO_GEN() before the
 * comparison.  As with any assert(), the check disappears when NDEBUG is
 * defined.
 */
41 #define ILO_GPE_VALID_GEN(dev, min_gen, max_gen) \
42 assert((dev)->gen >= ILO_GEN(min_gen) && (dev)->gen <= ILO_GEN(max_gen))
43
44 /**
45 * Translate winsys tiling to hardware tiling.
46 */
47 static inline int
48 ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling)
49 {
50 switch (tiling) {
51 case INTEL_TILING_NONE:
52 return GEN6_TILING_NONE;
53 case INTEL_TILING_X:
54 return GEN6_TILING_X;
55 case INTEL_TILING_Y:
56 return GEN6_TILING_Y;
57 default:
58 assert(!"unknown tiling");
59 return GEN6_TILING_NONE;
60 }
61 }
62
63 /**
64 * Translate a pipe primitive type to the matching hardware primitive type.
65 */
66 static inline int
67 ilo_gpe_gen6_translate_pipe_prim(unsigned prim)
68 {
69 static const int prim_mapping[PIPE_PRIM_MAX] = {
70 [PIPE_PRIM_POINTS] = GEN6_3DPRIM_POINTLIST,
71 [PIPE_PRIM_LINES] = GEN6_3DPRIM_LINELIST,
72 [PIPE_PRIM_LINE_LOOP] = GEN6_3DPRIM_LINELOOP,
73 [PIPE_PRIM_LINE_STRIP] = GEN6_3DPRIM_LINESTRIP,
74 [PIPE_PRIM_TRIANGLES] = GEN6_3DPRIM_TRILIST,
75 [PIPE_PRIM_TRIANGLE_STRIP] = GEN6_3DPRIM_TRISTRIP,
76 [PIPE_PRIM_TRIANGLE_FAN] = GEN6_3DPRIM_TRIFAN,
77 [PIPE_PRIM_QUADS] = GEN6_3DPRIM_QUADLIST,
78 [PIPE_PRIM_QUAD_STRIP] = GEN6_3DPRIM_QUADSTRIP,
79 [PIPE_PRIM_POLYGON] = GEN6_3DPRIM_POLYGON,
80 [PIPE_PRIM_LINES_ADJACENCY] = GEN6_3DPRIM_LINELIST_ADJ,
81 [PIPE_PRIM_LINE_STRIP_ADJACENCY] = GEN6_3DPRIM_LINESTRIP_ADJ,
82 [PIPE_PRIM_TRIANGLES_ADJACENCY] = GEN6_3DPRIM_TRILIST_ADJ,
83 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = GEN6_3DPRIM_TRISTRIP_ADJ,
84 };
85
86 assert(prim_mapping[prim]);
87
88 return prim_mapping[prim];
89 }
90
91 /**
92 * Translate a pipe texture target to the matching hardware surface type.
93 */
94 static inline int
95 ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
96 {
97 switch (target) {
98 case PIPE_BUFFER:
99 return GEN6_SURFTYPE_BUFFER;
100 case PIPE_TEXTURE_1D:
101 case PIPE_TEXTURE_1D_ARRAY:
102 return GEN6_SURFTYPE_1D;
103 case PIPE_TEXTURE_2D:
104 case PIPE_TEXTURE_RECT:
105 case PIPE_TEXTURE_2D_ARRAY:
106 return GEN6_SURFTYPE_2D;
107 case PIPE_TEXTURE_3D:
108 return GEN6_SURFTYPE_3D;
109 case PIPE_TEXTURE_CUBE:
110 case PIPE_TEXTURE_CUBE_ARRAY:
111 return GEN6_SURFTYPE_CUBE;
112 default:
113 assert(!"unknown texture target");
114 return GEN6_SURFTYPE_BUFFER;
115 }
116 }
117
118 /**
119 * Fill in DW2 to DW7 of 3DSTATE_SF.
120 */
121 static inline void
122 ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
123 const struct ilo_rasterizer_state *rasterizer,
124 int num_samples,
125 enum pipe_format depth_format,
126 uint32_t *payload, unsigned payload_len)
127 {
128 assert(payload_len == Elements(rasterizer->sf.payload));
129
130 if (rasterizer) {
131 const struct ilo_rasterizer_sf *sf = &rasterizer->sf;
132
133 memcpy(payload, sf->payload, sizeof(sf->payload));
134 if (num_samples > 1)
135 payload[1] |= sf->dw_msaa;
136 }
137 else {
138 payload[0] = 0;
139 payload[1] = (num_samples > 1) ? GEN7_SF_DW2_MSRASTMODE_ON_PATTERN : 0;
140 payload[2] = 0;
141 payload[3] = 0;
142 payload[4] = 0;
143 payload[5] = 0;
144 }
145
146 if (dev->gen >= ILO_GEN(7)) {
147 int format;
148
149 /* separate stencil */
150 switch (depth_format) {
151 case PIPE_FORMAT_Z16_UNORM:
152 format = GEN6_ZFORMAT_D16_UNORM;
153 break;
154 case PIPE_FORMAT_Z32_FLOAT:
155 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
156 format = GEN6_ZFORMAT_D32_FLOAT;
157 break;
158 case PIPE_FORMAT_Z24X8_UNORM:
159 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
160 format = GEN6_ZFORMAT_D24_UNORM_X8_UINT;
161 break;
162 default:
163 /* FLOAT surface is assumed when there is no depth buffer */
164 format = GEN6_ZFORMAT_D32_FLOAT;
165 break;
166 }
167
168 payload[0] |= format << GEN7_SF_DW1_DEPTH_FORMAT__SHIFT;
169 }
170 }
171
172 /**
173 * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
174 */
175 static inline void
176 ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
177 const struct ilo_rasterizer_state *rasterizer,
178 const struct ilo_shader_state *fs,
179 uint32_t *dw, int num_dwords)
180 {
181 int output_count, vue_offset, vue_len;
182 const struct ilo_kernel_routing *routing;
183
184 ILO_GPE_VALID_GEN(dev, 6, 7.5);
185 assert(num_dwords == 13);
186
187 if (!fs) {
188 memset(dw, 0, sizeof(dw[0]) * num_dwords);
189 dw[0] = 1 << GEN7_SBE_DW1_URB_READ_LEN__SHIFT;
190 return;
191 }
192
193 output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
194 assert(output_count <= 32);
195
196 routing = ilo_shader_get_kernel_routing(fs);
197
198 vue_offset = routing->source_skip;
199 assert(vue_offset % 2 == 0);
200 vue_offset /= 2;
201
202 vue_len = (routing->source_len + 1) / 2;
203 if (!vue_len)
204 vue_len = 1;
205
206 dw[0] = output_count << GEN7_SBE_DW1_ATTR_COUNT__SHIFT |
207 vue_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT |
208 vue_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT;
209 if (routing->swizzle_enable)
210 dw[0] |= GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE;
211
212 switch (rasterizer->state.sprite_coord_mode) {
213 case PIPE_SPRITE_COORD_UPPER_LEFT:
214 dw[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT;
215 break;
216 case PIPE_SPRITE_COORD_LOWER_LEFT:
217 dw[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_LOWERLEFT;
218 break;
219 }
220
221 STATIC_ASSERT(Elements(routing->swizzles) >= 16);
222 memcpy(&dw[1], routing->swizzles, 2 * 16);
223
224 /*
225 * From the Ivy Bridge PRM, volume 2 part 1, page 268:
226 *
227 * "This field (Point Sprite Texture Coordinate Enable) must be
228 * programmed to 0 when non-point primitives are rendered."
229 *
230 * TODO We do not check that yet.
231 */
232 dw[9] = routing->point_sprite_enable;
233
234 dw[10] = routing->const_interp_enable;
235
236 /* WrapShortest enables */
237 dw[11] = 0;
238 dw[12] = 0;
239 }
240
241 static inline void
242 gen6_emit_MI_STORE_DATA_IMM(const struct ilo_dev_info *dev,
243 struct intel_bo *bo, uint32_t bo_offset,
244 uint64_t val, bool store_qword,
245 struct ilo_cp *cp)
246 {
247 const uint8_t cmd_len = (store_qword) ? 5 : 4;
248 uint32_t dw0 = GEN6_MI_CMD(MI_STORE_DATA_IMM) | (cmd_len - 2);
249 uint32_t reloc_flags = INTEL_RELOC_WRITE;
250
251 ILO_GPE_VALID_GEN(dev, 6, 7.5);
252
253 assert(bo_offset % ((store_qword) ? 8 : 4) == 0);
254
255 /* must use GGTT on GEN6 as in PIPE_CONTROL */
256 if (dev->gen == ILO_GEN(6)) {
257 dw0 |= GEN6_MI_STORE_DATA_IMM_DW0_USE_GGTT;
258 reloc_flags |= INTEL_RELOC_GGTT;
259 }
260
261 ilo_cp_begin(cp, cmd_len);
262 ilo_cp_write(cp, dw0);
263 ilo_cp_write(cp, 0);
264 ilo_cp_write_bo(cp, bo_offset, bo, reloc_flags);
265 ilo_cp_write(cp, (uint32_t) val);
266
267 if (store_qword)
268 ilo_cp_write(cp, (uint32_t) (val >> 32));
269 else
270 assert(val == (uint64_t) ((uint32_t) val));
271
272 ilo_cp_end(cp);
273 }
274
275 static inline void
276 gen6_emit_MI_LOAD_REGISTER_IMM(const struct ilo_dev_info *dev,
277 uint32_t reg, uint32_t val,
278 struct ilo_cp *cp)
279 {
280 const uint8_t cmd_len = 3;
281 const uint32_t dw0 = GEN6_MI_CMD(MI_LOAD_REGISTER_IMM) | (cmd_len - 2);
282
283 ILO_GPE_VALID_GEN(dev, 6, 7.5);
284
285 assert(reg % 4 == 0);
286
287 ilo_cp_begin(cp, cmd_len);
288 ilo_cp_write(cp, dw0);
289 ilo_cp_write(cp, reg);
290 ilo_cp_write(cp, val);
291 ilo_cp_end(cp);
292 }
293
294 static inline void
295 gen6_emit_MI_STORE_REGISTER_MEM(const struct ilo_dev_info *dev,
296 struct intel_bo *bo, uint32_t bo_offset,
297 uint32_t reg, struct ilo_cp *cp)
298 {
299 const uint8_t cmd_len = 3;
300 uint32_t dw0 = GEN6_MI_CMD(MI_STORE_REGISTER_MEM) | (cmd_len - 2);
301 uint32_t reloc_flags = INTEL_RELOC_WRITE;
302
303 ILO_GPE_VALID_GEN(dev, 6, 7.5);
304
305 assert(reg % 4 == 0 && bo_offset % 4 == 0);
306
307 /* must use GGTT on GEN6 as in PIPE_CONTROL */
308 if (dev->gen == ILO_GEN(6)) {
309 dw0 |= GEN6_MI_STORE_REGISTER_MEM_DW0_USE_GGTT;
310 reloc_flags |= INTEL_RELOC_GGTT;
311 }
312
313 ilo_cp_begin(cp, cmd_len);
314 ilo_cp_write(cp, dw0);
315 ilo_cp_write(cp, reg);
316 ilo_cp_write_bo(cp, bo_offset, bo, reloc_flags);
317 ilo_cp_end(cp);
318 }
319
320 static inline void
321 gen6_emit_MI_REPORT_PERF_COUNT(const struct ilo_dev_info *dev,
322 struct intel_bo *bo, uint32_t bo_offset,
323 uint32_t report_id, struct ilo_cp *cp)
324 {
325 const uint8_t cmd_len = 3;
326 const uint32_t dw0 = GEN6_MI_CMD(MI_REPORT_PERF_COUNT) | (cmd_len - 2);
327 uint32_t reloc_flags = INTEL_RELOC_WRITE;
328
329 ILO_GPE_VALID_GEN(dev, 6, 7.5);
330
331 assert(bo_offset % 64 == 0);
332
333 /* must use GGTT on GEN6 as in PIPE_CONTROL */
334 if (dev->gen == ILO_GEN(6)) {
335 bo_offset |= GEN6_MI_REPORT_PERF_COUNT_DW1_USE_GGTT;
336 reloc_flags |= INTEL_RELOC_GGTT;
337 }
338
339 ilo_cp_begin(cp, cmd_len);
340 ilo_cp_write(cp, dw0);
341 ilo_cp_write_bo(cp, bo_offset, bo, reloc_flags);
342 ilo_cp_write(cp, report_id);
343 ilo_cp_end(cp);
344 }
345
346 static inline void
347 gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev,
348 struct intel_bo *general_state_bo,
349 struct intel_bo *surface_state_bo,
350 struct intel_bo *dynamic_state_bo,
351 struct intel_bo *indirect_object_bo,
352 struct intel_bo *instruction_bo,
353 uint32_t general_state_size,
354 uint32_t dynamic_state_size,
355 uint32_t indirect_object_size,
356 uint32_t instruction_size,
357 struct ilo_cp *cp)
358 {
359 const uint8_t cmd_len = 10;
360 const uint32_t dw0 = GEN6_RENDER_CMD(COMMON, STATE_BASE_ADDRESS) |
361 (cmd_len - 2);
362
363 ILO_GPE_VALID_GEN(dev, 6, 7.5);
364
365 /* 4K-page aligned */
366 assert(((general_state_size | dynamic_state_size |
367 indirect_object_size | instruction_size) & 0xfff) == 0);
368
369 ilo_cp_begin(cp, cmd_len);
370 ilo_cp_write(cp, dw0);
371
372 ilo_cp_write_bo(cp, 1, general_state_bo, 0);
373 ilo_cp_write_bo(cp, 1, surface_state_bo, 0);
374 ilo_cp_write_bo(cp, 1, dynamic_state_bo, 0);
375 ilo_cp_write_bo(cp, 1, indirect_object_bo, 0);
376 ilo_cp_write_bo(cp, 1, instruction_bo, 0);
377
378 if (general_state_size) {
379 ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo, 0);
380 }
381 else {
382 /* skip range check */
383 ilo_cp_write(cp, 1);
384 }
385
386 if (dynamic_state_size) {
387 ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo, 0);
388 }
389 else {
390 /* skip range check */
391 ilo_cp_write(cp, 0xfffff000 + 1);
392 }
393
394 if (indirect_object_size) {
395 ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo, 0);
396 }
397 else {
398 /* skip range check */
399 ilo_cp_write(cp, 0xfffff000 + 1);
400 }
401
402 if (instruction_size) {
403 ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo, 0);
404 }
405 else {
406 /* skip range check */
407 ilo_cp_write(cp, 1);
408 }
409
410 ilo_cp_end(cp);
411 }
412
413 static inline void
414 gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
415 uint32_t sip,
416 struct ilo_cp *cp)
417 {
418 const uint8_t cmd_len = 2;
419 const uint32_t dw0 = GEN6_RENDER_CMD(COMMON, STATE_SIP) | (cmd_len - 2);
420
421 ILO_GPE_VALID_GEN(dev, 6, 7.5);
422
423 ilo_cp_begin(cp, cmd_len);
424 ilo_cp_write(cp, dw0);
425 ilo_cp_write(cp, sip);
426 ilo_cp_end(cp);
427 }
428
429 static inline void
430 gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev,
431 bool enable,
432 struct ilo_cp *cp)
433 {
434 const uint8_t cmd_len = 1;
435 const uint32_t dw0 = GEN6_RENDER_CMD(SINGLE_DW, 3DSTATE_VF_STATISTICS) |
436 enable;
437
438 ILO_GPE_VALID_GEN(dev, 6, 7.5);
439
440 ilo_cp_begin(cp, cmd_len);
441 ilo_cp_write(cp, dw0);
442 ilo_cp_end(cp);
443 }
444
445 static inline void
446 gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev,
447 int pipeline,
448 struct ilo_cp *cp)
449 {
450 const uint8_t cmd_len = 1;
451 const uint32_t dw0 = GEN6_RENDER_CMD(SINGLE_DW, PIPELINE_SELECT) |
452 pipeline;
453
454 ILO_GPE_VALID_GEN(dev, 6, 7.5);
455
456 /* 3D or media */
457 assert(pipeline == 0x0 || pipeline == 0x1);
458
459 ilo_cp_begin(cp, cmd_len);
460 ilo_cp_write(cp, dw0);
461 ilo_cp_end(cp);
462 }
463
464 static inline void
465 gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
466 int max_threads, int num_urb_entries,
467 int urb_entry_size,
468 struct ilo_cp *cp)
469 {
470 const uint8_t cmd_len = 8;
471 const uint32_t dw0 = GEN6_RENDER_CMD(MEDIA, MEDIA_VFE_STATE) |
472 (cmd_len - 2);
473 uint32_t dw2, dw4;
474
475 ILO_GPE_VALID_GEN(dev, 6, 6);
476
477 dw2 = (max_threads - 1) << 16 |
478 num_urb_entries << 8 |
479 1 << 7 | /* Reset Gateway Timer */
480 1 << 6; /* Bypass Gateway Control */
481
482 dw4 = urb_entry_size << 16 | /* URB Entry Allocation Size */
483 480; /* CURBE Allocation Size */
484
485 ilo_cp_begin(cp, cmd_len);
486 ilo_cp_write(cp, dw0);
487 ilo_cp_write(cp, 0); /* scratch */
488 ilo_cp_write(cp, dw2);
489 ilo_cp_write(cp, 0); /* MBZ */
490 ilo_cp_write(cp, dw4);
491 ilo_cp_write(cp, 0); /* scoreboard */
492 ilo_cp_write(cp, 0);
493 ilo_cp_write(cp, 0);
494 ilo_cp_end(cp);
495 }
496
497 static inline void
498 gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev,
499 uint32_t buf, int size,
500 struct ilo_cp *cp)
501 {
502 const uint8_t cmd_len = 4;
503 const uint32_t dw0 = GEN6_RENDER_CMD(MEDIA, MEDIA_CURBE_LOAD) |
504 (cmd_len - 2);
505
506 ILO_GPE_VALID_GEN(dev, 6, 6);
507
508 assert(buf % 32 == 0);
509 /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
510 size = align(size, 32);
511
512 ilo_cp_begin(cp, cmd_len);
513 ilo_cp_write(cp, dw0);
514 ilo_cp_write(cp, 0); /* MBZ */
515 ilo_cp_write(cp, size);
516 ilo_cp_write(cp, buf);
517 ilo_cp_end(cp);
518 }
519
520 static inline void
521 gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev,
522 uint32_t offset, int num_ids,
523 struct ilo_cp *cp)
524 {
525 const uint8_t cmd_len = 4;
526 const uint32_t dw0 =
527 GEN6_RENDER_CMD(MEDIA, MEDIA_INTERFACE_DESCRIPTOR_LOAD) | (cmd_len - 2);
528
529 ILO_GPE_VALID_GEN(dev, 6, 6);
530
531 assert(offset % 32 == 0);
532
533 ilo_cp_begin(cp, cmd_len);
534 ilo_cp_write(cp, dw0);
535 ilo_cp_write(cp, 0); /* MBZ */
536 /* every ID has 8 DWords */
537 ilo_cp_write(cp, num_ids * 8 * 4);
538 ilo_cp_write(cp, offset);
539 ilo_cp_end(cp);
540 }
541
542 static inline void
543 gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev,
544 int id, int byte, int thread_count,
545 struct ilo_cp *cp)
546 {
547 const uint8_t cmd_len = 2;
548 const uint32_t dw0 = GEN6_RENDER_CMD(MEDIA, MEDIA_GATEWAY_STATE) |
549 (cmd_len - 2);
550 uint32_t dw1;
551
552 ILO_GPE_VALID_GEN(dev, 6, 6);
553
554 dw1 = id << 16 |
555 byte << 8 |
556 thread_count;
557
558 ilo_cp_begin(cp, cmd_len);
559 ilo_cp_write(cp, dw0);
560 ilo_cp_write(cp, dw1);
561 ilo_cp_end(cp);
562 }
563
564 static inline void
565 gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev,
566 int thread_count_water_mark,
567 int barrier_mask,
568 struct ilo_cp *cp)
569 {
570 const uint8_t cmd_len = 2;
571 const uint32_t dw0 = GEN6_RENDER_CMD(MEDIA, MEDIA_STATE_FLUSH) |
572 (cmd_len - 2);
573 uint32_t dw1;
574
575 ILO_GPE_VALID_GEN(dev, 6, 6);
576
577 dw1 = thread_count_water_mark << 16 |
578 barrier_mask;
579
580 ilo_cp_begin(cp, cmd_len);
581 ilo_cp_write(cp, dw0);
582 ilo_cp_write(cp, dw1);
583 ilo_cp_end(cp);
584 }
585
/**
 * Placeholder for MEDIA_OBJECT_WALKER.  Nothing is written to \p cp; the
 * function only trips an assertion when it is called.
 */
586 static inline void
587 gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev,
588 struct ilo_cp *cp)
589 {
590 assert(!"MEDIA_OBJECT_WALKER unsupported");
591 }
592
593 static inline void
594 gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev,
595 uint32_t vs_binding_table,
596 uint32_t gs_binding_table,
597 uint32_t ps_binding_table,
598 struct ilo_cp *cp)
599 {
600 const uint8_t cmd_len = 4;
601 const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_BINDING_TABLE_POINTERS) |
602 GEN6_PTR_BINDING_TABLE_DW0_VS_CHANGED |
603 GEN6_PTR_BINDING_TABLE_DW0_GS_CHANGED |
604 GEN6_PTR_BINDING_TABLE_DW0_PS_CHANGED |
605 (cmd_len - 2);
606
607 ILO_GPE_VALID_GEN(dev, 6, 6);
608
609 ilo_cp_begin(cp, cmd_len);
610 ilo_cp_write(cp, dw0);
611 ilo_cp_write(cp, vs_binding_table);
612 ilo_cp_write(cp, gs_binding_table);
613 ilo_cp_write(cp, ps_binding_table);
614 ilo_cp_end(cp);
615 }
616
617 static inline void
618 gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev,
619 uint32_t vs_sampler_state,
620 uint32_t gs_sampler_state,
621 uint32_t ps_sampler_state,
622 struct ilo_cp *cp)
623 {
624 const uint8_t cmd_len = 4;
625 const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLER_STATE_POINTERS) |
626 GEN6_PTR_SAMPLER_DW0_VS_CHANGED |
627 GEN6_PTR_SAMPLER_DW0_GS_CHANGED |
628 GEN6_PTR_SAMPLER_DW0_PS_CHANGED |
629 (cmd_len - 2);
630
631 ILO_GPE_VALID_GEN(dev, 6, 6);
632
633 ilo_cp_begin(cp, cmd_len);
634 ilo_cp_write(cp, dw0);
635 ilo_cp_write(cp, vs_sampler_state);
636 ilo_cp_write(cp, gs_sampler_state);
637 ilo_cp_write(cp, ps_sampler_state);
638 ilo_cp_end(cp);
639 }
640
641 static inline void
642 gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
643 int vs_total_size, int gs_total_size,
644 int vs_entry_size, int gs_entry_size,
645 struct ilo_cp *cp)
646 {
647 const uint8_t cmd_len = 3;
648 const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_URB) | (cmd_len - 2);
649 const int row_size = 128; /* 1024 bits */
650 int vs_alloc_size, gs_alloc_size;
651 int vs_num_entries, gs_num_entries;
652
653 ILO_GPE_VALID_GEN(dev, 6, 6);
654
655 /* in 1024-bit URB rows */
656 vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
657 gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
658
659 /* the valid range is [1, 5] */
660 if (!vs_alloc_size)
661 vs_alloc_size = 1;
662 if (!gs_alloc_size)
663 gs_alloc_size = 1;
664 assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
665
666 /* the valid range is [24, 256] in multiples of 4 */
667 vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
668 if (vs_num_entries > 256)
669 vs_num_entries = 256;
670 assert(vs_num_entries >= 24);
671
672 /* the valid range is [0, 256] in multiples of 4 */
673 gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
674 if (gs_num_entries > 256)
675 gs_num_entries = 256;
676
677 ilo_cp_begin(cp, cmd_len);
678 ilo_cp_write(cp, dw0);
679 ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT |
680 vs_num_entries << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT);
681 ilo_cp_write(cp, gs_num_entries << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT |
682 (gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT);
683 ilo_cp_end(cp);
684 }
685
686 static inline void
687 gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
688 const struct ilo_ve_state *ve,
689 const struct ilo_vb_state *vb,
690 struct ilo_cp *cp)
691 {
692 uint8_t cmd_len;
693 uint32_t dw0;
694 unsigned hw_idx;
695
696 ILO_GPE_VALID_GEN(dev, 6, 7.5);
697
698 /*
699 * From the Sandy Bridge PRM, volume 2 part 1, page 82:
700 *
701 * "From 1 to 33 VBs can be specified..."
702 */
703 assert(ve->vb_count <= 33);
704
705 if (!ve->vb_count)
706 return;
707
708 cmd_len = 1 + 4 * ve->vb_count;
709 dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_BUFFERS) |
710 (cmd_len - 2);
711
712 ilo_cp_begin(cp, cmd_len);
713 ilo_cp_write(cp, dw0);
714
715 for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
716 const unsigned instance_divisor = ve->instance_divisors[hw_idx];
717 const unsigned pipe_idx = ve->vb_mapping[hw_idx];
718 const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx];
719 uint32_t dw;
720
721 dw = hw_idx << GEN6_VB_STATE_DW0_INDEX__SHIFT;
722
723 if (instance_divisor)
724 dw |= GEN6_VB_STATE_DW0_ACCESS_INSTANCEDATA;
725 else
726 dw |= GEN6_VB_STATE_DW0_ACCESS_VERTEXDATA;
727
728 if (dev->gen >= ILO_GEN(7))
729 dw |= GEN7_VB_STATE_DW0_ADDR_MODIFIED;
730
731 /* use null vb if there is no buffer or the stride is out of range */
732 if (cso->buffer && cso->stride <= 2048) {
733 const struct ilo_buffer *buf = ilo_buffer(cso->buffer);
734 const uint32_t start_offset = cso->buffer_offset;
735 const uint32_t end_offset = buf->bo_size - 1;
736
737 dw |= cso->stride << GEN6_VB_STATE_DW0_PITCH__SHIFT;
738
739 ilo_cp_write(cp, dw);
740 ilo_cp_write_bo(cp, start_offset, buf->bo, 0);
741 ilo_cp_write_bo(cp, end_offset, buf->bo, 0);
742 ilo_cp_write(cp, instance_divisor);
743 }
744 else {
745 dw |= 1 << 13;
746
747 ilo_cp_write(cp, dw);
748 ilo_cp_write(cp, 0);
749 ilo_cp_write(cp, 0);
750 ilo_cp_write(cp, instance_divisor);
751 }
752 }
753
754 ilo_cp_end(cp);
755 }
756
757 static inline void
758 ve_init_cso_with_components(const struct ilo_dev_info *dev,
759 int comp0, int comp1, int comp2, int comp3,
760 struct ilo_ve_cso *cso)
761 {
762 ILO_GPE_VALID_GEN(dev, 6, 7.5);
763
764 STATIC_ASSERT(Elements(cso->payload) >= 2);
765 cso->payload[0] = GEN6_VE_STATE_DW0_VALID;
766 cso->payload[1] =
767 comp0 << GEN6_VE_STATE_DW1_COMP0__SHIFT |
768 comp1 << GEN6_VE_STATE_DW1_COMP1__SHIFT |
769 comp2 << GEN6_VE_STATE_DW1_COMP2__SHIFT |
770 comp3 << GEN6_VE_STATE_DW1_COMP3__SHIFT;
771 }
772
773 static inline void
774 ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
775 struct ilo_ve_cso *cso)
776 {
777 int format;
778
779 ILO_GPE_VALID_GEN(dev, 6, 7.5);
780
781 /*
782 * From the Sandy Bridge PRM, volume 2 part 1, page 94:
783 *
784 * "- This bit (Edge Flag Enable) must only be ENABLED on the last
785 * valid VERTEX_ELEMENT structure.
786 *
787 * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
788 * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
789 *
790 * - The Source Element Format must be set to the UINT format.
791 *
792 * - [DevSNB]: Edge Flags are not supported for QUADLIST
793 * primitives. Software may elect to convert QUADLIST primitives
794 * to some set of corresponding edge-flag-supported primitive
795 * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
796 */
797
798 cso->payload[0] |= GEN6_VE_STATE_DW0_EDGE_FLAG_ENABLE;
799 cso->payload[1] =
800 GEN6_VFCOMP_STORE_SRC << GEN6_VE_STATE_DW1_COMP0__SHIFT |
801 GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP1__SHIFT |
802 GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP2__SHIFT |
803 GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP3__SHIFT;
804
805 /*
806 * Edge flags have format GEN6_FORMAT_R8_UINT when defined via
807 * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined
808 * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
809 *
810 * Since all the hardware cares about is whether the flags are zero or not,
811 * we can treat them as GEN6_FORMAT_R32_UINT in the latter case.
812 */
813 format = (cso->payload[0] >> GEN6_VE_STATE_DW0_FORMAT__SHIFT) & 0x1ff;
814 if (format == GEN6_FORMAT_R32_FLOAT) {
815 STATIC_ASSERT(GEN6_FORMAT_R32_UINT == GEN6_FORMAT_R32_FLOAT - 1);
816 cso->payload[0] -= (1 << GEN6_VE_STATE_DW0_FORMAT__SHIFT);
817 }
818 else {
819 assert(format == GEN6_FORMAT_R8_UINT);
820 }
821 }
822
823 static inline void
824 gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
825 const struct ilo_ve_state *ve,
826 bool last_velement_edgeflag,
827 bool prepend_generated_ids,
828 struct ilo_cp *cp)
829 {
830 uint8_t cmd_len;
831 uint32_t dw0;
832 unsigned i;
833
834 ILO_GPE_VALID_GEN(dev, 6, 7.5);
835
836 /*
837 * From the Sandy Bridge PRM, volume 2 part 1, page 93:
838 *
839 * "Up to 34 (DevSNB+) vertex elements are supported."
840 */
841 assert(ve->count + prepend_generated_ids <= 34);
842
843 if (!ve->count && !prepend_generated_ids) {
844 struct ilo_ve_cso dummy;
845
846 ve_init_cso_with_components(dev,
847 GEN6_VFCOMP_STORE_0,
848 GEN6_VFCOMP_STORE_0,
849 GEN6_VFCOMP_STORE_0,
850 GEN6_VFCOMP_STORE_1_FP,
851 &dummy);
852
853 cmd_len = 3;
854 dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) |
855 (cmd_len - 2);
856
857 ilo_cp_begin(cp, cmd_len);
858 ilo_cp_write(cp, dw0);
859 ilo_cp_write_multi(cp, dummy.payload, 2);
860 ilo_cp_end(cp);
861
862 return;
863 }
864
865 cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;
866 dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) |
867 (cmd_len - 2);
868
869 ilo_cp_begin(cp, cmd_len);
870 ilo_cp_write(cp, dw0);
871
872 if (prepend_generated_ids) {
873 struct ilo_ve_cso gen_ids;
874
875 ve_init_cso_with_components(dev,
876 GEN6_VFCOMP_STORE_VID,
877 GEN6_VFCOMP_STORE_IID,
878 GEN6_VFCOMP_NOSTORE,
879 GEN6_VFCOMP_NOSTORE,
880 &gen_ids);
881
882 ilo_cp_write_multi(cp, gen_ids.payload, 2);
883 }
884
885 if (last_velement_edgeflag) {
886 struct ilo_ve_cso edgeflag;
887
888 for (i = 0; i < ve->count - 1; i++)
889 ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
890
891 edgeflag = ve->cso[i];
892 ve_set_cso_edgeflag(dev, &edgeflag);
893 ilo_cp_write_multi(cp, edgeflag.payload, 2);
894 }
895 else {
896 for (i = 0; i < ve->count; i++)
897 ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
898 }
899
900 ilo_cp_end(cp);
901 }
902
903 static inline void
904 gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
905 const struct ilo_ib_state *ib,
906 bool enable_cut_index,
907 struct ilo_cp *cp)
908 {
909 const uint8_t cmd_len = 3;
910 struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
911 uint32_t start_offset, end_offset;
912 int format;
913 uint32_t dw0;
914
915 ILO_GPE_VALID_GEN(dev, 6, 7.5);
916
917 if (!buf)
918 return;
919
920 /* this is moved to the new 3DSTATE_VF */
921 if (dev->gen >= ILO_GEN(7.5))
922 assert(!enable_cut_index);
923
924 switch (ib->hw_index_size) {
925 case 4:
926 format = GEN6_IB_DW0_FORMAT_DWORD;
927 break;
928 case 2:
929 format = GEN6_IB_DW0_FORMAT_WORD;
930 break;
931 case 1:
932 format = GEN6_IB_DW0_FORMAT_BYTE;
933 break;
934 default:
935 assert(!"unknown index size");
936 format = GEN6_IB_DW0_FORMAT_BYTE;
937 break;
938 }
939
940 /*
941 * set start_offset to 0 here and adjust pipe_draw_info::start with
942 * ib->draw_start_offset in 3DPRIMITIVE
943 */
944 start_offset = 0;
945 end_offset = buf->bo_size;
946
947 /* end_offset must also be aligned and is inclusive */
948 end_offset -= (end_offset % ib->hw_index_size);
949 end_offset--;
950
951 dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) |
952 format |
953 (cmd_len - 2);
954 if (enable_cut_index)
955 dw0 |= GEN6_IB_DW0_CUT_INDEX_ENABLE;
956
957 ilo_cp_begin(cp, cmd_len);
958 ilo_cp_write(cp, dw0);
959 ilo_cp_write_bo(cp, start_offset, buf->bo, 0);
960 ilo_cp_write_bo(cp, end_offset, buf->bo, 0);
961 ilo_cp_end(cp);
962 }
963
964 static inline void
965 gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev,
966 uint32_t clip_viewport,
967 uint32_t sf_viewport,
968 uint32_t cc_viewport,
969 struct ilo_cp *cp)
970 {
971 const uint8_t cmd_len = 4;
972 const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VIEWPORT_STATE_POINTERS) |
973 GEN6_PTR_VP_DW0_CLIP_CHANGED |
974 GEN6_PTR_VP_DW0_SF_CHANGED |
975 GEN6_PTR_VP_DW0_CC_CHANGED |
976 (cmd_len - 2);
977
978 ILO_GPE_VALID_GEN(dev, 6, 6);
979
980 ilo_cp_begin(cp, cmd_len);
981 ilo_cp_write(cp, dw0);
982 ilo_cp_write(cp, clip_viewport);
983 ilo_cp_write(cp, sf_viewport);
984 ilo_cp_write(cp, cc_viewport);
985 ilo_cp_end(cp);
986 }
987
/**
 * Emit 3DSTATE_CC_STATE_POINTERS (GEN6 only).  Each of the three state
 * values is written with bit 0 set.
 */
static inline void
gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
                                    uint32_t blend_state,
                                    uint32_t depth_stencil_state,
                                    uint32_t color_calc_state,
                                    struct ilo_cp *cp)
{
   const uint8_t len = 4;
   const uint32_t header = GEN6_RENDER_CMD(3D, 3DSTATE_CC_STATE_POINTERS) |
                           (len - 2);

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, len);

   ilo_cp_write(cp, header);
   ilo_cp_write(cp, blend_state | 1);
   ilo_cp_write(cp, depth_stencil_state | 1);
   ilo_cp_write(cp, color_calc_state | 1);

   ilo_cp_end(cp);
}
1008
/**
 * Emit 3DSTATE_SCISSOR_STATE_POINTERS: a two-dword command made of the
 * header and \p scissor_rect.
 */
static inline void
gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
                                         uint32_t scissor_rect,
                                         struct ilo_cp *cp)
{
   const uint8_t len = 2;
   const uint32_t header =
      GEN6_RENDER_CMD(3D, 3DSTATE_SCISSOR_STATE_POINTERS) | (len - 2);

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   ilo_cp_begin(cp, len);

   ilo_cp_write(cp, header);
   ilo_cp_write(cp, scissor_rect);

   ilo_cp_end(cp);
}
1025
1026 static inline void
1027 gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
1028 const struct ilo_shader_state *vs,
1029 int num_samplers,
1030 struct ilo_cp *cp)
1031 {
1032 const uint8_t cmd_len = 6;
1033 const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
1034 const struct ilo_shader_cso *cso;
1035 uint32_t dw2, dw4, dw5;
1036
1037 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1038
1039 if (!vs) {
1040 ilo_cp_begin(cp, cmd_len);
1041 ilo_cp_write(cp, dw0);
1042 ilo_cp_write(cp, 0);
1043 ilo_cp_write(cp, 0);
1044 ilo_cp_write(cp, 0);
1045 ilo_cp_write(cp, 0);
1046 ilo_cp_write(cp, 0);
1047 ilo_cp_end(cp);
1048 return;
1049 }
1050
1051 cso = ilo_shader_get_kernel_cso(vs);
1052 dw2 = cso->payload[0];
1053 dw4 = cso->payload[1];
1054 dw5 = cso->payload[2];
1055
1056 dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
1057
1058 ilo_cp_begin(cp, cmd_len);
1059 ilo_cp_write(cp, dw0);
1060 ilo_cp_write(cp, ilo_shader_get_kernel_offset(vs));
1061 ilo_cp_write(cp, dw2);
1062 ilo_cp_write(cp, 0); /* scratch */
1063 ilo_cp_write(cp, dw4);
1064 ilo_cp_write(cp, dw5);
1065 ilo_cp_end(cp);
1066 }
1067
1068 static inline void
1069 gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
1070 const struct ilo_shader_state *gs,
1071 const struct ilo_shader_state *vs,
1072 int verts_per_prim,
1073 struct ilo_cp *cp)
1074 {
1075 const uint8_t cmd_len = 7;
1076 const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
1077 uint32_t dw1, dw2, dw4, dw5, dw6;
1078
1079 ILO_GPE_VALID_GEN(dev, 6, 6);
1080
1081 if (gs) {
1082 const struct ilo_shader_cso *cso;
1083
1084 dw1 = ilo_shader_get_kernel_offset(gs);
1085
1086 cso = ilo_shader_get_kernel_cso(gs);
1087 dw2 = cso->payload[0];
1088 dw4 = cso->payload[1];
1089 dw5 = cso->payload[2];
1090 dw6 = cso->payload[3];
1091 }
1092 else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
1093 struct ilo_shader_cso cso;
1094 enum ilo_kernel_param param;
1095
1096 switch (verts_per_prim) {
1097 case 1:
1098 param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
1099 break;
1100 case 2:
1101 param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
1102 break;
1103 default:
1104 param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
1105 break;
1106 }
1107
1108 dw1 = ilo_shader_get_kernel_offset(vs) +
1109 ilo_shader_get_kernel_param(vs, param);
1110
1111 /* cannot use VS's CSO */
1112 ilo_gpe_init_gs_cso_gen6(dev, vs, &cso);
1113 dw2 = cso.payload[0];
1114 dw4 = cso.payload[1];
1115 dw5 = cso.payload[2];
1116 dw6 = cso.payload[3];
1117 }
1118 else {
1119 dw1 = 0;
1120 dw2 = 0;
1121 dw4 = 1 << GEN6_GS_DW4_URB_READ_LEN__SHIFT;
1122 dw5 = GEN6_GS_DW5_STATISTICS;
1123 dw6 = 0;
1124 }
1125
1126 ilo_cp_begin(cp, cmd_len);
1127 ilo_cp_write(cp, dw0);
1128 ilo_cp_write(cp, dw1);
1129 ilo_cp_write(cp, dw2);
1130 ilo_cp_write(cp, 0);
1131 ilo_cp_write(cp, dw4);
1132 ilo_cp_write(cp, dw5);
1133 ilo_cp_write(cp, dw6);
1134 ilo_cp_end(cp);
1135 }
1136
1137 static inline void
1138 gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
1139 const struct ilo_rasterizer_state *rasterizer,
1140 const struct ilo_shader_state *fs,
1141 bool enable_guardband,
1142 int num_viewports,
1143 struct ilo_cp *cp)
1144 {
1145 const uint8_t cmd_len = 4;
1146 const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) | (cmd_len - 2);
1147 uint32_t dw1, dw2, dw3;
1148
1149 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1150
1151 if (rasterizer) {
1152 int interps;
1153
1154 dw1 = rasterizer->clip.payload[0];
1155 dw2 = rasterizer->clip.payload[1];
1156 dw3 = rasterizer->clip.payload[2];
1157
1158 if (enable_guardband && rasterizer->clip.can_enable_guardband)
1159 dw2 |= GEN6_CLIP_DW2_GB_TEST_ENABLE;
1160
1161 interps = (fs) ? ilo_shader_get_kernel_param(fs,
1162 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0;
1163
1164 if (interps & (GEN6_INTERP_NONPERSPECTIVE_PIXEL |
1165 GEN6_INTERP_NONPERSPECTIVE_CENTROID |
1166 GEN6_INTERP_NONPERSPECTIVE_SAMPLE))
1167 dw2 |= GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE;
1168
1169 dw3 |= GEN6_CLIP_DW3_RTAINDEX_FORCED_ZERO |
1170 (num_viewports - 1);
1171 }
1172 else {
1173 dw1 = 0;
1174 dw2 = 0;
1175 dw3 = 0;
1176 }
1177
1178 ilo_cp_begin(cp, cmd_len);
1179 ilo_cp_write(cp, dw0);
1180 ilo_cp_write(cp, dw1);
1181 ilo_cp_write(cp, dw2);
1182 ilo_cp_write(cp, dw3);
1183 ilo_cp_end(cp);
1184 }
1185
1186 static inline void
1187 gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
1188 const struct ilo_rasterizer_state *rasterizer,
1189 const struct ilo_shader_state *fs,
1190 struct ilo_cp *cp)
1191 {
1192 const uint8_t cmd_len = 20;
1193 const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2);
1194 uint32_t payload_raster[6], payload_sbe[13];
1195
1196 ILO_GPE_VALID_GEN(dev, 6, 6);
1197
1198 ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer,
1199 1, PIPE_FORMAT_NONE, payload_raster, Elements(payload_raster));
1200 ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
1201 fs, payload_sbe, Elements(payload_sbe));
1202
1203 ilo_cp_begin(cp, cmd_len);
1204 ilo_cp_write(cp, dw0);
1205 ilo_cp_write(cp, payload_sbe[0]);
1206 ilo_cp_write_multi(cp, payload_raster, 6);
1207 ilo_cp_write_multi(cp, &payload_sbe[1], 12);
1208 ilo_cp_end(cp);
1209 }
1210
1211 static inline void
1212 gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
1213 const struct ilo_shader_state *fs,
1214 int num_samplers,
1215 const struct ilo_rasterizer_state *rasterizer,
1216 bool dual_blend, bool cc_may_kill,
1217 uint32_t hiz_op,
1218 struct ilo_cp *cp)
1219 {
1220 const uint8_t cmd_len = 9;
1221 const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2);
1222 const int num_samples = 1;
1223 const struct ilo_shader_cso *fs_cso;
1224 uint32_t dw2, dw4, dw5, dw6;
1225
1226 ILO_GPE_VALID_GEN(dev, 6, 6);
1227
1228 if (!fs) {
1229 /* see brwCreateContext() */
1230 const int max_threads = (dev->gt == 2) ? 80 : 40;
1231
1232 ilo_cp_begin(cp, cmd_len);
1233 ilo_cp_write(cp, dw0);
1234 ilo_cp_write(cp, 0);
1235 ilo_cp_write(cp, 0);
1236 ilo_cp_write(cp, 0);
1237 ilo_cp_write(cp, hiz_op);
1238 /* honor the valid range even if dispatching is disabled */
1239 ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT);
1240 ilo_cp_write(cp, 0);
1241 ilo_cp_write(cp, 0);
1242 ilo_cp_write(cp, 0);
1243 ilo_cp_end(cp);
1244
1245 return;
1246 }
1247
1248 fs_cso = ilo_shader_get_kernel_cso(fs);
1249 dw2 = fs_cso->payload[0];
1250 dw4 = fs_cso->payload[1];
1251 dw5 = fs_cso->payload[2];
1252 dw6 = fs_cso->payload[3];
1253
1254 dw2 |= (num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
1255
1256 /*
1257 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1258 *
1259 * "This bit (Statistics Enable) must be disabled if either of these
1260 * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer Resolve
1261 * Enable or Depth Buffer Resolve Enable."
1262 */
1263 assert(!hiz_op);
1264 dw4 |= GEN6_WM_DW4_STATISTICS;
1265
1266 if (cc_may_kill)
1267 dw5 |= GEN6_WM_DW5_PS_KILL | GEN6_WM_DW5_PS_ENABLE;
1268
1269 if (dual_blend)
1270 dw5 |= GEN6_WM_DW5_DUAL_SOURCE_BLEND;
1271
1272 dw5 |= rasterizer->wm.payload[0];
1273
1274 dw6 |= rasterizer->wm.payload[1];
1275
1276 if (num_samples > 1) {
1277 dw6 |= rasterizer->wm.dw_msaa_rast |
1278 rasterizer->wm.dw_msaa_disp;
1279 }
1280
1281 ilo_cp_begin(cp, cmd_len);
1282 ilo_cp_write(cp, dw0);
1283 ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
1284 ilo_cp_write(cp, dw2);
1285 ilo_cp_write(cp, 0); /* scratch */
1286 ilo_cp_write(cp, dw4);
1287 ilo_cp_write(cp, dw5);
1288 ilo_cp_write(cp, dw6);
1289 ilo_cp_write(cp, 0); /* kernel 1 */
1290 ilo_cp_write(cp, 0); /* kernel 2 */
1291 ilo_cp_end(cp);
1292 }
1293
/**
 * Fill the four buffer dwords shared by the 3DSTATE_CONSTANT_* commands.
 *
 * For each of the four slots, a buffer is enabled when its index is below
 * \p num_bufs and its size is non-zero.  An enabled slot receives the buffer
 * offset OR'ed with the read length (size in 32-byte units, rounded up,
 * minus one); a disabled slot receives zero.
 *
 * \param dev              unused here
 * \param bufs             buffer offsets; each enabled one must be a
 *                         multiple of 32
 * \param sizes            buffer sizes in bytes
 * \param num_bufs         number of valid entries in \p bufs and \p sizes
 * \param max_read_length  upper bound on the summed read lengths, in
 *                         32-byte units
 * \param dw               output array
 * \param num_dwords       length of \p dw; must be 4
 * \return bitmask with bit N set when slot N is enabled
 */
static inline unsigned
gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs, int max_read_length,
                           uint32_t *dw, int num_dwords)
{
   unsigned enable_mask = 0x0;
   int units_total = 0;
   int slot;

   assert(num_dwords == 4);

   for (slot = 0; slot < 4; slot++) {
      int units;

      if (slot >= num_bufs || !sizes[slot]) {
         dw[slot] = 0;
         continue;
      }

      /* size in 256-bit units; the hardware field holds this minus one */
      units = (sizes[slot] + 31) / 32;

      assert(bufs[slot] % 32 == 0);
      assert(units - 1 < 32);

      enable_mask |= 1 << slot;
      dw[slot] = bufs[slot] | (units - 1);

      units_total += units;
   }

   assert(units_total <= max_read_length);

   return enable_mask;
}
1328
/**
 * Emit the 5-dword 3DSTATE_CONSTANT_VS command through \p cp.  Valid on
 * GEN6 only.
 *
 * Up to four constant buffers are described by \p bufs and \p sizes; the
 * per-buffer enable bits returned by gen6_fill_3dstate_constant() are placed
 * at bit 12 of the header dword.
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint8_t cmd_len = 5;
   uint32_t entries[4];
   uint32_t enable_bits, header;
   unsigned slot;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 138:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 32"
    */
   enable_bits = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 32, entries, Elements(entries));

   header = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_VS) |
            enable_bits << 12 |
            (cmd_len - 2);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   for (slot = 0; slot < 4; slot++)
      ilo_cp_write(cp, entries[slot]);
   ilo_cp_end(cp);
}
1363
/**
 * Emit the 5-dword 3DSTATE_CONSTANT_GS command through \p cp.  Valid on
 * GEN6 only.
 *
 * Up to four constant buffers are described by \p bufs and \p sizes; the
 * per-buffer enable bits returned by gen6_fill_3dstate_constant() are placed
 * at bit 12 of the header dword.
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint8_t cmd_len = 5;
   uint32_t entries[4];
   uint32_t enable_bits, header;
   unsigned slot;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 161:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   enable_bits = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 64, entries, Elements(entries));

   header = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_GS) |
            enable_bits << 12 |
            (cmd_len - 2);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   for (slot = 0; slot < 4; slot++)
      ilo_cp_write(cp, entries[slot]);
   ilo_cp_end(cp);
}
1398
/**
 * Emit the 5-dword 3DSTATE_CONSTANT_PS command through \p cp.  Valid on
 * GEN6 only.
 *
 * Up to four constant buffers are described by \p bufs and \p sizes; the
 * per-buffer enable bits returned by gen6_fill_3dstate_constant() are placed
 * at bit 12 of the header dword.
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint8_t cmd_len = 5;
   uint32_t entries[4];
   uint32_t enable_bits, header;
   unsigned slot;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 287:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   enable_bits = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 64, entries, Elements(entries));

   header = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_PS) |
            enable_bits << 12 |
            (cmd_len - 2);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   for (slot = 0; slot < 4; slot++)
      ilo_cp_write(cp, entries[slot]);
   ilo_cp_end(cp);
}
1433
/**
 * Emit the 2-dword 3DSTATE_SAMPLE_MASK command through \p cp.  Valid on
 * GEN6 only.
 *
 * Only the low four bits of \p sample_mask are kept.
 */
static inline void
gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
                              unsigned sample_mask,
                              struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const unsigned low_bits = 0xf;
   uint32_t header;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   header = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK) | (cmd_len - 2);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, sample_mask & low_bits);
   ilo_cp_end(cp);
}
1453
1454 static inline void
1455 gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev,
1456 unsigned x, unsigned y,
1457 unsigned width, unsigned height,
1458 struct ilo_cp *cp)
1459 {
1460 const uint8_t cmd_len = 4;
1461 const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_DRAWING_RECTANGLE) |
1462 (cmd_len - 2);
1463 unsigned xmax = x + width - 1;
1464 unsigned ymax = y + height - 1;
1465 int rect_limit;
1466
1467 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1468
1469 if (dev->gen >= ILO_GEN(7)) {
1470 rect_limit = 16383;
1471 }
1472 else {
1473 /*
1474 * From the Sandy Bridge PRM, volume 2 part 1, page 230:
1475 *
1476 * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
1477 * must be an even number"
1478 */
1479 assert(y % 2 == 0);
1480
1481 rect_limit = 8191;
1482 }
1483
1484 if (x > rect_limit) x = rect_limit;
1485 if (y > rect_limit) y = rect_limit;
1486 if (xmax > rect_limit) xmax = rect_limit;
1487 if (ymax > rect_limit) ymax = rect_limit;
1488
1489 ilo_cp_begin(cp, cmd_len);
1490 ilo_cp_write(cp, dw0);
1491 ilo_cp_write(cp, y << 16 | x);
1492 ilo_cp_write(cp, ymax << 16 | xmax);
1493
1494 /*
1495 * There is no need to set the origin. It is intended to support front
1496 * buffer rendering.
1497 */
1498 ilo_cp_write(cp, 0);
1499
1500 ilo_cp_end(cp);
1501 }
1502
1503 static inline void
1504 zs_align_surface(const struct ilo_dev_info *dev,
1505 unsigned align_w, unsigned align_h,
1506 struct ilo_zs_surface *zs)
1507 {
1508 unsigned mask, shift_w, shift_h;
1509 unsigned width, height;
1510 uint32_t dw3;
1511
1512 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1513
1514 if (dev->gen >= ILO_GEN(7)) {
1515 shift_w = 4;
1516 shift_h = 18;
1517 mask = 0x3fff;
1518 }
1519 else {
1520 shift_w = 6;
1521 shift_h = 19;
1522 mask = 0x1fff;
1523 }
1524
1525 dw3 = zs->payload[2];
1526
1527 /* aligned width and height */
1528 width = align(((dw3 >> shift_w) & mask) + 1, align_w);
1529 height = align(((dw3 >> shift_h) & mask) + 1, align_h);
1530
1531 dw3 = (dw3 & ~((mask << shift_w) | (mask << shift_h))) |
1532 (width - 1) << shift_w |
1533 (height - 1) << shift_h;
1534
1535 zs->payload[2] = dw3;
1536 }
1537
1538 static inline void
1539 gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
1540 const struct ilo_zs_surface *zs,
1541 struct ilo_cp *cp)
1542 {
1543 const uint8_t cmd_len = 7;
1544 uint32_t dw0;
1545
1546 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1547
1548 dw0 = (dev->gen >= ILO_GEN(7)) ?
1549 GEN7_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER) :
1550 GEN6_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER);
1551 dw0 |= (cmd_len - 2);
1552
1553 ilo_cp_begin(cp, cmd_len);
1554 ilo_cp_write(cp, dw0);
1555 ilo_cp_write(cp, zs->payload[0]);
1556 ilo_cp_write_bo(cp, zs->payload[1], zs->bo, INTEL_RELOC_WRITE);
1557 ilo_cp_write(cp, zs->payload[2]);
1558 ilo_cp_write(cp, zs->payload[3]);
1559 ilo_cp_write(cp, zs->payload[4]);
1560 ilo_cp_write(cp, zs->payload[5]);
1561 ilo_cp_end(cp);
1562 }
1563
/**
 * Emit the 2-dword 3DSTATE_POLY_STIPPLE_OFFSET command through \p cp.
 * Valid on GEN6 through GEN7.5.
 *
 * Both offsets must lie in [0, 31]; \p x_offset goes to bits 8 and up,
 * \p y_offset to the low bits.
 */
static inline void
gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
                                      int x_offset, int y_offset,
                                      struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   uint32_t header;

   ILO_GPE_VALID_GEN(dev, 6, 7.5);
   assert(x_offset >= 0 && x_offset <= 31);
   assert(y_offset >= 0 && y_offset <= 31);

   header = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_OFFSET) | (cmd_len - 2);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, x_offset << 8 | y_offset);
   ilo_cp_end(cp);
}
1582
1583 static inline void
1584 gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev,
1585 const struct pipe_poly_stipple *pattern,
1586 struct ilo_cp *cp)
1587 {
1588 const uint8_t cmd_len = 33;
1589 const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_POLY_STIPPLE_PATTERN) |
1590 (cmd_len - 2);
1591 int i;
1592
1593 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1594 assert(Elements(pattern->stipple) == 32);
1595
1596 ilo_cp_begin(cp, cmd_len);
1597 ilo_cp_write(cp, dw0);
1598 for (i = 0; i < 32; i++)
1599 ilo_cp_write(cp, pattern->stipple[i]);
1600 ilo_cp_end(cp);
1601 }
1602
1603 static inline void
1604 gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev,
1605 unsigned pattern, unsigned factor,
1606 struct ilo_cp *cp)
1607 {
1608 const uint8_t cmd_len = 3;
1609 const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_LINE_STIPPLE) |
1610 (cmd_len - 2);
1611 unsigned inverse;
1612
1613 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1614 assert((pattern & 0xffff) == pattern);
1615 assert(factor >= 1 && factor <= 256);
1616
1617 ilo_cp_begin(cp, cmd_len);
1618 ilo_cp_write(cp, dw0);
1619 ilo_cp_write(cp, pattern);
1620
1621 if (dev->gen >= ILO_GEN(7)) {
1622 /* in U1.16 */
1623 inverse = (unsigned) (65536.0f / factor);
1624 ilo_cp_write(cp, inverse << 15 | factor);
1625 }
1626 else {
1627 /* in U1.13 */
1628 inverse = (unsigned) (8192.0f / factor);
1629 ilo_cp_write(cp, inverse << 16 | factor);
1630 }
1631
1632 ilo_cp_end(cp);
1633 }
1634
/**
 * Emit the 3-dword 3DSTATE_AA_LINE_PARAMETERS command through \p cp with
 * both parameter dwords set to zero.  Valid on GEN6 through GEN7.5.
 */
static inline void
gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
                                     struct ilo_cp *cp)
{
   const uint8_t cmd_len = 3;
   uint32_t header;
   int n;

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   header = GEN6_RENDER_CMD(3D, 3DSTATE_AA_LINE_PARAMETERS) | (cmd_len - 2);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   /* each dword packs two fields, at bit 16 and bit 0; all are zero */
   for (n = 0; n < 2; n++)
      ilo_cp_write(cp, 0 << 16 | 0);
   ilo_cp_end(cp);
}
1651
1652 static inline void
1653 gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev,
1654 int index, unsigned svbi,
1655 unsigned max_svbi,
1656 bool load_vertex_count,
1657 struct ilo_cp *cp)
1658 {
1659 const uint8_t cmd_len = 4;
1660 const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS_SVB_INDEX) |
1661 (cmd_len - 2);
1662 uint32_t dw1;
1663
1664 ILO_GPE_VALID_GEN(dev, 6, 6);
1665 assert(index >= 0 && index < 4);
1666
1667 dw1 = index << GEN6_SVBI_DW1_INDEX__SHIFT;
1668 if (load_vertex_count)
1669 dw1 |= GEN6_SVBI_DW1_LOAD_INTERNAL_VERTEX_COUNT;
1670
1671 ilo_cp_begin(cp, cmd_len);
1672 ilo_cp_write(cp, dw0);
1673 ilo_cp_write(cp, dw1);
1674 ilo_cp_write(cp, svbi);
1675 ilo_cp_write(cp, max_svbi);
1676 ilo_cp_end(cp);
1677 }
1678
1679 static inline void
1680 gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev,
1681 int num_samples,
1682 const uint32_t *packed_sample_pos,
1683 bool pixel_location_center,
1684 struct ilo_cp *cp)
1685 {
1686 const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3;
1687 const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_MULTISAMPLE) |
1688 (cmd_len - 2);
1689 uint32_t dw1, dw2, dw3;
1690
1691 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1692
1693 dw1 = (pixel_location_center) ?
1694 GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER : GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER;
1695
1696 switch (num_samples) {
1697 case 0:
1698 case 1:
1699 dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
1700 dw2 = 0;
1701 dw3 = 0;
1702 break;
1703 case 4:
1704 dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4;
1705 dw2 = packed_sample_pos[0];
1706 dw3 = 0;
1707 break;
1708 case 8:
1709 assert(dev->gen >= ILO_GEN(7));
1710 dw1 |= GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8;
1711 dw2 = packed_sample_pos[0];
1712 dw3 = packed_sample_pos[1];
1713 break;
1714 default:
1715 assert(!"unsupported sample count");
1716 dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
1717 dw2 = 0;
1718 dw3 = 0;
1719 break;
1720 }
1721
1722 ilo_cp_begin(cp, cmd_len);
1723 ilo_cp_write(cp, dw0);
1724 ilo_cp_write(cp, dw1);
1725 ilo_cp_write(cp, dw2);
1726 if (dev->gen >= ILO_GEN(7))
1727 ilo_cp_write(cp, dw3);
1728 ilo_cp_end(cp);
1729 }
1730
1731 static inline void
1732 gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev,
1733 const struct ilo_zs_surface *zs,
1734 struct ilo_cp *cp)
1735 {
1736 const uint8_t cmd_len = 3;
1737 uint32_t dw0;
1738
1739 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1740
1741 dw0 = (dev->gen >= ILO_GEN(7)) ?
1742 GEN7_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER) :
1743 GEN6_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER);
1744 dw0 |= (cmd_len - 2);
1745
1746 ilo_cp_begin(cp, cmd_len);
1747 ilo_cp_write(cp, dw0);
1748 /* see ilo_gpe_init_zs_surface() */
1749 ilo_cp_write(cp, zs->payload[6]);
1750 ilo_cp_write_bo(cp, zs->payload[7], zs->separate_s8_bo, INTEL_RELOC_WRITE);
1751 ilo_cp_end(cp);
1752 }
1753
1754 static inline void
1755 gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev,
1756 const struct ilo_zs_surface *zs,
1757 struct ilo_cp *cp)
1758 {
1759 const uint8_t cmd_len = 3;
1760 uint32_t dw0;
1761
1762 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1763
1764 dw0 = (dev->gen >= ILO_GEN(7)) ?
1765 GEN7_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER) :
1766 GEN6_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER);
1767 dw0 |= (cmd_len - 2);
1768
1769 ilo_cp_begin(cp, cmd_len);
1770 ilo_cp_write(cp, dw0);
1771 /* see ilo_gpe_init_zs_surface() */
1772 ilo_cp_write(cp, zs->payload[8]);
1773 ilo_cp_write_bo(cp, zs->payload[9], zs->hiz_bo, INTEL_RELOC_WRITE);
1774 ilo_cp_end(cp);
1775 }
1776
1777 static inline void
1778 gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
1779 uint32_t clear_val,
1780 struct ilo_cp *cp)
1781 {
1782 const uint8_t cmd_len = 2;
1783 const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CLEAR_PARAMS) |
1784 GEN6_CLEAR_PARAMS_DW0_VALID |
1785 (cmd_len - 2);
1786
1787 ILO_GPE_VALID_GEN(dev, 6, 6);
1788
1789 ilo_cp_begin(cp, cmd_len);
1790 ilo_cp_write(cp, dw0);
1791 ilo_cp_write(cp, clear_val);
1792 ilo_cp_end(cp);
1793 }
1794
1795 static inline void
1796 gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev,
1797 uint32_t dw1,
1798 struct intel_bo *bo, uint32_t bo_offset,
1799 bool write_qword,
1800 struct ilo_cp *cp)
1801 {
1802 const uint8_t cmd_len = (write_qword) ? 5 : 4;
1803 const uint32_t dw0 = GEN6_RENDER_CMD(3D, PIPE_CONTROL) | (cmd_len - 2);
1804 uint32_t reloc_flags = INTEL_RELOC_WRITE;
1805
1806 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1807
1808 assert(bo_offset % ((write_qword) ? 8 : 4) == 0);
1809
1810 if (dw1 & GEN6_PIPE_CONTROL_CS_STALL) {
1811 /*
1812 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
1813 *
1814 * "1 of the following must also be set (when CS stall is set):
1815 *
1816 * * Depth Cache Flush Enable ([0] of DW1)
1817 * * Stall at Pixel Scoreboard ([1] of DW1)
1818 * * Depth Stall ([13] of DW1)
1819 * * Post-Sync Operation ([13] of DW1)
1820 * * Render Target Cache Flush Enable ([12] of DW1)
1821 * * Notify Enable ([8] of DW1)"
1822 *
1823 * From the Ivy Bridge PRM, volume 2 part 1, page 61:
1824 *
1825 * "One of the following must also be set (when CS stall is set):
1826 *
1827 * * Render Target Cache Flush Enable ([12] of DW1)
1828 * * Depth Cache Flush Enable ([0] of DW1)
1829 * * Stall at Pixel Scoreboard ([1] of DW1)
1830 * * Depth Stall ([13] of DW1)
1831 * * Post-Sync Operation ([13] of DW1)"
1832 */
1833 uint32_t bit_test = GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
1834 GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
1835 GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL |
1836 GEN6_PIPE_CONTROL_DEPTH_STALL;
1837
1838 /* post-sync op */
1839 bit_test |= GEN6_PIPE_CONTROL_WRITE_IMM |
1840 GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT |
1841 GEN6_PIPE_CONTROL_WRITE_TIMESTAMP;
1842
1843 if (dev->gen == ILO_GEN(6))
1844 bit_test |= GEN6_PIPE_CONTROL_NOTIFY_ENABLE;
1845
1846 assert(dw1 & bit_test);
1847 }
1848
1849 if (dw1 & GEN6_PIPE_CONTROL_DEPTH_STALL) {
1850 /*
1851 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
1852 *
1853 * "Following bits must be clear (when Depth Stall is set):
1854 *
1855 * * Render Target Cache Flush Enable ([12] of DW1)
1856 * * Depth Cache Flush Enable ([0] of DW1)"
1857 */
1858 assert(!(dw1 & (GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
1859 GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
1860 }
1861
1862 /*
1863 * From the Sandy Bridge PRM, volume 1 part 3, page 19:
1864 *
1865 * "[DevSNB] PPGTT memory writes by MI_* (such as MI_STORE_DATA_IMM)
1866 * and PIPE_CONTROL are not supported."
1867 *
1868 * The kernel will add the mapping automatically (when write domain is
1869 * INTEL_DOMAIN_INSTRUCTION).
1870 */
1871 if (dev->gen == ILO_GEN(6) && bo) {
1872 bo_offset |= GEN6_PIPE_CONTROL_DW2_USE_GGTT;
1873 reloc_flags |= INTEL_RELOC_GGTT;
1874 }
1875
1876 ilo_cp_begin(cp, cmd_len);
1877 ilo_cp_write(cp, dw0);
1878 ilo_cp_write(cp, dw1);
1879 ilo_cp_write_bo(cp, bo_offset, bo, reloc_flags);
1880 ilo_cp_write(cp, 0);
1881 if (write_qword)
1882 ilo_cp_write(cp, 0);
1883 ilo_cp_end(cp);
1884 }
1885
1886 static inline void
1887 gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
1888 const struct pipe_draw_info *info,
1889 const struct ilo_ib_state *ib,
1890 bool rectlist,
1891 struct ilo_cp *cp)
1892 {
1893 const uint8_t cmd_len = 6;
1894 const int prim = (rectlist) ?
1895 GEN6_3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
1896 const int vb_access = (info->indexed) ?
1897 GEN6_3DPRIM_DW0_ACCESS_RANDOM : GEN6_3DPRIM_DW0_ACCESS_SEQUENTIAL;
1898 const uint32_t vb_start = info->start +
1899 ((info->indexed) ? ib->draw_start_offset : 0);
1900 uint32_t dw0;
1901
1902 ILO_GPE_VALID_GEN(dev, 6, 6);
1903
1904 dw0 = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) |
1905 vb_access |
1906 prim << GEN6_3DPRIM_DW0_TYPE__SHIFT |
1907 (cmd_len - 2);
1908
1909 ilo_cp_begin(cp, cmd_len);
1910 ilo_cp_write(cp, dw0);
1911 ilo_cp_write(cp, info->count);
1912 ilo_cp_write(cp, vb_start);
1913 ilo_cp_write(cp, info->instance_count);
1914 ilo_cp_write(cp, info->start_instance);
1915 ilo_cp_write(cp, info->index_bias);
1916 ilo_cp_end(cp);
1917 }
1918
1919 static inline uint32_t
1920 gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev,
1921 const struct ilo_shader_state **cs,
1922 uint32_t *sampler_state,
1923 int *num_samplers,
1924 uint32_t *binding_table_state,
1925 int *num_surfaces,
1926 int num_ids,
1927 struct ilo_cp *cp)
1928 {
1929 /*
1930 * From the Sandy Bridge PRM, volume 2 part 2, page 34:
1931 *
1932 * "(Interface Descriptor Total Length) This field must have the same
1933 * alignment as the Interface Descriptor Data Start Address.
1934 *
1935 * It must be DQWord (32-byte) aligned..."
1936 *
1937 * From the Sandy Bridge PRM, volume 2 part 2, page 35:
1938 *
1939 * "(Interface Descriptor Data Start Address) Specifies the 32-byte
1940 * aligned address of the Interface Descriptor data."
1941 */
1942 const int state_align = 32 / 4;
1943 const int state_len = (32 / 4) * num_ids;
1944 uint32_t state_offset, *dw;
1945 int i;
1946
1947 ILO_GPE_VALID_GEN(dev, 6, 6);
1948
1949 dw = ilo_cp_steal_ptr(cp, ILO_BUILDER_ITEM_BLOB,
1950 state_len, state_align, &state_offset);
1951
1952 for (i = 0; i < num_ids; i++) {
1953 dw[0] = ilo_shader_get_kernel_offset(cs[i]);
1954 dw[1] = 1 << 18; /* SPF */
1955 dw[2] = sampler_state[i] |
1956 (num_samplers[i] + 3) / 4 << 2;
1957 dw[3] = binding_table_state[i] |
1958 num_surfaces[i];
1959 dw[4] = 0 << 16 | /* CURBE Read Length */
1960 0; /* CURBE Read Offset */
1961 dw[5] = 0; /* Barrier ID */
1962 dw[6] = 0;
1963 dw[7] = 0;
1964
1965 dw += 8;
1966 }
1967
1968 return state_offset;
1969 }
1970
1971 static inline uint32_t
1972 gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev,
1973 const struct ilo_viewport_cso *viewports,
1974 unsigned num_viewports,
1975 struct ilo_cp *cp)
1976 {
1977 const int state_align = 32 / 4;
1978 const int state_len = 8 * num_viewports;
1979 uint32_t state_offset, *dw;
1980 unsigned i;
1981
1982 ILO_GPE_VALID_GEN(dev, 6, 6);
1983
1984 /*
1985 * From the Sandy Bridge PRM, volume 2 part 1, page 262:
1986 *
1987 * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
1988 * stored as an array of up to 16 elements..."
1989 */
1990 assert(num_viewports && num_viewports <= 16);
1991
1992 dw = ilo_cp_steal_ptr(cp, ILO_BUILDER_ITEM_SF_VIEWPORT,
1993 state_len, state_align, &state_offset);
1994
1995 for (i = 0; i < num_viewports; i++) {
1996 const struct ilo_viewport_cso *vp = &viewports[i];
1997
1998 dw[0] = fui(vp->m00);
1999 dw[1] = fui(vp->m11);
2000 dw[2] = fui(vp->m22);
2001 dw[3] = fui(vp->m30);
2002 dw[4] = fui(vp->m31);
2003 dw[5] = fui(vp->m32);
2004 dw[6] = 0;
2005 dw[7] = 0;
2006
2007 dw += 8;
2008 }
2009
2010 return state_offset;
2011 }
2012
2013 static inline uint32_t
2014 gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
2015 const struct ilo_viewport_cso *viewports,
2016 unsigned num_viewports,
2017 struct ilo_cp *cp)
2018 {
2019 const int state_align = 32 / 4;
2020 const int state_len = 4 * num_viewports;
2021 uint32_t state_offset, *dw;
2022 unsigned i;
2023
2024 ILO_GPE_VALID_GEN(dev, 6, 6);
2025
2026 /*
2027 * From the Sandy Bridge PRM, volume 2 part 1, page 193:
2028 *
2029 * "The viewport-related state is stored as an array of up to 16
2030 * elements..."
2031 */
2032 assert(num_viewports && num_viewports <= 16);
2033
2034 dw = ilo_cp_steal_ptr(cp, ILO_BUILDER_ITEM_CLIP_VIEWPORT,
2035 state_len, state_align, &state_offset);
2036
2037 for (i = 0; i < num_viewports; i++) {
2038 const struct ilo_viewport_cso *vp = &viewports[i];
2039
2040 dw[0] = fui(vp->min_gbx);
2041 dw[1] = fui(vp->max_gbx);
2042 dw[2] = fui(vp->min_gby);
2043 dw[3] = fui(vp->max_gby);
2044
2045 dw += 4;
2046 }
2047
2048 return state_offset;
2049 }
2050
2051 static inline uint32_t
2052 gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev,
2053 const struct ilo_viewport_cso *viewports,
2054 unsigned num_viewports,
2055 struct ilo_cp *cp)
2056 {
2057 const int state_align = 32 / 4;
2058 const int state_len = 2 * num_viewports;
2059 uint32_t state_offset, *dw;
2060 unsigned i;
2061
2062 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2063
2064 /*
2065 * From the Sandy Bridge PRM, volume 2 part 1, page 385:
2066 *
2067 * "The viewport state is stored as an array of up to 16 elements..."
2068 */
2069 assert(num_viewports && num_viewports <= 16);
2070
2071 dw = ilo_cp_steal_ptr(cp, ILO_BUILDER_ITEM_CC_VIEWPORT,
2072 state_len, state_align, &state_offset);
2073
2074 for (i = 0; i < num_viewports; i++) {
2075 const struct ilo_viewport_cso *vp = &viewports[i];
2076
2077 dw[0] = fui(vp->min_z);
2078 dw[1] = fui(vp->max_z);
2079
2080 dw += 2;
2081 }
2082
2083 return state_offset;
2084 }
2085
2086 static inline uint32_t
2087 gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev,
2088 const struct pipe_stencil_ref *stencil_ref,
2089 ubyte alpha_ref,
2090 const struct pipe_blend_color *blend_color,
2091 struct ilo_cp *cp)
2092 {
2093 const int state_align = 64 / 4;
2094 const int state_len = 6;
2095 uint32_t state_offset, *dw;
2096
2097 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2098
2099 dw = ilo_cp_steal_ptr(cp, ILO_BUILDER_ITEM_COLOR_CALC,
2100 state_len, state_align, &state_offset);
2101
2102 dw[0] = stencil_ref->ref_value[0] << 24 |
2103 stencil_ref->ref_value[1] << 16 |
2104 GEN6_CC_DW0_ALPHATEST_UNORM8;
2105 dw[1] = alpha_ref;
2106 dw[2] = fui(blend_color->color[0]);
2107 dw[3] = fui(blend_color->color[1]);
2108 dw[4] = fui(blend_color->color[2]);
2109 dw[5] = fui(blend_color->color[3]);
2110
2111 return state_offset;
2112 }
2113
2114 static inline uint32_t
2115 gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
2116 const struct ilo_blend_state *blend,
2117 const struct ilo_fb_state *fb,
2118 const struct ilo_dsa_state *dsa,
2119 struct ilo_cp *cp)
2120 {
2121 const int state_align = 64 / 4;
2122 int state_len;
2123 uint32_t state_offset, *dw;
2124 unsigned num_targets, i;
2125
2126 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2127
2128 /*
2129 * From the Sandy Bridge PRM, volume 2 part 1, page 376:
2130 *
2131 * "The blend state is stored as an array of up to 8 elements..."
2132 */
2133 num_targets = fb->state.nr_cbufs;
2134 assert(num_targets <= 8);
2135
2136 if (!num_targets) {
2137 if (!dsa->dw_alpha)
2138 return 0;
2139 /* to be able to reference alpha func */
2140 num_targets = 1;
2141 }
2142
2143 state_len = 2 * num_targets;
2144
2145 dw = ilo_cp_steal_ptr(cp, ILO_BUILDER_ITEM_BLEND,
2146 state_len, state_align, &state_offset);
2147
2148 for (i = 0; i < num_targets; i++) {
2149 const unsigned idx = (blend->independent_blend_enable) ? i : 0;
2150 const struct ilo_blend_cso *cso = &blend->cso[idx];
2151 const int num_samples = fb->num_samples;
2152 const struct util_format_description *format_desc =
2153 (idx < fb->state.nr_cbufs && fb->state.cbufs[idx]) ?
2154 util_format_description(fb->state.cbufs[idx]->format) : NULL;
2155 bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;
2156
2157 rt_is_unorm = true;
2158 rt_is_pure_integer = false;
2159 rt_dst_alpha_forced_one = false;
2160
2161 if (format_desc) {
2162 int ch;
2163
2164 switch (format_desc->format) {
2165 case PIPE_FORMAT_B8G8R8X8_UNORM:
2166 /* force alpha to one when the HW format has alpha */
2167 assert(ilo_translate_render_format(dev, PIPE_FORMAT_B8G8R8X8_UNORM)
2168 == GEN6_FORMAT_B8G8R8A8_UNORM);
2169 rt_dst_alpha_forced_one = true;
2170 break;
2171 default:
2172 break;
2173 }
2174
2175 for (ch = 0; ch < 4; ch++) {
2176 if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID)
2177 continue;
2178
2179 if (format_desc->channel[ch].pure_integer) {
2180 rt_is_unorm = false;
2181 rt_is_pure_integer = true;
2182 break;
2183 }
2184
2185 if (!format_desc->channel[ch].normalized ||
2186 format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED)
2187 rt_is_unorm = false;
2188 }
2189 }
2190
2191 dw[0] = cso->payload[0];
2192 dw[1] = cso->payload[1];
2193
2194 if (!rt_is_pure_integer) {
2195 if (rt_dst_alpha_forced_one)
2196 dw[0] |= cso->dw_blend_dst_alpha_forced_one;
2197 else
2198 dw[0] |= cso->dw_blend;
2199 }
2200
2201 /*
2202 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
2203 *
2204 * "Logic Ops are only supported on *_UNORM surfaces (excluding
2205 * _SRGB variants), otherwise Logic Ops must be DISABLED."
2206 *
2207 * Since logicop is ignored for non-UNORM color buffers, no special care
2208 * is needed.
2209 */
2210 if (rt_is_unorm)
2211 dw[1] |= cso->dw_logicop;
2212
2213 /*
2214 * From the Sandy Bridge PRM, volume 2 part 1, page 356:
2215 *
2216 * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
2217 * Dither both must be disabled."
2218 *
2219 * There is no such limitation on GEN7, or for AlphaToOne. But GL
2220 * requires that anyway.
2221 */
2222 if (num_samples > 1)
2223 dw[1] |= cso->dw_alpha_mod;
2224
2225 /*
2226 * From the Sandy Bridge PRM, volume 2 part 1, page 382:
2227 *
2228 * "Alpha Test can only be enabled if Pixel Shader outputs a float
2229 * alpha value."
2230 */
2231 if (!rt_is_pure_integer)
2232 dw[1] |= dsa->dw_alpha;
2233
2234 dw += 2;
2235 }
2236
2237 return state_offset;
2238 }
2239
2240 static inline uint32_t
2241 gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev,
2242 const struct ilo_dsa_state *dsa,
2243 struct ilo_cp *cp)
2244 {
2245 const int state_align = 64 / 4;
2246 const int state_len = 3;
2247 uint32_t state_offset, *dw;
2248
2249
2250 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2251
2252 dw = ilo_cp_steal_ptr(cp, ILO_BUILDER_ITEM_DEPTH_STENCIL,
2253 state_len, state_align, &state_offset);
2254
2255 dw[0] = dsa->payload[0];
2256 dw[1] = dsa->payload[1];
2257 dw[2] = dsa->payload[2];
2258
2259 return state_offset;
2260 }
2261
2262 static inline uint32_t
2263 gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev,
2264 const struct ilo_scissor_state *scissor,
2265 unsigned num_viewports,
2266 struct ilo_cp *cp)
2267 {
2268 const int state_align = 32 / 4;
2269 const int state_len = 2 * num_viewports;
2270 uint32_t state_offset, *dw;
2271
2272 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2273
2274 /*
2275 * From the Sandy Bridge PRM, volume 2 part 1, page 263:
2276 *
2277 * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
2278 * stored as an array of up to 16 elements..."
2279 */
2280 assert(num_viewports && num_viewports <= 16);
2281
2282 dw = ilo_cp_steal_ptr(cp, ILO_BUILDER_ITEM_SCISSOR_RECT,
2283 state_len, state_align, &state_offset);
2284
2285 memcpy(dw, scissor->payload, state_len * 4);
2286
2287 return state_offset;
2288 }
2289
2290 static inline uint32_t
2291 gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
2292 uint32_t *surface_states,
2293 int num_surface_states,
2294 struct ilo_cp *cp)
2295 {
2296 const int state_align = 32 / 4;
2297 const int state_len = num_surface_states;
2298 uint32_t state_offset, *dw;
2299
2300 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2301
2302 /*
2303 * From the Sandy Bridge PRM, volume 4 part 1, page 69:
2304 *
2305 * "It is stored as an array of up to 256 elements..."
2306 */
2307 assert(num_surface_states <= 256);
2308
2309 if (!num_surface_states)
2310 return 0;
2311
2312 dw = ilo_cp_steal_ptr(cp, ILO_BUILDER_ITEM_BINDING_TABLE,
2313 state_len, state_align, &state_offset);
2314 memcpy(dw, surface_states,
2315 num_surface_states * sizeof(surface_states[0]));
2316
2317 return state_offset;
2318 }
2319
2320 static inline uint32_t
2321 gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev,
2322 const struct ilo_view_surface *surf,
2323 bool for_render,
2324 struct ilo_cp *cp)
2325 {
2326 const int state_align = 32 / 4;
2327 const int state_len = (dev->gen >= ILO_GEN(7)) ? 8 : 6;
2328 uint32_t state_offset;
2329
2330 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2331
2332 ilo_cp_steal(cp, ILO_BUILDER_ITEM_SURFACE,
2333 state_len, state_align, &state_offset);
2334
2335 STATIC_ASSERT(Elements(surf->payload) >= 8);
2336
2337 ilo_cp_write(cp, surf->payload[0]);
2338 ilo_cp_write_bo(cp, surf->payload[1], surf->bo,
2339 (for_render) ? INTEL_RELOC_WRITE : 0);
2340 ilo_cp_write(cp, surf->payload[2]);
2341 ilo_cp_write(cp, surf->payload[3]);
2342 ilo_cp_write(cp, surf->payload[4]);
2343 ilo_cp_write(cp, surf->payload[5]);
2344
2345 if (dev->gen >= ILO_GEN(7)) {
2346 ilo_cp_write(cp, surf->payload[6]);
2347 ilo_cp_write(cp, surf->payload[7]);
2348 }
2349
2350 ilo_cp_end(cp);
2351
2352 return state_offset;
2353 }
2354
2355 static inline uint32_t
2356 gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev,
2357 const struct pipe_stream_output_target *so,
2358 const struct pipe_stream_output_info *so_info,
2359 int so_index,
2360 struct ilo_cp *cp)
2361 {
2362 struct ilo_buffer *buf = ilo_buffer(so->buffer);
2363 unsigned bo_offset, struct_size;
2364 enum pipe_format elem_format;
2365 struct ilo_view_surface surf;
2366
2367 ILO_GPE_VALID_GEN(dev, 6, 6);
2368
2369 bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
2370 struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
2371
2372 switch (so_info->output[so_index].num_components) {
2373 case 1:
2374 elem_format = PIPE_FORMAT_R32_FLOAT;
2375 break;
2376 case 2:
2377 elem_format = PIPE_FORMAT_R32G32_FLOAT;
2378 break;
2379 case 3:
2380 elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
2381 break;
2382 case 4:
2383 elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
2384 break;
2385 default:
2386 assert(!"unexpected SO components length");
2387 elem_format = PIPE_FORMAT_R32_FLOAT;
2388 break;
2389 }
2390
2391 ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size,
2392 struct_size, elem_format, false, true, &surf);
2393
2394 return gen6_emit_SURFACE_STATE(dev, &surf, false, cp);
2395 }
2396
2397 static inline uint32_t
2398 gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev,
2399 const struct ilo_sampler_cso * const *samplers,
2400 const struct pipe_sampler_view * const *views,
2401 const uint32_t *sampler_border_colors,
2402 int num_samplers,
2403 struct ilo_cp *cp)
2404 {
2405 const int state_align = 32 / 4;
2406 const int state_len = 4 * num_samplers;
2407 uint32_t state_offset, *dw;
2408 int i;
2409
2410 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2411
2412 /*
2413 * From the Sandy Bridge PRM, volume 4 part 1, page 101:
2414 *
2415 * "The sampler state is stored as an array of up to 16 elements..."
2416 */
2417 assert(num_samplers <= 16);
2418
2419 if (!num_samplers)
2420 return 0;
2421
2422 dw = ilo_cp_steal_ptr(cp, ILO_BUILDER_ITEM_SAMPLER,
2423 state_len, state_align, &state_offset);
2424
2425 for (i = 0; i < num_samplers; i++) {
2426 const struct ilo_sampler_cso *sampler = samplers[i];
2427 const struct pipe_sampler_view *view = views[i];
2428 const uint32_t border_color = sampler_border_colors[i];
2429 uint32_t dw_filter, dw_wrap;
2430
2431 /* there may be holes */
2432 if (!sampler || !view) {
2433 /* disabled sampler */
2434 dw[0] = 1 << 31;
2435 dw[1] = 0;
2436 dw[2] = 0;
2437 dw[3] = 0;
2438 dw += 4;
2439
2440 continue;
2441 }
2442
2443 /* determine filter and wrap modes */
2444 switch (view->texture->target) {
2445 case PIPE_TEXTURE_1D:
2446 dw_filter = (sampler->anisotropic) ?
2447 sampler->dw_filter_aniso : sampler->dw_filter;
2448 dw_wrap = sampler->dw_wrap_1d;
2449 break;
2450 case PIPE_TEXTURE_3D:
2451 /*
2452 * From the Sandy Bridge PRM, volume 4 part 1, page 103:
2453 *
2454 * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
2455 * surfaces of type SURFTYPE_3D."
2456 */
2457 dw_filter = sampler->dw_filter;
2458 dw_wrap = sampler->dw_wrap;
2459 break;
2460 case PIPE_TEXTURE_CUBE:
2461 dw_filter = (sampler->anisotropic) ?
2462 sampler->dw_filter_aniso : sampler->dw_filter;
2463 dw_wrap = sampler->dw_wrap_cube;
2464 break;
2465 default:
2466 dw_filter = (sampler->anisotropic) ?
2467 sampler->dw_filter_aniso : sampler->dw_filter;
2468 dw_wrap = sampler->dw_wrap;
2469 break;
2470 }
2471
2472 dw[0] = sampler->payload[0];
2473 dw[1] = sampler->payload[1];
2474 assert(!(border_color & 0x1f));
2475 dw[2] = border_color;
2476 dw[3] = sampler->payload[2];
2477
2478 dw[0] |= dw_filter;
2479
2480 if (dev->gen >= ILO_GEN(7)) {
2481 dw[3] |= dw_wrap;
2482 }
2483 else {
2484 /*
2485 * From the Sandy Bridge PRM, volume 4 part 1, page 21:
2486 *
2487 * "[DevSNB] Errata: Incorrect behavior is observed in cases
2488 * where the min and mag mode filters are different and
2489 * SurfMinLOD is nonzero. The determination of MagMode uses the
2490 * following equation instead of the one in the above
2491 * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
2492 *
2493 * As a way to work around that, we set Base to
2494 * view->u.tex.first_level.
2495 */
2496 dw[0] |= view->u.tex.first_level << 22;
2497
2498 dw[1] |= dw_wrap;
2499 }
2500
2501 dw += 4;
2502 }
2503
2504 return state_offset;
2505 }
2506
2507 static inline uint32_t
2508 gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev,
2509 const struct ilo_sampler_cso *sampler,
2510 struct ilo_cp *cp)
2511 {
2512 const int state_align = 32 / 4;
2513 const int state_len = (dev->gen >= ILO_GEN(7)) ? 4 : 12;
2514 uint32_t state_offset, *dw;
2515
2516 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2517
2518 dw = ilo_cp_steal_ptr(cp, ILO_BUILDER_ITEM_BLOB,
2519 state_len, state_align, &state_offset);
2520
2521 /* see ilo_gpe_init_sampler_cso() */
2522 memcpy(dw, &sampler->payload[3], state_len * 4);
2523
2524 return state_offset;
2525 }
2526
2527 static inline uint32_t
2528 gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev,
2529 int size, void **pcb,
2530 struct ilo_cp *cp)
2531 {
2532 /*
2533 * For all VS, GS, FS, and CS push constant buffers, they must be aligned
2534 * to 32 bytes, and their sizes are specified in 256-bit units.
2535 */
2536 const int state_align = 32 / 4;
2537 const int state_len = align(size, 32) / 4;
2538 uint32_t state_offset;
2539 char *buf;
2540
2541 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2542
2543 buf = ilo_cp_steal_ptr(cp, ILO_BUILDER_ITEM_BLOB,
2544 state_len, state_align, &state_offset);
2545
2546 /* zero out the unused range */
2547 if (size < state_len * 4)
2548 memset(&buf[size], 0, state_len * 4 - size);
2549
2550 if (pcb)
2551 *pcb = buf;
2552
2553 return state_offset;
2554 }
2555
2556 #endif /* ILO_GPE_GEN6_H */