gallium/radeon: Only print a message for LLVM diagnostic errors
[mesa.git] / src / gallium / drivers / ilo / ilo_gpe_gen6.h
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #ifndef ILO_GPE_GEN6_H
29 #define ILO_GPE_GEN6_H
30
31 #include "genhw/genhw.h"
32 #include "intel_winsys.h"
33
34 #include "ilo_common.h"
35 #include "ilo_cp.h"
36 #include "ilo_format.h"
37 #include "ilo_resource.h"
38 #include "ilo_shader.h"
39 #include "ilo_gpe.h"
40
/*
 * Assert (debug builds only) that the generation of \p dev lies within the
 * inclusive range [min_gen, max_gen].
 */
#define ILO_GPE_VALID_GEN(dev, min_gen, max_gen) \
   assert(ILO_GEN(min_gen) <= (dev)->gen && ILO_GEN(max_gen) >= (dev)->gen)

/* Header dword of an MI command: zero in bits 31:29, \p op shifted to bit 23. */
#define ILO_GPE_MI(op) ((0x0 << 29) | ((op) << 23))

/*
 * Header dword of a non-MI command: 0x3 in bits 31:29, followed by
 * \p pipeline at bit 27, \p op at bit 24 and \p subop at bit 16.
 */
#define ILO_GPE_CMD(pipeline, op, subop) \
   ((0x3 << 29) | ((pipeline) << 27) | ((op) << 24) | ((subop) << 16))
48
49 /**
50 * Translate winsys tiling to hardware tiling.
51 */
52 static inline int
53 ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling)
54 {
55 switch (tiling) {
56 case INTEL_TILING_NONE:
57 return GEN6_TILING_NONE;
58 case INTEL_TILING_X:
59 return GEN6_TILING_X;
60 case INTEL_TILING_Y:
61 return GEN6_TILING_Y;
62 default:
63 assert(!"unknown tiling");
64 return GEN6_TILING_NONE;
65 }
66 }
67
68 /**
69 * Translate a pipe primitive type to the matching hardware primitive type.
70 */
71 static inline int
72 ilo_gpe_gen6_translate_pipe_prim(unsigned prim)
73 {
74 static const int prim_mapping[PIPE_PRIM_MAX] = {
75 [PIPE_PRIM_POINTS] = GEN6_3DPRIM_POINTLIST,
76 [PIPE_PRIM_LINES] = GEN6_3DPRIM_LINELIST,
77 [PIPE_PRIM_LINE_LOOP] = GEN6_3DPRIM_LINELOOP,
78 [PIPE_PRIM_LINE_STRIP] = GEN6_3DPRIM_LINESTRIP,
79 [PIPE_PRIM_TRIANGLES] = GEN6_3DPRIM_TRILIST,
80 [PIPE_PRIM_TRIANGLE_STRIP] = GEN6_3DPRIM_TRISTRIP,
81 [PIPE_PRIM_TRIANGLE_FAN] = GEN6_3DPRIM_TRIFAN,
82 [PIPE_PRIM_QUADS] = GEN6_3DPRIM_QUADLIST,
83 [PIPE_PRIM_QUAD_STRIP] = GEN6_3DPRIM_QUADSTRIP,
84 [PIPE_PRIM_POLYGON] = GEN6_3DPRIM_POLYGON,
85 [PIPE_PRIM_LINES_ADJACENCY] = GEN6_3DPRIM_LINELIST_ADJ,
86 [PIPE_PRIM_LINE_STRIP_ADJACENCY] = GEN6_3DPRIM_LINESTRIP_ADJ,
87 [PIPE_PRIM_TRIANGLES_ADJACENCY] = GEN6_3DPRIM_TRILIST_ADJ,
88 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = GEN6_3DPRIM_TRISTRIP_ADJ,
89 };
90
91 assert(prim_mapping[prim]);
92
93 return prim_mapping[prim];
94 }
95
96 /**
97 * Translate a pipe texture target to the matching hardware surface type.
98 */
99 static inline int
100 ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
101 {
102 switch (target) {
103 case PIPE_BUFFER:
104 return GEN6_SURFTYPE_BUFFER;
105 case PIPE_TEXTURE_1D:
106 case PIPE_TEXTURE_1D_ARRAY:
107 return GEN6_SURFTYPE_1D;
108 case PIPE_TEXTURE_2D:
109 case PIPE_TEXTURE_RECT:
110 case PIPE_TEXTURE_2D_ARRAY:
111 return GEN6_SURFTYPE_2D;
112 case PIPE_TEXTURE_3D:
113 return GEN6_SURFTYPE_3D;
114 case PIPE_TEXTURE_CUBE:
115 case PIPE_TEXTURE_CUBE_ARRAY:
116 return GEN6_SURFTYPE_CUBE;
117 default:
118 assert(!"unknown texture target");
119 return GEN6_SURFTYPE_BUFFER;
120 }
121 }
122
123 /**
124 * Fill in DW2 to DW7 of 3DSTATE_SF.
125 */
126 static inline void
127 ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
128 const struct ilo_rasterizer_state *rasterizer,
129 int num_samples,
130 enum pipe_format depth_format,
131 uint32_t *payload, unsigned payload_len)
132 {
133 assert(payload_len == Elements(rasterizer->sf.payload));
134
135 if (rasterizer) {
136 const struct ilo_rasterizer_sf *sf = &rasterizer->sf;
137
138 memcpy(payload, sf->payload, sizeof(sf->payload));
139 if (num_samples > 1)
140 payload[1] |= sf->dw_msaa;
141 }
142 else {
143 payload[0] = 0;
144 payload[1] = (num_samples > 1) ? GEN7_SF_DW2_MSRASTMODE_ON_PATTERN : 0;
145 payload[2] = 0;
146 payload[3] = 0;
147 payload[4] = 0;
148 payload[5] = 0;
149 }
150
151 if (dev->gen >= ILO_GEN(7)) {
152 int format;
153
154 /* separate stencil */
155 switch (depth_format) {
156 case PIPE_FORMAT_Z16_UNORM:
157 format = GEN6_ZFORMAT_D16_UNORM;
158 break;
159 case PIPE_FORMAT_Z32_FLOAT:
160 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
161 format = GEN6_ZFORMAT_D32_FLOAT;
162 break;
163 case PIPE_FORMAT_Z24X8_UNORM:
164 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
165 format = GEN6_ZFORMAT_D24_UNORM_X8_UINT;
166 break;
167 default:
168 /* FLOAT surface is assumed when there is no depth buffer */
169 format = GEN6_ZFORMAT_D32_FLOAT;
170 break;
171 }
172
173 payload[0] |= format << GEN7_SF_DW1_DEPTH_FORMAT__SHIFT;
174 }
175 }
176
177 /**
178 * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
179 */
180 static inline void
181 ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
182 const struct ilo_rasterizer_state *rasterizer,
183 const struct ilo_shader_state *fs,
184 uint32_t *dw, int num_dwords)
185 {
186 int output_count, vue_offset, vue_len;
187 const struct ilo_kernel_routing *routing;
188
189 ILO_GPE_VALID_GEN(dev, 6, 7.5);
190 assert(num_dwords == 13);
191
192 if (!fs) {
193 memset(dw, 0, sizeof(dw[0]) * num_dwords);
194 dw[0] = 1 << GEN7_SBE_DW1_URB_READ_LEN__SHIFT;
195 return;
196 }
197
198 output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
199 assert(output_count <= 32);
200
201 routing = ilo_shader_get_kernel_routing(fs);
202
203 vue_offset = routing->source_skip;
204 assert(vue_offset % 2 == 0);
205 vue_offset /= 2;
206
207 vue_len = (routing->source_len + 1) / 2;
208 if (!vue_len)
209 vue_len = 1;
210
211 dw[0] = output_count << GEN7_SBE_DW1_ATTR_COUNT__SHIFT |
212 vue_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT |
213 vue_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT;
214 if (routing->swizzle_enable)
215 dw[0] |= GEN7_SBE_DW1_ATTR_SWIZZLE_ENABLE;
216
217 switch (rasterizer->state.sprite_coord_mode) {
218 case PIPE_SPRITE_COORD_UPPER_LEFT:
219 dw[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_UPPERLEFT;
220 break;
221 case PIPE_SPRITE_COORD_LOWER_LEFT:
222 dw[0] |= GEN7_SBE_DW1_POINT_SPRITE_TEXCOORD_LOWERLEFT;
223 break;
224 }
225
226 STATIC_ASSERT(Elements(routing->swizzles) >= 16);
227 memcpy(&dw[1], routing->swizzles, 2 * 16);
228
229 /*
230 * From the Ivy Bridge PRM, volume 2 part 1, page 268:
231 *
232 * "This field (Point Sprite Texture Coordinate Enable) must be
233 * programmed to 0 when non-point primitives are rendered."
234 *
235 * TODO We do not check that yet.
236 */
237 dw[9] = routing->point_sprite_enable;
238
239 dw[10] = routing->const_interp_enable;
240
241 /* WrapShortest enables */
242 dw[11] = 0;
243 dw[12] = 0;
244 }
245
246 static inline void
247 gen6_emit_MI_STORE_DATA_IMM(const struct ilo_dev_info *dev,
248 struct intel_bo *bo, uint32_t bo_offset,
249 uint64_t val, bool store_qword,
250 struct ilo_cp *cp)
251 {
252 const uint32_t cmd = ILO_GPE_MI(0x20);
253 const uint8_t cmd_len = (store_qword) ? 5 : 4;
254 /* must use GGTT on GEN6 as in PIPE_CONTROL */
255 const uint32_t cmd_flags = (dev->gen == ILO_GEN(6)) ? (1 << 22) : 0;
256 const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
257 const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
258
259 ILO_GPE_VALID_GEN(dev, 6, 7.5);
260
261 assert(bo_offset % ((store_qword) ? 8 : 4) == 0);
262
263 ilo_cp_begin(cp, cmd_len);
264 ilo_cp_write(cp, cmd | cmd_flags | (cmd_len - 2));
265 ilo_cp_write(cp, 0);
266 ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
267 ilo_cp_write(cp, (uint32_t) val);
268
269 if (store_qword)
270 ilo_cp_write(cp, (uint32_t) (val >> 32));
271 else
272 assert(val == (uint64_t) ((uint32_t) val));
273
274 ilo_cp_end(cp);
275 }
276
/**
 * Emit MI_LOAD_REGISTER_IMM: a 3-dword command carrying the register offset
 * \p reg (which must be dword-aligned) and the immediate \p val.
 */
static inline void
gen6_emit_MI_LOAD_REGISTER_IMM(const struct ilo_dev_info *dev,
                               uint32_t reg, uint32_t val,
                               struct ilo_cp *cp)
{
   const uint8_t num_dwords = 3;
   const uint32_t header = ILO_GPE_MI(0x22) | (num_dwords - 2);

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   assert(reg % 4 == 0);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, reg);
   ilo_cp_write(cp, val);
   ilo_cp_end(cp);
}
295
296 static inline void
297 gen6_emit_MI_STORE_REGISTER_MEM(const struct ilo_dev_info *dev,
298 struct intel_bo *bo, uint32_t bo_offset,
299 uint32_t reg, struct ilo_cp *cp)
300 {
301 const uint32_t cmd = ILO_GPE_MI(0x24);
302 const uint8_t cmd_len = 3;
303 /* must use GGTT on GEN6 as in PIPE_CONTROL */
304 const uint32_t cmd_flags = (dev->gen == ILO_GEN(6)) ? (1 << 22) : 0;
305 const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
306 const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
307
308 ILO_GPE_VALID_GEN(dev, 6, 7.5);
309
310 assert(reg % 4 == 0 && bo_offset % 4 == 0);
311
312 ilo_cp_begin(cp, cmd_len);
313 ilo_cp_write(cp, cmd | cmd_flags | (cmd_len - 2));
314 ilo_cp_write(cp, reg);
315 ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
316 ilo_cp_end(cp);
317 }
318
319 static inline void
320 gen6_emit_MI_REPORT_PERF_COUNT(const struct ilo_dev_info *dev,
321 struct intel_bo *bo, uint32_t bo_offset,
322 uint32_t report_id, struct ilo_cp *cp)
323 {
324 const uint32_t cmd = ILO_GPE_MI(0x28);
325 const uint8_t cmd_len = 3;
326 const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
327 const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
328
329 ILO_GPE_VALID_GEN(dev, 6, 7.5);
330
331 assert(bo_offset % 64 == 0);
332
333 /* must use GGTT on GEN6 as in PIPE_CONTROL */
334 if (dev->gen == ILO_GEN(6))
335 bo_offset |= 0x1;
336
337 ilo_cp_begin(cp, cmd_len);
338 ilo_cp_write(cp, cmd | (cmd_len - 2));
339 ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
340 ilo_cp_write(cp, report_id);
341 ilo_cp_end(cp);
342 }
343
344 static inline void
345 gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev,
346 struct intel_bo *general_state_bo,
347 struct intel_bo *surface_state_bo,
348 struct intel_bo *dynamic_state_bo,
349 struct intel_bo *indirect_object_bo,
350 struct intel_bo *instruction_bo,
351 uint32_t general_state_size,
352 uint32_t dynamic_state_size,
353 uint32_t indirect_object_size,
354 uint32_t instruction_size,
355 struct ilo_cp *cp)
356 {
357 const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01);
358 const uint8_t cmd_len = 10;
359
360 ILO_GPE_VALID_GEN(dev, 6, 7.5);
361
362 /* 4K-page aligned */
363 assert(((general_state_size | dynamic_state_size |
364 indirect_object_size | instruction_size) & 0xfff) == 0);
365
366 ilo_cp_begin(cp, cmd_len);
367 ilo_cp_write(cp, cmd | (cmd_len - 2));
368
369 ilo_cp_write_bo(cp, 1, general_state_bo,
370 INTEL_DOMAIN_RENDER,
371 0);
372 ilo_cp_write_bo(cp, 1, surface_state_bo,
373 INTEL_DOMAIN_SAMPLER,
374 0);
375 ilo_cp_write_bo(cp, 1, dynamic_state_bo,
376 INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
377 0);
378 ilo_cp_write_bo(cp, 1, indirect_object_bo,
379 0,
380 0);
381 ilo_cp_write_bo(cp, 1, instruction_bo,
382 INTEL_DOMAIN_INSTRUCTION,
383 0);
384
385 if (general_state_size) {
386 ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo,
387 INTEL_DOMAIN_RENDER,
388 0);
389 }
390 else {
391 /* skip range check */
392 ilo_cp_write(cp, 1);
393 }
394
395 if (dynamic_state_size) {
396 ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo,
397 INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
398 0);
399 }
400 else {
401 /* skip range check */
402 ilo_cp_write(cp, 0xfffff000 + 1);
403 }
404
405 if (indirect_object_size) {
406 ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo,
407 0,
408 0);
409 }
410 else {
411 /* skip range check */
412 ilo_cp_write(cp, 0xfffff000 + 1);
413 }
414
415 if (instruction_size) {
416 ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo,
417 INTEL_DOMAIN_INSTRUCTION,
418 0);
419 }
420 else {
421 /* skip range check */
422 ilo_cp_write(cp, 1);
423 }
424
425 ilo_cp_end(cp);
426 }
427
/**
 * Emit STATE_SIP: a 2-dword command whose payload is \p sip, written as-is.
 */
static inline void
gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
                    uint32_t sip,
                    struct ilo_cp *cp)
{
   const uint8_t num_dwords = 2;
   const uint32_t header = ILO_GPE_CMD(0x0, 0x1, 0x02) | (num_dwords - 2);

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, sip);
   ilo_cp_end(cp);
}
443
/**
 * Emit 3DSTATE_VF_STATISTICS: a single-dword command with \p enable in
 * bit 0 of the header.
 */
static inline void
gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev,
                                bool enable,
                                struct ilo_cp *cp)
{
   const uint8_t num_dwords = 1;
   const uint32_t opcode = ILO_GPE_CMD(0x1, 0x0, 0x0b);

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   ilo_cp_begin(cp, num_dwords);
   /* no length field; the low bit carries the enable flag */
   ilo_cp_write(cp, opcode | enable);
   ilo_cp_end(cp);
}
458
/**
 * Emit PIPELINE_SELECT: a single-dword command with the pipeline selection
 * in the low bits of the header.
 *
 * \param pipeline  0x0 (3D) or 0x1 (media)
 */
static inline void
gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev,
                          int pipeline,
                          struct ilo_cp *cp)
{
   /* uint32_t, like the command header in every other emitter in this file */
   const uint32_t cmd = ILO_GPE_CMD(0x1, 0x1, 0x04);
   const uint8_t cmd_len = 1;

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   /* 3D or media */
   assert(pipeline == 0x0 || pipeline == 0x1);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | pipeline);
   ilo_cp_end(cp);
}
476
/**
 * Emit MEDIA_VFE_STATE (8 dwords, GEN6 only).
 *
 * \param max_threads      programmed as max_threads - 1
 * \param num_urb_entries  number of URB entries
 * \param urb_entry_size   URB entry allocation size
 *
 * The scratch and scoreboard dwords are written as zero and the CURBE
 * allocation size is fixed at 480.
 */
static inline void
gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
                          int max_threads, int num_urb_entries,
                          int urb_entry_size,
                          struct ilo_cp *cp)
{
   const uint8_t num_dwords = 8;
   const uint32_t header = ILO_GPE_CMD(0x2, 0x0, 0x00) | (num_dwords - 2);
   uint32_t threads_urb, alloc_sizes;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   threads_urb = (max_threads - 1) << 16 |
                 num_urb_entries << 8 |
                 1 << 7 | /* Reset Gateway Timer */
                 1 << 6;  /* Bypass Gateway Control */

   alloc_sizes = urb_entry_size << 16 | /* URB Entry Allocation Size */
                 480;                   /* CURBE Allocation Size */

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, 0); /* scratch */
   ilo_cp_write(cp, threads_urb);
   ilo_cp_write(cp, 0); /* MBZ */
   ilo_cp_write(cp, alloc_sizes);
   ilo_cp_write(cp, 0); /* scoreboard */
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
508
/**
 * Emit MEDIA_CURBE_LOAD (4 dwords, GEN6 only).
 *
 * \param buf   offset of the CURBE data; must be 32-byte aligned
 * \param size  data size in bytes; rounded up to a multiple of 32 before
 *              being written
 */
static inline void
gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev,
                           uint32_t buf, int size,
                           struct ilo_cp *cp)
{
   const uint8_t num_dwords = 4;
   const uint32_t header = ILO_GPE_CMD(0x2, 0x0, 0x01) | (num_dwords - 2);
   int padded_size;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   assert(buf % 32 == 0);
   /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
   padded_size = align(size, 32);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, 0); /* MBZ */
   ilo_cp_write(cp, padded_size);
   ilo_cp_write(cp, buf);
   ilo_cp_end(cp);
}
530
/**
 * Emit MEDIA_INTERFACE_DESCRIPTOR_LOAD (4 dwords, GEN6 only).
 *
 * \param offset   offset of the first descriptor; must be 32-byte aligned
 * \param num_ids  number of descriptors; the total length is written in
 *                 bytes
 */
static inline void
gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev,
                                          uint32_t offset, int num_ids,
                                          struct ilo_cp *cp)
{
   const uint8_t num_dwords = 4;
   const uint32_t header = ILO_GPE_CMD(0x2, 0x0, 0x02) | (num_dwords - 2);
   /* every ID has 8 DWords */
   const int total_bytes = num_ids * 8 * 4;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   assert(offset % 32 == 0);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, 0); /* MBZ */
   ilo_cp_write(cp, total_bytes);
   ilo_cp_write(cp, offset);
   ilo_cp_end(cp);
}
551
/**
 * Emit MEDIA_GATEWAY_STATE (2 dwords, GEN6 only), packing \p id at bit 16,
 * \p byte at bit 8 and \p thread_count in the low bits of the payload.
 */
static inline void
gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev,
                              int id, int byte, int thread_count,
                              struct ilo_cp *cp)
{
   const uint8_t num_dwords = 2;
   const uint32_t header = ILO_GPE_CMD(0x2, 0x0, 0x03) | (num_dwords - 2);
   uint32_t packed;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   packed = id << 16 | byte << 8 | thread_count;

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, packed);
   ilo_cp_end(cp);
}
572
/**
 * Emit MEDIA_STATE_FLUSH (2 dwords, GEN6 only), packing
 * \p thread_count_water_mark at bit 16 and \p barrier_mask in the low bits
 * of the payload.
 */
static inline void
gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev,
                            int thread_count_water_mark,
                            int barrier_mask,
                            struct ilo_cp *cp)
{
   const uint8_t num_dwords = 2;
   const uint32_t header = ILO_GPE_CMD(0x2, 0x0, 0x04) | (num_dwords - 2);
   uint32_t packed;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   packed = thread_count_water_mark << 16 | barrier_mask;

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, packed);
   ilo_cp_end(cp);
}
593
/**
 * Placeholder for MEDIA_OBJECT_WALKER, which is not implemented: it emits
 * nothing and trips an assertion in debug builds.
 */
static inline void
gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev,
                              struct ilo_cp *cp)
{
   /* neither argument is consumed */
   (void) dev;
   (void) cp;

   assert(!"MEDIA_OBJECT_WALKER unsupported");
}
600
601 static inline void
602 gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev,
603 uint32_t vs_binding_table,
604 uint32_t gs_binding_table,
605 uint32_t ps_binding_table,
606 struct ilo_cp *cp)
607 {
608 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x01);
609 const uint8_t cmd_len = 4;
610
611 ILO_GPE_VALID_GEN(dev, 6, 6);
612
613 ilo_cp_begin(cp, cmd_len);
614 ilo_cp_write(cp, cmd | (cmd_len - 2) |
615 GEN6_PTR_BINDING_TABLE_DW0_VS_CHANGED |
616 GEN6_PTR_BINDING_TABLE_DW0_GS_CHANGED |
617 GEN6_PTR_BINDING_TABLE_DW0_PS_CHANGED);
618 ilo_cp_write(cp, vs_binding_table);
619 ilo_cp_write(cp, gs_binding_table);
620 ilo_cp_write(cp, ps_binding_table);
621 ilo_cp_end(cp);
622 }
623
624 static inline void
625 gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev,
626 uint32_t vs_sampler_state,
627 uint32_t gs_sampler_state,
628 uint32_t ps_sampler_state,
629 struct ilo_cp *cp)
630 {
631 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x02);
632 const uint8_t cmd_len = 4;
633
634 ILO_GPE_VALID_GEN(dev, 6, 6);
635
636 ilo_cp_begin(cp, cmd_len);
637 ilo_cp_write(cp, cmd | (cmd_len - 2) |
638 GEN6_PTR_SAMPLER_DW0_VS_CHANGED |
639 GEN6_PTR_SAMPLER_DW0_GS_CHANGED |
640 GEN6_PTR_SAMPLER_DW0_PS_CHANGED);
641 ilo_cp_write(cp, vs_sampler_state);
642 ilo_cp_write(cp, gs_sampler_state);
643 ilo_cp_write(cp, ps_sampler_state);
644 ilo_cp_end(cp);
645 }
646
647 static inline void
648 gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
649 int vs_total_size, int gs_total_size,
650 int vs_entry_size, int gs_entry_size,
651 struct ilo_cp *cp)
652 {
653 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x05);
654 const uint8_t cmd_len = 3;
655 const int row_size = 128; /* 1024 bits */
656 int vs_alloc_size, gs_alloc_size;
657 int vs_num_entries, gs_num_entries;
658
659 ILO_GPE_VALID_GEN(dev, 6, 6);
660
661 /* in 1024-bit URB rows */
662 vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
663 gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
664
665 /* the valid range is [1, 5] */
666 if (!vs_alloc_size)
667 vs_alloc_size = 1;
668 if (!gs_alloc_size)
669 gs_alloc_size = 1;
670 assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
671
672 /* the valid range is [24, 256] in multiples of 4 */
673 vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
674 if (vs_num_entries > 256)
675 vs_num_entries = 256;
676 assert(vs_num_entries >= 24);
677
678 /* the valid range is [0, 256] in multiples of 4 */
679 gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
680 if (gs_num_entries > 256)
681 gs_num_entries = 256;
682
683 ilo_cp_begin(cp, cmd_len);
684 ilo_cp_write(cp, cmd | (cmd_len - 2));
685 ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT |
686 vs_num_entries << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT);
687 ilo_cp_write(cp, gs_num_entries << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT |
688 (gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT);
689 ilo_cp_end(cp);
690 }
691
692 static inline void
693 gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
694 const struct ilo_ve_state *ve,
695 const struct ilo_vb_state *vb,
696 struct ilo_cp *cp)
697 {
698 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08);
699 uint8_t cmd_len;
700 unsigned hw_idx;
701
702 ILO_GPE_VALID_GEN(dev, 6, 7.5);
703
704 /*
705 * From the Sandy Bridge PRM, volume 2 part 1, page 82:
706 *
707 * "From 1 to 33 VBs can be specified..."
708 */
709 assert(ve->vb_count <= 33);
710
711 if (!ve->vb_count)
712 return;
713
714 cmd_len = 1 + 4 * ve->vb_count;
715
716 ilo_cp_begin(cp, cmd_len);
717 ilo_cp_write(cp, cmd | (cmd_len - 2));
718
719 for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
720 const unsigned instance_divisor = ve->instance_divisors[hw_idx];
721 const unsigned pipe_idx = ve->vb_mapping[hw_idx];
722 const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx];
723 uint32_t dw;
724
725 dw = hw_idx << GEN6_VB_STATE_DW0_INDEX__SHIFT;
726
727 if (instance_divisor)
728 dw |= GEN6_VB_STATE_DW0_ACCESS_INSTANCEDATA;
729 else
730 dw |= GEN6_VB_STATE_DW0_ACCESS_VERTEXDATA;
731
732 if (dev->gen >= ILO_GEN(7))
733 dw |= GEN7_VB_STATE_DW0_ADDR_MODIFIED;
734
735 /* use null vb if there is no buffer or the stride is out of range */
736 if (cso->buffer && cso->stride <= 2048) {
737 const struct ilo_buffer *buf = ilo_buffer(cso->buffer);
738 const uint32_t start_offset = cso->buffer_offset;
739 const uint32_t end_offset = buf->bo_size - 1;
740
741 dw |= cso->stride << GEN6_VB_STATE_DW0_PITCH__SHIFT;
742
743 ilo_cp_write(cp, dw);
744 ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
745 ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
746 ilo_cp_write(cp, instance_divisor);
747 }
748 else {
749 dw |= 1 << 13;
750
751 ilo_cp_write(cp, dw);
752 ilo_cp_write(cp, 0);
753 ilo_cp_write(cp, 0);
754 ilo_cp_write(cp, instance_divisor);
755 }
756 }
757
758 ilo_cp_end(cp);
759 }
760
761 static inline void
762 ve_init_cso_with_components(const struct ilo_dev_info *dev,
763 int comp0, int comp1, int comp2, int comp3,
764 struct ilo_ve_cso *cso)
765 {
766 ILO_GPE_VALID_GEN(dev, 6, 7.5);
767
768 STATIC_ASSERT(Elements(cso->payload) >= 2);
769 cso->payload[0] = GEN6_VE_STATE_DW0_VALID;
770 cso->payload[1] =
771 comp0 << GEN6_VE_STATE_DW1_COMP0__SHIFT |
772 comp1 << GEN6_VE_STATE_DW1_COMP1__SHIFT |
773 comp2 << GEN6_VE_STATE_DW1_COMP2__SHIFT |
774 comp3 << GEN6_VE_STATE_DW1_COMP3__SHIFT;
775 }
776
777 static inline void
778 ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
779 struct ilo_ve_cso *cso)
780 {
781 int format;
782
783 ILO_GPE_VALID_GEN(dev, 6, 7.5);
784
785 /*
786 * From the Sandy Bridge PRM, volume 2 part 1, page 94:
787 *
788 * "- This bit (Edge Flag Enable) must only be ENABLED on the last
789 * valid VERTEX_ELEMENT structure.
790 *
791 * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
792 * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
793 *
794 * - The Source Element Format must be set to the UINT format.
795 *
796 * - [DevSNB]: Edge Flags are not supported for QUADLIST
797 * primitives. Software may elect to convert QUADLIST primitives
798 * to some set of corresponding edge-flag-supported primitive
799 * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
800 */
801
802 cso->payload[0] |= GEN6_VE_STATE_DW0_EDGE_FLAG_ENABLE;
803 cso->payload[1] =
804 GEN6_VFCOMP_STORE_SRC << GEN6_VE_STATE_DW1_COMP0__SHIFT |
805 GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP1__SHIFT |
806 GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP2__SHIFT |
807 GEN6_VFCOMP_NOSTORE << GEN6_VE_STATE_DW1_COMP3__SHIFT;
808
809 /*
810 * Edge flags have format GEN6_FORMAT_R8_UINT when defined via
811 * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined
812 * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
813 *
814 * Since all the hardware cares about is whether the flags are zero or not,
815 * we can treat them as GEN6_FORMAT_R32_UINT in the latter case.
816 */
817 format = (cso->payload[0] >> GEN6_VE_STATE_DW0_FORMAT__SHIFT) & 0x1ff;
818 if (format == GEN6_FORMAT_R32_FLOAT) {
819 STATIC_ASSERT(GEN6_FORMAT_R32_UINT == GEN6_FORMAT_R32_FLOAT - 1);
820 cso->payload[0] -= (1 << GEN6_VE_STATE_DW0_FORMAT__SHIFT);
821 }
822 else {
823 assert(format == GEN6_FORMAT_R8_UINT);
824 }
825 }
826
827 static inline void
828 gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
829 const struct ilo_ve_state *ve,
830 bool last_velement_edgeflag,
831 bool prepend_generated_ids,
832 struct ilo_cp *cp)
833 {
834 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09);
835 uint8_t cmd_len;
836 unsigned i;
837
838 ILO_GPE_VALID_GEN(dev, 6, 7.5);
839
840 /*
841 * From the Sandy Bridge PRM, volume 2 part 1, page 93:
842 *
843 * "Up to 34 (DevSNB+) vertex elements are supported."
844 */
845 assert(ve->count + prepend_generated_ids <= 34);
846
847 if (!ve->count && !prepend_generated_ids) {
848 struct ilo_ve_cso dummy;
849
850 ve_init_cso_with_components(dev,
851 GEN6_VFCOMP_STORE_0,
852 GEN6_VFCOMP_STORE_0,
853 GEN6_VFCOMP_STORE_0,
854 GEN6_VFCOMP_STORE_1_FP,
855 &dummy);
856
857 cmd_len = 3;
858 ilo_cp_begin(cp, cmd_len);
859 ilo_cp_write(cp, cmd | (cmd_len - 2));
860 ilo_cp_write_multi(cp, dummy.payload, 2);
861 ilo_cp_end(cp);
862
863 return;
864 }
865
866 cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;
867
868 ilo_cp_begin(cp, cmd_len);
869 ilo_cp_write(cp, cmd | (cmd_len - 2));
870
871 if (prepend_generated_ids) {
872 struct ilo_ve_cso gen_ids;
873
874 ve_init_cso_with_components(dev,
875 GEN6_VFCOMP_STORE_VID,
876 GEN6_VFCOMP_STORE_IID,
877 GEN6_VFCOMP_NOSTORE,
878 GEN6_VFCOMP_NOSTORE,
879 &gen_ids);
880
881 ilo_cp_write_multi(cp, gen_ids.payload, 2);
882 }
883
884 if (last_velement_edgeflag) {
885 struct ilo_ve_cso edgeflag;
886
887 for (i = 0; i < ve->count - 1; i++)
888 ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
889
890 edgeflag = ve->cso[i];
891 ve_set_cso_edgeflag(dev, &edgeflag);
892 ilo_cp_write_multi(cp, edgeflag.payload, 2);
893 }
894 else {
895 for (i = 0; i < ve->count; i++)
896 ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
897 }
898
899 ilo_cp_end(cp);
900 }
901
902 static inline void
903 gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
904 const struct ilo_ib_state *ib,
905 bool enable_cut_index,
906 struct ilo_cp *cp)
907 {
908 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a);
909 const uint8_t cmd_len = 3;
910 struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
911 uint32_t start_offset, end_offset;
912 int format;
913
914 ILO_GPE_VALID_GEN(dev, 6, 7.5);
915
916 if (!buf)
917 return;
918
919 /* this is moved to the new 3DSTATE_VF */
920 if (dev->gen >= ILO_GEN(7.5))
921 assert(!enable_cut_index);
922
923 switch (ib->hw_index_size) {
924 case 4:
925 format = GEN6_IB_DW0_FORMAT_DWORD;
926 break;
927 case 2:
928 format = GEN6_IB_DW0_FORMAT_WORD;
929 break;
930 case 1:
931 format = GEN6_IB_DW0_FORMAT_BYTE;
932 break;
933 default:
934 assert(!"unknown index size");
935 format = GEN6_IB_DW0_FORMAT_BYTE;
936 break;
937 }
938
939 /*
940 * set start_offset to 0 here and adjust pipe_draw_info::start with
941 * ib->draw_start_offset in 3DPRIMITIVE
942 */
943 start_offset = 0;
944 end_offset = buf->bo_size;
945
946 /* end_offset must also be aligned and is inclusive */
947 end_offset -= (end_offset % ib->hw_index_size);
948 end_offset--;
949
950 ilo_cp_begin(cp, cmd_len);
951 ilo_cp_write(cp, cmd | (cmd_len - 2) |
952 ((enable_cut_index) ? GEN6_IB_DW0_CUT_INDEX_ENABLE : 0) |
953 format);
954 ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
955 ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
956 ilo_cp_end(cp);
957 }
958
959 static inline void
960 gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev,
961 uint32_t clip_viewport,
962 uint32_t sf_viewport,
963 uint32_t cc_viewport,
964 struct ilo_cp *cp)
965 {
966 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0d);
967 const uint8_t cmd_len = 4;
968
969 ILO_GPE_VALID_GEN(dev, 6, 6);
970
971 ilo_cp_begin(cp, cmd_len);
972 ilo_cp_write(cp, cmd | (cmd_len - 2) |
973 GEN6_PTR_VP_DW0_CLIP_CHANGED |
974 GEN6_PTR_VP_DW0_SF_CHANGED |
975 GEN6_PTR_VP_DW0_CC_CHANGED);
976 ilo_cp_write(cp, clip_viewport);
977 ilo_cp_write(cp, sf_viewport);
978 ilo_cp_write(cp, cc_viewport);
979 ilo_cp_end(cp);
980 }
981
/**
 * Emit 3DSTATE_CC_STATE_POINTERS (4 dwords, GEN6 only).
 *
 * Each of the three state offsets is written with bit 0 set (presumably the
 * per-pointer "changed" flag -- confirm against the PRM).
 */
static inline void
gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
                                    uint32_t blend_state,
                                    uint32_t depth_stencil_state,
                                    uint32_t color_calc_state,
                                    struct ilo_cp *cp)
{
   const uint8_t num_dwords = 4;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x0e) | (num_dwords - 2);
   const uint32_t low_bit = 1;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, blend_state | low_bit);
   ilo_cp_write(cp, depth_stencil_state | low_bit);
   ilo_cp_write(cp, color_calc_state | low_bit);
   ilo_cp_end(cp);
}
1001
/**
 * Emit 3DSTATE_SCISSOR_STATE_POINTERS: a 2-dword command whose payload is
 * \p scissor_rect, written as-is.
 */
static inline void
gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
                                         uint32_t scissor_rect,
                                         struct ilo_cp *cp)
{
   const uint8_t num_dwords = 2;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x0f) | (num_dwords - 2);

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, scissor_rect);
   ilo_cp_end(cp);
}
1017
1018 static inline void
1019 gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
1020 const struct ilo_shader_state *vs,
1021 int num_samplers,
1022 struct ilo_cp *cp)
1023 {
1024 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10);
1025 const uint8_t cmd_len = 6;
1026 const struct ilo_shader_cso *cso;
1027 uint32_t dw2, dw4, dw5;
1028
1029 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1030
1031 if (!vs) {
1032 ilo_cp_begin(cp, cmd_len);
1033 ilo_cp_write(cp, cmd | (cmd_len - 2));
1034 ilo_cp_write(cp, 0);
1035 ilo_cp_write(cp, 0);
1036 ilo_cp_write(cp, 0);
1037 ilo_cp_write(cp, 0);
1038 ilo_cp_write(cp, 0);
1039 ilo_cp_end(cp);
1040 return;
1041 }
1042
1043 cso = ilo_shader_get_kernel_cso(vs);
1044 dw2 = cso->payload[0];
1045 dw4 = cso->payload[1];
1046 dw5 = cso->payload[2];
1047
1048 dw2 |= ((num_samplers + 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
1049
1050 ilo_cp_begin(cp, cmd_len);
1051 ilo_cp_write(cp, cmd | (cmd_len - 2));
1052 ilo_cp_write(cp, ilo_shader_get_kernel_offset(vs));
1053 ilo_cp_write(cp, dw2);
1054 ilo_cp_write(cp, 0); /* scratch */
1055 ilo_cp_write(cp, dw4);
1056 ilo_cp_write(cp, dw5);
1057 ilo_cp_end(cp);
1058 }
1059
1060 static inline void
1061 gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
1062 const struct ilo_shader_state *gs,
1063 const struct ilo_shader_state *vs,
1064 int verts_per_prim,
1065 struct ilo_cp *cp)
1066 {
1067 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
1068 const uint8_t cmd_len = 7;
1069 uint32_t dw1, dw2, dw4, dw5, dw6;
1070
1071 ILO_GPE_VALID_GEN(dev, 6, 6);
1072
1073 if (gs) {
1074 const struct ilo_shader_cso *cso;
1075
1076 dw1 = ilo_shader_get_kernel_offset(gs);
1077
1078 cso = ilo_shader_get_kernel_cso(gs);
1079 dw2 = cso->payload[0];
1080 dw4 = cso->payload[1];
1081 dw5 = cso->payload[2];
1082 dw6 = cso->payload[3];
1083 }
1084 else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
1085 struct ilo_shader_cso cso;
1086 enum ilo_kernel_param param;
1087
1088 switch (verts_per_prim) {
1089 case 1:
1090 param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
1091 break;
1092 case 2:
1093 param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
1094 break;
1095 default:
1096 param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
1097 break;
1098 }
1099
1100 dw1 = ilo_shader_get_kernel_offset(vs) +
1101 ilo_shader_get_kernel_param(vs, param);
1102
1103 /* cannot use VS's CSO */
1104 ilo_gpe_init_gs_cso_gen6(dev, vs, &cso);
1105 dw2 = cso.payload[0];
1106 dw4 = cso.payload[1];
1107 dw5 = cso.payload[2];
1108 dw6 = cso.payload[3];
1109 }
1110 else {
1111 dw1 = 0;
1112 dw2 = 0;
1113 dw4 = 1 << GEN6_GS_DW4_URB_READ_LEN__SHIFT;
1114 dw5 = GEN6_GS_DW5_STATISTICS;
1115 dw6 = 0;
1116 }
1117
1118 ilo_cp_begin(cp, cmd_len);
1119 ilo_cp_write(cp, cmd | (cmd_len - 2));
1120 ilo_cp_write(cp, dw1);
1121 ilo_cp_write(cp, dw2);
1122 ilo_cp_write(cp, 0);
1123 ilo_cp_write(cp, dw4);
1124 ilo_cp_write(cp, dw5);
1125 ilo_cp_write(cp, dw6);
1126 ilo_cp_end(cp);
1127 }
1128
1129 static inline void
1130 gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
1131 const struct ilo_rasterizer_state *rasterizer,
1132 const struct ilo_shader_state *fs,
1133 bool enable_guardband,
1134 int num_viewports,
1135 struct ilo_cp *cp)
1136 {
1137 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
1138 const uint8_t cmd_len = 4;
1139 uint32_t dw1, dw2, dw3;
1140
1141 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1142
1143 if (rasterizer) {
1144 int interps;
1145
1146 dw1 = rasterizer->clip.payload[0];
1147 dw2 = rasterizer->clip.payload[1];
1148 dw3 = rasterizer->clip.payload[2];
1149
1150 if (enable_guardband && rasterizer->clip.can_enable_guardband)
1151 dw2 |= GEN6_CLIP_DW2_GB_TEST_ENABLE;
1152
1153 interps = (fs) ? ilo_shader_get_kernel_param(fs,
1154 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0;
1155
1156 if (interps & (GEN6_INTERP_NONPERSPECTIVE_PIXEL |
1157 GEN6_INTERP_NONPERSPECTIVE_CENTROID |
1158 GEN6_INTERP_NONPERSPECTIVE_SAMPLE))
1159 dw2 |= GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE;
1160
1161 dw3 |= GEN6_CLIP_DW3_RTAINDEX_FORCED_ZERO |
1162 (num_viewports - 1);
1163 }
1164 else {
1165 dw1 = 0;
1166 dw2 = 0;
1167 dw3 = 0;
1168 }
1169
1170 ilo_cp_begin(cp, cmd_len);
1171 ilo_cp_write(cp, cmd | (cmd_len - 2));
1172 ilo_cp_write(cp, dw1);
1173 ilo_cp_write(cp, dw2);
1174 ilo_cp_write(cp, dw3);
1175 ilo_cp_end(cp);
1176 }
1177
1178 static inline void
1179 gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
1180 const struct ilo_rasterizer_state *rasterizer,
1181 const struct ilo_shader_state *fs,
1182 struct ilo_cp *cp)
1183 {
1184 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
1185 const uint8_t cmd_len = 20;
1186 uint32_t payload_raster[6], payload_sbe[13];
1187
1188 ILO_GPE_VALID_GEN(dev, 6, 6);
1189
1190 ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer,
1191 1, PIPE_FORMAT_NONE, payload_raster, Elements(payload_raster));
1192 ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
1193 fs, payload_sbe, Elements(payload_sbe));
1194
1195 ilo_cp_begin(cp, cmd_len);
1196 ilo_cp_write(cp, cmd | (cmd_len - 2));
1197 ilo_cp_write(cp, payload_sbe[0]);
1198 ilo_cp_write_multi(cp, payload_raster, 6);
1199 ilo_cp_write_multi(cp, &payload_sbe[1], 12);
1200 ilo_cp_end(cp);
1201 }
1202
1203 static inline void
1204 gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
1205 const struct ilo_shader_state *fs,
1206 int num_samplers,
1207 const struct ilo_rasterizer_state *rasterizer,
1208 bool dual_blend, bool cc_may_kill,
1209 uint32_t hiz_op,
1210 struct ilo_cp *cp)
1211 {
1212 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
1213 const uint8_t cmd_len = 9;
1214 const int num_samples = 1;
1215 const struct ilo_shader_cso *fs_cso;
1216 uint32_t dw2, dw4, dw5, dw6;
1217
1218 ILO_GPE_VALID_GEN(dev, 6, 6);
1219
1220 if (!fs) {
1221 /* see brwCreateContext() */
1222 const int max_threads = (dev->gt == 2) ? 80 : 40;
1223
1224 ilo_cp_begin(cp, cmd_len);
1225 ilo_cp_write(cp, cmd | (cmd_len - 2));
1226 ilo_cp_write(cp, 0);
1227 ilo_cp_write(cp, 0);
1228 ilo_cp_write(cp, 0);
1229 ilo_cp_write(cp, hiz_op);
1230 /* honor the valid range even if dispatching is disabled */
1231 ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT);
1232 ilo_cp_write(cp, 0);
1233 ilo_cp_write(cp, 0);
1234 ilo_cp_write(cp, 0);
1235 ilo_cp_end(cp);
1236
1237 return;
1238 }
1239
1240 fs_cso = ilo_shader_get_kernel_cso(fs);
1241 dw2 = fs_cso->payload[0];
1242 dw4 = fs_cso->payload[1];
1243 dw5 = fs_cso->payload[2];
1244 dw6 = fs_cso->payload[3];
1245
1246 dw2 |= (num_samplers + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT;
1247
1248 /*
1249 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1250 *
1251 * "This bit (Statistics Enable) must be disabled if either of these
1252 * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer Resolve
1253 * Enable or Depth Buffer Resolve Enable."
1254 */
1255 assert(!hiz_op);
1256 dw4 |= GEN6_WM_DW4_STATISTICS;
1257
1258 if (cc_may_kill)
1259 dw5 |= GEN6_WM_DW5_PS_KILL | GEN6_WM_DW5_PS_ENABLE;
1260
1261 if (dual_blend)
1262 dw5 |= GEN6_WM_DW5_DUAL_SOURCE_BLEND;
1263
1264 dw5 |= rasterizer->wm.payload[0];
1265
1266 dw6 |= rasterizer->wm.payload[1];
1267
1268 if (num_samples > 1) {
1269 dw6 |= rasterizer->wm.dw_msaa_rast |
1270 rasterizer->wm.dw_msaa_disp;
1271 }
1272
1273 ilo_cp_begin(cp, cmd_len);
1274 ilo_cp_write(cp, cmd | (cmd_len - 2));
1275 ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
1276 ilo_cp_write(cp, dw2);
1277 ilo_cp_write(cp, 0); /* scratch */
1278 ilo_cp_write(cp, dw4);
1279 ilo_cp_write(cp, dw5);
1280 ilo_cp_write(cp, dw6);
1281 ilo_cp_write(cp, 0); /* kernel 1 */
1282 ilo_cp_write(cp, 0); /* kernel 2 */
1283 ilo_cp_end(cp);
1284 }
1285
/**
 * Fill the four buffer dwords shared by the 3DSTATE_CONSTANT_* commands.
 *
 * For each of the four slots, a slot below \p num_bufs with a non-zero size
 * gets its 32-byte-aligned offset OR'ed with the read length (in 256-bit
 * units, minus one); every other slot is zeroed.  \p num_dwords must be 4.
 *
 * \return a bitmask with bit i set for every slot i that was enabled
 */
static inline unsigned
gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs, int max_read_length,
                           uint32_t *dw, int num_dwords)
{
   unsigned enabled_mask = 0x0;
   int read_total = 0;
   int slot;

   assert(num_dwords == 4);

   for (slot = 0; slot < 4; slot++) {
      int read_len;

      /* unused slot: sizes[] is not consulted past num_bufs */
      if (slot >= num_bufs || !sizes[slot]) {
         dw[slot] = 0;
         continue;
      }

      /* in 256-bit units minus one */
      read_len = (sizes[slot] + 31) / 32 - 1;

      assert(bufs[slot] % 32 == 0);
      assert(read_len < 32);

      dw[slot] = bufs[slot] | read_len;
      enabled_mask |= 1 << slot;

      read_total += read_len + 1;
   }

   assert(read_total <= max_read_length);

   return enabled_mask;
}
1320
/**
 * Emit the 5-dword 3DSTATE_CONSTANT_VS command.  Only valid on GEN6.
 *
 * At most four buffers are accepted.  The per-buffer enable mask returned by
 * gen6_fill_3dstate_constant() is placed at bit 12 of the header dword.
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x15);
   const uint8_t cmd_len = 5;
   uint32_t slots[4], enable_bits;
   int n;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 138:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 32"
    */
   enable_bits = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 32, slots, Elements(slots));

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2) | (enable_bits << 12));
   for (n = 0; n < 4; n++)
      ilo_cp_write(cp, slots[n]);
   ilo_cp_end(cp);
}
1351
/**
 * Emit the 5-dword 3DSTATE_CONSTANT_GS command.  Only valid on GEN6.
 *
 * At most four buffers are accepted.  The per-buffer enable mask returned by
 * gen6_fill_3dstate_constant() is placed at bit 12 of the header dword.
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x16);
   const uint8_t cmd_len = 5;
   uint32_t slots[4], enable_bits;
   int n;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 161:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   enable_bits = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 64, slots, Elements(slots));

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2) | (enable_bits << 12));
   for (n = 0; n < 4; n++)
      ilo_cp_write(cp, slots[n]);
   ilo_cp_end(cp);
}
1382
/**
 * Emit the 5-dword 3DSTATE_CONSTANT_PS command.  Only valid on GEN6.
 *
 * At most four buffers are accepted.  The per-buffer enable mask returned by
 * gen6_fill_3dstate_constant() is placed at bit 12 of the header dword.
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x17);
   const uint8_t cmd_len = 5;
   uint32_t slots[4], enable_bits;
   int n;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 287:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   enable_bits = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 64, slots, Elements(slots));

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2) | (enable_bits << 12));
   for (n = 0; n < 4; n++)
      ilo_cp_write(cp, slots[n]);
   ilo_cp_end(cp);
}
1413
/**
 * Emit the 2-dword 3DSTATE_SAMPLE_MASK command.  Only valid on GEN6.
 *
 * Only the low four bits of \p sample_mask are written.
 */
static inline void
gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
                              unsigned sample_mask,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x18);
   const uint8_t cmd_len = 2;
   const unsigned valid_mask = 0xf;
   unsigned masked;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   masked = sample_mask & valid_mask;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, masked);
   ilo_cp_end(cp);
}
1432
1433 static inline void
1434 gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev,
1435 unsigned x, unsigned y,
1436 unsigned width, unsigned height,
1437 struct ilo_cp *cp)
1438 {
1439 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x00);
1440 const uint8_t cmd_len = 4;
1441 unsigned xmax = x + width - 1;
1442 unsigned ymax = y + height - 1;
1443 int rect_limit;
1444
1445 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1446
1447 if (dev->gen >= ILO_GEN(7)) {
1448 rect_limit = 16383;
1449 }
1450 else {
1451 /*
1452 * From the Sandy Bridge PRM, volume 2 part 1, page 230:
1453 *
1454 * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
1455 * must be an even number"
1456 */
1457 assert(y % 2 == 0);
1458
1459 rect_limit = 8191;
1460 }
1461
1462 if (x > rect_limit) x = rect_limit;
1463 if (y > rect_limit) y = rect_limit;
1464 if (xmax > rect_limit) xmax = rect_limit;
1465 if (ymax > rect_limit) ymax = rect_limit;
1466
1467 ilo_cp_begin(cp, cmd_len);
1468 ilo_cp_write(cp, cmd | (cmd_len - 2));
1469 ilo_cp_write(cp, y << 16 | x);
1470 ilo_cp_write(cp, ymax << 16 | xmax);
1471
1472 /*
1473 * There is no need to set the origin. It is intended to support front
1474 * buffer rendering.
1475 */
1476 ilo_cp_write(cp, 0);
1477
1478 ilo_cp_end(cp);
1479 }
1480
1481 static inline void
1482 zs_align_surface(const struct ilo_dev_info *dev,
1483 unsigned align_w, unsigned align_h,
1484 struct ilo_zs_surface *zs)
1485 {
1486 unsigned mask, shift_w, shift_h;
1487 unsigned width, height;
1488 uint32_t dw3;
1489
1490 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1491
1492 if (dev->gen >= ILO_GEN(7)) {
1493 shift_w = 4;
1494 shift_h = 18;
1495 mask = 0x3fff;
1496 }
1497 else {
1498 shift_w = 6;
1499 shift_h = 19;
1500 mask = 0x1fff;
1501 }
1502
1503 dw3 = zs->payload[2];
1504
1505 /* aligned width and height */
1506 width = align(((dw3 >> shift_w) & mask) + 1, align_w);
1507 height = align(((dw3 >> shift_h) & mask) + 1, align_h);
1508
1509 dw3 = (dw3 & ~((mask << shift_w) | (mask << shift_h))) |
1510 (width - 1) << shift_w |
1511 (height - 1) << shift_h;
1512
1513 zs->payload[2] = dw3;
1514 }
1515
1516 static inline void
1517 gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
1518 const struct ilo_zs_surface *zs,
1519 struct ilo_cp *cp)
1520 {
1521 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
1522 ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
1523 const uint8_t cmd_len = 7;
1524
1525 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1526
1527 ilo_cp_begin(cp, cmd_len);
1528 ilo_cp_write(cp, cmd | (cmd_len - 2));
1529 ilo_cp_write(cp, zs->payload[0]);
1530 ilo_cp_write_bo(cp, zs->payload[1], zs->bo,
1531 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1532 ilo_cp_write(cp, zs->payload[2]);
1533 ilo_cp_write(cp, zs->payload[3]);
1534 ilo_cp_write(cp, zs->payload[4]);
1535 ilo_cp_write(cp, zs->payload[5]);
1536 ilo_cp_end(cp);
1537 }
1538
/**
 * Emit the 2-dword 3DSTATE_POLY_STIPPLE_OFFSET command (GEN6 to GEN7.5).
 *
 * Both offsets must be in [0, 31]; the X offset is placed at bit 8 and the
 * Y offset at bit 0.
 */
static inline void
gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
                                      int x_offset, int y_offset,
                                      struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x06);
   const uint8_t cmd_len = 2;

   ILO_GPE_VALID_GEN(dev, 6, 7.5);
   assert(0 <= x_offset && x_offset < 32);
   assert(0 <= y_offset && y_offset < 32);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, (x_offset << 8) | y_offset);
   ilo_cp_end(cp);
}
1556
1557 static inline void
1558 gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev,
1559 const struct pipe_poly_stipple *pattern,
1560 struct ilo_cp *cp)
1561 {
1562 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x07);
1563 const uint8_t cmd_len = 33;
1564 int i;
1565
1566 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1567 assert(Elements(pattern->stipple) == 32);
1568
1569 ilo_cp_begin(cp, cmd_len);
1570 ilo_cp_write(cp, cmd | (cmd_len - 2));
1571 for (i = 0; i < 32; i++)
1572 ilo_cp_write(cp, pattern->stipple[i]);
1573 ilo_cp_end(cp);
1574 }
1575
1576 static inline void
1577 gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev,
1578 unsigned pattern, unsigned factor,
1579 struct ilo_cp *cp)
1580 {
1581 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x08);
1582 const uint8_t cmd_len = 3;
1583 unsigned inverse;
1584
1585 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1586 assert((pattern & 0xffff) == pattern);
1587 assert(factor >= 1 && factor <= 256);
1588
1589 ilo_cp_begin(cp, cmd_len);
1590 ilo_cp_write(cp, cmd | (cmd_len - 2));
1591 ilo_cp_write(cp, pattern);
1592
1593 if (dev->gen >= ILO_GEN(7)) {
1594 /* in U1.16 */
1595 inverse = (unsigned) (65536.0f / factor);
1596 ilo_cp_write(cp, inverse << 15 | factor);
1597 }
1598 else {
1599 /* in U1.13 */
1600 inverse = (unsigned) (8192.0f / factor);
1601 ilo_cp_write(cp, inverse << 16 | factor);
1602 }
1603
1604 ilo_cp_end(cp);
1605 }
1606
/**
 * Emit the 3-dword 3DSTATE_AA_LINE_PARAMETERS command (GEN6 to GEN7.5) with
 * both parameter dwords set to zero.
 */
static inline void
gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
                                     struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0a);
   const uint8_t cmd_len = 3;
   int n;

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   /* each dword holds two fields, at bit 16 and bit 0; all are zero */
   for (n = 0; n < 2; n++)
      ilo_cp_write(cp, 0 << 16 | 0);
   ilo_cp_end(cp);
}
1622
1623 static inline void
1624 gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev,
1625 int index, unsigned svbi,
1626 unsigned max_svbi,
1627 bool load_vertex_count,
1628 struct ilo_cp *cp)
1629 {
1630 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0b);
1631 const uint8_t cmd_len = 4;
1632 uint32_t dw1;
1633
1634 ILO_GPE_VALID_GEN(dev, 6, 6);
1635 assert(index >= 0 && index < 4);
1636
1637 dw1 = index << GEN6_SVBI_DW1_INDEX__SHIFT;
1638 if (load_vertex_count)
1639 dw1 |= GEN6_SVBI_DW1_LOAD_INTERNAL_VERTEX_COUNT;
1640
1641 ilo_cp_begin(cp, cmd_len);
1642 ilo_cp_write(cp, cmd | (cmd_len - 2));
1643 ilo_cp_write(cp, dw1);
1644 ilo_cp_write(cp, svbi);
1645 ilo_cp_write(cp, max_svbi);
1646 ilo_cp_end(cp);
1647 }
1648
1649 static inline void
1650 gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev,
1651 int num_samples,
1652 const uint32_t *packed_sample_pos,
1653 bool pixel_location_center,
1654 struct ilo_cp *cp)
1655 {
1656 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0d);
1657 const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3;
1658 uint32_t dw1, dw2, dw3;
1659
1660 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1661
1662 dw1 = (pixel_location_center) ?
1663 GEN6_MULTISAMPLE_DW1_PIXLOC_CENTER : GEN6_MULTISAMPLE_DW1_PIXLOC_UL_CORNER;
1664
1665 switch (num_samples) {
1666 case 0:
1667 case 1:
1668 dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
1669 dw2 = 0;
1670 dw3 = 0;
1671 break;
1672 case 4:
1673 dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4;
1674 dw2 = packed_sample_pos[0];
1675 dw3 = 0;
1676 break;
1677 case 8:
1678 assert(dev->gen >= ILO_GEN(7));
1679 dw1 |= GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8;
1680 dw2 = packed_sample_pos[0];
1681 dw3 = packed_sample_pos[1];
1682 break;
1683 default:
1684 assert(!"unsupported sample count");
1685 dw1 |= GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
1686 dw2 = 0;
1687 dw3 = 0;
1688 break;
1689 }
1690
1691 ilo_cp_begin(cp, cmd_len);
1692 ilo_cp_write(cp, cmd | (cmd_len - 2));
1693 ilo_cp_write(cp, dw1);
1694 ilo_cp_write(cp, dw2);
1695 if (dev->gen >= ILO_GEN(7))
1696 ilo_cp_write(cp, dw3);
1697 ilo_cp_end(cp);
1698 }
1699
1700 static inline void
1701 gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev,
1702 const struct ilo_zs_surface *zs,
1703 struct ilo_cp *cp)
1704 {
1705 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
1706 ILO_GPE_CMD(0x3, 0x0, 0x06) :
1707 ILO_GPE_CMD(0x3, 0x1, 0x0e);
1708 const uint8_t cmd_len = 3;
1709
1710 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1711
1712 ilo_cp_begin(cp, cmd_len);
1713 ilo_cp_write(cp, cmd | (cmd_len - 2));
1714 /* see ilo_gpe_init_zs_surface() */
1715 ilo_cp_write(cp, zs->payload[6]);
1716 ilo_cp_write_bo(cp, zs->payload[7], zs->separate_s8_bo,
1717 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1718 ilo_cp_end(cp);
1719 }
1720
1721 static inline void
1722 gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev,
1723 const struct ilo_zs_surface *zs,
1724 struct ilo_cp *cp)
1725 {
1726 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
1727 ILO_GPE_CMD(0x3, 0x0, 0x07) :
1728 ILO_GPE_CMD(0x3, 0x1, 0x0f);
1729 const uint8_t cmd_len = 3;
1730
1731 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1732
1733 ilo_cp_begin(cp, cmd_len);
1734 ilo_cp_write(cp, cmd | (cmd_len - 2));
1735 /* see ilo_gpe_init_zs_surface() */
1736 ilo_cp_write(cp, zs->payload[8]);
1737 ilo_cp_write_bo(cp, zs->payload[9], zs->hiz_bo,
1738 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1739 ilo_cp_end(cp);
1740 }
1741
1742 static inline void
1743 gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
1744 uint32_t clear_val,
1745 struct ilo_cp *cp)
1746 {
1747 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x10);
1748 const uint8_t cmd_len = 2;
1749
1750 ILO_GPE_VALID_GEN(dev, 6, 6);
1751
1752 ilo_cp_begin(cp, cmd_len);
1753 ilo_cp_write(cp, cmd | (cmd_len - 2) |
1754 GEN6_CLEAR_PARAMS_DW0_VALID);
1755 ilo_cp_write(cp, clear_val);
1756 ilo_cp_end(cp);
1757 }
1758
1759 static inline void
1760 gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev,
1761 uint32_t dw1,
1762 struct intel_bo *bo, uint32_t bo_offset,
1763 bool write_qword,
1764 struct ilo_cp *cp)
1765 {
1766 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x2, 0x00);
1767 const uint8_t cmd_len = (write_qword) ? 5 : 4;
1768 const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
1769 const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
1770
1771 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1772
1773 assert(bo_offset % ((write_qword) ? 8 : 4) == 0);
1774
1775 if (dw1 & GEN6_PIPE_CONTROL_CS_STALL) {
1776 /*
1777 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
1778 *
1779 * "1 of the following must also be set (when CS stall is set):
1780 *
1781 * * Depth Cache Flush Enable ([0] of DW1)
1782 * * Stall at Pixel Scoreboard ([1] of DW1)
1783 * * Depth Stall ([13] of DW1)
1784 * * Post-Sync Operation ([13] of DW1)
1785 * * Render Target Cache Flush Enable ([12] of DW1)
1786 * * Notify Enable ([8] of DW1)"
1787 *
1788 * From the Ivy Bridge PRM, volume 2 part 1, page 61:
1789 *
1790 * "One of the following must also be set (when CS stall is set):
1791 *
1792 * * Render Target Cache Flush Enable ([12] of DW1)
1793 * * Depth Cache Flush Enable ([0] of DW1)
1794 * * Stall at Pixel Scoreboard ([1] of DW1)
1795 * * Depth Stall ([13] of DW1)
1796 * * Post-Sync Operation ([13] of DW1)"
1797 */
1798 uint32_t bit_test = GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
1799 GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
1800 GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL |
1801 GEN6_PIPE_CONTROL_DEPTH_STALL;
1802
1803 /* post-sync op */
1804 bit_test |= GEN6_PIPE_CONTROL_WRITE_IMM |
1805 GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT |
1806 GEN6_PIPE_CONTROL_WRITE_TIMESTAMP;
1807
1808 if (dev->gen == ILO_GEN(6))
1809 bit_test |= GEN6_PIPE_CONTROL_NOTIFY_ENABLE;
1810
1811 assert(dw1 & bit_test);
1812 }
1813
1814 if (dw1 & GEN6_PIPE_CONTROL_DEPTH_STALL) {
1815 /*
1816 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
1817 *
1818 * "Following bits must be clear (when Depth Stall is set):
1819 *
1820 * * Render Target Cache Flush Enable ([12] of DW1)
1821 * * Depth Cache Flush Enable ([0] of DW1)"
1822 */
1823 assert(!(dw1 & (GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
1824 GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
1825 }
1826
1827 /*
1828 * From the Sandy Bridge PRM, volume 1 part 3, page 19:
1829 *
1830 * "[DevSNB] PPGTT memory writes by MI_* (such as MI_STORE_DATA_IMM)
1831 * and PIPE_CONTROL are not supported."
1832 *
1833 * The kernel will add the mapping automatically (when write domain is
1834 * INTEL_DOMAIN_INSTRUCTION).
1835 */
1836 if (dev->gen == ILO_GEN(6) && bo)
1837 bo_offset |= GEN6_PIPE_CONTROL_DW2_USE_GGTT;
1838
1839 ilo_cp_begin(cp, cmd_len);
1840 ilo_cp_write(cp, cmd | (cmd_len - 2));
1841 ilo_cp_write(cp, dw1);
1842 ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
1843 ilo_cp_write(cp, 0);
1844 if (write_qword)
1845 ilo_cp_write(cp, 0);
1846 ilo_cp_end(cp);
1847 }
1848
1849 static inline void
1850 gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
1851 const struct pipe_draw_info *info,
1852 const struct ilo_ib_state *ib,
1853 bool rectlist,
1854 struct ilo_cp *cp)
1855 {
1856 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
1857 const uint8_t cmd_len = 6;
1858 const int prim = (rectlist) ?
1859 GEN6_3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
1860 const int vb_access = (info->indexed) ?
1861 GEN6_3DPRIM_DW0_ACCESS_RANDOM : GEN6_3DPRIM_DW0_ACCESS_SEQUENTIAL;
1862 const uint32_t vb_start = info->start +
1863 ((info->indexed) ? ib->draw_start_offset : 0);
1864
1865 ILO_GPE_VALID_GEN(dev, 6, 6);
1866
1867 ilo_cp_begin(cp, cmd_len);
1868 ilo_cp_write(cp, cmd | (cmd_len - 2) |
1869 prim << GEN6_3DPRIM_DW0_TYPE__SHIFT |
1870 vb_access);
1871 ilo_cp_write(cp, info->count);
1872 ilo_cp_write(cp, vb_start);
1873 ilo_cp_write(cp, info->instance_count);
1874 ilo_cp_write(cp, info->start_instance);
1875 ilo_cp_write(cp, info->index_bias);
1876 ilo_cp_end(cp);
1877 }
1878
/**
 * Write \p num_ids INTERFACE_DESCRIPTOR_DATA entries of eight dwords each
 * into space obtained from ilo_cp_steal_ptr().  Only valid on GEN6.
 *
 * Entry i is built from cs[i], sampler_state[i], num_samplers[i],
 * binding_table_state[i] and num_surfaces[i].
 *
 * \return the state offset reported by ilo_cp_steal_ptr()
 */
static inline uint32_t
gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev,
                                    const struct ilo_shader_state **cs,
                                    uint32_t *sampler_state,
                                    int *num_samplers,
                                    uint32_t *binding_table_state,
                                    int *num_surfaces,
                                    int num_ids,
                                    struct ilo_cp *cp)
{
   /*
    * From the Sandy Bridge PRM, volume 2 part 2, page 34:
    *
    *     "(Interface Descriptor Total Length) This field must have the same
    *      alignment as the Interface Descriptor Data Start Address.
    *
    *      It must be DQWord (32-byte) aligned..."
    *
    * From the Sandy Bridge PRM, volume 2 part 2, page 35:
    *
    *     "(Interface Descriptor Data Start Address) Specifies the 32-byte
    *      aligned address of the Interface Descriptor data."
    */
   const int state_align = 32 / 4;
   const int state_len = (32 / 4) * num_ids;
   uint32_t state_offset, *base;
   int id;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   base = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA",
         state_len, state_align, &state_offset);

   for (id = 0; id < num_ids; id++) {
      uint32_t *desc = base + id * 8;

      desc[0] = ilo_shader_get_kernel_offset(cs[id]);
      desc[1] = 1 << 18; /* SPF */
      /* sampler count goes in groups of four, rounded up, at bit 2 */
      desc[2] = sampler_state[id] |
                (((num_samplers[id] + 3) / 4) << 2);
      desc[3] = binding_table_state[id] |
                num_surfaces[id];
      desc[4] = 0 << 16 |  /* CURBE Read Length */
                0;         /* CURBE Read Offset */
      desc[5] = 0; /* Barrier ID */
      desc[6] = 0;
      desc[7] = 0;
   }

   return state_offset;
}
1930
1931 static inline uint32_t
1932 gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev,
1933 const struct ilo_viewport_cso *viewports,
1934 unsigned num_viewports,
1935 struct ilo_cp *cp)
1936 {
1937 const int state_align = 32 / 4;
1938 const int state_len = 8 * num_viewports;
1939 uint32_t state_offset, *dw;
1940 unsigned i;
1941
1942 ILO_GPE_VALID_GEN(dev, 6, 6);
1943
1944 /*
1945 * From the Sandy Bridge PRM, volume 2 part 1, page 262:
1946 *
1947 * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
1948 * stored as an array of up to 16 elements..."
1949 */
1950 assert(num_viewports && num_viewports <= 16);
1951
1952 dw = ilo_cp_steal_ptr(cp, "SF_VIEWPORT",
1953 state_len, state_align, &state_offset);
1954
1955 for (i = 0; i < num_viewports; i++) {
1956 const struct ilo_viewport_cso *vp = &viewports[i];
1957
1958 dw[0] = fui(vp->m00);
1959 dw[1] = fui(vp->m11);
1960 dw[2] = fui(vp->m22);
1961 dw[3] = fui(vp->m30);
1962 dw[4] = fui(vp->m31);
1963 dw[5] = fui(vp->m32);
1964 dw[6] = 0;
1965 dw[7] = 0;
1966
1967 dw += 8;
1968 }
1969
1970 return state_offset;
1971 }
1972
1973 static inline uint32_t
1974 gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
1975 const struct ilo_viewport_cso *viewports,
1976 unsigned num_viewports,
1977 struct ilo_cp *cp)
1978 {
1979 const int state_align = 32 / 4;
1980 const int state_len = 4 * num_viewports;
1981 uint32_t state_offset, *dw;
1982 unsigned i;
1983
1984 ILO_GPE_VALID_GEN(dev, 6, 6);
1985
1986 /*
1987 * From the Sandy Bridge PRM, volume 2 part 1, page 193:
1988 *
1989 * "The viewport-related state is stored as an array of up to 16
1990 * elements..."
1991 */
1992 assert(num_viewports && num_viewports <= 16);
1993
1994 dw = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT",
1995 state_len, state_align, &state_offset);
1996
1997 for (i = 0; i < num_viewports; i++) {
1998 const struct ilo_viewport_cso *vp = &viewports[i];
1999
2000 dw[0] = fui(vp->min_gbx);
2001 dw[1] = fui(vp->max_gbx);
2002 dw[2] = fui(vp->min_gby);
2003 dw[3] = fui(vp->max_gby);
2004
2005 dw += 4;
2006 }
2007
2008 return state_offset;
2009 }
2010
2011 static inline uint32_t
2012 gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev,
2013 const struct ilo_viewport_cso *viewports,
2014 unsigned num_viewports,
2015 struct ilo_cp *cp)
2016 {
2017 const int state_align = 32 / 4;
2018 const int state_len = 2 * num_viewports;
2019 uint32_t state_offset, *dw;
2020 unsigned i;
2021
2022 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2023
2024 /*
2025 * From the Sandy Bridge PRM, volume 2 part 1, page 385:
2026 *
2027 * "The viewport state is stored as an array of up to 16 elements..."
2028 */
2029 assert(num_viewports && num_viewports <= 16);
2030
2031 dw = ilo_cp_steal_ptr(cp, "CC_VIEWPORT",
2032 state_len, state_align, &state_offset);
2033
2034 for (i = 0; i < num_viewports; i++) {
2035 const struct ilo_viewport_cso *vp = &viewports[i];
2036
2037 dw[0] = fui(vp->min_z);
2038 dw[1] = fui(vp->max_z);
2039
2040 dw += 2;
2041 }
2042
2043 return state_offset;
2044 }
2045
2046 static inline uint32_t
2047 gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev,
2048 const struct pipe_stencil_ref *stencil_ref,
2049 ubyte alpha_ref,
2050 const struct pipe_blend_color *blend_color,
2051 struct ilo_cp *cp)
2052 {
2053 const int state_align = 64 / 4;
2054 const int state_len = 6;
2055 uint32_t state_offset, *dw;
2056
2057 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2058
2059 dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE",
2060 state_len, state_align, &state_offset);
2061
2062 dw[0] = stencil_ref->ref_value[0] << 24 |
2063 stencil_ref->ref_value[1] << 16 |
2064 GEN6_CC_DW0_ALPHATEST_UNORM8;
2065 dw[1] = alpha_ref;
2066 dw[2] = fui(blend_color->color[0]);
2067 dw[3] = fui(blend_color->color[1]);
2068 dw[4] = fui(blend_color->color[2]);
2069 dw[5] = fui(blend_color->color[3]);
2070
2071 return state_offset;
2072 }
2073
2074 static inline uint32_t
2075 gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
2076 const struct ilo_blend_state *blend,
2077 const struct ilo_fb_state *fb,
2078 const struct ilo_dsa_state *dsa,
2079 struct ilo_cp *cp)
2080 {
2081 const int state_align = 64 / 4;
2082 int state_len;
2083 uint32_t state_offset, *dw;
2084 unsigned num_targets, i;
2085
2086 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2087
2088 /*
2089 * From the Sandy Bridge PRM, volume 2 part 1, page 376:
2090 *
2091 * "The blend state is stored as an array of up to 8 elements..."
2092 */
2093 num_targets = fb->state.nr_cbufs;
2094 assert(num_targets <= 8);
2095
2096 if (!num_targets) {
2097 if (!dsa->dw_alpha)
2098 return 0;
2099 /* to be able to reference alpha func */
2100 num_targets = 1;
2101 }
2102
2103 state_len = 2 * num_targets;
2104
2105 dw = ilo_cp_steal_ptr(cp, "BLEND_STATE",
2106 state_len, state_align, &state_offset);
2107
2108 for (i = 0; i < num_targets; i++) {
2109 const unsigned idx = (blend->independent_blend_enable) ? i : 0;
2110 const struct ilo_blend_cso *cso = &blend->cso[idx];
2111 const int num_samples = fb->num_samples;
2112 const struct util_format_description *format_desc =
2113 (idx < fb->state.nr_cbufs && fb->state.cbufs[idx]) ?
2114 util_format_description(fb->state.cbufs[idx]->format) : NULL;
2115 bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;
2116
2117 rt_is_unorm = true;
2118 rt_is_pure_integer = false;
2119 rt_dst_alpha_forced_one = false;
2120
2121 if (format_desc) {
2122 int ch;
2123
2124 switch (format_desc->format) {
2125 case PIPE_FORMAT_B8G8R8X8_UNORM:
2126 /* force alpha to one when the HW format has alpha */
2127 assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM)
2128 == GEN6_FORMAT_B8G8R8A8_UNORM);
2129 rt_dst_alpha_forced_one = true;
2130 break;
2131 default:
2132 break;
2133 }
2134
2135 for (ch = 0; ch < 4; ch++) {
2136 if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID)
2137 continue;
2138
2139 if (format_desc->channel[ch].pure_integer) {
2140 rt_is_unorm = false;
2141 rt_is_pure_integer = true;
2142 break;
2143 }
2144
2145 if (!format_desc->channel[ch].normalized ||
2146 format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED)
2147 rt_is_unorm = false;
2148 }
2149 }
2150
2151 dw[0] = cso->payload[0];
2152 dw[1] = cso->payload[1];
2153
2154 if (!rt_is_pure_integer) {
2155 if (rt_dst_alpha_forced_one)
2156 dw[0] |= cso->dw_blend_dst_alpha_forced_one;
2157 else
2158 dw[0] |= cso->dw_blend;
2159 }
2160
2161 /*
2162 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
2163 *
2164 * "Logic Ops are only supported on *_UNORM surfaces (excluding
2165 * _SRGB variants), otherwise Logic Ops must be DISABLED."
2166 *
2167 * Since logicop is ignored for non-UNORM color buffers, no special care
2168 * is needed.
2169 */
2170 if (rt_is_unorm)
2171 dw[1] |= cso->dw_logicop;
2172
2173 /*
2174 * From the Sandy Bridge PRM, volume 2 part 1, page 356:
2175 *
2176 * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
2177 * Dither both must be disabled."
2178 *
2179 * There is no such limitation on GEN7, or for AlphaToOne. But GL
2180 * requires that anyway.
2181 */
2182 if (num_samples > 1)
2183 dw[1] |= cso->dw_alpha_mod;
2184
2185 /*
2186 * From the Sandy Bridge PRM, volume 2 part 1, page 382:
2187 *
2188 * "Alpha Test can only be enabled if Pixel Shader outputs a float
2189 * alpha value."
2190 */
2191 if (!rt_is_pure_integer)
2192 dw[1] |= dsa->dw_alpha;
2193
2194 dw += 2;
2195 }
2196
2197 return state_offset;
2198 }
2199
2200 static inline uint32_t
2201 gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev,
2202 const struct ilo_dsa_state *dsa,
2203 struct ilo_cp *cp)
2204 {
2205 const int state_align = 64 / 4;
2206 const int state_len = 3;
2207 uint32_t state_offset, *dw;
2208
2209
2210 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2211
2212 dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE",
2213 state_len, state_align, &state_offset);
2214
2215 dw[0] = dsa->payload[0];
2216 dw[1] = dsa->payload[1];
2217 dw[2] = dsa->payload[2];
2218
2219 return state_offset;
2220 }
2221
2222 static inline uint32_t
2223 gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev,
2224 const struct ilo_scissor_state *scissor,
2225 unsigned num_viewports,
2226 struct ilo_cp *cp)
2227 {
2228 const int state_align = 32 / 4;
2229 const int state_len = 2 * num_viewports;
2230 uint32_t state_offset, *dw;
2231
2232 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2233
2234 /*
2235 * From the Sandy Bridge PRM, volume 2 part 1, page 263:
2236 *
2237 * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
2238 * stored as an array of up to 16 elements..."
2239 */
2240 assert(num_viewports && num_viewports <= 16);
2241
2242 dw = ilo_cp_steal_ptr(cp, "SCISSOR_RECT",
2243 state_len, state_align, &state_offset);
2244
2245 memcpy(dw, scissor->payload, state_len * 4);
2246
2247 return state_offset;
2248 }
2249
/**
 * Emit BINDING_TABLE_STATE.
 *
 * The table is a verbatim copy of the surface_states array, one dword per
 * entry.
 *
 * \param surface_states    array of num_surface_states dwords to copy
 * \param num_surface_states number of entries; asserted to be at most 256
 * \return 0 when there are no entries, otherwise the state offset filled in
 *         by ilo_cp_steal_ptr()
 */
static inline uint32_t
gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
                              uint32_t *surface_states,
                              int num_surface_states,
                              struct ilo_cp *cp)
{
   const int alignment = 32 / 4;
   const int num_dwords = num_surface_states;
   uint32_t offset;
   uint32_t *table;

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   /*
    * The Sandy Bridge PRM, volume 4 part 1, page 69 says:
    *
    *   "It is stored as an array of up to 256 elements..."
    */
   assert(num_surface_states <= 256);

   /* nothing to emit for an empty table */
   if (num_surface_states == 0)
      return 0;

   table = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
         num_dwords, alignment, &offset);

   memcpy(table, surface_states,
         num_surface_states * sizeof(*surface_states));

   return offset;
}
2279
2280 static inline uint32_t
2281 gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev,
2282 const struct ilo_view_surface *surf,
2283 bool for_render,
2284 struct ilo_cp *cp)
2285 {
2286 const int state_align = 32 / 4;
2287 const int state_len = (dev->gen >= ILO_GEN(7)) ? 8 : 6;
2288 uint32_t state_offset;
2289 uint32_t read_domains, write_domain;
2290
2291 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2292
2293 if (for_render) {
2294 read_domains = INTEL_DOMAIN_RENDER;
2295 write_domain = INTEL_DOMAIN_RENDER;
2296 }
2297 else {
2298 read_domains = INTEL_DOMAIN_SAMPLER;
2299 write_domain = 0;
2300 }
2301
2302 ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset);
2303
2304 STATIC_ASSERT(Elements(surf->payload) >= 8);
2305
2306 ilo_cp_write(cp, surf->payload[0]);
2307 ilo_cp_write_bo(cp, surf->payload[1],
2308 surf->bo, read_domains, write_domain);
2309 ilo_cp_write(cp, surf->payload[2]);
2310 ilo_cp_write(cp, surf->payload[3]);
2311 ilo_cp_write(cp, surf->payload[4]);
2312 ilo_cp_write(cp, surf->payload[5]);
2313
2314 if (dev->gen >= ILO_GEN(7)) {
2315 ilo_cp_write(cp, surf->payload[6]);
2316 ilo_cp_write(cp, surf->payload[7]);
2317 }
2318
2319 ilo_cp_end(cp);
2320
2321 return state_offset;
2322 }
2323
2324 static inline uint32_t
2325 gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev,
2326 const struct pipe_stream_output_target *so,
2327 const struct pipe_stream_output_info *so_info,
2328 int so_index,
2329 struct ilo_cp *cp)
2330 {
2331 struct ilo_buffer *buf = ilo_buffer(so->buffer);
2332 unsigned bo_offset, struct_size;
2333 enum pipe_format elem_format;
2334 struct ilo_view_surface surf;
2335
2336 ILO_GPE_VALID_GEN(dev, 6, 6);
2337
2338 bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
2339 struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
2340
2341 switch (so_info->output[so_index].num_components) {
2342 case 1:
2343 elem_format = PIPE_FORMAT_R32_FLOAT;
2344 break;
2345 case 2:
2346 elem_format = PIPE_FORMAT_R32G32_FLOAT;
2347 break;
2348 case 3:
2349 elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
2350 break;
2351 case 4:
2352 elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
2353 break;
2354 default:
2355 assert(!"unexpected SO components length");
2356 elem_format = PIPE_FORMAT_R32_FLOAT;
2357 break;
2358 }
2359
2360 ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size,
2361 struct_size, elem_format, false, true, &surf);
2362
2363 return gen6_emit_SURFACE_STATE(dev, &surf, false, cp);
2364 }
2365
2366 static inline uint32_t
2367 gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev,
2368 const struct ilo_sampler_cso * const *samplers,
2369 const struct pipe_sampler_view * const *views,
2370 const uint32_t *sampler_border_colors,
2371 int num_samplers,
2372 struct ilo_cp *cp)
2373 {
2374 const int state_align = 32 / 4;
2375 const int state_len = 4 * num_samplers;
2376 uint32_t state_offset, *dw;
2377 int i;
2378
2379 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2380
2381 /*
2382 * From the Sandy Bridge PRM, volume 4 part 1, page 101:
2383 *
2384 * "The sampler state is stored as an array of up to 16 elements..."
2385 */
2386 assert(num_samplers <= 16);
2387
2388 if (!num_samplers)
2389 return 0;
2390
2391 dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE",
2392 state_len, state_align, &state_offset);
2393
2394 for (i = 0; i < num_samplers; i++) {
2395 const struct ilo_sampler_cso *sampler = samplers[i];
2396 const struct pipe_sampler_view *view = views[i];
2397 const uint32_t border_color = sampler_border_colors[i];
2398 uint32_t dw_filter, dw_wrap;
2399
2400 /* there may be holes */
2401 if (!sampler || !view) {
2402 /* disabled sampler */
2403 dw[0] = 1 << 31;
2404 dw[1] = 0;
2405 dw[2] = 0;
2406 dw[3] = 0;
2407 dw += 4;
2408
2409 continue;
2410 }
2411
2412 /* determine filter and wrap modes */
2413 switch (view->texture->target) {
2414 case PIPE_TEXTURE_1D:
2415 dw_filter = (sampler->anisotropic) ?
2416 sampler->dw_filter_aniso : sampler->dw_filter;
2417 dw_wrap = sampler->dw_wrap_1d;
2418 break;
2419 case PIPE_TEXTURE_3D:
2420 /*
2421 * From the Sandy Bridge PRM, volume 4 part 1, page 103:
2422 *
2423 * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
2424 * surfaces of type SURFTYPE_3D."
2425 */
2426 dw_filter = sampler->dw_filter;
2427 dw_wrap = sampler->dw_wrap;
2428 break;
2429 case PIPE_TEXTURE_CUBE:
2430 dw_filter = (sampler->anisotropic) ?
2431 sampler->dw_filter_aniso : sampler->dw_filter;
2432 dw_wrap = sampler->dw_wrap_cube;
2433 break;
2434 default:
2435 dw_filter = (sampler->anisotropic) ?
2436 sampler->dw_filter_aniso : sampler->dw_filter;
2437 dw_wrap = sampler->dw_wrap;
2438 break;
2439 }
2440
2441 dw[0] = sampler->payload[0];
2442 dw[1] = sampler->payload[1];
2443 assert(!(border_color & 0x1f));
2444 dw[2] = border_color;
2445 dw[3] = sampler->payload[2];
2446
2447 dw[0] |= dw_filter;
2448
2449 if (dev->gen >= ILO_GEN(7)) {
2450 dw[3] |= dw_wrap;
2451 }
2452 else {
2453 /*
2454 * From the Sandy Bridge PRM, volume 4 part 1, page 21:
2455 *
2456 * "[DevSNB] Errata: Incorrect behavior is observed in cases
2457 * where the min and mag mode filters are different and
2458 * SurfMinLOD is nonzero. The determination of MagMode uses the
2459 * following equation instead of the one in the above
2460 * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
2461 *
2462 * As a way to work around that, we set Base to
2463 * view->u.tex.first_level.
2464 */
2465 dw[0] |= view->u.tex.first_level << 22;
2466
2467 dw[1] |= dw_wrap;
2468 }
2469
2470 dw += 4;
2471 }
2472
2473 return state_offset;
2474 }
2475
2476 static inline uint32_t
2477 gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev,
2478 const struct ilo_sampler_cso *sampler,
2479 struct ilo_cp *cp)
2480 {
2481 const int state_align = 32 / 4;
2482 const int state_len = (dev->gen >= ILO_GEN(7)) ? 4 : 12;
2483 uint32_t state_offset, *dw;
2484
2485 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2486
2487 dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE",
2488 state_len, state_align, &state_offset);
2489
2490 /* see ilo_gpe_init_sampler_cso() */
2491 memcpy(dw, &sampler->payload[3], state_len * 4);
2492
2493 return state_offset;
2494 }
2495
/**
 * Obtain space for a push constant buffer.
 *
 * The requested size is rounded up to a multiple of 32 bytes and the bytes
 * past \p size are zeroed.  The first \p size bytes are not written here;
 * the caller receives the pointer through \p pcb.
 *
 * \param size size of the constants in bytes
 * \param pcb  when non-NULL, receives the pointer to the start of the buffer
 * \return the state offset filled in by ilo_cp_steal_ptr()
 */
static inline uint32_t
gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev,
                               int size, void **pcb,
                               struct ilo_cp *cp)
{
   /*
    * Push constant buffers for VS, GS, FS, and CS all have to be aligned to
    * 32 bytes, and their sizes are specified in 256-bit units.
    */
   const int alignment = 32 / 4;
   const int num_dwords = align(size, 32) / 4;
   const int padding = num_dwords * 4 - size;
   uint32_t offset;
   char *base;

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   base = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER",
         num_dwords, alignment, &offset);

   /* clear the tail that the caller is not going to fill */
   if (padding > 0)
      memset(base + size, 0, padding);

   if (pcb)
      *pcb = base;

   return offset;
}
2524
2525 #endif /* ILO_GPE_GEN6_H */