ilo: prefix 3DSTATE_VF with gen75
[mesa.git] / src / gallium / drivers / ilo / ilo_builder_3d_top.h
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2014 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #ifndef ILO_BUILDER_3D_TOP_H
29 #define ILO_BUILDER_3D_TOP_H
30
31 #include "genhw/genhw.h"
32 #include "intel_winsys.h"
33
34 #include "ilo_common.h"
35 #include "ilo_resource.h"
36 #include "ilo_shader.h"
37 #include "ilo_state.h"
38 #include "ilo_state_3d.h"
39 #include "ilo_builder.h"
40
41 static inline void
42 gen6_3DSTATE_URB(struct ilo_builder *builder,
43 int vs_total_size, int gs_total_size,
44 int vs_entry_size, int gs_entry_size)
45 {
46 const uint8_t cmd_len = 3;
47 const int row_size = 128; /* 1024 bits */
48 int vs_alloc_size, gs_alloc_size;
49 int vs_num_entries, gs_num_entries;
50 uint32_t *dw;
51
52 ILO_DEV_ASSERT(builder->dev, 6, 6);
53
54 /* in 1024-bit URB rows */
55 vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
56 gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
57
58 /* the valid range is [1, 5] */
59 if (!vs_alloc_size)
60 vs_alloc_size = 1;
61 if (!gs_alloc_size)
62 gs_alloc_size = 1;
63 assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
64
65 /* the valid range is [24, 256] in multiples of 4 */
66 vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
67 if (vs_num_entries > 256)
68 vs_num_entries = 256;
69 assert(vs_num_entries >= 24);
70
71 /* the valid range is [0, 256] in multiples of 4 */
72 gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
73 if (gs_num_entries > 256)
74 gs_num_entries = 256;
75
76 ilo_builder_batch_pointer(builder, cmd_len, &dw);
77
78 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_URB) | (cmd_len - 2);
79 dw[1] = (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT |
80 vs_num_entries << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT;
81 dw[2] = gs_num_entries << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT |
82 (gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT;
83 }
84
85 static inline void
86 gen7_3dstate_push_constant_alloc(struct ilo_builder *builder,
87 int subop, int offset, int size)
88 {
89 const uint32_t cmd = GEN6_RENDER_TYPE_RENDER |
90 GEN6_RENDER_SUBTYPE_3D |
91 subop;
92 const uint8_t cmd_len = 2;
93 uint32_t *dw;
94 int end;
95
96 ILO_DEV_ASSERT(builder->dev, 7, 7.5);
97
98 /* VS, HS, DS, GS, and PS variants */
99 assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS &&
100 subop <= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS);
101
102 /*
103 * From the Ivy Bridge PRM, volume 2 part 1, page 68:
104 *
105 * "(A table that says the maximum size of each constant buffer is
106 * 16KB")
107 *
108 * From the Ivy Bridge PRM, volume 2 part 1, page 115:
109 *
110 * "The sum of the Constant Buffer Offset and the Constant Buffer Size
111 * may not exceed the maximum value of the Constant Buffer Size."
112 *
113 * Thus, the valid range of buffer end is [0KB, 16KB].
114 */
115 end = (offset + size) / 1024;
116 if (end > 16) {
117 assert(!"invalid constant buffer end");
118 end = 16;
119 }
120
121 /* the valid range of buffer offset is [0KB, 15KB] */
122 offset = (offset + 1023) / 1024;
123 if (offset > 15) {
124 assert(!"invalid constant buffer offset");
125 offset = 15;
126 }
127
128 if (offset > end) {
129 assert(!size);
130 offset = end;
131 }
132
133 /* the valid range of buffer size is [0KB, 15KB] */
134 size = end - offset;
135 if (size > 15) {
136 assert(!"invalid constant buffer size");
137 size = 15;
138 }
139
140 ilo_builder_batch_pointer(builder, cmd_len, &dw);
141
142 dw[0] = cmd | (cmd_len - 2);
143 dw[1] = offset << GEN7_PCB_ALLOC_ANY_DW1_OFFSET__SHIFT |
144 size;
145 }
146
147 static inline void
148 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(struct ilo_builder *builder,
149 int offset, int size)
150 {
151 gen7_3dstate_push_constant_alloc(builder,
152 GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS, offset, size);
153 }
154
155 static inline void
156 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_HS(struct ilo_builder *builder,
157 int offset, int size)
158 {
159 gen7_3dstate_push_constant_alloc(builder,
160 GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_HS, offset, size);
161 }
162
163 static inline void
164 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_DS(struct ilo_builder *builder,
165 int offset, int size)
166 {
167 gen7_3dstate_push_constant_alloc(builder,
168 GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_DS, offset, size);
169 }
170
171 static inline void
172 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(struct ilo_builder *builder,
173 int offset, int size)
174 {
175 gen7_3dstate_push_constant_alloc(builder,
176 GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_GS, offset, size);
177 }
178
179 static inline void
180 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(struct ilo_builder *builder,
181 int offset, int size)
182 {
183 gen7_3dstate_push_constant_alloc(builder,
184 GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS, offset, size);
185 }
186
187 static inline void
188 gen7_3dstate_urb(struct ilo_builder *builder,
189 int subop, int offset, int size,
190 int entry_size)
191 {
192 const uint32_t cmd = GEN6_RENDER_TYPE_RENDER |
193 GEN6_RENDER_SUBTYPE_3D |
194 subop;
195 const uint8_t cmd_len = 2;
196 const int row_size = 64; /* 512 bits */
197 int alloc_size, num_entries, min_entries, max_entries;
198 uint32_t *dw;
199
200 ILO_DEV_ASSERT(builder->dev, 7, 7.5);
201
202 /* VS, HS, DS, and GS variants */
203 assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_URB_VS &&
204 subop <= GEN7_RENDER_OPCODE_3DSTATE_URB_GS);
205
206 /* in multiples of 8KB */
207 assert(offset % 8192 == 0);
208 offset /= 8192;
209
210 /* in multiple of 512-bit rows */
211 alloc_size = (entry_size + row_size - 1) / row_size;
212 if (!alloc_size)
213 alloc_size = 1;
214
215 /*
216 * From the Ivy Bridge PRM, volume 2 part 1, page 34:
217 *
218 * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
219 * cause performance to decrease due to banking in the URB. Element
220 * sizes of 16 to 20 should be programmed with six 512-bit URB rows."
221 */
222 if (subop == GEN7_RENDER_OPCODE_3DSTATE_URB_VS && alloc_size == 5)
223 alloc_size = 6;
224
225 /* in multiples of 8 */
226 num_entries = (size / row_size / alloc_size) & ~7;
227
228 switch (subop) {
229 case GEN7_RENDER_OPCODE_3DSTATE_URB_VS:
230 switch (ilo_dev_gen(builder->dev)) {
231 case ILO_GEN(7.5):
232 max_entries = (builder->dev->gt >= 2) ? 1664 : 640;
233 min_entries = (builder->dev->gt >= 2) ? 64 : 32;
234 break;
235 case ILO_GEN(7):
236 default:
237 max_entries = (builder->dev->gt == 2) ? 704 : 512;
238 min_entries = 32;
239 break;
240 }
241
242 assert(num_entries >= min_entries);
243 if (num_entries > max_entries)
244 num_entries = max_entries;
245 break;
246 case GEN7_RENDER_OPCODE_3DSTATE_URB_HS:
247 max_entries = (builder->dev->gt == 2) ? 64 : 32;
248 if (num_entries > max_entries)
249 num_entries = max_entries;
250 break;
251 case GEN7_RENDER_OPCODE_3DSTATE_URB_DS:
252 if (num_entries)
253 assert(num_entries >= 138);
254 break;
255 case GEN7_RENDER_OPCODE_3DSTATE_URB_GS:
256 switch (ilo_dev_gen(builder->dev)) {
257 case ILO_GEN(7.5):
258 max_entries = (builder->dev->gt >= 2) ? 640 : 256;
259 break;
260 case ILO_GEN(7):
261 default:
262 max_entries = (builder->dev->gt == 2) ? 320 : 192;
263 break;
264 }
265
266 if (num_entries > max_entries)
267 num_entries = max_entries;
268 break;
269 default:
270 break;
271 }
272
273 ilo_builder_batch_pointer(builder, cmd_len, &dw);
274
275 dw[0] = cmd | (cmd_len - 2);
276 dw[1] = offset << GEN7_URB_ANY_DW1_OFFSET__SHIFT |
277 (alloc_size - 1) << GEN7_URB_ANY_DW1_ENTRY_SIZE__SHIFT |
278 num_entries;
279 }
280
281 static inline void
282 gen7_3DSTATE_URB_VS(struct ilo_builder *builder,
283 int offset, int size, int entry_size)
284 {
285 gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_VS,
286 offset, size, entry_size);
287 }
288
289 static inline void
290 gen7_3DSTATE_URB_HS(struct ilo_builder *builder,
291 int offset, int size, int entry_size)
292 {
293 gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_HS,
294 offset, size, entry_size);
295 }
296
297 static inline void
298 gen7_3DSTATE_URB_DS(struct ilo_builder *builder,
299 int offset, int size, int entry_size)
300 {
301 gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_DS,
302 offset, size, entry_size);
303 }
304
305 static inline void
306 gen7_3DSTATE_URB_GS(struct ilo_builder *builder,
307 int offset, int size, int entry_size)
308 {
309 gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_GS,
310 offset, size, entry_size);
311 }
312
313 static inline void
314 gen75_3DSTATE_VF(struct ilo_builder *builder,
315 bool enable_cut_index,
316 uint32_t cut_index)
317 {
318 const uint8_t cmd_len = 2;
319 uint32_t *dw;
320
321 ILO_DEV_ASSERT(builder->dev, 7.5, 7.5);
322
323 ilo_builder_batch_pointer(builder, cmd_len, &dw);
324
325 dw[0] = GEN75_RENDER_CMD(3D, 3DSTATE_VF) | (cmd_len - 2);
326 if (enable_cut_index)
327 dw[0] |= GEN75_VF_DW0_CUT_INDEX_ENABLE;
328
329 dw[1] = cut_index;
330 }
331
332 static inline void
333 gen6_3DSTATE_VF_STATISTICS(struct ilo_builder *builder,
334 bool enable)
335 {
336 const uint8_t cmd_len = 1;
337 const uint32_t dw0 = GEN6_RENDER_CMD(SINGLE_DW, 3DSTATE_VF_STATISTICS) |
338 enable;
339
340 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
341
342 ilo_builder_batch_write(builder, cmd_len, &dw0);
343 }
344
345 static inline void
346 gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder,
347 const struct ilo_ve_state *ve,
348 const struct ilo_vb_state *vb)
349 {
350 uint8_t cmd_len;
351 uint32_t *dw;
352 unsigned pos, hw_idx;
353
354 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
355
356 /*
357 * From the Sandy Bridge PRM, volume 2 part 1, page 82:
358 *
359 * "From 1 to 33 VBs can be specified..."
360 */
361 assert(ve->vb_count <= 33);
362
363 if (!ve->vb_count)
364 return;
365
366 cmd_len = 1 + 4 * ve->vb_count;
367 pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
368
369 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_BUFFERS) | (cmd_len - 2);
370 dw++;
371 pos++;
372
373 for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
374 const unsigned instance_divisor = ve->instance_divisors[hw_idx];
375 const unsigned pipe_idx = ve->vb_mapping[hw_idx];
376 const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx];
377
378 dw[0] = hw_idx << GEN6_VB_STATE_DW0_INDEX__SHIFT;
379
380 if (instance_divisor)
381 dw[0] |= GEN6_VB_STATE_DW0_ACCESS_INSTANCEDATA;
382 else
383 dw[0] |= GEN6_VB_STATE_DW0_ACCESS_VERTEXDATA;
384
385 if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
386 dw[0] |= GEN7_VB_STATE_DW0_ADDR_MODIFIED;
387
388 /* use null vb if there is no buffer or the stride is out of range */
389 if (cso->buffer && cso->stride <= 2048) {
390 const struct ilo_buffer *buf = ilo_buffer(cso->buffer);
391 const uint32_t start_offset = cso->buffer_offset;
392 const uint32_t end_offset = buf->bo_size - 1;
393
394 dw[0] |= cso->stride << GEN6_VB_STATE_DW0_PITCH__SHIFT;
395 ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0);
396 ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0);
397 }
398 else {
399 dw[0] |= 1 << 13;
400 dw[1] = 0;
401 dw[2] = 0;
402 }
403
404 dw[3] = instance_divisor;
405
406 dw += 4;
407 pos += 4;
408 }
409 }
410
411 /* the user vertex buffer must be uploaded with gen6_user_vertex_buffer() */
412 static inline void
413 gen6_user_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder,
414 uint32_t vb_begin, uint32_t vb_end,
415 uint32_t stride)
416 {
417 const struct ilo_builder_writer *bat =
418 &builder->writers[ILO_BUILDER_WRITER_BATCH];
419 const uint8_t cmd_len = 1 + 4;
420 uint32_t *dw;
421 unsigned pos;
422
423 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
424
425 pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
426
427 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_BUFFERS) | (cmd_len - 2);
428 dw++;
429 pos++;
430
431 /* VERTEX_BUFFER_STATE */
432 dw[0] = 0 << GEN6_VB_STATE_DW0_INDEX__SHIFT |
433 GEN6_VB_STATE_DW0_ACCESS_VERTEXDATA |
434 stride << GEN6_VB_STATE_DW0_PITCH__SHIFT;
435 if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
436 dw[0] |= GEN7_VB_STATE_DW0_ADDR_MODIFIED;
437
438 dw[3] = 0;
439
440 ilo_builder_batch_reloc(builder, pos + 1, bat->bo, vb_begin, 0);
441 ilo_builder_batch_reloc(builder, pos + 2, bat->bo, vb_end, 0);
442 }
443
444 static inline void
445 gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_builder *builder,
446 const struct ilo_ve_state *ve)
447 {
448 uint8_t cmd_len;
449 uint32_t *dw;
450 unsigned i;
451
452 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
453
454 /*
455 * From the Sandy Bridge PRM, volume 2 part 1, page 92:
456 *
457 * "At least one VERTEX_ELEMENT_STATE structure must be included."
458 *
459 * From the Sandy Bridge PRM, volume 2 part 1, page 93:
460 *
461 * "Up to 34 (DevSNB+) vertex elements are supported."
462 */
463 assert(ve->count + ve->prepend_nosrc_cso >= 1);
464 assert(ve->count + ve->prepend_nosrc_cso <= 34);
465
466 STATIC_ASSERT(Elements(ve->cso[0].payload) == 2);
467
468 cmd_len = 1 + 2 * (ve->count + ve->prepend_nosrc_cso);
469 ilo_builder_batch_pointer(builder, cmd_len, &dw);
470
471 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | (cmd_len - 2);
472 dw++;
473
474 if (ve->prepend_nosrc_cso) {
475 memcpy(dw, ve->nosrc_cso.payload, sizeof(ve->nosrc_cso.payload));
476 dw += 2;
477 }
478
479 for (i = 0; i < ve->count - ve->last_cso_edgeflag; i++) {
480 memcpy(dw, ve->cso[i].payload, sizeof(ve->cso[i].payload));
481 dw += 2;
482 }
483
484 if (ve->last_cso_edgeflag)
485 memcpy(dw, ve->edgeflag_cso.payload, sizeof(ve->edgeflag_cso.payload));
486 }
487
488 static inline void
489 gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
490 const struct ilo_ib_state *ib,
491 bool enable_cut_index)
492 {
493 const uint8_t cmd_len = 3;
494 struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
495 uint32_t start_offset, end_offset;
496 int format;
497 uint32_t *dw;
498 unsigned pos;
499
500 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
501
502 if (!buf)
503 return;
504
505 /* this is moved to the new 3DSTATE_VF */
506 if (ilo_dev_gen(builder->dev) >= ILO_GEN(7.5))
507 assert(!enable_cut_index);
508
509 switch (ib->hw_index_size) {
510 case 4:
511 format = GEN6_IB_DW0_FORMAT_DWORD;
512 break;
513 case 2:
514 format = GEN6_IB_DW0_FORMAT_WORD;
515 break;
516 case 1:
517 format = GEN6_IB_DW0_FORMAT_BYTE;
518 break;
519 default:
520 assert(!"unknown index size");
521 format = GEN6_IB_DW0_FORMAT_BYTE;
522 break;
523 }
524
525 /*
526 * set start_offset to 0 here and adjust pipe_draw_info::start with
527 * ib->draw_start_offset in 3DPRIMITIVE
528 */
529 start_offset = 0;
530 end_offset = buf->bo_size;
531
532 /* end_offset must also be aligned and is inclusive */
533 end_offset -= (end_offset % ib->hw_index_size);
534 end_offset--;
535
536 pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
537
538 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) |
539 format |
540 (cmd_len - 2);
541 if (enable_cut_index)
542 dw[0] |= GEN6_IB_DW0_CUT_INDEX_ENABLE;
543
544 ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0);
545 ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0);
546 }
547
548 static inline void
549 gen6_3DSTATE_VS(struct ilo_builder *builder,
550 const struct ilo_shader_state *vs)
551 {
552 const uint8_t cmd_len = 6;
553 const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
554 const struct ilo_shader_cso *cso;
555 uint32_t dw2, dw4, dw5, *dw;
556
557 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
558
559 if (!vs) {
560 ilo_builder_batch_pointer(builder, cmd_len, &dw);
561 dw[0] = dw0;
562 dw[1] = 0;
563 dw[2] = 0;
564 dw[3] = 0;
565 dw[4] = 0;
566 dw[5] = 0;
567
568 return;
569 }
570
571 cso = ilo_shader_get_kernel_cso(vs);
572 dw2 = cso->payload[0];
573 dw4 = cso->payload[1];
574 dw5 = cso->payload[2];
575
576 ilo_builder_batch_pointer(builder, cmd_len, &dw);
577 dw[0] = dw0;
578 dw[1] = ilo_shader_get_kernel_offset(vs);
579 dw[2] = dw2;
580 dw[3] = 0; /* scratch */
581 dw[4] = dw4;
582 dw[5] = dw5;
583 }
584
585 static inline void
586 gen7_3DSTATE_HS(struct ilo_builder *builder,
587 const struct ilo_shader_state *hs)
588 {
589 const uint8_t cmd_len = 7;
590 uint32_t *dw;
591
592 ILO_DEV_ASSERT(builder->dev, 7, 7.5);
593
594 assert(!hs);
595
596 ilo_builder_batch_pointer(builder, cmd_len, &dw);
597
598 dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2);
599 dw[1] = 0;
600 dw[2] = 0;
601 dw[3] = 0;
602 dw[4] = 0;
603 dw[5] = 0;
604 dw[6] = 0;
605 }
606
607 static inline void
608 gen7_3DSTATE_TE(struct ilo_builder *builder)
609 {
610 const uint8_t cmd_len = 4;
611 uint32_t *dw;
612
613 ILO_DEV_ASSERT(builder->dev, 7, 7.5);
614
615 ilo_builder_batch_pointer(builder, cmd_len, &dw);
616
617 dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_TE) | (cmd_len - 2);
618 dw[1] = 0;
619 dw[2] = 0;
620 dw[3] = 0;
621 }
622
623 static inline void
624 gen7_3DSTATE_DS(struct ilo_builder *builder,
625 const struct ilo_shader_state *ds)
626 {
627 const uint8_t cmd_len = 6;
628 uint32_t *dw;
629
630 ILO_DEV_ASSERT(builder->dev, 7, 7.5);
631
632 assert(!ds);
633
634 ilo_builder_batch_pointer(builder, cmd_len, &dw);
635
636 dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2);
637 dw[1] = 0;
638 dw[2] = 0;
639 dw[3] = 0;
640 dw[4] = 0;
641 dw[5] = 0;
642 }
643
644 static inline void
645 gen6_3DSTATE_GS(struct ilo_builder *builder,
646 const struct ilo_shader_state *gs,
647 const struct ilo_shader_state *vs,
648 int verts_per_prim)
649 {
650 const uint8_t cmd_len = 7;
651 uint32_t dw1, dw2, dw4, dw5, dw6, *dw;
652
653 ILO_DEV_ASSERT(builder->dev, 6, 6);
654
655 if (gs) {
656 const struct ilo_shader_cso *cso;
657
658 dw1 = ilo_shader_get_kernel_offset(gs);
659
660 cso = ilo_shader_get_kernel_cso(gs);
661 dw2 = cso->payload[0];
662 dw4 = cso->payload[1];
663 dw5 = cso->payload[2];
664 dw6 = cso->payload[3];
665 }
666 else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
667 struct ilo_shader_cso cso;
668 enum ilo_kernel_param param;
669
670 switch (verts_per_prim) {
671 case 1:
672 param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
673 break;
674 case 2:
675 param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
676 break;
677 default:
678 param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
679 break;
680 }
681
682 dw1 = ilo_shader_get_kernel_offset(vs) +
683 ilo_shader_get_kernel_param(vs, param);
684
685 /* cannot use VS's CSO */
686 ilo_gpe_init_gs_cso(builder->dev, vs, &cso);
687 dw2 = cso.payload[0];
688 dw4 = cso.payload[1];
689 dw5 = cso.payload[2];
690 dw6 = cso.payload[3];
691 }
692 else {
693 dw1 = 0;
694 dw2 = 0;
695 dw4 = 1 << GEN6_GS_DW4_URB_READ_LEN__SHIFT;
696 dw5 = GEN6_GS_DW5_STATISTICS;
697 dw6 = 0;
698 }
699
700 ilo_builder_batch_pointer(builder, cmd_len, &dw);
701
702 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
703 dw[1] = dw1;
704 dw[2] = dw2;
705 dw[3] = 0;
706 dw[4] = dw4;
707 dw[5] = dw5;
708 dw[6] = dw6;
709 }
710
711 static inline void
712 gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder,
713 int index, unsigned svbi,
714 unsigned max_svbi,
715 bool load_vertex_count)
716 {
717 const uint8_t cmd_len = 4;
718 uint32_t *dw;
719
720 ILO_DEV_ASSERT(builder->dev, 6, 6);
721 assert(index >= 0 && index < 4);
722
723 ilo_builder_batch_pointer(builder, cmd_len, &dw);
724
725 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS_SVB_INDEX) | (cmd_len - 2);
726
727 dw[1] = index << GEN6_SVBI_DW1_INDEX__SHIFT;
728 if (load_vertex_count)
729 dw[1] |= GEN6_SVBI_DW1_LOAD_INTERNAL_VERTEX_COUNT;
730
731 dw[2] = svbi;
732 dw[3] = max_svbi;
733 }
734
735 static inline void
736 gen7_3DSTATE_GS(struct ilo_builder *builder,
737 const struct ilo_shader_state *gs)
738 {
739 const uint8_t cmd_len = 7;
740 const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
741 const struct ilo_shader_cso *cso;
742 uint32_t dw2, dw4, dw5, *dw;
743
744 ILO_DEV_ASSERT(builder->dev, 7, 7.5);
745
746 if (!gs) {
747 ilo_builder_batch_pointer(builder, cmd_len, &dw);
748 dw[0] = dw0;
749 dw[1] = 0;
750 dw[2] = 0;
751 dw[3] = 0;
752 dw[4] = 0;
753 dw[5] = GEN7_GS_DW5_STATISTICS;
754 dw[6] = 0;
755 return;
756 }
757
758 cso = ilo_shader_get_kernel_cso(gs);
759 dw2 = cso->payload[0];
760 dw4 = cso->payload[1];
761 dw5 = cso->payload[2];
762
763 ilo_builder_batch_pointer(builder, cmd_len, &dw);
764
765 dw[0] = dw0;
766 dw[1] = ilo_shader_get_kernel_offset(gs);
767 dw[2] = dw2;
768 dw[3] = 0; /* scratch */
769 dw[4] = dw4;
770 dw[5] = dw5;
771 dw[6] = 0;
772 }
773
774 static inline void
775 gen7_3DSTATE_STREAMOUT(struct ilo_builder *builder,
776 unsigned buffer_mask,
777 int vertex_attrib_count,
778 bool rasterizer_discard)
779 {
780 const uint8_t cmd_len = 3;
781 const bool enable = (buffer_mask != 0);
782 const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_STREAMOUT) |
783 (cmd_len - 2);
784 uint32_t dw1, dw2, *dw;
785 int read_len;
786
787 ILO_DEV_ASSERT(builder->dev, 7, 7.5);
788
789 if (!enable) {
790 dw1 = 0 << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT;
791 if (rasterizer_discard)
792 dw1 |= GEN7_SO_DW1_RENDER_DISABLE;
793
794 dw2 = 0;
795
796 ilo_builder_batch_pointer(builder, cmd_len, &dw);
797 dw[0] = dw0;
798 dw[1] = dw1;
799 dw[2] = dw2;
800 return;
801 }
802
803 read_len = (vertex_attrib_count + 1) / 2;
804 if (!read_len)
805 read_len = 1;
806
807 dw1 = GEN7_SO_DW1_SO_ENABLE |
808 0 << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT |
809 GEN7_SO_DW1_STATISTICS |
810 buffer_mask << 8;
811
812 if (rasterizer_discard)
813 dw1 |= GEN7_SO_DW1_RENDER_DISABLE;
814
815 /* API_OPENGL */
816 if (true)
817 dw1 |= GEN7_SO_DW1_REORDER_TRAILING;
818
819 dw2 = 0 << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT |
820 0 << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT |
821 0 << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT |
822 0 << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT |
823 0 << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT |
824 0 << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT |
825 0 << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT |
826 (read_len - 1) << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT;
827
828 ilo_builder_batch_pointer(builder, cmd_len, &dw);
829
830 dw[0] = dw0;
831 dw[1] = dw1;
832 dw[2] = dw2;
833 }
834
835 static inline void
836 gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder,
837 const struct pipe_stream_output_info *so_info)
838 {
839 uint16_t cmd_len;
840 int buffer_selects, num_entries, i;
841 uint16_t so_decls[128];
842 uint32_t *dw;
843
844 ILO_DEV_ASSERT(builder->dev, 7, 7.5);
845
846 buffer_selects = 0;
847 num_entries = 0;
848
849 if (so_info) {
850 int buffer_offsets[PIPE_MAX_SO_BUFFERS];
851
852 memset(buffer_offsets, 0, sizeof(buffer_offsets));
853
854 for (i = 0; i < so_info->num_outputs; i++) {
855 unsigned decl, buf, reg, mask;
856
857 buf = so_info->output[i].output_buffer;
858
859 /* pad with holes */
860 assert(buffer_offsets[buf] <= so_info->output[i].dst_offset);
861 while (buffer_offsets[buf] < so_info->output[i].dst_offset) {
862 int num_dwords;
863
864 num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf];
865 if (num_dwords > 4)
866 num_dwords = 4;
867
868 decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
869 GEN7_SO_DECL_HOLE_FLAG |
870 ((1 << num_dwords) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
871
872 so_decls[num_entries++] = decl;
873 buffer_offsets[buf] += num_dwords;
874 }
875
876 reg = so_info->output[i].register_index;
877 mask = ((1 << so_info->output[i].num_components) - 1) <<
878 so_info->output[i].start_component;
879
880 decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
881 reg << GEN7_SO_DECL_REG_INDEX__SHIFT |
882 mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
883
884 so_decls[num_entries++] = decl;
885 buffer_selects |= 1 << buf;
886 buffer_offsets[buf] += so_info->output[i].num_components;
887 }
888 }
889
890 /*
891 * From the Ivy Bridge PRM, volume 2 part 1, page 201:
892 *
893 * "Errata: All 128 decls for all four streams must be included
894 * whenever this command is issued. The "Num Entries [n]" fields still
895 * contain the actual numbers of valid decls."
896 *
897 * Also note that "DWord Length" has 9 bits for this command, and the type
898 * of cmd_len is thus uint16_t.
899 */
900 cmd_len = 2 * 128 + 3;
901
902 ilo_builder_batch_pointer(builder, cmd_len, &dw);
903
904 dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SO_DECL_LIST) | (cmd_len - 2);
905 dw[1] = 0 << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT |
906 0 << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT |
907 0 << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT |
908 buffer_selects << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT;
909 dw[2] = 0 << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT |
910 0 << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT |
911 0 << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT |
912 num_entries << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT;
913 dw += 3;
914
915 for (i = 0; i < num_entries; i++) {
916 dw[0] = so_decls[i];
917 dw[1] = 0;
918 dw += 2;
919 }
920 for (; i < 128; i++) {
921 dw[0] = 0;
922 dw[1] = 0;
923 dw += 2;
924 }
925 }
926
927 static inline void
928 gen7_3DSTATE_SO_BUFFER(struct ilo_builder *builder,
929 int index, int base, int stride,
930 const struct pipe_stream_output_target *so_target)
931 {
932 const uint8_t cmd_len = 4;
933 const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_SO_BUFFER) |
934 (cmd_len - 2);
935 struct ilo_buffer *buf;
936 int end;
937 uint32_t *dw;
938 unsigned pos;
939
940 ILO_DEV_ASSERT(builder->dev, 7, 7.5);
941
942 if (!so_target || !so_target->buffer) {
943 ilo_builder_batch_pointer(builder, cmd_len, &dw);
944 dw[0] = dw0;
945 dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT;
946 dw[2] = 0;
947 dw[3] = 0;
948
949 return;
950 }
951
952 buf = ilo_buffer(so_target->buffer);
953
954 /* DWord-aligned */
955 assert(stride % 4 == 0 && base % 4 == 0);
956 assert(so_target->buffer_offset % 4 == 0);
957
958 stride &= ~3;
959 base = (base + so_target->buffer_offset) & ~3;
960 end = (base + so_target->buffer_size) & ~3;
961
962 pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
963
964 dw[0] = dw0;
965 dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT |
966 stride;
967
968 ilo_builder_batch_reloc(builder, pos + 2,
969 buf->bo, base, INTEL_RELOC_WRITE);
970 ilo_builder_batch_reloc(builder, pos + 3,
971 buf->bo, end, INTEL_RELOC_WRITE);
972 }
973
974 static inline void
975 gen6_3DSTATE_BINDING_TABLE_POINTERS(struct ilo_builder *builder,
976 uint32_t vs_binding_table,
977 uint32_t gs_binding_table,
978 uint32_t ps_binding_table)
979 {
980 const uint8_t cmd_len = 4;
981 uint32_t *dw;
982
983 ILO_DEV_ASSERT(builder->dev, 6, 6);
984
985 ilo_builder_batch_pointer(builder, cmd_len, &dw);
986
987 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_BINDING_TABLE_POINTERS) |
988 GEN6_PTR_BINDING_TABLE_DW0_VS_CHANGED |
989 GEN6_PTR_BINDING_TABLE_DW0_GS_CHANGED |
990 GEN6_PTR_BINDING_TABLE_DW0_PS_CHANGED |
991 (cmd_len - 2);
992 dw[1] = vs_binding_table;
993 dw[2] = gs_binding_table;
994 dw[3] = ps_binding_table;
995 }
996
997 static inline void
998 gen6_3DSTATE_SAMPLER_STATE_POINTERS(struct ilo_builder *builder,
999 uint32_t vs_sampler_state,
1000 uint32_t gs_sampler_state,
1001 uint32_t ps_sampler_state)
1002 {
1003 const uint8_t cmd_len = 4;
1004 uint32_t *dw;
1005
1006 ILO_DEV_ASSERT(builder->dev, 6, 6);
1007
1008 ilo_builder_batch_pointer(builder, cmd_len, &dw);
1009
1010 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLER_STATE_POINTERS) |
1011 GEN6_PTR_SAMPLER_DW0_VS_CHANGED |
1012 GEN6_PTR_SAMPLER_DW0_GS_CHANGED |
1013 GEN6_PTR_SAMPLER_DW0_PS_CHANGED |
1014 (cmd_len - 2);
1015 dw[1] = vs_sampler_state;
1016 dw[2] = gs_sampler_state;
1017 dw[3] = ps_sampler_state;
1018 }
1019
1020 static inline void
1021 gen7_3dstate_pointer(struct ilo_builder *builder,
1022 int subop, uint32_t pointer)
1023 {
1024 const uint32_t cmd = GEN6_RENDER_TYPE_RENDER |
1025 GEN6_RENDER_SUBTYPE_3D |
1026 subop;
1027 const uint8_t cmd_len = 2;
1028 uint32_t *dw;
1029
1030 ILO_DEV_ASSERT(builder->dev, 7, 7.5);
1031
1032 ilo_builder_batch_pointer(builder, cmd_len, &dw);
1033
1034 dw[0] = cmd | (cmd_len - 2);
1035 dw[1] = pointer;
1036 }
1037
1038 static inline void
1039 gen7_3DSTATE_BINDING_TABLE_POINTERS_VS(struct ilo_builder *builder,
1040 uint32_t binding_table)
1041 {
1042 gen7_3dstate_pointer(builder,
1043 GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_VS,
1044 binding_table);
1045 }
1046
1047 static inline void
1048 gen7_3DSTATE_BINDING_TABLE_POINTERS_HS(struct ilo_builder *builder,
1049 uint32_t binding_table)
1050 {
1051 gen7_3dstate_pointer(builder,
1052 GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_HS,
1053 binding_table);
1054 }
1055
1056 static inline void
1057 gen7_3DSTATE_BINDING_TABLE_POINTERS_DS(struct ilo_builder *builder,
1058 uint32_t binding_table)
1059 {
1060 gen7_3dstate_pointer(builder,
1061 GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_DS,
1062 binding_table);
1063 }
1064
1065 static inline void
1066 gen7_3DSTATE_BINDING_TABLE_POINTERS_GS(struct ilo_builder *builder,
1067 uint32_t binding_table)
1068 {
1069 gen7_3dstate_pointer(builder,
1070 GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_GS,
1071 binding_table);
1072 }
1073
1074 static inline void
1075 gen7_3DSTATE_SAMPLER_STATE_POINTERS_VS(struct ilo_builder *builder,
1076 uint32_t sampler_state)
1077 {
1078 gen7_3dstate_pointer(builder,
1079 GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_VS,
1080 sampler_state);
1081 }
1082
1083 static inline void
1084 gen7_3DSTATE_SAMPLER_STATE_POINTERS_HS(struct ilo_builder *builder,
1085 uint32_t sampler_state)
1086 {
1087 gen7_3dstate_pointer(builder,
1088 GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_HS,
1089 sampler_state);
1090 }
1091
1092 static inline void
1093 gen7_3DSTATE_SAMPLER_STATE_POINTERS_DS(struct ilo_builder *builder,
1094 uint32_t sampler_state)
1095 {
1096 gen7_3dstate_pointer(builder,
1097 GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_DS,
1098 sampler_state);
1099 }
1100
1101 static inline void
1102 gen7_3DSTATE_SAMPLER_STATE_POINTERS_GS(struct ilo_builder *builder,
1103 uint32_t sampler_state)
1104 {
1105 gen7_3dstate_pointer(builder,
1106 GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_GS,
1107 sampler_state);
1108 }
1109
/**
 * Fill the four constant-buffer dwords of a Gen6 3DSTATE_CONSTANT_* command.
 *
 * For each of the four slots that is below \p num_bufs and has a non-zero
 * size, the dword is the buffer value OR'ed with the read length (size in
 * 256-bit units, minus one); every other slot is written as zero.
 *
 * \param dev              not used by this function
 * \param bufs             buffer values; each must be a multiple of 32
 * \param sizes            buffer sizes in bytes
 * \param max_read_length  asserted upper bound on the summed read lengths
 * \param dw               output array of four dwords
 * \param num_dwords       must be 4
 * \return a bitmask with bit i set when slot i is enabled
 */
static inline unsigned
gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs, int max_read_length,
                           uint32_t *dw, int num_dwords)
{
   unsigned enabled_mask = 0x0;
   int units_total = 0;
   int slot;

   assert(num_dwords == 4);

   for (slot = 0; slot < 4; slot++) {
      int units;

      if (slot >= num_bufs || !sizes[slot]) {
         dw[slot] = 0;
         continue;
      }

      /* size rounded up to 256-bit units; the field stores units - 1 */
      units = (sizes[slot] + 31) / 32;

      assert(bufs[slot] % 32 == 0);
      assert(units - 1 < 32);

      dw[slot] = bufs[slot] | (units - 1);
      enabled_mask |= 1 << slot;

      units_total += units;
   }

   assert(units_total <= max_read_length);

   return enabled_mask;
}
1144
1145 static inline void
1146 gen6_3DSTATE_CONSTANT_VS(struct ilo_builder *builder,
1147 const uint32_t *bufs, const int *sizes,
1148 int num_bufs)
1149 {
1150 const uint8_t cmd_len = 5;
1151 uint32_t buf_dw[4], buf_enabled, *dw;
1152
1153 ILO_DEV_ASSERT(builder->dev, 6, 6);
1154
1155 assert(num_bufs <= 4);
1156
1157 /*
1158 * From the Sandy Bridge PRM, volume 2 part 1, page 138:
1159 *
1160 * "The sum of all four read length fields (each incremented to
1161 * represent the actual read length) must be less than or equal to 32"
1162 */
1163 buf_enabled = gen6_fill_3dstate_constant(builder->dev,
1164 bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw));
1165
1166 ilo_builder_batch_pointer(builder, cmd_len, &dw);
1167
1168 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_VS) |
1169 buf_enabled << 12 |
1170 (cmd_len - 2);
1171 memcpy(&dw[1], buf_dw, sizeof(buf_dw));
1172 }
1173
1174 static inline void
1175 gen6_3DSTATE_CONSTANT_GS(struct ilo_builder *builder,
1176 const uint32_t *bufs, const int *sizes,
1177 int num_bufs)
1178 {
1179 const uint8_t cmd_len = 5;
1180 uint32_t buf_dw[4], buf_enabled, *dw;
1181
1182 ILO_DEV_ASSERT(builder->dev, 6, 6);
1183
1184 assert(num_bufs <= 4);
1185
1186 /*
1187 * From the Sandy Bridge PRM, volume 2 part 1, page 161:
1188 *
1189 * "The sum of all four read length fields (each incremented to
1190 * represent the actual read length) must be less than or equal to 64"
1191 */
1192 buf_enabled = gen6_fill_3dstate_constant(builder->dev,
1193 bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));
1194
1195 ilo_builder_batch_pointer(builder, cmd_len, &dw);
1196
1197 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_GS) |
1198 buf_enabled << 12 |
1199 (cmd_len - 2);
1200 memcpy(&dw[1], buf_dw, sizeof(buf_dw));
1201 }
1202
1203 static inline void
1204 gen7_3dstate_constant(struct ilo_builder *builder,
1205 int subop,
1206 const uint32_t *bufs, const int *sizes,
1207 int num_bufs)
1208 {
1209 const uint32_t cmd = GEN6_RENDER_TYPE_RENDER |
1210 GEN6_RENDER_SUBTYPE_3D |
1211 subop;
1212 const uint8_t cmd_len = 7;
1213 uint32_t payload[6], *dw;
1214 int total_read_length, i;
1215
1216 ILO_DEV_ASSERT(builder->dev, 7, 7.5);
1217
1218 /* VS, HS, DS, GS, and PS variants */
1219 assert(subop >= GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS &&
1220 subop <= GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_DS &&
1221 subop != GEN6_RENDER_OPCODE_3DSTATE_SAMPLE_MASK);
1222
1223 assert(num_bufs <= 4);
1224
1225 payload[0] = 0;
1226 payload[1] = 0;
1227
1228 total_read_length = 0;
1229 for (i = 0; i < 4; i++) {
1230 int read_len;
1231
1232 /*
1233 * From the Ivy Bridge PRM, volume 2 part 1, page 112:
1234 *
1235 * "Constant buffers must be enabled in order from Constant Buffer 0
1236 * to Constant Buffer 3 within this command. For example, it is
1237 * not allowed to enable Constant Buffer 1 by programming a
1238 * non-zero value in the VS Constant Buffer 1 Read Length without a
1239 * non-zero value in VS Constant Buffer 0 Read Length."
1240 */
1241 if (i >= num_bufs || !sizes[i]) {
1242 for (; i < 4; i++) {
1243 assert(i >= num_bufs || !sizes[i]);
1244 payload[2 + i] = 0;
1245 }
1246 break;
1247 }
1248
1249 /* read lengths are in 256-bit units */
1250 read_len = (sizes[i] + 31) / 32;
1251 /* the lower 5 bits are used for memory object control state */
1252 assert(bufs[i] % 32 == 0);
1253
1254 payload[i / 2] |= read_len << ((i % 2) ? 16 : 0);
1255 payload[2 + i] = bufs[i];
1256
1257 total_read_length += read_len;
1258 }
1259
1260 /*
1261 * From the Ivy Bridge PRM, volume 2 part 1, page 113:
1262 *
1263 * "The sum of all four read length fields must be less than or equal
1264 * to the size of 64"
1265 */
1266 assert(total_read_length <= 64);
1267
1268 ilo_builder_batch_pointer(builder, cmd_len, &dw);
1269
1270 dw[0] = cmd | (cmd_len - 2);
1271 memcpy(&dw[1], payload, sizeof(payload));
1272 }
1273
1274 static inline void
1275 gen7_3DSTATE_CONSTANT_VS(struct ilo_builder *builder,
1276 const uint32_t *bufs, const int *sizes,
1277 int num_bufs)
1278 {
1279 gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS,
1280 bufs, sizes, num_bufs);
1281 }
1282
1283 static inline void
1284 gen7_3DSTATE_CONSTANT_HS(struct ilo_builder *builder,
1285 const uint32_t *bufs, const int *sizes,
1286 int num_bufs)
1287 {
1288 gen7_3dstate_constant(builder, GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_HS,
1289 bufs, sizes, num_bufs);
1290 }
1291
1292 static inline void
1293 gen7_3DSTATE_CONSTANT_DS(struct ilo_builder *builder,
1294 const uint32_t *bufs, const int *sizes,
1295 int num_bufs)
1296 {
1297 gen7_3dstate_constant(builder, GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_DS,
1298 bufs, sizes, num_bufs);
1299 }
1300
1301 static inline void
1302 gen7_3DSTATE_CONSTANT_GS(struct ilo_builder *builder,
1303 const uint32_t *bufs, const int *sizes,
1304 int num_bufs)
1305 {
1306 gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_GS,
1307 bufs, sizes, num_bufs);
1308 }
1309
1310 static inline uint32_t
1311 gen6_BINDING_TABLE_STATE(struct ilo_builder *builder,
1312 uint32_t *surface_states,
1313 int num_surface_states)
1314 {
1315 const int state_align = 32;
1316 const int state_len = num_surface_states;
1317
1318 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
1319
1320 /*
1321 * From the Sandy Bridge PRM, volume 4 part 1, page 69:
1322 *
1323 * "It is stored as an array of up to 256 elements..."
1324 */
1325 assert(num_surface_states <= 256);
1326
1327 if (!num_surface_states)
1328 return 0;
1329
1330 return ilo_builder_surface_write(builder, ILO_BUILDER_ITEM_BINDING_TABLE,
1331 state_align, state_len, surface_states);
1332 }
1333
1334 static inline uint32_t
1335 gen6_SURFACE_STATE(struct ilo_builder *builder,
1336 const struct ilo_view_surface *surf,
1337 bool for_render)
1338 {
1339 const int state_align = 32;
1340 const int state_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 8 : 6;
1341 uint32_t state_offset;
1342
1343 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
1344
1345 state_offset = ilo_builder_surface_write(builder, ILO_BUILDER_ITEM_SURFACE,
1346 state_align, state_len, surf->payload);
1347
1348 if (surf->bo) {
1349 ilo_builder_surface_reloc(builder, state_offset, 1, surf->bo,
1350 surf->payload[1], (for_render) ? INTEL_RELOC_WRITE : 0);
1351 }
1352
1353 return state_offset;
1354 }
1355
1356 static inline uint32_t
1357 gen6_so_SURFACE_STATE(struct ilo_builder *builder,
1358 const struct pipe_stream_output_target *so,
1359 const struct pipe_stream_output_info *so_info,
1360 int so_index)
1361 {
1362 struct ilo_buffer *buf = ilo_buffer(so->buffer);
1363 unsigned bo_offset, struct_size;
1364 enum pipe_format elem_format;
1365 struct ilo_view_surface surf;
1366
1367 ILO_DEV_ASSERT(builder->dev, 6, 6);
1368
1369 bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
1370 struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
1371
1372 switch (so_info->output[so_index].num_components) {
1373 case 1:
1374 elem_format = PIPE_FORMAT_R32_FLOAT;
1375 break;
1376 case 2:
1377 elem_format = PIPE_FORMAT_R32G32_FLOAT;
1378 break;
1379 case 3:
1380 elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
1381 break;
1382 case 4:
1383 elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
1384 break;
1385 default:
1386 assert(!"unexpected SO components length");
1387 elem_format = PIPE_FORMAT_R32_FLOAT;
1388 break;
1389 }
1390
1391 ilo_gpe_init_view_surface_for_buffer(builder->dev, buf, bo_offset,
1392 so->buffer_size, struct_size, elem_format, false, true, &surf);
1393
1394 return gen6_SURFACE_STATE(builder, &surf, false);
1395 }
1396
1397 static inline uint32_t
1398 gen6_SAMPLER_STATE(struct ilo_builder *builder,
1399 const struct ilo_sampler_cso * const *samplers,
1400 const struct pipe_sampler_view * const *views,
1401 const uint32_t *sampler_border_colors,
1402 int num_samplers)
1403 {
1404 const int state_align = 32;
1405 const int state_len = 4 * num_samplers;
1406 uint32_t state_offset, *dw;
1407 int i;
1408
1409 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
1410
1411 /*
1412 * From the Sandy Bridge PRM, volume 4 part 1, page 101:
1413 *
1414 * "The sampler state is stored as an array of up to 16 elements..."
1415 */
1416 assert(num_samplers <= 16);
1417
1418 if (!num_samplers)
1419 return 0;
1420
1421 /*
1422 * From the Sandy Bridge PRM, volume 2 part 1, page 132:
1423 *
1424 * "(Sampler Count of 3DSTATE_VS) Specifies how many samplers (in
1425 * multiples of 4) the vertex shader 0 kernel uses. Used only for
1426 * prefetching the associated sampler state entries.
1427 *
1428 * It also applies to other shader stages.
1429 */
1430 ilo_builder_dynamic_pad_top(builder, 4 * (4 - (num_samplers % 4)));
1431
1432 state_offset = ilo_builder_dynamic_pointer(builder,
1433 ILO_BUILDER_ITEM_SAMPLER, state_align, state_len, &dw);
1434
1435 for (i = 0; i < num_samplers; i++) {
1436 const struct ilo_sampler_cso *sampler = samplers[i];
1437 const struct pipe_sampler_view *view = views[i];
1438 const uint32_t border_color = sampler_border_colors[i];
1439 uint32_t dw_filter, dw_wrap;
1440
1441 /* there may be holes */
1442 if (!sampler || !view) {
1443 /* disabled sampler */
1444 dw[0] = 1 << 31;
1445 dw[1] = 0;
1446 dw[2] = 0;
1447 dw[3] = 0;
1448 dw += 4;
1449
1450 continue;
1451 }
1452
1453 /* determine filter and wrap modes */
1454 switch (view->texture->target) {
1455 case PIPE_TEXTURE_1D:
1456 dw_filter = (sampler->anisotropic) ?
1457 sampler->dw_filter_aniso : sampler->dw_filter;
1458 dw_wrap = sampler->dw_wrap_1d;
1459 break;
1460 case PIPE_TEXTURE_3D:
1461 /*
1462 * From the Sandy Bridge PRM, volume 4 part 1, page 103:
1463 *
1464 * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
1465 * surfaces of type SURFTYPE_3D."
1466 */
1467 dw_filter = sampler->dw_filter;
1468 dw_wrap = sampler->dw_wrap;
1469 break;
1470 case PIPE_TEXTURE_CUBE:
1471 dw_filter = (sampler->anisotropic) ?
1472 sampler->dw_filter_aniso : sampler->dw_filter;
1473 dw_wrap = sampler->dw_wrap_cube;
1474 break;
1475 default:
1476 dw_filter = (sampler->anisotropic) ?
1477 sampler->dw_filter_aniso : sampler->dw_filter;
1478 dw_wrap = sampler->dw_wrap;
1479 break;
1480 }
1481
1482 dw[0] = sampler->payload[0];
1483 dw[1] = sampler->payload[1];
1484 assert(!(border_color & 0x1f));
1485 dw[2] = border_color;
1486 dw[3] = sampler->payload[2];
1487
1488 dw[0] |= dw_filter;
1489
1490 if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
1491 dw[3] |= dw_wrap;
1492 }
1493 else {
1494 /*
1495 * From the Sandy Bridge PRM, volume 4 part 1, page 21:
1496 *
1497 * "[DevSNB] Errata: Incorrect behavior is observed in cases
1498 * where the min and mag mode filters are different and
1499 * SurfMinLOD is nonzero. The determination of MagMode uses the
1500 * following equation instead of the one in the above
1501 * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
1502 *
1503 * As a way to work around that, we set Base to
1504 * view->u.tex.first_level.
1505 */
1506 dw[0] |= view->u.tex.first_level << 22;
1507
1508 dw[1] |= dw_wrap;
1509 }
1510
1511 dw += 4;
1512 }
1513
1514 return state_offset;
1515 }
1516
1517 static inline uint32_t
1518 gen6_SAMPLER_BORDER_COLOR_STATE(struct ilo_builder *builder,
1519 const struct ilo_sampler_cso *sampler)
1520 {
1521 const int state_align = 32;
1522 const int state_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 4 : 12;
1523
1524 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
1525
1526 assert(Elements(sampler->payload) >= 3 + state_len);
1527
1528 /* see ilo_gpe_init_sampler_cso() */
1529 return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_BLOB,
1530 state_align, state_len, &sampler->payload[3]);
1531 }
1532
1533 static inline uint32_t
1534 gen6_push_constant_buffer(struct ilo_builder *builder,
1535 int size, void **pcb)
1536 {
1537 /*
1538 * For all VS, GS, FS, and CS push constant buffers, they must be aligned
1539 * to 32 bytes, and their sizes are specified in 256-bit units.
1540 */
1541 const int state_align = 32;
1542 const int state_len = align(size, 32) / 4;
1543 uint32_t state_offset;
1544 char *buf;
1545
1546 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
1547
1548 state_offset = ilo_builder_dynamic_pointer(builder,
1549 ILO_BUILDER_ITEM_BLOB, state_align, state_len, (uint32_t **) &buf);
1550
1551 /* zero out the unused range */
1552 if (size < state_len * 4)
1553 memset(&buf[size], 0, state_len * 4 - size);
1554
1555 if (pcb)
1556 *pcb = buf;
1557
1558 return state_offset;
1559 }
1560
1561 static inline uint32_t
1562 gen6_user_vertex_buffer(struct ilo_builder *builder,
1563 int size, const void *vertices)
1564 {
1565 const int state_align = 8;
1566 const int state_len = size / 4;
1567
1568 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
1569
1570 assert(size % 4 == 0);
1571
1572 return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_BLOB,
1573 state_align, state_len, vertices);
1574 }
1575
1576 #endif /* ILO_BUILDER_3D_TOP_H */