9794e92393ec52bbece0093aa3d4aac9448fe2bb
[mesa.git] / src / gallium / drivers / ilo / ilo_builder_3d_top.h
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2014 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #ifndef ILO_BUILDER_3D_TOP_H
29 #define ILO_BUILDER_3D_TOP_H
30
31 #include "genhw/genhw.h"
32 #include "intel_winsys.h"
33
34 #include "ilo_common.h"
35 #include "ilo_resource.h"
36 #include "ilo_shader.h"
37 #include "ilo_state.h"
38 #include "ilo_state_3d.h"
39 #include "ilo_builder.h"
40
41 static inline void
42 gen6_3DSTATE_URB(struct ilo_builder *builder,
43 int vs_total_size, int gs_total_size,
44 int vs_entry_size, int gs_entry_size)
45 {
46 const uint8_t cmd_len = 3;
47 const int row_size = 128; /* 1024 bits */
48 int vs_alloc_size, gs_alloc_size;
49 int vs_num_entries, gs_num_entries;
50 uint32_t *dw;
51
52 ILO_DEV_ASSERT(builder->dev, 6, 6);
53
54 /* in 1024-bit URB rows */
55 vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
56 gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
57
58 /* the valid range is [1, 5] */
59 if (!vs_alloc_size)
60 vs_alloc_size = 1;
61 if (!gs_alloc_size)
62 gs_alloc_size = 1;
63 assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
64
65 /* the valid range is [24, 256] in multiples of 4 */
66 vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
67 if (vs_num_entries > 256)
68 vs_num_entries = 256;
69 assert(vs_num_entries >= 24);
70
71 /* the valid range is [0, 256] in multiples of 4 */
72 gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
73 if (gs_num_entries > 256)
74 gs_num_entries = 256;
75
76 ilo_builder_batch_pointer(builder, cmd_len, &dw);
77
78 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_URB) | (cmd_len - 2);
79 dw[1] = (vs_alloc_size - 1) << GEN6_URB_DW1_VS_ENTRY_SIZE__SHIFT |
80 vs_num_entries << GEN6_URB_DW1_VS_ENTRY_COUNT__SHIFT;
81 dw[2] = gs_num_entries << GEN6_URB_DW2_GS_ENTRY_COUNT__SHIFT |
82 (gs_alloc_size - 1) << GEN6_URB_DW2_GS_ENTRY_SIZE__SHIFT;
83 }
84
85 static inline void
86 gen7_3dstate_push_constant_alloc(struct ilo_builder *builder,
87 int subop, int offset, int size)
88 {
89 const uint32_t cmd = GEN6_RENDER_TYPE_RENDER |
90 GEN6_RENDER_SUBTYPE_3D |
91 subop;
92 const uint8_t cmd_len = 2;
93 uint32_t *dw;
94 int end;
95
96 ILO_DEV_ASSERT(builder->dev, 7, 7.5);
97
98 /* VS, HS, DS, GS, and PS variants */
99 assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS &&
100 subop <= GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS);
101
102 /*
103 * From the Ivy Bridge PRM, volume 2 part 1, page 68:
104 *
105 * "(A table that says the maximum size of each constant buffer is
106 * 16KB")
107 *
108 * From the Ivy Bridge PRM, volume 2 part 1, page 115:
109 *
110 * "The sum of the Constant Buffer Offset and the Constant Buffer Size
111 * may not exceed the maximum value of the Constant Buffer Size."
112 *
113 * Thus, the valid range of buffer end is [0KB, 16KB].
114 */
115 end = (offset + size) / 1024;
116 if (end > 16) {
117 assert(!"invalid constant buffer end");
118 end = 16;
119 }
120
121 /* the valid range of buffer offset is [0KB, 15KB] */
122 offset = (offset + 1023) / 1024;
123 if (offset > 15) {
124 assert(!"invalid constant buffer offset");
125 offset = 15;
126 }
127
128 if (offset > end) {
129 assert(!size);
130 offset = end;
131 }
132
133 /* the valid range of buffer size is [0KB, 15KB] */
134 size = end - offset;
135 if (size > 15) {
136 assert(!"invalid constant buffer size");
137 size = 15;
138 }
139
140 ilo_builder_batch_pointer(builder, cmd_len, &dw);
141
142 dw[0] = cmd | (cmd_len - 2);
143 dw[1] = offset << GEN7_PCB_ALLOC_ANY_DW1_OFFSET__SHIFT |
144 size;
145 }
146
147 static inline void
148 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(struct ilo_builder *builder,
149 int offset, int size)
150 {
151 gen7_3dstate_push_constant_alloc(builder,
152 GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_VS, offset, size);
153 }
154
155 static inline void
156 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_HS(struct ilo_builder *builder,
157 int offset, int size)
158 {
159 gen7_3dstate_push_constant_alloc(builder,
160 GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_HS, offset, size);
161 }
162
163 static inline void
164 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_DS(struct ilo_builder *builder,
165 int offset, int size)
166 {
167 gen7_3dstate_push_constant_alloc(builder,
168 GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_DS, offset, size);
169 }
170
171 static inline void
172 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_GS(struct ilo_builder *builder,
173 int offset, int size)
174 {
175 gen7_3dstate_push_constant_alloc(builder,
176 GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_GS, offset, size);
177 }
178
179 static inline void
180 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(struct ilo_builder *builder,
181 int offset, int size)
182 {
183 gen7_3dstate_push_constant_alloc(builder,
184 GEN7_RENDER_OPCODE_3DSTATE_PUSH_CONSTANT_ALLOC_PS, offset, size);
185 }
186
187 static inline void
188 gen7_3dstate_urb(struct ilo_builder *builder,
189 int subop, int offset, int size,
190 int entry_size)
191 {
192 const uint32_t cmd = GEN6_RENDER_TYPE_RENDER |
193 GEN6_RENDER_SUBTYPE_3D |
194 subop;
195 const uint8_t cmd_len = 2;
196 const int row_size = 64; /* 512 bits */
197 int alloc_size, num_entries, min_entries, max_entries;
198 uint32_t *dw;
199
200 ILO_DEV_ASSERT(builder->dev, 7, 7.5);
201
202 /* VS, HS, DS, and GS variants */
203 assert(subop >= GEN7_RENDER_OPCODE_3DSTATE_URB_VS &&
204 subop <= GEN7_RENDER_OPCODE_3DSTATE_URB_GS);
205
206 /* in multiples of 8KB */
207 assert(offset % 8192 == 0);
208 offset /= 8192;
209
210 /* in multiple of 512-bit rows */
211 alloc_size = (entry_size + row_size - 1) / row_size;
212 if (!alloc_size)
213 alloc_size = 1;
214
215 /*
216 * From the Ivy Bridge PRM, volume 2 part 1, page 34:
217 *
218 * "VS URB Entry Allocation Size equal to 4(5 512-bit URB rows) may
219 * cause performance to decrease due to banking in the URB. Element
220 * sizes of 16 to 20 should be programmed with six 512-bit URB rows."
221 */
222 if (subop == GEN7_RENDER_OPCODE_3DSTATE_URB_VS && alloc_size == 5)
223 alloc_size = 6;
224
225 /* in multiples of 8 */
226 num_entries = (size / row_size / alloc_size) & ~7;
227
228 switch (subop) {
229 case GEN7_RENDER_OPCODE_3DSTATE_URB_VS:
230 switch (ilo_dev_gen(builder->dev)) {
231 case ILO_GEN(7.5):
232 max_entries = (builder->dev->gt >= 2) ? 1664 : 640;
233 min_entries = (builder->dev->gt >= 2) ? 64 : 32;
234 break;
235 case ILO_GEN(7):
236 default:
237 max_entries = (builder->dev->gt == 2) ? 704 : 512;
238 min_entries = 32;
239 break;
240 }
241
242 assert(num_entries >= min_entries);
243 if (num_entries > max_entries)
244 num_entries = max_entries;
245 break;
246 case GEN7_RENDER_OPCODE_3DSTATE_URB_HS:
247 max_entries = (builder->dev->gt == 2) ? 64 : 32;
248 if (num_entries > max_entries)
249 num_entries = max_entries;
250 break;
251 case GEN7_RENDER_OPCODE_3DSTATE_URB_DS:
252 if (num_entries)
253 assert(num_entries >= 138);
254 break;
255 case GEN7_RENDER_OPCODE_3DSTATE_URB_GS:
256 switch (ilo_dev_gen(builder->dev)) {
257 case ILO_GEN(7.5):
258 max_entries = (builder->dev->gt >= 2) ? 640 : 256;
259 break;
260 case ILO_GEN(7):
261 default:
262 max_entries = (builder->dev->gt == 2) ? 320 : 192;
263 break;
264 }
265
266 if (num_entries > max_entries)
267 num_entries = max_entries;
268 break;
269 default:
270 break;
271 }
272
273 ilo_builder_batch_pointer(builder, cmd_len, &dw);
274
275 dw[0] = cmd | (cmd_len - 2);
276 dw[1] = offset << GEN7_URB_ANY_DW1_OFFSET__SHIFT |
277 (alloc_size - 1) << GEN7_URB_ANY_DW1_ENTRY_SIZE__SHIFT |
278 num_entries;
279 }
280
281 static inline void
282 gen7_3DSTATE_URB_VS(struct ilo_builder *builder,
283 int offset, int size, int entry_size)
284 {
285 gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_VS,
286 offset, size, entry_size);
287 }
288
289 static inline void
290 gen7_3DSTATE_URB_HS(struct ilo_builder *builder,
291 int offset, int size, int entry_size)
292 {
293 gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_HS,
294 offset, size, entry_size);
295 }
296
297 static inline void
298 gen7_3DSTATE_URB_DS(struct ilo_builder *builder,
299 int offset, int size, int entry_size)
300 {
301 gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_DS,
302 offset, size, entry_size);
303 }
304
305 static inline void
306 gen7_3DSTATE_URB_GS(struct ilo_builder *builder,
307 int offset, int size, int entry_size)
308 {
309 gen7_3dstate_urb(builder, GEN7_RENDER_OPCODE_3DSTATE_URB_GS,
310 offset, size, entry_size);
311 }
312
313 static inline void
314 gen75_3DSTATE_VF(struct ilo_builder *builder,
315 bool enable_cut_index,
316 uint32_t cut_index)
317 {
318 const uint8_t cmd_len = 2;
319 uint32_t *dw;
320
321 ILO_DEV_ASSERT(builder->dev, 7.5, 7.5);
322
323 ilo_builder_batch_pointer(builder, cmd_len, &dw);
324
325 dw[0] = GEN75_RENDER_CMD(3D, 3DSTATE_VF) | (cmd_len - 2);
326 if (enable_cut_index)
327 dw[0] |= GEN75_VF_DW0_CUT_INDEX_ENABLE;
328
329 dw[1] = cut_index;
330 }
331
332 static inline void
333 gen6_3DSTATE_VF_STATISTICS(struct ilo_builder *builder,
334 bool enable)
335 {
336 const uint8_t cmd_len = 1;
337 const uint32_t dw0 = GEN6_RENDER_CMD(SINGLE_DW, 3DSTATE_VF_STATISTICS) |
338 enable;
339
340 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
341
342 ilo_builder_batch_write(builder, cmd_len, &dw0);
343 }
344
345 static inline void
346 gen6_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder,
347 const struct ilo_ve_state *ve,
348 const struct ilo_vb_state *vb)
349 {
350 uint8_t cmd_len;
351 uint32_t *dw;
352 unsigned pos, hw_idx;
353
354 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
355
356 /*
357 * From the Sandy Bridge PRM, volume 2 part 1, page 82:
358 *
359 * "From 1 to 33 VBs can be specified..."
360 */
361 assert(ve->vb_count <= 33);
362
363 if (!ve->vb_count)
364 return;
365
366 cmd_len = 1 + 4 * ve->vb_count;
367 pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
368
369 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_BUFFERS) | (cmd_len - 2);
370 dw++;
371 pos++;
372
373 for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
374 const unsigned instance_divisor = ve->instance_divisors[hw_idx];
375 const unsigned pipe_idx = ve->vb_mapping[hw_idx];
376 const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx];
377
378 dw[0] = hw_idx << GEN6_VB_STATE_DW0_INDEX__SHIFT;
379
380 if (instance_divisor)
381 dw[0] |= GEN6_VB_STATE_DW0_ACCESS_INSTANCEDATA;
382 else
383 dw[0] |= GEN6_VB_STATE_DW0_ACCESS_VERTEXDATA;
384
385 if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
386 dw[0] |= GEN7_VB_STATE_DW0_ADDR_MODIFIED;
387
388 /* use null vb if there is no buffer or the stride is out of range */
389 if (cso->buffer && cso->stride <= 2048) {
390 const struct ilo_buffer *buf = ilo_buffer(cso->buffer);
391 const uint32_t start_offset = cso->buffer_offset;
392 const uint32_t end_offset = buf->bo_size - 1;
393
394 dw[0] |= cso->stride << GEN6_VB_STATE_DW0_PITCH__SHIFT;
395 ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0);
396 ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0);
397 }
398 else {
399 dw[0] |= 1 << 13;
400 dw[1] = 0;
401 dw[2] = 0;
402 }
403
404 dw[3] = instance_divisor;
405
406 dw += 4;
407 pos += 4;
408 }
409 }
410
411 /* the user vertex buffer must be uploaded with gen6_user_vertex_buffer() */
412 static inline void
413 gen6_user_3DSTATE_VERTEX_BUFFERS(struct ilo_builder *builder,
414 uint32_t vb_begin, uint32_t vb_end,
415 uint32_t stride)
416 {
417 const struct ilo_builder_writer *bat =
418 &builder->writers[ILO_BUILDER_WRITER_BATCH];
419 const uint8_t cmd_len = 1 + 4;
420 uint32_t *dw;
421 unsigned pos;
422
423 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
424
425 pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
426
427 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_BUFFERS) | (cmd_len - 2);
428 dw++;
429 pos++;
430
431 /* VERTEX_BUFFER_STATE */
432 dw[0] = 0 << GEN6_VB_STATE_DW0_INDEX__SHIFT |
433 GEN6_VB_STATE_DW0_ACCESS_VERTEXDATA |
434 stride << GEN6_VB_STATE_DW0_PITCH__SHIFT;
435 if (ilo_dev_gen(builder->dev) >= ILO_GEN(7))
436 dw[0] |= GEN7_VB_STATE_DW0_ADDR_MODIFIED;
437
438 dw[3] = 0;
439
440 ilo_builder_batch_reloc(builder, pos + 1, bat->bo, vb_begin, 0);
441 ilo_builder_batch_reloc(builder, pos + 2, bat->bo, vb_end, 0);
442 }
443
444 static inline void
445 gen6_3DSTATE_VERTEX_ELEMENTS(struct ilo_builder *builder,
446 const struct ilo_ve_state *ve)
447 {
448 uint8_t cmd_len;
449 uint32_t *dw;
450 unsigned i;
451
452 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
453
454 /*
455 * From the Sandy Bridge PRM, volume 2 part 1, page 92:
456 *
457 * "At least one VERTEX_ELEMENT_STATE structure must be included."
458 *
459 * From the Sandy Bridge PRM, volume 2 part 1, page 93:
460 *
461 * "Up to 34 (DevSNB+) vertex elements are supported."
462 */
463 assert(ve->count + ve->prepend_nosrc_cso >= 1);
464 assert(ve->count + ve->prepend_nosrc_cso <= 34);
465
466 STATIC_ASSERT(Elements(ve->cso[0].payload) == 2);
467
468 cmd_len = 1 + 2 * (ve->count + ve->prepend_nosrc_cso);
469 ilo_builder_batch_pointer(builder, cmd_len, &dw);
470
471 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VERTEX_ELEMENTS) | (cmd_len - 2);
472 dw++;
473
474 if (ve->prepend_nosrc_cso) {
475 memcpy(dw, ve->nosrc_cso.payload, sizeof(ve->nosrc_cso.payload));
476 dw += 2;
477 }
478
479 for (i = 0; i < ve->count - ve->last_cso_edgeflag; i++) {
480 memcpy(dw, ve->cso[i].payload, sizeof(ve->cso[i].payload));
481 dw += 2;
482 }
483
484 if (ve->last_cso_edgeflag)
485 memcpy(dw, ve->edgeflag_cso.payload, sizeof(ve->edgeflag_cso.payload));
486 }
487
488 static inline void
489 gen6_3DSTATE_INDEX_BUFFER(struct ilo_builder *builder,
490 const struct ilo_ib_state *ib,
491 bool enable_cut_index)
492 {
493 const uint8_t cmd_len = 3;
494 struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
495 uint32_t start_offset, end_offset;
496 int format;
497 uint32_t *dw;
498 unsigned pos;
499
500 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
501
502 if (!buf)
503 return;
504
505 /* this is moved to the new 3DSTATE_VF */
506 if (ilo_dev_gen(builder->dev) >= ILO_GEN(7.5))
507 assert(!enable_cut_index);
508
509 switch (ib->hw_index_size) {
510 case 4:
511 format = GEN6_IB_DW0_FORMAT_DWORD;
512 break;
513 case 2:
514 format = GEN6_IB_DW0_FORMAT_WORD;
515 break;
516 case 1:
517 format = GEN6_IB_DW0_FORMAT_BYTE;
518 break;
519 default:
520 assert(!"unknown index size");
521 format = GEN6_IB_DW0_FORMAT_BYTE;
522 break;
523 }
524
525 /*
526 * set start_offset to 0 here and adjust pipe_draw_info::start with
527 * ib->draw_start_offset in 3DPRIMITIVE
528 */
529 start_offset = 0;
530 end_offset = buf->bo_size;
531
532 /* end_offset must also be aligned and is inclusive */
533 end_offset -= (end_offset % ib->hw_index_size);
534 end_offset--;
535
536 pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
537
538 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) |
539 format |
540 (cmd_len - 2);
541 if (enable_cut_index)
542 dw[0] |= GEN6_IB_DW0_CUT_INDEX_ENABLE;
543
544 ilo_builder_batch_reloc(builder, pos + 1, buf->bo, start_offset, 0);
545 ilo_builder_batch_reloc(builder, pos + 2, buf->bo, end_offset, 0);
546 }
547
548 static inline void
549 gen6_3DSTATE_VS(struct ilo_builder *builder,
550 const struct ilo_shader_state *vs)
551 {
552 const uint8_t cmd_len = 6;
553 const struct ilo_shader_cso *cso;
554 uint32_t dw2, dw4, dw5, *dw;
555
556 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
557
558 cso = ilo_shader_get_kernel_cso(vs);
559 dw2 = cso->payload[0];
560 dw4 = cso->payload[1];
561 dw5 = cso->payload[2];
562
563 ilo_builder_batch_pointer(builder, cmd_len, &dw);
564
565 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
566 dw[1] = ilo_shader_get_kernel_offset(vs);
567 dw[2] = dw2;
568 dw[3] = 0; /* scratch */
569 dw[4] = dw4;
570 dw[5] = dw5;
571 }
572
573 static inline void
574 gen6_disable_3DSTATE_VS(struct ilo_builder *builder)
575 {
576 const uint8_t cmd_len = 6;
577 uint32_t *dw;
578
579 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
580
581 ilo_builder_batch_pointer(builder, cmd_len, &dw);
582
583 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2);
584 dw[1] = 0;
585 dw[2] = 0;
586 dw[3] = 0;
587 dw[4] = 0;
588 dw[5] = 0;
589 }
590
591 static inline void
592 gen7_3DSTATE_HS(struct ilo_builder *builder,
593 const struct ilo_shader_state *hs)
594 {
595 const uint8_t cmd_len = 7;
596 uint32_t *dw;
597
598 ILO_DEV_ASSERT(builder->dev, 7, 7.5);
599
600 assert(!hs);
601
602 ilo_builder_batch_pointer(builder, cmd_len, &dw);
603
604 dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_HS) | (cmd_len - 2);
605 dw[1] = 0;
606 dw[2] = 0;
607 dw[3] = 0;
608 dw[4] = 0;
609 dw[5] = 0;
610 dw[6] = 0;
611 }
612
613 static inline void
614 gen7_3DSTATE_TE(struct ilo_builder *builder)
615 {
616 const uint8_t cmd_len = 4;
617 uint32_t *dw;
618
619 ILO_DEV_ASSERT(builder->dev, 7, 7.5);
620
621 ilo_builder_batch_pointer(builder, cmd_len, &dw);
622
623 dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_TE) | (cmd_len - 2);
624 dw[1] = 0;
625 dw[2] = 0;
626 dw[3] = 0;
627 }
628
629 static inline void
630 gen7_3DSTATE_DS(struct ilo_builder *builder,
631 const struct ilo_shader_state *ds)
632 {
633 const uint8_t cmd_len = 6;
634 uint32_t *dw;
635
636 ILO_DEV_ASSERT(builder->dev, 7, 7.5);
637
638 assert(!ds);
639
640 ilo_builder_batch_pointer(builder, cmd_len, &dw);
641
642 dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_DS) | (cmd_len - 2);
643 dw[1] = 0;
644 dw[2] = 0;
645 dw[3] = 0;
646 dw[4] = 0;
647 dw[5] = 0;
648 }
649
650 static inline void
651 gen6_3DSTATE_GS(struct ilo_builder *builder,
652 const struct ilo_shader_state *gs,
653 const struct ilo_shader_state *vs,
654 int verts_per_prim)
655 {
656 const uint8_t cmd_len = 7;
657 uint32_t dw1, dw2, dw4, dw5, dw6, *dw;
658
659 ILO_DEV_ASSERT(builder->dev, 6, 6);
660
661 if (gs) {
662 const struct ilo_shader_cso *cso;
663
664 dw1 = ilo_shader_get_kernel_offset(gs);
665
666 cso = ilo_shader_get_kernel_cso(gs);
667 dw2 = cso->payload[0];
668 dw4 = cso->payload[1];
669 dw5 = cso->payload[2];
670 dw6 = cso->payload[3];
671 }
672 else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
673 struct ilo_shader_cso cso;
674 enum ilo_kernel_param param;
675
676 switch (verts_per_prim) {
677 case 1:
678 param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
679 break;
680 case 2:
681 param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
682 break;
683 default:
684 param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
685 break;
686 }
687
688 dw1 = ilo_shader_get_kernel_offset(vs) +
689 ilo_shader_get_kernel_param(vs, param);
690
691 /* cannot use VS's CSO */
692 ilo_gpe_init_gs_cso(builder->dev, vs, &cso);
693 dw2 = cso.payload[0];
694 dw4 = cso.payload[1];
695 dw5 = cso.payload[2];
696 dw6 = cso.payload[3];
697 }
698 else {
699 dw1 = 0;
700 dw2 = 0;
701 dw4 = 1 << GEN6_GS_DW4_URB_READ_LEN__SHIFT;
702 dw5 = GEN6_GS_DW5_STATISTICS;
703 dw6 = 0;
704 }
705
706 ilo_builder_batch_pointer(builder, cmd_len, &dw);
707
708 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
709 dw[1] = dw1;
710 dw[2] = dw2;
711 dw[3] = 0;
712 dw[4] = dw4;
713 dw[5] = dw5;
714 dw[6] = dw6;
715 }
716
717 static inline void
718 gen6_3DSTATE_GS_SVB_INDEX(struct ilo_builder *builder,
719 int index, unsigned svbi,
720 unsigned max_svbi,
721 bool load_vertex_count)
722 {
723 const uint8_t cmd_len = 4;
724 uint32_t *dw;
725
726 ILO_DEV_ASSERT(builder->dev, 6, 6);
727 assert(index >= 0 && index < 4);
728
729 ilo_builder_batch_pointer(builder, cmd_len, &dw);
730
731 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_GS_SVB_INDEX) | (cmd_len - 2);
732
733 dw[1] = index << GEN6_SVBI_DW1_INDEX__SHIFT;
734 if (load_vertex_count)
735 dw[1] |= GEN6_SVBI_DW1_LOAD_INTERNAL_VERTEX_COUNT;
736
737 dw[2] = svbi;
738 dw[3] = max_svbi;
739 }
740
741 static inline void
742 gen7_3DSTATE_GS(struct ilo_builder *builder,
743 const struct ilo_shader_state *gs)
744 {
745 const uint8_t cmd_len = 7;
746 const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2);
747 const struct ilo_shader_cso *cso;
748 uint32_t dw2, dw4, dw5, *dw;
749
750 ILO_DEV_ASSERT(builder->dev, 7, 7.5);
751
752 if (!gs) {
753 ilo_builder_batch_pointer(builder, cmd_len, &dw);
754 dw[0] = dw0;
755 dw[1] = 0;
756 dw[2] = 0;
757 dw[3] = 0;
758 dw[4] = 0;
759 dw[5] = GEN7_GS_DW5_STATISTICS;
760 dw[6] = 0;
761 return;
762 }
763
764 cso = ilo_shader_get_kernel_cso(gs);
765 dw2 = cso->payload[0];
766 dw4 = cso->payload[1];
767 dw5 = cso->payload[2];
768
769 ilo_builder_batch_pointer(builder, cmd_len, &dw);
770
771 dw[0] = dw0;
772 dw[1] = ilo_shader_get_kernel_offset(gs);
773 dw[2] = dw2;
774 dw[3] = 0; /* scratch */
775 dw[4] = dw4;
776 dw[5] = dw5;
777 dw[6] = 0;
778 }
779
780 static inline void
781 gen7_3DSTATE_STREAMOUT(struct ilo_builder *builder,
782 unsigned buffer_mask,
783 int vertex_attrib_count,
784 bool rasterizer_discard)
785 {
786 const uint8_t cmd_len = 3;
787 const bool enable = (buffer_mask != 0);
788 const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_STREAMOUT) |
789 (cmd_len - 2);
790 uint32_t dw1, dw2, *dw;
791 int read_len;
792
793 ILO_DEV_ASSERT(builder->dev, 7, 7.5);
794
795 if (!enable) {
796 dw1 = 0 << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT;
797 if (rasterizer_discard)
798 dw1 |= GEN7_SO_DW1_RENDER_DISABLE;
799
800 dw2 = 0;
801
802 ilo_builder_batch_pointer(builder, cmd_len, &dw);
803 dw[0] = dw0;
804 dw[1] = dw1;
805 dw[2] = dw2;
806 return;
807 }
808
809 read_len = (vertex_attrib_count + 1) / 2;
810 if (!read_len)
811 read_len = 1;
812
813 dw1 = GEN7_SO_DW1_SO_ENABLE |
814 0 << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT |
815 GEN7_SO_DW1_STATISTICS |
816 buffer_mask << 8;
817
818 if (rasterizer_discard)
819 dw1 |= GEN7_SO_DW1_RENDER_DISABLE;
820
821 /* API_OPENGL */
822 if (true)
823 dw1 |= GEN7_SO_DW1_REORDER_TRAILING;
824
825 dw2 = 0 << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT |
826 0 << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT |
827 0 << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT |
828 0 << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT |
829 0 << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT |
830 0 << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT |
831 0 << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT |
832 (read_len - 1) << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT;
833
834 ilo_builder_batch_pointer(builder, cmd_len, &dw);
835
836 dw[0] = dw0;
837 dw[1] = dw1;
838 dw[2] = dw2;
839 }
840
841 static inline void
842 gen7_3DSTATE_SO_DECL_LIST(struct ilo_builder *builder,
843 const struct pipe_stream_output_info *so_info)
844 {
845 uint16_t cmd_len;
846 int buffer_selects, num_entries, i;
847 uint16_t so_decls[128];
848 uint32_t *dw;
849
850 ILO_DEV_ASSERT(builder->dev, 7, 7.5);
851
852 buffer_selects = 0;
853 num_entries = 0;
854
855 if (so_info) {
856 int buffer_offsets[PIPE_MAX_SO_BUFFERS];
857
858 memset(buffer_offsets, 0, sizeof(buffer_offsets));
859
860 for (i = 0; i < so_info->num_outputs; i++) {
861 unsigned decl, buf, reg, mask;
862
863 buf = so_info->output[i].output_buffer;
864
865 /* pad with holes */
866 assert(buffer_offsets[buf] <= so_info->output[i].dst_offset);
867 while (buffer_offsets[buf] < so_info->output[i].dst_offset) {
868 int num_dwords;
869
870 num_dwords = so_info->output[i].dst_offset - buffer_offsets[buf];
871 if (num_dwords > 4)
872 num_dwords = 4;
873
874 decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
875 GEN7_SO_DECL_HOLE_FLAG |
876 ((1 << num_dwords) - 1) << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
877
878 so_decls[num_entries++] = decl;
879 buffer_offsets[buf] += num_dwords;
880 }
881
882 reg = so_info->output[i].register_index;
883 mask = ((1 << so_info->output[i].num_components) - 1) <<
884 so_info->output[i].start_component;
885
886 decl = buf << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
887 reg << GEN7_SO_DECL_REG_INDEX__SHIFT |
888 mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
889
890 so_decls[num_entries++] = decl;
891 buffer_selects |= 1 << buf;
892 buffer_offsets[buf] += so_info->output[i].num_components;
893 }
894 }
895
896 /*
897 * From the Ivy Bridge PRM, volume 2 part 1, page 201:
898 *
899 * "Errata: All 128 decls for all four streams must be included
900 * whenever this command is issued. The "Num Entries [n]" fields still
901 * contain the actual numbers of valid decls."
902 *
903 * Also note that "DWord Length" has 9 bits for this command, and the type
904 * of cmd_len is thus uint16_t.
905 */
906 cmd_len = 2 * 128 + 3;
907
908 ilo_builder_batch_pointer(builder, cmd_len, &dw);
909
910 dw[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SO_DECL_LIST) | (cmd_len - 2);
911 dw[1] = 0 << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT |
912 0 << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT |
913 0 << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT |
914 buffer_selects << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT;
915 dw[2] = 0 << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT |
916 0 << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT |
917 0 << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT |
918 num_entries << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT;
919 dw += 3;
920
921 for (i = 0; i < num_entries; i++) {
922 dw[0] = so_decls[i];
923 dw[1] = 0;
924 dw += 2;
925 }
926 for (; i < 128; i++) {
927 dw[0] = 0;
928 dw[1] = 0;
929 dw += 2;
930 }
931 }
932
933 static inline void
934 gen7_3DSTATE_SO_BUFFER(struct ilo_builder *builder,
935 int index, int base, int stride,
936 const struct pipe_stream_output_target *so_target)
937 {
938 const uint8_t cmd_len = 4;
939 const uint32_t dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_SO_BUFFER) |
940 (cmd_len - 2);
941 struct ilo_buffer *buf;
942 int end;
943 uint32_t *dw;
944 unsigned pos;
945
946 ILO_DEV_ASSERT(builder->dev, 7, 7.5);
947
948 if (!so_target || !so_target->buffer) {
949 ilo_builder_batch_pointer(builder, cmd_len, &dw);
950 dw[0] = dw0;
951 dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT;
952 dw[2] = 0;
953 dw[3] = 0;
954
955 return;
956 }
957
958 buf = ilo_buffer(so_target->buffer);
959
960 /* DWord-aligned */
961 assert(stride % 4 == 0 && base % 4 == 0);
962 assert(so_target->buffer_offset % 4 == 0);
963
964 stride &= ~3;
965 base = (base + so_target->buffer_offset) & ~3;
966 end = (base + so_target->buffer_size) & ~3;
967
968 pos = ilo_builder_batch_pointer(builder, cmd_len, &dw);
969
970 dw[0] = dw0;
971 dw[1] = index << GEN7_SO_BUF_DW1_INDEX__SHIFT |
972 stride;
973
974 ilo_builder_batch_reloc(builder, pos + 2,
975 buf->bo, base, INTEL_RELOC_WRITE);
976 ilo_builder_batch_reloc(builder, pos + 3,
977 buf->bo, end, INTEL_RELOC_WRITE);
978 }
979
980 static inline void
981 gen6_3DSTATE_BINDING_TABLE_POINTERS(struct ilo_builder *builder,
982 uint32_t vs_binding_table,
983 uint32_t gs_binding_table,
984 uint32_t ps_binding_table)
985 {
986 const uint8_t cmd_len = 4;
987 uint32_t *dw;
988
989 ILO_DEV_ASSERT(builder->dev, 6, 6);
990
991 ilo_builder_batch_pointer(builder, cmd_len, &dw);
992
993 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_BINDING_TABLE_POINTERS) |
994 GEN6_PTR_BINDING_TABLE_DW0_VS_CHANGED |
995 GEN6_PTR_BINDING_TABLE_DW0_GS_CHANGED |
996 GEN6_PTR_BINDING_TABLE_DW0_PS_CHANGED |
997 (cmd_len - 2);
998 dw[1] = vs_binding_table;
999 dw[2] = gs_binding_table;
1000 dw[3] = ps_binding_table;
1001 }
1002
1003 static inline void
1004 gen6_3DSTATE_SAMPLER_STATE_POINTERS(struct ilo_builder *builder,
1005 uint32_t vs_sampler_state,
1006 uint32_t gs_sampler_state,
1007 uint32_t ps_sampler_state)
1008 {
1009 const uint8_t cmd_len = 4;
1010 uint32_t *dw;
1011
1012 ILO_DEV_ASSERT(builder->dev, 6, 6);
1013
1014 ilo_builder_batch_pointer(builder, cmd_len, &dw);
1015
1016 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLER_STATE_POINTERS) |
1017 GEN6_PTR_SAMPLER_DW0_VS_CHANGED |
1018 GEN6_PTR_SAMPLER_DW0_GS_CHANGED |
1019 GEN6_PTR_SAMPLER_DW0_PS_CHANGED |
1020 (cmd_len - 2);
1021 dw[1] = vs_sampler_state;
1022 dw[2] = gs_sampler_state;
1023 dw[3] = ps_sampler_state;
1024 }
1025
1026 static inline void
1027 gen7_3dstate_pointer(struct ilo_builder *builder,
1028 int subop, uint32_t pointer)
1029 {
1030 const uint32_t cmd = GEN6_RENDER_TYPE_RENDER |
1031 GEN6_RENDER_SUBTYPE_3D |
1032 subop;
1033 const uint8_t cmd_len = 2;
1034 uint32_t *dw;
1035
1036 ILO_DEV_ASSERT(builder->dev, 7, 7.5);
1037
1038 ilo_builder_batch_pointer(builder, cmd_len, &dw);
1039
1040 dw[0] = cmd | (cmd_len - 2);
1041 dw[1] = pointer;
1042 }
1043
1044 static inline void
1045 gen7_3DSTATE_BINDING_TABLE_POINTERS_VS(struct ilo_builder *builder,
1046 uint32_t binding_table)
1047 {
1048 gen7_3dstate_pointer(builder,
1049 GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_VS,
1050 binding_table);
1051 }
1052
1053 static inline void
1054 gen7_3DSTATE_BINDING_TABLE_POINTERS_HS(struct ilo_builder *builder,
1055 uint32_t binding_table)
1056 {
1057 gen7_3dstate_pointer(builder,
1058 GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_HS,
1059 binding_table);
1060 }
1061
1062 static inline void
1063 gen7_3DSTATE_BINDING_TABLE_POINTERS_DS(struct ilo_builder *builder,
1064 uint32_t binding_table)
1065 {
1066 gen7_3dstate_pointer(builder,
1067 GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_DS,
1068 binding_table);
1069 }
1070
1071 static inline void
1072 gen7_3DSTATE_BINDING_TABLE_POINTERS_GS(struct ilo_builder *builder,
1073 uint32_t binding_table)
1074 {
1075 gen7_3dstate_pointer(builder,
1076 GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_GS,
1077 binding_table);
1078 }
1079
1080 static inline void
1081 gen7_3DSTATE_SAMPLER_STATE_POINTERS_VS(struct ilo_builder *builder,
1082 uint32_t sampler_state)
1083 {
1084 gen7_3dstate_pointer(builder,
1085 GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_VS,
1086 sampler_state);
1087 }
1088
1089 static inline void
1090 gen7_3DSTATE_SAMPLER_STATE_POINTERS_HS(struct ilo_builder *builder,
1091 uint32_t sampler_state)
1092 {
1093 gen7_3dstate_pointer(builder,
1094 GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_HS,
1095 sampler_state);
1096 }
1097
1098 static inline void
1099 gen7_3DSTATE_SAMPLER_STATE_POINTERS_DS(struct ilo_builder *builder,
1100 uint32_t sampler_state)
1101 {
1102 gen7_3dstate_pointer(builder,
1103 GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_DS,
1104 sampler_state);
1105 }
1106
1107 static inline void
1108 gen7_3DSTATE_SAMPLER_STATE_POINTERS_GS(struct ilo_builder *builder,
1109 uint32_t sampler_state)
1110 {
1111 gen7_3dstate_pointer(builder,
1112 GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_GS,
1113 sampler_state);
1114 }
1115
/**
 * Build the four buffer dwords of a Gen6 3DSTATE_CONSTANT_* command.
 *
 * \param dev              not used by this function
 * \param bufs             buffer offsets; each used one must be a multiple
 *                         of 32 (asserted)
 * \param sizes            buffer sizes in bytes; a zero size disables the
 *                         slot
 * \param num_bufs         number of valid entries in \p bufs and \p sizes
 * \param max_read_length  upper bound, in 256-bit units, on the sum of the
 *                         read lengths (asserted)
 * \param dw               output array of four dwords
 * \param num_dwords       size of \p dw; must be 4 (asserted)
 *
 * \return a bitmask with bit i set for every enabled slot i
 *
 * An enabled slot's dword is its offset OR'ed with its read length, stored
 * in 256-bit units minus one.  Disabled slots are written as zero.
 */
static inline unsigned
gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs, int max_read_length,
                           uint32_t *dw, int num_dwords)
{
   unsigned enable_mask = 0x0;
   int units_read = 0;
   int slot;

   assert(num_dwords == 4);

   for (slot = 0; slot < 4; slot++) {
      int read_len;

      /* slots past num_bufs or with no data are disabled */
      if (slot >= num_bufs || !sizes[slot]) {
         dw[slot] = 0;
         continue;
      }

      /* in 256-bit units minus one */
      read_len = (sizes[slot] + 31) / 32 - 1;

      assert(bufs[slot] % 32 == 0);
      assert(read_len < 32);

      enable_mask |= 1 << slot;
      dw[slot] = bufs[slot] | read_len;

      units_read += read_len + 1;
   }

   assert(units_read <= max_read_length);

   return enable_mask;
}
1150
1151 static inline void
1152 gen6_3DSTATE_CONSTANT_VS(struct ilo_builder *builder,
1153 const uint32_t *bufs, const int *sizes,
1154 int num_bufs)
1155 {
1156 const uint8_t cmd_len = 5;
1157 uint32_t buf_dw[4], buf_enabled, *dw;
1158
1159 ILO_DEV_ASSERT(builder->dev, 6, 6);
1160
1161 assert(num_bufs <= 4);
1162
1163 /*
1164 * From the Sandy Bridge PRM, volume 2 part 1, page 138:
1165 *
1166 * "The sum of all four read length fields (each incremented to
1167 * represent the actual read length) must be less than or equal to 32"
1168 */
1169 buf_enabled = gen6_fill_3dstate_constant(builder->dev,
1170 bufs, sizes, num_bufs, 32, buf_dw, Elements(buf_dw));
1171
1172 ilo_builder_batch_pointer(builder, cmd_len, &dw);
1173
1174 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_VS) |
1175 buf_enabled << 12 |
1176 (cmd_len - 2);
1177 memcpy(&dw[1], buf_dw, sizeof(buf_dw));
1178 }
1179
1180 static inline void
1181 gen6_3DSTATE_CONSTANT_GS(struct ilo_builder *builder,
1182 const uint32_t *bufs, const int *sizes,
1183 int num_bufs)
1184 {
1185 const uint8_t cmd_len = 5;
1186 uint32_t buf_dw[4], buf_enabled, *dw;
1187
1188 ILO_DEV_ASSERT(builder->dev, 6, 6);
1189
1190 assert(num_bufs <= 4);
1191
1192 /*
1193 * From the Sandy Bridge PRM, volume 2 part 1, page 161:
1194 *
1195 * "The sum of all four read length fields (each incremented to
1196 * represent the actual read length) must be less than or equal to 64"
1197 */
1198 buf_enabled = gen6_fill_3dstate_constant(builder->dev,
1199 bufs, sizes, num_bufs, 64, buf_dw, Elements(buf_dw));
1200
1201 ilo_builder_batch_pointer(builder, cmd_len, &dw);
1202
1203 dw[0] = GEN6_RENDER_CMD(3D, 3DSTATE_CONSTANT_GS) |
1204 buf_enabled << 12 |
1205 (cmd_len - 2);
1206 memcpy(&dw[1], buf_dw, sizeof(buf_dw));
1207 }
1208
1209 static inline void
1210 gen7_3dstate_constant(struct ilo_builder *builder,
1211 int subop,
1212 const uint32_t *bufs, const int *sizes,
1213 int num_bufs)
1214 {
1215 const uint32_t cmd = GEN6_RENDER_TYPE_RENDER |
1216 GEN6_RENDER_SUBTYPE_3D |
1217 subop;
1218 const uint8_t cmd_len = 7;
1219 uint32_t payload[6], *dw;
1220 int total_read_length, i;
1221
1222 ILO_DEV_ASSERT(builder->dev, 7, 7.5);
1223
1224 /* VS, HS, DS, GS, and PS variants */
1225 assert(subop >= GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS &&
1226 subop <= GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_DS &&
1227 subop != GEN6_RENDER_OPCODE_3DSTATE_SAMPLE_MASK);
1228
1229 assert(num_bufs <= 4);
1230
1231 payload[0] = 0;
1232 payload[1] = 0;
1233
1234 total_read_length = 0;
1235 for (i = 0; i < 4; i++) {
1236 int read_len;
1237
1238 /*
1239 * From the Ivy Bridge PRM, volume 2 part 1, page 112:
1240 *
1241 * "Constant buffers must be enabled in order from Constant Buffer 0
1242 * to Constant Buffer 3 within this command. For example, it is
1243 * not allowed to enable Constant Buffer 1 by programming a
1244 * non-zero value in the VS Constant Buffer 1 Read Length without a
1245 * non-zero value in VS Constant Buffer 0 Read Length."
1246 */
1247 if (i >= num_bufs || !sizes[i]) {
1248 for (; i < 4; i++) {
1249 assert(i >= num_bufs || !sizes[i]);
1250 payload[2 + i] = 0;
1251 }
1252 break;
1253 }
1254
1255 /* read lengths are in 256-bit units */
1256 read_len = (sizes[i] + 31) / 32;
1257 /* the lower 5 bits are used for memory object control state */
1258 assert(bufs[i] % 32 == 0);
1259
1260 payload[i / 2] |= read_len << ((i % 2) ? 16 : 0);
1261 payload[2 + i] = bufs[i];
1262
1263 total_read_length += read_len;
1264 }
1265
1266 /*
1267 * From the Ivy Bridge PRM, volume 2 part 1, page 113:
1268 *
1269 * "The sum of all four read length fields must be less than or equal
1270 * to the size of 64"
1271 */
1272 assert(total_read_length <= 64);
1273
1274 ilo_builder_batch_pointer(builder, cmd_len, &dw);
1275
1276 dw[0] = cmd | (cmd_len - 2);
1277 memcpy(&dw[1], payload, sizeof(payload));
1278 }
1279
1280 static inline void
1281 gen7_3DSTATE_CONSTANT_VS(struct ilo_builder *builder,
1282 const uint32_t *bufs, const int *sizes,
1283 int num_bufs)
1284 {
1285 gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS,
1286 bufs, sizes, num_bufs);
1287 }
1288
1289 static inline void
1290 gen7_3DSTATE_CONSTANT_HS(struct ilo_builder *builder,
1291 const uint32_t *bufs, const int *sizes,
1292 int num_bufs)
1293 {
1294 gen7_3dstate_constant(builder, GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_HS,
1295 bufs, sizes, num_bufs);
1296 }
1297
1298 static inline void
1299 gen7_3DSTATE_CONSTANT_DS(struct ilo_builder *builder,
1300 const uint32_t *bufs, const int *sizes,
1301 int num_bufs)
1302 {
1303 gen7_3dstate_constant(builder, GEN7_RENDER_OPCODE_3DSTATE_CONSTANT_DS,
1304 bufs, sizes, num_bufs);
1305 }
1306
1307 static inline void
1308 gen7_3DSTATE_CONSTANT_GS(struct ilo_builder *builder,
1309 const uint32_t *bufs, const int *sizes,
1310 int num_bufs)
1311 {
1312 gen7_3dstate_constant(builder, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_GS,
1313 bufs, sizes, num_bufs);
1314 }
1315
1316 static inline uint32_t
1317 gen6_BINDING_TABLE_STATE(struct ilo_builder *builder,
1318 uint32_t *surface_states,
1319 int num_surface_states)
1320 {
1321 const int state_align = 32;
1322 const int state_len = num_surface_states;
1323
1324 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
1325
1326 /*
1327 * From the Sandy Bridge PRM, volume 4 part 1, page 69:
1328 *
1329 * "It is stored as an array of up to 256 elements..."
1330 */
1331 assert(num_surface_states <= 256);
1332
1333 if (!num_surface_states)
1334 return 0;
1335
1336 return ilo_builder_surface_write(builder, ILO_BUILDER_ITEM_BINDING_TABLE,
1337 state_align, state_len, surface_states);
1338 }
1339
1340 static inline uint32_t
1341 gen6_SURFACE_STATE(struct ilo_builder *builder,
1342 const struct ilo_view_surface *surf,
1343 bool for_render)
1344 {
1345 const int state_align = 32;
1346 const int state_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 8 : 6;
1347 uint32_t state_offset;
1348
1349 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
1350
1351 state_offset = ilo_builder_surface_write(builder, ILO_BUILDER_ITEM_SURFACE,
1352 state_align, state_len, surf->payload);
1353
1354 if (surf->bo) {
1355 ilo_builder_surface_reloc(builder, state_offset, 1, surf->bo,
1356 surf->payload[1], (for_render) ? INTEL_RELOC_WRITE : 0);
1357 }
1358
1359 return state_offset;
1360 }
1361
1362 static inline uint32_t
1363 gen6_so_SURFACE_STATE(struct ilo_builder *builder,
1364 const struct pipe_stream_output_target *so,
1365 const struct pipe_stream_output_info *so_info,
1366 int so_index)
1367 {
1368 struct ilo_buffer *buf = ilo_buffer(so->buffer);
1369 unsigned bo_offset, struct_size;
1370 enum pipe_format elem_format;
1371 struct ilo_view_surface surf;
1372
1373 ILO_DEV_ASSERT(builder->dev, 6, 6);
1374
1375 bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
1376 struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
1377
1378 switch (so_info->output[so_index].num_components) {
1379 case 1:
1380 elem_format = PIPE_FORMAT_R32_FLOAT;
1381 break;
1382 case 2:
1383 elem_format = PIPE_FORMAT_R32G32_FLOAT;
1384 break;
1385 case 3:
1386 elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
1387 break;
1388 case 4:
1389 elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
1390 break;
1391 default:
1392 assert(!"unexpected SO components length");
1393 elem_format = PIPE_FORMAT_R32_FLOAT;
1394 break;
1395 }
1396
1397 ilo_gpe_init_view_surface_for_buffer(builder->dev, buf, bo_offset,
1398 so->buffer_size, struct_size, elem_format, false, true, &surf);
1399
1400 return gen6_SURFACE_STATE(builder, &surf, false);
1401 }
1402
1403 static inline uint32_t
1404 gen6_SAMPLER_STATE(struct ilo_builder *builder,
1405 const struct ilo_sampler_cso * const *samplers,
1406 const struct pipe_sampler_view * const *views,
1407 const uint32_t *sampler_border_colors,
1408 int num_samplers)
1409 {
1410 const int state_align = 32;
1411 const int state_len = 4 * num_samplers;
1412 uint32_t state_offset, *dw;
1413 int i;
1414
1415 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
1416
1417 /*
1418 * From the Sandy Bridge PRM, volume 4 part 1, page 101:
1419 *
1420 * "The sampler state is stored as an array of up to 16 elements..."
1421 */
1422 assert(num_samplers <= 16);
1423
1424 if (!num_samplers)
1425 return 0;
1426
1427 /*
1428 * From the Sandy Bridge PRM, volume 2 part 1, page 132:
1429 *
1430 * "(Sampler Count of 3DSTATE_VS) Specifies how many samplers (in
1431 * multiples of 4) the vertex shader 0 kernel uses. Used only for
1432 * prefetching the associated sampler state entries.
1433 *
1434 * It also applies to other shader stages.
1435 */
1436 ilo_builder_dynamic_pad_top(builder, 4 * (4 - (num_samplers % 4)));
1437
1438 state_offset = ilo_builder_dynamic_pointer(builder,
1439 ILO_BUILDER_ITEM_SAMPLER, state_align, state_len, &dw);
1440
1441 for (i = 0; i < num_samplers; i++) {
1442 const struct ilo_sampler_cso *sampler = samplers[i];
1443 const struct pipe_sampler_view *view = views[i];
1444 const uint32_t border_color = sampler_border_colors[i];
1445 uint32_t dw_filter, dw_wrap;
1446
1447 /* there may be holes */
1448 if (!sampler || !view) {
1449 /* disabled sampler */
1450 dw[0] = 1 << 31;
1451 dw[1] = 0;
1452 dw[2] = 0;
1453 dw[3] = 0;
1454 dw += 4;
1455
1456 continue;
1457 }
1458
1459 /* determine filter and wrap modes */
1460 switch (view->texture->target) {
1461 case PIPE_TEXTURE_1D:
1462 dw_filter = (sampler->anisotropic) ?
1463 sampler->dw_filter_aniso : sampler->dw_filter;
1464 dw_wrap = sampler->dw_wrap_1d;
1465 break;
1466 case PIPE_TEXTURE_3D:
1467 /*
1468 * From the Sandy Bridge PRM, volume 4 part 1, page 103:
1469 *
1470 * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
1471 * surfaces of type SURFTYPE_3D."
1472 */
1473 dw_filter = sampler->dw_filter;
1474 dw_wrap = sampler->dw_wrap;
1475 break;
1476 case PIPE_TEXTURE_CUBE:
1477 dw_filter = (sampler->anisotropic) ?
1478 sampler->dw_filter_aniso : sampler->dw_filter;
1479 dw_wrap = sampler->dw_wrap_cube;
1480 break;
1481 default:
1482 dw_filter = (sampler->anisotropic) ?
1483 sampler->dw_filter_aniso : sampler->dw_filter;
1484 dw_wrap = sampler->dw_wrap;
1485 break;
1486 }
1487
1488 dw[0] = sampler->payload[0];
1489 dw[1] = sampler->payload[1];
1490 assert(!(border_color & 0x1f));
1491 dw[2] = border_color;
1492 dw[3] = sampler->payload[2];
1493
1494 dw[0] |= dw_filter;
1495
1496 if (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) {
1497 dw[3] |= dw_wrap;
1498 }
1499 else {
1500 /*
1501 * From the Sandy Bridge PRM, volume 4 part 1, page 21:
1502 *
1503 * "[DevSNB] Errata: Incorrect behavior is observed in cases
1504 * where the min and mag mode filters are different and
1505 * SurfMinLOD is nonzero. The determination of MagMode uses the
1506 * following equation instead of the one in the above
1507 * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
1508 *
1509 * As a way to work around that, we set Base to
1510 * view->u.tex.first_level.
1511 */
1512 dw[0] |= view->u.tex.first_level << 22;
1513
1514 dw[1] |= dw_wrap;
1515 }
1516
1517 dw += 4;
1518 }
1519
1520 return state_offset;
1521 }
1522
1523 static inline uint32_t
1524 gen6_SAMPLER_BORDER_COLOR_STATE(struct ilo_builder *builder,
1525 const struct ilo_sampler_cso *sampler)
1526 {
1527 const int state_align = 32;
1528 const int state_len = (ilo_dev_gen(builder->dev) >= ILO_GEN(7)) ? 4 : 12;
1529
1530 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
1531
1532 assert(Elements(sampler->payload) >= 3 + state_len);
1533
1534 /* see ilo_gpe_init_sampler_cso() */
1535 return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_BLOB,
1536 state_align, state_len, &sampler->payload[3]);
1537 }
1538
1539 static inline uint32_t
1540 gen6_push_constant_buffer(struct ilo_builder *builder,
1541 int size, void **pcb)
1542 {
1543 /*
1544 * For all VS, GS, FS, and CS push constant buffers, they must be aligned
1545 * to 32 bytes, and their sizes are specified in 256-bit units.
1546 */
1547 const int state_align = 32;
1548 const int state_len = align(size, 32) / 4;
1549 uint32_t state_offset;
1550 char *buf;
1551
1552 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
1553
1554 state_offset = ilo_builder_dynamic_pointer(builder,
1555 ILO_BUILDER_ITEM_BLOB, state_align, state_len, (uint32_t **) &buf);
1556
1557 /* zero out the unused range */
1558 if (size < state_len * 4)
1559 memset(&buf[size], 0, state_len * 4 - size);
1560
1561 if (pcb)
1562 *pcb = buf;
1563
1564 return state_offset;
1565 }
1566
1567 static inline uint32_t
1568 gen6_user_vertex_buffer(struct ilo_builder *builder,
1569 int size, const void *vertices)
1570 {
1571 const int state_align = 8;
1572 const int state_len = size / 4;
1573
1574 ILO_DEV_ASSERT(builder->dev, 6, 7.5);
1575
1576 assert(size % 4 == 0);
1577
1578 return ilo_builder_dynamic_write(builder, ILO_BUILDER_ITEM_BLOB,
1579 state_align, state_len, vertices);
1580 }
1581
1582 #endif /* ILO_BUILDER_3D_TOP_H */