d8d71d7308888a9313ba3430eee60a11331de285
[mesa.git] / src / gallium / drivers / ilo / ilo_gpe_gen6.h
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #ifndef ILO_GPE_GEN6_H
29 #define ILO_GPE_GEN6_H
30
31 #include "brw_defines.h"
32 #include "intel_reg.h"
33 #include "intel_winsys.h"
34
35 #include "ilo_common.h"
36 #include "ilo_cp.h"
37 #include "ilo_format.h"
38 #include "ilo_resource.h"
39 #include "ilo_shader.h"
40 #include "ilo_gpe.h"
41
/*
 * Assert that the generation of \p dev lies in the inclusive range
 * [min_gen, max_gen].  Expands to a single assert() call, so it compiles
 * to nothing when assertions are disabled.  \p dev is evaluated twice.
 */
#define ILO_GPE_VALID_GEN(dev, min_gen, max_gen) \
   assert(ILO_GEN(min_gen) <= (dev)->gen && (dev)->gen <= ILO_GEN(max_gen))
44
/*
 * Build the header DWord of a GPE command: the constant 0x3 is placed at
 * bit 29, the pipeline at bit 27, the opcode at bit 24 and the sub-opcode
 * at bit 16.  Callers OR the length field (and any flags) into the low bits.
 */
#define ILO_GPE_CMD(pipeline, op, subop) \
   ((0x3 << 29) | ((pipeline) << 27) | ((op) << 24) | ((subop) << 16))
47
/**
 * Commands that GEN6 GPE could emit.
 *
 * The comment after each entry is the (pipeline, opcode, sub-opcode) triple
 * that the matching emitter passes to ILO_GPE_CMD().  Entries are numbered
 * consecutively from 0; ILO_GPE_GEN6_COMMAND_COUNT is the number of commands.
 */
enum ilo_gpe_gen6_command {
   /* pipeline 0x0 */
   ILO_GPE_GEN6_STATE_BASE_ADDRESS,                  /* (0x0, 0x1, 0x01) */
   ILO_GPE_GEN6_STATE_SIP,                           /* (0x0, 0x1, 0x02) */

   /* pipeline 0x1 */
   ILO_GPE_GEN6_3DSTATE_VF_STATISTICS,               /* (0x1, 0x0, 0x0b) */
   ILO_GPE_GEN6_PIPELINE_SELECT,                     /* (0x1, 0x1, 0x04) */

   /* pipeline 0x2 */
   ILO_GPE_GEN6_MEDIA_VFE_STATE,                     /* (0x2, 0x0, 0x00) */
   ILO_GPE_GEN6_MEDIA_CURBE_LOAD,                    /* (0x2, 0x0, 0x01) */
   ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD,     /* (0x2, 0x0, 0x02) */
   ILO_GPE_GEN6_MEDIA_GATEWAY_STATE,                 /* (0x2, 0x0, 0x03) */
   ILO_GPE_GEN6_MEDIA_STATE_FLUSH,                   /* (0x2, 0x0, 0x04) */
   ILO_GPE_GEN6_MEDIA_OBJECT_WALKER,                 /* (0x2, 0x1, 0x03) */

   /* pipeline 0x3, opcode 0x0 */
   ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS,      /* (0x3, 0x0, 0x01) */
   ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS,      /* (0x3, 0x0, 0x02) */
   ILO_GPE_GEN6_3DSTATE_URB,                         /* (0x3, 0x0, 0x05) */
   ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS,              /* (0x3, 0x0, 0x08) */
   ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS,             /* (0x3, 0x0, 0x09) */
   ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER,                /* (0x3, 0x0, 0x0a) */
   ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS,     /* (0x3, 0x0, 0x0d) */
   ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS,           /* (0x3, 0x0, 0x0e) */
   ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS,      /* (0x3, 0x0, 0x0f) */
   ILO_GPE_GEN6_3DSTATE_VS,                          /* (0x3, 0x0, 0x10) */
   ILO_GPE_GEN6_3DSTATE_GS,                          /* (0x3, 0x0, 0x11) */
   ILO_GPE_GEN6_3DSTATE_CLIP,                        /* (0x3, 0x0, 0x12) */
   ILO_GPE_GEN6_3DSTATE_SF,                          /* (0x3, 0x0, 0x13) */
   ILO_GPE_GEN6_3DSTATE_WM,                          /* (0x3, 0x0, 0x14) */
   ILO_GPE_GEN6_3DSTATE_CONSTANT_VS,                 /* (0x3, 0x0, 0x15) */
   ILO_GPE_GEN6_3DSTATE_CONSTANT_GS,                 /* (0x3, 0x0, 0x16) */
   ILO_GPE_GEN6_3DSTATE_CONSTANT_PS,                 /* (0x3, 0x0, 0x17) */
   ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK,                 /* (0x3, 0x0, 0x18) */

   /* pipeline 0x3, opcode 0x1 */
   ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE,           /* (0x3, 0x1, 0x00) */
   ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER,                /* (0x3, 0x1, 0x05) */
   ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET,         /* (0x3, 0x1, 0x06) */
   ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN,        /* (0x3, 0x1, 0x07) */
   ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE,                /* (0x3, 0x1, 0x08) */
   ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS,          /* (0x3, 0x1, 0x0a) */
   ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX,                /* (0x3, 0x1, 0x0b) */
   ILO_GPE_GEN6_3DSTATE_MULTISAMPLE,                 /* (0x3, 0x1, 0x0d) */
   ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER,              /* (0x3, 0x1, 0x0e) */
   ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER,           /* (0x3, 0x1, 0x0f) */
   ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS,                /* (0x3, 0x1, 0x10) */

   /* pipeline 0x3, opcodes 0x2 and 0x3 */
   ILO_GPE_GEN6_PIPE_CONTROL,                        /* (0x3, 0x2, 0x00) */
   ILO_GPE_GEN6_3DPRIMITIVE,                         /* (0x3, 0x3, 0x00) */

   ILO_GPE_GEN6_COMMAND_COUNT,
};
96
/**
 * Indirect states that GEN6 GPE could emit.
 *
 * Entries are numbered consecutively from 0; ILO_GPE_GEN6_STATE_COUNT is the
 * number of states.
 */
enum ilo_gpe_gen6_state {
   ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA,

   /* viewports */
   ILO_GPE_GEN6_SF_VIEWPORT,
   ILO_GPE_GEN6_CLIP_VIEWPORT,
   ILO_GPE_GEN6_CC_VIEWPORT,

   ILO_GPE_GEN6_COLOR_CALC_STATE,
   ILO_GPE_GEN6_BLEND_STATE,
   ILO_GPE_GEN6_DEPTH_STENCIL_STATE,
   ILO_GPE_GEN6_SCISSOR_RECT,

   /* surfaces and samplers */
   ILO_GPE_GEN6_BINDING_TABLE_STATE,
   ILO_GPE_GEN6_SURFACE_STATE,
   ILO_GPE_GEN6_SAMPLER_STATE,
   ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE,

   ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER,

   ILO_GPE_GEN6_STATE_COUNT,
};
117
/*
 * Size estimators; both are defined outside this header.
 *
 * NOTE(review): the unit of the returned size and the meaning of \p arg are
 * not visible from here -- presumably \p arg is a per-command/per-state
 * element count; confirm against the definitions.
 */
int ilo_gpe_gen6_estimate_command_size(const struct ilo_dev_info *dev,
                                       enum ilo_gpe_gen6_command cmd,
                                       int arg);

int ilo_gpe_gen6_estimate_state_size(const struct ilo_dev_info *dev,
                                     enum ilo_gpe_gen6_state state,
                                     int arg);
127
128 /**
129 * Translate winsys tiling to hardware tiling.
130 */
131 static inline int
132 ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling)
133 {
134 switch (tiling) {
135 case INTEL_TILING_NONE:
136 return 0;
137 case INTEL_TILING_X:
138 return BRW_SURFACE_TILED;
139 case INTEL_TILING_Y:
140 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
141 default:
142 assert(!"unknown tiling");
143 return 0;
144 }
145 }
146
147 /**
148 * Translate a pipe primitive type to the matching hardware primitive type.
149 */
150 static inline int
151 ilo_gpe_gen6_translate_pipe_prim(unsigned prim)
152 {
153 static const int prim_mapping[PIPE_PRIM_MAX] = {
154 [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST,
155 [PIPE_PRIM_LINES] = _3DPRIM_LINELIST,
156 [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP,
157 [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP,
158 [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST,
159 [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
160 [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
161 [PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST,
162 [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
163 [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON,
164 [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
165 [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
166 [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
167 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
168 };
169
170 assert(prim_mapping[prim]);
171
172 return prim_mapping[prim];
173 }
174
175 /**
176 * Translate a pipe texture target to the matching hardware surface type.
177 */
178 static inline int
179 ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
180 {
181 switch (target) {
182 case PIPE_BUFFER:
183 return BRW_SURFACE_BUFFER;
184 case PIPE_TEXTURE_1D:
185 case PIPE_TEXTURE_1D_ARRAY:
186 return BRW_SURFACE_1D;
187 case PIPE_TEXTURE_2D:
188 case PIPE_TEXTURE_RECT:
189 case PIPE_TEXTURE_2D_ARRAY:
190 return BRW_SURFACE_2D;
191 case PIPE_TEXTURE_3D:
192 return BRW_SURFACE_3D;
193 case PIPE_TEXTURE_CUBE:
194 case PIPE_TEXTURE_CUBE_ARRAY:
195 return BRW_SURFACE_CUBE;
196 default:
197 assert(!"unknown texture target");
198 return BRW_SURFACE_BUFFER;
199 }
200 }
201
202 /**
203 * Fill in DW2 to DW7 of 3DSTATE_SF.
204 */
205 static inline void
206 ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
207 const struct ilo_rasterizer_state *rasterizer,
208 int num_samples,
209 enum pipe_format depth_format,
210 uint32_t *payload, unsigned payload_len)
211 {
212 const struct ilo_rasterizer_sf *sf = &rasterizer->sf;
213
214 assert(payload_len == Elements(sf->payload));
215
216 if (sf) {
217 memcpy(payload, sf->payload, sizeof(sf->payload));
218
219 if (num_samples > 1)
220 payload[1] |= sf->dw_msaa;
221
222 if (dev->gen >= ILO_GEN(7)) {
223 int format;
224
225 /* separate stencil */
226 switch (depth_format) {
227 case PIPE_FORMAT_Z16_UNORM:
228 format = BRW_DEPTHFORMAT_D16_UNORM;
229 break;
230 case PIPE_FORMAT_Z32_FLOAT:
231 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
232 format = BRW_DEPTHFORMAT_D32_FLOAT;
233 break;
234 case PIPE_FORMAT_Z24X8_UNORM:
235 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
236 format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
237 break;
238 default:
239 /* FLOAT surface is assumed when there is no depth buffer */
240 format = BRW_DEPTHFORMAT_D32_FLOAT;
241 break;
242 }
243
244 payload[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
245 }
246 }
247 else {
248 payload[0] = 0;
249 payload[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0;
250 payload[2] = 0;
251 payload[3] = 0;
252 payload[4] = 0;
253 payload[5] = 0;
254 }
255 }
256
257 /**
258 * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
259 */
260 static inline void
261 ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
262 const struct ilo_rasterizer_state *rasterizer,
263 const struct ilo_shader_state *fs,
264 uint32_t *dw, int num_dwords)
265 {
266 int output_count, vue_offset, vue_len;
267 const struct ilo_kernel_routing *routing;
268
269 ILO_GPE_VALID_GEN(dev, 6, 7);
270 assert(num_dwords == 13);
271
272 if (!fs) {
273 memset(dw, 0, sizeof(dw[0]) * num_dwords);
274
275 if (dev->gen >= ILO_GEN(7))
276 dw[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT;
277 else
278 dw[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT;
279
280 return;
281 }
282
283 output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
284 assert(output_count <= 32);
285
286 routing = ilo_shader_get_kernel_routing(fs);
287
288 vue_offset = routing->source_skip;
289 assert(vue_offset % 2 == 0);
290 vue_offset /= 2;
291
292 vue_len = (routing->source_len + 1) / 2;
293 if (!vue_len)
294 vue_len = 1;
295
296 if (dev->gen >= ILO_GEN(7)) {
297 dw[0] = output_count << GEN7_SBE_NUM_OUTPUTS_SHIFT |
298 vue_len << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
299 vue_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
300 if (routing->swizzle_enable)
301 dw[0] |= GEN7_SBE_SWIZZLE_ENABLE;
302 }
303 else {
304 dw[0] = output_count << GEN6_SF_NUM_OUTPUTS_SHIFT |
305 vue_len << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
306 vue_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
307 if (routing->swizzle_enable)
308 dw[0] |= GEN6_SF_SWIZZLE_ENABLE;
309 }
310
311 switch (rasterizer->state.sprite_coord_mode) {
312 case PIPE_SPRITE_COORD_UPPER_LEFT:
313 dw[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT;
314 break;
315 case PIPE_SPRITE_COORD_LOWER_LEFT:
316 dw[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
317 break;
318 }
319
320 STATIC_ASSERT(Elements(routing->swizzles) >= 16);
321 memcpy(&dw[1], routing->swizzles, 2 * 16);
322
323 /*
324 * From the Ivy Bridge PRM, volume 2 part 1, page 268:
325 *
326 * "This field (Point Sprite Texture Coordinate Enable) must be
327 * programmed to 0 when non-point primitives are rendered."
328 *
329 * TODO We do not check that yet.
330 */
331 dw[9] = routing->point_sprite_enable;
332
333 dw[10] = routing->const_interp_enable;
334
335 /* WrapShortest enables */
336 dw[11] = 0;
337 dw[12] = 0;
338 }
339
340 static inline void
341 gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev,
342 struct intel_bo *general_state_bo,
343 struct intel_bo *surface_state_bo,
344 struct intel_bo *dynamic_state_bo,
345 struct intel_bo *indirect_object_bo,
346 struct intel_bo *instruction_bo,
347 uint32_t general_state_size,
348 uint32_t dynamic_state_size,
349 uint32_t indirect_object_size,
350 uint32_t instruction_size,
351 struct ilo_cp *cp)
352 {
353 const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01);
354 const uint8_t cmd_len = 10;
355
356 ILO_GPE_VALID_GEN(dev, 6, 7);
357
358 /* 4K-page aligned */
359 assert(((general_state_size | dynamic_state_size |
360 indirect_object_size | instruction_size) & 0xfff) == 0);
361
362 ilo_cp_begin(cp, cmd_len);
363 ilo_cp_write(cp, cmd | (cmd_len - 2));
364
365 ilo_cp_write_bo(cp, 1, general_state_bo,
366 INTEL_DOMAIN_RENDER,
367 0);
368 ilo_cp_write_bo(cp, 1, surface_state_bo,
369 INTEL_DOMAIN_SAMPLER,
370 0);
371 ilo_cp_write_bo(cp, 1, dynamic_state_bo,
372 INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
373 0);
374 ilo_cp_write_bo(cp, 1, indirect_object_bo,
375 0,
376 0);
377 ilo_cp_write_bo(cp, 1, instruction_bo,
378 INTEL_DOMAIN_INSTRUCTION,
379 0);
380
381 if (general_state_size) {
382 ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo,
383 INTEL_DOMAIN_RENDER,
384 0);
385 }
386 else {
387 /* skip range check */
388 ilo_cp_write(cp, 1);
389 }
390
391 if (dynamic_state_size) {
392 ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo,
393 INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
394 0);
395 }
396 else {
397 /* skip range check */
398 ilo_cp_write(cp, 0xfffff000 + 1);
399 }
400
401 if (indirect_object_size) {
402 ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo,
403 0,
404 0);
405 }
406 else {
407 /* skip range check */
408 ilo_cp_write(cp, 0xfffff000 + 1);
409 }
410
411 if (instruction_size) {
412 ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo,
413 INTEL_DOMAIN_INSTRUCTION,
414 0);
415 }
416 else {
417 /* skip range check */
418 ilo_cp_write(cp, 1);
419 }
420
421 ilo_cp_end(cp);
422 }
423
/**
 * Emit STATE_SIP: a 2-DWord command whose only payload DWord is \p sip.
 */
static inline void
gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
                    uint32_t sip,
                    struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t header = ILO_GPE_CMD(0x0, 0x1, 0x02) | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, sip);
   ilo_cp_end(cp);
}
439
/**
 * Emit 3DSTATE_VF_STATISTICS: a single DWord with \p enable in bit 0.
 */
static inline void
gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev,
                                bool enable,
                                struct ilo_cp *cp)
{
   const uint8_t cmd_len = 1;
   const uint32_t header = ILO_GPE_CMD(0x1, 0x0, 0x0b);

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, (enable) ? (header | 1) : header);
   ilo_cp_end(cp);
}
454
/**
 * Emit PIPELINE_SELECT: a single DWord with the pipeline selector in the
 * low bits.
 *
 * \param pipeline  0x0 or 0x1 (3D or media); anything else is asserted against
 */
static inline void
gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev,
                          int pipeline,
                          struct ilo_cp *cp)
{
   /* uint32_t like every other emitter, so the header stays unsigned */
   const uint32_t cmd = ILO_GPE_CMD(0x1, 0x1, 0x04);
   const uint8_t cmd_len = 1;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /* 3D or media */
   assert(pipeline == 0x0 || pipeline == 0x1);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | pipeline);
   ilo_cp_end(cp);
}
472
/**
 * Emit MEDIA_VFE_STATE (8 DWords, GEN6 only).
 *
 * DW2 packs (\p max_threads - 1) at bit 16 and \p num_urb_entries at bit 8,
 * with the "Reset Gateway Timer" and "Bypass Gateway Control" bits set.
 * DW4 packs \p urb_entry_size at bit 16 with a fixed CURBE allocation size
 * of 480.  Scratch and scoreboard DWords are zero.
 */
static inline void
gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
                          int max_threads, int num_urb_entries,
                          int urb_entry_size,
                          struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x00);
   const uint8_t cmd_len = 8;
   const int reset_gateway_timer = 1 << 7;
   const int bypass_gateway_control = 1 << 6;
   const int curbe_alloc_size = 480;
   uint32_t dw2, dw4;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   dw2 = (max_threads - 1) << 16 |
         num_urb_entries << 8 |
         reset_gateway_timer |
         bypass_gateway_control;

   /* URB Entry Allocation Size and CURBE Allocation Size */
   dw4 = urb_entry_size << 16 | curbe_alloc_size;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0); /* scratch */
   ilo_cp_write(cp, dw2);
   ilo_cp_write(cp, 0); /* MBZ */
   ilo_cp_write(cp, dw4);
   /* scoreboard */
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
504
/**
 * Emit MEDIA_CURBE_LOAD (4 DWords, GEN6 only).
 *
 * \param buf   CURBE offset; must be a multiple of 32 (asserted)
 * \param size  CURBE size; rounded up with align(size, 32) before emission
 */
static inline void
gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev,
                           uint32_t buf, int size,
                           struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x01);
   const uint8_t cmd_len = 4;
   int aligned_size;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   assert(buf % 32 == 0);
   /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
   aligned_size = align(size, 32);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0); /* MBZ */
   ilo_cp_write(cp, aligned_size);
   ilo_cp_write(cp, buf);
   ilo_cp_end(cp);
}
526
/**
 * Emit MEDIA_INTERFACE_DESCRIPTOR_LOAD (4 DWords, GEN6 only).
 *
 * \param offset   descriptor offset; must be a multiple of 32 (asserted)
 * \param num_ids  number of interface descriptors; the emitted length is
 *                 num_ids * 8 DWords * 4 bytes
 */
static inline void
gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev,
                                          uint32_t offset, int num_ids,
                                          struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x02);
   const uint8_t cmd_len = 4;
   int total_len;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   assert(offset % 32 == 0);

   /* every ID has 8 DWords */
   total_len = num_ids * 8 * 4;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0); /* MBZ */
   ilo_cp_write(cp, total_len);
   ilo_cp_write(cp, offset);
   ilo_cp_end(cp);
}
547
/**
 * Emit MEDIA_GATEWAY_STATE (2 DWords, GEN6 only).
 *
 * DW1 packs \p id at bit 16, \p byte at bit 8 and \p thread_count in the
 * low bits.
 */
static inline void
gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev,
                              int id, int byte, int thread_count,
                              struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t header = ILO_GPE_CMD(0x2, 0x0, 0x03) | (cmd_len - 2);
   uint32_t gateway;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   gateway = id << 16 | byte << 8 | thread_count;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, gateway);
   ilo_cp_end(cp);
}
568
/**
 * Emit MEDIA_STATE_FLUSH (2 DWords, GEN6 only).
 *
 * DW1 packs \p thread_count_water_mark at bit 16 and \p barrier_mask in the
 * low bits.
 */
static inline void
gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev,
                            int thread_count_water_mark,
                            int barrier_mask,
                            struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t header = ILO_GPE_CMD(0x2, 0x0, 0x04) | (cmd_len - 2);
   uint32_t flush;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   flush = thread_count_water_mark << 16 | barrier_mask;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, flush);
   ilo_cp_end(cp);
}
589
/**
 * Placeholder for MEDIA_OBJECT_WALKER.  Nothing is emitted; calling this
 * trips an assertion.
 */
static inline void
gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev,
                              struct ilo_cp *cp)
{
   /* neither argument is used */
   (void) dev;
   (void) cp;

   assert(!"MEDIA_OBJECT_WALKER unsupported");
}
596
597 static inline void
598 gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev,
599 uint32_t vs_binding_table,
600 uint32_t gs_binding_table,
601 uint32_t ps_binding_table,
602 struct ilo_cp *cp)
603 {
604 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x01);
605 const uint8_t cmd_len = 4;
606
607 ILO_GPE_VALID_GEN(dev, 6, 6);
608
609 ilo_cp_begin(cp, cmd_len);
610 ilo_cp_write(cp, cmd | (cmd_len - 2) |
611 GEN6_BINDING_TABLE_MODIFY_VS |
612 GEN6_BINDING_TABLE_MODIFY_GS |
613 GEN6_BINDING_TABLE_MODIFY_PS);
614 ilo_cp_write(cp, vs_binding_table);
615 ilo_cp_write(cp, gs_binding_table);
616 ilo_cp_write(cp, ps_binding_table);
617 ilo_cp_end(cp);
618 }
619
620 static inline void
621 gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev,
622 uint32_t vs_sampler_state,
623 uint32_t gs_sampler_state,
624 uint32_t ps_sampler_state,
625 struct ilo_cp *cp)
626 {
627 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x02);
628 const uint8_t cmd_len = 4;
629
630 ILO_GPE_VALID_GEN(dev, 6, 6);
631
632 ilo_cp_begin(cp, cmd_len);
633 ilo_cp_write(cp, cmd | (cmd_len - 2) |
634 VS_SAMPLER_STATE_CHANGE |
635 GS_SAMPLER_STATE_CHANGE |
636 PS_SAMPLER_STATE_CHANGE);
637 ilo_cp_write(cp, vs_sampler_state);
638 ilo_cp_write(cp, gs_sampler_state);
639 ilo_cp_write(cp, ps_sampler_state);
640 ilo_cp_end(cp);
641 }
642
643 static inline void
644 gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
645 int vs_total_size, int gs_total_size,
646 int vs_entry_size, int gs_entry_size,
647 struct ilo_cp *cp)
648 {
649 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x05);
650 const uint8_t cmd_len = 3;
651 const int row_size = 128; /* 1024 bits */
652 int vs_alloc_size, gs_alloc_size;
653 int vs_num_entries, gs_num_entries;
654
655 ILO_GPE_VALID_GEN(dev, 6, 6);
656
657 /* in 1024-bit URB rows */
658 vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
659 gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
660
661 /* the valid range is [1, 5] */
662 if (!vs_alloc_size)
663 vs_alloc_size = 1;
664 if (!gs_alloc_size)
665 gs_alloc_size = 1;
666 assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
667
668 /* the valid range is [24, 256] in multiples of 4 */
669 vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
670 if (vs_num_entries > 256)
671 vs_num_entries = 256;
672 assert(vs_num_entries >= 24);
673
674 /* the valid range is [0, 256] in multiples of 4 */
675 gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
676 if (gs_num_entries > 256)
677 gs_num_entries = 256;
678
679 ilo_cp_begin(cp, cmd_len);
680 ilo_cp_write(cp, cmd | (cmd_len - 2));
681 ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_VS_SIZE_SHIFT |
682 vs_num_entries << GEN6_URB_VS_ENTRIES_SHIFT);
683 ilo_cp_write(cp, gs_num_entries << GEN6_URB_GS_ENTRIES_SHIFT |
684 (gs_alloc_size - 1) << GEN6_URB_GS_SIZE_SHIFT);
685 ilo_cp_end(cp);
686 }
687
688 static inline void
689 gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
690 const struct ilo_ve_state *ve,
691 const struct ilo_vb_state *vb,
692 struct ilo_cp *cp)
693 {
694 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08);
695 uint8_t cmd_len;
696 unsigned hw_idx;
697
698 ILO_GPE_VALID_GEN(dev, 6, 7);
699
700 /*
701 * From the Sandy Bridge PRM, volume 2 part 1, page 82:
702 *
703 * "From 1 to 33 VBs can be specified..."
704 */
705 assert(ve->vb_count <= 33);
706
707 if (!ve->vb_count)
708 return;
709
710 cmd_len = 1 + 4 * ve->vb_count;
711
712 ilo_cp_begin(cp, cmd_len);
713 ilo_cp_write(cp, cmd | (cmd_len - 2));
714
715 for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
716 const unsigned instance_divisor = ve->instance_divisors[hw_idx];
717 const unsigned pipe_idx = ve->vb_mapping[hw_idx];
718 const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx];
719 uint32_t dw;
720
721 dw = hw_idx << GEN6_VB0_INDEX_SHIFT;
722
723 if (instance_divisor)
724 dw |= GEN6_VB0_ACCESS_INSTANCEDATA;
725 else
726 dw |= GEN6_VB0_ACCESS_VERTEXDATA;
727
728 if (dev->gen >= ILO_GEN(7))
729 dw |= GEN7_VB0_ADDRESS_MODIFYENABLE;
730
731 /* use null vb if there is no buffer or the stride is out of range */
732 if (cso->buffer && cso->stride <= 2048) {
733 const struct ilo_buffer *buf = ilo_buffer(cso->buffer);
734 const uint32_t start_offset = cso->buffer_offset;
735 /*
736 * As noted in ilo_translate_format(), we treat some 3-component
737 * formats as 4-component formats to work around hardware
738 * limitations. Imagine the case where the vertex buffer holds a
739 * single PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6.
740 * The hardware would not be able to fetch it because the vertex
741 * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex
742 * and that takes at least 8 bytes.
743 *
744 * For the workaround to work, we query the physical size, which is
745 * page aligned, to calculate end_offset so that the last vertex has
746 * a better chance to be fetched.
747 */
748 const uint32_t end_offset = intel_bo_get_size(buf->bo) - 1;
749
750 dw |= cso->stride << BRW_VB0_PITCH_SHIFT;
751
752 ilo_cp_write(cp, dw);
753 ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
754 ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
755 ilo_cp_write(cp, instance_divisor);
756 }
757 else {
758 dw |= 1 << 13;
759
760 ilo_cp_write(cp, dw);
761 ilo_cp_write(cp, 0);
762 ilo_cp_write(cp, 0);
763 ilo_cp_write(cp, instance_divisor);
764 }
765 }
766
767 ilo_cp_end(cp);
768 }
769
770 static inline void
771 ve_init_cso_with_components(const struct ilo_dev_info *dev,
772 int comp0, int comp1, int comp2, int comp3,
773 struct ilo_ve_cso *cso)
774 {
775 ILO_GPE_VALID_GEN(dev, 6, 7);
776
777 STATIC_ASSERT(Elements(cso->payload) >= 2);
778 cso->payload[0] = GEN6_VE0_VALID;
779 cso->payload[1] =
780 comp0 << BRW_VE1_COMPONENT_0_SHIFT |
781 comp1 << BRW_VE1_COMPONENT_1_SHIFT |
782 comp2 << BRW_VE1_COMPONENT_2_SHIFT |
783 comp3 << BRW_VE1_COMPONENT_3_SHIFT;
784 }
785
786 static inline void
787 ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
788 struct ilo_ve_cso *cso)
789 {
790 int format;
791
792 ILO_GPE_VALID_GEN(dev, 6, 7);
793
794 /*
795 * From the Sandy Bridge PRM, volume 2 part 1, page 94:
796 *
797 * "- This bit (Edge Flag Enable) must only be ENABLED on the last
798 * valid VERTEX_ELEMENT structure.
799 *
800 * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
801 * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
802 *
803 * - The Source Element Format must be set to the UINT format.
804 *
805 * - [DevSNB]: Edge Flags are not supported for QUADLIST
806 * primitives. Software may elect to convert QUADLIST primitives
807 * to some set of corresponding edge-flag-supported primitive
808 * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
809 */
810
811 cso->payload[0] |= GEN6_VE0_EDGE_FLAG_ENABLE;
812 cso->payload[1] =
813 BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
814 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_1_SHIFT |
815 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT |
816 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT;
817
818 /*
819 * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via
820 * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined
821 * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
822 *
823 * Since all the hardware cares about is whether the flags are zero or not,
824 * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case.
825 */
826 format = (cso->payload[0] >> BRW_VE0_FORMAT_SHIFT) & 0x1ff;
827 if (format == BRW_SURFACEFORMAT_R32_FLOAT) {
828 STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT ==
829 BRW_SURFACEFORMAT_R32_FLOAT - 1);
830
831 cso->payload[0] -= (1 << BRW_VE0_FORMAT_SHIFT);
832 }
833 else {
834 assert(format == BRW_SURFACEFORMAT_R8_UINT);
835 }
836 }
837
838 static inline void
839 gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
840 const struct ilo_ve_state *ve,
841 bool last_velement_edgeflag,
842 bool prepend_generated_ids,
843 struct ilo_cp *cp)
844 {
845 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09);
846 uint8_t cmd_len;
847 unsigned i;
848
849 ILO_GPE_VALID_GEN(dev, 6, 7);
850
851 /*
852 * From the Sandy Bridge PRM, volume 2 part 1, page 93:
853 *
854 * "Up to 34 (DevSNB+) vertex elements are supported."
855 */
856 assert(ve->count + prepend_generated_ids <= 34);
857
858 if (!ve->count && !prepend_generated_ids) {
859 struct ilo_ve_cso dummy;
860
861 ve_init_cso_with_components(dev,
862 BRW_VE1_COMPONENT_STORE_0,
863 BRW_VE1_COMPONENT_STORE_0,
864 BRW_VE1_COMPONENT_STORE_0,
865 BRW_VE1_COMPONENT_STORE_1_FLT,
866 &dummy);
867
868 cmd_len = 3;
869 ilo_cp_begin(cp, cmd_len);
870 ilo_cp_write(cp, cmd | (cmd_len - 2));
871 ilo_cp_write_multi(cp, dummy.payload, 2);
872 ilo_cp_end(cp);
873
874 return;
875 }
876
877 cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;
878
879 ilo_cp_begin(cp, cmd_len);
880 ilo_cp_write(cp, cmd | (cmd_len - 2));
881
882 if (prepend_generated_ids) {
883 struct ilo_ve_cso gen_ids;
884
885 ve_init_cso_with_components(dev,
886 BRW_VE1_COMPONENT_STORE_VID,
887 BRW_VE1_COMPONENT_STORE_IID,
888 BRW_VE1_COMPONENT_NOSTORE,
889 BRW_VE1_COMPONENT_NOSTORE,
890 &gen_ids);
891
892 ilo_cp_write_multi(cp, gen_ids.payload, 2);
893 }
894
895 if (last_velement_edgeflag) {
896 struct ilo_ve_cso edgeflag;
897
898 for (i = 0; i < ve->count - 1; i++)
899 ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
900
901 edgeflag = ve->cso[i];
902 ve_set_cso_edgeflag(dev, &edgeflag);
903 ilo_cp_write_multi(cp, edgeflag.payload, 2);
904 }
905 else {
906 for (i = 0; i < ve->count; i++)
907 ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
908 }
909
910 ilo_cp_end(cp);
911 }
912
913 static inline void
914 gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
915 const struct ilo_ib_state *ib,
916 bool enable_cut_index,
917 struct ilo_cp *cp)
918 {
919 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a);
920 const uint8_t cmd_len = 3;
921 struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
922 uint32_t start_offset, end_offset;
923 int format;
924
925 ILO_GPE_VALID_GEN(dev, 6, 7);
926
927 if (!buf)
928 return;
929
930 switch (ib->hw_index_size) {
931 case 4:
932 format = BRW_INDEX_DWORD;
933 break;
934 case 2:
935 format = BRW_INDEX_WORD;
936 break;
937 case 1:
938 format = BRW_INDEX_BYTE;
939 break;
940 default:
941 assert(!"unknown index size");
942 format = BRW_INDEX_BYTE;
943 break;
944 }
945
946 /*
947 * set start_offset to 0 here and adjust pipe_draw_info::start with
948 * ib->draw_start_offset in 3DPRIMITIVE
949 */
950 start_offset = 0;
951 end_offset = buf->bo_size;
952
953 /* end_offset must also be aligned and is inclusive */
954 end_offset -= (end_offset % ib->hw_index_size);
955 end_offset--;
956
957 ilo_cp_begin(cp, cmd_len);
958 ilo_cp_write(cp, cmd | (cmd_len - 2) |
959 ((enable_cut_index) ? BRW_CUT_INDEX_ENABLE : 0) |
960 format << 8);
961 ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
962 ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
963 ilo_cp_end(cp);
964 }
965
966 static inline void
967 gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev,
968 uint32_t clip_viewport,
969 uint32_t sf_viewport,
970 uint32_t cc_viewport,
971 struct ilo_cp *cp)
972 {
973 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0d);
974 const uint8_t cmd_len = 4;
975
976 ILO_GPE_VALID_GEN(dev, 6, 6);
977
978 ilo_cp_begin(cp, cmd_len);
979 ilo_cp_write(cp, cmd | (cmd_len - 2) |
980 GEN6_CLIP_VIEWPORT_MODIFY |
981 GEN6_SF_VIEWPORT_MODIFY |
982 GEN6_CC_VIEWPORT_MODIFY);
983 ilo_cp_write(cp, clip_viewport);
984 ilo_cp_write(cp, sf_viewport);
985 ilo_cp_write(cp, cc_viewport);
986 ilo_cp_end(cp);
987 }
988
/**
 * Emit 3DSTATE_CC_STATE_POINTERS (4 DWords, GEN6 only).
 *
 * The blend, depth/stencil and color calc state pointers are written in
 * that order, each with bit 0 set.
 * NOTE(review): bit 0 is presumably the per-pointer modify enable -- confirm
 * against the PRM.
 */
static inline void
gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
                                    uint32_t blend_state,
                                    uint32_t depth_stencil_state,
                                    uint32_t color_calc_state,
                                    struct ilo_cp *cp)
{
   const uint8_t cmd_len = 4;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x0e) | (cmd_len - 2);
   const uint32_t low_bit = 1;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, blend_state | low_bit);
   ilo_cp_write(cp, depth_stencil_state | low_bit);
   ilo_cp_write(cp, color_calc_state | low_bit);
   ilo_cp_end(cp);
}
1008
/**
 * Emit 3DSTATE_SCISSOR_STATE_POINTERS: a 2-DWord command whose only payload
 * DWord is \p scissor_rect.
 */
static inline void
gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
                                         uint32_t scissor_rect,
                                         struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x0f) | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, scissor_rect);
   ilo_cp_end(cp);
}
1024
1025 static inline void
1026 gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
1027 const struct ilo_shader_state *vs,
1028 int num_samplers,
1029 struct ilo_cp *cp)
1030 {
1031 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10);
1032 const uint8_t cmd_len = 6;
1033 const struct ilo_shader_cso *cso;
1034 uint32_t dw2, dw4, dw5;
1035
1036 ILO_GPE_VALID_GEN(dev, 6, 7);
1037
1038 if (!vs) {
1039 ilo_cp_begin(cp, cmd_len);
1040 ilo_cp_write(cp, cmd | (cmd_len - 2));
1041 ilo_cp_write(cp, 0);
1042 ilo_cp_write(cp, 0);
1043 ilo_cp_write(cp, 0);
1044 ilo_cp_write(cp, 0);
1045 ilo_cp_write(cp, 0);
1046 ilo_cp_end(cp);
1047 return;
1048 }
1049
1050 cso = ilo_shader_get_kernel_cso(vs);
1051 dw2 = cso->payload[0];
1052 dw4 = cso->payload[1];
1053 dw5 = cso->payload[2];
1054
1055 dw2 |= ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT;
1056
1057 ilo_cp_begin(cp, cmd_len);
1058 ilo_cp_write(cp, cmd | (cmd_len - 2));
1059 ilo_cp_write(cp, ilo_shader_get_kernel_offset(vs));
1060 ilo_cp_write(cp, dw2);
1061 ilo_cp_write(cp, 0); /* scratch */
1062 ilo_cp_write(cp, dw4);
1063 ilo_cp_write(cp, dw5);
1064 ilo_cp_end(cp);
1065 }
1066
1067 static inline void
1068 gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
1069 const struct ilo_shader_state *gs,
1070 const struct ilo_shader_state *vs,
1071 int verts_per_prim,
1072 struct ilo_cp *cp)
1073 {
1074 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
1075 const uint8_t cmd_len = 7;
1076 uint32_t dw1, dw2, dw4, dw5, dw6;
1077
1078 ILO_GPE_VALID_GEN(dev, 6, 6);
1079
1080 if (gs) {
1081 const struct ilo_shader_cso *cso;
1082
1083 dw1 = ilo_shader_get_kernel_offset(gs);
1084
1085 cso = ilo_shader_get_kernel_cso(gs);
1086 dw2 = cso->payload[0];
1087 dw4 = cso->payload[1];
1088 dw5 = cso->payload[2];
1089 dw6 = cso->payload[3];
1090 }
1091 else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
1092 struct ilo_shader_cso cso;
1093 enum ilo_kernel_param param;
1094
1095 switch (verts_per_prim) {
1096 case 1:
1097 param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
1098 break;
1099 case 2:
1100 param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
1101 break;
1102 default:
1103 param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
1104 break;
1105 }
1106
1107 dw1 = ilo_shader_get_kernel_offset(vs) +
1108 ilo_shader_get_kernel_param(vs, param);
1109
1110 /* cannot use VS's CSO */
1111 ilo_gpe_init_gs_cso_gen6(dev, vs, &cso);
1112 dw2 = cso.payload[0];
1113 dw4 = cso.payload[1];
1114 dw5 = cso.payload[2];
1115 dw6 = cso.payload[3];
1116 }
1117 else {
1118 dw1 = 0;
1119 dw2 = 0;
1120 dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
1121 dw5 = GEN6_GS_STATISTICS_ENABLE;
1122 dw6 = 0;
1123 }
1124
1125 ilo_cp_begin(cp, cmd_len);
1126 ilo_cp_write(cp, cmd | (cmd_len - 2));
1127 ilo_cp_write(cp, dw1);
1128 ilo_cp_write(cp, dw2);
1129 ilo_cp_write(cp, 0);
1130 ilo_cp_write(cp, dw4);
1131 ilo_cp_write(cp, dw5);
1132 ilo_cp_write(cp, dw6);
1133 ilo_cp_end(cp);
1134 }
1135
1136 static inline void
1137 gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
1138 const struct ilo_rasterizer_state *rasterizer,
1139 const struct ilo_shader_state *fs,
1140 bool enable_guardband,
1141 int num_viewports,
1142 struct ilo_cp *cp)
1143 {
1144 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
1145 const uint8_t cmd_len = 4;
1146 uint32_t dw1, dw2, dw3;
1147
1148 ILO_GPE_VALID_GEN(dev, 6, 7);
1149
1150 if (rasterizer) {
1151 int interps;
1152
1153 dw1 = rasterizer->clip.payload[0];
1154 dw2 = rasterizer->clip.payload[1];
1155 dw3 = rasterizer->clip.payload[2];
1156
1157 if (enable_guardband && rasterizer->clip.can_enable_guardband)
1158 dw2 |= GEN6_CLIP_GB_TEST;
1159
1160 interps = (fs) ? ilo_shader_get_kernel_param(fs,
1161 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0;
1162
1163 if (interps & (1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC |
1164 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC |
1165 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC))
1166 dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
1167
1168 dw3 |= GEN6_CLIP_FORCE_ZERO_RTAINDEX |
1169 (num_viewports - 1);
1170 }
1171 else {
1172 dw1 = 0;
1173 dw2 = 0;
1174 dw3 = 0;
1175 }
1176
1177 ilo_cp_begin(cp, cmd_len);
1178 ilo_cp_write(cp, cmd | (cmd_len - 2));
1179 ilo_cp_write(cp, dw1);
1180 ilo_cp_write(cp, dw2);
1181 ilo_cp_write(cp, dw3);
1182 ilo_cp_end(cp);
1183 }
1184
1185 static inline void
1186 gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
1187 const struct ilo_rasterizer_state *rasterizer,
1188 const struct ilo_shader_state *fs,
1189 struct ilo_cp *cp)
1190 {
1191 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
1192 const uint8_t cmd_len = 20;
1193 uint32_t payload_raster[6], payload_sbe[13];
1194
1195 ILO_GPE_VALID_GEN(dev, 6, 6);
1196
1197 ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer,
1198 1, PIPE_FORMAT_NONE, payload_raster, Elements(payload_raster));
1199 ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
1200 fs, payload_sbe, Elements(payload_sbe));
1201
1202 ilo_cp_begin(cp, cmd_len);
1203 ilo_cp_write(cp, cmd | (cmd_len - 2));
1204 ilo_cp_write(cp, payload_sbe[0]);
1205 ilo_cp_write_multi(cp, payload_raster, 6);
1206 ilo_cp_write_multi(cp, &payload_sbe[1], 12);
1207 ilo_cp_end(cp);
1208 }
1209
1210 static inline void
1211 gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
1212 const struct ilo_shader_state *fs,
1213 int num_samplers,
1214 const struct ilo_rasterizer_state *rasterizer,
1215 bool dual_blend, bool cc_may_kill,
1216 struct ilo_cp *cp)
1217 {
1218 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
1219 const uint8_t cmd_len = 9;
1220 const int num_samples = 1;
1221 const struct ilo_shader_cso *fs_cso;
1222 uint32_t dw2, dw4, dw5, dw6;
1223
1224 ILO_GPE_VALID_GEN(dev, 6, 6);
1225
1226 if (!fs) {
1227 /* see brwCreateContext() */
1228 const int max_threads = (dev->gt == 2) ? 80 : 40;
1229
1230 ilo_cp_begin(cp, cmd_len);
1231 ilo_cp_write(cp, cmd | (cmd_len - 2));
1232 ilo_cp_write(cp, 0);
1233 ilo_cp_write(cp, 0);
1234 ilo_cp_write(cp, 0);
1235 ilo_cp_write(cp, 0);
1236 /* honor the valid range even if dispatching is disabled */
1237 ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
1238 ilo_cp_write(cp, 0);
1239 ilo_cp_write(cp, 0);
1240 ilo_cp_write(cp, 0);
1241 ilo_cp_end(cp);
1242
1243 return;
1244 }
1245
1246 fs_cso = ilo_shader_get_kernel_cso(fs);
1247 dw2 = fs_cso->payload[0];
1248 dw4 = fs_cso->payload[1];
1249 dw5 = fs_cso->payload[2];
1250 dw6 = fs_cso->payload[3];
1251
1252 dw2 |= (num_samplers + 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT;
1253
1254 if (true) {
1255 dw4 |= GEN6_WM_STATISTICS_ENABLE;
1256 }
1257 else {
1258 /*
1259 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1260 *
1261 * "This bit (Statistics Enable) must be disabled if either of these
1262 * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer
1263 * Resolve Enable or Depth Buffer Resolve Enable."
1264 */
1265 dw4 |= GEN6_WM_DEPTH_CLEAR;
1266 dw4 |= GEN6_WM_DEPTH_RESOLVE;
1267 dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
1268 }
1269
1270 if (cc_may_kill) {
1271 dw5 |= GEN6_WM_KILL_ENABLE |
1272 GEN6_WM_DISPATCH_ENABLE;
1273 }
1274
1275 if (dual_blend)
1276 dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
1277
1278 dw5 |= rasterizer->wm.payload[0];
1279
1280 dw6 |= rasterizer->wm.payload[1];
1281
1282 if (num_samples > 1) {
1283 dw6 |= rasterizer->wm.dw_msaa_rast |
1284 rasterizer->wm.dw_msaa_disp;
1285 }
1286
1287 ilo_cp_begin(cp, cmd_len);
1288 ilo_cp_write(cp, cmd | (cmd_len - 2));
1289 ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
1290 ilo_cp_write(cp, dw2);
1291 ilo_cp_write(cp, 0); /* scratch */
1292 ilo_cp_write(cp, dw4);
1293 ilo_cp_write(cp, dw5);
1294 ilo_cp_write(cp, dw6);
1295 ilo_cp_write(cp, 0); /* kernel 1 */
1296 ilo_cp_write(cp, 0); /* kernel 2 */
1297 ilo_cp_end(cp);
1298 }
1299
/**
 * Build the four buffer dwords shared by the 3DSTATE_CONSTANT_* commands.
 *
 * For each of the four slots, a slot is "enabled" when its index is below
 * \p num_bufs and its size is non-zero.  An enabled slot's dword is the
 * buffer value ORed with the read length, expressed in 256-bit (32-byte)
 * units minus one; a disabled slot's dword is zero.
 *
 * \param bufs             per-slot buffer values; asserted 32-byte aligned
 * \param sizes            per-slot sizes in bytes
 * \param max_read_length  asserted upper bound on the sum of read lengths
 * \param dw               output array of four dwords
 * \param num_dwords       must be 4 (asserted)
 * \return bitmask with bit i set for every enabled slot i
 */
static inline unsigned
gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs, int max_read_length,
                           uint32_t *dw, int num_dwords)
{
   unsigned enable_mask = 0x0;
   int read_total = 0;
   int slot;

   assert(num_dwords == 4);

   for (slot = 0; slot < 4; slot++) {
      int len_minus_one;

      if (slot >= num_bufs || !sizes[slot]) {
         dw[slot] = 0;
         continue;
      }

      /* in 256-bit units minus one */
      len_minus_one = (sizes[slot] + 31) / 32 - 1;

      assert(bufs[slot] % 32 == 0);
      assert(len_minus_one < 32);

      dw[slot] = bufs[slot] | len_minus_one;
      enable_mask |= 1 << slot;

      read_total += len_minus_one + 1;
   }

   assert(read_total <= max_read_length);

   return enable_mask;
}
1334
/**
 * Emit 3DSTATE_CONSTANT_VS (5 dwords); asserts that the device is GEN6.
 *
 * Up to four constant buffers are described by \p bufs / \p sizes.  The
 * per-buffer enable mask returned by gen6_fill_3dstate_constant() is placed
 * at bit 12 of the header dword.
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint8_t num_dw = 5;
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x15);
   uint32_t payload[4], enable_mask;
   unsigned i;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 138:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 32"
    */
   enable_mask = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 32, payload, Elements(payload));

   ilo_cp_begin(cp, num_dw);
   ilo_cp_write(cp, opcode | (num_dw - 2) | enable_mask << 12);
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, payload[i]);
   ilo_cp_end(cp);
}
1365
/**
 * Emit 3DSTATE_CONSTANT_GS (5 dwords); asserts that the device is GEN6.
 *
 * Up to four constant buffers are described by \p bufs / \p sizes.  The
 * per-buffer enable mask returned by gen6_fill_3dstate_constant() is placed
 * at bit 12 of the header dword.
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint8_t num_dw = 5;
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x16);
   uint32_t payload[4], enable_mask;
   unsigned i;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 161:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   enable_mask = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 64, payload, Elements(payload));

   ilo_cp_begin(cp, num_dw);
   ilo_cp_write(cp, opcode | (num_dw - 2) | enable_mask << 12);
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, payload[i]);
   ilo_cp_end(cp);
}
1396
/**
 * Emit 3DSTATE_CONSTANT_PS (5 dwords); asserts that the device is GEN6.
 *
 * Up to four constant buffers are described by \p bufs / \p sizes.  The
 * per-buffer enable mask returned by gen6_fill_3dstate_constant() is placed
 * at bit 12 of the header dword.
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint8_t num_dw = 5;
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x17);
   uint32_t payload[4], enable_mask;
   unsigned i;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 287:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   enable_mask = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 64, payload, Elements(payload));

   ilo_cp_begin(cp, num_dw);
   ilo_cp_write(cp, opcode | (num_dw - 2) | enable_mask << 12);
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, payload[i]);
   ilo_cp_end(cp);
}
1427
/**
 * Emit 3DSTATE_SAMPLE_MASK (2 dwords); asserts that the device is GEN6.
 *
 * Only the low four bits of \p sample_mask are kept.
 */
static inline void
gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
                              unsigned sample_mask,
                              struct ilo_cp *cp)
{
   const uint8_t num_dw = 2;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x18) | (num_dw - 2);
   const unsigned mask_bits = 0xf;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, num_dw);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, sample_mask & mask_bits);
   ilo_cp_end(cp);
}
1446
1447 static inline void
1448 gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev,
1449 unsigned x, unsigned y,
1450 unsigned width, unsigned height,
1451 struct ilo_cp *cp)
1452 {
1453 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x00);
1454 const uint8_t cmd_len = 4;
1455 unsigned xmax = x + width - 1;
1456 unsigned ymax = y + height - 1;
1457 int rect_limit;
1458
1459 ILO_GPE_VALID_GEN(dev, 6, 7);
1460
1461 if (dev->gen >= ILO_GEN(7)) {
1462 rect_limit = 16383;
1463 }
1464 else {
1465 /*
1466 * From the Sandy Bridge PRM, volume 2 part 1, page 230:
1467 *
1468 * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
1469 * must be an even number"
1470 */
1471 assert(y % 2 == 0);
1472
1473 rect_limit = 8191;
1474 }
1475
1476 if (x > rect_limit) x = rect_limit;
1477 if (y > rect_limit) y = rect_limit;
1478 if (xmax > rect_limit) xmax = rect_limit;
1479 if (ymax > rect_limit) ymax = rect_limit;
1480
1481 ilo_cp_begin(cp, cmd_len);
1482 ilo_cp_write(cp, cmd | (cmd_len - 2));
1483 ilo_cp_write(cp, y << 16 | x);
1484 ilo_cp_write(cp, ymax << 16 | xmax);
1485
1486 /*
1487 * There is no need to set the origin. It is intended to support front
1488 * buffer rendering.
1489 */
1490 ilo_cp_write(cp, 0);
1491
1492 ilo_cp_end(cp);
1493 }
1494
1495 static inline void
1496 gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
1497 const struct ilo_zs_surface *zs,
1498 struct ilo_cp *cp)
1499 {
1500 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
1501 ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
1502 const uint8_t cmd_len = 7;
1503
1504 ILO_GPE_VALID_GEN(dev, 6, 7);
1505
1506 ilo_cp_begin(cp, cmd_len);
1507 ilo_cp_write(cp, cmd | (cmd_len - 2));
1508 ilo_cp_write(cp, zs->payload[0]);
1509 ilo_cp_write_bo(cp, zs->payload[1], zs->bo,
1510 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1511 ilo_cp_write(cp, zs->payload[2]);
1512 ilo_cp_write(cp, zs->payload[3]);
1513 ilo_cp_write(cp, zs->payload[4]);
1514 ilo_cp_write(cp, zs->payload[5]);
1515 ilo_cp_end(cp);
1516 }
1517
/**
 * Emit 3DSTATE_POLY_STIPPLE_OFFSET (2 dwords).
 *
 * Both offsets are asserted to lie in [0, 31]; the X offset is placed at
 * bit 8 and the Y offset at bit 0 of the payload dword.
 */
static inline void
gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
                                      int x_offset, int y_offset,
                                      struct ilo_cp *cp)
{
   const uint8_t num_dw = 2;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x1, 0x06) | (num_dw - 2);

   ILO_GPE_VALID_GEN(dev, 6, 7);
   assert(x_offset >= 0 && x_offset <= 31);
   assert(y_offset >= 0 && y_offset <= 31);

   ilo_cp_begin(cp, num_dw);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, x_offset << 8 | y_offset);
   ilo_cp_end(cp);
}
1535
1536 static inline void
1537 gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev,
1538 const struct pipe_poly_stipple *pattern,
1539 struct ilo_cp *cp)
1540 {
1541 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x07);
1542 const uint8_t cmd_len = 33;
1543 int i;
1544
1545 ILO_GPE_VALID_GEN(dev, 6, 7);
1546 assert(Elements(pattern->stipple) == 32);
1547
1548 ilo_cp_begin(cp, cmd_len);
1549 ilo_cp_write(cp, cmd | (cmd_len - 2));
1550 for (i = 0; i < 32; i++)
1551 ilo_cp_write(cp, pattern->stipple[i]);
1552 ilo_cp_end(cp);
1553 }
1554
1555 static inline void
1556 gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev,
1557 unsigned pattern, unsigned factor,
1558 struct ilo_cp *cp)
1559 {
1560 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x08);
1561 const uint8_t cmd_len = 3;
1562 unsigned inverse;
1563
1564 ILO_GPE_VALID_GEN(dev, 6, 7);
1565 assert((pattern & 0xffff) == pattern);
1566 assert(factor >= 1 && factor <= 256);
1567
1568 ilo_cp_begin(cp, cmd_len);
1569 ilo_cp_write(cp, cmd | (cmd_len - 2));
1570 ilo_cp_write(cp, pattern);
1571
1572 if (dev->gen >= ILO_GEN(7)) {
1573 /* in U1.16 */
1574 inverse = (unsigned) (65536.0f / factor);
1575 ilo_cp_write(cp, inverse << 15 | factor);
1576 }
1577 else {
1578 /* in U1.13 */
1579 inverse = (unsigned) (8192.0f / factor);
1580 ilo_cp_write(cp, inverse << 16 | factor);
1581 }
1582
1583 ilo_cp_end(cp);
1584 }
1585
/**
 * Emit 3DSTATE_AA_LINE_PARAMETERS (3 dwords) with both parameter dwords
 * set to zero.
 */
static inline void
gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
                                     struct ilo_cp *cp)
{
   const uint8_t num_dw = 3;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x1, 0x0a) | (num_dw - 2);
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, num_dw);
   ilo_cp_write(cp, header);
   for (i = 0; i < 2; i++)
      ilo_cp_write(cp, 0 << 16 | 0);
   ilo_cp_end(cp);
}
1601
1602 static inline void
1603 gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev,
1604 int index, unsigned svbi,
1605 unsigned max_svbi,
1606 bool load_vertex_count,
1607 struct ilo_cp *cp)
1608 {
1609 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0b);
1610 const uint8_t cmd_len = 4;
1611 uint32_t dw1;
1612
1613 ILO_GPE_VALID_GEN(dev, 6, 6);
1614 assert(index >= 0 && index < 4);
1615
1616 dw1 = index << SVB_INDEX_SHIFT;
1617 if (load_vertex_count)
1618 dw1 |= SVB_LOAD_INTERNAL_VERTEX_COUNT;
1619
1620 ilo_cp_begin(cp, cmd_len);
1621 ilo_cp_write(cp, cmd | (cmd_len - 2));
1622 ilo_cp_write(cp, dw1);
1623 ilo_cp_write(cp, svbi);
1624 ilo_cp_write(cp, max_svbi);
1625 ilo_cp_end(cp);
1626 }
1627
1628 static inline void
1629 gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev,
1630 int num_samples,
1631 const uint32_t *packed_sample_pos,
1632 bool pixel_location_center,
1633 struct ilo_cp *cp)
1634 {
1635 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0d);
1636 const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3;
1637 uint32_t dw1, dw2, dw3;
1638
1639 ILO_GPE_VALID_GEN(dev, 6, 7);
1640
1641 dw1 = (pixel_location_center) ?
1642 MS_PIXEL_LOCATION_CENTER : MS_PIXEL_LOCATION_UPPER_LEFT;
1643
1644 switch (num_samples) {
1645 case 0:
1646 case 1:
1647 dw1 |= MS_NUMSAMPLES_1;
1648 dw2 = 0;
1649 dw3 = 0;
1650 break;
1651 case 4:
1652 dw1 |= MS_NUMSAMPLES_4;
1653 dw2 = packed_sample_pos[0];
1654 dw3 = 0;
1655 break;
1656 case 8:
1657 assert(dev->gen >= ILO_GEN(7));
1658 dw1 |= MS_NUMSAMPLES_8;
1659 dw2 = packed_sample_pos[0];
1660 dw3 = packed_sample_pos[1];
1661 break;
1662 default:
1663 assert(!"unsupported sample count");
1664 dw1 |= MS_NUMSAMPLES_1;
1665 dw2 = 0;
1666 dw3 = 0;
1667 break;
1668 }
1669
1670 ilo_cp_begin(cp, cmd_len);
1671 ilo_cp_write(cp, cmd | (cmd_len - 2));
1672 ilo_cp_write(cp, dw1);
1673 ilo_cp_write(cp, dw2);
1674 if (dev->gen >= ILO_GEN(7))
1675 ilo_cp_write(cp, dw3);
1676 ilo_cp_end(cp);
1677 }
1678
1679 static inline void
1680 gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev,
1681 const struct ilo_zs_surface *zs,
1682 struct ilo_cp *cp)
1683 {
1684 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
1685 ILO_GPE_CMD(0x3, 0x0, 0x06) :
1686 ILO_GPE_CMD(0x3, 0x1, 0x0e);
1687 const uint8_t cmd_len = 3;
1688
1689 ILO_GPE_VALID_GEN(dev, 6, 7);
1690
1691 ilo_cp_begin(cp, cmd_len);
1692 ilo_cp_write(cp, cmd | (cmd_len - 2));
1693 /* see ilo_gpe_init_zs_surface() */
1694 ilo_cp_write(cp, zs->payload[6]);
1695 ilo_cp_write_bo(cp, zs->payload[7], zs->separate_s8_bo,
1696 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1697 ilo_cp_end(cp);
1698 }
1699
1700 static inline void
1701 gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev,
1702 const struct ilo_zs_surface *zs,
1703 struct ilo_cp *cp)
1704 {
1705 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
1706 ILO_GPE_CMD(0x3, 0x0, 0x07) :
1707 ILO_GPE_CMD(0x3, 0x1, 0x0f);
1708 const uint8_t cmd_len = 3;
1709
1710 ILO_GPE_VALID_GEN(dev, 6, 7);
1711
1712 ilo_cp_begin(cp, cmd_len);
1713 ilo_cp_write(cp, cmd | (cmd_len - 2));
1714 /* see ilo_gpe_init_zs_surface() */
1715 ilo_cp_write(cp, zs->payload[8]);
1716 ilo_cp_write_bo(cp, zs->payload[9], zs->hiz_bo,
1717 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1718 ilo_cp_end(cp);
1719 }
1720
1721 static inline void
1722 gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
1723 uint32_t clear_val,
1724 struct ilo_cp *cp)
1725 {
1726 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x10);
1727 const uint8_t cmd_len = 2;
1728
1729 ILO_GPE_VALID_GEN(dev, 6, 6);
1730
1731 ilo_cp_begin(cp, cmd_len);
1732 ilo_cp_write(cp, cmd | (cmd_len - 2) |
1733 GEN5_DEPTH_CLEAR_VALID);
1734 ilo_cp_write(cp, clear_val);
1735 ilo_cp_end(cp);
1736 }
1737
1738 static inline void
1739 gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev,
1740 uint32_t dw1,
1741 struct intel_bo *bo, uint32_t bo_offset,
1742 bool write_qword,
1743 struct ilo_cp *cp)
1744 {
1745 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x2, 0x00);
1746 const uint8_t cmd_len = (write_qword) ? 5 : 4;
1747 const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
1748 const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
1749
1750 ILO_GPE_VALID_GEN(dev, 6, 7);
1751
1752 if (dw1 & PIPE_CONTROL_CS_STALL) {
1753 /*
1754 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
1755 *
1756 * "1 of the following must also be set (when CS stall is set):
1757 *
1758 * * Depth Cache Flush Enable ([0] of DW1)
1759 * * Stall at Pixel Scoreboard ([1] of DW1)
1760 * * Depth Stall ([13] of DW1)
1761 * * Post-Sync Operation ([13] of DW1)
1762 * * Render Target Cache Flush Enable ([12] of DW1)
1763 * * Notify Enable ([8] of DW1)"
1764 *
1765 * From the Ivy Bridge PRM, volume 2 part 1, page 61:
1766 *
1767 * "One of the following must also be set (when CS stall is set):
1768 *
1769 * * Render Target Cache Flush Enable ([12] of DW1)
1770 * * Depth Cache Flush Enable ([0] of DW1)
1771 * * Stall at Pixel Scoreboard ([1] of DW1)
1772 * * Depth Stall ([13] of DW1)
1773 * * Post-Sync Operation ([13] of DW1)"
1774 */
1775 uint32_t bit_test = PIPE_CONTROL_WRITE_FLUSH |
1776 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
1777 PIPE_CONTROL_STALL_AT_SCOREBOARD |
1778 PIPE_CONTROL_DEPTH_STALL;
1779
1780 /* post-sync op */
1781 bit_test |= PIPE_CONTROL_WRITE_IMMEDIATE |
1782 PIPE_CONTROL_WRITE_DEPTH_COUNT |
1783 PIPE_CONTROL_WRITE_TIMESTAMP;
1784
1785 if (dev->gen == ILO_GEN(6))
1786 bit_test |= PIPE_CONTROL_INTERRUPT_ENABLE;
1787
1788 assert(dw1 & bit_test);
1789 }
1790
1791 if (dw1 & PIPE_CONTROL_DEPTH_STALL) {
1792 /*
1793 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
1794 *
1795 * "Following bits must be clear (when Depth Stall is set):
1796 *
1797 * * Render Target Cache Flush Enable ([12] of DW1)
1798 * * Depth Cache Flush Enable ([0] of DW1)"
1799 */
1800 assert(!(dw1 & (PIPE_CONTROL_WRITE_FLUSH |
1801 PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
1802 }
1803
1804 ilo_cp_begin(cp, cmd_len);
1805 ilo_cp_write(cp, cmd | (cmd_len - 2));
1806 ilo_cp_write(cp, dw1);
1807 ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
1808 ilo_cp_write(cp, 0);
1809 if (write_qword)
1810 ilo_cp_write(cp, 0);
1811 ilo_cp_end(cp);
1812 }
1813
1814 static inline void
1815 gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
1816 const struct pipe_draw_info *info,
1817 const struct ilo_ib_state *ib,
1818 bool rectlist,
1819 struct ilo_cp *cp)
1820 {
1821 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
1822 const uint8_t cmd_len = 6;
1823 const int prim = (rectlist) ?
1824 _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
1825 const int vb_access = (info->indexed) ?
1826 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
1827 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
1828 const uint32_t vb_start = info->start +
1829 ((info->indexed) ? ib->draw_start_offset : 0);
1830
1831 ILO_GPE_VALID_GEN(dev, 6, 6);
1832
1833 ilo_cp_begin(cp, cmd_len);
1834 ilo_cp_write(cp, cmd | (cmd_len - 2) |
1835 prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
1836 vb_access);
1837 ilo_cp_write(cp, info->count);
1838 ilo_cp_write(cp, vb_start);
1839 ilo_cp_write(cp, info->instance_count);
1840 ilo_cp_write(cp, info->start_instance);
1841 ilo_cp_write(cp, info->index_bias);
1842 ilo_cp_end(cp);
1843 }
1844
/**
 * Write \p num_ids interface descriptors (8 dwords each) into space
 * obtained from ilo_cp_steal_ptr(); asserts that the device is GEN6.
 *
 * For descriptor i, DW0 is the kernel offset of cs[i], DW2 combines
 * sampler_state[i] with the sampler count (groups of four, rounded up),
 * and DW3 combines binding_table_state[i] with num_surfaces[i].
 *
 * \return the state offset reported by ilo_cp_steal_ptr()
 */
static inline uint32_t
gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev,
                                    const struct ilo_shader_state **cs,
                                    uint32_t *sampler_state,
                                    int *num_samplers,
                                    uint32_t *binding_table_state,
                                    int *num_surfaces,
                                    int num_ids,
                                    struct ilo_cp *cp)
{
   /*
    * From the Sandy Bridge PRM, volume 2 part 2, page 34:
    *
    *     "(Interface Descriptor Total Length) This field must have the same
    *      alignment as the Interface Descriptor Data Start Address.
    *
    *      It must be DQWord (32-byte) aligned..."
    *
    * From the Sandy Bridge PRM, volume 2 part 2, page 35:
    *
    *     "(Interface Descriptor Data Start Address) Specifies the 32-byte
    *      aligned address of the Interface Descriptor data."
    */
   const int state_align = 32 / 4;
   const int state_len = (32 / 4) * num_ids;
   uint32_t offset, *desc;
   int id;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   desc = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA",
         state_len, state_align, &offset);

   for (id = 0; id < num_ids; id++, desc += 8) {
      desc[0] = ilo_shader_get_kernel_offset(cs[id]);
      desc[1] = 1 << 18; /* SPF */
      desc[2] = sampler_state[id] |
                (num_samplers[id] + 3) / 4 << 2;
      desc[3] = binding_table_state[id] |
                num_surfaces[id];
      desc[4] = 0 << 16 |  /* CURBE Read Length */
                0;         /* CURBE Read Offset */
      desc[5] = 0; /* Barrier ID */
      desc[6] = 0;
      desc[7] = 0;
   }

   return offset;
}
1896
1897 static inline uint32_t
1898 gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev,
1899 const struct ilo_viewport_cso *viewports,
1900 unsigned num_viewports,
1901 struct ilo_cp *cp)
1902 {
1903 const int state_align = 32 / 4;
1904 const int state_len = 8 * num_viewports;
1905 uint32_t state_offset, *dw;
1906 unsigned i;
1907
1908 ILO_GPE_VALID_GEN(dev, 6, 6);
1909
1910 /*
1911 * From the Sandy Bridge PRM, volume 2 part 1, page 262:
1912 *
1913 * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
1914 * stored as an array of up to 16 elements..."
1915 */
1916 assert(num_viewports && num_viewports <= 16);
1917
1918 dw = ilo_cp_steal_ptr(cp, "SF_VIEWPORT",
1919 state_len, state_align, &state_offset);
1920
1921 for (i = 0; i < num_viewports; i++) {
1922 const struct ilo_viewport_cso *vp = &viewports[i];
1923
1924 dw[0] = fui(vp->m00);
1925 dw[1] = fui(vp->m11);
1926 dw[2] = fui(vp->m22);
1927 dw[3] = fui(vp->m30);
1928 dw[4] = fui(vp->m31);
1929 dw[5] = fui(vp->m32);
1930 dw[6] = 0;
1931 dw[7] = 0;
1932
1933 dw += 8;
1934 }
1935
1936 return state_offset;
1937 }
1938
1939 static inline uint32_t
1940 gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
1941 const struct ilo_viewport_cso *viewports,
1942 unsigned num_viewports,
1943 struct ilo_cp *cp)
1944 {
1945 const int state_align = 32 / 4;
1946 const int state_len = 4 * num_viewports;
1947 uint32_t state_offset, *dw;
1948 unsigned i;
1949
1950 ILO_GPE_VALID_GEN(dev, 6, 6);
1951
1952 /*
1953 * From the Sandy Bridge PRM, volume 2 part 1, page 193:
1954 *
1955 * "The viewport-related state is stored as an array of up to 16
1956 * elements..."
1957 */
1958 assert(num_viewports && num_viewports <= 16);
1959
1960 dw = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT",
1961 state_len, state_align, &state_offset);
1962
1963 for (i = 0; i < num_viewports; i++) {
1964 const struct ilo_viewport_cso *vp = &viewports[i];
1965
1966 dw[0] = fui(vp->min_gbx);
1967 dw[1] = fui(vp->max_gbx);
1968 dw[2] = fui(vp->min_gby);
1969 dw[3] = fui(vp->max_gby);
1970
1971 dw += 4;
1972 }
1973
1974 return state_offset;
1975 }
1976
1977 static inline uint32_t
1978 gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev,
1979 const struct ilo_viewport_cso *viewports,
1980 unsigned num_viewports,
1981 struct ilo_cp *cp)
1982 {
1983 const int state_align = 32 / 4;
1984 const int state_len = 2 * num_viewports;
1985 uint32_t state_offset, *dw;
1986 unsigned i;
1987
1988 ILO_GPE_VALID_GEN(dev, 6, 7);
1989
1990 /*
1991 * From the Sandy Bridge PRM, volume 2 part 1, page 385:
1992 *
1993 * "The viewport state is stored as an array of up to 16 elements..."
1994 */
1995 assert(num_viewports && num_viewports <= 16);
1996
1997 dw = ilo_cp_steal_ptr(cp, "CC_VIEWPORT",
1998 state_len, state_align, &state_offset);
1999
2000 for (i = 0; i < num_viewports; i++) {
2001 const struct ilo_viewport_cso *vp = &viewports[i];
2002
2003 dw[0] = fui(vp->min_z);
2004 dw[1] = fui(vp->max_z);
2005
2006 dw += 2;
2007 }
2008
2009 return state_offset;
2010 }
2011
2012 static inline uint32_t
2013 gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev,
2014 const struct pipe_stencil_ref *stencil_ref,
2015 ubyte alpha_ref,
2016 const struct pipe_blend_color *blend_color,
2017 struct ilo_cp *cp)
2018 {
2019 const int state_align = 64 / 4;
2020 const int state_len = 6;
2021 uint32_t state_offset, *dw;
2022
2023 ILO_GPE_VALID_GEN(dev, 6, 7);
2024
2025 dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE",
2026 state_len, state_align, &state_offset);
2027
2028 dw[0] = stencil_ref->ref_value[0] << 24 |
2029 stencil_ref->ref_value[1] << 16 |
2030 BRW_ALPHATEST_FORMAT_UNORM8;
2031 dw[1] = alpha_ref;
2032 dw[2] = fui(blend_color->color[0]);
2033 dw[3] = fui(blend_color->color[1]);
2034 dw[4] = fui(blend_color->color[2]);
2035 dw[5] = fui(blend_color->color[3]);
2036
2037 return state_offset;
2038 }
2039
2040 static inline uint32_t
2041 gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
2042 const struct ilo_blend_state *blend,
2043 const struct ilo_fb_state *fb,
2044 const struct ilo_dsa_state *dsa,
2045 struct ilo_cp *cp)
2046 {
2047 const int state_align = 64 / 4;
2048 int state_len;
2049 uint32_t state_offset, *dw;
2050 unsigned num_targets, i;
2051
2052 ILO_GPE_VALID_GEN(dev, 6, 7);
2053
2054 /*
2055 * From the Sandy Bridge PRM, volume 2 part 1, page 376:
2056 *
2057 * "The blend state is stored as an array of up to 8 elements..."
2058 */
2059 num_targets = fb->state.nr_cbufs;
2060 assert(num_targets <= 8);
2061
2062 if (!num_targets) {
2063 if (!dsa->dw_alpha)
2064 return 0;
2065 /* to be able to reference alpha func */
2066 num_targets = 1;
2067 }
2068
2069 state_len = 2 * num_targets;
2070
2071 dw = ilo_cp_steal_ptr(cp, "BLEND_STATE",
2072 state_len, state_align, &state_offset);
2073
2074 for (i = 0; i < num_targets; i++) {
2075 const unsigned idx = (blend->independent_blend_enable) ? i : 0;
2076 const struct ilo_blend_cso *cso = &blend->cso[idx];
2077 const int num_samples = fb->num_samples;
2078 const struct util_format_description *format_desc =
2079 (idx < fb->state.nr_cbufs) ?
2080 util_format_description(fb->state.cbufs[idx]->format) : NULL;
2081 bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;
2082
2083 rt_is_unorm = true;
2084 rt_is_pure_integer = false;
2085 rt_dst_alpha_forced_one = false;
2086
2087 if (format_desc) {
2088 int ch;
2089
2090 switch (format_desc->format) {
2091 case PIPE_FORMAT_B8G8R8X8_UNORM:
2092 /* force alpha to one when the HW format has alpha */
2093 assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM)
2094 == BRW_SURFACEFORMAT_B8G8R8A8_UNORM);
2095 rt_dst_alpha_forced_one = true;
2096 break;
2097 default:
2098 break;
2099 }
2100
2101 for (ch = 0; ch < 4; ch++) {
2102 if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID)
2103 continue;
2104
2105 if (format_desc->channel[ch].pure_integer) {
2106 rt_is_unorm = false;
2107 rt_is_pure_integer = true;
2108 break;
2109 }
2110
2111 if (!format_desc->channel[ch].normalized ||
2112 format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED)
2113 rt_is_unorm = false;
2114 }
2115 }
2116
2117 dw[0] = cso->payload[0];
2118 dw[1] = cso->payload[1];
2119
2120 if (!rt_is_pure_integer) {
2121 if (rt_dst_alpha_forced_one)
2122 dw[0] |= cso->dw_blend_dst_alpha_forced_one;
2123 else
2124 dw[0] |= cso->dw_blend;
2125 }
2126
2127 /*
2128 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
2129 *
2130 * "Logic Ops are only supported on *_UNORM surfaces (excluding
2131 * _SRGB variants), otherwise Logic Ops must be DISABLED."
2132 *
2133 * Since logicop is ignored for non-UNORM color buffers, no special care
2134 * is needed.
2135 */
2136 if (rt_is_unorm)
2137 dw[1] |= cso->dw_logicop;
2138
2139 /*
2140 * From the Sandy Bridge PRM, volume 2 part 1, page 356:
2141 *
2142 * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
2143 * Dither both must be disabled."
2144 *
2145 * There is no such limitation on GEN7, or for AlphaToOne. But GL
2146 * requires that anyway.
2147 */
2148 if (num_samples > 1)
2149 dw[1] |= cso->dw_alpha_mod;
2150
2151 /*
2152 * From the Sandy Bridge PRM, volume 2 part 1, page 382:
2153 *
2154 * "Alpha Test can only be enabled if Pixel Shader outputs a float
2155 * alpha value."
2156 */
2157 if (!rt_is_pure_integer)
2158 dw[1] |= dsa->dw_alpha;
2159
2160 dw += 2;
2161 }
2162
2163 return state_offset;
2164 }
2165
2166 static inline uint32_t
2167 gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev,
2168 const struct ilo_dsa_state *dsa,
2169 struct ilo_cp *cp)
2170 {
2171 const int state_align = 64 / 4;
2172 const int state_len = 3;
2173 uint32_t state_offset, *dw;
2174
2175
2176 ILO_GPE_VALID_GEN(dev, 6, 7);
2177
2178 dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE",
2179 state_len, state_align, &state_offset);
2180
2181 dw[0] = dsa->payload[0];
2182 dw[1] = dsa->payload[1];
2183 dw[2] = dsa->payload[2];
2184
2185 return state_offset;
2186 }
2187
2188 static inline uint32_t
2189 gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev,
2190 const struct ilo_scissor_state *scissor,
2191 unsigned num_viewports,
2192 struct ilo_cp *cp)
2193 {
2194 const int state_align = 32 / 4;
2195 const int state_len = 2 * num_viewports;
2196 uint32_t state_offset, *dw;
2197
2198 ILO_GPE_VALID_GEN(dev, 6, 7);
2199
2200 /*
2201 * From the Sandy Bridge PRM, volume 2 part 1, page 263:
2202 *
2203 * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
2204 * stored as an array of up to 16 elements..."
2205 */
2206 assert(num_viewports && num_viewports <= 16);
2207
2208 dw = ilo_cp_steal_ptr(cp, "SCISSOR_RECT",
2209 state_len, state_align, &state_offset);
2210
2211 memcpy(dw, scissor->payload, state_len * 4);
2212
2213 return state_offset;
2214 }
2215
/**
 * Emit BINDING_TABLE_STATE: one dword per entry, copied from
 * \p surface_states.
 *
 * \param surface_states    array of \p num_surface_states dwords to copy
 * \param num_surface_states number of entries; asserted to be at most 256
 * \return 0 when there are no entries (nothing is emitted in that case),
 *         otherwise the state offset reported by ilo_cp_steal_ptr()
 */
static inline uint32_t
gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
                              uint32_t *surface_states,
                              int num_surface_states,
                              struct ilo_cp *cp)
{
   const int alignment = 32 / 4;
   const int num_dwords = num_surface_states;
   uint32_t offset;
   uint32_t *table;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 69:
    *
    *     "It is stored as an array of up to 256 elements..."
    */
   assert(num_surface_states <= 256);

   /* an empty table needs no storage */
   if (num_surface_states == 0)
      return 0;

   table = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
         num_dwords, alignment, &offset);

   memcpy(table, surface_states,
         num_surface_states * sizeof(surface_states[0]));

   return offset;
}
2245
2246 static inline uint32_t
2247 gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev,
2248 const struct ilo_view_surface *surf,
2249 bool for_render,
2250 struct ilo_cp *cp)
2251 {
2252 const int state_align = 32 / 4;
2253 const int state_len = (dev->gen >= ILO_GEN(7)) ? 8 : 6;
2254 uint32_t state_offset;
2255 uint32_t read_domains, write_domain;
2256
2257 ILO_GPE_VALID_GEN(dev, 6, 7);
2258
2259 if (for_render) {
2260 read_domains = INTEL_DOMAIN_RENDER;
2261 write_domain = INTEL_DOMAIN_RENDER;
2262 }
2263 else {
2264 read_domains = INTEL_DOMAIN_SAMPLER;
2265 write_domain = 0;
2266 }
2267
2268 ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset);
2269
2270 STATIC_ASSERT(Elements(surf->payload) >= 8);
2271
2272 ilo_cp_write(cp, surf->payload[0]);
2273 ilo_cp_write_bo(cp, surf->payload[1],
2274 surf->bo, read_domains, write_domain);
2275 ilo_cp_write(cp, surf->payload[2]);
2276 ilo_cp_write(cp, surf->payload[3]);
2277 ilo_cp_write(cp, surf->payload[4]);
2278 ilo_cp_write(cp, surf->payload[5]);
2279
2280 if (dev->gen >= ILO_GEN(7)) {
2281 ilo_cp_write(cp, surf->payload[6]);
2282 ilo_cp_write(cp, surf->payload[7]);
2283 }
2284
2285 ilo_cp_end(cp);
2286
2287 return state_offset;
2288 }
2289
2290 static inline uint32_t
2291 gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev,
2292 const struct pipe_stream_output_target *so,
2293 const struct pipe_stream_output_info *so_info,
2294 int so_index,
2295 struct ilo_cp *cp)
2296 {
2297 struct ilo_buffer *buf = ilo_buffer(so->buffer);
2298 unsigned bo_offset, struct_size;
2299 enum pipe_format elem_format;
2300 struct ilo_view_surface surf;
2301
2302 ILO_GPE_VALID_GEN(dev, 6, 6);
2303
2304 bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
2305 struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
2306
2307 switch (so_info->output[so_index].num_components) {
2308 case 1:
2309 elem_format = PIPE_FORMAT_R32_FLOAT;
2310 break;
2311 case 2:
2312 elem_format = PIPE_FORMAT_R32G32_FLOAT;
2313 break;
2314 case 3:
2315 elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
2316 break;
2317 case 4:
2318 elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
2319 break;
2320 default:
2321 assert(!"unexpected SO components length");
2322 elem_format = PIPE_FORMAT_R32_FLOAT;
2323 break;
2324 }
2325
2326 ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size,
2327 struct_size, elem_format, false, true, &surf);
2328
2329 return gen6_emit_SURFACE_STATE(dev, &surf, false, cp);
2330 }
2331
2332 static inline uint32_t
2333 gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev,
2334 const struct ilo_sampler_cso * const *samplers,
2335 const struct pipe_sampler_view * const *views,
2336 const uint32_t *sampler_border_colors,
2337 int num_samplers,
2338 struct ilo_cp *cp)
2339 {
2340 const int state_align = 32 / 4;
2341 const int state_len = 4 * num_samplers;
2342 uint32_t state_offset, *dw;
2343 int i;
2344
2345 ILO_GPE_VALID_GEN(dev, 6, 7);
2346
2347 /*
2348 * From the Sandy Bridge PRM, volume 4 part 1, page 101:
2349 *
2350 * "The sampler state is stored as an array of up to 16 elements..."
2351 */
2352 assert(num_samplers <= 16);
2353
2354 if (!num_samplers)
2355 return 0;
2356
2357 dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE",
2358 state_len, state_align, &state_offset);
2359
2360 for (i = 0; i < num_samplers; i++) {
2361 const struct ilo_sampler_cso *sampler = samplers[i];
2362 const struct pipe_sampler_view *view = views[i];
2363 const uint32_t border_color = sampler_border_colors[i];
2364 uint32_t dw_filter, dw_wrap;
2365
2366 /* there may be holes */
2367 if (!sampler || !view) {
2368 /* disabled sampler */
2369 dw[0] = 1 << 31;
2370 dw[1] = 0;
2371 dw[2] = 0;
2372 dw[3] = 0;
2373 dw += 4;
2374
2375 continue;
2376 }
2377
2378 /* determine filter and wrap modes */
2379 switch (view->texture->target) {
2380 case PIPE_TEXTURE_1D:
2381 dw_filter = (sampler->anisotropic) ?
2382 sampler->dw_filter_aniso : sampler->dw_filter;
2383 dw_wrap = sampler->dw_wrap_1d;
2384 break;
2385 case PIPE_TEXTURE_3D:
2386 /*
2387 * From the Sandy Bridge PRM, volume 4 part 1, page 103:
2388 *
2389 * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
2390 * surfaces of type SURFTYPE_3D."
2391 */
2392 dw_filter = sampler->dw_filter;
2393 dw_wrap = sampler->dw_wrap;
2394 break;
2395 case PIPE_TEXTURE_CUBE:
2396 dw_filter = (sampler->anisotropic) ?
2397 sampler->dw_filter_aniso : sampler->dw_filter;
2398 dw_wrap = sampler->dw_wrap_cube;
2399 break;
2400 default:
2401 dw_filter = (sampler->anisotropic) ?
2402 sampler->dw_filter_aniso : sampler->dw_filter;
2403 dw_wrap = sampler->dw_wrap;
2404 break;
2405 }
2406
2407 dw[0] = sampler->payload[0];
2408 dw[1] = sampler->payload[1];
2409 assert(!(border_color & 0x1f));
2410 dw[2] = border_color;
2411 dw[3] = sampler->payload[2];
2412
2413 dw[0] |= dw_filter;
2414
2415 if (dev->gen >= ILO_GEN(7)) {
2416 dw[3] |= dw_wrap;
2417 }
2418 else {
2419 /*
2420 * From the Sandy Bridge PRM, volume 4 part 1, page 21:
2421 *
2422 * "[DevSNB] Errata: Incorrect behavior is observed in cases
2423 * where the min and mag mode filters are different and
2424 * SurfMinLOD is nonzero. The determination of MagMode uses the
2425 * following equation instead of the one in the above
2426 * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
2427 *
2428 * As a way to work around that, we set Base to
2429 * view->u.tex.first_level.
2430 */
2431 dw[0] |= view->u.tex.first_level << 22;
2432
2433 dw[1] |= dw_wrap;
2434 }
2435
2436 dw += 4;
2437 }
2438
2439 return state_offset;
2440 }
2441
2442 static inline uint32_t
2443 gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev,
2444 const struct ilo_sampler_cso *sampler,
2445 struct ilo_cp *cp)
2446 {
2447 const int state_align = 32 / 4;
2448 const int state_len = (dev->gen >= ILO_GEN(7)) ? 4 : 12;
2449 uint32_t state_offset, *dw;
2450
2451 ILO_GPE_VALID_GEN(dev, 6, 7);
2452
2453 dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE",
2454 state_len, state_align, &state_offset);
2455
2456 /* see ilo_gpe_init_sampler_cso() */
2457 memcpy(dw, &sampler->payload[3], state_len * 4);
2458
2459 return state_offset;
2460 }
2461
/**
 * Reserve space for a push constant buffer.
 *
 * The reservation is \p size rounded up to a multiple of 32 bytes.  The
 * first \p size bytes are left for the caller to fill in; only the padding
 * between \p size and the rounded-up size is zeroed here.
 *
 * \param size requested size in bytes
 * \param pcb  when non-NULL, receives a pointer to the reserved space
 * \return the state offset reported by ilo_cp_steal_ptr()
 */
static inline uint32_t
gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev,
                               int size, void **pcb,
                               struct ilo_cp *cp)
{
   /*
    * For all VS, GS, FS, and CS push constant buffers, they must be aligned
    * to 32 bytes, and their sizes are specified in 256-bit units.
    */
   const int alignment = 32 / 4;
   const int num_dwords = align(size, 32) / 4;
   const int padded_size = num_dwords * 4;
   uint32_t offset;
   char *bytes;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   bytes = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER",
         num_dwords, alignment, &offset);

   /* zero out the unused range */
   if (padded_size > size)
      memset(bytes + size, 0, padded_size - size);

   if (pcb)
      *pcb = bytes;

   return offset;
}
2490
2491 #endif /* ILO_GPE_GEN6_H */