ilo: preliminary GEN 7.5 support
[mesa.git] / src / gallium / drivers / ilo / ilo_gpe_gen6.h
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #ifndef ILO_GPE_GEN6_H
29 #define ILO_GPE_GEN6_H
30
31 #include "brw_defines.h"
32 #include "intel_reg.h"
33 #include "intel_winsys.h"
34
35 #include "ilo_common.h"
36 #include "ilo_cp.h"
37 #include "ilo_format.h"
38 #include "ilo_resource.h"
39 #include "ilo_shader.h"
40 #include "ilo_gpe.h"
41
/*
 * Debug-only check that the device generation is inside the inclusive range
 * [min_gen, max_gen].  Both bounds are passed through ILO_GEN() before the
 * comparison; the check vanishes when assert() is compiled out.
 */
#define ILO_GPE_VALID_GEN(dev, min_gen, max_gen)                           \
   assert((dev)->gen >= ILO_GEN(min_gen) && (dev)->gen <= ILO_GEN(max_gen))
44
/*
 * Build the header DWord of a command: the constant 0x3 at bit 29, the
 * pipeline at bit 27, the opcode at bit 24 and the sub-opcode at bit 16.
 * Callers OR the length field and any flags into the low bits.
 */
#define ILO_GPE_CMD(pipeline, op, subop)                                   \
   ((0x3 << 29) | ((pipeline) << 27) | ((op) << 24) | ((subop) << 16))
47
/**
 * Identifiers for the commands that the GEN6 GPE can emit.
 *
 * The triple in each trailing comment is the (pipeline, op, subop) argument
 * list that the matching emit function hands to ILO_GPE_CMD().  The values
 * are consecutive from zero, so ILO_GPE_GEN6_COMMAND_COUNT is the number of
 * commands.
 */
enum ilo_gpe_gen6_command {
   ILO_GPE_GEN6_STATE_BASE_ADDRESS = 0,              /* (0x0, 0x1, 0x01) */
   ILO_GPE_GEN6_STATE_SIP,                           /* (0x0, 0x1, 0x02) */
   ILO_GPE_GEN6_3DSTATE_VF_STATISTICS,               /* (0x1, 0x0, 0x0b) */
   ILO_GPE_GEN6_PIPELINE_SELECT,                     /* (0x1, 0x1, 0x04) */
   ILO_GPE_GEN6_MEDIA_VFE_STATE,                     /* (0x2, 0x0, 0x00) */
   ILO_GPE_GEN6_MEDIA_CURBE_LOAD,                    /* (0x2, 0x0, 0x01) */
   ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD,     /* (0x2, 0x0, 0x02) */
   ILO_GPE_GEN6_MEDIA_GATEWAY_STATE,                 /* (0x2, 0x0, 0x03) */
   ILO_GPE_GEN6_MEDIA_STATE_FLUSH,                   /* (0x2, 0x0, 0x04) */
   ILO_GPE_GEN6_MEDIA_OBJECT_WALKER,                 /* (0x2, 0x1, 0x03) */
   ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS,      /* (0x3, 0x0, 0x01) */
   ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS,      /* (0x3, 0x0, 0x02) */
   ILO_GPE_GEN6_3DSTATE_URB,                         /* (0x3, 0x0, 0x05) */
   ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS,              /* (0x3, 0x0, 0x08) */
   ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS,             /* (0x3, 0x0, 0x09) */
   ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER,                /* (0x3, 0x0, 0x0a) */
   ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS,     /* (0x3, 0x0, 0x0d) */
   ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS,           /* (0x3, 0x0, 0x0e) */
   ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS,      /* (0x3, 0x0, 0x0f) */
   ILO_GPE_GEN6_3DSTATE_VS,                          /* (0x3, 0x0, 0x10) */
   ILO_GPE_GEN6_3DSTATE_GS,                          /* (0x3, 0x0, 0x11) */
   ILO_GPE_GEN6_3DSTATE_CLIP,                        /* (0x3, 0x0, 0x12) */
   ILO_GPE_GEN6_3DSTATE_SF,                          /* (0x3, 0x0, 0x13) */
   ILO_GPE_GEN6_3DSTATE_WM,                          /* (0x3, 0x0, 0x14) */
   ILO_GPE_GEN6_3DSTATE_CONSTANT_VS,                 /* (0x3, 0x0, 0x15) */
   ILO_GPE_GEN6_3DSTATE_CONSTANT_GS,                 /* (0x3, 0x0, 0x16) */
   ILO_GPE_GEN6_3DSTATE_CONSTANT_PS,                 /* (0x3, 0x0, 0x17) */
   ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK,                 /* (0x3, 0x0, 0x18) */
   ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE,           /* (0x3, 0x1, 0x00) */
   ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER,                /* (0x3, 0x1, 0x05) */
   ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET,         /* (0x3, 0x1, 0x06) */
   ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN,        /* (0x3, 0x1, 0x07) */
   ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE,                /* (0x3, 0x1, 0x08) */
   ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS,          /* (0x3, 0x1, 0x0a) */
   ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX,                /* (0x3, 0x1, 0x0b) */
   ILO_GPE_GEN6_3DSTATE_MULTISAMPLE,                 /* (0x3, 0x1, 0x0d) */
   ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER,              /* (0x3, 0x1, 0x0e) */
   ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER,           /* (0x3, 0x1, 0x0f) */
   ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS,                /* (0x3, 0x1, 0x10) */
   ILO_GPE_GEN6_PIPE_CONTROL,                        /* (0x3, 0x2, 0x00) */
   ILO_GPE_GEN6_3DPRIMITIVE,                         /* (0x3, 0x3, 0x00) */

   /* number of commands above; keep this entry last */
   ILO_GPE_GEN6_COMMAND_COUNT,
};
96
/**
 * Identifiers for the indirect states that the GEN6 GPE can emit.
 *
 * The values are consecutive from zero, so ILO_GPE_GEN6_STATE_COUNT is the
 * number of states.
 */
enum ilo_gpe_gen6_state {
   ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA = 0,
   ILO_GPE_GEN6_SF_VIEWPORT,
   ILO_GPE_GEN6_CLIP_VIEWPORT,
   ILO_GPE_GEN6_CC_VIEWPORT,
   ILO_GPE_GEN6_COLOR_CALC_STATE,
   ILO_GPE_GEN6_BLEND_STATE,
   ILO_GPE_GEN6_DEPTH_STENCIL_STATE,
   ILO_GPE_GEN6_SCISSOR_RECT,
   ILO_GPE_GEN6_BINDING_TABLE_STATE,
   ILO_GPE_GEN6_SURFACE_STATE,
   ILO_GPE_GEN6_SAMPLER_STATE,
   ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE,
   ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER,

   /* number of states above; keep this entry last */
   ILO_GPE_GEN6_STATE_COUNT,
};
117
/**
 * Estimate the size of a GEN6 GPE command.
 *
 * Defined outside this header.
 * NOTE(review): the unit of the result and the meaning of \p arg are not
 * visible here -- presumably DWords and a per-command element count;
 * confirm against the definition.
 */
int ilo_gpe_gen6_estimate_command_size(const struct ilo_dev_info *dev,
                                       enum ilo_gpe_gen6_command cmd,
                                       int arg);
122
/**
 * Estimate the size of a GEN6 GPE indirect state.
 *
 * Defined outside this header.
 * NOTE(review): the unit of the result and the meaning of \p arg are not
 * visible here -- presumably DWords and a per-state element count; confirm
 * against the definition.
 */
int ilo_gpe_gen6_estimate_state_size(const struct ilo_dev_info *dev,
                                     enum ilo_gpe_gen6_state state,
                                     int arg);
127
128 /**
129 * Translate winsys tiling to hardware tiling.
130 */
131 static inline int
132 ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling)
133 {
134 switch (tiling) {
135 case INTEL_TILING_NONE:
136 return 0;
137 case INTEL_TILING_X:
138 return BRW_SURFACE_TILED;
139 case INTEL_TILING_Y:
140 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
141 default:
142 assert(!"unknown tiling");
143 return 0;
144 }
145 }
146
147 /**
148 * Translate a pipe primitive type to the matching hardware primitive type.
149 */
150 static inline int
151 ilo_gpe_gen6_translate_pipe_prim(unsigned prim)
152 {
153 static const int prim_mapping[PIPE_PRIM_MAX] = {
154 [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST,
155 [PIPE_PRIM_LINES] = _3DPRIM_LINELIST,
156 [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP,
157 [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP,
158 [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST,
159 [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
160 [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
161 [PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST,
162 [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
163 [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON,
164 [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
165 [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
166 [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
167 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
168 };
169
170 assert(prim_mapping[prim]);
171
172 return prim_mapping[prim];
173 }
174
175 /**
176 * Translate a pipe texture target to the matching hardware surface type.
177 */
178 static inline int
179 ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
180 {
181 switch (target) {
182 case PIPE_BUFFER:
183 return BRW_SURFACE_BUFFER;
184 case PIPE_TEXTURE_1D:
185 case PIPE_TEXTURE_1D_ARRAY:
186 return BRW_SURFACE_1D;
187 case PIPE_TEXTURE_2D:
188 case PIPE_TEXTURE_RECT:
189 case PIPE_TEXTURE_2D_ARRAY:
190 return BRW_SURFACE_2D;
191 case PIPE_TEXTURE_3D:
192 return BRW_SURFACE_3D;
193 case PIPE_TEXTURE_CUBE:
194 case PIPE_TEXTURE_CUBE_ARRAY:
195 return BRW_SURFACE_CUBE;
196 default:
197 assert(!"unknown texture target");
198 return BRW_SURFACE_BUFFER;
199 }
200 }
201
202 /**
203 * Fill in DW2 to DW7 of 3DSTATE_SF.
204 */
205 static inline void
206 ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
207 const struct ilo_rasterizer_state *rasterizer,
208 int num_samples,
209 enum pipe_format depth_format,
210 uint32_t *payload, unsigned payload_len)
211 {
212 const struct ilo_rasterizer_sf *sf = &rasterizer->sf;
213
214 assert(payload_len == Elements(sf->payload));
215
216 if (sf) {
217 memcpy(payload, sf->payload, sizeof(sf->payload));
218
219 if (num_samples > 1)
220 payload[1] |= sf->dw_msaa;
221
222 if (dev->gen >= ILO_GEN(7)) {
223 int format;
224
225 /* separate stencil */
226 switch (depth_format) {
227 case PIPE_FORMAT_Z16_UNORM:
228 format = BRW_DEPTHFORMAT_D16_UNORM;
229 break;
230 case PIPE_FORMAT_Z32_FLOAT:
231 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
232 format = BRW_DEPTHFORMAT_D32_FLOAT;
233 break;
234 case PIPE_FORMAT_Z24X8_UNORM:
235 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
236 format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
237 break;
238 default:
239 /* FLOAT surface is assumed when there is no depth buffer */
240 format = BRW_DEPTHFORMAT_D32_FLOAT;
241 break;
242 }
243
244 payload[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
245 }
246 }
247 else {
248 payload[0] = 0;
249 payload[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0;
250 payload[2] = 0;
251 payload[3] = 0;
252 payload[4] = 0;
253 payload[5] = 0;
254 }
255 }
256
257 /**
258 * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
259 */
260 static inline void
261 ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
262 const struct ilo_rasterizer_state *rasterizer,
263 const struct ilo_shader_state *fs,
264 uint32_t *dw, int num_dwords)
265 {
266 int output_count, vue_offset, vue_len;
267 const struct ilo_kernel_routing *routing;
268
269 ILO_GPE_VALID_GEN(dev, 6, 7.5);
270 assert(num_dwords == 13);
271
272 if (!fs) {
273 memset(dw, 0, sizeof(dw[0]) * num_dwords);
274
275 if (dev->gen >= ILO_GEN(7))
276 dw[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT;
277 else
278 dw[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT;
279
280 return;
281 }
282
283 output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
284 assert(output_count <= 32);
285
286 routing = ilo_shader_get_kernel_routing(fs);
287
288 vue_offset = routing->source_skip;
289 assert(vue_offset % 2 == 0);
290 vue_offset /= 2;
291
292 vue_len = (routing->source_len + 1) / 2;
293 if (!vue_len)
294 vue_len = 1;
295
296 if (dev->gen >= ILO_GEN(7)) {
297 dw[0] = output_count << GEN7_SBE_NUM_OUTPUTS_SHIFT |
298 vue_len << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
299 vue_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
300 if (routing->swizzle_enable)
301 dw[0] |= GEN7_SBE_SWIZZLE_ENABLE;
302 }
303 else {
304 dw[0] = output_count << GEN6_SF_NUM_OUTPUTS_SHIFT |
305 vue_len << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
306 vue_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
307 if (routing->swizzle_enable)
308 dw[0] |= GEN6_SF_SWIZZLE_ENABLE;
309 }
310
311 switch (rasterizer->state.sprite_coord_mode) {
312 case PIPE_SPRITE_COORD_UPPER_LEFT:
313 dw[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT;
314 break;
315 case PIPE_SPRITE_COORD_LOWER_LEFT:
316 dw[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
317 break;
318 }
319
320 STATIC_ASSERT(Elements(routing->swizzles) >= 16);
321 memcpy(&dw[1], routing->swizzles, 2 * 16);
322
323 /*
324 * From the Ivy Bridge PRM, volume 2 part 1, page 268:
325 *
326 * "This field (Point Sprite Texture Coordinate Enable) must be
327 * programmed to 0 when non-point primitives are rendered."
328 *
329 * TODO We do not check that yet.
330 */
331 dw[9] = routing->point_sprite_enable;
332
333 dw[10] = routing->const_interp_enable;
334
335 /* WrapShortest enables */
336 dw[11] = 0;
337 dw[12] = 0;
338 }
339
340 static inline void
341 gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev,
342 struct intel_bo *general_state_bo,
343 struct intel_bo *surface_state_bo,
344 struct intel_bo *dynamic_state_bo,
345 struct intel_bo *indirect_object_bo,
346 struct intel_bo *instruction_bo,
347 uint32_t general_state_size,
348 uint32_t dynamic_state_size,
349 uint32_t indirect_object_size,
350 uint32_t instruction_size,
351 struct ilo_cp *cp)
352 {
353 const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01);
354 const uint8_t cmd_len = 10;
355
356 ILO_GPE_VALID_GEN(dev, 6, 7.5);
357
358 /* 4K-page aligned */
359 assert(((general_state_size | dynamic_state_size |
360 indirect_object_size | instruction_size) & 0xfff) == 0);
361
362 ilo_cp_begin(cp, cmd_len);
363 ilo_cp_write(cp, cmd | (cmd_len - 2));
364
365 ilo_cp_write_bo(cp, 1, general_state_bo,
366 INTEL_DOMAIN_RENDER,
367 0);
368 ilo_cp_write_bo(cp, 1, surface_state_bo,
369 INTEL_DOMAIN_SAMPLER,
370 0);
371 ilo_cp_write_bo(cp, 1, dynamic_state_bo,
372 INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
373 0);
374 ilo_cp_write_bo(cp, 1, indirect_object_bo,
375 0,
376 0);
377 ilo_cp_write_bo(cp, 1, instruction_bo,
378 INTEL_DOMAIN_INSTRUCTION,
379 0);
380
381 if (general_state_size) {
382 ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo,
383 INTEL_DOMAIN_RENDER,
384 0);
385 }
386 else {
387 /* skip range check */
388 ilo_cp_write(cp, 1);
389 }
390
391 if (dynamic_state_size) {
392 ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo,
393 INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
394 0);
395 }
396 else {
397 /* skip range check */
398 ilo_cp_write(cp, 0xfffff000 + 1);
399 }
400
401 if (indirect_object_size) {
402 ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo,
403 0,
404 0);
405 }
406 else {
407 /* skip range check */
408 ilo_cp_write(cp, 0xfffff000 + 1);
409 }
410
411 if (instruction_size) {
412 ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo,
413 INTEL_DOMAIN_INSTRUCTION,
414 0);
415 }
416 else {
417 /* skip range check */
418 ilo_cp_write(cp, 1);
419 }
420
421 ilo_cp_end(cp);
422 }
423
/**
 * Emit STATE_SIP: a two-DWord command whose payload is \p sip, written
 * unmodified.
 */
static inline void
gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
                    uint32_t sip,
                    struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02);
   const uint32_t dw0 = cmd | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, sip);
   ilo_cp_end(cp);
}
439
/**
 * Emit 3DSTATE_VF_STATISTICS: a single DWord with \p enable in bit 0 of the
 * header.
 */
static inline void
gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev,
                                bool enable,
                                struct ilo_cp *cp)
{
   const uint8_t cmd_len = 1;
   const uint32_t cmd = ILO_GPE_CMD(0x1, 0x0, 0x0b);
   const uint32_t dw0 = cmd | enable;

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_end(cp);
}
454
/**
 * Emit PIPELINE_SELECT: a single DWord with \p pipeline in the low bits of
 * the header.  Only 0x0 and 0x1 are accepted (3D or media).
 */
static inline void
gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev,
                          int pipeline,
                          struct ilo_cp *cp)
{
   const uint8_t cmd_len = 1;
   const int cmd = ILO_GPE_CMD(0x1, 0x1, 0x04);

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   /* 3D or media */
   assert(pipeline == 0x0 || pipeline == 0x1);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | pipeline);
   ilo_cp_end(cp);
}
472
/**
 * Emit MEDIA_VFE_STATE (GEN6 only).
 *
 * DW2 carries (max_threads - 1) and the URB entry count, with the "Reset
 * Gateway Timer" and "Bypass Gateway Control" bits always set.  DW4 carries
 * the URB entry allocation size and a fixed CURBE allocation size of 480.
 * Scratch space and scoreboard DWords are zero.
 */
static inline void
gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
                          int max_threads, int num_urb_entries,
                          int urb_entry_size,
                          struct ilo_cp *cp)
{
   const uint8_t cmd_len = 8;
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x00);
   uint32_t dw2, dw4;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   dw2 = (max_threads - 1) << 16;
   dw2 |= num_urb_entries << 8;
   dw2 |= 1 << 7;                   /* Reset Gateway Timer */
   dw2 |= 1 << 6;                   /* Bypass Gateway Control */

   dw4 = 480;                       /* CURBE Allocation Size */
   dw4 |= urb_entry_size << 16;     /* URB Entry Allocation Size */

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0);             /* scratch */
   ilo_cp_write(cp, dw2);
   ilo_cp_write(cp, 0);             /* MBZ */
   ilo_cp_write(cp, dw4);
   /* scoreboard */
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
504
/**
 * Emit MEDIA_CURBE_LOAD (GEN6 only).
 *
 * \p buf must be a multiple of 32.  \p size is rounded up to a multiple of
 * 32 before it is written.
 */
static inline void
gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev,
                           uint32_t buf, int size,
                           struct ilo_cp *cp)
{
   const uint8_t cmd_len = 4;
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x01);

   ILO_GPE_VALID_GEN(dev, 6, 6);

   assert(buf % 32 == 0);

   /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
   size = align(size, 32);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0);    /* MBZ */
   ilo_cp_write(cp, size);
   ilo_cp_write(cp, buf);
   ilo_cp_end(cp);
}
526
/**
 * Emit MEDIA_INTERFACE_DESCRIPTOR_LOAD (GEN6 only).
 *
 * \p offset must be a multiple of 32.  The length DWord is the size in
 * bytes of \p num_ids descriptors of 8 DWords each.
 */
static inline void
gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev,
                                          uint32_t offset, int num_ids,
                                          struct ilo_cp *cp)
{
   const uint8_t cmd_len = 4;
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x02);

   ILO_GPE_VALID_GEN(dev, 6, 6);

   assert(offset % 32 == 0);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0);    /* MBZ */
   /* every ID has 8 DWords */
   ilo_cp_write(cp, num_ids * 8 * 4);
   ilo_cp_write(cp, offset);
   ilo_cp_end(cp);
}
547
/**
 * Emit MEDIA_GATEWAY_STATE (GEN6 only).
 *
 * DW1 packs \p id at bit 16, \p byte at bit 8 and \p thread_count in the
 * low bits.
 */
static inline void
gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev,
                              int id, int byte, int thread_count,
                              struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x03);
   uint32_t dw1;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   dw1 = thread_count;
   dw1 |= byte << 8;
   dw1 |= id << 16;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, dw1);
   ilo_cp_end(cp);
}
568
/**
 * Emit MEDIA_STATE_FLUSH (GEN6 only).
 *
 * DW1 packs \p thread_count_water_mark at bit 16 and \p barrier_mask in the
 * low bits.
 */
static inline void
gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev,
                            int thread_count_water_mark,
                            int barrier_mask,
                            struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x04);
   uint32_t dw1;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   dw1 = barrier_mask;
   dw1 |= thread_count_water_mark << 16;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, dw1);
   ilo_cp_end(cp);
}
589
/**
 * Placeholder for MEDIA_OBJECT_WALKER.  Nothing is emitted; any call trips
 * the assertion in debug builds.
 */
static inline void
gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev,
                              struct ilo_cp *cp)
{
   /* the parameters exist only to match the other emit functions */
   (void) dev;
   (void) cp;

   assert(!"MEDIA_OBJECT_WALKER unsupported");
}
596
597 static inline void
598 gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev,
599 uint32_t vs_binding_table,
600 uint32_t gs_binding_table,
601 uint32_t ps_binding_table,
602 struct ilo_cp *cp)
603 {
604 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x01);
605 const uint8_t cmd_len = 4;
606
607 ILO_GPE_VALID_GEN(dev, 6, 6);
608
609 ilo_cp_begin(cp, cmd_len);
610 ilo_cp_write(cp, cmd | (cmd_len - 2) |
611 GEN6_BINDING_TABLE_MODIFY_VS |
612 GEN6_BINDING_TABLE_MODIFY_GS |
613 GEN6_BINDING_TABLE_MODIFY_PS);
614 ilo_cp_write(cp, vs_binding_table);
615 ilo_cp_write(cp, gs_binding_table);
616 ilo_cp_write(cp, ps_binding_table);
617 ilo_cp_end(cp);
618 }
619
620 static inline void
621 gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev,
622 uint32_t vs_sampler_state,
623 uint32_t gs_sampler_state,
624 uint32_t ps_sampler_state,
625 struct ilo_cp *cp)
626 {
627 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x02);
628 const uint8_t cmd_len = 4;
629
630 ILO_GPE_VALID_GEN(dev, 6, 6);
631
632 ilo_cp_begin(cp, cmd_len);
633 ilo_cp_write(cp, cmd | (cmd_len - 2) |
634 VS_SAMPLER_STATE_CHANGE |
635 GS_SAMPLER_STATE_CHANGE |
636 PS_SAMPLER_STATE_CHANGE);
637 ilo_cp_write(cp, vs_sampler_state);
638 ilo_cp_write(cp, gs_sampler_state);
639 ilo_cp_write(cp, ps_sampler_state);
640 ilo_cp_end(cp);
641 }
642
643 static inline void
644 gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
645 int vs_total_size, int gs_total_size,
646 int vs_entry_size, int gs_entry_size,
647 struct ilo_cp *cp)
648 {
649 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x05);
650 const uint8_t cmd_len = 3;
651 const int row_size = 128; /* 1024 bits */
652 int vs_alloc_size, gs_alloc_size;
653 int vs_num_entries, gs_num_entries;
654
655 ILO_GPE_VALID_GEN(dev, 6, 6);
656
657 /* in 1024-bit URB rows */
658 vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
659 gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
660
661 /* the valid range is [1, 5] */
662 if (!vs_alloc_size)
663 vs_alloc_size = 1;
664 if (!gs_alloc_size)
665 gs_alloc_size = 1;
666 assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
667
668 /* the valid range is [24, 256] in multiples of 4 */
669 vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
670 if (vs_num_entries > 256)
671 vs_num_entries = 256;
672 assert(vs_num_entries >= 24);
673
674 /* the valid range is [0, 256] in multiples of 4 */
675 gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
676 if (gs_num_entries > 256)
677 gs_num_entries = 256;
678
679 ilo_cp_begin(cp, cmd_len);
680 ilo_cp_write(cp, cmd | (cmd_len - 2));
681 ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_VS_SIZE_SHIFT |
682 vs_num_entries << GEN6_URB_VS_ENTRIES_SHIFT);
683 ilo_cp_write(cp, gs_num_entries << GEN6_URB_GS_ENTRIES_SHIFT |
684 (gs_alloc_size - 1) << GEN6_URB_GS_SIZE_SHIFT);
685 ilo_cp_end(cp);
686 }
687
688 static inline void
689 gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
690 const struct ilo_ve_state *ve,
691 const struct ilo_vb_state *vb,
692 struct ilo_cp *cp)
693 {
694 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08);
695 uint8_t cmd_len;
696 unsigned hw_idx;
697
698 ILO_GPE_VALID_GEN(dev, 6, 7.5);
699
700 /*
701 * From the Sandy Bridge PRM, volume 2 part 1, page 82:
702 *
703 * "From 1 to 33 VBs can be specified..."
704 */
705 assert(ve->vb_count <= 33);
706
707 if (!ve->vb_count)
708 return;
709
710 cmd_len = 1 + 4 * ve->vb_count;
711
712 ilo_cp_begin(cp, cmd_len);
713 ilo_cp_write(cp, cmd | (cmd_len - 2));
714
715 for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
716 const unsigned instance_divisor = ve->instance_divisors[hw_idx];
717 const unsigned pipe_idx = ve->vb_mapping[hw_idx];
718 const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx];
719 uint32_t dw;
720
721 dw = hw_idx << GEN6_VB0_INDEX_SHIFT;
722
723 if (instance_divisor)
724 dw |= GEN6_VB0_ACCESS_INSTANCEDATA;
725 else
726 dw |= GEN6_VB0_ACCESS_VERTEXDATA;
727
728 if (dev->gen >= ILO_GEN(7))
729 dw |= GEN7_VB0_ADDRESS_MODIFYENABLE;
730
731 /* use null vb if there is no buffer or the stride is out of range */
732 if (cso->buffer && cso->stride <= 2048) {
733 const struct ilo_buffer *buf = ilo_buffer(cso->buffer);
734 const uint32_t start_offset = cso->buffer_offset;
735 /*
736 * As noted in ilo_translate_format(), we treat some 3-component
737 * formats as 4-component formats to work around hardware
738 * limitations. Imagine the case where the vertex buffer holds a
739 * single PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6.
740 * The hardware would not be able to fetch it because the vertex
741 * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex
742 * and that takes at least 8 bytes.
743 *
744 * For the workaround to work, we query the physical size, which is
745 * page aligned, to calculate end_offset so that the last vertex has
746 * a better chance to be fetched.
747 */
748 const uint32_t end_offset = intel_bo_get_size(buf->bo) - 1;
749
750 dw |= cso->stride << BRW_VB0_PITCH_SHIFT;
751
752 ilo_cp_write(cp, dw);
753 ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
754 ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
755 ilo_cp_write(cp, instance_divisor);
756 }
757 else {
758 dw |= 1 << 13;
759
760 ilo_cp_write(cp, dw);
761 ilo_cp_write(cp, 0);
762 ilo_cp_write(cp, 0);
763 ilo_cp_write(cp, instance_divisor);
764 }
765 }
766
767 ilo_cp_end(cp);
768 }
769
770 static inline void
771 ve_init_cso_with_components(const struct ilo_dev_info *dev,
772 int comp0, int comp1, int comp2, int comp3,
773 struct ilo_ve_cso *cso)
774 {
775 ILO_GPE_VALID_GEN(dev, 6, 7.5);
776
777 STATIC_ASSERT(Elements(cso->payload) >= 2);
778 cso->payload[0] = GEN6_VE0_VALID;
779 cso->payload[1] =
780 comp0 << BRW_VE1_COMPONENT_0_SHIFT |
781 comp1 << BRW_VE1_COMPONENT_1_SHIFT |
782 comp2 << BRW_VE1_COMPONENT_2_SHIFT |
783 comp3 << BRW_VE1_COMPONENT_3_SHIFT;
784 }
785
786 static inline void
787 ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
788 struct ilo_ve_cso *cso)
789 {
790 int format;
791
792 ILO_GPE_VALID_GEN(dev, 6, 7.5);
793
794 /*
795 * From the Sandy Bridge PRM, volume 2 part 1, page 94:
796 *
797 * "- This bit (Edge Flag Enable) must only be ENABLED on the last
798 * valid VERTEX_ELEMENT structure.
799 *
800 * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
801 * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
802 *
803 * - The Source Element Format must be set to the UINT format.
804 *
805 * - [DevSNB]: Edge Flags are not supported for QUADLIST
806 * primitives. Software may elect to convert QUADLIST primitives
807 * to some set of corresponding edge-flag-supported primitive
808 * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
809 */
810
811 cso->payload[0] |= GEN6_VE0_EDGE_FLAG_ENABLE;
812 cso->payload[1] =
813 BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
814 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_1_SHIFT |
815 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT |
816 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT;
817
818 /*
819 * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via
820 * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined
821 * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
822 *
823 * Since all the hardware cares about is whether the flags are zero or not,
824 * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case.
825 */
826 format = (cso->payload[0] >> BRW_VE0_FORMAT_SHIFT) & 0x1ff;
827 if (format == BRW_SURFACEFORMAT_R32_FLOAT) {
828 STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT ==
829 BRW_SURFACEFORMAT_R32_FLOAT - 1);
830
831 cso->payload[0] -= (1 << BRW_VE0_FORMAT_SHIFT);
832 }
833 else {
834 assert(format == BRW_SURFACEFORMAT_R8_UINT);
835 }
836 }
837
838 static inline void
839 gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
840 const struct ilo_ve_state *ve,
841 bool last_velement_edgeflag,
842 bool prepend_generated_ids,
843 struct ilo_cp *cp)
844 {
845 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09);
846 uint8_t cmd_len;
847 unsigned i;
848
849 ILO_GPE_VALID_GEN(dev, 6, 7.5);
850
851 /*
852 * From the Sandy Bridge PRM, volume 2 part 1, page 93:
853 *
854 * "Up to 34 (DevSNB+) vertex elements are supported."
855 */
856 assert(ve->count + prepend_generated_ids <= 34);
857
858 if (!ve->count && !prepend_generated_ids) {
859 struct ilo_ve_cso dummy;
860
861 ve_init_cso_with_components(dev,
862 BRW_VE1_COMPONENT_STORE_0,
863 BRW_VE1_COMPONENT_STORE_0,
864 BRW_VE1_COMPONENT_STORE_0,
865 BRW_VE1_COMPONENT_STORE_1_FLT,
866 &dummy);
867
868 cmd_len = 3;
869 ilo_cp_begin(cp, cmd_len);
870 ilo_cp_write(cp, cmd | (cmd_len - 2));
871 ilo_cp_write_multi(cp, dummy.payload, 2);
872 ilo_cp_end(cp);
873
874 return;
875 }
876
877 cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;
878
879 ilo_cp_begin(cp, cmd_len);
880 ilo_cp_write(cp, cmd | (cmd_len - 2));
881
882 if (prepend_generated_ids) {
883 struct ilo_ve_cso gen_ids;
884
885 ve_init_cso_with_components(dev,
886 BRW_VE1_COMPONENT_STORE_VID,
887 BRW_VE1_COMPONENT_STORE_IID,
888 BRW_VE1_COMPONENT_NOSTORE,
889 BRW_VE1_COMPONENT_NOSTORE,
890 &gen_ids);
891
892 ilo_cp_write_multi(cp, gen_ids.payload, 2);
893 }
894
895 if (last_velement_edgeflag) {
896 struct ilo_ve_cso edgeflag;
897
898 for (i = 0; i < ve->count - 1; i++)
899 ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
900
901 edgeflag = ve->cso[i];
902 ve_set_cso_edgeflag(dev, &edgeflag);
903 ilo_cp_write_multi(cp, edgeflag.payload, 2);
904 }
905 else {
906 for (i = 0; i < ve->count; i++)
907 ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
908 }
909
910 ilo_cp_end(cp);
911 }
912
913 static inline void
914 gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
915 const struct ilo_ib_state *ib,
916 bool enable_cut_index,
917 struct ilo_cp *cp)
918 {
919 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a);
920 const uint8_t cmd_len = 3;
921 struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
922 uint32_t start_offset, end_offset;
923 int format;
924
925 ILO_GPE_VALID_GEN(dev, 6, 7.5);
926
927 if (!buf)
928 return;
929
930 /* this is moved to the new 3DSTATE_VF */
931 if (dev->gen >= ILO_GEN(7.5))
932 assert(!enable_cut_index);
933
934 switch (ib->hw_index_size) {
935 case 4:
936 format = BRW_INDEX_DWORD;
937 break;
938 case 2:
939 format = BRW_INDEX_WORD;
940 break;
941 case 1:
942 format = BRW_INDEX_BYTE;
943 break;
944 default:
945 assert(!"unknown index size");
946 format = BRW_INDEX_BYTE;
947 break;
948 }
949
950 /*
951 * set start_offset to 0 here and adjust pipe_draw_info::start with
952 * ib->draw_start_offset in 3DPRIMITIVE
953 */
954 start_offset = 0;
955 end_offset = buf->bo_size;
956
957 /* end_offset must also be aligned and is inclusive */
958 end_offset -= (end_offset % ib->hw_index_size);
959 end_offset--;
960
961 ilo_cp_begin(cp, cmd_len);
962 ilo_cp_write(cp, cmd | (cmd_len - 2) |
963 ((enable_cut_index) ? BRW_CUT_INDEX_ENABLE : 0) |
964 format << 8);
965 ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
966 ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
967 ilo_cp_end(cp);
968 }
969
970 static inline void
971 gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev,
972 uint32_t clip_viewport,
973 uint32_t sf_viewport,
974 uint32_t cc_viewport,
975 struct ilo_cp *cp)
976 {
977 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0d);
978 const uint8_t cmd_len = 4;
979
980 ILO_GPE_VALID_GEN(dev, 6, 6);
981
982 ilo_cp_begin(cp, cmd_len);
983 ilo_cp_write(cp, cmd | (cmd_len - 2) |
984 GEN6_CLIP_VIEWPORT_MODIFY |
985 GEN6_SF_VIEWPORT_MODIFY |
986 GEN6_CC_VIEWPORT_MODIFY);
987 ilo_cp_write(cp, clip_viewport);
988 ilo_cp_write(cp, sf_viewport);
989 ilo_cp_write(cp, cc_viewport);
990 ilo_cp_end(cp);
991 }
992
/**
 * Emit 3DSTATE_CC_STATE_POINTERS (GEN6 only).
 *
 * The blend, depth/stencil and color calc values are written in that
 * order, each with bit 0 set.
 */
static inline void
gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
                                    uint32_t blend_state,
                                    uint32_t depth_stencil_state,
                                    uint32_t color_calc_state,
                                    struct ilo_cp *cp)
{
   const uint8_t cmd_len = 4;
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0e);
   const uint32_t dw0 = cmd | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, blend_state | 1);
   ilo_cp_write(cp, depth_stencil_state | 1);
   ilo_cp_write(cp, color_calc_state | 1);
   ilo_cp_end(cp);
}
1012
/**
 * Emit 3DSTATE_SCISSOR_STATE_POINTERS: a two-DWord command whose payload is
 * \p scissor_rect, written unmodified.
 */
static inline void
gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
                                         uint32_t scissor_rect,
                                         struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0f);
   const uint32_t dw0 = cmd | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, scissor_rect);
   ilo_cp_end(cp);
}
1028
1029 static inline void
1030 gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
1031 const struct ilo_shader_state *vs,
1032 int num_samplers,
1033 struct ilo_cp *cp)
1034 {
1035 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10);
1036 const uint8_t cmd_len = 6;
1037 const struct ilo_shader_cso *cso;
1038 uint32_t dw2, dw4, dw5;
1039
1040 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1041
1042 if (!vs) {
1043 ilo_cp_begin(cp, cmd_len);
1044 ilo_cp_write(cp, cmd | (cmd_len - 2));
1045 ilo_cp_write(cp, 0);
1046 ilo_cp_write(cp, 0);
1047 ilo_cp_write(cp, 0);
1048 ilo_cp_write(cp, 0);
1049 ilo_cp_write(cp, 0);
1050 ilo_cp_end(cp);
1051 return;
1052 }
1053
1054 cso = ilo_shader_get_kernel_cso(vs);
1055 dw2 = cso->payload[0];
1056 dw4 = cso->payload[1];
1057 dw5 = cso->payload[2];
1058
1059 dw2 |= ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT;
1060
1061 ilo_cp_begin(cp, cmd_len);
1062 ilo_cp_write(cp, cmd | (cmd_len - 2));
1063 ilo_cp_write(cp, ilo_shader_get_kernel_offset(vs));
1064 ilo_cp_write(cp, dw2);
1065 ilo_cp_write(cp, 0); /* scratch */
1066 ilo_cp_write(cp, dw4);
1067 ilo_cp_write(cp, dw5);
1068 ilo_cp_end(cp);
1069 }
1070
1071 static inline void
1072 gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
1073 const struct ilo_shader_state *gs,
1074 const struct ilo_shader_state *vs,
1075 int verts_per_prim,
1076 struct ilo_cp *cp)
1077 {
1078 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
1079 const uint8_t cmd_len = 7;
1080 uint32_t dw1, dw2, dw4, dw5, dw6;
1081
1082 ILO_GPE_VALID_GEN(dev, 6, 6);
1083
1084 if (gs) {
1085 const struct ilo_shader_cso *cso;
1086
1087 dw1 = ilo_shader_get_kernel_offset(gs);
1088
1089 cso = ilo_shader_get_kernel_cso(gs);
1090 dw2 = cso->payload[0];
1091 dw4 = cso->payload[1];
1092 dw5 = cso->payload[2];
1093 dw6 = cso->payload[3];
1094 }
1095 else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
1096 struct ilo_shader_cso cso;
1097 enum ilo_kernel_param param;
1098
1099 switch (verts_per_prim) {
1100 case 1:
1101 param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
1102 break;
1103 case 2:
1104 param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
1105 break;
1106 default:
1107 param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
1108 break;
1109 }
1110
1111 dw1 = ilo_shader_get_kernel_offset(vs) +
1112 ilo_shader_get_kernel_param(vs, param);
1113
1114 /* cannot use VS's CSO */
1115 ilo_gpe_init_gs_cso_gen6(dev, vs, &cso);
1116 dw2 = cso.payload[0];
1117 dw4 = cso.payload[1];
1118 dw5 = cso.payload[2];
1119 dw6 = cso.payload[3];
1120 }
1121 else {
1122 dw1 = 0;
1123 dw2 = 0;
1124 dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
1125 dw5 = GEN6_GS_STATISTICS_ENABLE;
1126 dw6 = 0;
1127 }
1128
1129 ilo_cp_begin(cp, cmd_len);
1130 ilo_cp_write(cp, cmd | (cmd_len - 2));
1131 ilo_cp_write(cp, dw1);
1132 ilo_cp_write(cp, dw2);
1133 ilo_cp_write(cp, 0);
1134 ilo_cp_write(cp, dw4);
1135 ilo_cp_write(cp, dw5);
1136 ilo_cp_write(cp, dw6);
1137 ilo_cp_end(cp);
1138 }
1139
1140 static inline void
1141 gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
1142 const struct ilo_rasterizer_state *rasterizer,
1143 const struct ilo_shader_state *fs,
1144 bool enable_guardband,
1145 int num_viewports,
1146 struct ilo_cp *cp)
1147 {
1148 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
1149 const uint8_t cmd_len = 4;
1150 uint32_t dw1, dw2, dw3;
1151
1152 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1153
1154 if (rasterizer) {
1155 int interps;
1156
1157 dw1 = rasterizer->clip.payload[0];
1158 dw2 = rasterizer->clip.payload[1];
1159 dw3 = rasterizer->clip.payload[2];
1160
1161 if (enable_guardband && rasterizer->clip.can_enable_guardband)
1162 dw2 |= GEN6_CLIP_GB_TEST;
1163
1164 interps = (fs) ? ilo_shader_get_kernel_param(fs,
1165 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0;
1166
1167 if (interps & (1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC |
1168 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC |
1169 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC))
1170 dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
1171
1172 dw3 |= GEN6_CLIP_FORCE_ZERO_RTAINDEX |
1173 (num_viewports - 1);
1174 }
1175 else {
1176 dw1 = 0;
1177 dw2 = 0;
1178 dw3 = 0;
1179 }
1180
1181 ilo_cp_begin(cp, cmd_len);
1182 ilo_cp_write(cp, cmd | (cmd_len - 2));
1183 ilo_cp_write(cp, dw1);
1184 ilo_cp_write(cp, dw2);
1185 ilo_cp_write(cp, dw3);
1186 ilo_cp_end(cp);
1187 }
1188
1189 static inline void
1190 gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
1191 const struct ilo_rasterizer_state *rasterizer,
1192 const struct ilo_shader_state *fs,
1193 struct ilo_cp *cp)
1194 {
1195 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
1196 const uint8_t cmd_len = 20;
1197 uint32_t payload_raster[6], payload_sbe[13];
1198
1199 ILO_GPE_VALID_GEN(dev, 6, 6);
1200
1201 ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer,
1202 1, PIPE_FORMAT_NONE, payload_raster, Elements(payload_raster));
1203 ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
1204 fs, payload_sbe, Elements(payload_sbe));
1205
1206 ilo_cp_begin(cp, cmd_len);
1207 ilo_cp_write(cp, cmd | (cmd_len - 2));
1208 ilo_cp_write(cp, payload_sbe[0]);
1209 ilo_cp_write_multi(cp, payload_raster, 6);
1210 ilo_cp_write_multi(cp, &payload_sbe[1], 12);
1211 ilo_cp_end(cp);
1212 }
1213
1214 static inline void
1215 gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
1216 const struct ilo_shader_state *fs,
1217 int num_samplers,
1218 const struct ilo_rasterizer_state *rasterizer,
1219 bool dual_blend, bool cc_may_kill,
1220 struct ilo_cp *cp)
1221 {
1222 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
1223 const uint8_t cmd_len = 9;
1224 const int num_samples = 1;
1225 const struct ilo_shader_cso *fs_cso;
1226 uint32_t dw2, dw4, dw5, dw6;
1227
1228 ILO_GPE_VALID_GEN(dev, 6, 6);
1229
1230 if (!fs) {
1231 /* see brwCreateContext() */
1232 const int max_threads = (dev->gt == 2) ? 80 : 40;
1233
1234 ilo_cp_begin(cp, cmd_len);
1235 ilo_cp_write(cp, cmd | (cmd_len - 2));
1236 ilo_cp_write(cp, 0);
1237 ilo_cp_write(cp, 0);
1238 ilo_cp_write(cp, 0);
1239 ilo_cp_write(cp, 0);
1240 /* honor the valid range even if dispatching is disabled */
1241 ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
1242 ilo_cp_write(cp, 0);
1243 ilo_cp_write(cp, 0);
1244 ilo_cp_write(cp, 0);
1245 ilo_cp_end(cp);
1246
1247 return;
1248 }
1249
1250 fs_cso = ilo_shader_get_kernel_cso(fs);
1251 dw2 = fs_cso->payload[0];
1252 dw4 = fs_cso->payload[1];
1253 dw5 = fs_cso->payload[2];
1254 dw6 = fs_cso->payload[3];
1255
1256 dw2 |= (num_samplers + 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT;
1257
1258 if (true) {
1259 dw4 |= GEN6_WM_STATISTICS_ENABLE;
1260 }
1261 else {
1262 /*
1263 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1264 *
1265 * "This bit (Statistics Enable) must be disabled if either of these
1266 * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer
1267 * Resolve Enable or Depth Buffer Resolve Enable."
1268 */
1269 dw4 |= GEN6_WM_DEPTH_CLEAR;
1270 dw4 |= GEN6_WM_DEPTH_RESOLVE;
1271 dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
1272 }
1273
1274 if (cc_may_kill) {
1275 dw5 |= GEN6_WM_KILL_ENABLE |
1276 GEN6_WM_DISPATCH_ENABLE;
1277 }
1278
1279 if (dual_blend)
1280 dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
1281
1282 dw5 |= rasterizer->wm.payload[0];
1283
1284 dw6 |= rasterizer->wm.payload[1];
1285
1286 if (num_samples > 1) {
1287 dw6 |= rasterizer->wm.dw_msaa_rast |
1288 rasterizer->wm.dw_msaa_disp;
1289 }
1290
1291 ilo_cp_begin(cp, cmd_len);
1292 ilo_cp_write(cp, cmd | (cmd_len - 2));
1293 ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
1294 ilo_cp_write(cp, dw2);
1295 ilo_cp_write(cp, 0); /* scratch */
1296 ilo_cp_write(cp, dw4);
1297 ilo_cp_write(cp, dw5);
1298 ilo_cp_write(cp, dw6);
1299 ilo_cp_write(cp, 0); /* kernel 1 */
1300 ilo_cp_write(cp, 0); /* kernel 2 */
1301 ilo_cp_end(cp);
1302 }
1303
/**
 * Fill the four buffer dwords shared by the 3DSTATE_CONSTANT_x commands.
 *
 * For each of the four slots, when the slot index is below num_bufs and its
 * size is non-zero, the dword is the buffer address OR'ed with the read
 * length (size rounded up to 256-bit units, minus one) and the matching bit
 * of the returned mask is set.  Every other slot is written as zero.
 *
 * Debug builds assert that num_dwords is 4, that each used address is
 * 32-byte aligned, that each encoded read length is below 32, and that the
 * summed read lengths do not exceed max_read_length.
 *
 * \return bitmask of the enabled slots (bit i for slot i)
 */
static inline unsigned
gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs, int max_read_length,
                           uint32_t *dw, int num_dwords)
{
   unsigned enable_mask = 0x0;
   int read_sum = 0;
   int slot;

   assert(num_dwords == 4);

   for (slot = 0; slot < 4; slot++) {
      int len_minus_one;

      if (slot >= num_bufs || !sizes[slot]) {
         dw[slot] = 0;
         continue;
      }

      /* in 256-bit units minus one */
      len_minus_one = (sizes[slot] + 31) / 32 - 1;

      assert(bufs[slot] % 32 == 0);
      assert(len_minus_one < 32);

      dw[slot] = bufs[slot] | len_minus_one;
      enable_mask |= 1 << slot;

      read_sum += len_minus_one + 1;
   }

   assert(read_sum <= max_read_length);

   return enable_mask;
}
1338
/**
 * Emit 3DSTATE_CONSTANT_VS (asserted GEN6 only).
 *
 * Up to four constant buffers are described by bufs and sizes.  The
 * per-buffer dwords come from gen6_fill_3dstate_constant(); the mask of
 * enabled buffers it returns is placed at bit 12 of the header.
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x15);
   const uint8_t num_dwords = 5;
   uint32_t slots[4], enable_mask;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 138:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 32"
    */
   enable_mask = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 32, slots, Elements(slots));

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, opcode | (num_dwords - 2) | (enable_mask << 12));
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, slots[i]);
   ilo_cp_end(cp);
}
1369
/**
 * Emit 3DSTATE_CONSTANT_GS (asserted GEN6 only).
 *
 * Up to four constant buffers are described by bufs and sizes.  The
 * per-buffer dwords come from gen6_fill_3dstate_constant(); the mask of
 * enabled buffers it returns is placed at bit 12 of the header.
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x16);
   const uint8_t num_dwords = 5;
   uint32_t slots[4], enable_mask;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 161:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   enable_mask = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 64, slots, Elements(slots));

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, opcode | (num_dwords - 2) | (enable_mask << 12));
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, slots[i]);
   ilo_cp_end(cp);
}
1400
/**
 * Emit 3DSTATE_CONSTANT_PS (asserted GEN6 only).
 *
 * Up to four constant buffers are described by bufs and sizes.  The
 * per-buffer dwords come from gen6_fill_3dstate_constant(); the mask of
 * enabled buffers it returns is placed at bit 12 of the header.
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x17);
   const uint8_t num_dwords = 5;
   uint32_t slots[4], enable_mask;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 287:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   enable_mask = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 64, slots, Elements(slots));

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, opcode | (num_dwords - 2) | (enable_mask << 12));
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, slots[i]);
   ilo_cp_end(cp);
}
1431
/**
 * Emit 3DSTATE_SAMPLE_MASK (asserted GEN6 only).
 *
 * Only the low four bits of sample_mask are emitted; higher bits are
 * dropped.
 */
static inline void
gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
                              unsigned sample_mask,
                              struct ilo_cp *cp)
{
   const uint8_t num_dwords = 2;
   const uint32_t dw0 = ILO_GPE_CMD(0x3, 0x0, 0x18) | (num_dwords - 2);
   const unsigned supported_bits = 0xf;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, sample_mask & supported_bits);
   ilo_cp_end(cp);
}
1450
1451 static inline void
1452 gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev,
1453 unsigned x, unsigned y,
1454 unsigned width, unsigned height,
1455 struct ilo_cp *cp)
1456 {
1457 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x00);
1458 const uint8_t cmd_len = 4;
1459 unsigned xmax = x + width - 1;
1460 unsigned ymax = y + height - 1;
1461 int rect_limit;
1462
1463 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1464
1465 if (dev->gen >= ILO_GEN(7)) {
1466 rect_limit = 16383;
1467 }
1468 else {
1469 /*
1470 * From the Sandy Bridge PRM, volume 2 part 1, page 230:
1471 *
1472 * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
1473 * must be an even number"
1474 */
1475 assert(y % 2 == 0);
1476
1477 rect_limit = 8191;
1478 }
1479
1480 if (x > rect_limit) x = rect_limit;
1481 if (y > rect_limit) y = rect_limit;
1482 if (xmax > rect_limit) xmax = rect_limit;
1483 if (ymax > rect_limit) ymax = rect_limit;
1484
1485 ilo_cp_begin(cp, cmd_len);
1486 ilo_cp_write(cp, cmd | (cmd_len - 2));
1487 ilo_cp_write(cp, y << 16 | x);
1488 ilo_cp_write(cp, ymax << 16 | xmax);
1489
1490 /*
1491 * There is no need to set the origin. It is intended to support front
1492 * buffer rendering.
1493 */
1494 ilo_cp_write(cp, 0);
1495
1496 ilo_cp_end(cp);
1497 }
1498
1499 static inline void
1500 gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
1501 const struct ilo_zs_surface *zs,
1502 struct ilo_cp *cp)
1503 {
1504 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
1505 ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
1506 const uint8_t cmd_len = 7;
1507
1508 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1509
1510 ilo_cp_begin(cp, cmd_len);
1511 ilo_cp_write(cp, cmd | (cmd_len - 2));
1512 ilo_cp_write(cp, zs->payload[0]);
1513 ilo_cp_write_bo(cp, zs->payload[1], zs->bo,
1514 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1515 ilo_cp_write(cp, zs->payload[2]);
1516 ilo_cp_write(cp, zs->payload[3]);
1517 ilo_cp_write(cp, zs->payload[4]);
1518 ilo_cp_write(cp, zs->payload[5]);
1519 ilo_cp_end(cp);
1520 }
1521
/**
 * Emit 3DSTATE_POLY_STIPPLE_OFFSET (asserted valid on GEN6 through GEN7.5).
 *
 * Both offsets are asserted to be in [0, 31].  x_offset is placed at bit 8
 * and y_offset at bit 0 of the payload dword.
 */
static inline void
gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
                                      int x_offset, int y_offset,
                                      struct ilo_cp *cp)
{
   const uint8_t num_dwords = 2;
   const uint32_t dw0 = ILO_GPE_CMD(0x3, 0x1, 0x06) | (num_dwords - 2);

   ILO_GPE_VALID_GEN(dev, 6, 7.5);
   assert(x_offset >= 0 && x_offset <= 31);
   assert(y_offset >= 0 && y_offset <= 31);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, (x_offset << 8) | y_offset);
   ilo_cp_end(cp);
}
1539
1540 static inline void
1541 gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev,
1542 const struct pipe_poly_stipple *pattern,
1543 struct ilo_cp *cp)
1544 {
1545 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x07);
1546 const uint8_t cmd_len = 33;
1547 int i;
1548
1549 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1550 assert(Elements(pattern->stipple) == 32);
1551
1552 ilo_cp_begin(cp, cmd_len);
1553 ilo_cp_write(cp, cmd | (cmd_len - 2));
1554 for (i = 0; i < 32; i++)
1555 ilo_cp_write(cp, pattern->stipple[i]);
1556 ilo_cp_end(cp);
1557 }
1558
1559 static inline void
1560 gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev,
1561 unsigned pattern, unsigned factor,
1562 struct ilo_cp *cp)
1563 {
1564 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x08);
1565 const uint8_t cmd_len = 3;
1566 unsigned inverse;
1567
1568 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1569 assert((pattern & 0xffff) == pattern);
1570 assert(factor >= 1 && factor <= 256);
1571
1572 ilo_cp_begin(cp, cmd_len);
1573 ilo_cp_write(cp, cmd | (cmd_len - 2));
1574 ilo_cp_write(cp, pattern);
1575
1576 if (dev->gen >= ILO_GEN(7)) {
1577 /* in U1.16 */
1578 inverse = (unsigned) (65536.0f / factor);
1579 ilo_cp_write(cp, inverse << 15 | factor);
1580 }
1581 else {
1582 /* in U1.13 */
1583 inverse = (unsigned) (8192.0f / factor);
1584 ilo_cp_write(cp, inverse << 16 | factor);
1585 }
1586
1587 ilo_cp_end(cp);
1588 }
1589
/**
 * Emit 3DSTATE_AA_LINE_PARAMETERS (asserted valid on GEN6 through GEN7.5).
 *
 * Both payload dwords are written as zero; each is expressed as two zero
 * halves split at bit 16.
 */
static inline void
gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
                                     struct ilo_cp *cp)
{
   const uint8_t num_dwords = 3;
   const uint32_t dw0 = ILO_GPE_CMD(0x3, 0x1, 0x0a) | (num_dwords - 2);
   const uint32_t zero_params = 0 << 16 | 0;

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, zero_params);
   ilo_cp_write(cp, zero_params);
   ilo_cp_end(cp);
}
1605
1606 static inline void
1607 gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev,
1608 int index, unsigned svbi,
1609 unsigned max_svbi,
1610 bool load_vertex_count,
1611 struct ilo_cp *cp)
1612 {
1613 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0b);
1614 const uint8_t cmd_len = 4;
1615 uint32_t dw1;
1616
1617 ILO_GPE_VALID_GEN(dev, 6, 6);
1618 assert(index >= 0 && index < 4);
1619
1620 dw1 = index << SVB_INDEX_SHIFT;
1621 if (load_vertex_count)
1622 dw1 |= SVB_LOAD_INTERNAL_VERTEX_COUNT;
1623
1624 ilo_cp_begin(cp, cmd_len);
1625 ilo_cp_write(cp, cmd | (cmd_len - 2));
1626 ilo_cp_write(cp, dw1);
1627 ilo_cp_write(cp, svbi);
1628 ilo_cp_write(cp, max_svbi);
1629 ilo_cp_end(cp);
1630 }
1631
1632 static inline void
1633 gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev,
1634 int num_samples,
1635 const uint32_t *packed_sample_pos,
1636 bool pixel_location_center,
1637 struct ilo_cp *cp)
1638 {
1639 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0d);
1640 const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3;
1641 uint32_t dw1, dw2, dw3;
1642
1643 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1644
1645 dw1 = (pixel_location_center) ?
1646 MS_PIXEL_LOCATION_CENTER : MS_PIXEL_LOCATION_UPPER_LEFT;
1647
1648 switch (num_samples) {
1649 case 0:
1650 case 1:
1651 dw1 |= MS_NUMSAMPLES_1;
1652 dw2 = 0;
1653 dw3 = 0;
1654 break;
1655 case 4:
1656 dw1 |= MS_NUMSAMPLES_4;
1657 dw2 = packed_sample_pos[0];
1658 dw3 = 0;
1659 break;
1660 case 8:
1661 assert(dev->gen >= ILO_GEN(7));
1662 dw1 |= MS_NUMSAMPLES_8;
1663 dw2 = packed_sample_pos[0];
1664 dw3 = packed_sample_pos[1];
1665 break;
1666 default:
1667 assert(!"unsupported sample count");
1668 dw1 |= MS_NUMSAMPLES_1;
1669 dw2 = 0;
1670 dw3 = 0;
1671 break;
1672 }
1673
1674 ilo_cp_begin(cp, cmd_len);
1675 ilo_cp_write(cp, cmd | (cmd_len - 2));
1676 ilo_cp_write(cp, dw1);
1677 ilo_cp_write(cp, dw2);
1678 if (dev->gen >= ILO_GEN(7))
1679 ilo_cp_write(cp, dw3);
1680 ilo_cp_end(cp);
1681 }
1682
1683 static inline void
1684 gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev,
1685 const struct ilo_zs_surface *zs,
1686 struct ilo_cp *cp)
1687 {
1688 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
1689 ILO_GPE_CMD(0x3, 0x0, 0x06) :
1690 ILO_GPE_CMD(0x3, 0x1, 0x0e);
1691 const uint8_t cmd_len = 3;
1692
1693 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1694
1695 ilo_cp_begin(cp, cmd_len);
1696 ilo_cp_write(cp, cmd | (cmd_len - 2));
1697 /* see ilo_gpe_init_zs_surface() */
1698 ilo_cp_write(cp, zs->payload[6]);
1699 ilo_cp_write_bo(cp, zs->payload[7], zs->separate_s8_bo,
1700 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1701 ilo_cp_end(cp);
1702 }
1703
1704 static inline void
1705 gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev,
1706 const struct ilo_zs_surface *zs,
1707 struct ilo_cp *cp)
1708 {
1709 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
1710 ILO_GPE_CMD(0x3, 0x0, 0x07) :
1711 ILO_GPE_CMD(0x3, 0x1, 0x0f);
1712 const uint8_t cmd_len = 3;
1713
1714 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1715
1716 ilo_cp_begin(cp, cmd_len);
1717 ilo_cp_write(cp, cmd | (cmd_len - 2));
1718 /* see ilo_gpe_init_zs_surface() */
1719 ilo_cp_write(cp, zs->payload[8]);
1720 ilo_cp_write_bo(cp, zs->payload[9], zs->hiz_bo,
1721 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1722 ilo_cp_end(cp);
1723 }
1724
1725 static inline void
1726 gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
1727 uint32_t clear_val,
1728 struct ilo_cp *cp)
1729 {
1730 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x10);
1731 const uint8_t cmd_len = 2;
1732
1733 ILO_GPE_VALID_GEN(dev, 6, 6);
1734
1735 ilo_cp_begin(cp, cmd_len);
1736 ilo_cp_write(cp, cmd | (cmd_len - 2) |
1737 GEN5_DEPTH_CLEAR_VALID);
1738 ilo_cp_write(cp, clear_val);
1739 ilo_cp_end(cp);
1740 }
1741
1742 static inline void
1743 gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev,
1744 uint32_t dw1,
1745 struct intel_bo *bo, uint32_t bo_offset,
1746 bool write_qword,
1747 struct ilo_cp *cp)
1748 {
1749 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x2, 0x00);
1750 const uint8_t cmd_len = (write_qword) ? 5 : 4;
1751 const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
1752 const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
1753
1754 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1755
1756 if (dw1 & PIPE_CONTROL_CS_STALL) {
1757 /*
1758 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
1759 *
1760 * "1 of the following must also be set (when CS stall is set):
1761 *
1762 * * Depth Cache Flush Enable ([0] of DW1)
1763 * * Stall at Pixel Scoreboard ([1] of DW1)
1764 * * Depth Stall ([13] of DW1)
1765 * * Post-Sync Operation ([13] of DW1)
1766 * * Render Target Cache Flush Enable ([12] of DW1)
1767 * * Notify Enable ([8] of DW1)"
1768 *
1769 * From the Ivy Bridge PRM, volume 2 part 1, page 61:
1770 *
1771 * "One of the following must also be set (when CS stall is set):
1772 *
1773 * * Render Target Cache Flush Enable ([12] of DW1)
1774 * * Depth Cache Flush Enable ([0] of DW1)
1775 * * Stall at Pixel Scoreboard ([1] of DW1)
1776 * * Depth Stall ([13] of DW1)
1777 * * Post-Sync Operation ([13] of DW1)"
1778 */
1779 uint32_t bit_test = PIPE_CONTROL_WRITE_FLUSH |
1780 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
1781 PIPE_CONTROL_STALL_AT_SCOREBOARD |
1782 PIPE_CONTROL_DEPTH_STALL;
1783
1784 /* post-sync op */
1785 bit_test |= PIPE_CONTROL_WRITE_IMMEDIATE |
1786 PIPE_CONTROL_WRITE_DEPTH_COUNT |
1787 PIPE_CONTROL_WRITE_TIMESTAMP;
1788
1789 if (dev->gen == ILO_GEN(6))
1790 bit_test |= PIPE_CONTROL_INTERRUPT_ENABLE;
1791
1792 assert(dw1 & bit_test);
1793 }
1794
1795 if (dw1 & PIPE_CONTROL_DEPTH_STALL) {
1796 /*
1797 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
1798 *
1799 * "Following bits must be clear (when Depth Stall is set):
1800 *
1801 * * Render Target Cache Flush Enable ([12] of DW1)
1802 * * Depth Cache Flush Enable ([0] of DW1)"
1803 */
1804 assert(!(dw1 & (PIPE_CONTROL_WRITE_FLUSH |
1805 PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
1806 }
1807
1808 ilo_cp_begin(cp, cmd_len);
1809 ilo_cp_write(cp, cmd | (cmd_len - 2));
1810 ilo_cp_write(cp, dw1);
1811 ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
1812 ilo_cp_write(cp, 0);
1813 if (write_qword)
1814 ilo_cp_write(cp, 0);
1815 ilo_cp_end(cp);
1816 }
1817
1818 static inline void
1819 gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
1820 const struct pipe_draw_info *info,
1821 const struct ilo_ib_state *ib,
1822 bool rectlist,
1823 struct ilo_cp *cp)
1824 {
1825 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
1826 const uint8_t cmd_len = 6;
1827 const int prim = (rectlist) ?
1828 _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
1829 const int vb_access = (info->indexed) ?
1830 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
1831 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
1832 const uint32_t vb_start = info->start +
1833 ((info->indexed) ? ib->draw_start_offset : 0);
1834
1835 ILO_GPE_VALID_GEN(dev, 6, 6);
1836
1837 ilo_cp_begin(cp, cmd_len);
1838 ilo_cp_write(cp, cmd | (cmd_len - 2) |
1839 prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
1840 vb_access);
1841 ilo_cp_write(cp, info->count);
1842 ilo_cp_write(cp, vb_start);
1843 ilo_cp_write(cp, info->instance_count);
1844 ilo_cp_write(cp, info->start_instance);
1845 ilo_cp_write(cp, info->index_bias);
1846 ilo_cp_end(cp);
1847 }
1848
/**
 * Write num_ids INTERFACE_DESCRIPTOR_DATA entries (asserted GEN6 only).
 *
 * Each entry is eight dwords, filled from the i-th elements of cs,
 * sampler_state, num_samplers, binding_table_state and num_surfaces.  The
 * storage is obtained from ilo_cp_steal_ptr().
 *
 * \return the state offset reported by ilo_cp_steal_ptr()
 */
static inline uint32_t
gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev,
                                    const struct ilo_shader_state **cs,
                                    uint32_t *sampler_state,
                                    int *num_samplers,
                                    uint32_t *binding_table_state,
                                    int *num_surfaces,
                                    int num_ids,
                                    struct ilo_cp *cp)
{
   /*
    * From the Sandy Bridge PRM, volume 2 part 2, page 34:
    *
    *     "(Interface Descriptor Total Length) This field must have the same
    *      alignment as the Interface Descriptor Data Start Address.
    *
    *      It must be DQWord (32-byte) aligned..."
    *
    * From the Sandy Bridge PRM, volume 2 part 2, page 35:
    *
    *     "(Interface Descriptor Data Start Address) Specifies the 32-byte
    *      aligned address of the Interface Descriptor data."
    */
   const int align_in_dwords = 32 / 4;
   const int len_in_dwords = (32 / 4) * num_ids;
   uint32_t offset, *base;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   base = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA",
         len_in_dwords, align_in_dwords, &offset);

   for (i = 0; i < num_ids; i++) {
      uint32_t *desc = base + 8 * i;

      desc[0] = ilo_shader_get_kernel_offset(cs[i]);
      desc[1] = 1 << 18; /* SPF */
      desc[2] = sampler_state[i] |
                ((num_samplers[i] + 3) / 4) << 2;
      desc[3] = binding_table_state[i] |
                num_surfaces[i];
      desc[4] = 0 << 16 |  /* CURBE Read Length */
                0;         /* CURBE Read Offset */
      desc[5] = 0; /* Barrier ID */
      desc[6] = 0;
      desc[7] = 0;
   }

   return offset;
}
1900
1901 static inline uint32_t
1902 gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev,
1903 const struct ilo_viewport_cso *viewports,
1904 unsigned num_viewports,
1905 struct ilo_cp *cp)
1906 {
1907 const int state_align = 32 / 4;
1908 const int state_len = 8 * num_viewports;
1909 uint32_t state_offset, *dw;
1910 unsigned i;
1911
1912 ILO_GPE_VALID_GEN(dev, 6, 6);
1913
1914 /*
1915 * From the Sandy Bridge PRM, volume 2 part 1, page 262:
1916 *
1917 * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
1918 * stored as an array of up to 16 elements..."
1919 */
1920 assert(num_viewports && num_viewports <= 16);
1921
1922 dw = ilo_cp_steal_ptr(cp, "SF_VIEWPORT",
1923 state_len, state_align, &state_offset);
1924
1925 for (i = 0; i < num_viewports; i++) {
1926 const struct ilo_viewport_cso *vp = &viewports[i];
1927
1928 dw[0] = fui(vp->m00);
1929 dw[1] = fui(vp->m11);
1930 dw[2] = fui(vp->m22);
1931 dw[3] = fui(vp->m30);
1932 dw[4] = fui(vp->m31);
1933 dw[5] = fui(vp->m32);
1934 dw[6] = 0;
1935 dw[7] = 0;
1936
1937 dw += 8;
1938 }
1939
1940 return state_offset;
1941 }
1942
1943 static inline uint32_t
1944 gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
1945 const struct ilo_viewport_cso *viewports,
1946 unsigned num_viewports,
1947 struct ilo_cp *cp)
1948 {
1949 const int state_align = 32 / 4;
1950 const int state_len = 4 * num_viewports;
1951 uint32_t state_offset, *dw;
1952 unsigned i;
1953
1954 ILO_GPE_VALID_GEN(dev, 6, 6);
1955
1956 /*
1957 * From the Sandy Bridge PRM, volume 2 part 1, page 193:
1958 *
1959 * "The viewport-related state is stored as an array of up to 16
1960 * elements..."
1961 */
1962 assert(num_viewports && num_viewports <= 16);
1963
1964 dw = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT",
1965 state_len, state_align, &state_offset);
1966
1967 for (i = 0; i < num_viewports; i++) {
1968 const struct ilo_viewport_cso *vp = &viewports[i];
1969
1970 dw[0] = fui(vp->min_gbx);
1971 dw[1] = fui(vp->max_gbx);
1972 dw[2] = fui(vp->min_gby);
1973 dw[3] = fui(vp->max_gby);
1974
1975 dw += 4;
1976 }
1977
1978 return state_offset;
1979 }
1980
1981 static inline uint32_t
1982 gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev,
1983 const struct ilo_viewport_cso *viewports,
1984 unsigned num_viewports,
1985 struct ilo_cp *cp)
1986 {
1987 const int state_align = 32 / 4;
1988 const int state_len = 2 * num_viewports;
1989 uint32_t state_offset, *dw;
1990 unsigned i;
1991
1992 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1993
1994 /*
1995 * From the Sandy Bridge PRM, volume 2 part 1, page 385:
1996 *
1997 * "The viewport state is stored as an array of up to 16 elements..."
1998 */
1999 assert(num_viewports && num_viewports <= 16);
2000
2001 dw = ilo_cp_steal_ptr(cp, "CC_VIEWPORT",
2002 state_len, state_align, &state_offset);
2003
2004 for (i = 0; i < num_viewports; i++) {
2005 const struct ilo_viewport_cso *vp = &viewports[i];
2006
2007 dw[0] = fui(vp->min_z);
2008 dw[1] = fui(vp->max_z);
2009
2010 dw += 2;
2011 }
2012
2013 return state_offset;
2014 }
2015
2016 static inline uint32_t
2017 gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev,
2018 const struct pipe_stencil_ref *stencil_ref,
2019 ubyte alpha_ref,
2020 const struct pipe_blend_color *blend_color,
2021 struct ilo_cp *cp)
2022 {
2023 const int state_align = 64 / 4;
2024 const int state_len = 6;
2025 uint32_t state_offset, *dw;
2026
2027 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2028
2029 dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE",
2030 state_len, state_align, &state_offset);
2031
2032 dw[0] = stencil_ref->ref_value[0] << 24 |
2033 stencil_ref->ref_value[1] << 16 |
2034 BRW_ALPHATEST_FORMAT_UNORM8;
2035 dw[1] = alpha_ref;
2036 dw[2] = fui(blend_color->color[0]);
2037 dw[3] = fui(blend_color->color[1]);
2038 dw[4] = fui(blend_color->color[2]);
2039 dw[5] = fui(blend_color->color[3]);
2040
2041 return state_offset;
2042 }
2043
2044 static inline uint32_t
2045 gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
2046 const struct ilo_blend_state *blend,
2047 const struct ilo_fb_state *fb,
2048 const struct ilo_dsa_state *dsa,
2049 struct ilo_cp *cp)
2050 {
2051 const int state_align = 64 / 4;
2052 int state_len;
2053 uint32_t state_offset, *dw;
2054 unsigned num_targets, i;
2055
2056 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2057
2058 /*
2059 * From the Sandy Bridge PRM, volume 2 part 1, page 376:
2060 *
2061 * "The blend state is stored as an array of up to 8 elements..."
2062 */
2063 num_targets = fb->state.nr_cbufs;
2064 assert(num_targets <= 8);
2065
2066 if (!num_targets) {
2067 if (!dsa->dw_alpha)
2068 return 0;
2069 /* to be able to reference alpha func */
2070 num_targets = 1;
2071 }
2072
2073 state_len = 2 * num_targets;
2074
2075 dw = ilo_cp_steal_ptr(cp, "BLEND_STATE",
2076 state_len, state_align, &state_offset);
2077
2078 for (i = 0; i < num_targets; i++) {
2079 const unsigned idx = (blend->independent_blend_enable) ? i : 0;
2080 const struct ilo_blend_cso *cso = &blend->cso[idx];
2081 const int num_samples = fb->num_samples;
2082 const struct util_format_description *format_desc =
2083 (idx < fb->state.nr_cbufs) ?
2084 util_format_description(fb->state.cbufs[idx]->format) : NULL;
2085 bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;
2086
2087 rt_is_unorm = true;
2088 rt_is_pure_integer = false;
2089 rt_dst_alpha_forced_one = false;
2090
2091 if (format_desc) {
2092 int ch;
2093
2094 switch (format_desc->format) {
2095 case PIPE_FORMAT_B8G8R8X8_UNORM:
2096 /* force alpha to one when the HW format has alpha */
2097 assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM)
2098 == BRW_SURFACEFORMAT_B8G8R8A8_UNORM);
2099 rt_dst_alpha_forced_one = true;
2100 break;
2101 default:
2102 break;
2103 }
2104
2105 for (ch = 0; ch < 4; ch++) {
2106 if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID)
2107 continue;
2108
2109 if (format_desc->channel[ch].pure_integer) {
2110 rt_is_unorm = false;
2111 rt_is_pure_integer = true;
2112 break;
2113 }
2114
2115 if (!format_desc->channel[ch].normalized ||
2116 format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED)
2117 rt_is_unorm = false;
2118 }
2119 }
2120
2121 dw[0] = cso->payload[0];
2122 dw[1] = cso->payload[1];
2123
2124 if (!rt_is_pure_integer) {
2125 if (rt_dst_alpha_forced_one)
2126 dw[0] |= cso->dw_blend_dst_alpha_forced_one;
2127 else
2128 dw[0] |= cso->dw_blend;
2129 }
2130
2131 /*
2132 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
2133 *
2134 * "Logic Ops are only supported on *_UNORM surfaces (excluding
2135 * _SRGB variants), otherwise Logic Ops must be DISABLED."
2136 *
2137 * Since logicop is ignored for non-UNORM color buffers, no special care
2138 * is needed.
2139 */
2140 if (rt_is_unorm)
2141 dw[1] |= cso->dw_logicop;
2142
2143 /*
2144 * From the Sandy Bridge PRM, volume 2 part 1, page 356:
2145 *
2146 * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
2147 * Dither both must be disabled."
2148 *
2149 * There is no such limitation on GEN7, or for AlphaToOne. But GL
2150 * requires that anyway.
2151 */
2152 if (num_samples > 1)
2153 dw[1] |= cso->dw_alpha_mod;
2154
2155 /*
2156 * From the Sandy Bridge PRM, volume 2 part 1, page 382:
2157 *
2158 * "Alpha Test can only be enabled if Pixel Shader outputs a float
2159 * alpha value."
2160 */
2161 if (!rt_is_pure_integer)
2162 dw[1] |= dsa->dw_alpha;
2163
2164 dw += 2;
2165 }
2166
2167 return state_offset;
2168 }
2169
2170 static inline uint32_t
2171 gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev,
2172 const struct ilo_dsa_state *dsa,
2173 struct ilo_cp *cp)
2174 {
2175 const int state_align = 64 / 4;
2176 const int state_len = 3;
2177 uint32_t state_offset, *dw;
2178
2179
2180 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2181
2182 dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE",
2183 state_len, state_align, &state_offset);
2184
2185 dw[0] = dsa->payload[0];
2186 dw[1] = dsa->payload[1];
2187 dw[2] = dsa->payload[2];
2188
2189 return state_offset;
2190 }
2191
2192 static inline uint32_t
2193 gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev,
2194 const struct ilo_scissor_state *scissor,
2195 unsigned num_viewports,
2196 struct ilo_cp *cp)
2197 {
2198 const int state_align = 32 / 4;
2199 const int state_len = 2 * num_viewports;
2200 uint32_t state_offset, *dw;
2201
2202 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2203
2204 /*
2205 * From the Sandy Bridge PRM, volume 2 part 1, page 263:
2206 *
2207 * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
2208 * stored as an array of up to 16 elements..."
2209 */
2210 assert(num_viewports && num_viewports <= 16);
2211
2212 dw = ilo_cp_steal_ptr(cp, "SCISSOR_RECT",
2213 state_len, state_align, &state_offset);
2214
2215 memcpy(dw, scissor->payload, state_len * 4);
2216
2217 return state_offset;
2218 }
2219
/**
 * Emit a BINDING_TABLE_STATE, one dword per surface state.
 *
 * \param surface_states      values to copy into the table
 * \param num_surface_states  number of entries; asserted to be at most 256
 * \return 0 when there is nothing to emit; otherwise the state offset
 *         reported by ilo_cp_steal_ptr().
 */
static inline uint32_t
gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
                              uint32_t *surface_states,
                              int num_surface_states,
                              struct ilo_cp *cp)
{
   const int alignment = 32 / 4;
   const int num_dwords = num_surface_states;
   uint32_t offset;
   uint32_t *table;

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 69:
    *
    *     "It is stored as an array of up to 256 elements..."
    */
   assert(num_surface_states <= 256);

   /* an empty table takes no space */
   if (num_surface_states == 0)
      return 0;

   table = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
         num_dwords, alignment, &offset);

   memcpy(table, surface_states,
         num_surface_states * sizeof(surface_states[0]));

   return offset;
}
2249
2250 static inline uint32_t
2251 gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev,
2252 const struct ilo_view_surface *surf,
2253 bool for_render,
2254 struct ilo_cp *cp)
2255 {
2256 const int state_align = 32 / 4;
2257 const int state_len = (dev->gen >= ILO_GEN(7)) ? 8 : 6;
2258 uint32_t state_offset;
2259 uint32_t read_domains, write_domain;
2260
2261 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2262
2263 if (for_render) {
2264 read_domains = INTEL_DOMAIN_RENDER;
2265 write_domain = INTEL_DOMAIN_RENDER;
2266 }
2267 else {
2268 read_domains = INTEL_DOMAIN_SAMPLER;
2269 write_domain = 0;
2270 }
2271
2272 ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset);
2273
2274 STATIC_ASSERT(Elements(surf->payload) >= 8);
2275
2276 ilo_cp_write(cp, surf->payload[0]);
2277 ilo_cp_write_bo(cp, surf->payload[1],
2278 surf->bo, read_domains, write_domain);
2279 ilo_cp_write(cp, surf->payload[2]);
2280 ilo_cp_write(cp, surf->payload[3]);
2281 ilo_cp_write(cp, surf->payload[4]);
2282 ilo_cp_write(cp, surf->payload[5]);
2283
2284 if (dev->gen >= ILO_GEN(7)) {
2285 ilo_cp_write(cp, surf->payload[6]);
2286 ilo_cp_write(cp, surf->payload[7]);
2287 }
2288
2289 ilo_cp_end(cp);
2290
2291 return state_offset;
2292 }
2293
2294 static inline uint32_t
2295 gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev,
2296 const struct pipe_stream_output_target *so,
2297 const struct pipe_stream_output_info *so_info,
2298 int so_index,
2299 struct ilo_cp *cp)
2300 {
2301 struct ilo_buffer *buf = ilo_buffer(so->buffer);
2302 unsigned bo_offset, struct_size;
2303 enum pipe_format elem_format;
2304 struct ilo_view_surface surf;
2305
2306 ILO_GPE_VALID_GEN(dev, 6, 6);
2307
2308 bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
2309 struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
2310
2311 switch (so_info->output[so_index].num_components) {
2312 case 1:
2313 elem_format = PIPE_FORMAT_R32_FLOAT;
2314 break;
2315 case 2:
2316 elem_format = PIPE_FORMAT_R32G32_FLOAT;
2317 break;
2318 case 3:
2319 elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
2320 break;
2321 case 4:
2322 elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
2323 break;
2324 default:
2325 assert(!"unexpected SO components length");
2326 elem_format = PIPE_FORMAT_R32_FLOAT;
2327 break;
2328 }
2329
2330 ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size,
2331 struct_size, elem_format, false, true, &surf);
2332
2333 return gen6_emit_SURFACE_STATE(dev, &surf, false, cp);
2334 }
2335
2336 static inline uint32_t
2337 gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev,
2338 const struct ilo_sampler_cso * const *samplers,
2339 const struct pipe_sampler_view * const *views,
2340 const uint32_t *sampler_border_colors,
2341 int num_samplers,
2342 struct ilo_cp *cp)
2343 {
2344 const int state_align = 32 / 4;
2345 const int state_len = 4 * num_samplers;
2346 uint32_t state_offset, *dw;
2347 int i;
2348
2349 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2350
2351 /*
2352 * From the Sandy Bridge PRM, volume 4 part 1, page 101:
2353 *
2354 * "The sampler state is stored as an array of up to 16 elements..."
2355 */
2356 assert(num_samplers <= 16);
2357
2358 if (!num_samplers)
2359 return 0;
2360
2361 dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE",
2362 state_len, state_align, &state_offset);
2363
2364 for (i = 0; i < num_samplers; i++) {
2365 const struct ilo_sampler_cso *sampler = samplers[i];
2366 const struct pipe_sampler_view *view = views[i];
2367 const uint32_t border_color = sampler_border_colors[i];
2368 uint32_t dw_filter, dw_wrap;
2369
2370 /* there may be holes */
2371 if (!sampler || !view) {
2372 /* disabled sampler */
2373 dw[0] = 1 << 31;
2374 dw[1] = 0;
2375 dw[2] = 0;
2376 dw[3] = 0;
2377 dw += 4;
2378
2379 continue;
2380 }
2381
2382 /* determine filter and wrap modes */
2383 switch (view->texture->target) {
2384 case PIPE_TEXTURE_1D:
2385 dw_filter = (sampler->anisotropic) ?
2386 sampler->dw_filter_aniso : sampler->dw_filter;
2387 dw_wrap = sampler->dw_wrap_1d;
2388 break;
2389 case PIPE_TEXTURE_3D:
2390 /*
2391 * From the Sandy Bridge PRM, volume 4 part 1, page 103:
2392 *
2393 * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
2394 * surfaces of type SURFTYPE_3D."
2395 */
2396 dw_filter = sampler->dw_filter;
2397 dw_wrap = sampler->dw_wrap;
2398 break;
2399 case PIPE_TEXTURE_CUBE:
2400 dw_filter = (sampler->anisotropic) ?
2401 sampler->dw_filter_aniso : sampler->dw_filter;
2402 dw_wrap = sampler->dw_wrap_cube;
2403 break;
2404 default:
2405 dw_filter = (sampler->anisotropic) ?
2406 sampler->dw_filter_aniso : sampler->dw_filter;
2407 dw_wrap = sampler->dw_wrap;
2408 break;
2409 }
2410
2411 dw[0] = sampler->payload[0];
2412 dw[1] = sampler->payload[1];
2413 assert(!(border_color & 0x1f));
2414 dw[2] = border_color;
2415 dw[3] = sampler->payload[2];
2416
2417 dw[0] |= dw_filter;
2418
2419 if (dev->gen >= ILO_GEN(7)) {
2420 dw[3] |= dw_wrap;
2421 }
2422 else {
2423 /*
2424 * From the Sandy Bridge PRM, volume 4 part 1, page 21:
2425 *
2426 * "[DevSNB] Errata: Incorrect behavior is observed in cases
2427 * where the min and mag mode filters are different and
2428 * SurfMinLOD is nonzero. The determination of MagMode uses the
2429 * following equation instead of the one in the above
2430 * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
2431 *
2432 * As a way to work around that, we set Base to
2433 * view->u.tex.first_level.
2434 */
2435 dw[0] |= view->u.tex.first_level << 22;
2436
2437 dw[1] |= dw_wrap;
2438 }
2439
2440 dw += 4;
2441 }
2442
2443 return state_offset;
2444 }
2445
2446 static inline uint32_t
2447 gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev,
2448 const struct ilo_sampler_cso *sampler,
2449 struct ilo_cp *cp)
2450 {
2451 const int state_align = 32 / 4;
2452 const int state_len = (dev->gen >= ILO_GEN(7)) ? 4 : 12;
2453 uint32_t state_offset, *dw;
2454
2455 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2456
2457 dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE",
2458 state_len, state_align, &state_offset);
2459
2460 /* see ilo_gpe_init_sampler_cso() */
2461 memcpy(dw, &sampler->payload[3], state_len * 4);
2462
2463 return state_offset;
2464 }
2465
/**
 * Reserve space for a push constant buffer.
 *
 * The reservation is \p size rounded up with align(size, 32).  Only the
 * bytes past \p size are zeroed; the first \p size bytes are left for the
 * caller to fill through \p pcb.
 *
 * \param size  requested size in bytes
 * \param pcb   when non-NULL, receives a pointer to the reserved space
 * \return the state offset reported by ilo_cp_steal_ptr().
 */
static inline uint32_t
gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev,
                               int size, void **pcb,
                               struct ilo_cp *cp)
{
   /*
    * For all VS, GS, FS, and CS push constant buffers, they must be aligned
    * to 32 bytes, and their sizes are specified in 256-bit units.
    */
   const int alignment = 32 / 4;
   const int num_dwords = align(size, 32) / 4;
   const int num_bytes = num_dwords * 4;
   uint32_t offset;
   char *bytes;

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   bytes = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER",
         num_dwords, alignment, &offset);

   /* zero out the unused range */
   if (num_bytes > size)
      memset(bytes + size, 0, num_bytes - size);

   if (pcb)
      *pcb = bytes;

   return offset;
}
2494
2495 #endif /* ILO_GPE_GEN6_H */