radeonsi: correct sampler function names
[mesa.git] / src / gallium / drivers / ilo / ilo_gpe_gen6.h
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #ifndef ILO_GPE_GEN6_H
29 #define ILO_GPE_GEN6_H
30
31 #include "brw_defines.h"
32 #include "intel_reg.h"
33 #include "intel_winsys.h"
34
35 #include "ilo_common.h"
36 #include "ilo_cp.h"
37 #include "ilo_format.h"
38 #include "ilo_resource.h"
39 #include "ilo_shader.h"
40 #include "ilo_gpe.h"
41
/*
 * Assert that the generation of a device lies in the inclusive range
 * [ILO_GEN(first_gen), ILO_GEN(last_gen)].
 *
 * The device argument is evaluated twice; do not pass an expression with
 * side effects.
 */
#define ILO_GPE_VALID_GEN(devinfo, first_gen, last_gen) \
   assert((devinfo)->gen >= ILO_GEN(first_gen) && (devinfo)->gen <= ILO_GEN(last_gen))
44
/*
 * Build the header DWord of a GPE command from its (pipeline, op, subop)
 * triplet.  The constant 0x3 is placed at bit 29, pipeline at bit 27, op at
 * bit 24 and subop at bit 16.  The low bits are left clear.
 */
#define ILO_GPE_CMD(pipeline, op, subop) \
   ((0x3 << 29) | ((pipeline) << 27) | ((op) << 24) | ((subop) << 16))
47
/**
 * Commands that GEN6 GPE could emit.
 *
 * The comment next to each enumerator is the (pipeline, op, subop) triplet
 * of the command, in the argument order of ILO_GPE_CMD().  The enumerators
 * are consecutive, starting at zero, so ILO_GPE_GEN6_COMMAND_COUNT is the
 * number of commands.
 */
enum ilo_gpe_gen6_command {
   ILO_GPE_GEN6_STATE_BASE_ADDRESS = 0,                  /* (0x0, 0x1, 0x01) */
   ILO_GPE_GEN6_STATE_SIP,                               /* (0x0, 0x1, 0x02) */
   ILO_GPE_GEN6_3DSTATE_VF_STATISTICS,                   /* (0x1, 0x0, 0x0b) */
   ILO_GPE_GEN6_PIPELINE_SELECT,                         /* (0x1, 0x1, 0x04) */
   ILO_GPE_GEN6_MEDIA_VFE_STATE,                         /* (0x2, 0x0, 0x00) */
   ILO_GPE_GEN6_MEDIA_CURBE_LOAD,                        /* (0x2, 0x0, 0x01) */
   ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD,         /* (0x2, 0x0, 0x02) */
   ILO_GPE_GEN6_MEDIA_GATEWAY_STATE,                     /* (0x2, 0x0, 0x03) */
   ILO_GPE_GEN6_MEDIA_STATE_FLUSH,                       /* (0x2, 0x0, 0x04) */
   ILO_GPE_GEN6_MEDIA_OBJECT_WALKER,                     /* (0x2, 0x1, 0x03) */
   ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS,          /* (0x3, 0x0, 0x01) */
   ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS,          /* (0x3, 0x0, 0x02) */
   ILO_GPE_GEN6_3DSTATE_URB,                             /* (0x3, 0x0, 0x05) */
   ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS,                  /* (0x3, 0x0, 0x08) */
   ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS,                 /* (0x3, 0x0, 0x09) */
   ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER,                    /* (0x3, 0x0, 0x0a) */
   ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS,         /* (0x3, 0x0, 0x0d) */
   ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS,               /* (0x3, 0x0, 0x0e) */
   ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS,          /* (0x3, 0x0, 0x0f) */
   ILO_GPE_GEN6_3DSTATE_VS,                              /* (0x3, 0x0, 0x10) */
   ILO_GPE_GEN6_3DSTATE_GS,                              /* (0x3, 0x0, 0x11) */
   ILO_GPE_GEN6_3DSTATE_CLIP,                            /* (0x3, 0x0, 0x12) */
   ILO_GPE_GEN6_3DSTATE_SF,                              /* (0x3, 0x0, 0x13) */
   ILO_GPE_GEN6_3DSTATE_WM,                              /* (0x3, 0x0, 0x14) */
   ILO_GPE_GEN6_3DSTATE_CONSTANT_VS,                     /* (0x3, 0x0, 0x15) */
   ILO_GPE_GEN6_3DSTATE_CONSTANT_GS,                     /* (0x3, 0x0, 0x16) */
   ILO_GPE_GEN6_3DSTATE_CONSTANT_PS,                     /* (0x3, 0x0, 0x17) */
   ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK,                     /* (0x3, 0x0, 0x18) */
   ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE,               /* (0x3, 0x1, 0x00) */
   ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER,                    /* (0x3, 0x1, 0x05) */
   ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET,             /* (0x3, 0x1, 0x06) */
   ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN,            /* (0x3, 0x1, 0x07) */
   ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE,                    /* (0x3, 0x1, 0x08) */
   ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS,              /* (0x3, 0x1, 0x0a) */
   ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX,                    /* (0x3, 0x1, 0x0b) */
   ILO_GPE_GEN6_3DSTATE_MULTISAMPLE,                     /* (0x3, 0x1, 0x0d) */
   ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER,                  /* (0x3, 0x1, 0x0e) */
   ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER,               /* (0x3, 0x1, 0x0f) */
   ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS,                    /* (0x3, 0x1, 0x10) */
   ILO_GPE_GEN6_PIPE_CONTROL,                            /* (0x3, 0x2, 0x00) */
   ILO_GPE_GEN6_3DPRIMITIVE,                             /* (0x3, 0x3, 0x00) */

   ILO_GPE_GEN6_COMMAND_COUNT,
};
96
/**
 * Indirect states that GEN6 GPE could emit.
 *
 * The enumerators are consecutive, starting at zero, so
 * ILO_GPE_GEN6_STATE_COUNT is the number of states.
 */
enum ilo_gpe_gen6_state {
   ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA = 0,
   ILO_GPE_GEN6_SF_VIEWPORT,
   ILO_GPE_GEN6_CLIP_VIEWPORT,
   ILO_GPE_GEN6_CC_VIEWPORT,
   ILO_GPE_GEN6_COLOR_CALC_STATE,
   ILO_GPE_GEN6_BLEND_STATE,
   ILO_GPE_GEN6_DEPTH_STENCIL_STATE,
   ILO_GPE_GEN6_SCISSOR_RECT,
   ILO_GPE_GEN6_BINDING_TABLE_STATE,
   ILO_GPE_GEN6_SURFACE_STATE,
   ILO_GPE_GEN6_SAMPLER_STATE,
   ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE,
   ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER,

   ILO_GPE_GEN6_STATE_COUNT,
};
117
/*
 * Estimate the size of the given GEN6 command for the given device.
 *
 * Only the declaration lives in this header.  NOTE(review): the unit of the
 * returned size and the meaning of arg are set by the definition; confirm
 * against it before relying on either.
 */
int
ilo_gpe_gen6_estimate_command_size(const struct ilo_dev_info *dev,
                                   enum ilo_gpe_gen6_command cmd,
                                   int arg);
122
/*
 * Estimate the size of the given GEN6 indirect state for the given device.
 *
 * Only the declaration lives in this header.  NOTE(review): the unit of the
 * returned size and the meaning of arg are set by the definition; confirm
 * against it before relying on either.
 */
int
ilo_gpe_gen6_estimate_state_size(const struct ilo_dev_info *dev,
                                 enum ilo_gpe_gen6_state state,
                                 int arg);
127
128 /**
129 * Translate winsys tiling to hardware tiling.
130 */
131 static inline int
132 ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling)
133 {
134 switch (tiling) {
135 case INTEL_TILING_NONE:
136 return 0;
137 case INTEL_TILING_X:
138 return BRW_SURFACE_TILED;
139 case INTEL_TILING_Y:
140 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
141 default:
142 assert(!"unknown tiling");
143 return 0;
144 }
145 }
146
147 /**
148 * Translate a pipe primitive type to the matching hardware primitive type.
149 */
150 static inline int
151 ilo_gpe_gen6_translate_pipe_prim(unsigned prim)
152 {
153 static const int prim_mapping[PIPE_PRIM_MAX] = {
154 [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST,
155 [PIPE_PRIM_LINES] = _3DPRIM_LINELIST,
156 [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP,
157 [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP,
158 [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST,
159 [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
160 [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
161 [PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST,
162 [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
163 [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON,
164 [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
165 [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
166 [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
167 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
168 };
169
170 assert(prim_mapping[prim]);
171
172 return prim_mapping[prim];
173 }
174
175 /**
176 * Translate a pipe texture target to the matching hardware surface type.
177 */
178 static inline int
179 ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
180 {
181 switch (target) {
182 case PIPE_BUFFER:
183 return BRW_SURFACE_BUFFER;
184 case PIPE_TEXTURE_1D:
185 case PIPE_TEXTURE_1D_ARRAY:
186 return BRW_SURFACE_1D;
187 case PIPE_TEXTURE_2D:
188 case PIPE_TEXTURE_RECT:
189 case PIPE_TEXTURE_2D_ARRAY:
190 return BRW_SURFACE_2D;
191 case PIPE_TEXTURE_3D:
192 return BRW_SURFACE_3D;
193 case PIPE_TEXTURE_CUBE:
194 case PIPE_TEXTURE_CUBE_ARRAY:
195 return BRW_SURFACE_CUBE;
196 default:
197 assert(!"unknown texture target");
198 return BRW_SURFACE_BUFFER;
199 }
200 }
201
202 /**
203 * Fill in DW2 to DW7 of 3DSTATE_SF.
204 */
205 static inline void
206 ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
207 const struct ilo_rasterizer_state *rasterizer,
208 int num_samples,
209 enum pipe_format depth_format,
210 uint32_t *payload, unsigned payload_len)
211 {
212 const struct ilo_rasterizer_sf *sf = &rasterizer->sf;
213
214 assert(payload_len == Elements(sf->payload));
215
216 if (sf) {
217 memcpy(payload, sf->payload, sizeof(sf->payload));
218
219 if (num_samples > 1)
220 payload[1] |= sf->dw_msaa;
221
222 if (dev->gen >= ILO_GEN(7)) {
223 int format;
224
225 /* separate stencil */
226 switch (depth_format) {
227 case PIPE_FORMAT_Z16_UNORM:
228 format = BRW_DEPTHFORMAT_D16_UNORM;
229 break;
230 case PIPE_FORMAT_Z32_FLOAT:
231 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
232 format = BRW_DEPTHFORMAT_D32_FLOAT;
233 break;
234 case PIPE_FORMAT_Z24X8_UNORM:
235 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
236 format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
237 break;
238 default:
239 /* FLOAT surface is assumed when there is no depth buffer */
240 format = BRW_DEPTHFORMAT_D32_FLOAT;
241 break;
242 }
243
244 payload[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
245 }
246 }
247 else {
248 payload[0] = 0;
249 payload[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0;
250 payload[2] = 0;
251 payload[3] = 0;
252 payload[4] = 0;
253 payload[5] = 0;
254 }
255 }
256
257 /**
258 * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
259 */
260 static inline void
261 ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
262 const struct ilo_rasterizer_state *rasterizer,
263 const struct ilo_shader_state *fs,
264 const struct ilo_shader_state *last_sh,
265 uint32_t *dw, int num_dwords)
266 {
267 int output_count, vue_offset, vue_len;
268 const struct ilo_kernel_routing *routing;
269
270 ILO_GPE_VALID_GEN(dev, 6, 7);
271 assert(num_dwords == 13);
272
273 if (!fs) {
274 memset(dw, 0, sizeof(dw[0]) * num_dwords);
275
276 if (dev->gen >= ILO_GEN(7))
277 dw[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT;
278 else
279 dw[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT;
280
281 return;
282 }
283
284 output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
285 assert(output_count <= 32);
286
287 routing = ilo_shader_get_kernel_routing(fs);
288
289 vue_offset = routing->source_skip;
290 assert(vue_offset % 2 == 0);
291 vue_offset /= 2;
292
293 vue_len = (routing->source_len + 1) / 2;
294 if (!vue_len)
295 vue_len = 1;
296
297 if (dev->gen >= ILO_GEN(7)) {
298 dw[0] = output_count << GEN7_SBE_NUM_OUTPUTS_SHIFT |
299 vue_len << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
300 vue_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
301 if (routing->swizzle_enable)
302 dw[0] |= GEN7_SBE_SWIZZLE_ENABLE;
303 }
304 else {
305 dw[0] = output_count << GEN6_SF_NUM_OUTPUTS_SHIFT |
306 vue_len << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
307 vue_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
308 if (routing->swizzle_enable)
309 dw[0] |= GEN6_SF_SWIZZLE_ENABLE;
310 }
311
312 switch (rasterizer->state.sprite_coord_mode) {
313 case PIPE_SPRITE_COORD_UPPER_LEFT:
314 dw[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT;
315 break;
316 case PIPE_SPRITE_COORD_LOWER_LEFT:
317 dw[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
318 break;
319 }
320
321 STATIC_ASSERT(Elements(routing->swizzles) >= 16);
322 memcpy(&dw[1], routing->swizzles, 2 * 16);
323
324 /*
325 * From the Ivy Bridge PRM, volume 2 part 1, page 268:
326 *
327 * "This field (Point Sprite Texture Coordinate Enable) must be
328 * programmed to 0 when non-point primitives are rendered."
329 *
330 * TODO We do not check that yet.
331 */
332 dw[9] = routing->point_sprite_enable;
333
334 dw[10] = routing->const_interp_enable;
335
336 /* WrapShortest enables */
337 dw[11] = 0;
338 dw[12] = 0;
339 }
340
341 static inline void
342 gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev,
343 struct intel_bo *general_state_bo,
344 struct intel_bo *surface_state_bo,
345 struct intel_bo *dynamic_state_bo,
346 struct intel_bo *indirect_object_bo,
347 struct intel_bo *instruction_bo,
348 uint32_t general_state_size,
349 uint32_t dynamic_state_size,
350 uint32_t indirect_object_size,
351 uint32_t instruction_size,
352 struct ilo_cp *cp)
353 {
354 const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01);
355 const uint8_t cmd_len = 10;
356
357 ILO_GPE_VALID_GEN(dev, 6, 7);
358
359 /* 4K-page aligned */
360 assert(((general_state_size | dynamic_state_size |
361 indirect_object_size | instruction_size) & 0xfff) == 0);
362
363 ilo_cp_begin(cp, cmd_len);
364 ilo_cp_write(cp, cmd | (cmd_len - 2));
365
366 ilo_cp_write_bo(cp, 1, general_state_bo,
367 INTEL_DOMAIN_RENDER,
368 0);
369 ilo_cp_write_bo(cp, 1, surface_state_bo,
370 INTEL_DOMAIN_SAMPLER,
371 0);
372 ilo_cp_write_bo(cp, 1, dynamic_state_bo,
373 INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
374 0);
375 ilo_cp_write_bo(cp, 1, indirect_object_bo,
376 0,
377 0);
378 ilo_cp_write_bo(cp, 1, instruction_bo,
379 INTEL_DOMAIN_INSTRUCTION,
380 0);
381
382 if (general_state_size) {
383 ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo,
384 INTEL_DOMAIN_RENDER,
385 0);
386 }
387 else {
388 /* skip range check */
389 ilo_cp_write(cp, 1);
390 }
391
392 if (dynamic_state_size) {
393 ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo,
394 INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
395 0);
396 }
397 else {
398 /* skip range check */
399 ilo_cp_write(cp, 0xfffff000 + 1);
400 }
401
402 if (indirect_object_size) {
403 ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo,
404 0,
405 0);
406 }
407 else {
408 /* skip range check */
409 ilo_cp_write(cp, 0xfffff000 + 1);
410 }
411
412 if (instruction_size) {
413 ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo,
414 INTEL_DOMAIN_INSTRUCTION,
415 0);
416 }
417 else {
418 /* skip range check */
419 ilo_cp_write(cp, 1);
420 }
421
422 ilo_cp_end(cp);
423 }
424
/*
 * Emit STATE_SIP, a 2-DWord command: the header followed by sip.
 *
 * The length field (cmd_len - 2) belongs in the header DWord, and
 * ilo_cp_begin() takes the plain command length, as in every other emit
 * function of this file.
 */
static inline void
gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
                    uint32_t sip,
                    struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x02);
   const uint8_t cmd_len = 2;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, sip);
   ilo_cp_end(cp);
}
440
/*
 * Emit 3DSTATE_VF_STATISTICS, a single-DWord command whose bit 0 carries
 * the enable flag.
 */
static inline void
gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev,
                                bool enable,
                                struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x1, 0x0, 0x0b);
   const uint8_t cmd_len = 1;
   uint32_t dw0;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   dw0 = cmd | enable;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_end(cp);
}
455
/*
 * Emit PIPELINE_SELECT, a single-DWord command whose low bits carry the
 * pipeline to select: 0x0 for 3D, 0x1 for media.
 *
 * The header is kept in a uint32_t, as in every other emit function of this
 * file, so that the DWord is built with unsigned arithmetic.
 */
static inline void
gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev,
                          int pipeline,
                          struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x1, 0x1, 0x04);
   const uint8_t cmd_len = 1;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /* 3D or media */
   assert(pipeline == 0x0 || pipeline == 0x1);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | pipeline);
   ilo_cp_end(cp);
}
473
/*
 * Emit MEDIA_VFE_STATE, an 8-DWord command, on GEN6 only.
 *
 * DW2 holds (max_threads - 1) and num_urb_entries, with the gateway timer
 * reset and gateway control bypassed.  DW4 holds urb_entry_size and a fixed
 * CURBE allocation size of 480.  Scratch and scoreboard DWords are zero.
 */
static inline void
gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
                          int max_threads, int num_urb_entries,
                          int urb_entry_size,
                          struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x00);
   const uint8_t cmd_len = 8;
   uint32_t dw2, dw4;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   dw2 = (max_threads - 1) << 16 |
         num_urb_entries << 8 |
         1 << 7 |                 /* Reset Gateway Timer */
         1 << 6;                  /* Bypass Gateway Control */

   dw4 = urb_entry_size << 16 |   /* URB Entry Allocation Size */
         480;                     /* CURBE Allocation Size */

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0);           /* scratch */
   ilo_cp_write(cp, dw2);
   ilo_cp_write(cp, 0);           /* MBZ */
   ilo_cp_write(cp, dw4);

   /* three scoreboard DWords */
   for (i = 0; i < 3; i++)
      ilo_cp_write(cp, 0);

   ilo_cp_end(cp);
}
505
/*
 * Emit MEDIA_CURBE_LOAD, a 4-DWord command, on GEN6 only.
 *
 * buf must be a multiple of 32.  size is rounded up to a multiple of 32
 * before it is written.
 */
static inline void
gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev,
                           uint32_t buf, int size,
                           struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x01);
   const uint8_t cmd_len = 4;
   int aligned_size;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   assert(buf % 32 == 0);

   /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
   aligned_size = align(size, 32);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0); /* MBZ */
   ilo_cp_write(cp, aligned_size);
   ilo_cp_write(cp, buf);
   ilo_cp_end(cp);
}
527
/*
 * Emit MEDIA_INTERFACE_DESCRIPTOR_LOAD, a 4-DWord command, on GEN6 only.
 *
 * offset must be a multiple of 32.  The length DWord is num_ids * 8 * 4,
 * every interface descriptor having 8 DWords.
 */
static inline void
gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev,
                                          uint32_t offset, int num_ids,
                                          struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x02);
   const uint8_t cmd_len = 4;
   int id_data_len;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   assert(offset % 32 == 0);

   /* every ID has 8 DWords */
   id_data_len = num_ids * 8 * 4;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, 0); /* MBZ */
   ilo_cp_write(cp, id_data_len);
   ilo_cp_write(cp, offset);
   ilo_cp_end(cp);
}
548
/*
 * Emit MEDIA_GATEWAY_STATE, a 2-DWord command, on GEN6 only.
 *
 * DW1 packs id at bit 16, byte at bit 8 and thread_count at bit 0.
 */
static inline void
gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev,
                              int id, int byte, int thread_count,
                              struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x03);
   const uint8_t cmd_len = 2;
   uint32_t dw0, dw1;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   dw0 = cmd | (cmd_len - 2);
   dw1 = id << 16 | byte << 8 | thread_count;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, dw1);
   ilo_cp_end(cp);
}
569
/*
 * Emit MEDIA_STATE_FLUSH, a 2-DWord command, on GEN6 only.
 *
 * DW1 packs thread_count_water_mark at bit 16 and barrier_mask at bit 0.
 */
static inline void
gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev,
                            int thread_count_water_mark,
                            int barrier_mask,
                            struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x2, 0x0, 0x04);
   const uint8_t cmd_len = 2;
   uint32_t dw0, dw1;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   dw0 = cmd | (cmd_len - 2);
   dw1 = thread_count_water_mark << 16 | barrier_mask;

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, dw1);
   ilo_cp_end(cp);
}
590
/*
 * MEDIA_OBJECT_WALKER is not supported: this function writes nothing to the
 * command parser and only asserts.
 */
static inline void
gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev,
                              struct ilo_cp *cp)
{
   /* neither argument is used */
   (void) dev;
   (void) cp;

   assert(!"MEDIA_OBJECT_WALKER unsupported");
}
597
598 static inline void
599 gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev,
600 uint32_t vs_binding_table,
601 uint32_t gs_binding_table,
602 uint32_t ps_binding_table,
603 struct ilo_cp *cp)
604 {
605 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x01);
606 const uint8_t cmd_len = 4;
607
608 ILO_GPE_VALID_GEN(dev, 6, 6);
609
610 ilo_cp_begin(cp, cmd_len);
611 ilo_cp_write(cp, cmd | (cmd_len - 2) |
612 GEN6_BINDING_TABLE_MODIFY_VS |
613 GEN6_BINDING_TABLE_MODIFY_GS |
614 GEN6_BINDING_TABLE_MODIFY_PS);
615 ilo_cp_write(cp, vs_binding_table);
616 ilo_cp_write(cp, gs_binding_table);
617 ilo_cp_write(cp, ps_binding_table);
618 ilo_cp_end(cp);
619 }
620
621 static inline void
622 gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev,
623 uint32_t vs_sampler_state,
624 uint32_t gs_sampler_state,
625 uint32_t ps_sampler_state,
626 struct ilo_cp *cp)
627 {
628 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x02);
629 const uint8_t cmd_len = 4;
630
631 ILO_GPE_VALID_GEN(dev, 6, 6);
632
633 ilo_cp_begin(cp, cmd_len);
634 ilo_cp_write(cp, cmd | (cmd_len - 2) |
635 VS_SAMPLER_STATE_CHANGE |
636 GS_SAMPLER_STATE_CHANGE |
637 PS_SAMPLER_STATE_CHANGE);
638 ilo_cp_write(cp, vs_sampler_state);
639 ilo_cp_write(cp, gs_sampler_state);
640 ilo_cp_write(cp, ps_sampler_state);
641 ilo_cp_end(cp);
642 }
643
644 static inline void
645 gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
646 int vs_total_size, int gs_total_size,
647 int vs_entry_size, int gs_entry_size,
648 struct ilo_cp *cp)
649 {
650 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x05);
651 const uint8_t cmd_len = 3;
652 const int row_size = 128; /* 1024 bits */
653 int vs_alloc_size, gs_alloc_size;
654 int vs_num_entries, gs_num_entries;
655
656 ILO_GPE_VALID_GEN(dev, 6, 6);
657
658 /* in 1024-bit URB rows */
659 vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
660 gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
661
662 /* the valid range is [1, 5] */
663 if (!vs_alloc_size)
664 vs_alloc_size = 1;
665 if (!gs_alloc_size)
666 gs_alloc_size = 1;
667 assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
668
669 /* the valid range is [24, 256] in multiples of 4 */
670 vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
671 if (vs_num_entries > 256)
672 vs_num_entries = 256;
673 assert(vs_num_entries >= 24);
674
675 /* the valid range is [0, 256] in multiples of 4 */
676 gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
677 if (gs_num_entries > 256)
678 gs_num_entries = 256;
679
680 ilo_cp_begin(cp, cmd_len);
681 ilo_cp_write(cp, cmd | (cmd_len - 2));
682 ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_VS_SIZE_SHIFT |
683 vs_num_entries << GEN6_URB_VS_ENTRIES_SHIFT);
684 ilo_cp_write(cp, gs_num_entries << GEN6_URB_GS_ENTRIES_SHIFT |
685 (gs_alloc_size - 1) << GEN6_URB_GS_SIZE_SHIFT);
686 ilo_cp_end(cp);
687 }
688
689 static inline void
690 gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
691 const struct ilo_ve_state *ve,
692 const struct ilo_vb_state *vb,
693 struct ilo_cp *cp)
694 {
695 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08);
696 uint8_t cmd_len;
697 unsigned hw_idx;
698
699 ILO_GPE_VALID_GEN(dev, 6, 7);
700
701 /*
702 * From the Sandy Bridge PRM, volume 2 part 1, page 82:
703 *
704 * "From 1 to 33 VBs can be specified..."
705 */
706 assert(ve->vb_count <= 33);
707
708 if (!ve->vb_count)
709 return;
710
711 cmd_len = 1 + 4 * ve->vb_count;
712
713 ilo_cp_begin(cp, cmd_len);
714 ilo_cp_write(cp, cmd | (cmd_len - 2));
715
716 for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
717 const unsigned instance_divisor = ve->instance_divisors[hw_idx];
718 const unsigned pipe_idx = ve->vb_mapping[hw_idx];
719 const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx];
720 uint32_t dw;
721
722 dw = hw_idx << GEN6_VB0_INDEX_SHIFT;
723
724 if (instance_divisor)
725 dw |= GEN6_VB0_ACCESS_INSTANCEDATA;
726 else
727 dw |= GEN6_VB0_ACCESS_VERTEXDATA;
728
729 if (dev->gen >= ILO_GEN(7))
730 dw |= GEN7_VB0_ADDRESS_MODIFYENABLE;
731
732 /* use null vb if there is no buffer or the stride is out of range */
733 if (cso->buffer && cso->stride <= 2048) {
734 const struct ilo_buffer *buf = ilo_buffer(cso->buffer);
735 const uint32_t start_offset = cso->buffer_offset;
736 /*
737 * As noted in ilo_translate_format(), we treat some 3-component
738 * formats as 4-component formats to work around hardware
739 * limitations. Imagine the case where the vertex buffer holds a
740 * single PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6.
741 * The hardware would not be able to fetch it because the vertex
742 * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex
743 * and that takes at least 8 bytes.
744 *
745 * For the workaround to work, we query the physical size, which is
746 * page aligned, to calculate end_offset so that the last vertex has
747 * a better chance to be fetched.
748 */
749 const uint32_t end_offset = intel_bo_get_size(buf->bo) - 1;
750
751 dw |= cso->stride << BRW_VB0_PITCH_SHIFT;
752
753 ilo_cp_write(cp, dw);
754 ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
755 ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
756 ilo_cp_write(cp, instance_divisor);
757 }
758 else {
759 dw |= 1 << 13;
760
761 ilo_cp_write(cp, dw);
762 ilo_cp_write(cp, 0);
763 ilo_cp_write(cp, 0);
764 ilo_cp_write(cp, instance_divisor);
765 }
766 }
767
768 ilo_cp_end(cp);
769 }
770
771 static inline void
772 ve_init_cso_with_components(const struct ilo_dev_info *dev,
773 int comp0, int comp1, int comp2, int comp3,
774 struct ilo_ve_cso *cso)
775 {
776 ILO_GPE_VALID_GEN(dev, 6, 7);
777
778 STATIC_ASSERT(Elements(cso->payload) >= 2);
779 cso->payload[0] = GEN6_VE0_VALID;
780 cso->payload[1] =
781 comp0 << BRW_VE1_COMPONENT_0_SHIFT |
782 comp1 << BRW_VE1_COMPONENT_1_SHIFT |
783 comp2 << BRW_VE1_COMPONENT_2_SHIFT |
784 comp3 << BRW_VE1_COMPONENT_3_SHIFT;
785 }
786
787 static inline void
788 ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
789 struct ilo_ve_cso *cso)
790 {
791 int format;
792
793 ILO_GPE_VALID_GEN(dev, 6, 7);
794
795 /*
796 * From the Sandy Bridge PRM, volume 2 part 1, page 94:
797 *
798 * "- This bit (Edge Flag Enable) must only be ENABLED on the last
799 * valid VERTEX_ELEMENT structure.
800 *
801 * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
802 * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
803 *
804 * - The Source Element Format must be set to the UINT format.
805 *
806 * - [DevSNB]: Edge Flags are not supported for QUADLIST
807 * primitives. Software may elect to convert QUADLIST primitives
808 * to some set of corresponding edge-flag-supported primitive
809 * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
810 */
811
812 cso->payload[0] |= GEN6_VE0_EDGE_FLAG_ENABLE;
813 cso->payload[1] =
814 BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
815 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_1_SHIFT |
816 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT |
817 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT;
818
819 /*
820 * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via
821 * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined
822 * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
823 *
824 * Since all the hardware cares about is whether the flags are zero or not,
825 * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case.
826 */
827 format = (cso->payload[0] >> BRW_VE0_FORMAT_SHIFT) & 0x1ff;
828 if (format == BRW_SURFACEFORMAT_R32_FLOAT) {
829 STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT ==
830 BRW_SURFACEFORMAT_R32_FLOAT - 1);
831
832 cso->payload[0] -= (1 << BRW_VE0_FORMAT_SHIFT);
833 }
834 else {
835 assert(format == BRW_SURFACEFORMAT_R8_UINT);
836 }
837 }
838
839 static inline void
840 gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
841 const struct ilo_ve_state *ve,
842 bool last_velement_edgeflag,
843 bool prepend_generated_ids,
844 struct ilo_cp *cp)
845 {
846 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09);
847 uint8_t cmd_len;
848 unsigned i;
849
850 ILO_GPE_VALID_GEN(dev, 6, 7);
851
852 /*
853 * From the Sandy Bridge PRM, volume 2 part 1, page 93:
854 *
855 * "Up to 34 (DevSNB+) vertex elements are supported."
856 */
857 assert(ve->count + prepend_generated_ids <= 34);
858
859 if (!ve->count && !prepend_generated_ids) {
860 struct ilo_ve_cso dummy;
861
862 ve_init_cso_with_components(dev,
863 BRW_VE1_COMPONENT_STORE_0,
864 BRW_VE1_COMPONENT_STORE_0,
865 BRW_VE1_COMPONENT_STORE_0,
866 BRW_VE1_COMPONENT_STORE_1_FLT,
867 &dummy);
868
869 cmd_len = 3;
870 ilo_cp_begin(cp, cmd_len);
871 ilo_cp_write(cp, cmd | (cmd_len - 2));
872 ilo_cp_write_multi(cp, dummy.payload, 2);
873 ilo_cp_end(cp);
874
875 return;
876 }
877
878 cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;
879
880 ilo_cp_begin(cp, cmd_len);
881 ilo_cp_write(cp, cmd | (cmd_len - 2));
882
883 if (prepend_generated_ids) {
884 struct ilo_ve_cso gen_ids;
885
886 ve_init_cso_with_components(dev,
887 BRW_VE1_COMPONENT_STORE_VID,
888 BRW_VE1_COMPONENT_STORE_IID,
889 BRW_VE1_COMPONENT_NOSTORE,
890 BRW_VE1_COMPONENT_NOSTORE,
891 &gen_ids);
892
893 ilo_cp_write_multi(cp, gen_ids.payload, 2);
894 }
895
896 if (last_velement_edgeflag) {
897 struct ilo_ve_cso edgeflag;
898
899 for (i = 0; i < ve->count - 1; i++)
900 ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
901
902 edgeflag = ve->cso[i];
903 ve_set_cso_edgeflag(dev, &edgeflag);
904 ilo_cp_write_multi(cp, edgeflag.payload, 2);
905 }
906 else {
907 for (i = 0; i < ve->count; i++)
908 ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
909 }
910
911 ilo_cp_end(cp);
912 }
913
914 static inline void
915 gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
916 const struct ilo_ib_state *ib,
917 bool enable_cut_index,
918 struct ilo_cp *cp)
919 {
920 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a);
921 const uint8_t cmd_len = 3;
922 struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
923 uint32_t start_offset, end_offset;
924 int format;
925
926 ILO_GPE_VALID_GEN(dev, 6, 7);
927
928 if (!buf)
929 return;
930
931 switch (ib->hw_index_size) {
932 case 4:
933 format = BRW_INDEX_DWORD;
934 break;
935 case 2:
936 format = BRW_INDEX_WORD;
937 break;
938 case 1:
939 format = BRW_INDEX_BYTE;
940 break;
941 default:
942 assert(!"unknown index size");
943 format = BRW_INDEX_BYTE;
944 break;
945 }
946
947 /*
948 * set start_offset to 0 here and adjust pipe_draw_info::start with
949 * ib->draw_start_offset in 3DPRIMITIVE
950 */
951 start_offset = 0;
952 end_offset = buf->bo_size;
953
954 /* end_offset must also be aligned and is inclusive */
955 end_offset -= (end_offset % ib->hw_index_size);
956 end_offset--;
957
958 ilo_cp_begin(cp, cmd_len);
959 ilo_cp_write(cp, cmd | (cmd_len - 2) |
960 ((enable_cut_index) ? BRW_CUT_INDEX_ENABLE : 0) |
961 format << 8);
962 ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
963 ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
964 ilo_cp_end(cp);
965 }
966
967 static inline void
968 gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev,
969 uint32_t clip_viewport,
970 uint32_t sf_viewport,
971 uint32_t cc_viewport,
972 struct ilo_cp *cp)
973 {
974 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0d);
975 const uint8_t cmd_len = 4;
976
977 ILO_GPE_VALID_GEN(dev, 6, 6);
978
979 ilo_cp_begin(cp, cmd_len);
980 ilo_cp_write(cp, cmd | (cmd_len - 2) |
981 GEN6_CLIP_VIEWPORT_MODIFY |
982 GEN6_SF_VIEWPORT_MODIFY |
983 GEN6_CC_VIEWPORT_MODIFY);
984 ilo_cp_write(cp, clip_viewport);
985 ilo_cp_write(cp, sf_viewport);
986 ilo_cp_write(cp, cc_viewport);
987 ilo_cp_end(cp);
988 }
989
/*
 * Emit 3DSTATE_CC_STATE_POINTERS, a 4-DWord command, on GEN6 only.
 *
 * The blend, depth/stencil and color calc states follow the header in that
 * order, each with bit 0 set.  NOTE(review): bit 0 is presumably the
 * per-pointer modify enable; confirm against the PRM.
 */
static inline void
gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
                                    uint32_t blend_state,
                                    uint32_t depth_stencil_state,
                                    uint32_t color_calc_state,
                                    struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0e);
   const uint8_t cmd_len = 4;
   const uint32_t dw1 = blend_state | 1;
   const uint32_t dw2 = depth_stencil_state | 1;
   const uint32_t dw3 = color_calc_state | 1;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | (cmd_len - 2));
   ilo_cp_write(cp, dw1);
   ilo_cp_write(cp, dw2);
   ilo_cp_write(cp, dw3);
   ilo_cp_end(cp);
}
1009
/*
 * Emit 3DSTATE_SCISSOR_STATE_POINTERS, a 2-DWord command: the header
 * followed by scissor_rect.
 */
static inline void
gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
                                         uint32_t scissor_rect,
                                         struct ilo_cp *cp)
{
   const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0f);
   const uint8_t cmd_len = 2;
   uint32_t dw0;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   dw0 = cmd | (cmd_len - 2);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, dw0);
   ilo_cp_write(cp, scissor_rect);
   ilo_cp_end(cp);
}
1025
1026 static inline void
1027 gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
1028 const struct ilo_shader_state *vs,
1029 int num_samplers,
1030 struct ilo_cp *cp)
1031 {
1032 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10);
1033 const uint8_t cmd_len = 6;
1034 const struct ilo_shader_cso *cso;
1035 uint32_t dw2, dw4, dw5;
1036
1037 ILO_GPE_VALID_GEN(dev, 6, 7);
1038
1039 if (!vs) {
1040 ilo_cp_begin(cp, cmd_len);
1041 ilo_cp_write(cp, cmd | (cmd_len - 2));
1042 ilo_cp_write(cp, 0);
1043 ilo_cp_write(cp, 0);
1044 ilo_cp_write(cp, 0);
1045 ilo_cp_write(cp, 0);
1046 ilo_cp_write(cp, 0);
1047 ilo_cp_end(cp);
1048 return;
1049 }
1050
1051 cso = ilo_shader_get_kernel_cso(vs);
1052 dw2 = cso->payload[0];
1053 dw4 = cso->payload[1];
1054 dw5 = cso->payload[2];
1055
1056 dw2 |= ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT;
1057
1058 ilo_cp_begin(cp, cmd_len);
1059 ilo_cp_write(cp, cmd | (cmd_len - 2));
1060 ilo_cp_write(cp, ilo_shader_get_kernel_offset(vs));
1061 ilo_cp_write(cp, dw2);
1062 ilo_cp_write(cp, 0); /* scratch */
1063 ilo_cp_write(cp, dw4);
1064 ilo_cp_write(cp, dw5);
1065 ilo_cp_end(cp);
1066 }
1067
1068 static inline void
1069 gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
1070 const struct ilo_shader_state *gs,
1071 const struct ilo_shader_state *vs,
1072 int verts_per_prim,
1073 struct ilo_cp *cp)
1074 {
1075 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
1076 const uint8_t cmd_len = 7;
1077 uint32_t dw1, dw2, dw4, dw5, dw6;
1078
1079 ILO_GPE_VALID_GEN(dev, 6, 6);
1080
1081 if (gs) {
1082 const struct ilo_shader_cso *cso;
1083
1084 dw1 = ilo_shader_get_kernel_offset(gs);
1085
1086 cso = ilo_shader_get_kernel_cso(gs);
1087 dw2 = cso->payload[0];
1088 dw4 = cso->payload[1];
1089 dw5 = cso->payload[2];
1090 dw6 = cso->payload[3];
1091 }
1092 else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
1093 struct ilo_shader_cso cso;
1094 enum ilo_kernel_param param;
1095
1096 switch (verts_per_prim) {
1097 case 1:
1098 param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
1099 break;
1100 case 2:
1101 param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
1102 break;
1103 default:
1104 param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
1105 break;
1106 }
1107
1108 dw1 = ilo_shader_get_kernel_offset(vs) +
1109 ilo_shader_get_kernel_param(vs, param);
1110
1111 /* cannot use VS's CSO */
1112 ilo_gpe_init_gs_cso_gen6(dev, vs, &cso);
1113 dw2 = cso.payload[0];
1114 dw4 = cso.payload[1];
1115 dw5 = cso.payload[2];
1116 dw6 = cso.payload[3];
1117 }
1118 else {
1119 dw1 = 0;
1120 dw2 = 0;
1121 dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
1122 dw5 = GEN6_GS_STATISTICS_ENABLE;
1123 dw6 = 0;
1124 }
1125
1126 ilo_cp_begin(cp, cmd_len);
1127 ilo_cp_write(cp, cmd | (cmd_len - 2));
1128 ilo_cp_write(cp, dw1);
1129 ilo_cp_write(cp, dw2);
1130 ilo_cp_write(cp, 0);
1131 ilo_cp_write(cp, dw4);
1132 ilo_cp_write(cp, dw5);
1133 ilo_cp_write(cp, dw6);
1134 ilo_cp_end(cp);
1135 }
1136
1137 static inline void
1138 gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
1139 const struct ilo_rasterizer_state *rasterizer,
1140 const struct ilo_shader_state *fs,
1141 bool enable_guardband,
1142 int num_viewports,
1143 struct ilo_cp *cp)
1144 {
1145 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
1146 const uint8_t cmd_len = 4;
1147 uint32_t dw1, dw2, dw3;
1148
1149 if (rasterizer) {
1150 int interps;
1151
1152 dw1 = rasterizer->clip.payload[0];
1153 dw2 = rasterizer->clip.payload[1];
1154 dw3 = rasterizer->clip.payload[2];
1155
1156 if (enable_guardband && rasterizer->clip.can_enable_guardband)
1157 dw2 |= GEN6_CLIP_GB_TEST;
1158
1159 interps = (fs) ? ilo_shader_get_kernel_param(fs,
1160 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0;
1161
1162 if (interps & (1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC |
1163 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC |
1164 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC))
1165 dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
1166
1167 dw3 |= GEN6_CLIP_FORCE_ZERO_RTAINDEX |
1168 (num_viewports - 1);
1169 }
1170 else {
1171 dw1 = 0;
1172 dw2 = 0;
1173 dw3 = 0;
1174 }
1175
1176 ilo_cp_begin(cp, cmd_len);
1177 ilo_cp_write(cp, cmd | (cmd_len - 2));
1178 ilo_cp_write(cp, dw1);
1179 ilo_cp_write(cp, dw2);
1180 ilo_cp_write(cp, dw3);
1181 ilo_cp_end(cp);
1182 }
1183
1184 static inline void
1185 gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
1186 const struct ilo_rasterizer_state *rasterizer,
1187 const struct ilo_shader_state *fs,
1188 const struct ilo_shader_state *last_sh,
1189 struct ilo_cp *cp)
1190 {
1191 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
1192 const uint8_t cmd_len = 20;
1193 uint32_t payload_raster[6], payload_sbe[13];
1194
1195 ILO_GPE_VALID_GEN(dev, 6, 6);
1196
1197 ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer,
1198 1, PIPE_FORMAT_NONE, payload_raster, Elements(payload_raster));
1199 ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
1200 fs, last_sh, payload_sbe, Elements(payload_sbe));
1201
1202 ilo_cp_begin(cp, cmd_len);
1203 ilo_cp_write(cp, cmd | (cmd_len - 2));
1204 ilo_cp_write(cp, payload_sbe[0]);
1205 ilo_cp_write_multi(cp, payload_raster, 6);
1206 ilo_cp_write_multi(cp, &payload_sbe[1], 12);
1207 ilo_cp_end(cp);
1208 }
1209
1210 static inline void
1211 gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
1212 const struct ilo_shader_state *fs,
1213 int num_samplers,
1214 const struct ilo_rasterizer_state *rasterizer,
1215 bool dual_blend, bool cc_may_kill,
1216 struct ilo_cp *cp)
1217 {
1218 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
1219 const uint8_t cmd_len = 9;
1220 const int num_samples = 1;
1221 const struct ilo_shader_cso *fs_cso;
1222 uint32_t dw2, dw4, dw5, dw6;
1223
1224 ILO_GPE_VALID_GEN(dev, 6, 6);
1225
1226 if (!fs) {
1227 /* see brwCreateContext() */
1228 const int max_threads = (dev->gt == 2) ? 80 : 40;
1229
1230 ilo_cp_begin(cp, cmd_len);
1231 ilo_cp_write(cp, cmd | (cmd_len - 2));
1232 ilo_cp_write(cp, 0);
1233 ilo_cp_write(cp, 0);
1234 ilo_cp_write(cp, 0);
1235 ilo_cp_write(cp, 0);
1236 /* honor the valid range even if dispatching is disabled */
1237 ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
1238 ilo_cp_write(cp, 0);
1239 ilo_cp_write(cp, 0);
1240 ilo_cp_write(cp, 0);
1241 ilo_cp_end(cp);
1242
1243 return;
1244 }
1245
1246 fs_cso = ilo_shader_get_kernel_cso(fs);
1247 dw2 = fs_cso->payload[0];
1248 dw4 = fs_cso->payload[1];
1249 dw5 = fs_cso->payload[2];
1250 dw6 = fs_cso->payload[3];
1251
1252 dw2 |= (num_samplers + 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT;
1253
1254 if (true) {
1255 dw4 |= GEN6_WM_STATISTICS_ENABLE;
1256 }
1257 else {
1258 /*
1259 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1260 *
1261 * "This bit (Statistics Enable) must be disabled if either of these
1262 * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer
1263 * Resolve Enable or Depth Buffer Resolve Enable."
1264 */
1265 dw4 |= GEN6_WM_DEPTH_CLEAR;
1266 dw4 |= GEN6_WM_DEPTH_RESOLVE;
1267 dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
1268 }
1269
1270 if (cc_may_kill) {
1271 dw5 |= GEN6_WM_KILL_ENABLE |
1272 GEN6_WM_DISPATCH_ENABLE;
1273 }
1274
1275 if (dual_blend)
1276 dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
1277
1278 dw5 |= rasterizer->wm.payload[0];
1279
1280 dw6 |= rasterizer->wm.payload[1];
1281
1282 if (num_samples > 1) {
1283 dw6 |= rasterizer->wm.dw_msaa_rast |
1284 rasterizer->wm.dw_msaa_disp;
1285 }
1286
1287 ilo_cp_begin(cp, cmd_len);
1288 ilo_cp_write(cp, cmd | (cmd_len - 2));
1289 ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
1290 ilo_cp_write(cp, dw2);
1291 ilo_cp_write(cp, 0); /* scratch */
1292 ilo_cp_write(cp, dw4);
1293 ilo_cp_write(cp, dw5);
1294 ilo_cp_write(cp, dw6);
1295 ilo_cp_write(cp, 0); /* kernel 1 */
1296 ilo_cp_write(cp, 0); /* kernel 2 */
1297 ilo_cp_end(cp);
1298 }
1299
/**
 * Fill the four buffer dwords used by the 3DSTATE_CONSTANT_* emitters.
 *
 * For each of the four slots, a buffer is considered present when its index
 * is below \p num_bufs and its size is non-zero.  A present slot gets
 * "address | read length", where the read length is the size in 32-byte
 * (256-bit) units, rounded up, minus one; an absent slot gets zero.
 *
 * \param dev              unused by this helper
 * \param bufs             buffer addresses; asserted to be 32-byte aligned
 * \param sizes            buffer sizes in bytes
 * \param num_bufs         number of valid entries in \p bufs and \p sizes
 * \param max_read_length  asserted upper bound on the sum of the actual
 *                         read lengths (each encoded value plus one)
 * \param dw               output array of four dwords
 * \param num_dwords       asserted to be 4
 * \return a bitmask with bit N set when slot N is present
 */
static inline unsigned
gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs, int max_read_length,
                           uint32_t *dw, int num_dwords)
{
   unsigned enable_mask = 0x0;
   int read_total = 0;
   int slot;

   assert(num_dwords == 4);

   for (slot = 0; slot < 4; slot++) {
      int len_minus_one;

      if (slot >= num_bufs || !sizes[slot]) {
         dw[slot] = 0;
         continue;
      }

      /* in 256-bit units minus one */
      len_minus_one = (sizes[slot] + 31) / 32 - 1;

      assert(bufs[slot] % 32 == 0);
      assert(len_minus_one < 32);

      dw[slot] = bufs[slot] | len_minus_one;
      enable_mask |= 1 << slot;

      read_total += len_minus_one + 1;
   }

   assert(read_total <= max_read_length);

   return enable_mask;
}
1334
/**
 * Emit the five-dword 3DSTATE_CONSTANT_VS command (GEN6 only).
 *
 * The four buffer dwords and the buffer-enable mask are produced by
 * gen6_fill_3dstate_constant(); the mask is placed at bit 12 of the header.
 * At most four buffers may be given.
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint8_t cmd_len = 5;
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x15);
   uint32_t slots[4], enable_mask;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 138:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to
    *      32"
    */
   enable_mask = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 32, slots, Elements(slots));

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, opcode | (cmd_len - 2) | (enable_mask << 12));
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, slots[i]);
   ilo_cp_end(cp);
}
1365
/**
 * Emit the five-dword 3DSTATE_CONSTANT_GS command (GEN6 only).
 *
 * The four buffer dwords and the buffer-enable mask are produced by
 * gen6_fill_3dstate_constant(); the mask is placed at bit 12 of the header.
 * At most four buffers may be given.
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint8_t cmd_len = 5;
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x16);
   uint32_t slots[4], enable_mask;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 161:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to
    *      64"
    */
   enable_mask = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 64, slots, Elements(slots));

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, opcode | (cmd_len - 2) | (enable_mask << 12));
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, slots[i]);
   ilo_cp_end(cp);
}
1396
/**
 * Emit the five-dword 3DSTATE_CONSTANT_PS command (GEN6 only).
 *
 * The four buffer dwords and the buffer-enable mask are produced by
 * gen6_fill_3dstate_constant(); the mask is placed at bit 12 of the header.
 * At most four buffers may be given.
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint8_t cmd_len = 5;
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x17);
   uint32_t slots[4], enable_mask;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 287:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to
    *      64"
    */
   enable_mask = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 64, slots, Elements(slots));

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, opcode | (cmd_len - 2) | (enable_mask << 12));
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, slots[i]);
   ilo_cp_end(cp);
}
1427
/**
 * Emit the two-dword 3DSTATE_SAMPLE_MASK command (GEN6 only).
 *
 * Only the low four bits of \p sample_mask are kept.
 */
static inline void
gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
                              unsigned sample_mask,
                              struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x18) | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   /* discard bits outside the valid mask */
   ilo_cp_write(cp, sample_mask & 0xf);
   ilo_cp_end(cp);
}
1446
1447 static inline void
1448 gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev,
1449 unsigned x, unsigned y,
1450 unsigned width, unsigned height,
1451 struct ilo_cp *cp)
1452 {
1453 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x00);
1454 const uint8_t cmd_len = 4;
1455 unsigned xmax = x + width - 1;
1456 unsigned ymax = y + height - 1;
1457 int rect_limit;
1458
1459 ILO_GPE_VALID_GEN(dev, 6, 7);
1460
1461 if (dev->gen >= ILO_GEN(7)) {
1462 rect_limit = 16383;
1463 }
1464 else {
1465 /*
1466 * From the Sandy Bridge PRM, volume 2 part 1, page 230:
1467 *
1468 * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
1469 * must be an even number"
1470 */
1471 assert(y % 2 == 0);
1472
1473 rect_limit = 8191;
1474 }
1475
1476 if (x > rect_limit) x = rect_limit;
1477 if (y > rect_limit) y = rect_limit;
1478 if (xmax > rect_limit) xmax = rect_limit;
1479 if (ymax > rect_limit) ymax = rect_limit;
1480
1481 ilo_cp_begin(cp, cmd_len);
1482 ilo_cp_write(cp, cmd | (cmd_len - 2));
1483 ilo_cp_write(cp, y << 16 | x);
1484 ilo_cp_write(cp, ymax << 16 | xmax);
1485
1486 /*
1487 * There is no need to set the origin. It is intended to support front
1488 * buffer rendering.
1489 */
1490 ilo_cp_write(cp, 0);
1491
1492 ilo_cp_end(cp);
1493 }
1494
1495 static inline void
1496 gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
1497 const struct ilo_zs_surface *zs,
1498 struct ilo_cp *cp)
1499 {
1500 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
1501 ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
1502 const uint8_t cmd_len = 7;
1503
1504 ILO_GPE_VALID_GEN(dev, 6, 7);
1505
1506 ilo_cp_begin(cp, cmd_len);
1507 ilo_cp_write(cp, cmd | (cmd_len - 2));
1508 ilo_cp_write(cp, zs->payload[0]);
1509 ilo_cp_write_bo(cp, zs->payload[1], zs->bo,
1510 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1511 ilo_cp_write(cp, zs->payload[2]);
1512 ilo_cp_write(cp, zs->payload[3]);
1513 ilo_cp_write(cp, zs->payload[4]);
1514 ilo_cp_write(cp, zs->payload[5]);
1515 ilo_cp_end(cp);
1516 }
1517
/**
 * Emit the two-dword 3DSTATE_POLY_STIPPLE_OFFSET command.
 *
 * Both offsets are asserted to lie in [0, 31]; \p x_offset is placed at
 * bit 8 and \p y_offset at bit 0 of the payload dword.
 */
static inline void
gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
                                      int x_offset, int y_offset,
                                      struct ilo_cp *cp)
{
   const uint8_t cmd_len = 2;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x1, 0x06) | (cmd_len - 2);

   ILO_GPE_VALID_GEN(dev, 6, 7);
   assert(x_offset >= 0 && x_offset <= 31);
   assert(y_offset >= 0 && y_offset <= 31);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, (x_offset << 8) | y_offset);
   ilo_cp_end(cp);
}
1535
1536 static inline void
1537 gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev,
1538 const struct pipe_poly_stipple *pattern,
1539 struct ilo_cp *cp)
1540 {
1541 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x07);
1542 const uint8_t cmd_len = 33;
1543 int i;
1544
1545 ILO_GPE_VALID_GEN(dev, 6, 7);
1546 assert(Elements(pattern->stipple) == 32);
1547
1548 ilo_cp_begin(cp, cmd_len);
1549 ilo_cp_write(cp, cmd | (cmd_len - 2));
1550 for (i = 0; i < 32; i++)
1551 ilo_cp_write(cp, pattern->stipple[i]);
1552 ilo_cp_end(cp);
1553 }
1554
1555 static inline void
1556 gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev,
1557 unsigned pattern, unsigned factor,
1558 struct ilo_cp *cp)
1559 {
1560 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x08);
1561 const uint8_t cmd_len = 3;
1562 unsigned inverse;
1563
1564 ILO_GPE_VALID_GEN(dev, 6, 7);
1565 assert((pattern & 0xffff) == pattern);
1566 assert(factor >= 1 && factor <= 256);
1567
1568 ilo_cp_begin(cp, cmd_len);
1569 ilo_cp_write(cp, cmd | (cmd_len - 2));
1570 ilo_cp_write(cp, pattern);
1571
1572 if (dev->gen >= ILO_GEN(7)) {
1573 /* in U1.16 */
1574 inverse = (unsigned) (65536.0f / factor);
1575 ilo_cp_write(cp, inverse << 15 | factor);
1576 }
1577 else {
1578 /* in U1.13 */
1579 inverse = (unsigned) (8192.0f / factor);
1580 ilo_cp_write(cp, inverse << 16 | factor);
1581 }
1582
1583 ilo_cp_end(cp);
1584 }
1585
/**
 * Emit the three-dword 3DSTATE_AA_LINE_PARAMETERS command with both
 * parameter dwords set to zero.
 */
static inline void
gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
                                     struct ilo_cp *cp)
{
   const uint8_t cmd_len = 3;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x1, 0x0a) | (cmd_len - 2);
   int pos;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, header);
   /* each dword packs two fields (at bit 16 and bit 0); all are zero */
   for (pos = 1; pos < cmd_len; pos++)
      ilo_cp_write(cp, 0 << 16 | 0);
   ilo_cp_end(cp);
}
1601
1602 static inline void
1603 gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev,
1604 int index, unsigned svbi,
1605 unsigned max_svbi,
1606 bool load_vertex_count,
1607 struct ilo_cp *cp)
1608 {
1609 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0b);
1610 const uint8_t cmd_len = 4;
1611 uint32_t dw1;
1612
1613 ILO_GPE_VALID_GEN(dev, 6, 6);
1614 assert(index >= 0 && index < 4);
1615
1616 dw1 = index << SVB_INDEX_SHIFT;
1617 if (load_vertex_count)
1618 dw1 |= SVB_LOAD_INTERNAL_VERTEX_COUNT;
1619
1620 ilo_cp_begin(cp, cmd_len);
1621 ilo_cp_write(cp, cmd | (cmd_len - 2));
1622 ilo_cp_write(cp, dw1);
1623 ilo_cp_write(cp, svbi);
1624 ilo_cp_write(cp, max_svbi);
1625 ilo_cp_end(cp);
1626 }
1627
1628 static inline void
1629 gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev,
1630 int num_samples,
1631 const uint32_t *packed_sample_pos,
1632 bool pixel_location_center,
1633 struct ilo_cp *cp)
1634 {
1635 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0d);
1636 const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3;
1637 uint32_t dw1, dw2, dw3;
1638
1639 ILO_GPE_VALID_GEN(dev, 6, 7);
1640
1641 dw1 = (pixel_location_center) ?
1642 MS_PIXEL_LOCATION_CENTER : MS_PIXEL_LOCATION_UPPER_LEFT;
1643
1644 switch (num_samples) {
1645 case 0:
1646 case 1:
1647 dw1 |= MS_NUMSAMPLES_1;
1648 dw2 = 0;
1649 dw3 = 0;
1650 break;
1651 case 4:
1652 dw1 |= MS_NUMSAMPLES_4;
1653 dw2 = packed_sample_pos[0];
1654 dw3 = 0;
1655 break;
1656 case 8:
1657 assert(dev->gen >= ILO_GEN(7));
1658 dw1 |= MS_NUMSAMPLES_8;
1659 dw2 = packed_sample_pos[0];
1660 dw3 = packed_sample_pos[1];
1661 break;
1662 default:
1663 assert(!"unsupported sample count");
1664 dw1 |= MS_NUMSAMPLES_1;
1665 dw2 = 0;
1666 dw3 = 0;
1667 break;
1668 }
1669
1670 ilo_cp_begin(cp, cmd_len);
1671 ilo_cp_write(cp, cmd | (cmd_len - 2));
1672 ilo_cp_write(cp, dw1);
1673 ilo_cp_write(cp, dw2);
1674 if (dev->gen >= ILO_GEN(7))
1675 ilo_cp_write(cp, dw3);
1676 ilo_cp_end(cp);
1677 }
1678
1679 static inline void
1680 gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev,
1681 const struct ilo_zs_surface *zs,
1682 struct ilo_cp *cp)
1683 {
1684 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
1685 ILO_GPE_CMD(0x3, 0x0, 0x06) :
1686 ILO_GPE_CMD(0x3, 0x1, 0x0e);
1687 const uint8_t cmd_len = 3;
1688
1689 ILO_GPE_VALID_GEN(dev, 6, 7);
1690
1691 ilo_cp_begin(cp, cmd_len);
1692 ilo_cp_write(cp, cmd | (cmd_len - 2));
1693 /* see ilo_gpe_init_zs_surface() */
1694 ilo_cp_write(cp, zs->payload[6]);
1695 ilo_cp_write_bo(cp, zs->payload[7], zs->separate_s8_bo,
1696 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1697 ilo_cp_end(cp);
1698 }
1699
1700 static inline void
1701 gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev,
1702 const struct ilo_zs_surface *zs,
1703 struct ilo_cp *cp)
1704 {
1705 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
1706 ILO_GPE_CMD(0x3, 0x0, 0x07) :
1707 ILO_GPE_CMD(0x3, 0x1, 0x0f);
1708 const uint8_t cmd_len = 3;
1709
1710 ILO_GPE_VALID_GEN(dev, 6, 7);
1711
1712 ilo_cp_begin(cp, cmd_len);
1713 ilo_cp_write(cp, cmd | (cmd_len - 2));
1714 /* see ilo_gpe_init_zs_surface() */
1715 ilo_cp_write(cp, zs->payload[8]);
1716 ilo_cp_write_bo(cp, zs->payload[9], zs->hiz_bo,
1717 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1718 ilo_cp_end(cp);
1719 }
1720
1721 static inline void
1722 gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
1723 uint32_t clear_val,
1724 struct ilo_cp *cp)
1725 {
1726 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x10);
1727 const uint8_t cmd_len = 2;
1728
1729 ILO_GPE_VALID_GEN(dev, 6, 6);
1730
1731 ilo_cp_begin(cp, cmd_len);
1732 ilo_cp_write(cp, cmd | (cmd_len - 2) |
1733 GEN5_DEPTH_CLEAR_VALID);
1734 ilo_cp_write(cp, clear_val);
1735 ilo_cp_end(cp);
1736 }
1737
1738 static inline void
1739 gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev,
1740 uint32_t dw1,
1741 struct intel_bo *bo, uint32_t bo_offset,
1742 bool write_qword,
1743 struct ilo_cp *cp)
1744 {
1745 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x2, 0x00);
1746 const uint8_t cmd_len = (write_qword) ? 5 : 4;
1747 const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
1748 const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
1749
1750 ILO_GPE_VALID_GEN(dev, 6, 7);
1751
1752 if (dw1 & PIPE_CONTROL_CS_STALL) {
1753 /*
1754 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
1755 *
1756 * "1 of the following must also be set (when CS stall is set):
1757 *
1758 * * Depth Cache Flush Enable ([0] of DW1)
1759 * * Stall at Pixel Scoreboard ([1] of DW1)
1760 * * Depth Stall ([13] of DW1)
1761 * * Post-Sync Operation ([13] of DW1)
1762 * * Render Target Cache Flush Enable ([12] of DW1)
1763 * * Notify Enable ([8] of DW1)"
1764 *
1765 * From the Ivy Bridge PRM, volume 2 part 1, page 61:
1766 *
1767 * "One of the following must also be set (when CS stall is set):
1768 *
1769 * * Render Target Cache Flush Enable ([12] of DW1)
1770 * * Depth Cache Flush Enable ([0] of DW1)
1771 * * Stall at Pixel Scoreboard ([1] of DW1)
1772 * * Depth Stall ([13] of DW1)
1773 * * Post-Sync Operation ([13] of DW1)"
1774 */
1775 uint32_t bit_test = PIPE_CONTROL_WRITE_FLUSH |
1776 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
1777 PIPE_CONTROL_STALL_AT_SCOREBOARD |
1778 PIPE_CONTROL_DEPTH_STALL;
1779
1780 /* post-sync op */
1781 bit_test |= PIPE_CONTROL_WRITE_IMMEDIATE |
1782 PIPE_CONTROL_WRITE_DEPTH_COUNT |
1783 PIPE_CONTROL_WRITE_TIMESTAMP;
1784
1785 if (dev->gen == ILO_GEN(6))
1786 bit_test |= PIPE_CONTROL_INTERRUPT_ENABLE;
1787
1788 assert(dw1 & bit_test);
1789 }
1790
1791 if (dw1 & PIPE_CONTROL_DEPTH_STALL) {
1792 /*
1793 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
1794 *
1795 * "Following bits must be clear (when Depth Stall is set):
1796 *
1797 * * Render Target Cache Flush Enable ([12] of DW1)
1798 * * Depth Cache Flush Enable ([0] of DW1)"
1799 */
1800 assert(!(dw1 & (PIPE_CONTROL_WRITE_FLUSH |
1801 PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
1802 }
1803
1804 ilo_cp_begin(cp, cmd_len);
1805 ilo_cp_write(cp, cmd | (cmd_len - 2));
1806 ilo_cp_write(cp, dw1);
1807 ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
1808 ilo_cp_write(cp, 0);
1809 if (write_qword)
1810 ilo_cp_write(cp, 0);
1811 ilo_cp_end(cp);
1812 }
1813
1814 static inline void
1815 gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
1816 const struct pipe_draw_info *info,
1817 const struct ilo_ib_state *ib,
1818 bool rectlist,
1819 struct ilo_cp *cp)
1820 {
1821 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
1822 const uint8_t cmd_len = 6;
1823 const int prim = (rectlist) ?
1824 _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
1825 const int vb_access = (info->indexed) ?
1826 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
1827 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
1828 const uint32_t vb_start = info->start +
1829 ((info->indexed) ? ib->draw_start_offset : 0);
1830
1831 ILO_GPE_VALID_GEN(dev, 6, 6);
1832
1833 ilo_cp_begin(cp, cmd_len);
1834 ilo_cp_write(cp, cmd | (cmd_len - 2) |
1835 prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
1836 vb_access);
1837 ilo_cp_write(cp, info->count);
1838 ilo_cp_write(cp, vb_start);
1839 ilo_cp_write(cp, info->instance_count);
1840 ilo_cp_write(cp, info->start_instance);
1841 ilo_cp_write(cp, info->index_bias);
1842 ilo_cp_end(cp);
1843 }
1844
/**
 * Write \p num_ids interface descriptors (eight dwords each) into space
 * obtained from ilo_cp_steal_ptr() and return the offset it reports.
 *
 * All array arguments are indexed by descriptor and must hold at least
 * \p num_ids entries.  For each descriptor, the kernel offset of cs[i],
 * the sampler state pointer combined with the sampler count (in units of
 * four, rounded up, at bit 2), and the binding table pointer combined with
 * the surface count are written; the remaining fields are zero.
 */
static inline uint32_t
gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev,
                                    const struct ilo_shader_state **cs,
                                    uint32_t *sampler_state,
                                    int *num_samplers,
                                    uint32_t *binding_table_state,
                                    int *num_surfaces,
                                    int num_ids,
                                    struct ilo_cp *cp)
{
   /*
    * From the Sandy Bridge PRM, volume 2 part 2, page 34:
    *
    *     "(Interface Descriptor Total Length) This field must have the same
    *      alignment as the Interface Descriptor Data Start Address.
    *
    *      It must be DQWord (32-byte) aligned..."
    *
    * From the Sandy Bridge PRM, volume 2 part 2, page 35:
    *
    *     "(Interface Descriptor Data Start Address) Specifies the 32-byte
    *      aligned address of the Interface Descriptor data."
    */
   const int state_align = 32 / 4;
   const int state_len = (32 / 4) * num_ids;
   uint32_t state_offset, *base;
   int id;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   base = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA",
         state_len, state_align, &state_offset);

   for (id = 0; id < num_ids; id++) {
      uint32_t *desc = base + id * 8;

      desc[0] = ilo_shader_get_kernel_offset(cs[id]);
      desc[1] = 1 << 18; /* SPF */
      desc[2] = sampler_state[id] |
                (((num_samplers[id] + 3) / 4) << 2);
      desc[3] = binding_table_state[id] |
                num_surfaces[id];
      desc[4] = 0 << 16 |  /* CURBE Read Length */
                0;         /* CURBE Read Offset */
      desc[5] = 0; /* Barrier ID */
      desc[6] = 0;
      desc[7] = 0;
   }

   return state_offset;
}
1896
1897 static inline uint32_t
1898 gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev,
1899 const struct ilo_viewport_cso *viewports,
1900 unsigned num_viewports,
1901 struct ilo_cp *cp)
1902 {
1903 const int state_align = 32 / 4;
1904 const int state_len = 8 * num_viewports;
1905 uint32_t state_offset, *dw;
1906 unsigned i;
1907
1908 ILO_GPE_VALID_GEN(dev, 6, 6);
1909
1910 /*
1911 * From the Sandy Bridge PRM, volume 2 part 1, page 262:
1912 *
1913 * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
1914 * stored as an array of up to 16 elements..."
1915 */
1916 assert(num_viewports && num_viewports <= 16);
1917
1918 dw = ilo_cp_steal_ptr(cp, "SF_VIEWPORT",
1919 state_len, state_align, &state_offset);
1920
1921 for (i = 0; i < num_viewports; i++) {
1922 const struct ilo_viewport_cso *vp = &viewports[i];
1923
1924 dw[0] = fui(vp->m00);
1925 dw[1] = fui(vp->m11);
1926 dw[2] = fui(vp->m22);
1927 dw[3] = fui(vp->m30);
1928 dw[4] = fui(vp->m31);
1929 dw[5] = fui(vp->m32);
1930 dw[6] = 0;
1931 dw[7] = 0;
1932
1933 dw += 8;
1934 }
1935
1936 return state_offset;
1937 }
1938
1939 static inline uint32_t
1940 gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
1941 const struct ilo_viewport_cso *viewports,
1942 unsigned num_viewports,
1943 struct ilo_cp *cp)
1944 {
1945 const int state_align = 32 / 4;
1946 const int state_len = 4 * num_viewports;
1947 uint32_t state_offset, *dw;
1948 unsigned i;
1949
1950 ILO_GPE_VALID_GEN(dev, 6, 6);
1951
1952 /*
1953 * From the Sandy Bridge PRM, volume 2 part 1, page 193:
1954 *
1955 * "The viewport-related state is stored as an array of up to 16
1956 * elements..."
1957 */
1958 assert(num_viewports && num_viewports <= 16);
1959
1960 dw = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT",
1961 state_len, state_align, &state_offset);
1962
1963 for (i = 0; i < num_viewports; i++) {
1964 const struct ilo_viewport_cso *vp = &viewports[i];
1965
1966 dw[0] = fui(vp->min_gbx);
1967 dw[1] = fui(vp->max_gbx);
1968 dw[2] = fui(vp->min_gby);
1969 dw[3] = fui(vp->max_gby);
1970
1971 dw += 4;
1972 }
1973
1974 return state_offset;
1975 }
1976
1977 static inline uint32_t
1978 gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev,
1979 const struct ilo_viewport_cso *viewports,
1980 unsigned num_viewports,
1981 struct ilo_cp *cp)
1982 {
1983 const int state_align = 32 / 4;
1984 const int state_len = 2 * num_viewports;
1985 uint32_t state_offset, *dw;
1986 unsigned i;
1987
1988 ILO_GPE_VALID_GEN(dev, 6, 7);
1989
1990 /*
1991 * From the Sandy Bridge PRM, volume 2 part 1, page 385:
1992 *
1993 * "The viewport state is stored as an array of up to 16 elements..."
1994 */
1995 assert(num_viewports && num_viewports <= 16);
1996
1997 dw = ilo_cp_steal_ptr(cp, "CC_VIEWPORT",
1998 state_len, state_align, &state_offset);
1999
2000 for (i = 0; i < num_viewports; i++) {
2001 const struct ilo_viewport_cso *vp = &viewports[i];
2002
2003 dw[0] = fui(vp->min_z);
2004 dw[1] = fui(vp->max_z);
2005
2006 dw += 2;
2007 }
2008
2009 return state_offset;
2010 }
2011
2012 static inline uint32_t
2013 gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev,
2014 const struct pipe_stencil_ref *stencil_ref,
2015 ubyte alpha_ref,
2016 const struct pipe_blend_color *blend_color,
2017 struct ilo_cp *cp)
2018 {
2019 const int state_align = 64 / 4;
2020 const int state_len = 6;
2021 uint32_t state_offset, *dw;
2022
2023 ILO_GPE_VALID_GEN(dev, 6, 7);
2024
2025 dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE",
2026 state_len, state_align, &state_offset);
2027
2028 dw[0] = stencil_ref->ref_value[0] << 24 |
2029 stencil_ref->ref_value[1] << 16 |
2030 BRW_ALPHATEST_FORMAT_UNORM8;
2031 dw[1] = alpha_ref;
2032 dw[2] = fui(blend_color->color[0]);
2033 dw[3] = fui(blend_color->color[1]);
2034 dw[4] = fui(blend_color->color[2]);
2035 dw[5] = fui(blend_color->color[3]);
2036
2037 return state_offset;
2038 }
2039
2040 static inline uint32_t
2041 gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
2042 const struct ilo_blend_state *blend,
2043 const struct ilo_fb_state *fb,
2044 const struct ilo_dsa_state *dsa,
2045 struct ilo_cp *cp)
2046 {
2047 const int state_align = 64 / 4;
2048 int state_len;
2049 uint32_t state_offset, *dw;
2050 unsigned num_targets, i;
2051
2052 ILO_GPE_VALID_GEN(dev, 6, 7);
2053
2054 /*
2055 * From the Sandy Bridge PRM, volume 2 part 1, page 376:
2056 *
2057 * "The blend state is stored as an array of up to 8 elements..."
2058 */
2059 num_targets = fb->state.nr_cbufs;
2060 assert(num_targets <= 8);
2061
2062 if (!num_targets) {
2063 if (!dsa->dw_alpha)
2064 return 0;
2065 /* to be able to reference alpha func */
2066 num_targets = 1;
2067 }
2068
2069 state_len = 2 * num_targets;
2070
2071 dw = ilo_cp_steal_ptr(cp, "BLEND_STATE",
2072 state_len, state_align, &state_offset);
2073
2074 for (i = 0; i < num_targets; i++) {
2075 const unsigned idx = (blend->independent_blend_enable) ? i : 0;
2076 const struct ilo_blend_cso *cso = &blend->cso[idx];
2077 const int num_samples = fb->num_samples;
2078 const struct util_format_description *format_desc =
2079 (idx < fb->state.nr_cbufs) ?
2080 util_format_description(fb->state.cbufs[idx]->format) : NULL;
2081 bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;
2082
2083 rt_is_unorm = true;
2084 rt_is_pure_integer = false;
2085 rt_dst_alpha_forced_one = false;
2086
2087 if (format_desc) {
2088 int ch;
2089
2090 switch (format_desc->format) {
2091 case PIPE_FORMAT_B8G8R8X8_UNORM:
2092 /* force alpha to one when the HW format has alpha */
2093 assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM)
2094 == BRW_SURFACEFORMAT_B8G8R8A8_UNORM);
2095 rt_dst_alpha_forced_one = true;
2096 break;
2097 default:
2098 break;
2099 }
2100
2101 for (ch = 0; ch < 4; ch++) {
2102 if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID)
2103 continue;
2104
2105 if (format_desc->channel[ch].pure_integer) {
2106 rt_is_unorm = false;
2107 rt_is_pure_integer = true;
2108 break;
2109 }
2110
2111 if (!format_desc->channel[ch].normalized ||
2112 format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED)
2113 rt_is_unorm = false;
2114 }
2115 }
2116
2117 dw[0] = cso->payload[0];
2118 dw[1] = cso->payload[1];
2119
2120 if (!rt_is_pure_integer) {
2121 if (rt_dst_alpha_forced_one)
2122 dw[0] |= cso->dw_blend_dst_alpha_forced_one;
2123 else
2124 dw[0] |= cso->dw_blend;
2125 }
2126
2127 /*
2128 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
2129 *
2130 * "Logic Ops are only supported on *_UNORM surfaces (excluding
2131 * _SRGB variants), otherwise Logic Ops must be DISABLED."
2132 *
2133 * Since logicop is ignored for non-UNORM color buffers, no special care
2134 * is needed.
2135 */
2136 if (rt_is_unorm)
2137 dw[1] |= cso->dw_logicop;
2138
2139 /*
2140 * From the Sandy Bridge PRM, volume 2 part 1, page 356:
2141 *
2142 * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
2143 * Dither both must be disabled."
2144 *
2145 * There is no such limitation on GEN7, or for AlphaToOne. But GL
2146 * requires that anyway.
2147 */
2148 if (num_samples > 1)
2149 dw[1] |= cso->dw_alpha_mod;
2150
2151 /*
2152 * From the Sandy Bridge PRM, volume 2 part 1, page 382:
2153 *
2154 * "Alpha Test can only be enabled if Pixel Shader outputs a float
2155 * alpha value."
2156 */
2157 if (!rt_is_pure_integer)
2158 dw[1] |= dsa->dw_alpha;
2159
2160 dw += 2;
2161 }
2162
2163 return state_offset;
2164 }
2165
2166 static inline uint32_t
2167 gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev,
2168 const struct ilo_dsa_state *dsa,
2169 struct ilo_cp *cp)
2170 {
2171 const int state_align = 64 / 4;
2172 const int state_len = 3;
2173 uint32_t state_offset, *dw;
2174
2175
2176 ILO_GPE_VALID_GEN(dev, 6, 7);
2177
2178 dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE",
2179 state_len, state_align, &state_offset);
2180
2181 dw[0] = dsa->payload[0];
2182 dw[1] = dsa->payload[1];
2183 dw[2] = dsa->payload[2];
2184
2185 return state_offset;
2186 }
2187
2188 static inline uint32_t
2189 gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev,
2190 const struct ilo_scissor_state *scissor,
2191 unsigned num_viewports,
2192 struct ilo_cp *cp)
2193 {
2194 const int state_align = 32 / 4;
2195 const int state_len = 2 * num_viewports;
2196 uint32_t state_offset, *dw;
2197
2198 ILO_GPE_VALID_GEN(dev, 6, 7);
2199
2200 /*
2201 * From the Sandy Bridge PRM, volume 2 part 1, page 263:
2202 *
2203 * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
2204 * stored as an array of up to 16 elements..."
2205 */
2206 assert(num_viewports && num_viewports <= 16);
2207
2208 dw = ilo_cp_steal_ptr(cp, "SCISSOR_RECT",
2209 state_len, state_align, &state_offset);
2210
2211 memcpy(dw, scissor->payload, state_len * 4);
2212
2213 return state_offset;
2214 }
2215
/**
 * Emit BINDING_TABLE_STATE, an array with one dword per surface state.
 *
 * \param dev                 device info; asserted to be GEN6 or GEN7
 * \param surface_states      values to copy into the table, one per entry
 * \param num_surface_states  number of entries; asserted to be at most 256
 * \param cp                  command parser the state space is taken from
 * \return the state offset reported by ilo_cp_steal_ptr(), or 0 when
 *         \p num_surface_states is zero (nothing is emitted in that case)
 */
static inline uint32_t
gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
                              uint32_t *surface_states,
                              int num_surface_states,
                              struct ilo_cp *cp)
{
   const int state_align = 32 / 4;
   const int state_len = num_surface_states;
   uint32_t offset;
   uint32_t *table;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   /*
    * The Sandy Bridge PRM, volume 4 part 1, page 69, bounds the table size:
    *
    *   "It is stored as an array of up to 256 elements..."
    */
   assert(num_surface_states <= 256);

   /* an empty table takes no space */
   if (num_surface_states == 0)
      return 0;

   table = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
                            state_len, state_align, &offset);

   memcpy(table, surface_states,
          num_surface_states * sizeof(*surface_states));

   return offset;
}
2245
2246 static inline uint32_t
2247 gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev,
2248 const struct ilo_view_surface *surf,
2249 bool for_render,
2250 struct ilo_cp *cp)
2251 {
2252 const int state_align = 32 / 4;
2253 const int state_len = (dev->gen >= ILO_GEN(7)) ? 8 : 6;
2254 uint32_t state_offset;
2255 uint32_t read_domains, write_domain;
2256
2257 ILO_GPE_VALID_GEN(dev, 6, 7);
2258
2259 if (for_render) {
2260 read_domains = INTEL_DOMAIN_RENDER;
2261 write_domain = INTEL_DOMAIN_RENDER;
2262 }
2263 else {
2264 read_domains = INTEL_DOMAIN_SAMPLER;
2265 write_domain = 0;
2266 }
2267
2268 ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset);
2269
2270 STATIC_ASSERT(Elements(surf->payload) >= 8);
2271
2272 ilo_cp_write(cp, surf->payload[0]);
2273 ilo_cp_write_bo(cp, surf->payload[1],
2274 surf->bo, read_domains, write_domain);
2275 ilo_cp_write(cp, surf->payload[2]);
2276 ilo_cp_write(cp, surf->payload[3]);
2277 ilo_cp_write(cp, surf->payload[4]);
2278 ilo_cp_write(cp, surf->payload[5]);
2279
2280 if (dev->gen >= ILO_GEN(7)) {
2281 ilo_cp_write(cp, surf->payload[6]);
2282 ilo_cp_write(cp, surf->payload[7]);
2283 }
2284
2285 ilo_cp_end(cp);
2286
2287 return state_offset;
2288 }
2289
2290 static inline uint32_t
2291 gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev,
2292 const struct pipe_stream_output_target *so,
2293 const struct pipe_stream_output_info *so_info,
2294 int so_index,
2295 struct ilo_cp *cp)
2296 {
2297 struct ilo_buffer *buf = ilo_buffer(so->buffer);
2298 unsigned bo_offset, struct_size;
2299 enum pipe_format elem_format;
2300 struct ilo_view_surface surf;
2301
2302 ILO_GPE_VALID_GEN(dev, 6, 6);
2303
2304 bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
2305 struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
2306
2307 switch (so_info->output[so_index].num_components) {
2308 case 1:
2309 elem_format = PIPE_FORMAT_R32_FLOAT;
2310 break;
2311 case 2:
2312 elem_format = PIPE_FORMAT_R32G32_FLOAT;
2313 break;
2314 case 3:
2315 elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
2316 break;
2317 case 4:
2318 elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
2319 break;
2320 default:
2321 assert(!"unexpected SO components length");
2322 elem_format = PIPE_FORMAT_R32_FLOAT;
2323 break;
2324 }
2325
2326 ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size,
2327 struct_size, elem_format, false, true, &surf);
2328
2329 return gen6_emit_SURFACE_STATE(dev, &surf, false, cp);
2330 }
2331
2332 static inline uint32_t
2333 gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev,
2334 const struct ilo_sampler_cso * const *samplers,
2335 const struct pipe_sampler_view * const *views,
2336 const uint32_t *sampler_border_colors,
2337 int num_samplers,
2338 struct ilo_cp *cp)
2339 {
2340 const int state_align = 32 / 4;
2341 const int state_len = 4 * num_samplers;
2342 uint32_t state_offset, *dw;
2343 int i;
2344
2345 ILO_GPE_VALID_GEN(dev, 6, 7);
2346
2347 /*
2348 * From the Sandy Bridge PRM, volume 4 part 1, page 101:
2349 *
2350 * "The sampler state is stored as an array of up to 16 elements..."
2351 */
2352 assert(num_samplers <= 16);
2353
2354 if (!num_samplers)
2355 return 0;
2356
2357 dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE",
2358 state_len, state_align, &state_offset);
2359
2360 for (i = 0; i < num_samplers; i++) {
2361 const struct ilo_sampler_cso *sampler = samplers[i];
2362 const struct pipe_sampler_view *view = views[i];
2363 const uint32_t border_color = sampler_border_colors[i];
2364 uint32_t dw_filter, dw_wrap;
2365
2366 /* there may be holes */
2367 if (!sampler || !view) {
2368 /* disabled sampler */
2369 dw[0] = 1 << 31;
2370 dw[1] = 0;
2371 dw[2] = 0;
2372 dw[3] = 0;
2373 dw += 4;
2374
2375 continue;
2376 }
2377
2378 /* determine filter and wrap modes */
2379 switch (view->texture->target) {
2380 case PIPE_TEXTURE_1D:
2381 dw_filter = (sampler->anisotropic) ?
2382 sampler->dw_filter_aniso : sampler->dw_filter;
2383 dw_wrap = sampler->dw_wrap_1d;
2384 break;
2385 case PIPE_TEXTURE_3D:
2386 /*
2387 * From the Sandy Bridge PRM, volume 4 part 1, page 103:
2388 *
2389 * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
2390 * surfaces of type SURFTYPE_3D."
2391 */
2392 dw_filter = sampler->dw_filter;
2393 dw_wrap = sampler->dw_wrap;
2394 break;
2395 case PIPE_TEXTURE_CUBE:
2396 dw_filter = (sampler->anisotropic) ?
2397 sampler->dw_filter_aniso : sampler->dw_filter;
2398 dw_wrap = sampler->dw_wrap_cube;
2399 break;
2400 default:
2401 dw_filter = (sampler->anisotropic) ?
2402 sampler->dw_filter_aniso : sampler->dw_filter;
2403 dw_wrap = sampler->dw_wrap;
2404 break;
2405 }
2406
2407 dw[0] = sampler->payload[0];
2408 dw[1] = sampler->payload[1];
2409 assert(!(border_color & 0x1f));
2410 dw[2] = border_color;
2411 dw[3] = sampler->payload[2];
2412
2413 dw[0] |= dw_filter;
2414
2415 if (dev->gen >= ILO_GEN(7)) {
2416 dw[3] |= dw_wrap;
2417 }
2418 else {
2419 /*
2420 * From the Sandy Bridge PRM, volume 4 part 1, page 21:
2421 *
2422 * "[DevSNB] Errata: Incorrect behavior is observed in cases
2423 * where the min and mag mode filters are different and
2424 * SurfMinLOD is nonzero. The determination of MagMode uses the
2425 * following equation instead of the one in the above
2426 * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
2427 *
2428 * As a way to work around that, we set Base to
2429 * view->u.tex.first_level.
2430 */
2431 dw[0] |= view->u.tex.first_level << 22;
2432
2433 dw[1] |= dw_wrap;
2434 }
2435
2436 dw += 4;
2437 }
2438
2439 return state_offset;
2440 }
2441
2442 static inline uint32_t
2443 gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev,
2444 const struct ilo_sampler_cso *sampler,
2445 struct ilo_cp *cp)
2446 {
2447 const int state_align = 32 / 4;
2448 const int state_len = (dev->gen >= ILO_GEN(7)) ? 4 : 12;
2449 uint32_t state_offset, *dw;
2450
2451 ILO_GPE_VALID_GEN(dev, 6, 7);
2452
2453 dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE",
2454 state_len, state_align, &state_offset);
2455
2456 /* see ilo_gpe_init_sampler_cso() */
2457 memcpy(dw, &sampler->payload[3], state_len * 4);
2458
2459 return state_offset;
2460 }
2461
/**
 * Reserve space for a push constant buffer.
 *
 * The reservation is \p size bytes rounded up to a multiple of 32.  Only the
 * padding past \p size is zeroed here; the first \p size bytes are left
 * untouched (presumably for the caller to fill through \p pcb).
 *
 * \param dev   device info; asserted to be GEN6 or GEN7
 * \param size  size of the constant data in bytes
 * \param pcb   if non-NULL, receives a pointer to the reserved space
 * \param cp    command parser the state space is taken from
 * \return the state offset reported by ilo_cp_steal_ptr()
 */
static inline uint32_t
gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev,
                               int size, void **pcb,
                               struct ilo_cp *cp)
{
   /*
    * Push constant buffers for VS, GS, FS, and CS all must be aligned to 32
    * bytes, and their sizes are specified in 256-bit units.
    */
   const int state_align = 32 / 4;
   const int state_len = align(size, 32) / 4;
   const int padded_size = state_len * 4;
   uint32_t offset;
   char *base;

   ILO_GPE_VALID_GEN(dev, 6, 7);

   base = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER",
                           state_len, state_align, &offset);

   /* clear the padding between the requested and the reserved size */
   if (padded_size > size)
      memset(base + size, 0, padded_size - size);

   if (pcb != NULL)
      *pcb = base;

   return offset;
}
2490
2491 #endif /* ILO_GPE_GEN6_H */