r600g,radeonsi: share r600_surface
[mesa.git] / src / gallium / drivers / ilo / ilo_gpe_gen6.h
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #ifndef ILO_GPE_GEN6_H
29 #define ILO_GPE_GEN6_H
30
31 #include "brw_defines.h"
32 #include "intel_reg.h"
33 #include "intel_winsys.h"
34
35 #include "ilo_common.h"
36 #include "ilo_cp.h"
37 #include "ilo_format.h"
38 #include "ilo_resource.h"
39 #include "ilo_shader.h"
40 #include "ilo_gpe.h"
41
/* Assert that (dev)->gen lies in the closed range [min_gen, max_gen]. */
#define ILO_GPE_VALID_GEN(dev, min_gen, max_gen) \
   assert(ILO_GEN(min_gen) <= (dev)->gen && ILO_GEN(max_gen) >= (dev)->gen)
44
/*
 * Compose the header DWord of a command: 0x3 goes to bits 31:29, the pipeline
 * starts at bit 27, the opcode at bit 24, and the sub-opcode at bit 16.
 */
#define ILO_GPE_CMD(pipeline, op, subop) \
   ((0x3 << 29) | ((pipeline) << 27) | ((op) << 24) | ((subop) << 16))
47
/**
 * Identifiers of the commands that the GEN6 GPE can emit.
 *
 * The comment after each entry is the (pipeline, op, subop) triple of the
 * command; for the emitters visible in this header it matches the arguments
 * they pass to ILO_GPE_CMD().  ILO_GPE_GEN6_COMMAND_COUNT is not a command,
 * it is the number of entries.
 */
enum ilo_gpe_gen6_command {
   /* pipeline 0x0 */
   ILO_GPE_GEN6_STATE_BASE_ADDRESS = 0,               /* (0x0, 0x1, 0x01) */
   ILO_GPE_GEN6_STATE_SIP,                            /* (0x0, 0x1, 0x02) */

   /* pipeline 0x1 */
   ILO_GPE_GEN6_3DSTATE_VF_STATISTICS,                /* (0x1, 0x0, 0x0b) */
   ILO_GPE_GEN6_PIPELINE_SELECT,                      /* (0x1, 0x1, 0x04) */

   /* pipeline 0x2 */
   ILO_GPE_GEN6_MEDIA_VFE_STATE,                      /* (0x2, 0x0, 0x00) */
   ILO_GPE_GEN6_MEDIA_CURBE_LOAD,                     /* (0x2, 0x0, 0x01) */
   ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD,      /* (0x2, 0x0, 0x02) */
   ILO_GPE_GEN6_MEDIA_GATEWAY_STATE,                  /* (0x2, 0x0, 0x03) */
   ILO_GPE_GEN6_MEDIA_STATE_FLUSH,                    /* (0x2, 0x0, 0x04) */
   ILO_GPE_GEN6_MEDIA_OBJECT_WALKER,                  /* (0x2, 0x1, 0x03) */

   /* pipeline 0x3, op 0x0 */
   ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS,       /* (0x3, 0x0, 0x01) */
   ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS,       /* (0x3, 0x0, 0x02) */
   ILO_GPE_GEN6_3DSTATE_URB,                          /* (0x3, 0x0, 0x05) */
   ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS,               /* (0x3, 0x0, 0x08) */
   ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS,              /* (0x3, 0x0, 0x09) */
   ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER,                 /* (0x3, 0x0, 0x0a) */
   ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS,      /* (0x3, 0x0, 0x0d) */
   ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS,            /* (0x3, 0x0, 0x0e) */
   ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS,       /* (0x3, 0x0, 0x0f) */
   ILO_GPE_GEN6_3DSTATE_VS,                           /* (0x3, 0x0, 0x10) */
   ILO_GPE_GEN6_3DSTATE_GS,                           /* (0x3, 0x0, 0x11) */
   ILO_GPE_GEN6_3DSTATE_CLIP,                         /* (0x3, 0x0, 0x12) */
   ILO_GPE_GEN6_3DSTATE_SF,                           /* (0x3, 0x0, 0x13) */
   ILO_GPE_GEN6_3DSTATE_WM,                           /* (0x3, 0x0, 0x14) */
   ILO_GPE_GEN6_3DSTATE_CONSTANT_VS,                  /* (0x3, 0x0, 0x15) */
   ILO_GPE_GEN6_3DSTATE_CONSTANT_GS,                  /* (0x3, 0x0, 0x16) */
   ILO_GPE_GEN6_3DSTATE_CONSTANT_PS,                  /* (0x3, 0x0, 0x17) */
   ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK,                  /* (0x3, 0x0, 0x18) */

   /* pipeline 0x3, op 0x1 */
   ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE,            /* (0x3, 0x1, 0x00) */
   ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER,                 /* (0x3, 0x1, 0x05) */
   ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET,          /* (0x3, 0x1, 0x06) */
   ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN,         /* (0x3, 0x1, 0x07) */
   ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE,                 /* (0x3, 0x1, 0x08) */
   ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS,           /* (0x3, 0x1, 0x0a) */
   ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX,                 /* (0x3, 0x1, 0x0b) */
   ILO_GPE_GEN6_3DSTATE_MULTISAMPLE,                  /* (0x3, 0x1, 0x0d) */
   ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER,               /* (0x3, 0x1, 0x0e) */
   ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER,            /* (0x3, 0x1, 0x0f) */
   ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS,                 /* (0x3, 0x1, 0x10) */

   /* pipeline 0x3, op 0x2 and 0x3 */
   ILO_GPE_GEN6_PIPE_CONTROL,                         /* (0x3, 0x2, 0x00) */
   ILO_GPE_GEN6_3DPRIMITIVE,                          /* (0x3, 0x3, 0x00) */

   ILO_GPE_GEN6_COMMAND_COUNT,
};
96
/**
 * Identifiers of the indirect states that the GEN6 GPE can emit.
 *
 * ILO_GPE_GEN6_STATE_COUNT is not a state, it is the number of entries.
 */
enum ilo_gpe_gen6_state {
   ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA = 0,

   /* viewports */
   ILO_GPE_GEN6_SF_VIEWPORT,
   ILO_GPE_GEN6_CLIP_VIEWPORT,
   ILO_GPE_GEN6_CC_VIEWPORT,

   ILO_GPE_GEN6_COLOR_CALC_STATE,
   ILO_GPE_GEN6_BLEND_STATE,
   ILO_GPE_GEN6_DEPTH_STENCIL_STATE,
   ILO_GPE_GEN6_SCISSOR_RECT,

   /* binding tables, surfaces, and samplers */
   ILO_GPE_GEN6_BINDING_TABLE_STATE,
   ILO_GPE_GEN6_SURFACE_STATE,
   ILO_GPE_GEN6_SAMPLER_STATE,
   ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE,

   ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER,

   ILO_GPE_GEN6_STATE_COUNT,
};
117
/*
 * Estimate the size of command \p cmd on device \p dev.
 *
 * NOTE(review): the unit of the returned size and the meaning of \p arg are
 * defined by the implementation, which is not visible from this header.
 */
int ilo_gpe_gen6_estimate_command_size(const struct ilo_dev_info *dev,
                                       enum ilo_gpe_gen6_command cmd,
                                       int arg);
122
/*
 * Estimate the size of indirect state \p state on device \p dev.
 *
 * NOTE(review): the unit of the returned size and the meaning of \p arg are
 * defined by the implementation, which is not visible from this header.
 */
int ilo_gpe_gen6_estimate_state_size(const struct ilo_dev_info *dev,
                                     enum ilo_gpe_gen6_state state,
                                     int arg);
127
128 /**
129 * Translate winsys tiling to hardware tiling.
130 */
131 static inline int
132 ilo_gpe_gen6_translate_winsys_tiling(enum intel_tiling_mode tiling)
133 {
134 switch (tiling) {
135 case INTEL_TILING_NONE:
136 return 0;
137 case INTEL_TILING_X:
138 return BRW_SURFACE_TILED;
139 case INTEL_TILING_Y:
140 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
141 default:
142 assert(!"unknown tiling");
143 return 0;
144 }
145 }
146
147 /**
148 * Translate a pipe primitive type to the matching hardware primitive type.
149 */
150 static inline int
151 ilo_gpe_gen6_translate_pipe_prim(unsigned prim)
152 {
153 static const int prim_mapping[PIPE_PRIM_MAX] = {
154 [PIPE_PRIM_POINTS] = _3DPRIM_POINTLIST,
155 [PIPE_PRIM_LINES] = _3DPRIM_LINELIST,
156 [PIPE_PRIM_LINE_LOOP] = _3DPRIM_LINELOOP,
157 [PIPE_PRIM_LINE_STRIP] = _3DPRIM_LINESTRIP,
158 [PIPE_PRIM_TRIANGLES] = _3DPRIM_TRILIST,
159 [PIPE_PRIM_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
160 [PIPE_PRIM_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
161 [PIPE_PRIM_QUADS] = _3DPRIM_QUADLIST,
162 [PIPE_PRIM_QUAD_STRIP] = _3DPRIM_QUADSTRIP,
163 [PIPE_PRIM_POLYGON] = _3DPRIM_POLYGON,
164 [PIPE_PRIM_LINES_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
165 [PIPE_PRIM_LINE_STRIP_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
166 [PIPE_PRIM_TRIANGLES_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
167 [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
168 };
169
170 assert(prim_mapping[prim]);
171
172 return prim_mapping[prim];
173 }
174
175 /**
176 * Translate a pipe texture target to the matching hardware surface type.
177 */
178 static inline int
179 ilo_gpe_gen6_translate_texture(enum pipe_texture_target target)
180 {
181 switch (target) {
182 case PIPE_BUFFER:
183 return BRW_SURFACE_BUFFER;
184 case PIPE_TEXTURE_1D:
185 case PIPE_TEXTURE_1D_ARRAY:
186 return BRW_SURFACE_1D;
187 case PIPE_TEXTURE_2D:
188 case PIPE_TEXTURE_RECT:
189 case PIPE_TEXTURE_2D_ARRAY:
190 return BRW_SURFACE_2D;
191 case PIPE_TEXTURE_3D:
192 return BRW_SURFACE_3D;
193 case PIPE_TEXTURE_CUBE:
194 case PIPE_TEXTURE_CUBE_ARRAY:
195 return BRW_SURFACE_CUBE;
196 default:
197 assert(!"unknown texture target");
198 return BRW_SURFACE_BUFFER;
199 }
200 }
201
202 /**
203 * Fill in DW2 to DW7 of 3DSTATE_SF.
204 */
205 static inline void
206 ilo_gpe_gen6_fill_3dstate_sf_raster(const struct ilo_dev_info *dev,
207 const struct ilo_rasterizer_state *rasterizer,
208 int num_samples,
209 enum pipe_format depth_format,
210 uint32_t *payload, unsigned payload_len)
211 {
212 assert(payload_len == Elements(rasterizer->sf.payload));
213
214 if (rasterizer) {
215 const struct ilo_rasterizer_sf *sf = &rasterizer->sf;
216
217 memcpy(payload, sf->payload, sizeof(sf->payload));
218 if (num_samples > 1)
219 payload[1] |= sf->dw_msaa;
220 }
221 else {
222 payload[0] = 0;
223 payload[1] = (num_samples > 1) ? GEN6_SF_MSRAST_ON_PATTERN : 0;
224 payload[2] = 0;
225 payload[3] = 0;
226 payload[4] = 0;
227 payload[5] = 0;
228 }
229
230 if (dev->gen >= ILO_GEN(7)) {
231 int format;
232
233 /* separate stencil */
234 switch (depth_format) {
235 case PIPE_FORMAT_Z16_UNORM:
236 format = BRW_DEPTHFORMAT_D16_UNORM;
237 break;
238 case PIPE_FORMAT_Z32_FLOAT:
239 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
240 format = BRW_DEPTHFORMAT_D32_FLOAT;
241 break;
242 case PIPE_FORMAT_Z24X8_UNORM:
243 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
244 format = BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
245 break;
246 default:
247 /* FLOAT surface is assumed when there is no depth buffer */
248 format = BRW_DEPTHFORMAT_D32_FLOAT;
249 break;
250 }
251
252 payload[0] |= format << GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
253 }
254 }
255
256 /**
257 * Fill in DW1 and DW8 to DW19 of 3DSTATE_SF.
258 */
259 static inline void
260 ilo_gpe_gen6_fill_3dstate_sf_sbe(const struct ilo_dev_info *dev,
261 const struct ilo_rasterizer_state *rasterizer,
262 const struct ilo_shader_state *fs,
263 uint32_t *dw, int num_dwords)
264 {
265 int output_count, vue_offset, vue_len;
266 const struct ilo_kernel_routing *routing;
267
268 ILO_GPE_VALID_GEN(dev, 6, 7.5);
269 assert(num_dwords == 13);
270
271 if (!fs) {
272 memset(dw, 0, sizeof(dw[0]) * num_dwords);
273
274 if (dev->gen >= ILO_GEN(7))
275 dw[0] = 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT;
276 else
277 dw[0] = 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT;
278
279 return;
280 }
281
282 output_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
283 assert(output_count <= 32);
284
285 routing = ilo_shader_get_kernel_routing(fs);
286
287 vue_offset = routing->source_skip;
288 assert(vue_offset % 2 == 0);
289 vue_offset /= 2;
290
291 vue_len = (routing->source_len + 1) / 2;
292 if (!vue_len)
293 vue_len = 1;
294
295 if (dev->gen >= ILO_GEN(7)) {
296 dw[0] = output_count << GEN7_SBE_NUM_OUTPUTS_SHIFT |
297 vue_len << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
298 vue_offset << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT;
299 if (routing->swizzle_enable)
300 dw[0] |= GEN7_SBE_SWIZZLE_ENABLE;
301 }
302 else {
303 dw[0] = output_count << GEN6_SF_NUM_OUTPUTS_SHIFT |
304 vue_len << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
305 vue_offset << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT;
306 if (routing->swizzle_enable)
307 dw[0] |= GEN6_SF_SWIZZLE_ENABLE;
308 }
309
310 switch (rasterizer->state.sprite_coord_mode) {
311 case PIPE_SPRITE_COORD_UPPER_LEFT:
312 dw[0] |= GEN6_SF_POINT_SPRITE_UPPERLEFT;
313 break;
314 case PIPE_SPRITE_COORD_LOWER_LEFT:
315 dw[0] |= GEN6_SF_POINT_SPRITE_LOWERLEFT;
316 break;
317 }
318
319 STATIC_ASSERT(Elements(routing->swizzles) >= 16);
320 memcpy(&dw[1], routing->swizzles, 2 * 16);
321
322 /*
323 * From the Ivy Bridge PRM, volume 2 part 1, page 268:
324 *
325 * "This field (Point Sprite Texture Coordinate Enable) must be
326 * programmed to 0 when non-point primitives are rendered."
327 *
328 * TODO We do not check that yet.
329 */
330 dw[9] = routing->point_sprite_enable;
331
332 dw[10] = routing->const_interp_enable;
333
334 /* WrapShortest enables */
335 dw[11] = 0;
336 dw[12] = 0;
337 }
338
339 static inline void
340 gen6_emit_STATE_BASE_ADDRESS(const struct ilo_dev_info *dev,
341 struct intel_bo *general_state_bo,
342 struct intel_bo *surface_state_bo,
343 struct intel_bo *dynamic_state_bo,
344 struct intel_bo *indirect_object_bo,
345 struct intel_bo *instruction_bo,
346 uint32_t general_state_size,
347 uint32_t dynamic_state_size,
348 uint32_t indirect_object_size,
349 uint32_t instruction_size,
350 struct ilo_cp *cp)
351 {
352 const uint32_t cmd = ILO_GPE_CMD(0x0, 0x1, 0x01);
353 const uint8_t cmd_len = 10;
354
355 ILO_GPE_VALID_GEN(dev, 6, 7.5);
356
357 /* 4K-page aligned */
358 assert(((general_state_size | dynamic_state_size |
359 indirect_object_size | instruction_size) & 0xfff) == 0);
360
361 ilo_cp_begin(cp, cmd_len);
362 ilo_cp_write(cp, cmd | (cmd_len - 2));
363
364 ilo_cp_write_bo(cp, 1, general_state_bo,
365 INTEL_DOMAIN_RENDER,
366 0);
367 ilo_cp_write_bo(cp, 1, surface_state_bo,
368 INTEL_DOMAIN_SAMPLER,
369 0);
370 ilo_cp_write_bo(cp, 1, dynamic_state_bo,
371 INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
372 0);
373 ilo_cp_write_bo(cp, 1, indirect_object_bo,
374 0,
375 0);
376 ilo_cp_write_bo(cp, 1, instruction_bo,
377 INTEL_DOMAIN_INSTRUCTION,
378 0);
379
380 if (general_state_size) {
381 ilo_cp_write_bo(cp, general_state_size | 1, general_state_bo,
382 INTEL_DOMAIN_RENDER,
383 0);
384 }
385 else {
386 /* skip range check */
387 ilo_cp_write(cp, 1);
388 }
389
390 if (dynamic_state_size) {
391 ilo_cp_write_bo(cp, dynamic_state_size | 1, dynamic_state_bo,
392 INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION,
393 0);
394 }
395 else {
396 /* skip range check */
397 ilo_cp_write(cp, 0xfffff000 + 1);
398 }
399
400 if (indirect_object_size) {
401 ilo_cp_write_bo(cp, indirect_object_size | 1, indirect_object_bo,
402 0,
403 0);
404 }
405 else {
406 /* skip range check */
407 ilo_cp_write(cp, 0xfffff000 + 1);
408 }
409
410 if (instruction_size) {
411 ilo_cp_write_bo(cp, instruction_size | 1, instruction_bo,
412 INTEL_DOMAIN_INSTRUCTION,
413 0);
414 }
415 else {
416 /* skip range check */
417 ilo_cp_write(cp, 1);
418 }
419
420 ilo_cp_end(cp);
421 }
422
/* Emit STATE_SIP (2 DWords): the header followed by \p sip. */
static inline void
gen6_emit_STATE_SIP(const struct ilo_dev_info *dev,
                    uint32_t sip,
                    struct ilo_cp *cp)
{
   const uint8_t num_dwords = 2;
   const uint32_t header = ILO_GPE_CMD(0x0, 0x1, 0x02) | (num_dwords - 2);

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, sip);
   ilo_cp_end(cp);
}
438
/*
 * Emit 3DSTATE_VF_STATISTICS.  The command is a single DWord whose bit 0
 * carries \p enable.
 */
static inline void
gen6_emit_3DSTATE_VF_STATISTICS(const struct ilo_dev_info *dev,
                                bool enable,
                                struct ilo_cp *cp)
{
   const uint8_t num_dwords = 1;
   const uint32_t dw0 = ILO_GPE_CMD(0x1, 0x0, 0x0b) | enable;

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, dw0);
   ilo_cp_end(cp);
}
453
/*
 * Emit PIPELINE_SELECT.  The command is a single DWord whose low bit carries
 * \p pipeline, which must be 0x0 (3D) or 0x1 (media).
 */
static inline void
gen6_emit_PIPELINE_SELECT(const struct ilo_dev_info *dev,
                          int pipeline,
                          struct ilo_cp *cp)
{
   /* uint32_t, as in every other emitter of this header */
   const uint32_t cmd = ILO_GPE_CMD(0x1, 0x1, 0x04);
   const uint8_t cmd_len = 1;

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   /* 3D or media */
   assert(pipeline == 0x0 || pipeline == 0x1);

   ilo_cp_begin(cp, cmd_len);
   ilo_cp_write(cp, cmd | pipeline);
   ilo_cp_end(cp);
}
471
/*
 * Emit MEDIA_VFE_STATE (8 DWords, GEN6 only).  Scratch space and the
 * scoreboard DWords are left at zero, and the CURBE allocation size is fixed
 * at 480.
 */
static inline void
gen6_emit_MEDIA_VFE_STATE(const struct ilo_dev_info *dev,
                          int max_threads, int num_urb_entries,
                          int urb_entry_size,
                          struct ilo_cp *cp)
{
   const uint8_t num_dwords = 8;
   const uint32_t header = ILO_GPE_CMD(0x2, 0x0, 0x00) | (num_dwords - 2);
   const int curbe_alloc_size = 480;
   uint32_t threads_and_urb, alloc_sizes;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   threads_and_urb = (max_threads - 1) << 16 |
                     num_urb_entries << 8 |
                     1 << 7 | /* Reset Gateway Timer */
                     1 << 6;  /* Bypass Gateway Control */

   /* URB Entry Allocation Size and CURBE Allocation Size */
   alloc_sizes = urb_entry_size << 16 | curbe_alloc_size;

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, 0); /* scratch */
   ilo_cp_write(cp, threads_and_urb);
   ilo_cp_write(cp, 0); /* MBZ */
   ilo_cp_write(cp, alloc_sizes);
   ilo_cp_write(cp, 0); /* scoreboard */
   ilo_cp_write(cp, 0);
   ilo_cp_write(cp, 0);
   ilo_cp_end(cp);
}
503
/*
 * Emit MEDIA_CURBE_LOAD (4 DWords, GEN6 only).  \p buf must be a multiple of
 * 32, and \p size is rounded up to a multiple of 32 before it is written.
 */
static inline void
gen6_emit_MEDIA_CURBE_LOAD(const struct ilo_dev_info *dev,
                           uint32_t buf, int size,
                           struct ilo_cp *cp)
{
   const uint8_t num_dwords = 4;
   const uint32_t header = ILO_GPE_CMD(0x2, 0x0, 0x01) | (num_dwords - 2);
   int aligned_size;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   assert(buf % 32 == 0);

   /* gen6_emit_push_constant_buffer() allocates buffers in 256-bit units */
   aligned_size = align(size, 32);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, 0); /* MBZ */
   ilo_cp_write(cp, aligned_size);
   ilo_cp_write(cp, buf);
   ilo_cp_end(cp);
}
525
/*
 * Emit MEDIA_INTERFACE_DESCRIPTOR_LOAD (4 DWords, GEN6 only).  \p offset must
 * be a multiple of 32; the length written is 32 bytes per descriptor.
 */
static inline void
gen6_emit_MEDIA_INTERFACE_DESCRIPTOR_LOAD(const struct ilo_dev_info *dev,
                                          uint32_t offset, int num_ids,
                                          struct ilo_cp *cp)
{
   const uint8_t num_dwords = 4;
   const uint32_t header = ILO_GPE_CMD(0x2, 0x0, 0x02) | (num_dwords - 2);
   /* every ID has 8 DWords */
   const int total_len = num_ids * 8 * 4;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   assert(offset % 32 == 0);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, 0); /* MBZ */
   ilo_cp_write(cp, total_len);
   ilo_cp_write(cp, offset);
   ilo_cp_end(cp);
}
546
/*
 * Emit MEDIA_GATEWAY_STATE (2 DWords, GEN6 only).  The payload DWord packs
 * \p id at bit 16, \p byte at bit 8, and \p thread_count at bit 0.
 */
static inline void
gen6_emit_MEDIA_GATEWAY_STATE(const struct ilo_dev_info *dev,
                              int id, int byte, int thread_count,
                              struct ilo_cp *cp)
{
   const uint8_t num_dwords = 2;
   const uint32_t header = ILO_GPE_CMD(0x2, 0x0, 0x03) | (num_dwords - 2);
   uint32_t payload;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   payload = id << 16 | byte << 8 | thread_count;

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, payload);
   ilo_cp_end(cp);
}
567
/*
 * Emit MEDIA_STATE_FLUSH (2 DWords, GEN6 only).  The payload DWord packs
 * \p thread_count_water_mark at bit 16 and \p barrier_mask at bit 0.
 */
static inline void
gen6_emit_MEDIA_STATE_FLUSH(const struct ilo_dev_info *dev,
                            int thread_count_water_mark,
                            int barrier_mask,
                            struct ilo_cp *cp)
{
   const uint8_t num_dwords = 2;
   const uint32_t header = ILO_GPE_CMD(0x2, 0x0, 0x04) | (num_dwords - 2);
   uint32_t payload;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   payload = thread_count_water_mark << 16 | barrier_mask;

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, payload);
   ilo_cp_end(cp);
}
588
/*
 * Placeholder for MEDIA_OBJECT_WALKER.  Nothing is emitted; calling this
 * trips an assertion.
 */
static inline void
gen6_emit_MEDIA_OBJECT_WALKER(const struct ilo_dev_info *dev,
                              struct ilo_cp *cp)
{
   (void) dev;
   (void) cp;

   assert(!"MEDIA_OBJECT_WALKER unsupported");
}
595
596 static inline void
597 gen6_emit_3DSTATE_BINDING_TABLE_POINTERS(const struct ilo_dev_info *dev,
598 uint32_t vs_binding_table,
599 uint32_t gs_binding_table,
600 uint32_t ps_binding_table,
601 struct ilo_cp *cp)
602 {
603 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x01);
604 const uint8_t cmd_len = 4;
605
606 ILO_GPE_VALID_GEN(dev, 6, 6);
607
608 ilo_cp_begin(cp, cmd_len);
609 ilo_cp_write(cp, cmd | (cmd_len - 2) |
610 GEN6_BINDING_TABLE_MODIFY_VS |
611 GEN6_BINDING_TABLE_MODIFY_GS |
612 GEN6_BINDING_TABLE_MODIFY_PS);
613 ilo_cp_write(cp, vs_binding_table);
614 ilo_cp_write(cp, gs_binding_table);
615 ilo_cp_write(cp, ps_binding_table);
616 ilo_cp_end(cp);
617 }
618
619 static inline void
620 gen6_emit_3DSTATE_SAMPLER_STATE_POINTERS(const struct ilo_dev_info *dev,
621 uint32_t vs_sampler_state,
622 uint32_t gs_sampler_state,
623 uint32_t ps_sampler_state,
624 struct ilo_cp *cp)
625 {
626 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x02);
627 const uint8_t cmd_len = 4;
628
629 ILO_GPE_VALID_GEN(dev, 6, 6);
630
631 ilo_cp_begin(cp, cmd_len);
632 ilo_cp_write(cp, cmd | (cmd_len - 2) |
633 VS_SAMPLER_STATE_CHANGE |
634 GS_SAMPLER_STATE_CHANGE |
635 PS_SAMPLER_STATE_CHANGE);
636 ilo_cp_write(cp, vs_sampler_state);
637 ilo_cp_write(cp, gs_sampler_state);
638 ilo_cp_write(cp, ps_sampler_state);
639 ilo_cp_end(cp);
640 }
641
642 static inline void
643 gen6_emit_3DSTATE_URB(const struct ilo_dev_info *dev,
644 int vs_total_size, int gs_total_size,
645 int vs_entry_size, int gs_entry_size,
646 struct ilo_cp *cp)
647 {
648 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x05);
649 const uint8_t cmd_len = 3;
650 const int row_size = 128; /* 1024 bits */
651 int vs_alloc_size, gs_alloc_size;
652 int vs_num_entries, gs_num_entries;
653
654 ILO_GPE_VALID_GEN(dev, 6, 6);
655
656 /* in 1024-bit URB rows */
657 vs_alloc_size = (vs_entry_size + row_size - 1) / row_size;
658 gs_alloc_size = (gs_entry_size + row_size - 1) / row_size;
659
660 /* the valid range is [1, 5] */
661 if (!vs_alloc_size)
662 vs_alloc_size = 1;
663 if (!gs_alloc_size)
664 gs_alloc_size = 1;
665 assert(vs_alloc_size <= 5 && gs_alloc_size <= 5);
666
667 /* the valid range is [24, 256] in multiples of 4 */
668 vs_num_entries = (vs_total_size / row_size / vs_alloc_size) & ~3;
669 if (vs_num_entries > 256)
670 vs_num_entries = 256;
671 assert(vs_num_entries >= 24);
672
673 /* the valid range is [0, 256] in multiples of 4 */
674 gs_num_entries = (gs_total_size / row_size / gs_alloc_size) & ~3;
675 if (gs_num_entries > 256)
676 gs_num_entries = 256;
677
678 ilo_cp_begin(cp, cmd_len);
679 ilo_cp_write(cp, cmd | (cmd_len - 2));
680 ilo_cp_write(cp, (vs_alloc_size - 1) << GEN6_URB_VS_SIZE_SHIFT |
681 vs_num_entries << GEN6_URB_VS_ENTRIES_SHIFT);
682 ilo_cp_write(cp, gs_num_entries << GEN6_URB_GS_ENTRIES_SHIFT |
683 (gs_alloc_size - 1) << GEN6_URB_GS_SIZE_SHIFT);
684 ilo_cp_end(cp);
685 }
686
687 static inline void
688 gen6_emit_3DSTATE_VERTEX_BUFFERS(const struct ilo_dev_info *dev,
689 const struct ilo_ve_state *ve,
690 const struct ilo_vb_state *vb,
691 struct ilo_cp *cp)
692 {
693 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x08);
694 uint8_t cmd_len;
695 unsigned hw_idx;
696
697 ILO_GPE_VALID_GEN(dev, 6, 7.5);
698
699 /*
700 * From the Sandy Bridge PRM, volume 2 part 1, page 82:
701 *
702 * "From 1 to 33 VBs can be specified..."
703 */
704 assert(ve->vb_count <= 33);
705
706 if (!ve->vb_count)
707 return;
708
709 cmd_len = 1 + 4 * ve->vb_count;
710
711 ilo_cp_begin(cp, cmd_len);
712 ilo_cp_write(cp, cmd | (cmd_len - 2));
713
714 for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
715 const unsigned instance_divisor = ve->instance_divisors[hw_idx];
716 const unsigned pipe_idx = ve->vb_mapping[hw_idx];
717 const struct pipe_vertex_buffer *cso = &vb->states[pipe_idx];
718 uint32_t dw;
719
720 dw = hw_idx << GEN6_VB0_INDEX_SHIFT;
721
722 if (instance_divisor)
723 dw |= GEN6_VB0_ACCESS_INSTANCEDATA;
724 else
725 dw |= GEN6_VB0_ACCESS_VERTEXDATA;
726
727 if (dev->gen >= ILO_GEN(7))
728 dw |= GEN7_VB0_ADDRESS_MODIFYENABLE;
729
730 /* use null vb if there is no buffer or the stride is out of range */
731 if (cso->buffer && cso->stride <= 2048) {
732 const struct ilo_buffer *buf = ilo_buffer(cso->buffer);
733 const uint32_t start_offset = cso->buffer_offset;
734 /*
735 * As noted in ilo_translate_format(), we treat some 3-component
736 * formats as 4-component formats to work around hardware
737 * limitations. Imagine the case where the vertex buffer holds a
738 * single PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6.
739 * The hardware would not be able to fetch it because the vertex
740 * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex
741 * and that takes at least 8 bytes.
742 *
743 * For the workaround to work, we query the physical size, which is
744 * page aligned, to calculate end_offset so that the last vertex has
745 * a better chance to be fetched.
746 */
747 const uint32_t end_offset = intel_bo_get_size(buf->bo) - 1;
748
749 dw |= cso->stride << BRW_VB0_PITCH_SHIFT;
750
751 ilo_cp_write(cp, dw);
752 ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
753 ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
754 ilo_cp_write(cp, instance_divisor);
755 }
756 else {
757 dw |= 1 << 13;
758
759 ilo_cp_write(cp, dw);
760 ilo_cp_write(cp, 0);
761 ilo_cp_write(cp, 0);
762 ilo_cp_write(cp, instance_divisor);
763 }
764 }
765
766 ilo_cp_end(cp);
767 }
768
769 static inline void
770 ve_init_cso_with_components(const struct ilo_dev_info *dev,
771 int comp0, int comp1, int comp2, int comp3,
772 struct ilo_ve_cso *cso)
773 {
774 ILO_GPE_VALID_GEN(dev, 6, 7.5);
775
776 STATIC_ASSERT(Elements(cso->payload) >= 2);
777 cso->payload[0] = GEN6_VE0_VALID;
778 cso->payload[1] =
779 comp0 << BRW_VE1_COMPONENT_0_SHIFT |
780 comp1 << BRW_VE1_COMPONENT_1_SHIFT |
781 comp2 << BRW_VE1_COMPONENT_2_SHIFT |
782 comp3 << BRW_VE1_COMPONENT_3_SHIFT;
783 }
784
785 static inline void
786 ve_set_cso_edgeflag(const struct ilo_dev_info *dev,
787 struct ilo_ve_cso *cso)
788 {
789 int format;
790
791 ILO_GPE_VALID_GEN(dev, 6, 7.5);
792
793 /*
794 * From the Sandy Bridge PRM, volume 2 part 1, page 94:
795 *
796 * "- This bit (Edge Flag Enable) must only be ENABLED on the last
797 * valid VERTEX_ELEMENT structure.
798 *
799 * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
800 * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
801 *
802 * - The Source Element Format must be set to the UINT format.
803 *
804 * - [DevSNB]: Edge Flags are not supported for QUADLIST
805 * primitives. Software may elect to convert QUADLIST primitives
806 * to some set of corresponding edge-flag-supported primitive
807 * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
808 */
809
810 cso->payload[0] |= GEN6_VE0_EDGE_FLAG_ENABLE;
811 cso->payload[1] =
812 BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
813 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_1_SHIFT |
814 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_2_SHIFT |
815 BRW_VE1_COMPONENT_NOSTORE << BRW_VE1_COMPONENT_3_SHIFT;
816
817 /*
818 * Edge flags have format BRW_SURFACEFORMAT_R8_UINT when defined via
819 * glEdgeFlagPointer(), and format BRW_SURFACEFORMAT_R32_FLOAT when defined
820 * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
821 *
822 * Since all the hardware cares about is whether the flags are zero or not,
823 * we can treat them as BRW_SURFACEFORMAT_R32_UINT in the latter case.
824 */
825 format = (cso->payload[0] >> BRW_VE0_FORMAT_SHIFT) & 0x1ff;
826 if (format == BRW_SURFACEFORMAT_R32_FLOAT) {
827 STATIC_ASSERT(BRW_SURFACEFORMAT_R32_UINT ==
828 BRW_SURFACEFORMAT_R32_FLOAT - 1);
829
830 cso->payload[0] -= (1 << BRW_VE0_FORMAT_SHIFT);
831 }
832 else {
833 assert(format == BRW_SURFACEFORMAT_R8_UINT);
834 }
835 }
836
837 static inline void
838 gen6_emit_3DSTATE_VERTEX_ELEMENTS(const struct ilo_dev_info *dev,
839 const struct ilo_ve_state *ve,
840 bool last_velement_edgeflag,
841 bool prepend_generated_ids,
842 struct ilo_cp *cp)
843 {
844 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x09);
845 uint8_t cmd_len;
846 unsigned i;
847
848 ILO_GPE_VALID_GEN(dev, 6, 7.5);
849
850 /*
851 * From the Sandy Bridge PRM, volume 2 part 1, page 93:
852 *
853 * "Up to 34 (DevSNB+) vertex elements are supported."
854 */
855 assert(ve->count + prepend_generated_ids <= 34);
856
857 if (!ve->count && !prepend_generated_ids) {
858 struct ilo_ve_cso dummy;
859
860 ve_init_cso_with_components(dev,
861 BRW_VE1_COMPONENT_STORE_0,
862 BRW_VE1_COMPONENT_STORE_0,
863 BRW_VE1_COMPONENT_STORE_0,
864 BRW_VE1_COMPONENT_STORE_1_FLT,
865 &dummy);
866
867 cmd_len = 3;
868 ilo_cp_begin(cp, cmd_len);
869 ilo_cp_write(cp, cmd | (cmd_len - 2));
870 ilo_cp_write_multi(cp, dummy.payload, 2);
871 ilo_cp_end(cp);
872
873 return;
874 }
875
876 cmd_len = 2 * (ve->count + prepend_generated_ids) + 1;
877
878 ilo_cp_begin(cp, cmd_len);
879 ilo_cp_write(cp, cmd | (cmd_len - 2));
880
881 if (prepend_generated_ids) {
882 struct ilo_ve_cso gen_ids;
883
884 ve_init_cso_with_components(dev,
885 BRW_VE1_COMPONENT_STORE_VID,
886 BRW_VE1_COMPONENT_STORE_IID,
887 BRW_VE1_COMPONENT_NOSTORE,
888 BRW_VE1_COMPONENT_NOSTORE,
889 &gen_ids);
890
891 ilo_cp_write_multi(cp, gen_ids.payload, 2);
892 }
893
894 if (last_velement_edgeflag) {
895 struct ilo_ve_cso edgeflag;
896
897 for (i = 0; i < ve->count - 1; i++)
898 ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
899
900 edgeflag = ve->cso[i];
901 ve_set_cso_edgeflag(dev, &edgeflag);
902 ilo_cp_write_multi(cp, edgeflag.payload, 2);
903 }
904 else {
905 for (i = 0; i < ve->count; i++)
906 ilo_cp_write_multi(cp, ve->cso[i].payload, 2);
907 }
908
909 ilo_cp_end(cp);
910 }
911
912 static inline void
913 gen6_emit_3DSTATE_INDEX_BUFFER(const struct ilo_dev_info *dev,
914 const struct ilo_ib_state *ib,
915 bool enable_cut_index,
916 struct ilo_cp *cp)
917 {
918 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0a);
919 const uint8_t cmd_len = 3;
920 struct ilo_buffer *buf = ilo_buffer(ib->hw_resource);
921 uint32_t start_offset, end_offset;
922 int format;
923
924 ILO_GPE_VALID_GEN(dev, 6, 7.5);
925
926 if (!buf)
927 return;
928
929 /* this is moved to the new 3DSTATE_VF */
930 if (dev->gen >= ILO_GEN(7.5))
931 assert(!enable_cut_index);
932
933 switch (ib->hw_index_size) {
934 case 4:
935 format = BRW_INDEX_DWORD;
936 break;
937 case 2:
938 format = BRW_INDEX_WORD;
939 break;
940 case 1:
941 format = BRW_INDEX_BYTE;
942 break;
943 default:
944 assert(!"unknown index size");
945 format = BRW_INDEX_BYTE;
946 break;
947 }
948
949 /*
950 * set start_offset to 0 here and adjust pipe_draw_info::start with
951 * ib->draw_start_offset in 3DPRIMITIVE
952 */
953 start_offset = 0;
954 end_offset = buf->bo_size;
955
956 /* end_offset must also be aligned and is inclusive */
957 end_offset -= (end_offset % ib->hw_index_size);
958 end_offset--;
959
960 ilo_cp_begin(cp, cmd_len);
961 ilo_cp_write(cp, cmd | (cmd_len - 2) |
962 ((enable_cut_index) ? BRW_CUT_INDEX_ENABLE : 0) |
963 format << 8);
964 ilo_cp_write_bo(cp, start_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
965 ilo_cp_write_bo(cp, end_offset, buf->bo, INTEL_DOMAIN_VERTEX, 0);
966 ilo_cp_end(cp);
967 }
968
969 static inline void
970 gen6_emit_3DSTATE_VIEWPORT_STATE_POINTERS(const struct ilo_dev_info *dev,
971 uint32_t clip_viewport,
972 uint32_t sf_viewport,
973 uint32_t cc_viewport,
974 struct ilo_cp *cp)
975 {
976 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x0d);
977 const uint8_t cmd_len = 4;
978
979 ILO_GPE_VALID_GEN(dev, 6, 6);
980
981 ilo_cp_begin(cp, cmd_len);
982 ilo_cp_write(cp, cmd | (cmd_len - 2) |
983 GEN6_CLIP_VIEWPORT_MODIFY |
984 GEN6_SF_VIEWPORT_MODIFY |
985 GEN6_CC_VIEWPORT_MODIFY);
986 ilo_cp_write(cp, clip_viewport);
987 ilo_cp_write(cp, sf_viewport);
988 ilo_cp_write(cp, cc_viewport);
989 ilo_cp_end(cp);
990 }
991
/*
 * Emit 3DSTATE_CC_STATE_POINTERS (4 DWords, GEN6 only).  Bit 0 is set in each
 * of the three pointers (presumably the per-pointer modify enable -- confirm
 * against the PRM).
 */
static inline void
gen6_emit_3DSTATE_CC_STATE_POINTERS(const struct ilo_dev_info *dev,
                                    uint32_t blend_state,
                                    uint32_t depth_stencil_state,
                                    uint32_t color_calc_state,
                                    struct ilo_cp *cp)
{
   const uint8_t num_dwords = 4;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x0e) | (num_dwords - 2);
   const uint32_t low_bit = 1;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, blend_state | low_bit);
   ilo_cp_write(cp, depth_stencil_state | low_bit);
   ilo_cp_write(cp, color_calc_state | low_bit);
   ilo_cp_end(cp);
}
1011
/*
 * Emit 3DSTATE_SCISSOR_STATE_POINTERS (2 DWords): the header followed by
 * \p scissor_rect.
 */
static inline void
gen6_emit_3DSTATE_SCISSOR_STATE_POINTERS(const struct ilo_dev_info *dev,
                                         uint32_t scissor_rect,
                                         struct ilo_cp *cp)
{
   const uint8_t num_dwords = 2;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x0f) | (num_dwords - 2);

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, scissor_rect);
   ilo_cp_end(cp);
}
1027
1028 static inline void
1029 gen6_emit_3DSTATE_VS(const struct ilo_dev_info *dev,
1030 const struct ilo_shader_state *vs,
1031 int num_samplers,
1032 struct ilo_cp *cp)
1033 {
1034 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x10);
1035 const uint8_t cmd_len = 6;
1036 const struct ilo_shader_cso *cso;
1037 uint32_t dw2, dw4, dw5;
1038
1039 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1040
1041 if (!vs) {
1042 ilo_cp_begin(cp, cmd_len);
1043 ilo_cp_write(cp, cmd | (cmd_len - 2));
1044 ilo_cp_write(cp, 0);
1045 ilo_cp_write(cp, 0);
1046 ilo_cp_write(cp, 0);
1047 ilo_cp_write(cp, 0);
1048 ilo_cp_write(cp, 0);
1049 ilo_cp_end(cp);
1050 return;
1051 }
1052
1053 cso = ilo_shader_get_kernel_cso(vs);
1054 dw2 = cso->payload[0];
1055 dw4 = cso->payload[1];
1056 dw5 = cso->payload[2];
1057
1058 dw2 |= ((num_samplers + 3) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT;
1059
1060 ilo_cp_begin(cp, cmd_len);
1061 ilo_cp_write(cp, cmd | (cmd_len - 2));
1062 ilo_cp_write(cp, ilo_shader_get_kernel_offset(vs));
1063 ilo_cp_write(cp, dw2);
1064 ilo_cp_write(cp, 0); /* scratch */
1065 ilo_cp_write(cp, dw4);
1066 ilo_cp_write(cp, dw5);
1067 ilo_cp_end(cp);
1068 }
1069
1070 static inline void
1071 gen6_emit_3DSTATE_GS(const struct ilo_dev_info *dev,
1072 const struct ilo_shader_state *gs,
1073 const struct ilo_shader_state *vs,
1074 int verts_per_prim,
1075 struct ilo_cp *cp)
1076 {
1077 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x11);
1078 const uint8_t cmd_len = 7;
1079 uint32_t dw1, dw2, dw4, dw5, dw6;
1080
1081 ILO_GPE_VALID_GEN(dev, 6, 6);
1082
1083 if (gs) {
1084 const struct ilo_shader_cso *cso;
1085
1086 dw1 = ilo_shader_get_kernel_offset(gs);
1087
1088 cso = ilo_shader_get_kernel_cso(gs);
1089 dw2 = cso->payload[0];
1090 dw4 = cso->payload[1];
1091 dw5 = cso->payload[2];
1092 dw6 = cso->payload[3];
1093 }
1094 else if (vs && ilo_shader_get_kernel_param(vs, ILO_KERNEL_VS_GEN6_SO)) {
1095 struct ilo_shader_cso cso;
1096 enum ilo_kernel_param param;
1097
1098 switch (verts_per_prim) {
1099 case 1:
1100 param = ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET;
1101 break;
1102 case 2:
1103 param = ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET;
1104 break;
1105 default:
1106 param = ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET;
1107 break;
1108 }
1109
1110 dw1 = ilo_shader_get_kernel_offset(vs) +
1111 ilo_shader_get_kernel_param(vs, param);
1112
1113 /* cannot use VS's CSO */
1114 ilo_gpe_init_gs_cso_gen6(dev, vs, &cso);
1115 dw2 = cso.payload[0];
1116 dw4 = cso.payload[1];
1117 dw5 = cso.payload[2];
1118 dw6 = cso.payload[3];
1119 }
1120 else {
1121 dw1 = 0;
1122 dw2 = 0;
1123 dw4 = 1 << GEN6_GS_URB_READ_LENGTH_SHIFT;
1124 dw5 = GEN6_GS_STATISTICS_ENABLE;
1125 dw6 = 0;
1126 }
1127
1128 ilo_cp_begin(cp, cmd_len);
1129 ilo_cp_write(cp, cmd | (cmd_len - 2));
1130 ilo_cp_write(cp, dw1);
1131 ilo_cp_write(cp, dw2);
1132 ilo_cp_write(cp, 0);
1133 ilo_cp_write(cp, dw4);
1134 ilo_cp_write(cp, dw5);
1135 ilo_cp_write(cp, dw6);
1136 ilo_cp_end(cp);
1137 }
1138
1139 static inline void
1140 gen6_emit_3DSTATE_CLIP(const struct ilo_dev_info *dev,
1141 const struct ilo_rasterizer_state *rasterizer,
1142 const struct ilo_shader_state *fs,
1143 bool enable_guardband,
1144 int num_viewports,
1145 struct ilo_cp *cp)
1146 {
1147 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x12);
1148 const uint8_t cmd_len = 4;
1149 uint32_t dw1, dw2, dw3;
1150
1151 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1152
1153 if (rasterizer) {
1154 int interps;
1155
1156 dw1 = rasterizer->clip.payload[0];
1157 dw2 = rasterizer->clip.payload[1];
1158 dw3 = rasterizer->clip.payload[2];
1159
1160 if (enable_guardband && rasterizer->clip.can_enable_guardband)
1161 dw2 |= GEN6_CLIP_GB_TEST;
1162
1163 interps = (fs) ? ilo_shader_get_kernel_param(fs,
1164 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS) : 0;
1165
1166 if (interps & (1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC |
1167 1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC |
1168 1 << BRW_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC))
1169 dw2 |= GEN6_CLIP_NON_PERSPECTIVE_BARYCENTRIC_ENABLE;
1170
1171 dw3 |= GEN6_CLIP_FORCE_ZERO_RTAINDEX |
1172 (num_viewports - 1);
1173 }
1174 else {
1175 dw1 = 0;
1176 dw2 = 0;
1177 dw3 = 0;
1178 }
1179
1180 ilo_cp_begin(cp, cmd_len);
1181 ilo_cp_write(cp, cmd | (cmd_len - 2));
1182 ilo_cp_write(cp, dw1);
1183 ilo_cp_write(cp, dw2);
1184 ilo_cp_write(cp, dw3);
1185 ilo_cp_end(cp);
1186 }
1187
1188 static inline void
1189 gen6_emit_3DSTATE_SF(const struct ilo_dev_info *dev,
1190 const struct ilo_rasterizer_state *rasterizer,
1191 const struct ilo_shader_state *fs,
1192 struct ilo_cp *cp)
1193 {
1194 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x13);
1195 const uint8_t cmd_len = 20;
1196 uint32_t payload_raster[6], payload_sbe[13];
1197
1198 ILO_GPE_VALID_GEN(dev, 6, 6);
1199
1200 ilo_gpe_gen6_fill_3dstate_sf_raster(dev, rasterizer,
1201 1, PIPE_FORMAT_NONE, payload_raster, Elements(payload_raster));
1202 ilo_gpe_gen6_fill_3dstate_sf_sbe(dev, rasterizer,
1203 fs, payload_sbe, Elements(payload_sbe));
1204
1205 ilo_cp_begin(cp, cmd_len);
1206 ilo_cp_write(cp, cmd | (cmd_len - 2));
1207 ilo_cp_write(cp, payload_sbe[0]);
1208 ilo_cp_write_multi(cp, payload_raster, 6);
1209 ilo_cp_write_multi(cp, &payload_sbe[1], 12);
1210 ilo_cp_end(cp);
1211 }
1212
1213 static inline void
1214 gen6_emit_3DSTATE_WM(const struct ilo_dev_info *dev,
1215 const struct ilo_shader_state *fs,
1216 int num_samplers,
1217 const struct ilo_rasterizer_state *rasterizer,
1218 bool dual_blend, bool cc_may_kill,
1219 uint32_t hiz_op,
1220 struct ilo_cp *cp)
1221 {
1222 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x0, 0x14);
1223 const uint8_t cmd_len = 9;
1224 const int num_samples = 1;
1225 const struct ilo_shader_cso *fs_cso;
1226 uint32_t dw2, dw4, dw5, dw6;
1227
1228 ILO_GPE_VALID_GEN(dev, 6, 6);
1229
1230 if (!fs) {
1231 /* see brwCreateContext() */
1232 const int max_threads = (dev->gt == 2) ? 80 : 40;
1233
1234 ilo_cp_begin(cp, cmd_len);
1235 ilo_cp_write(cp, cmd | (cmd_len - 2));
1236 ilo_cp_write(cp, 0);
1237 ilo_cp_write(cp, 0);
1238 ilo_cp_write(cp, 0);
1239 ilo_cp_write(cp, hiz_op);
1240 /* honor the valid range even if dispatching is disabled */
1241 ilo_cp_write(cp, (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT);
1242 ilo_cp_write(cp, 0);
1243 ilo_cp_write(cp, 0);
1244 ilo_cp_write(cp, 0);
1245 ilo_cp_end(cp);
1246
1247 return;
1248 }
1249
1250 fs_cso = ilo_shader_get_kernel_cso(fs);
1251 dw2 = fs_cso->payload[0];
1252 dw4 = fs_cso->payload[1];
1253 dw5 = fs_cso->payload[2];
1254 dw6 = fs_cso->payload[3];
1255
1256 dw2 |= (num_samplers + 3) / 4 << GEN6_WM_SAMPLER_COUNT_SHIFT;
1257
1258 /*
1259 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
1260 *
1261 * "This bit (Statistics Enable) must be disabled if either of these
1262 * bits is set: Depth Buffer Clear , Hierarchical Depth Buffer Resolve
1263 * Enable or Depth Buffer Resolve Enable."
1264 */
1265 assert(!hiz_op);
1266 dw4 |= GEN6_WM_STATISTICS_ENABLE;
1267
1268 if (cc_may_kill) {
1269 dw5 |= GEN6_WM_KILL_ENABLE |
1270 GEN6_WM_DISPATCH_ENABLE;
1271 }
1272
1273 if (dual_blend)
1274 dw5 |= GEN6_WM_DUAL_SOURCE_BLEND_ENABLE;
1275
1276 dw5 |= rasterizer->wm.payload[0];
1277
1278 dw6 |= rasterizer->wm.payload[1];
1279
1280 if (num_samples > 1) {
1281 dw6 |= rasterizer->wm.dw_msaa_rast |
1282 rasterizer->wm.dw_msaa_disp;
1283 }
1284
1285 ilo_cp_begin(cp, cmd_len);
1286 ilo_cp_write(cp, cmd | (cmd_len - 2));
1287 ilo_cp_write(cp, ilo_shader_get_kernel_offset(fs));
1288 ilo_cp_write(cp, dw2);
1289 ilo_cp_write(cp, 0); /* scratch */
1290 ilo_cp_write(cp, dw4);
1291 ilo_cp_write(cp, dw5);
1292 ilo_cp_write(cp, dw6);
1293 ilo_cp_write(cp, 0); /* kernel 1 */
1294 ilo_cp_write(cp, 0); /* kernel 2 */
1295 ilo_cp_end(cp);
1296 }
1297
/**
 * Fill the four buffer dwords shared by the 3DSTATE_CONSTANT_* commands.
 *
 * For every slot below num_bufs whose size is non-zero, dw[slot] is the
 * buffer address OR'ed with the read length, where the read length is the
 * size rounded up to 32-byte (256-bit) units, minus one.  All other slots
 * are zeroed.
 *
 * Asserted preconditions: num_dwords is 4, each used address is a multiple
 * of 32, each encoded read length is below 32, and the sum of the actual
 * read lengths does not exceed max_read_length.
 *
 * \return a bitmask with bit N set when slot N is in use
 */
static inline unsigned
gen6_fill_3dstate_constant(const struct ilo_dev_info *dev,
                           const uint32_t *bufs, const int *sizes,
                           int num_bufs, int max_read_length,
                           uint32_t *dw, int num_dwords)
{
   unsigned used_slots = 0x0;
   int read_length_sum = 0;
   int slot;

   assert(num_dwords == 4);

   for (slot = 0; slot < 4; slot++) {
      int encoded_len;

      if (slot >= num_bufs || !sizes[slot]) {
         dw[slot] = 0;
         continue;
      }

      /* in 256-bit units minus one */
      encoded_len = (sizes[slot] + 31) / 32 - 1;

      assert(bufs[slot] % 32 == 0);
      assert(encoded_len < 32);

      used_slots |= 1 << slot;
      dw[slot] = bufs[slot] | encoded_len;

      read_length_sum += encoded_len + 1;
   }

   assert(read_length_sum <= max_read_length);

   return used_slots;
}
1332
/**
 * Emit 3DSTATE_CONSTANT_VS (5 dwords).
 *
 * The four buffer dwords come from gen6_fill_3dstate_constant(); the mask of
 * used buffers it returns is placed at bit 12 of the header.
 *
 * Asserts that dev is GEN6 and that num_bufs is at most 4.
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_VS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x15);
   const uint8_t num_dwords = 5;
   uint32_t slots[4], used_slots;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 138:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 32"
    */
   used_slots = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 32, slots, Elements(slots));

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, opcode | (num_dwords - 2) | used_slots << 12);
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, slots[i]);
   ilo_cp_end(cp);
}
1363
/**
 * Emit 3DSTATE_CONSTANT_GS (5 dwords).
 *
 * The four buffer dwords come from gen6_fill_3dstate_constant(); the mask of
 * used buffers it returns is placed at bit 12 of the header.
 *
 * Asserts that dev is GEN6 and that num_bufs is at most 4.
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_GS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x16);
   const uint8_t num_dwords = 5;
   uint32_t slots[4], used_slots;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 161:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   used_slots = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 64, slots, Elements(slots));

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, opcode | (num_dwords - 2) | used_slots << 12);
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, slots[i]);
   ilo_cp_end(cp);
}
1394
/**
 * Emit 3DSTATE_CONSTANT_PS (5 dwords).
 *
 * The four buffer dwords come from gen6_fill_3dstate_constant(); the mask of
 * used buffers it returns is placed at bit 12 of the header.
 *
 * Asserts that dev is GEN6 and that num_bufs is at most 4.
 */
static inline void
gen6_emit_3DSTATE_CONSTANT_PS(const struct ilo_dev_info *dev,
                              const uint32_t *bufs, const int *sizes,
                              int num_bufs,
                              struct ilo_cp *cp)
{
   const uint32_t opcode = ILO_GPE_CMD(0x3, 0x0, 0x17);
   const uint8_t num_dwords = 5;
   uint32_t slots[4], used_slots;
   int i;

   ILO_GPE_VALID_GEN(dev, 6, 6);
   assert(num_bufs <= 4);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 287:
    *
    *     "The sum of all four read length fields (each incremented to
    *      represent the actual read length) must be less than or equal to 64"
    */
   used_slots = gen6_fill_3dstate_constant(dev,
         bufs, sizes, num_bufs, 64, slots, Elements(slots));

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, opcode | (num_dwords - 2) | used_slots << 12);
   for (i = 0; i < 4; i++)
      ilo_cp_write(cp, slots[i]);
   ilo_cp_end(cp);
}
1425
/**
 * Emit 3DSTATE_SAMPLE_MASK (2 dwords).  Only the low four bits of
 * sample_mask are kept; the rest are masked off before emission.
 *
 * Asserts that dev is GEN6.
 */
static inline void
gen6_emit_3DSTATE_SAMPLE_MASK(const struct ilo_dev_info *dev,
                              unsigned sample_mask,
                              struct ilo_cp *cp)
{
   const uint8_t num_dwords = 2;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x0, 0x18) | (num_dwords - 2);
   const unsigned usable_bits = 0xf;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, sample_mask & usable_bits);
   ilo_cp_end(cp);
}
1444
1445 static inline void
1446 gen6_emit_3DSTATE_DRAWING_RECTANGLE(const struct ilo_dev_info *dev,
1447 unsigned x, unsigned y,
1448 unsigned width, unsigned height,
1449 struct ilo_cp *cp)
1450 {
1451 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x00);
1452 const uint8_t cmd_len = 4;
1453 unsigned xmax = x + width - 1;
1454 unsigned ymax = y + height - 1;
1455 int rect_limit;
1456
1457 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1458
1459 if (dev->gen >= ILO_GEN(7)) {
1460 rect_limit = 16383;
1461 }
1462 else {
1463 /*
1464 * From the Sandy Bridge PRM, volume 2 part 1, page 230:
1465 *
1466 * "[DevSNB] Errata: This field (Clipped Drawing Rectangle Y Min)
1467 * must be an even number"
1468 */
1469 assert(y % 2 == 0);
1470
1471 rect_limit = 8191;
1472 }
1473
1474 if (x > rect_limit) x = rect_limit;
1475 if (y > rect_limit) y = rect_limit;
1476 if (xmax > rect_limit) xmax = rect_limit;
1477 if (ymax > rect_limit) ymax = rect_limit;
1478
1479 ilo_cp_begin(cp, cmd_len);
1480 ilo_cp_write(cp, cmd | (cmd_len - 2));
1481 ilo_cp_write(cp, y << 16 | x);
1482 ilo_cp_write(cp, ymax << 16 | xmax);
1483
1484 /*
1485 * There is no need to set the origin. It is intended to support front
1486 * buffer rendering.
1487 */
1488 ilo_cp_write(cp, 0);
1489
1490 ilo_cp_end(cp);
1491 }
1492
1493 static inline void
1494 zs_align_surface(const struct ilo_dev_info *dev,
1495 unsigned align_w, unsigned align_h,
1496 struct ilo_zs_surface *zs)
1497 {
1498 unsigned mask, shift_w, shift_h;
1499 unsigned width, height;
1500 uint32_t dw3;
1501
1502 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1503
1504 if (dev->gen >= ILO_GEN(7)) {
1505 shift_w = 4;
1506 shift_h = 18;
1507 mask = 0x3fff;
1508 }
1509 else {
1510 shift_w = 6;
1511 shift_h = 19;
1512 mask = 0x1fff;
1513 }
1514
1515 dw3 = zs->payload[2];
1516
1517 /* aligned width and height */
1518 width = align(((dw3 >> shift_w) & mask) + 1, align_w);
1519 height = align(((dw3 >> shift_h) & mask) + 1, align_h);
1520
1521 dw3 = (dw3 & ~((mask << shift_w) | (mask << shift_h))) |
1522 (width - 1) << shift_w |
1523 (height - 1) << shift_h;
1524
1525 zs->payload[2] = dw3;
1526 }
1527
1528 static inline void
1529 gen6_emit_3DSTATE_DEPTH_BUFFER(const struct ilo_dev_info *dev,
1530 const struct ilo_zs_surface *zs,
1531 struct ilo_cp *cp)
1532 {
1533 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
1534 ILO_GPE_CMD(0x3, 0x0, 0x05) : ILO_GPE_CMD(0x3, 0x1, 0x05);
1535 const uint8_t cmd_len = 7;
1536
1537 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1538
1539 ilo_cp_begin(cp, cmd_len);
1540 ilo_cp_write(cp, cmd | (cmd_len - 2));
1541 ilo_cp_write(cp, zs->payload[0]);
1542 ilo_cp_write_bo(cp, zs->payload[1], zs->bo,
1543 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1544 ilo_cp_write(cp, zs->payload[2]);
1545 ilo_cp_write(cp, zs->payload[3]);
1546 ilo_cp_write(cp, zs->payload[4]);
1547 ilo_cp_write(cp, zs->payload[5]);
1548 ilo_cp_end(cp);
1549 }
1550
/**
 * Emit 3DSTATE_POLY_STIPPLE_OFFSET (2 dwords).  The payload packs x_offset
 * at bit 8 and y_offset at bit 0.
 *
 * Asserts that dev is between GEN6 and GEN7.5 and that both offsets are in
 * [0, 31].
 */
static inline void
gen6_emit_3DSTATE_POLY_STIPPLE_OFFSET(const struct ilo_dev_info *dev,
                                      int x_offset, int y_offset,
                                      struct ilo_cp *cp)
{
   const uint8_t num_dwords = 2;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x1, 0x06) | (num_dwords - 2);

   ILO_GPE_VALID_GEN(dev, 6, 7.5);
   assert(x_offset >= 0 && x_offset <= 31);
   assert(y_offset >= 0 && y_offset <= 31);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, x_offset << 8 | y_offset);
   ilo_cp_end(cp);
}
1568
1569 static inline void
1570 gen6_emit_3DSTATE_POLY_STIPPLE_PATTERN(const struct ilo_dev_info *dev,
1571 const struct pipe_poly_stipple *pattern,
1572 struct ilo_cp *cp)
1573 {
1574 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x07);
1575 const uint8_t cmd_len = 33;
1576 int i;
1577
1578 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1579 assert(Elements(pattern->stipple) == 32);
1580
1581 ilo_cp_begin(cp, cmd_len);
1582 ilo_cp_write(cp, cmd | (cmd_len - 2));
1583 for (i = 0; i < 32; i++)
1584 ilo_cp_write(cp, pattern->stipple[i]);
1585 ilo_cp_end(cp);
1586 }
1587
1588 static inline void
1589 gen6_emit_3DSTATE_LINE_STIPPLE(const struct ilo_dev_info *dev,
1590 unsigned pattern, unsigned factor,
1591 struct ilo_cp *cp)
1592 {
1593 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x08);
1594 const uint8_t cmd_len = 3;
1595 unsigned inverse;
1596
1597 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1598 assert((pattern & 0xffff) == pattern);
1599 assert(factor >= 1 && factor <= 256);
1600
1601 ilo_cp_begin(cp, cmd_len);
1602 ilo_cp_write(cp, cmd | (cmd_len - 2));
1603 ilo_cp_write(cp, pattern);
1604
1605 if (dev->gen >= ILO_GEN(7)) {
1606 /* in U1.16 */
1607 inverse = (unsigned) (65536.0f / factor);
1608 ilo_cp_write(cp, inverse << 15 | factor);
1609 }
1610 else {
1611 /* in U1.13 */
1612 inverse = (unsigned) (8192.0f / factor);
1613 ilo_cp_write(cp, inverse << 16 | factor);
1614 }
1615
1616 ilo_cp_end(cp);
1617 }
1618
/**
 * Emit 3DSTATE_AA_LINE_PARAMETERS (3 dwords) with both payload dwords set
 * to zero.
 *
 * Asserts that dev is between GEN6 and GEN7.5.
 */
static inline void
gen6_emit_3DSTATE_AA_LINE_PARAMETERS(const struct ilo_dev_info *dev,
                                     struct ilo_cp *cp)
{
   const uint8_t num_dwords = 3;
   const uint32_t header = ILO_GPE_CMD(0x3, 0x1, 0x0a) | (num_dwords - 2);
   /* each dword holds two fields, at bit 16 and bit 0; all of them are zero */
   const uint32_t dw1 = 0 << 16 | 0;
   const uint32_t dw2 = 0 << 16 | 0;

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   ilo_cp_begin(cp, num_dwords);
   ilo_cp_write(cp, header);
   ilo_cp_write(cp, dw1);
   ilo_cp_write(cp, dw2);
   ilo_cp_end(cp);
}
1634
1635 static inline void
1636 gen6_emit_3DSTATE_GS_SVB_INDEX(const struct ilo_dev_info *dev,
1637 int index, unsigned svbi,
1638 unsigned max_svbi,
1639 bool load_vertex_count,
1640 struct ilo_cp *cp)
1641 {
1642 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0b);
1643 const uint8_t cmd_len = 4;
1644 uint32_t dw1;
1645
1646 ILO_GPE_VALID_GEN(dev, 6, 6);
1647 assert(index >= 0 && index < 4);
1648
1649 dw1 = index << SVB_INDEX_SHIFT;
1650 if (load_vertex_count)
1651 dw1 |= SVB_LOAD_INTERNAL_VERTEX_COUNT;
1652
1653 ilo_cp_begin(cp, cmd_len);
1654 ilo_cp_write(cp, cmd | (cmd_len - 2));
1655 ilo_cp_write(cp, dw1);
1656 ilo_cp_write(cp, svbi);
1657 ilo_cp_write(cp, max_svbi);
1658 ilo_cp_end(cp);
1659 }
1660
1661 static inline void
1662 gen6_emit_3DSTATE_MULTISAMPLE(const struct ilo_dev_info *dev,
1663 int num_samples,
1664 const uint32_t *packed_sample_pos,
1665 bool pixel_location_center,
1666 struct ilo_cp *cp)
1667 {
1668 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x0d);
1669 const uint8_t cmd_len = (dev->gen >= ILO_GEN(7)) ? 4 : 3;
1670 uint32_t dw1, dw2, dw3;
1671
1672 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1673
1674 dw1 = (pixel_location_center) ?
1675 MS_PIXEL_LOCATION_CENTER : MS_PIXEL_LOCATION_UPPER_LEFT;
1676
1677 switch (num_samples) {
1678 case 0:
1679 case 1:
1680 dw1 |= MS_NUMSAMPLES_1;
1681 dw2 = 0;
1682 dw3 = 0;
1683 break;
1684 case 4:
1685 dw1 |= MS_NUMSAMPLES_4;
1686 dw2 = packed_sample_pos[0];
1687 dw3 = 0;
1688 break;
1689 case 8:
1690 assert(dev->gen >= ILO_GEN(7));
1691 dw1 |= MS_NUMSAMPLES_8;
1692 dw2 = packed_sample_pos[0];
1693 dw3 = packed_sample_pos[1];
1694 break;
1695 default:
1696 assert(!"unsupported sample count");
1697 dw1 |= MS_NUMSAMPLES_1;
1698 dw2 = 0;
1699 dw3 = 0;
1700 break;
1701 }
1702
1703 ilo_cp_begin(cp, cmd_len);
1704 ilo_cp_write(cp, cmd | (cmd_len - 2));
1705 ilo_cp_write(cp, dw1);
1706 ilo_cp_write(cp, dw2);
1707 if (dev->gen >= ILO_GEN(7))
1708 ilo_cp_write(cp, dw3);
1709 ilo_cp_end(cp);
1710 }
1711
1712 static inline void
1713 gen6_emit_3DSTATE_STENCIL_BUFFER(const struct ilo_dev_info *dev,
1714 const struct ilo_zs_surface *zs,
1715 struct ilo_cp *cp)
1716 {
1717 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
1718 ILO_GPE_CMD(0x3, 0x0, 0x06) :
1719 ILO_GPE_CMD(0x3, 0x1, 0x0e);
1720 const uint8_t cmd_len = 3;
1721
1722 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1723
1724 ilo_cp_begin(cp, cmd_len);
1725 ilo_cp_write(cp, cmd | (cmd_len - 2));
1726 /* see ilo_gpe_init_zs_surface() */
1727 ilo_cp_write(cp, zs->payload[6]);
1728 ilo_cp_write_bo(cp, zs->payload[7], zs->separate_s8_bo,
1729 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1730 ilo_cp_end(cp);
1731 }
1732
1733 static inline void
1734 gen6_emit_3DSTATE_HIER_DEPTH_BUFFER(const struct ilo_dev_info *dev,
1735 const struct ilo_zs_surface *zs,
1736 struct ilo_cp *cp)
1737 {
1738 const uint32_t cmd = (dev->gen >= ILO_GEN(7)) ?
1739 ILO_GPE_CMD(0x3, 0x0, 0x07) :
1740 ILO_GPE_CMD(0x3, 0x1, 0x0f);
1741 const uint8_t cmd_len = 3;
1742
1743 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1744
1745 ilo_cp_begin(cp, cmd_len);
1746 ilo_cp_write(cp, cmd | (cmd_len - 2));
1747 /* see ilo_gpe_init_zs_surface() */
1748 ilo_cp_write(cp, zs->payload[8]);
1749 ilo_cp_write_bo(cp, zs->payload[9], zs->hiz_bo,
1750 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
1751 ilo_cp_end(cp);
1752 }
1753
1754 static inline void
1755 gen6_emit_3DSTATE_CLEAR_PARAMS(const struct ilo_dev_info *dev,
1756 uint32_t clear_val,
1757 struct ilo_cp *cp)
1758 {
1759 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x1, 0x10);
1760 const uint8_t cmd_len = 2;
1761
1762 ILO_GPE_VALID_GEN(dev, 6, 6);
1763
1764 ilo_cp_begin(cp, cmd_len);
1765 ilo_cp_write(cp, cmd | (cmd_len - 2) |
1766 GEN5_DEPTH_CLEAR_VALID);
1767 ilo_cp_write(cp, clear_val);
1768 ilo_cp_end(cp);
1769 }
1770
1771 static inline void
1772 gen6_emit_PIPE_CONTROL(const struct ilo_dev_info *dev,
1773 uint32_t dw1,
1774 struct intel_bo *bo, uint32_t bo_offset,
1775 bool write_qword,
1776 struct ilo_cp *cp)
1777 {
1778 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x2, 0x00);
1779 const uint8_t cmd_len = (write_qword) ? 5 : 4;
1780 const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
1781 const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
1782
1783 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1784
1785 if (dw1 & PIPE_CONTROL_CS_STALL) {
1786 /*
1787 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
1788 *
1789 * "1 of the following must also be set (when CS stall is set):
1790 *
1791 * * Depth Cache Flush Enable ([0] of DW1)
1792 * * Stall at Pixel Scoreboard ([1] of DW1)
1793 * * Depth Stall ([13] of DW1)
1794 * * Post-Sync Operation ([13] of DW1)
1795 * * Render Target Cache Flush Enable ([12] of DW1)
1796 * * Notify Enable ([8] of DW1)"
1797 *
1798 * From the Ivy Bridge PRM, volume 2 part 1, page 61:
1799 *
1800 * "One of the following must also be set (when CS stall is set):
1801 *
1802 * * Render Target Cache Flush Enable ([12] of DW1)
1803 * * Depth Cache Flush Enable ([0] of DW1)
1804 * * Stall at Pixel Scoreboard ([1] of DW1)
1805 * * Depth Stall ([13] of DW1)
1806 * * Post-Sync Operation ([13] of DW1)"
1807 */
1808 uint32_t bit_test = PIPE_CONTROL_WRITE_FLUSH |
1809 PIPE_CONTROL_DEPTH_CACHE_FLUSH |
1810 PIPE_CONTROL_STALL_AT_SCOREBOARD |
1811 PIPE_CONTROL_DEPTH_STALL;
1812
1813 /* post-sync op */
1814 bit_test |= PIPE_CONTROL_WRITE_IMMEDIATE |
1815 PIPE_CONTROL_WRITE_DEPTH_COUNT |
1816 PIPE_CONTROL_WRITE_TIMESTAMP;
1817
1818 if (dev->gen == ILO_GEN(6))
1819 bit_test |= PIPE_CONTROL_INTERRUPT_ENABLE;
1820
1821 assert(dw1 & bit_test);
1822 }
1823
1824 if (dw1 & PIPE_CONTROL_DEPTH_STALL) {
1825 /*
1826 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
1827 *
1828 * "Following bits must be clear (when Depth Stall is set):
1829 *
1830 * * Render Target Cache Flush Enable ([12] of DW1)
1831 * * Depth Cache Flush Enable ([0] of DW1)"
1832 */
1833 assert(!(dw1 & (PIPE_CONTROL_WRITE_FLUSH |
1834 PIPE_CONTROL_DEPTH_CACHE_FLUSH)));
1835 }
1836
1837 ilo_cp_begin(cp, cmd_len);
1838 ilo_cp_write(cp, cmd | (cmd_len - 2));
1839 ilo_cp_write(cp, dw1);
1840 ilo_cp_write_bo(cp, bo_offset, bo, read_domains, write_domain);
1841 ilo_cp_write(cp, 0);
1842 if (write_qword)
1843 ilo_cp_write(cp, 0);
1844 ilo_cp_end(cp);
1845 }
1846
1847 static inline void
1848 gen6_emit_3DPRIMITIVE(const struct ilo_dev_info *dev,
1849 const struct pipe_draw_info *info,
1850 const struct ilo_ib_state *ib,
1851 bool rectlist,
1852 struct ilo_cp *cp)
1853 {
1854 const uint32_t cmd = ILO_GPE_CMD(0x3, 0x3, 0x00);
1855 const uint8_t cmd_len = 6;
1856 const int prim = (rectlist) ?
1857 _3DPRIM_RECTLIST : ilo_gpe_gen6_translate_pipe_prim(info->mode);
1858 const int vb_access = (info->indexed) ?
1859 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_RANDOM :
1860 GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL;
1861 const uint32_t vb_start = info->start +
1862 ((info->indexed) ? ib->draw_start_offset : 0);
1863
1864 ILO_GPE_VALID_GEN(dev, 6, 6);
1865
1866 ilo_cp_begin(cp, cmd_len);
1867 ilo_cp_write(cp, cmd | (cmd_len - 2) |
1868 prim << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
1869 vb_access);
1870 ilo_cp_write(cp, info->count);
1871 ilo_cp_write(cp, vb_start);
1872 ilo_cp_write(cp, info->instance_count);
1873 ilo_cp_write(cp, info->start_instance);
1874 ilo_cp_write(cp, info->index_bias);
1875 ilo_cp_end(cp);
1876 }
1877
/**
 * Write num_ids interface descriptors, eight dwords each, into space
 * obtained from ilo_cp_steal_ptr().
 *
 * Descriptor i is built from cs[i], sampler_state[i], num_samplers[i],
 * binding_table_state[i] and num_surfaces[i].  The sampler count is encoded
 * in groups of four samplers, rounded up.
 *
 * Asserts that dev is GEN6.
 *
 * \return the offset reported by ilo_cp_steal_ptr()
 */
static inline uint32_t
gen6_emit_INTERFACE_DESCRIPTOR_DATA(const struct ilo_dev_info *dev,
                                    const struct ilo_shader_state **cs,
                                    uint32_t *sampler_state,
                                    int *num_samplers,
                                    uint32_t *binding_table_state,
                                    int *num_surfaces,
                                    int num_ids,
                                    struct ilo_cp *cp)
{
   /*
    * From the Sandy Bridge PRM, volume 2 part 2, page 34:
    *
    *     "(Interface Descriptor Total Length) This field must have the same
    *      alignment as the Interface Descriptor Data Start Address.
    *
    *      It must be DQWord (32-byte) aligned..."
    *
    * From the Sandy Bridge PRM, volume 2 part 2, page 35:
    *
    *     "(Interface Descriptor Data Start Address) Specifies the 32-byte
    *      aligned address of the Interface Descriptor data."
    */
   const int state_align = 32 / 4;
   const int state_len = (32 / 4) * num_ids;
   uint32_t state_offset, *base;
   int id;

   ILO_GPE_VALID_GEN(dev, 6, 6);

   base = ilo_cp_steal_ptr(cp, "INTERFACE_DESCRIPTOR_DATA",
         state_len, state_align, &state_offset);

   for (id = 0; id < num_ids; id++) {
      uint32_t *desc = &base[id * 8];

      desc[0] = ilo_shader_get_kernel_offset(cs[id]);
      desc[1] = 1 << 18; /* SPF */
      desc[2] = sampler_state[id] |
                (num_samplers[id] + 3) / 4 << 2;
      desc[3] = binding_table_state[id] |
                num_surfaces[id];
      desc[4] = 0 << 16 |  /* CURBE Read Length */
                0;         /* CURBE Read Offset */
      desc[5] = 0; /* Barrier ID */
      desc[6] = 0;
      desc[7] = 0;
   }

   return state_offset;
}
1929
1930 static inline uint32_t
1931 gen6_emit_SF_VIEWPORT(const struct ilo_dev_info *dev,
1932 const struct ilo_viewport_cso *viewports,
1933 unsigned num_viewports,
1934 struct ilo_cp *cp)
1935 {
1936 const int state_align = 32 / 4;
1937 const int state_len = 8 * num_viewports;
1938 uint32_t state_offset, *dw;
1939 unsigned i;
1940
1941 ILO_GPE_VALID_GEN(dev, 6, 6);
1942
1943 /*
1944 * From the Sandy Bridge PRM, volume 2 part 1, page 262:
1945 *
1946 * "The viewport-specific state used by the SF unit (SF_VIEWPORT) is
1947 * stored as an array of up to 16 elements..."
1948 */
1949 assert(num_viewports && num_viewports <= 16);
1950
1951 dw = ilo_cp_steal_ptr(cp, "SF_VIEWPORT",
1952 state_len, state_align, &state_offset);
1953
1954 for (i = 0; i < num_viewports; i++) {
1955 const struct ilo_viewport_cso *vp = &viewports[i];
1956
1957 dw[0] = fui(vp->m00);
1958 dw[1] = fui(vp->m11);
1959 dw[2] = fui(vp->m22);
1960 dw[3] = fui(vp->m30);
1961 dw[4] = fui(vp->m31);
1962 dw[5] = fui(vp->m32);
1963 dw[6] = 0;
1964 dw[7] = 0;
1965
1966 dw += 8;
1967 }
1968
1969 return state_offset;
1970 }
1971
1972 static inline uint32_t
1973 gen6_emit_CLIP_VIEWPORT(const struct ilo_dev_info *dev,
1974 const struct ilo_viewport_cso *viewports,
1975 unsigned num_viewports,
1976 struct ilo_cp *cp)
1977 {
1978 const int state_align = 32 / 4;
1979 const int state_len = 4 * num_viewports;
1980 uint32_t state_offset, *dw;
1981 unsigned i;
1982
1983 ILO_GPE_VALID_GEN(dev, 6, 6);
1984
1985 /*
1986 * From the Sandy Bridge PRM, volume 2 part 1, page 193:
1987 *
1988 * "The viewport-related state is stored as an array of up to 16
1989 * elements..."
1990 */
1991 assert(num_viewports && num_viewports <= 16);
1992
1993 dw = ilo_cp_steal_ptr(cp, "CLIP_VIEWPORT",
1994 state_len, state_align, &state_offset);
1995
1996 for (i = 0; i < num_viewports; i++) {
1997 const struct ilo_viewport_cso *vp = &viewports[i];
1998
1999 dw[0] = fui(vp->min_gbx);
2000 dw[1] = fui(vp->max_gbx);
2001 dw[2] = fui(vp->min_gby);
2002 dw[3] = fui(vp->max_gby);
2003
2004 dw += 4;
2005 }
2006
2007 return state_offset;
2008 }
2009
2010 static inline uint32_t
2011 gen6_emit_CC_VIEWPORT(const struct ilo_dev_info *dev,
2012 const struct ilo_viewport_cso *viewports,
2013 unsigned num_viewports,
2014 struct ilo_cp *cp)
2015 {
2016 const int state_align = 32 / 4;
2017 const int state_len = 2 * num_viewports;
2018 uint32_t state_offset, *dw;
2019 unsigned i;
2020
2021 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2022
2023 /*
2024 * From the Sandy Bridge PRM, volume 2 part 1, page 385:
2025 *
2026 * "The viewport state is stored as an array of up to 16 elements..."
2027 */
2028 assert(num_viewports && num_viewports <= 16);
2029
2030 dw = ilo_cp_steal_ptr(cp, "CC_VIEWPORT",
2031 state_len, state_align, &state_offset);
2032
2033 for (i = 0; i < num_viewports; i++) {
2034 const struct ilo_viewport_cso *vp = &viewports[i];
2035
2036 dw[0] = fui(vp->min_z);
2037 dw[1] = fui(vp->max_z);
2038
2039 dw += 2;
2040 }
2041
2042 return state_offset;
2043 }
2044
2045 static inline uint32_t
2046 gen6_emit_COLOR_CALC_STATE(const struct ilo_dev_info *dev,
2047 const struct pipe_stencil_ref *stencil_ref,
2048 ubyte alpha_ref,
2049 const struct pipe_blend_color *blend_color,
2050 struct ilo_cp *cp)
2051 {
2052 const int state_align = 64 / 4;
2053 const int state_len = 6;
2054 uint32_t state_offset, *dw;
2055
2056 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2057
2058 dw = ilo_cp_steal_ptr(cp, "COLOR_CALC_STATE",
2059 state_len, state_align, &state_offset);
2060
2061 dw[0] = stencil_ref->ref_value[0] << 24 |
2062 stencil_ref->ref_value[1] << 16 |
2063 BRW_ALPHATEST_FORMAT_UNORM8;
2064 dw[1] = alpha_ref;
2065 dw[2] = fui(blend_color->color[0]);
2066 dw[3] = fui(blend_color->color[1]);
2067 dw[4] = fui(blend_color->color[2]);
2068 dw[5] = fui(blend_color->color[3]);
2069
2070 return state_offset;
2071 }
2072
2073 static inline uint32_t
2074 gen6_emit_BLEND_STATE(const struct ilo_dev_info *dev,
2075 const struct ilo_blend_state *blend,
2076 const struct ilo_fb_state *fb,
2077 const struct ilo_dsa_state *dsa,
2078 struct ilo_cp *cp)
2079 {
2080 const int state_align = 64 / 4;
2081 int state_len;
2082 uint32_t state_offset, *dw;
2083 unsigned num_targets, i;
2084
2085 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2086
2087 /*
2088 * From the Sandy Bridge PRM, volume 2 part 1, page 376:
2089 *
2090 * "The blend state is stored as an array of up to 8 elements..."
2091 */
2092 num_targets = fb->state.nr_cbufs;
2093 assert(num_targets <= 8);
2094
2095 if (!num_targets) {
2096 if (!dsa->dw_alpha)
2097 return 0;
2098 /* to be able to reference alpha func */
2099 num_targets = 1;
2100 }
2101
2102 state_len = 2 * num_targets;
2103
2104 dw = ilo_cp_steal_ptr(cp, "BLEND_STATE",
2105 state_len, state_align, &state_offset);
2106
2107 for (i = 0; i < num_targets; i++) {
2108 const unsigned idx = (blend->independent_blend_enable) ? i : 0;
2109 const struct ilo_blend_cso *cso = &blend->cso[idx];
2110 const int num_samples = fb->num_samples;
2111 const struct util_format_description *format_desc =
2112 (idx < fb->state.nr_cbufs && fb->state.cbufs[idx]) ?
2113 util_format_description(fb->state.cbufs[idx]->format) : NULL;
2114 bool rt_is_unorm, rt_is_pure_integer, rt_dst_alpha_forced_one;
2115
2116 rt_is_unorm = true;
2117 rt_is_pure_integer = false;
2118 rt_dst_alpha_forced_one = false;
2119
2120 if (format_desc) {
2121 int ch;
2122
2123 switch (format_desc->format) {
2124 case PIPE_FORMAT_B8G8R8X8_UNORM:
2125 /* force alpha to one when the HW format has alpha */
2126 assert(ilo_translate_render_format(PIPE_FORMAT_B8G8R8X8_UNORM)
2127 == BRW_SURFACEFORMAT_B8G8R8A8_UNORM);
2128 rt_dst_alpha_forced_one = true;
2129 break;
2130 default:
2131 break;
2132 }
2133
2134 for (ch = 0; ch < 4; ch++) {
2135 if (format_desc->channel[ch].type == UTIL_FORMAT_TYPE_VOID)
2136 continue;
2137
2138 if (format_desc->channel[ch].pure_integer) {
2139 rt_is_unorm = false;
2140 rt_is_pure_integer = true;
2141 break;
2142 }
2143
2144 if (!format_desc->channel[ch].normalized ||
2145 format_desc->channel[ch].type != UTIL_FORMAT_TYPE_UNSIGNED)
2146 rt_is_unorm = false;
2147 }
2148 }
2149
2150 dw[0] = cso->payload[0];
2151 dw[1] = cso->payload[1];
2152
2153 if (!rt_is_pure_integer) {
2154 if (rt_dst_alpha_forced_one)
2155 dw[0] |= cso->dw_blend_dst_alpha_forced_one;
2156 else
2157 dw[0] |= cso->dw_blend;
2158 }
2159
2160 /*
2161 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
2162 *
2163 * "Logic Ops are only supported on *_UNORM surfaces (excluding
2164 * _SRGB variants), otherwise Logic Ops must be DISABLED."
2165 *
2166 * Since logicop is ignored for non-UNORM color buffers, no special care
2167 * is needed.
2168 */
2169 if (rt_is_unorm)
2170 dw[1] |= cso->dw_logicop;
2171
2172 /*
2173 * From the Sandy Bridge PRM, volume 2 part 1, page 356:
2174 *
2175 * "When NumSamples = 1, AlphaToCoverage and AlphaToCoverage
2176 * Dither both must be disabled."
2177 *
2178 * There is no such limitation on GEN7, or for AlphaToOne. But GL
2179 * requires that anyway.
2180 */
2181 if (num_samples > 1)
2182 dw[1] |= cso->dw_alpha_mod;
2183
2184 /*
2185 * From the Sandy Bridge PRM, volume 2 part 1, page 382:
2186 *
2187 * "Alpha Test can only be enabled if Pixel Shader outputs a float
2188 * alpha value."
2189 */
2190 if (!rt_is_pure_integer)
2191 dw[1] |= dsa->dw_alpha;
2192
2193 dw += 2;
2194 }
2195
2196 return state_offset;
2197 }
2198
2199 static inline uint32_t
2200 gen6_emit_DEPTH_STENCIL_STATE(const struct ilo_dev_info *dev,
2201 const struct ilo_dsa_state *dsa,
2202 struct ilo_cp *cp)
2203 {
2204 const int state_align = 64 / 4;
2205 const int state_len = 3;
2206 uint32_t state_offset, *dw;
2207
2208
2209 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2210
2211 dw = ilo_cp_steal_ptr(cp, "DEPTH_STENCIL_STATE",
2212 state_len, state_align, &state_offset);
2213
2214 dw[0] = dsa->payload[0];
2215 dw[1] = dsa->payload[1];
2216 dw[2] = dsa->payload[2];
2217
2218 return state_offset;
2219 }
2220
2221 static inline uint32_t
2222 gen6_emit_SCISSOR_RECT(const struct ilo_dev_info *dev,
2223 const struct ilo_scissor_state *scissor,
2224 unsigned num_viewports,
2225 struct ilo_cp *cp)
2226 {
2227 const int state_align = 32 / 4;
2228 const int state_len = 2 * num_viewports;
2229 uint32_t state_offset, *dw;
2230
2231 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2232
2233 /*
2234 * From the Sandy Bridge PRM, volume 2 part 1, page 263:
2235 *
2236 * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
2237 * stored as an array of up to 16 elements..."
2238 */
2239 assert(num_viewports && num_viewports <= 16);
2240
2241 dw = ilo_cp_steal_ptr(cp, "SCISSOR_RECT",
2242 state_len, state_align, &state_offset);
2243
2244 memcpy(dw, scissor->payload, state_len * 4);
2245
2246 return state_offset;
2247 }
2248
/**
 * Emit BINDING_TABLE_STATE.
 *
 * The \p num_surface_states entries of \p surface_states are copied into
 * space obtained from \p cp.
 *
 * \param num_surface_states number of entries; asserted to be at most 256.
 * \return 0 when there are no entries, otherwise the state offset reported
 *         by ilo_cp_steal_ptr().
 */
static inline uint32_t
gen6_emit_BINDING_TABLE_STATE(const struct ilo_dev_info *dev,
                              uint32_t *surface_states,
                              int num_surface_states,
                              struct ilo_cp *cp)
{
   const int alignment = 32 / 4;
   const int num_dwords = num_surface_states;
   uint32_t offset;
   uint32_t *table;

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   /*
    * From the Sandy Bridge PRM, volume 4 part 1, page 69:
    *
    *     "It is stored as an array of up to 256 elements..."
    */
   assert(num_surface_states <= 256);

   /* nothing to emit for an empty table */
   if (num_surface_states == 0)
      return 0;

   table = ilo_cp_steal_ptr(cp, "BINDING_TABLE_STATE",
         num_dwords, alignment, &offset);

   memcpy(table, surface_states,
         num_surface_states * sizeof(*surface_states));

   return offset;
}
2278
2279 static inline uint32_t
2280 gen6_emit_SURFACE_STATE(const struct ilo_dev_info *dev,
2281 const struct ilo_view_surface *surf,
2282 bool for_render,
2283 struct ilo_cp *cp)
2284 {
2285 const int state_align = 32 / 4;
2286 const int state_len = (dev->gen >= ILO_GEN(7)) ? 8 : 6;
2287 uint32_t state_offset;
2288 uint32_t read_domains, write_domain;
2289
2290 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2291
2292 if (for_render) {
2293 read_domains = INTEL_DOMAIN_RENDER;
2294 write_domain = INTEL_DOMAIN_RENDER;
2295 }
2296 else {
2297 read_domains = INTEL_DOMAIN_SAMPLER;
2298 write_domain = 0;
2299 }
2300
2301 ilo_cp_steal(cp, "SURFACE_STATE", state_len, state_align, &state_offset);
2302
2303 STATIC_ASSERT(Elements(surf->payload) >= 8);
2304
2305 ilo_cp_write(cp, surf->payload[0]);
2306 ilo_cp_write_bo(cp, surf->payload[1],
2307 surf->bo, read_domains, write_domain);
2308 ilo_cp_write(cp, surf->payload[2]);
2309 ilo_cp_write(cp, surf->payload[3]);
2310 ilo_cp_write(cp, surf->payload[4]);
2311 ilo_cp_write(cp, surf->payload[5]);
2312
2313 if (dev->gen >= ILO_GEN(7)) {
2314 ilo_cp_write(cp, surf->payload[6]);
2315 ilo_cp_write(cp, surf->payload[7]);
2316 }
2317
2318 ilo_cp_end(cp);
2319
2320 return state_offset;
2321 }
2322
2323 static inline uint32_t
2324 gen6_emit_so_SURFACE_STATE(const struct ilo_dev_info *dev,
2325 const struct pipe_stream_output_target *so,
2326 const struct pipe_stream_output_info *so_info,
2327 int so_index,
2328 struct ilo_cp *cp)
2329 {
2330 struct ilo_buffer *buf = ilo_buffer(so->buffer);
2331 unsigned bo_offset, struct_size;
2332 enum pipe_format elem_format;
2333 struct ilo_view_surface surf;
2334
2335 ILO_GPE_VALID_GEN(dev, 6, 6);
2336
2337 bo_offset = so->buffer_offset + so_info->output[so_index].dst_offset * 4;
2338 struct_size = so_info->stride[so_info->output[so_index].output_buffer] * 4;
2339
2340 switch (so_info->output[so_index].num_components) {
2341 case 1:
2342 elem_format = PIPE_FORMAT_R32_FLOAT;
2343 break;
2344 case 2:
2345 elem_format = PIPE_FORMAT_R32G32_FLOAT;
2346 break;
2347 case 3:
2348 elem_format = PIPE_FORMAT_R32G32B32_FLOAT;
2349 break;
2350 case 4:
2351 elem_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
2352 break;
2353 default:
2354 assert(!"unexpected SO components length");
2355 elem_format = PIPE_FORMAT_R32_FLOAT;
2356 break;
2357 }
2358
2359 ilo_gpe_init_view_surface_for_buffer_gen6(dev, buf, bo_offset, so->buffer_size,
2360 struct_size, elem_format, false, true, &surf);
2361
2362 return gen6_emit_SURFACE_STATE(dev, &surf, false, cp);
2363 }
2364
2365 static inline uint32_t
2366 gen6_emit_SAMPLER_STATE(const struct ilo_dev_info *dev,
2367 const struct ilo_sampler_cso * const *samplers,
2368 const struct pipe_sampler_view * const *views,
2369 const uint32_t *sampler_border_colors,
2370 int num_samplers,
2371 struct ilo_cp *cp)
2372 {
2373 const int state_align = 32 / 4;
2374 const int state_len = 4 * num_samplers;
2375 uint32_t state_offset, *dw;
2376 int i;
2377
2378 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2379
2380 /*
2381 * From the Sandy Bridge PRM, volume 4 part 1, page 101:
2382 *
2383 * "The sampler state is stored as an array of up to 16 elements..."
2384 */
2385 assert(num_samplers <= 16);
2386
2387 if (!num_samplers)
2388 return 0;
2389
2390 dw = ilo_cp_steal_ptr(cp, "SAMPLER_STATE",
2391 state_len, state_align, &state_offset);
2392
2393 for (i = 0; i < num_samplers; i++) {
2394 const struct ilo_sampler_cso *sampler = samplers[i];
2395 const struct pipe_sampler_view *view = views[i];
2396 const uint32_t border_color = sampler_border_colors[i];
2397 uint32_t dw_filter, dw_wrap;
2398
2399 /* there may be holes */
2400 if (!sampler || !view) {
2401 /* disabled sampler */
2402 dw[0] = 1 << 31;
2403 dw[1] = 0;
2404 dw[2] = 0;
2405 dw[3] = 0;
2406 dw += 4;
2407
2408 continue;
2409 }
2410
2411 /* determine filter and wrap modes */
2412 switch (view->texture->target) {
2413 case PIPE_TEXTURE_1D:
2414 dw_filter = (sampler->anisotropic) ?
2415 sampler->dw_filter_aniso : sampler->dw_filter;
2416 dw_wrap = sampler->dw_wrap_1d;
2417 break;
2418 case PIPE_TEXTURE_3D:
2419 /*
2420 * From the Sandy Bridge PRM, volume 4 part 1, page 103:
2421 *
2422 * "Only MAPFILTER_NEAREST and MAPFILTER_LINEAR are supported for
2423 * surfaces of type SURFTYPE_3D."
2424 */
2425 dw_filter = sampler->dw_filter;
2426 dw_wrap = sampler->dw_wrap;
2427 break;
2428 case PIPE_TEXTURE_CUBE:
2429 dw_filter = (sampler->anisotropic) ?
2430 sampler->dw_filter_aniso : sampler->dw_filter;
2431 dw_wrap = sampler->dw_wrap_cube;
2432 break;
2433 default:
2434 dw_filter = (sampler->anisotropic) ?
2435 sampler->dw_filter_aniso : sampler->dw_filter;
2436 dw_wrap = sampler->dw_wrap;
2437 break;
2438 }
2439
2440 dw[0] = sampler->payload[0];
2441 dw[1] = sampler->payload[1];
2442 assert(!(border_color & 0x1f));
2443 dw[2] = border_color;
2444 dw[3] = sampler->payload[2];
2445
2446 dw[0] |= dw_filter;
2447
2448 if (dev->gen >= ILO_GEN(7)) {
2449 dw[3] |= dw_wrap;
2450 }
2451 else {
2452 /*
2453 * From the Sandy Bridge PRM, volume 4 part 1, page 21:
2454 *
2455 * "[DevSNB] Errata: Incorrect behavior is observed in cases
2456 * where the min and mag mode filters are different and
2457 * SurfMinLOD is nonzero. The determination of MagMode uses the
2458 * following equation instead of the one in the above
2459 * pseudocode: MagMode = (LOD + SurfMinLOD - Base <= 0)"
2460 *
2461 * As a way to work around that, we set Base to
2462 * view->u.tex.first_level.
2463 */
2464 dw[0] |= view->u.tex.first_level << 22;
2465
2466 dw[1] |= dw_wrap;
2467 }
2468
2469 dw += 4;
2470 }
2471
2472 return state_offset;
2473 }
2474
2475 static inline uint32_t
2476 gen6_emit_SAMPLER_BORDER_COLOR_STATE(const struct ilo_dev_info *dev,
2477 const struct ilo_sampler_cso *sampler,
2478 struct ilo_cp *cp)
2479 {
2480 const int state_align = 32 / 4;
2481 const int state_len = (dev->gen >= ILO_GEN(7)) ? 4 : 12;
2482 uint32_t state_offset, *dw;
2483
2484 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2485
2486 dw = ilo_cp_steal_ptr(cp, "SAMPLER_BORDER_COLOR_STATE",
2487 state_len, state_align, &state_offset);
2488
2489 /* see ilo_gpe_init_sampler_cso() */
2490 memcpy(dw, &sampler->payload[3], state_len * 4);
2491
2492 return state_offset;
2493 }
2494
/**
 * Reserve space for a push constant buffer.
 *
 * The reservation is \p size bytes rounded up to a multiple of 32.  The
 * bytes past \p size are zeroed; the first \p size bytes are not written
 * here.
 *
 * \param size size of the constants, in bytes.
 * \param pcb  when non-NULL, receives a pointer to the reserved space.
 * \return the state offset reported by ilo_cp_steal_ptr().
 */
static inline uint32_t
gen6_emit_push_constant_buffer(const struct ilo_dev_info *dev,
                               int size, void **pcb,
                               struct ilo_cp *cp)
{
   /*
    * For all VS, GS, FS, and CS push constant buffers, they must be aligned
    * to 32 bytes, and their sizes are specified in 256-bit units.
    */
   const int alignment = 32 / 4;
   const int num_dwords = align(size, 32) / 4;
   uint32_t offset;
   char *bytes;

   ILO_GPE_VALID_GEN(dev, 6, 7.5);

   bytes = ilo_cp_steal_ptr(cp, "PUSH_CONSTANT_BUFFER",
         num_dwords, alignment, &offset);

   /* clear the padding introduced by rounding the size up */
   if (size < num_dwords * 4)
      memset(bytes + size, 0, num_dwords * 4 - size);

   if (pcb != NULL)
      *pcb = bytes;

   return offset;
}
2523
2524 #endif /* ILO_GPE_GEN6_H */