3ea8b8f5e777246a5b6e69acc58a4eb6c78fd514
[mesa.git] / src / gallium / drivers / ilo / ilo_gpe_gen6.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_dual_blend.h"
29 #include "util/u_framebuffer.h"
30 #include "util/u_half.h"
31 #include "brw_defines.h"
32 #include "intel_reg.h"
33
34 #include "ilo_context.h"
35 #include "ilo_format.h"
36 #include "ilo_resource.h"
37 #include "ilo_shader.h"
38 #include "ilo_state.h"
39 #include "ilo_gpe_gen6.h"
40
41 /**
42 * Translate a pipe logicop to the matching hardware logicop.
43 */
44 static int
45 gen6_translate_pipe_logicop(unsigned logicop)
46 {
47 switch (logicop) {
48 case PIPE_LOGICOP_CLEAR: return BRW_LOGICOPFUNCTION_CLEAR;
49 case PIPE_LOGICOP_NOR: return BRW_LOGICOPFUNCTION_NOR;
50 case PIPE_LOGICOP_AND_INVERTED: return BRW_LOGICOPFUNCTION_AND_INVERTED;
51 case PIPE_LOGICOP_COPY_INVERTED: return BRW_LOGICOPFUNCTION_COPY_INVERTED;
52 case PIPE_LOGICOP_AND_REVERSE: return BRW_LOGICOPFUNCTION_AND_REVERSE;
53 case PIPE_LOGICOP_INVERT: return BRW_LOGICOPFUNCTION_INVERT;
54 case PIPE_LOGICOP_XOR: return BRW_LOGICOPFUNCTION_XOR;
55 case PIPE_LOGICOP_NAND: return BRW_LOGICOPFUNCTION_NAND;
56 case PIPE_LOGICOP_AND: return BRW_LOGICOPFUNCTION_AND;
57 case PIPE_LOGICOP_EQUIV: return BRW_LOGICOPFUNCTION_EQUIV;
58 case PIPE_LOGICOP_NOOP: return BRW_LOGICOPFUNCTION_NOOP;
59 case PIPE_LOGICOP_OR_INVERTED: return BRW_LOGICOPFUNCTION_OR_INVERTED;
60 case PIPE_LOGICOP_COPY: return BRW_LOGICOPFUNCTION_COPY;
61 case PIPE_LOGICOP_OR_REVERSE: return BRW_LOGICOPFUNCTION_OR_REVERSE;
62 case PIPE_LOGICOP_OR: return BRW_LOGICOPFUNCTION_OR;
63 case PIPE_LOGICOP_SET: return BRW_LOGICOPFUNCTION_SET;
64 default:
65 assert(!"unknown logicop function");
66 return BRW_LOGICOPFUNCTION_CLEAR;
67 }
68 }
69
70 /**
71 * Translate a pipe blend function to the matching hardware blend function.
72 */
73 static int
74 gen6_translate_pipe_blend(unsigned blend)
75 {
76 switch (blend) {
77 case PIPE_BLEND_ADD: return BRW_BLENDFUNCTION_ADD;
78 case PIPE_BLEND_SUBTRACT: return BRW_BLENDFUNCTION_SUBTRACT;
79 case PIPE_BLEND_REVERSE_SUBTRACT: return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
80 case PIPE_BLEND_MIN: return BRW_BLENDFUNCTION_MIN;
81 case PIPE_BLEND_MAX: return BRW_BLENDFUNCTION_MAX;
82 default:
83 assert(!"unknown blend function");
84 return BRW_BLENDFUNCTION_ADD;
85 };
86 }
87
88 /**
89 * Translate a pipe blend factor to the matching hardware blend factor.
90 */
91 static int
92 gen6_translate_pipe_blendfactor(unsigned blendfactor)
93 {
94 switch (blendfactor) {
95 case PIPE_BLENDFACTOR_ONE: return BRW_BLENDFACTOR_ONE;
96 case PIPE_BLENDFACTOR_SRC_COLOR: return BRW_BLENDFACTOR_SRC_COLOR;
97 case PIPE_BLENDFACTOR_SRC_ALPHA: return BRW_BLENDFACTOR_SRC_ALPHA;
98 case PIPE_BLENDFACTOR_DST_ALPHA: return BRW_BLENDFACTOR_DST_ALPHA;
99 case PIPE_BLENDFACTOR_DST_COLOR: return BRW_BLENDFACTOR_DST_COLOR;
100 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
101 case PIPE_BLENDFACTOR_CONST_COLOR: return BRW_BLENDFACTOR_CONST_COLOR;
102 case PIPE_BLENDFACTOR_CONST_ALPHA: return BRW_BLENDFACTOR_CONST_ALPHA;
103 case PIPE_BLENDFACTOR_SRC1_COLOR: return BRW_BLENDFACTOR_SRC1_COLOR;
104 case PIPE_BLENDFACTOR_SRC1_ALPHA: return BRW_BLENDFACTOR_SRC1_ALPHA;
105 case PIPE_BLENDFACTOR_ZERO: return BRW_BLENDFACTOR_ZERO;
106 case PIPE_BLENDFACTOR_INV_SRC_COLOR: return BRW_BLENDFACTOR_INV_SRC_COLOR;
107 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return BRW_BLENDFACTOR_INV_SRC_ALPHA;
108 case PIPE_BLENDFACTOR_INV_DST_ALPHA: return BRW_BLENDFACTOR_INV_DST_ALPHA;
109 case PIPE_BLENDFACTOR_INV_DST_COLOR: return BRW_BLENDFACTOR_INV_DST_COLOR;
110 case PIPE_BLENDFACTOR_INV_CONST_COLOR: return BRW_BLENDFACTOR_INV_CONST_COLOR;
111 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return BRW_BLENDFACTOR_INV_CONST_ALPHA;
112 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return BRW_BLENDFACTOR_INV_SRC1_COLOR;
113 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return BRW_BLENDFACTOR_INV_SRC1_ALPHA;
114 default:
115 assert(!"unknown blend factor");
116 return BRW_BLENDFACTOR_ONE;
117 };
118 }
119
120 /**
121 * Translate a pipe stencil op to the matching hardware stencil op.
122 */
123 static int
124 gen6_translate_pipe_stencil_op(unsigned stencil_op)
125 {
126 switch (stencil_op) {
127 case PIPE_STENCIL_OP_KEEP: return BRW_STENCILOP_KEEP;
128 case PIPE_STENCIL_OP_ZERO: return BRW_STENCILOP_ZERO;
129 case PIPE_STENCIL_OP_REPLACE: return BRW_STENCILOP_REPLACE;
130 case PIPE_STENCIL_OP_INCR: return BRW_STENCILOP_INCRSAT;
131 case PIPE_STENCIL_OP_DECR: return BRW_STENCILOP_DECRSAT;
132 case PIPE_STENCIL_OP_INCR_WRAP: return BRW_STENCILOP_INCR;
133 case PIPE_STENCIL_OP_DECR_WRAP: return BRW_STENCILOP_DECR;
134 case PIPE_STENCIL_OP_INVERT: return BRW_STENCILOP_INVERT;
135 default:
136 assert(!"unknown stencil op");
137 return BRW_STENCILOP_KEEP;
138 }
139 }
140
141 /**
142 * Translate a pipe texture mipfilter to the matching hardware mipfilter.
143 */
144 static int
145 gen6_translate_tex_mipfilter(unsigned filter)
146 {
147 switch (filter) {
148 case PIPE_TEX_MIPFILTER_NEAREST: return BRW_MIPFILTER_NEAREST;
149 case PIPE_TEX_MIPFILTER_LINEAR: return BRW_MIPFILTER_LINEAR;
150 case PIPE_TEX_MIPFILTER_NONE: return BRW_MIPFILTER_NONE;
151 default:
152 assert(!"unknown mipfilter");
153 return BRW_MIPFILTER_NONE;
154 }
155 }
156
157 /**
158 * Translate a pipe texture filter to the matching hardware mapfilter.
159 */
160 static int
161 gen6_translate_tex_filter(unsigned filter)
162 {
163 switch (filter) {
164 case PIPE_TEX_FILTER_NEAREST: return BRW_MAPFILTER_NEAREST;
165 case PIPE_TEX_FILTER_LINEAR: return BRW_MAPFILTER_LINEAR;
166 default:
167 assert(!"unknown sampler filter");
168 return BRW_MAPFILTER_NEAREST;
169 }
170 }
171
172 /**
173 * Translate a pipe texture coordinate wrapping mode to the matching hardware
174 * wrapping mode.
175 */
176 static int
177 gen6_translate_tex_wrap(unsigned wrap, bool clamp_to_edge)
178 {
179 /* clamp to edge or border? */
180 if (wrap == PIPE_TEX_WRAP_CLAMP) {
181 wrap = (clamp_to_edge) ?
182 PIPE_TEX_WRAP_CLAMP_TO_EDGE : PIPE_TEX_WRAP_CLAMP_TO_BORDER;
183 }
184
185 switch (wrap) {
186 case PIPE_TEX_WRAP_REPEAT: return BRW_TEXCOORDMODE_WRAP;
187 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return BRW_TEXCOORDMODE_CLAMP;
188 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return BRW_TEXCOORDMODE_CLAMP_BORDER;
189 case PIPE_TEX_WRAP_MIRROR_REPEAT: return BRW_TEXCOORDMODE_MIRROR;
190 case PIPE_TEX_WRAP_CLAMP:
191 case PIPE_TEX_WRAP_MIRROR_CLAMP:
192 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
193 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
194 default:
195 assert(!"unknown sampler wrap mode");
196 return BRW_TEXCOORDMODE_WRAP;
197 }
198 }
199
200 /**
201 * Translate a pipe shadow compare function to the matching hardware shadow
202 * function.
203 */
204 static int
205 gen6_translate_shadow_func(unsigned func)
206 {
207 /*
208 * For PIPE_FUNC_x, the reference value is on the left-hand side of the
209 * comparison, and 1.0 is returned when the comparison is true.
210 *
211 * For BRW_PREFILTER_x, the reference value is on the right-hand side of
212 * the comparison, and 0.0 is returned when the comparison is true.
213 */
214 switch (func) {
215 case PIPE_FUNC_NEVER: return BRW_PREFILTER_ALWAYS;
216 case PIPE_FUNC_LESS: return BRW_PREFILTER_LEQUAL;
217 case PIPE_FUNC_EQUAL: return BRW_PREFILTER_NOTEQUAL;
218 case PIPE_FUNC_LEQUAL: return BRW_PREFILTER_LESS;
219 case PIPE_FUNC_GREATER: return BRW_PREFILTER_GEQUAL;
220 case PIPE_FUNC_NOTEQUAL: return BRW_PREFILTER_EQUAL;
221 case PIPE_FUNC_GEQUAL: return BRW_PREFILTER_GREATER;
222 case PIPE_FUNC_ALWAYS: return BRW_PREFILTER_NEVER;
223 default:
224 assert(!"unknown shadow compare function");
225 return BRW_PREFILTER_NEVER;
226 }
227 }
228
229 /**
230 * Translate a pipe DSA test function to the matching hardware compare
231 * function.
232 */
233 static int
234 gen6_translate_dsa_func(unsigned func)
235 {
236 switch (func) {
237 case PIPE_FUNC_NEVER: return BRW_COMPAREFUNCTION_NEVER;
238 case PIPE_FUNC_LESS: return BRW_COMPAREFUNCTION_LESS;
239 case PIPE_FUNC_EQUAL: return BRW_COMPAREFUNCTION_EQUAL;
240 case PIPE_FUNC_LEQUAL: return BRW_COMPAREFUNCTION_LEQUAL;
241 case PIPE_FUNC_GREATER: return BRW_COMPAREFUNCTION_GREATER;
242 case PIPE_FUNC_NOTEQUAL: return BRW_COMPAREFUNCTION_NOTEQUAL;
243 case PIPE_FUNC_GEQUAL: return BRW_COMPAREFUNCTION_GEQUAL;
244 case PIPE_FUNC_ALWAYS: return BRW_COMPAREFUNCTION_ALWAYS;
245 default:
246 assert(!"unknown depth/stencil/alpha test function");
247 return BRW_COMPAREFUNCTION_NEVER;
248 }
249 }
250
251 static void
252 ve_init_cso(const struct ilo_dev_info *dev,
253 const struct pipe_vertex_element *state,
254 unsigned vb_index,
255 struct ilo_ve_cso *cso)
256 {
257 int comp[4] = {
258 BRW_VE1_COMPONENT_STORE_SRC,
259 BRW_VE1_COMPONENT_STORE_SRC,
260 BRW_VE1_COMPONENT_STORE_SRC,
261 BRW_VE1_COMPONENT_STORE_SRC,
262 };
263 int format;
264
265 ILO_GPE_VALID_GEN(dev, 6, 7.5);
266
267 switch (util_format_get_nr_components(state->src_format)) {
268 case 1: comp[1] = BRW_VE1_COMPONENT_STORE_0;
269 case 2: comp[2] = BRW_VE1_COMPONENT_STORE_0;
270 case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ?
271 BRW_VE1_COMPONENT_STORE_1_INT :
272 BRW_VE1_COMPONENT_STORE_1_FLT;
273 }
274
275 format = ilo_translate_vertex_format(state->src_format);
276
277 STATIC_ASSERT(Elements(cso->payload) >= 2);
278 cso->payload[0] =
279 vb_index << GEN6_VE0_INDEX_SHIFT |
280 GEN6_VE0_VALID |
281 format << BRW_VE0_FORMAT_SHIFT |
282 state->src_offset << BRW_VE0_SRC_OFFSET_SHIFT;
283
284 cso->payload[1] =
285 comp[0] << BRW_VE1_COMPONENT_0_SHIFT |
286 comp[1] << BRW_VE1_COMPONENT_1_SHIFT |
287 comp[2] << BRW_VE1_COMPONENT_2_SHIFT |
288 comp[3] << BRW_VE1_COMPONENT_3_SHIFT;
289 }
290
291 void
292 ilo_gpe_init_ve(const struct ilo_dev_info *dev,
293 unsigned num_states,
294 const struct pipe_vertex_element *states,
295 struct ilo_ve_state *ve)
296 {
297 unsigned i;
298
299 ILO_GPE_VALID_GEN(dev, 6, 7.5);
300
301 ve->count = num_states;
302 ve->vb_count = 0;
303
304 for (i = 0; i < num_states; i++) {
305 const unsigned pipe_idx = states[i].vertex_buffer_index;
306 const unsigned instance_divisor = states[i].instance_divisor;
307 unsigned hw_idx;
308
309 /*
310 * map the pipe vb to the hardware vb, which has a fixed instance
311 * divisor
312 */
313 for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
314 if (ve->vb_mapping[hw_idx] == pipe_idx &&
315 ve->instance_divisors[hw_idx] == instance_divisor)
316 break;
317 }
318
319 /* create one if there is no matching hardware vb */
320 if (hw_idx >= ve->vb_count) {
321 hw_idx = ve->vb_count++;
322
323 ve->vb_mapping[hw_idx] = pipe_idx;
324 ve->instance_divisors[hw_idx] = instance_divisor;
325 }
326
327 ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]);
328 }
329 }
330
331 void
332 ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev,
333 const struct ilo_shader_state *vs,
334 struct ilo_shader_cso *cso)
335 {
336 int start_grf, vue_read_len, max_threads;
337 uint32_t dw2, dw4, dw5;
338
339 ILO_GPE_VALID_GEN(dev, 6, 7.5);
340
341 start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG);
342 vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT);
343
344 /*
345 * From the Sandy Bridge PRM, volume 2 part 1, page 135:
346 *
347 * "(Vertex URB Entry Read Length) Specifies the number of pairs of
348 * 128-bit vertex elements to be passed into the payload for each
349 * vertex."
350 *
351 * "It is UNDEFINED to set this field to 0 indicating no Vertex URB
352 * data to be read and passed to the thread."
353 */
354 vue_read_len = (vue_read_len + 1) / 2;
355 if (!vue_read_len)
356 vue_read_len = 1;
357
358 switch (dev->gen) {
359 case ILO_GEN(6):
360 /*
361 * From the Sandy Bridge PRM, volume 1 part 1, page 22:
362 *
363 * "Device # of EUs #Threads/EU
364 * SNB GT2 12 5
365 * SNB GT1 6 4"
366 */
367 max_threads = (dev->gt == 2) ? 60 : 24;
368 break;
369 case ILO_GEN(7):
370 /*
371 * From the Ivy Bridge PRM, volume 1 part 1, page 18:
372 *
373 * "Device # of EUs #Threads/EU
374 * Ivy Bridge (GT2) 16 8
375 * Ivy Bridge (GT1) 6 6"
376 */
377 max_threads = (dev->gt == 2) ? 128 : 36;
378 break;
379 case ILO_GEN(7.5):
380 /* see brwCreateContext() */
381 max_threads = (dev->gt >= 2) ? 280 : 70;
382 break;
383 default:
384 max_threads = 1;
385 break;
386 }
387
388 dw2 = (true) ? 0 : GEN6_VS_FLOATING_POINT_MODE_ALT;
389
390 dw4 = start_grf << GEN6_VS_DISPATCH_START_GRF_SHIFT |
391 vue_read_len << GEN6_VS_URB_READ_LENGTH_SHIFT |
392 0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT;
393
394 dw5 = GEN6_VS_STATISTICS_ENABLE |
395 GEN6_VS_ENABLE;
396
397 if (dev->gen >= ILO_GEN(7.5))
398 dw5 |= (max_threads - 1) << HSW_VS_MAX_THREADS_SHIFT;
399 else
400 dw5 |= (max_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT;
401
402 STATIC_ASSERT(Elements(cso->payload) >= 3);
403 cso->payload[0] = dw2;
404 cso->payload[1] = dw4;
405 cso->payload[2] = dw5;
406 }
407
408 void
409 ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev,
410 const struct ilo_shader_state *gs,
411 struct ilo_shader_cso *cso)
412 {
413 int start_grf, vue_read_len, max_threads;
414 uint32_t dw2, dw4, dw5, dw6;
415
416 ILO_GPE_VALID_GEN(dev, 6, 6);
417
418 if (ilo_shader_get_type(gs) == PIPE_SHADER_GEOMETRY) {
419 start_grf = ilo_shader_get_kernel_param(gs,
420 ILO_KERNEL_URB_DATA_START_REG);
421
422 vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
423 }
424 else {
425 start_grf = ilo_shader_get_kernel_param(gs,
426 ILO_KERNEL_VS_GEN6_SO_START_REG);
427
428 vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT);
429 }
430
431 /*
432 * From the Sandy Bridge PRM, volume 2 part 1, page 153:
433 *
434 * "Specifies the amount of URB data read and passed in the thread
435 * payload for each Vertex URB entry, in 256-bit register increments.
436 *
437 * It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
438 * 0 indicating no Vertex URB data to be read and passed to the
439 * thread."
440 */
441 vue_read_len = (vue_read_len + 1) / 2;
442 if (!vue_read_len)
443 vue_read_len = 1;
444
445 /*
446 * From the Sandy Bridge PRM, volume 2 part 1, page 154:
447 *
448 * "Maximum Number of Threads valid range is [0,27] when Rendering
449 * Enabled bit is set."
450 *
451 * From the Sandy Bridge PRM, volume 2 part 1, page 173:
452 *
453 * "Programming Note: If the GS stage is enabled, software must always
454 * allocate at least one GS URB Entry. This is true even if the GS
455 * thread never needs to output vertices to the pipeline, e.g., when
456 * only performing stream output. This is an artifact of the need to
457 * pass the GS thread an initial destination URB handle."
458 *
459 * As such, we always enable rendering, and limit the number of threads.
460 */
461 if (dev->gt == 2) {
462 /* maximum is 60, but limited to 28 */
463 max_threads = 28;
464 }
465 else {
466 /* maximum is 24, but limited to 21 (see brwCreateContext()) */
467 max_threads = 21;
468 }
469
470 dw2 = GEN6_GS_SPF_MODE;
471
472 dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
473 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
474 start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
475
476 dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
477 GEN6_GS_STATISTICS_ENABLE |
478 GEN6_GS_SO_STATISTICS_ENABLE |
479 GEN6_GS_RENDERING_ENABLE;
480
481 /*
482 * we cannot make use of GEN6_GS_REORDER because it will reorder
483 * triangle strips according to D3D rules (triangle 2N+1 uses vertices
484 * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
485 * (2N+2, 2N+1, 2N+3)).
486 */
487 dw6 = GEN6_GS_ENABLE;
488
489 if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY))
490 dw6 |= GEN6_GS_DISCARD_ADJACENCY;
491
492 if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) {
493 const uint32_t svbi_post_inc =
494 ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC);
495
496 dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
497 if (svbi_post_inc) {
498 dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
499 svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
500 }
501 }
502
503 STATIC_ASSERT(Elements(cso->payload) >= 4);
504 cso->payload[0] = dw2;
505 cso->payload[1] = dw4;
506 cso->payload[2] = dw5;
507 cso->payload[3] = dw6;
508 }
509
510 void
511 ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev,
512 const struct pipe_rasterizer_state *state,
513 struct ilo_rasterizer_clip *clip)
514 {
515 uint32_t dw1, dw2, dw3;
516
517 ILO_GPE_VALID_GEN(dev, 6, 7.5);
518
519 dw1 = GEN6_CLIP_STATISTICS_ENABLE;
520
521 if (dev->gen >= ILO_GEN(7)) {
522 /*
523 * From the Ivy Bridge PRM, volume 2 part 1, page 219:
524 *
525 * "Workaround : Due to Hardware issue "EarlyCull" needs to be
526 * enabled only for the cases where the incoming primitive topology
527 * into the clipper guaranteed to be Trilist."
528 *
529 * What does this mean?
530 */
531 dw1 |= 0 << 19 |
532 GEN7_CLIP_EARLY_CULL;
533
534 if (state->front_ccw)
535 dw1 |= GEN7_CLIP_WINDING_CCW;
536
537 switch (state->cull_face) {
538 case PIPE_FACE_NONE:
539 dw1 |= GEN7_CLIP_CULLMODE_NONE;
540 break;
541 case PIPE_FACE_FRONT:
542 dw1 |= GEN7_CLIP_CULLMODE_FRONT;
543 break;
544 case PIPE_FACE_BACK:
545 dw1 |= GEN7_CLIP_CULLMODE_BACK;
546 break;
547 case PIPE_FACE_FRONT_AND_BACK:
548 dw1 |= GEN7_CLIP_CULLMODE_BOTH;
549 break;
550 }
551 }
552
553 dw2 = GEN6_CLIP_ENABLE |
554 GEN6_CLIP_XY_TEST |
555 state->clip_plane_enable << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
556 GEN6_CLIP_MODE_NORMAL;
557
558 if (state->clip_halfz)
559 dw2 |= GEN6_CLIP_API_D3D;
560 else
561 dw2 |= GEN6_CLIP_API_OGL;
562
563 if (state->depth_clip)
564 dw2 |= GEN6_CLIP_Z_TEST;
565
566 if (state->flatshade_first) {
567 dw2 |= 0 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
568 0 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
569 1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
570 }
571 else {
572 dw2 |= 2 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
573 1 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
574 2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
575 }
576
577 dw3 = 0x1 << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
578 0x7ff << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT;
579
580 clip->payload[0] = dw1;
581 clip->payload[1] = dw2;
582 clip->payload[2] = dw3;
583
584 clip->can_enable_guardband = true;
585
586 /*
587 * There are several reasons that guard band test should be disabled
588 *
589 * - GL wide points (to avoid partially visibie object)
590 * - GL wide or AA lines (to avoid partially visibie object)
591 */
592 if (state->point_size_per_vertex || state->point_size > 1.0f)
593 clip->can_enable_guardband = false;
594 if (state->line_smooth || state->line_width > 1.0f)
595 clip->can_enable_guardband = false;
596 }
597
598 void
599 ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
600 const struct pipe_rasterizer_state *state,
601 struct ilo_rasterizer_sf *sf)
602 {
603 float offset_const, offset_scale, offset_clamp;
604 int line_width, point_width;
605 uint32_t dw1, dw2, dw3;
606
607 ILO_GPE_VALID_GEN(dev, 6, 7.5);
608
609 /*
610 * Scale the constant term. The minimum representable value used by the HW
611 * is not large enouch to be the minimum resolvable difference.
612 */
613 offset_const = state->offset_units * 2.0f;
614
615 offset_scale = state->offset_scale;
616 offset_clamp = state->offset_clamp;
617
618 /*
619 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
620 *
621 * "This bit (Statistics Enable) should be set whenever clipping is
622 * enabled and the Statistics Enable bit is set in CLIP_STATE. It
623 * should be cleared if clipping is disabled or Statistics Enable in
624 * CLIP_STATE is clear."
625 */
626 dw1 = GEN6_SF_STATISTICS_ENABLE |
627 GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
628
629 /* XXX GEN6 path seems to work fine for GEN7 */
630 if (false && dev->gen >= ILO_GEN(7)) {
631 /*
632 * From the Ivy Bridge PRM, volume 2 part 1, page 258:
633 *
634 * "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
635 * Enable Solid , Global Depth Offset Enable Wireframe, and Global
636 * Depth Offset Enable Point) should be set whenever non zero depth
637 * bias (Slope, Bias) values are used. Setting this bit may have
638 * some degradation of performance for some workloads."
639 */
640 if (state->offset_tri || state->offset_line || state->offset_point) {
641 /* XXX need to scale offset_const according to the depth format */
642 dw1 |= GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS;
643
644 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID |
645 GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME |
646 GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
647 }
648 else {
649 offset_const = 0.0f;
650 offset_scale = 0.0f;
651 offset_clamp = 0.0f;
652 }
653 }
654 else {
655 if (state->offset_tri)
656 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
657 if (state->offset_line)
658 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
659 if (state->offset_point)
660 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
661 }
662
663 switch (state->fill_front) {
664 case PIPE_POLYGON_MODE_FILL:
665 dw1 |= GEN6_SF_FRONT_SOLID;
666 break;
667 case PIPE_POLYGON_MODE_LINE:
668 dw1 |= GEN6_SF_FRONT_WIREFRAME;
669 break;
670 case PIPE_POLYGON_MODE_POINT:
671 dw1 |= GEN6_SF_FRONT_POINT;
672 break;
673 }
674
675 switch (state->fill_back) {
676 case PIPE_POLYGON_MODE_FILL:
677 dw1 |= GEN6_SF_BACK_SOLID;
678 break;
679 case PIPE_POLYGON_MODE_LINE:
680 dw1 |= GEN6_SF_BACK_WIREFRAME;
681 break;
682 case PIPE_POLYGON_MODE_POINT:
683 dw1 |= GEN6_SF_BACK_POINT;
684 break;
685 }
686
687 if (state->front_ccw)
688 dw1 |= GEN6_SF_WINDING_CCW;
689
690 dw2 = 0;
691
692 if (state->line_smooth) {
693 /*
694 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
695 *
696 * "This field (Anti-aliasing Enable) must be disabled if any of the
697 * render targets have integer (UINT or SINT) surface format."
698 *
699 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
700 *
701 * "This field (Hierarchical Depth Buffer Enable) must be disabled
702 * if Anti-aliasing Enable in 3DSTATE_SF is enabled.
703 *
704 * TODO We do not check those yet.
705 */
706 dw2 |= GEN6_SF_LINE_AA_ENABLE |
707 GEN6_SF_LINE_END_CAP_WIDTH_1_0;
708 }
709
710 switch (state->cull_face) {
711 case PIPE_FACE_NONE:
712 dw2 |= GEN6_SF_CULL_NONE;
713 break;
714 case PIPE_FACE_FRONT:
715 dw2 |= GEN6_SF_CULL_FRONT;
716 break;
717 case PIPE_FACE_BACK:
718 dw2 |= GEN6_SF_CULL_BACK;
719 break;
720 case PIPE_FACE_FRONT_AND_BACK:
721 dw2 |= GEN6_SF_CULL_BOTH;
722 break;
723 }
724
725 /*
726 * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
727 * pixels in the minor direction. We have to make the lines slightly
728 * thicker, 0.5 pixel on both sides, so that they intersect that many
729 * pixels are considered into the lines.
730 *
731 * Line width is in U3.7.
732 */
733 line_width = (int) ((state->line_width +
734 (float) state->line_smooth) * 128.0f + 0.5f);
735 line_width = CLAMP(line_width, 0, 1023);
736
737 if (line_width == 128 && !state->line_smooth) {
738 /* use GIQ rules */
739 line_width = 0;
740 }
741
742 dw2 |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
743
744 if (dev->gen >= ILO_GEN(7.5) && state->line_stipple_enable)
745 dw2 |= HSW_SF_LINE_STIPPLE_ENABLE;
746
747 if (state->scissor)
748 dw2 |= GEN6_SF_SCISSOR_ENABLE;
749
750 dw3 = GEN6_SF_LINE_AA_MODE_TRUE |
751 GEN6_SF_VERTEX_SUBPIXEL_8BITS;
752
753 if (state->line_last_pixel)
754 dw3 |= 1 << 31;
755
756 if (state->flatshade_first) {
757 dw3 |= 0 << GEN6_SF_TRI_PROVOKE_SHIFT |
758 0 << GEN6_SF_LINE_PROVOKE_SHIFT |
759 1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
760 }
761 else {
762 dw3 |= 2 << GEN6_SF_TRI_PROVOKE_SHIFT |
763 1 << GEN6_SF_LINE_PROVOKE_SHIFT |
764 2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
765 }
766
767 if (!state->point_size_per_vertex)
768 dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH;
769
770 /* in U8.3 */
771 point_width = (int) (state->point_size * 8.0f + 0.5f);
772 point_width = CLAMP(point_width, 1, 2047);
773
774 dw3 |= point_width;
775
776 STATIC_ASSERT(Elements(sf->payload) >= 6);
777 sf->payload[0] = dw1;
778 sf->payload[1] = dw2;
779 sf->payload[2] = dw3;
780 sf->payload[3] = fui(offset_const);
781 sf->payload[4] = fui(offset_scale);
782 sf->payload[5] = fui(offset_clamp);
783
784 if (state->multisample) {
785 sf->dw_msaa = GEN6_SF_MSRAST_ON_PATTERN;
786
787 /*
788 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
789 *
790 * "Software must not program a value of 0.0 when running in
791 * MSRASTMODE_ON_xxx modes - zero-width lines are not available
792 * when multisampling rasterization is enabled."
793 */
794 if (!line_width) {
795 line_width = 128; /* 1.0f */
796
797 sf->dw_msaa |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
798 }
799 }
800 else {
801 sf->dw_msaa = 0;
802 }
803 }
804
805 void
806 ilo_gpe_init_rasterizer_wm_gen6(const struct ilo_dev_info *dev,
807 const struct pipe_rasterizer_state *state,
808 struct ilo_rasterizer_wm *wm)
809 {
810 uint32_t dw5, dw6;
811
812 ILO_GPE_VALID_GEN(dev, 6, 6);
813
814 /* only the FF unit states are set, as in GEN7 */
815
816 dw5 = GEN6_WM_LINE_AA_WIDTH_2_0;
817
818 /* same value as in 3DSTATE_SF */
819 if (state->line_smooth)
820 dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0;
821
822 if (state->poly_stipple_enable)
823 dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE;
824 if (state->line_stipple_enable)
825 dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE;
826
827 dw6 = GEN6_WM_POSITION_ZW_PIXEL |
828 GEN6_WM_MSRAST_OFF_PIXEL |
829 GEN6_WM_MSDISPMODE_PERSAMPLE;
830
831 if (state->bottom_edge_rule)
832 dw6 |= GEN6_WM_POINT_RASTRULE_UPPER_RIGHT;
833
834 /*
835 * assertion that makes sure
836 *
837 * dw6 |= wm->dw_msaa_rast | wm->dw_msaa_disp;
838 *
839 * is valid
840 */
841 STATIC_ASSERT(GEN6_WM_MSRAST_OFF_PIXEL == 0 &&
842 GEN6_WM_MSDISPMODE_PERSAMPLE == 0);
843
844 wm->dw_msaa_rast =
845 (state->multisample) ? GEN6_WM_MSRAST_ON_PATTERN : 0;
846 wm->dw_msaa_disp = GEN6_WM_MSDISPMODE_PERPIXEL;
847
848 STATIC_ASSERT(Elements(wm->payload) >= 2);
849 wm->payload[0] = dw5;
850 wm->payload[1] = dw6;
851 }
852
853 void
854 ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev,
855 const struct ilo_shader_state *fs,
856 struct ilo_shader_cso *cso)
857 {
858 int start_grf, input_count, interps, max_threads;
859 uint32_t dw2, dw4, dw5, dw6;
860
861 ILO_GPE_VALID_GEN(dev, 6, 6);
862
863 start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
864 input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
865 interps = ilo_shader_get_kernel_param(fs,
866 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
867
868 /* see brwCreateContext() */
869 max_threads = (dev->gt == 2) ? 80 : 40;
870
871 dw2 = (true) ? 0 : GEN6_WM_FLOATING_POINT_MODE_ALT;
872
873 dw4 = start_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0 |
874 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1 |
875 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2;
876
877 dw5 = (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
878
879 /*
880 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
881 *
882 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
883 * PS kernel or color calculator has the ability to kill (discard)
884 * pixels or samples, other than due to depth or stencil testing.
885 * This bit is required to be ENABLED in the following situations:
886 *
887 * The API pixel shader program contains "killpix" or "discard"
888 * instructions, or other code in the pixel shader kernel that can
889 * cause the final pixel mask to differ from the pixel mask received
890 * on dispatch.
891 *
892 * A sampler with chroma key enabled with kill pixel mode is used by
893 * the pixel shader.
894 *
895 * Any render target has Alpha Test Enable or AlphaToCoverage Enable
896 * enabled.
897 *
898 * The pixel shader kernel generates and outputs oMask.
899 *
900 * Note: As ClipDistance clipping is fully supported in hardware and
901 * therefore not via PS instructions, there should be no need to
902 * ENABLE this bit due to ClipDistance clipping."
903 */
904 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
905 dw5 |= GEN6_WM_KILL_ENABLE;
906
907 /*
908 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
909 *
910 * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
911 * field must be set to disabled."
912 *
913 * TODO This is not checked yet.
914 */
915 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
916 dw5 |= GEN6_WM_COMPUTED_DEPTH;
917
918 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
919 dw5 |= GEN6_WM_USES_SOURCE_DEPTH;
920
921 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
922 dw5 |= GEN6_WM_USES_SOURCE_W;
923
924 /*
925 * TODO set this bit only when
926 *
927 * a) fs writes colors and color is not masked, or
928 * b) fs writes depth, or
929 * c) fs or cc kills
930 */
931 if (true)
932 dw5 |= GEN6_WM_DISPATCH_ENABLE;
933
934 assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
935 dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
936
937 dw6 = input_count << GEN6_WM_NUM_SF_OUTPUTS_SHIFT |
938 GEN6_WM_POSOFFSET_NONE |
939 interps << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
940
941 STATIC_ASSERT(Elements(cso->payload) >= 4);
942 cso->payload[0] = dw2;
943 cso->payload[1] = dw4;
944 cso->payload[2] = dw5;
945 cso->payload[3] = dw6;
946 }
947
948 struct ilo_zs_surface_info {
949 int surface_type;
950 int format;
951
952 struct {
953 struct intel_bo *bo;
954 unsigned stride;
955 enum intel_tiling_mode tiling;
956 uint32_t offset;
957 } zs, stencil, hiz;
958
959 unsigned width, height, depth;
960 unsigned lod, first_layer, num_layers;
961 uint32_t x_offset, y_offset;
962 };
963
964 static void
965 zs_init_info_null(const struct ilo_dev_info *dev,
966 struct ilo_zs_surface_info *info)
967 {
968 ILO_GPE_VALID_GEN(dev, 6, 7.5);
969
970 memset(info, 0, sizeof(*info));
971
972 info->surface_type = BRW_SURFACE_NULL;
973 info->format = BRW_DEPTHFORMAT_D32_FLOAT;
974 info->width = 1;
975 info->height = 1;
976 info->depth = 1;
977 info->num_layers = 1;
978 }
979
980 static void
981 zs_init_info(const struct ilo_dev_info *dev,
982 const struct ilo_texture *tex,
983 enum pipe_format format, unsigned level,
984 unsigned first_layer, unsigned num_layers,
985 bool offset_to_layer, struct ilo_zs_surface_info *info)
986 {
987 uint32_t x_offset[3], y_offset[3];
988 bool separate_stencil;
989
990 ILO_GPE_VALID_GEN(dev, 6, 7.5);
991
992 memset(info, 0, sizeof(*info));
993
994 info->surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
995
996 if (info->surface_type == BRW_SURFACE_CUBE) {
997 /*
998 * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
999 *
1000 * "For Other Surfaces (Cube Surfaces):
1001 * This field (Minimum Array Element) is ignored."
1002 *
1003 * "For Other Surfaces (Cube Surfaces):
1004 * This field (Render Target View Extent) is ignored."
1005 *
1006 * As such, we cannot set first_layer and num_layers on cube surfaces.
1007 * To work around that, treat it as a 2D surface.
1008 */
1009 info->surface_type = BRW_SURFACE_2D;
1010 }
1011
1012 if (dev->gen >= ILO_GEN(7)) {
1013 separate_stencil = true;
1014 }
1015 else {
1016 /*
1017 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
1018 *
1019 * "This field (Separate Stencil Buffer Enable) must be set to the
1020 * same value (enabled or disabled) as Hierarchical Depth Buffer
1021 * Enable."
1022 */
1023 separate_stencil =
1024 ilo_texture_can_enable_hiz(tex, level, first_layer, num_layers);
1025 }
1026
1027 /*
1028 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
1029 *
1030 * "If this field (Hierarchical Depth Buffer Enable) is enabled, the
1031 * Surface Format of the depth buffer cannot be
1032 * D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
1033 * requires the separate stencil buffer."
1034 *
1035 * From the Ironlake PRM, volume 2 part 1, page 330:
1036 *
1037 * "If this field (Separate Stencil Buffer Enable) is disabled, the
1038 * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
1039 *
1040 * There is no similar restriction for GEN6. But when D24_UNORM_X8_UINT
1041 * is indeed used, the depth values output by the fragment shaders will
1042 * be different when read back.
1043 *
1044 * As for GEN7+, separate_stencil is always true.
1045 */
1046 switch (format) {
1047 case PIPE_FORMAT_Z16_UNORM:
1048 info->format = BRW_DEPTHFORMAT_D16_UNORM;
1049 break;
1050 case PIPE_FORMAT_Z32_FLOAT:
1051 info->format = BRW_DEPTHFORMAT_D32_FLOAT;
1052 break;
1053 case PIPE_FORMAT_Z24X8_UNORM:
1054 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1055 info->format = (separate_stencil) ?
1056 BRW_DEPTHFORMAT_D24_UNORM_X8_UINT :
1057 BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
1058 break;
1059 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1060 info->format = (separate_stencil) ?
1061 BRW_DEPTHFORMAT_D32_FLOAT :
1062 BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
1063 break;
1064 case PIPE_FORMAT_S8_UINT:
1065 if (separate_stencil) {
1066 info->format = BRW_DEPTHFORMAT_D32_FLOAT;
1067 break;
1068 }
1069 /* fall through */
1070 default:
1071 assert(!"unsupported depth/stencil format");
1072 zs_init_info_null(dev, info);
1073 return;
1074 break;
1075 }
1076
1077 if (format != PIPE_FORMAT_S8_UINT) {
1078 info->zs.bo = tex->bo;
1079 info->zs.stride = tex->bo_stride;
1080 info->zs.tiling = tex->tiling;
1081
1082 if (offset_to_layer) {
1083 info->zs.offset = ilo_texture_get_slice_offset(tex,
1084 level, first_layer, &x_offset[0], &y_offset[0]);
1085 }
1086 }
1087
1088 if (tex->separate_s8 || format == PIPE_FORMAT_S8_UINT) {
1089 const struct ilo_texture *s8_tex =
1090 (tex->separate_s8) ? tex->separate_s8 : tex;
1091
1092 info->stencil.bo = s8_tex->bo;
1093
1094 /*
1095 * From the Sandy Bridge PRM, volume 2 part 1, page 329:
1096 *
1097 * "The pitch must be set to 2x the value computed based on width,
1098 * as the stencil buffer is stored with two rows interleaved."
1099 *
1100 * According to the classic driver, we need to do the same for GEN7+
1101 * even though the Ivy Bridge PRM does not say anything about it.
1102 */
1103 info->stencil.stride = s8_tex->bo_stride * 2;
1104
1105 info->stencil.tiling = s8_tex->tiling;
1106
1107 if (offset_to_layer) {
1108 info->stencil.offset = ilo_texture_get_slice_offset(s8_tex,
1109 level, first_layer, &x_offset[1], &y_offset[1]);
1110 }
1111 }
1112
1113 if (ilo_texture_can_enable_hiz(tex, level, first_layer, num_layers)) {
1114 info->hiz.bo = tex->hiz.bo;
1115 info->hiz.stride = tex->hiz.bo_stride;
1116 info->hiz.tiling = INTEL_TILING_Y;
1117
1118 /*
1119 * Layer offsetting is used on GEN6 only. And on GEN6, HiZ is enabled
1120 * only when the depth buffer is non-mipmapped and non-array, making
1121 * layer offsetting no-op.
1122 */
1123 if (offset_to_layer) {
1124 assert(level == 0 && first_layer == 0 && num_layers == 1);
1125
1126 info->hiz.offset = 0;
1127 x_offset[2] = 0;
1128 y_offset[2] = 0;
1129 }
1130 }
1131
1132 info->width = tex->base.width0;
1133 info->height = tex->base.height0;
1134 info->depth = (tex->base.target == PIPE_TEXTURE_3D) ?
1135 tex->base.depth0 : num_layers;
1136
1137 info->lod = level;
1138 info->first_layer = first_layer;
1139 info->num_layers = num_layers;
1140
1141 if (offset_to_layer) {
1142 /* the size of the layer */
1143 info->width = u_minify(info->width, level);
1144 info->height = u_minify(info->height, level);
1145 if (info->surface_type == BRW_SURFACE_3D)
1146 info->depth = u_minify(info->depth, level);
1147 else
1148 info->depth = 1;
1149
1150 /* no layered rendering */
1151 assert(num_layers == 1);
1152
1153 info->lod = 0;
1154 info->first_layer = 0;
1155 info->num_layers = 1;
1156
1157 /* all three share the same X/Y offsets */
1158 if (info->zs.bo) {
1159 if (info->stencil.bo) {
1160 assert(x_offset[0] == x_offset[1]);
1161 assert(y_offset[0] == y_offset[1]);
1162 }
1163
1164 info->x_offset = x_offset[0];
1165 info->y_offset = y_offset[0];
1166 }
1167 else {
1168 assert(info->stencil.bo);
1169
1170 info->x_offset = x_offset[1];
1171 info->y_offset = y_offset[1];
1172 }
1173
1174 if (info->hiz.bo) {
1175 assert(info->x_offset == x_offset[2]);
1176 assert(info->y_offset == y_offset[2]);
1177 }
1178
1179 /*
1180 * From the Sandy Bridge PRM, volume 2 part 1, page 326:
1181 *
1182 * "The 3 LSBs of both offsets (Depth Coordinate Offset Y and Depth
1183 * Coordinate Offset X) must be zero to ensure correct alignment"
1184 *
1185 * XXX Skip the check for gen6, which seems to be fine. We need to make
1186 * sure that does not happen eventually.
1187 */
1188 if (dev->gen >= ILO_GEN(7)) {
1189 assert((info->x_offset & 7) == 0 && (info->y_offset & 7) == 0);
1190 info->x_offset &= ~7;
1191 info->y_offset &= ~7;
1192 }
1193
1194 info->width += info->x_offset;
1195 info->height += info->y_offset;
1196
1197 /* we have to treat them as 2D surfaces */
1198 if (info->surface_type == BRW_SURFACE_CUBE) {
1199 assert(tex->base.width0 == tex->base.height0);
1200 /* we will set slice_offset to point to the single face */
1201 info->surface_type = BRW_SURFACE_2D;
1202 }
1203 else if (info->surface_type == BRW_SURFACE_1D && info->height > 1) {
1204 assert(tex->base.height0 == 1);
1205 info->surface_type = BRW_SURFACE_2D;
1206 }
1207 }
1208 }
1209
1210 void
1211 ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev,
1212 const struct ilo_texture *tex,
1213 enum pipe_format format, unsigned level,
1214 unsigned first_layer, unsigned num_layers,
1215 bool offset_to_layer, struct ilo_zs_surface *zs)
1216 {
1217 const int max_2d_size = (dev->gen >= ILO_GEN(7)) ? 16384 : 8192;
1218 const int max_array_size = (dev->gen >= ILO_GEN(7)) ? 2048 : 512;
1219 struct ilo_zs_surface_info info;
1220 uint32_t dw1, dw2, dw3, dw4, dw5, dw6;
1221
1222 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1223
1224 if (tex) {
1225 zs_init_info(dev, tex, format, level, first_layer, num_layers,
1226 offset_to_layer, &info);
1227 }
1228 else {
1229 zs_init_info_null(dev, &info);
1230 }
1231
1232 switch (info.surface_type) {
1233 case BRW_SURFACE_NULL:
1234 break;
1235 case BRW_SURFACE_1D:
1236 assert(info.width <= max_2d_size && info.height == 1 &&
1237 info.depth <= max_array_size);
1238 assert(info.first_layer < max_array_size - 1 &&
1239 info.num_layers <= max_array_size);
1240 break;
1241 case BRW_SURFACE_2D:
1242 assert(info.width <= max_2d_size && info.height <= max_2d_size &&
1243 info.depth <= max_array_size);
1244 assert(info.first_layer < max_array_size - 1 &&
1245 info.num_layers <= max_array_size);
1246 break;
1247 case BRW_SURFACE_3D:
1248 assert(info.width <= 2048 && info.height <= 2048 && info.depth <= 2048);
1249 assert(info.first_layer < 2048 && info.num_layers <= max_array_size);
1250 assert(info.x_offset == 0 && info.y_offset == 0);
1251 break;
1252 case BRW_SURFACE_CUBE:
1253 assert(info.width <= max_2d_size && info.height <= max_2d_size &&
1254 info.depth == 1);
1255 assert(info.first_layer == 0 && info.num_layers == 1);
1256 assert(info.width == info.height);
1257 assert(info.x_offset == 0 && info.y_offset == 0);
1258 break;
1259 default:
1260 assert(!"unexpected depth surface type");
1261 break;
1262 }
1263
1264 dw1 = info.surface_type << 29 |
1265 info.format << 18;
1266
1267 if (info.zs.bo) {
1268 /* required for GEN6+ */
1269 assert(info.zs.tiling == INTEL_TILING_Y);
1270 assert(info.zs.stride > 0 && info.zs.stride < 128 * 1024 &&
1271 info.zs.stride % 128 == 0);
1272 assert(info.width <= info.zs.stride);
1273
1274 dw1 |= (info.zs.stride - 1);
1275 dw2 = info.zs.offset;
1276 }
1277 else {
1278 dw2 = 0;
1279 }
1280
1281 if (dev->gen >= ILO_GEN(7)) {
1282 if (info.zs.bo)
1283 dw1 |= 1 << 28;
1284
1285 if (info.stencil.bo)
1286 dw1 |= 1 << 27;
1287
1288 if (info.hiz.bo)
1289 dw1 |= 1 << 22;
1290
1291 dw3 = (info.height - 1) << 18 |
1292 (info.width - 1) << 4 |
1293 info.lod;
1294
1295 dw4 = (info.depth - 1) << 21 |
1296 info.first_layer << 10;
1297
1298 dw5 = info.y_offset << 16 | info.x_offset;
1299
1300 dw6 = (info.num_layers - 1) << 21;
1301 }
1302 else {
1303 /* always Y-tiled */
1304 dw1 |= 1 << 27 |
1305 1 << 26;
1306
1307 if (info.hiz.bo) {
1308 dw1 |= 1 << 22 |
1309 1 << 21;
1310 }
1311
1312 dw3 = (info.height - 1) << 19 |
1313 (info.width - 1) << 6 |
1314 info.lod << 2 |
1315 BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1;
1316
1317 dw4 = (info.depth - 1) << 21 |
1318 info.first_layer << 10 |
1319 (info.num_layers - 1) << 1;
1320
1321 dw5 = info.y_offset << 16 | info.x_offset;
1322
1323 dw6 = 0;
1324 }
1325
1326 STATIC_ASSERT(Elements(zs->payload) >= 10);
1327
1328 zs->payload[0] = dw1;
1329 zs->payload[1] = dw2;
1330 zs->payload[2] = dw3;
1331 zs->payload[3] = dw4;
1332 zs->payload[4] = dw5;
1333 zs->payload[5] = dw6;
1334
1335 /* do not increment reference count */
1336 zs->bo = info.zs.bo;
1337
1338 /* separate stencil */
1339 if (info.stencil.bo) {
1340 assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 &&
1341 info.stencil.stride % 128 == 0);
1342
1343 zs->payload[6] = info.stencil.stride - 1;
1344 zs->payload[7] = info.stencil.offset;
1345
1346 if (dev->gen >= ILO_GEN(7.5))
1347 zs->payload[6] |= HSW_STENCIL_ENABLED;
1348
1349 /* do not increment reference count */
1350 zs->separate_s8_bo = info.stencil.bo;
1351 }
1352 else {
1353 zs->payload[6] = 0;
1354 zs->payload[7] = 0;
1355 zs->separate_s8_bo = NULL;
1356 }
1357
1358 /* hiz */
1359 if (info.hiz.bo) {
1360 zs->payload[8] = info.hiz.stride - 1;
1361 zs->payload[9] = info.hiz.offset;
1362
1363 /* do not increment reference count */
1364 zs->hiz_bo = info.hiz.bo;
1365 }
1366 else {
1367 zs->payload[8] = 0;
1368 zs->payload[9] = 0;
1369 zs->hiz_bo = NULL;
1370 }
1371 }
1372
1373 static void
1374 viewport_get_guardband(const struct ilo_dev_info *dev,
1375 int center_x, int center_y,
1376 int *min_gbx, int *max_gbx,
1377 int *min_gby, int *max_gby)
1378 {
1379 /*
1380 * From the Sandy Bridge PRM, volume 2 part 1, page 234:
1381 *
1382 * "Per-Device Guardband Extents
1383 *
1384 * - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
1385 * - Maximum Post-Clamp Delta (X or Y): 16K"
1386 *
1387 * "In addition, in order to be correctly rendered, objects must have a
1388 * screenspace bounding box not exceeding 8K in the X or Y direction.
1389 * This additional restriction must also be comprehended by software,
1390 * i.e., enforced by use of clipping."
1391 *
1392 * From the Ivy Bridge PRM, volume 2 part 1, page 248:
1393 *
1394 * "Per-Device Guardband Extents
1395 *
1396 * - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
1397 * - Maximum Post-Clamp Delta (X or Y): N/A"
1398 *
1399 * "In addition, in order to be correctly rendered, objects must have a
1400 * screenspace bounding box not exceeding 8K in the X or Y direction.
1401 * This additional restriction must also be comprehended by software,
1402 * i.e., enforced by use of clipping."
1403 *
1404 * Combined, the bounding box of any object can not exceed 8K in both
1405 * width and height.
1406 *
1407 * Below we set the guardband as a squre of length 8K, centered at where
1408 * the viewport is. This makes sure all objects passing the GB test are
1409 * valid to the renderer, and those failing the XY clipping have a
1410 * better chance of passing the GB test.
1411 */
1412 const int max_extent = (dev->gen >= ILO_GEN(7)) ? 32768 : 16384;
1413 const int half_len = 8192 / 2;
1414
1415 /* make sure the guardband is within the valid range */
1416 if (center_x - half_len < -max_extent)
1417 center_x = -max_extent + half_len;
1418 else if (center_x + half_len > max_extent - 1)
1419 center_x = max_extent - half_len;
1420
1421 if (center_y - half_len < -max_extent)
1422 center_y = -max_extent + half_len;
1423 else if (center_y + half_len > max_extent - 1)
1424 center_y = max_extent - half_len;
1425
1426 *min_gbx = (float) (center_x - half_len);
1427 *max_gbx = (float) (center_x + half_len);
1428 *min_gby = (float) (center_y - half_len);
1429 *max_gby = (float) (center_y + half_len);
1430 }
1431
1432 void
1433 ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev,
1434 const struct pipe_viewport_state *state,
1435 struct ilo_viewport_cso *vp)
1436 {
1437 const float scale_x = fabs(state->scale[0]);
1438 const float scale_y = fabs(state->scale[1]);
1439 const float scale_z = fabs(state->scale[2]);
1440 int min_gbx, max_gbx, min_gby, max_gby;
1441
1442 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1443
1444 viewport_get_guardband(dev,
1445 (int) state->translate[0],
1446 (int) state->translate[1],
1447 &min_gbx, &max_gbx, &min_gby, &max_gby);
1448
1449 /* matrix form */
1450 vp->m00 = state->scale[0];
1451 vp->m11 = state->scale[1];
1452 vp->m22 = state->scale[2];
1453 vp->m30 = state->translate[0];
1454 vp->m31 = state->translate[1];
1455 vp->m32 = state->translate[2];
1456
1457 /* guardband in NDC space */
1458 vp->min_gbx = ((float) min_gbx - state->translate[0]) / scale_x;
1459 vp->max_gbx = ((float) max_gbx - state->translate[0]) / scale_x;
1460 vp->min_gby = ((float) min_gby - state->translate[1]) / scale_y;
1461 vp->max_gby = ((float) max_gby - state->translate[1]) / scale_y;
1462
1463 /* viewport in screen space */
1464 vp->min_x = scale_x * -1.0f + state->translate[0];
1465 vp->max_x = scale_x * 1.0f + state->translate[0];
1466 vp->min_y = scale_y * -1.0f + state->translate[1];
1467 vp->max_y = scale_y * 1.0f + state->translate[1];
1468 vp->min_z = scale_z * -1.0f + state->translate[2];
1469 vp->max_z = scale_z * 1.0f + state->translate[2];
1470 }
1471
1472 static int
1473 gen6_blend_factor_dst_alpha_forced_one(int factor)
1474 {
1475 switch (factor) {
1476 case BRW_BLENDFACTOR_DST_ALPHA:
1477 return BRW_BLENDFACTOR_ONE;
1478 case BRW_BLENDFACTOR_INV_DST_ALPHA:
1479 case BRW_BLENDFACTOR_SRC_ALPHA_SATURATE:
1480 return BRW_BLENDFACTOR_ZERO;
1481 default:
1482 return factor;
1483 }
1484 }
1485
1486 static uint32_t
1487 blend_get_rt_blend_enable(const struct ilo_dev_info *dev,
1488 const struct pipe_rt_blend_state *rt,
1489 bool dst_alpha_forced_one)
1490 {
1491 int rgb_src, rgb_dst, a_src, a_dst;
1492 uint32_t dw;
1493
1494 if (!rt->blend_enable)
1495 return 0;
1496
1497 rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
1498 rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
1499 a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
1500 a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
1501
1502 if (dst_alpha_forced_one) {
1503 rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src);
1504 rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst);
1505 a_src = gen6_blend_factor_dst_alpha_forced_one(a_src);
1506 a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst);
1507 }
1508
1509 dw = 1 << 31 |
1510 gen6_translate_pipe_blend(rt->alpha_func) << 26 |
1511 a_src << 20 |
1512 a_dst << 15 |
1513 gen6_translate_pipe_blend(rt->rgb_func) << 11 |
1514 rgb_src << 5 |
1515 rgb_dst;
1516
1517 if (rt->rgb_func != rt->alpha_func ||
1518 rgb_src != a_src || rgb_dst != a_dst)
1519 dw |= 1 << 30;
1520
1521 return dw;
1522 }
1523
1524 void
1525 ilo_gpe_init_blend(const struct ilo_dev_info *dev,
1526 const struct pipe_blend_state *state,
1527 struct ilo_blend_state *blend)
1528 {
1529 unsigned num_cso, i;
1530
1531 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1532
1533 if (state->independent_blend_enable) {
1534 num_cso = Elements(blend->cso);
1535 }
1536 else {
1537 memset(blend->cso, 0, sizeof(blend->cso));
1538 num_cso = 1;
1539 }
1540
1541 blend->independent_blend_enable = state->independent_blend_enable;
1542 blend->alpha_to_coverage = state->alpha_to_coverage;
1543 blend->dual_blend = false;
1544
1545 for (i = 0; i < num_cso; i++) {
1546 const struct pipe_rt_blend_state *rt = &state->rt[i];
1547 struct ilo_blend_cso *cso = &blend->cso[i];
1548 bool dual_blend;
1549
1550 cso->payload[0] = 0;
1551 cso->payload[1] = BRW_RENDERTARGET_CLAMPRANGE_FORMAT << 2 |
1552 0x3;
1553
1554 if (!(rt->colormask & PIPE_MASK_A))
1555 cso->payload[1] |= 1 << 27;
1556 if (!(rt->colormask & PIPE_MASK_R))
1557 cso->payload[1] |= 1 << 26;
1558 if (!(rt->colormask & PIPE_MASK_G))
1559 cso->payload[1] |= 1 << 25;
1560 if (!(rt->colormask & PIPE_MASK_B))
1561 cso->payload[1] |= 1 << 24;
1562
1563 if (state->dither)
1564 cso->payload[1] |= 1 << 12;
1565
1566 /*
1567 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
1568 *
1569 * "Color Buffer Blending and Logic Ops must not be enabled
1570 * simultaneously, or behavior is UNDEFINED."
1571 *
1572 * Since state->logicop_enable takes precedence over rt->blend_enable,
1573 * no special care is needed.
1574 */
1575 if (state->logicop_enable) {
1576 cso->dw_logicop = 1 << 22 |
1577 gen6_translate_pipe_logicop(state->logicop_func) << 18;
1578
1579 cso->dw_blend = 0;
1580 cso->dw_blend_dst_alpha_forced_one = 0;
1581
1582 dual_blend = false;
1583 }
1584 else {
1585 cso->dw_logicop = 0;
1586
1587 cso->dw_blend = blend_get_rt_blend_enable(dev, rt, false);
1588 cso->dw_blend_dst_alpha_forced_one =
1589 blend_get_rt_blend_enable(dev, rt, true);
1590
1591 dual_blend = (rt->blend_enable &&
1592 util_blend_state_is_dual(state, i));
1593 }
1594
1595 cso->dw_alpha_mod = 0;
1596
1597 if (state->alpha_to_coverage) {
1598 cso->dw_alpha_mod |= 1 << 31;
1599
1600 if (dev->gen >= ILO_GEN(7))
1601 cso->dw_alpha_mod |= 1 << 29;
1602 }
1603
1604 /*
1605 * From the Sandy Bridge PRM, volume 2 part 1, page 378:
1606 *
1607 * "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
1608 * must be disabled."
1609 */
1610 if (state->alpha_to_one && !dual_blend)
1611 cso->dw_alpha_mod |= 1 << 30;
1612
1613 if (dual_blend)
1614 blend->dual_blend = true;
1615 }
1616 }
1617
1618 void
1619 ilo_gpe_init_dsa(const struct ilo_dev_info *dev,
1620 const struct pipe_depth_stencil_alpha_state *state,
1621 struct ilo_dsa_state *dsa)
1622 {
1623 const struct pipe_depth_state *depth = &state->depth;
1624 const struct pipe_stencil_state *stencil0 = &state->stencil[0];
1625 const struct pipe_stencil_state *stencil1 = &state->stencil[1];
1626 const struct pipe_alpha_state *alpha = &state->alpha;
1627 uint32_t *dw;
1628
1629 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1630
1631 STATIC_ASSERT(Elements(dsa->payload) >= 3);
1632 dw = dsa->payload;
1633
1634 /*
1635 * From the Sandy Bridge PRM, volume 2 part 1, page 359:
1636 *
1637 * "If the Depth Buffer is either undefined or does not have a surface
1638 * format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
1639 * stencil buffer is disabled, Stencil Test Enable must be DISABLED"
1640 *
1641 * From the Sandy Bridge PRM, volume 2 part 1, page 370:
1642 *
1643 * "This field (Stencil Test Enable) cannot be enabled if
1644 * Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
1645 *
1646 * TODO We do not check these yet.
1647 */
1648 if (stencil0->enabled) {
1649 dw[0] = 1 << 31 |
1650 gen6_translate_dsa_func(stencil0->func) << 28 |
1651 gen6_translate_pipe_stencil_op(stencil0->fail_op) << 25 |
1652 gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 22 |
1653 gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 19;
1654 if (stencil0->writemask)
1655 dw[0] |= 1 << 18;
1656
1657 dw[1] = stencil0->valuemask << 24 |
1658 stencil0->writemask << 16;
1659
1660 if (stencil1->enabled) {
1661 dw[0] |= 1 << 15 |
1662 gen6_translate_dsa_func(stencil1->func) << 12 |
1663 gen6_translate_pipe_stencil_op(stencil1->fail_op) << 9 |
1664 gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 6 |
1665 gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 3;
1666 if (stencil1->writemask)
1667 dw[0] |= 1 << 18;
1668
1669 dw[1] |= stencil1->valuemask << 8 |
1670 stencil1->writemask;
1671 }
1672 }
1673 else {
1674 dw[0] = 0;
1675 dw[1] = 0;
1676 }
1677
1678 /*
1679 * From the Sandy Bridge PRM, volume 2 part 1, page 360:
1680 *
1681 * "Enabling the Depth Test function without defining a Depth Buffer is
1682 * UNDEFINED."
1683 *
1684 * From the Sandy Bridge PRM, volume 2 part 1, page 375:
1685 *
1686 * "A Depth Buffer must be defined before enabling writes to it, or
1687 * operation is UNDEFINED."
1688 *
1689 * TODO We do not check these yet.
1690 */
1691 dw[2] = depth->enabled << 31 |
1692 depth->writemask << 26;
1693 if (depth->enabled)
1694 dw[2] |= gen6_translate_dsa_func(depth->func) << 27;
1695 else
1696 dw[2] |= BRW_COMPAREFUNCTION_ALWAYS << 27;
1697
1698 /* dw_alpha will be ORed to BLEND_STATE */
1699 if (alpha->enabled) {
1700 dsa->dw_alpha = 1 << 16 |
1701 gen6_translate_dsa_func(alpha->func) << 13;
1702 }
1703 else {
1704 dsa->dw_alpha = 0;
1705 }
1706
1707 dsa->alpha_ref = float_to_ubyte(alpha->ref_value);
1708 }
1709
1710 void
1711 ilo_gpe_set_scissor(const struct ilo_dev_info *dev,
1712 unsigned start_slot,
1713 unsigned num_states,
1714 const struct pipe_scissor_state *states,
1715 struct ilo_scissor_state *scissor)
1716 {
1717 unsigned i;
1718
1719 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1720
1721 for (i = 0; i < num_states; i++) {
1722 uint16_t min_x, min_y, max_x, max_y;
1723
1724 /* both max and min are inclusive in SCISSOR_RECT */
1725 if (states[i].minx < states[i].maxx &&
1726 states[i].miny < states[i].maxy) {
1727 min_x = states[i].minx;
1728 min_y = states[i].miny;
1729 max_x = states[i].maxx - 1;
1730 max_y = states[i].maxy - 1;
1731 }
1732 else {
1733 /* we have to make min greater than max */
1734 min_x = 1;
1735 min_y = 1;
1736 max_x = 0;
1737 max_y = 0;
1738 }
1739
1740 scissor->payload[(start_slot + i) * 2 + 0] = min_y << 16 | min_x;
1741 scissor->payload[(start_slot + i) * 2 + 1] = max_y << 16 | max_x;
1742 }
1743
1744 if (!start_slot && num_states)
1745 scissor->scissor0 = states[0];
1746 }
1747
1748 void
1749 ilo_gpe_set_scissor_null(const struct ilo_dev_info *dev,
1750 struct ilo_scissor_state *scissor)
1751 {
1752 unsigned i;
1753
1754 for (i = 0; i < Elements(scissor->payload); i += 2) {
1755 scissor->payload[i + 0] = 1 << 16 | 1;
1756 scissor->payload[i + 1] = 0;
1757 }
1758 }
1759
1760 void
1761 ilo_gpe_init_view_surface_null_gen6(const struct ilo_dev_info *dev,
1762 unsigned width, unsigned height,
1763 unsigned depth, unsigned level,
1764 struct ilo_view_surface *surf)
1765 {
1766 uint32_t *dw;
1767
1768 ILO_GPE_VALID_GEN(dev, 6, 6);
1769
1770 /*
1771 * From the Sandy Bridge PRM, volume 4 part 1, page 71:
1772 *
1773 * "A null surface will be used in instances where an actual surface is
1774 * not bound. When a write message is generated to a null surface, no
1775 * actual surface is written to. When a read message (including any
1776 * sampling engine message) is generated to a null surface, the result
1777 * is all zeros. Note that a null surface type is allowed to be used
1778 * with all messages, even if it is not specificially indicated as
1779 * supported. All of the remaining fields in surface state are ignored
1780 * for null surfaces, with the following exceptions:
1781 *
1782 * * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
1783 * depth buffer's corresponding state for all render target
1784 * surfaces, including null.
1785 * * Surface Format must be R8G8B8A8_UNORM."
1786 *
1787 * From the Sandy Bridge PRM, volume 4 part 1, page 82:
1788 *
1789 * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
1790 * true"
1791 */
1792
1793 STATIC_ASSERT(Elements(surf->payload) >= 6);
1794 dw = surf->payload;
1795
1796 dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
1797 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT;
1798
1799 dw[1] = 0;
1800
1801 dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
1802 (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
1803 level << BRW_SURFACE_LOD_SHIFT;
1804
1805 dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
1806 BRW_SURFACE_TILED;
1807
1808 dw[4] = 0;
1809 dw[5] = 0;
1810
1811 surf->bo = NULL;
1812 }
1813
1814 void
1815 ilo_gpe_init_view_surface_for_buffer_gen6(const struct ilo_dev_info *dev,
1816 const struct ilo_buffer *buf,
1817 unsigned offset, unsigned size,
1818 unsigned struct_size,
1819 enum pipe_format elem_format,
1820 bool is_rt, bool render_cache_rw,
1821 struct ilo_view_surface *surf)
1822 {
1823 const int elem_size = util_format_get_blocksize(elem_format);
1824 int width, height, depth, pitch;
1825 int surface_format, num_entries;
1826 uint32_t *dw;
1827
1828 ILO_GPE_VALID_GEN(dev, 6, 6);
1829
1830 /*
1831 * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
1832 * structure in a buffer.
1833 */
1834
1835 surface_format = ilo_translate_color_format(elem_format);
1836
1837 num_entries = size / struct_size;
1838 /* see if there is enough space to fit another element */
1839 if (size % struct_size >= elem_size)
1840 num_entries++;
1841
1842 /*
1843 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
1844 *
1845 * "For SURFTYPE_BUFFER render targets, this field (Surface Base
1846 * Address) specifies the base address of first element of the
1847 * surface. The surface is interpreted as a simple array of that
1848 * single element type. The address must be naturally-aligned to the
1849 * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
1850 * must be 16-byte aligned).
1851 *
1852 * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
1853 * the base address of the first element of the surface, computed in
1854 * software by adding the surface base address to the byte offset of
1855 * the element in the buffer."
1856 */
1857 if (is_rt)
1858 assert(offset % elem_size == 0);
1859
1860 /*
1861 * From the Sandy Bridge PRM, volume 4 part 1, page 77:
1862 *
1863 * "For buffer surfaces, the number of entries in the buffer ranges
1864 * from 1 to 2^27."
1865 */
1866 assert(num_entries >= 1 && num_entries <= 1 << 27);
1867
1868 /*
1869 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
1870 *
1871 * "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
1872 * indicates the size of the structure."
1873 */
1874 pitch = struct_size;
1875
1876 pitch--;
1877 num_entries--;
1878 /* bits [6:0] */
1879 width = (num_entries & 0x0000007f);
1880 /* bits [19:7] */
1881 height = (num_entries & 0x000fff80) >> 7;
1882 /* bits [26:20] */
1883 depth = (num_entries & 0x07f00000) >> 20;
1884
1885 STATIC_ASSERT(Elements(surf->payload) >= 6);
1886 dw = surf->payload;
1887
1888 dw[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
1889 surface_format << BRW_SURFACE_FORMAT_SHIFT;
1890 if (render_cache_rw)
1891 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
1892
1893 dw[1] = offset;
1894
1895 dw[2] = height << BRW_SURFACE_HEIGHT_SHIFT |
1896 width << BRW_SURFACE_WIDTH_SHIFT;
1897
1898 dw[3] = depth << BRW_SURFACE_DEPTH_SHIFT |
1899 pitch << BRW_SURFACE_PITCH_SHIFT;
1900
1901 dw[4] = 0;
1902 dw[5] = 0;
1903
1904 /* do not increment reference count */
1905 surf->bo = buf->bo;
1906 }
1907
1908 void
1909 ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev,
1910 const struct ilo_texture *tex,
1911 enum pipe_format format,
1912 unsigned first_level,
1913 unsigned num_levels,
1914 unsigned first_layer,
1915 unsigned num_layers,
1916 bool is_rt, bool offset_to_layer,
1917 struct ilo_view_surface *surf)
1918 {
1919 int surface_type, surface_format;
1920 int width, height, depth, pitch, lod;
1921 unsigned layer_offset, x_offset, y_offset;
1922 uint32_t *dw;
1923
1924 ILO_GPE_VALID_GEN(dev, 6, 6);
1925
1926 surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
1927 assert(surface_type != BRW_SURFACE_BUFFER);
1928
1929 if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
1930 format = PIPE_FORMAT_Z32_FLOAT;
1931
1932 if (is_rt)
1933 surface_format = ilo_translate_render_format(format);
1934 else
1935 surface_format = ilo_translate_texture_format(format);
1936 assert(surface_format >= 0);
1937
1938 width = tex->base.width0;
1939 height = tex->base.height0;
1940 depth = (tex->base.target == PIPE_TEXTURE_3D) ?
1941 tex->base.depth0 : num_layers;
1942 pitch = tex->bo_stride;
1943
1944 if (surface_type == BRW_SURFACE_CUBE) {
1945 /*
1946 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
1947 *
1948 * "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
1949 * range of this field (Depth) is [0,84], indicating the number of
1950 * cube array elements (equal to the number of underlying 2D array
1951 * elements divided by 6). For other surfaces, this field must be
1952 * zero."
1953 *
1954 * When is_rt is true, we treat the texture as a 2D one to avoid the
1955 * restriction.
1956 */
1957 if (is_rt) {
1958 surface_type = BRW_SURFACE_2D;
1959 }
1960 else {
1961 assert(num_layers % 6 == 0);
1962 depth = num_layers / 6;
1963 }
1964 }
1965
1966 /* sanity check the size */
1967 assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
1968 switch (surface_type) {
1969 case BRW_SURFACE_1D:
1970 assert(width <= 8192 && height == 1 && depth <= 512);
1971 assert(first_layer < 512 && num_layers <= 512);
1972 break;
1973 case BRW_SURFACE_2D:
1974 assert(width <= 8192 && height <= 8192 && depth <= 512);
1975 assert(first_layer < 512 && num_layers <= 512);
1976 break;
1977 case BRW_SURFACE_3D:
1978 assert(width <= 2048 && height <= 2048 && depth <= 2048);
1979 assert(first_layer < 2048 && num_layers <= 512);
1980 if (!is_rt)
1981 assert(first_layer == 0);
1982 break;
1983 case BRW_SURFACE_CUBE:
1984 assert(width <= 8192 && height <= 8192 && depth <= 85);
1985 assert(width == height);
1986 assert(first_layer < 512 && num_layers <= 512);
1987 if (is_rt)
1988 assert(first_layer == 0);
1989 break;
1990 default:
1991 assert(!"unexpected surface type");
1992 break;
1993 }
1994
1995 /* non-full array spacing is supported only on GEN7+ */
1996 assert(tex->array_spacing_full);
1997 /* non-interleaved samples are supported only on GEN7+ */
1998 if (tex->base.nr_samples > 1)
1999 assert(tex->interleaved);
2000
2001 if (is_rt) {
2002 assert(num_levels == 1);
2003 lod = first_level;
2004 }
2005 else {
2006 lod = num_levels - 1;
2007 }
2008
2009 /*
2010 * Offset to the layer. When rendering, the hardware requires LOD and
2011 * Depth to be the same for all render targets and the depth buffer. We
2012 * need to offset to the layer manually and always set LOD and Depth to 0.
2013 */
2014 if (offset_to_layer) {
2015 /* we lose the capability for layered rendering */
2016 assert(is_rt && num_layers == 1);
2017
2018 layer_offset = ilo_texture_get_slice_offset(tex,
2019 first_level, first_layer, &x_offset, &y_offset);
2020
2021 assert(x_offset % 4 == 0);
2022 assert(y_offset % 2 == 0);
2023 x_offset /= 4;
2024 y_offset /= 2;
2025
2026 /* derive the size for the LOD */
2027 width = u_minify(width, first_level);
2028 height = u_minify(height, first_level);
2029
2030 first_level = 0;
2031 first_layer = 0;
2032
2033 lod = 0;
2034 depth = 1;
2035 }
2036 else {
2037 layer_offset = 0;
2038 x_offset = 0;
2039 y_offset = 0;
2040 }
2041
2042 /*
2043 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
2044 *
2045 * "Linear render target surface base addresses must be element-size
2046 * aligned, for non-YUV surface formats, or a multiple of 2
2047 * element-sizes for YUV surface formats. Other linear surfaces have
2048 * no alignment requirements (byte alignment is sufficient.)"
2049 *
2050 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
2051 *
2052 * "For linear render target surfaces, the pitch must be a multiple
2053 * of the element size for non-YUV surface formats. Pitch must be a
2054 * multiple of 2 * element size for YUV surface formats."
2055 *
2056 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
2057 *
2058 * "For linear surfaces, this field (X Offset) must be zero"
2059 */
2060 if (tex->tiling == INTEL_TILING_NONE) {
2061 if (is_rt) {
2062 const int elem_size = util_format_get_blocksize(format);
2063 assert(layer_offset % elem_size == 0);
2064 assert(pitch % elem_size == 0);
2065 }
2066
2067 assert(!x_offset);
2068 }
2069
2070 STATIC_ASSERT(Elements(surf->payload) >= 6);
2071 dw = surf->payload;
2072
2073 dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
2074 surface_format << BRW_SURFACE_FORMAT_SHIFT |
2075 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT;
2076
2077 if (surface_type == BRW_SURFACE_CUBE && !is_rt) {
2078 dw[0] |= 1 << 9 |
2079 BRW_SURFACE_CUBEFACE_ENABLES;
2080 }
2081
2082 if (is_rt)
2083 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
2084
2085 dw[1] = layer_offset;
2086
2087 dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
2088 (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
2089 lod << BRW_SURFACE_LOD_SHIFT;
2090
2091 dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
2092 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT |
2093 ilo_gpe_gen6_translate_winsys_tiling(tex->tiling);
2094
2095 dw[4] = first_level << BRW_SURFACE_MIN_LOD_SHIFT |
2096 first_layer << 17 |
2097 (num_layers - 1) << 8 |
2098 ((tex->base.nr_samples > 1) ? BRW_SURFACE_MULTISAMPLECOUNT_4 :
2099 BRW_SURFACE_MULTISAMPLECOUNT_1);
2100
2101 dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT |
2102 y_offset << BRW_SURFACE_Y_OFFSET_SHIFT;
2103 if (tex->valign_4)
2104 dw[5] |= BRW_SURFACE_VERTICAL_ALIGN_ENABLE;
2105
2106 /* do not increment reference count */
2107 surf->bo = tex->bo;
2108 }
2109
2110 static void
2111 sampler_init_border_color_gen6(const struct ilo_dev_info *dev,
2112 const union pipe_color_union *color,
2113 uint32_t *dw, int num_dwords)
2114 {
2115 float rgba[4] = {
2116 color->f[0], color->f[1], color->f[2], color->f[3],
2117 };
2118
2119 ILO_GPE_VALID_GEN(dev, 6, 6);
2120
2121 assert(num_dwords >= 12);
2122
2123 /*
2124 * This state is not documented in the Sandy Bridge PRM, but in the
2125 * Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1.
2126 */
2127
2128 /* IEEE_FP */
2129 dw[1] = fui(rgba[0]);
2130 dw[2] = fui(rgba[1]);
2131 dw[3] = fui(rgba[2]);
2132 dw[4] = fui(rgba[3]);
2133
2134 /* FLOAT_16 */
2135 dw[5] = util_float_to_half(rgba[0]) |
2136 util_float_to_half(rgba[1]) << 16;
2137 dw[6] = util_float_to_half(rgba[2]) |
2138 util_float_to_half(rgba[3]) << 16;
2139
2140 /* clamp to [-1.0f, 1.0f] */
2141 rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f);
2142 rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f);
2143 rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f);
2144 rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f);
2145
2146 /* SNORM16 */
2147 dw[9] = (int16_t) util_iround(rgba[0] * 32767.0f) |
2148 (int16_t) util_iround(rgba[1] * 32767.0f) << 16;
2149 dw[10] = (int16_t) util_iround(rgba[2] * 32767.0f) |
2150 (int16_t) util_iround(rgba[3] * 32767.0f) << 16;
2151
2152 /* SNORM8 */
2153 dw[11] = (int8_t) util_iround(rgba[0] * 127.0f) |
2154 (int8_t) util_iround(rgba[1] * 127.0f) << 8 |
2155 (int8_t) util_iround(rgba[2] * 127.0f) << 16 |
2156 (int8_t) util_iround(rgba[3] * 127.0f) << 24;
2157
2158 /* clamp to [0.0f, 1.0f] */
2159 rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f);
2160 rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f);
2161 rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f);
2162 rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f);
2163
2164 /* UNORM8 */
2165 dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) |
2166 (uint8_t) util_iround(rgba[1] * 255.0f) << 8 |
2167 (uint8_t) util_iround(rgba[2] * 255.0f) << 16 |
2168 (uint8_t) util_iround(rgba[3] * 255.0f) << 24;
2169
2170 /* UNORM16 */
2171 dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) |
2172 (uint16_t) util_iround(rgba[1] * 65535.0f) << 16;
2173 dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) |
2174 (uint16_t) util_iround(rgba[3] * 65535.0f) << 16;
2175 }
2176
2177 void
2178 ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
2179 const struct pipe_sampler_state *state,
2180 struct ilo_sampler_cso *sampler)
2181 {
2182 int mip_filter, min_filter, mag_filter, max_aniso;
2183 int lod_bias, max_lod, min_lod;
2184 int wrap_s, wrap_t, wrap_r, wrap_cube;
2185 bool clamp_is_to_edge;
2186 uint32_t dw0, dw1, dw3;
2187
2188 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2189
2190 memset(sampler, 0, sizeof(*sampler));
2191
2192 mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter);
2193 min_filter = gen6_translate_tex_filter(state->min_img_filter);
2194 mag_filter = gen6_translate_tex_filter(state->mag_img_filter);
2195
2196 sampler->anisotropic = state->max_anisotropy;
2197
2198 if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16)
2199 max_aniso = state->max_anisotropy / 2 - 1;
2200 else if (state->max_anisotropy > 16)
2201 max_aniso = BRW_ANISORATIO_16;
2202 else
2203 max_aniso = BRW_ANISORATIO_2;
2204
2205 /*
2206 *
2207 * Here is how the hardware calculate per-pixel LOD, from my reading of the
2208 * PRMs:
2209 *
2210 * 1) LOD is set to log2(ratio of texels to pixels) if not specified in
2211 * other ways. The number of texels is measured using level
2212 * SurfMinLod.
2213 * 2) Bias is added to LOD.
2214 * 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
2215 * compared with Base to determine whether magnification or
2216 * minification is needed. (if preclamp is disabled, LOD is compared
2217 * with Base before clamping)
2218 * 4) If magnification is needed, or no mipmapping is requested, LOD is
2219 * set to floor(MinLod).
2220 * 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
2221 *
2222 * With Gallium interface, Base is always zero and
2223 * pipe_sampler_view::u.tex.first_level specifies SurfMinLod.
2224 */
2225 if (dev->gen >= ILO_GEN(7)) {
2226 const float scale = 256.0f;
2227
2228 /* [-16.0, 16.0) in S4.8 */
2229 lod_bias = (int)
2230 (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
2231 lod_bias &= 0x1fff;
2232
2233 /* [0.0, 14.0] in U4.8 */
2234 max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale);
2235 min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale);
2236 }
2237 else {
2238 const float scale = 64.0f;
2239
2240 /* [-16.0, 16.0) in S4.6 */
2241 lod_bias = (int)
2242 (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
2243 lod_bias &= 0x7ff;
2244
2245 /* [0.0, 13.0] in U4.6 */
2246 max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale);
2247 min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale);
2248 }
2249
2250 /*
2251 * We want LOD to be clamped to determine magnification/minification, and
2252 * get set to zero when it is magnification or when mipmapping is disabled.
2253 * The hardware would set LOD to floor(MinLod) and that is a problem when
2254 * MinLod is greater than or equal to 1.0f.
2255 *
2256 * With Base being zero, it is always minification when MinLod is non-zero.
2257 * To achieve our goal, we just need to set MinLod to zero and set
2258 * MagFilter to MinFilter when mipmapping is disabled.
2259 */
2260 if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) {
2261 min_lod = 0;
2262 mag_filter = min_filter;
2263 }
2264
2265 /*
2266 * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
2267 * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering, PIPE_TEX_WRAP_CLAMP
2268 * means PIPE_TEX_WRAP_CLAMP_TO_BORDER while additionally clamping the
2269 * texture coordinates to [0.0, 1.0].
2270 *
2271 * The clamping will be taken care of in the shaders. There are two
2272 * filters here, but let the minification one has a say.
2273 */
2274 clamp_is_to_edge = (state->min_img_filter == PIPE_TEX_FILTER_NEAREST);
2275 if (!clamp_is_to_edge) {
2276 sampler->saturate_s = (state->wrap_s == PIPE_TEX_WRAP_CLAMP);
2277 sampler->saturate_t = (state->wrap_t == PIPE_TEX_WRAP_CLAMP);
2278 sampler->saturate_r = (state->wrap_r == PIPE_TEX_WRAP_CLAMP);
2279 }
2280
2281 /* determine wrap s/t/r */
2282 wrap_s = gen6_translate_tex_wrap(state->wrap_s, clamp_is_to_edge);
2283 wrap_t = gen6_translate_tex_wrap(state->wrap_t, clamp_is_to_edge);
2284 wrap_r = gen6_translate_tex_wrap(state->wrap_r, clamp_is_to_edge);
2285
2286 /*
2287 * From the Sandy Bridge PRM, volume 4 part 1, page 107:
2288 *
2289 * "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
2290 * and TEXCOORDMODE_CUBE settings are valid, and each TC component
2291 * must have the same Address Control mode."
2292 *
2293 * From the Ivy Bridge PRM, volume 4 part 1, page 96:
2294 *
2295 * "This field (Cube Surface Control Mode) must be set to
2296 * CUBECTRLMODE_PROGRAMMED"
2297 *
2298 * Therefore, we cannot use "Cube Surface Control Mode" for semless cube
2299 * map filtering.
2300 */
2301 if (state->seamless_cube_map &&
2302 (state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
2303 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) {
2304 wrap_cube = BRW_TEXCOORDMODE_CUBE;
2305 }
2306 else {
2307 wrap_cube = BRW_TEXCOORDMODE_CLAMP;
2308 }
2309
2310 if (!state->normalized_coords) {
2311 /*
2312 * From the Ivy Bridge PRM, volume 4 part 1, page 98:
2313 *
2314 * "The following state must be set as indicated if this field
2315 * (Non-normalized Coordinate Enable) is enabled:
2316 *
2317 * - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
2318 * TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
2319 * - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
2320 * - Mag Mode Filter must be MAPFILTER_NEAREST or
2321 * MAPFILTER_LINEAR.
2322 * - Min Mode Filter must be MAPFILTER_NEAREST or
2323 * MAPFILTER_LINEAR.
2324 * - Mip Mode Filter must be MIPFILTER_NONE.
2325 * - Min LOD must be 0.
2326 * - Max LOD must be 0.
2327 * - MIP Count must be 0.
2328 * - Surface Min LOD must be 0.
2329 * - Texture LOD Bias must be 0."
2330 */
2331 assert(wrap_s == BRW_TEXCOORDMODE_CLAMP ||
2332 wrap_s == BRW_TEXCOORDMODE_CLAMP_BORDER);
2333 assert(wrap_t == BRW_TEXCOORDMODE_CLAMP ||
2334 wrap_t == BRW_TEXCOORDMODE_CLAMP_BORDER);
2335 assert(wrap_r == BRW_TEXCOORDMODE_CLAMP ||
2336 wrap_r == BRW_TEXCOORDMODE_CLAMP_BORDER);
2337
2338 assert(mag_filter == BRW_MAPFILTER_NEAREST ||
2339 mag_filter == BRW_MAPFILTER_LINEAR);
2340 assert(min_filter == BRW_MAPFILTER_NEAREST ||
2341 min_filter == BRW_MAPFILTER_LINEAR);
2342
2343 /* work around a bug in util_blitter */
2344 mip_filter = BRW_MIPFILTER_NONE;
2345
2346 assert(mip_filter == BRW_MIPFILTER_NONE);
2347 }
2348
2349 if (dev->gen >= ILO_GEN(7)) {
2350 dw0 = 1 << 28 |
2351 mip_filter << 20 |
2352 lod_bias << 1;
2353
2354 sampler->dw_filter = mag_filter << 17 |
2355 min_filter << 14;
2356
2357 sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 |
2358 BRW_MAPFILTER_ANISOTROPIC << 14 |
2359 1;
2360
2361 dw1 = min_lod << 20 |
2362 max_lod << 8;
2363
2364 if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
2365 dw1 |= gen6_translate_shadow_func(state->compare_func) << 1;
2366
2367 dw3 = max_aniso << 19;
2368
2369 /* round the coordinates for linear filtering */
2370 if (min_filter != BRW_MAPFILTER_NEAREST) {
2371 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
2372 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
2373 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
2374 }
2375 if (mag_filter != BRW_MAPFILTER_NEAREST) {
2376 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
2377 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
2378 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
2379 }
2380
2381 if (!state->normalized_coords)
2382 dw3 |= 1 << 10;
2383
2384 sampler->dw_wrap = wrap_s << 6 |
2385 wrap_t << 3 |
2386 wrap_r;
2387
2388 /*
2389 * As noted in the classic i965 driver, the HW may still reference
2390 * wrap_t and wrap_r for 1D textures. We need to set them to a safe
2391 * mode
2392 */
2393 sampler->dw_wrap_1d = wrap_s << 6 |
2394 BRW_TEXCOORDMODE_WRAP << 3 |
2395 BRW_TEXCOORDMODE_WRAP;
2396
2397 sampler->dw_wrap_cube = wrap_cube << 6 |
2398 wrap_cube << 3 |
2399 wrap_cube;
2400
2401 STATIC_ASSERT(Elements(sampler->payload) >= 7);
2402
2403 sampler->payload[0] = dw0;
2404 sampler->payload[1] = dw1;
2405 sampler->payload[2] = dw3;
2406
2407 memcpy(&sampler->payload[3],
2408 state->border_color.ui, sizeof(state->border_color.ui));
2409 }
2410 else {
2411 dw0 = 1 << 28 |
2412 mip_filter << 20 |
2413 lod_bias << 3;
2414
2415 if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
2416 dw0 |= gen6_translate_shadow_func(state->compare_func);
2417
2418 sampler->dw_filter = (min_filter != mag_filter) << 27 |
2419 mag_filter << 17 |
2420 min_filter << 14;
2421
2422 sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 |
2423 BRW_MAPFILTER_ANISOTROPIC << 14;
2424
2425 dw1 = min_lod << 22 |
2426 max_lod << 12;
2427
2428 sampler->dw_wrap = wrap_s << 6 |
2429 wrap_t << 3 |
2430 wrap_r;
2431
2432 sampler->dw_wrap_1d = wrap_s << 6 |
2433 BRW_TEXCOORDMODE_WRAP << 3 |
2434 BRW_TEXCOORDMODE_WRAP;
2435
2436 sampler->dw_wrap_cube = wrap_cube << 6 |
2437 wrap_cube << 3 |
2438 wrap_cube;
2439
2440 dw3 = max_aniso << 19;
2441
2442 /* round the coordinates for linear filtering */
2443 if (min_filter != BRW_MAPFILTER_NEAREST) {
2444 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
2445 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
2446 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
2447 }
2448 if (mag_filter != BRW_MAPFILTER_NEAREST) {
2449 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
2450 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
2451 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
2452 }
2453
2454 if (!state->normalized_coords)
2455 dw3 |= 1;
2456
2457 STATIC_ASSERT(Elements(sampler->payload) >= 15);
2458
2459 sampler->payload[0] = dw0;
2460 sampler->payload[1] = dw1;
2461 sampler->payload[2] = dw3;
2462
2463 sampler_init_border_color_gen6(dev,
2464 &state->border_color, &sampler->payload[3], 12);
2465 }
2466 }
2467
2468 void
2469 ilo_gpe_set_fb(const struct ilo_dev_info *dev,
2470 const struct pipe_framebuffer_state *state,
2471 struct ilo_fb_state *fb)
2472 {
2473 const struct pipe_surface *first;
2474 unsigned num_surfaces, first_idx;
2475
2476 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2477
2478 util_copy_framebuffer_state(&fb->state, state);
2479
2480 ilo_gpe_init_view_surface_null(dev,
2481 state->width, state->height,
2482 1, 0, &fb->null_rt);
2483
2484 first = NULL;
2485 for (first_idx = 0; first_idx < state->nr_cbufs; first_idx++) {
2486 if (state->cbufs[first_idx]) {
2487 first = state->cbufs[first_idx];
2488 break;
2489 }
2490 }
2491 if (!first)
2492 first = state->zsbuf;
2493
2494 fb->num_samples = (first) ? first->texture->nr_samples : 1;
2495 if (!fb->num_samples)
2496 fb->num_samples = 1;
2497
2498 fb->offset_to_layers = false;
2499
2500 /*
2501 * The PRMs list several restrictions when the framebuffer has more than
2502 * one surface, but it seems they are lifted on GEN7+.
2503 */
2504 num_surfaces = state->nr_cbufs + !!state->zsbuf;
2505
2506 if (dev->gen < ILO_GEN(7) && num_surfaces > 1) {
2507 const unsigned first_depth =
2508 (first->texture->target == PIPE_TEXTURE_3D) ?
2509 first->texture->depth0 :
2510 first->u.tex.last_layer - first->u.tex.first_layer + 1;
2511 bool has_3d_target = (first->texture->target == PIPE_TEXTURE_3D);
2512 unsigned i;
2513
2514 for (i = first_idx + 1; i < num_surfaces; i++) {
2515 const struct pipe_surface *surf =
2516 (i < state->nr_cbufs) ? state->cbufs[i] : state->zsbuf;
2517 unsigned depth;
2518
2519 if (!surf)
2520 continue;
2521
2522 depth = (surf->texture->target == PIPE_TEXTURE_3D) ?
2523 surf->texture->depth0 :
2524 surf->u.tex.last_layer - surf->u.tex.first_layer + 1;
2525
2526 has_3d_target |= (surf->texture->target == PIPE_TEXTURE_3D);
2527
2528 /*
2529 * From the Sandy Bridge PRM, volume 4 part 1, page 79:
2530 *
2531 * "The LOD of a render target must be the same as the LOD of the
2532 * other render target(s) and of the depth buffer (defined in
2533 * 3DSTATE_DEPTH_BUFFER)."
2534 *
2535 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
2536 *
2537 * "The Depth of a render target must be the same as the Depth of
2538 * the other render target(s) and of the depth buffer (defined
2539 * in 3DSTATE_DEPTH_BUFFER)."
2540 */
2541 if (surf->u.tex.level != first->u.tex.level ||
2542 depth != first_depth) {
2543 fb->offset_to_layers = true;
2544 break;
2545 }
2546
2547 /*
2548 * From the Sandy Bridge PRM, volume 4 part 1, page 77:
2549 *
2550 * "The Height of a render target must be the same as the Height
2551 * of the other render targets and the depth buffer (defined in
2552 * 3DSTATE_DEPTH_BUFFER), unless Surface Type is SURFTYPE_1D or
2553 * SURFTYPE_2D with Depth = 0 (non-array) and LOD = 0 (non-mip
2554 * mapped)."
2555 *
2556 * From the Sandy Bridge PRM, volume 4 part 1, page 78:
2557 *
2558 * "The Width of a render target must be the same as the Width of
2559 * the other render target(s) and the depth buffer (defined in
2560 * 3DSTATE_DEPTH_BUFFER), unless Surface Type is SURFTYPE_1D or
2561 * SURFTYPE_2D with Depth = 0 (non-array) and LOD = 0 (non-mip
2562 * mapped)."
2563 */
2564 if (surf->texture->width0 != first->texture->width0 ||
2565 surf->texture->height0 != first->texture->height0) {
2566 if (has_3d_target || first->u.tex.level || first_depth > 1) {
2567 fb->offset_to_layers = true;
2568 break;
2569 }
2570 }
2571 }
2572 }
2573 }
2574
2575 int
2576 ilo_gpe_gen6_estimate_command_size(const struct ilo_dev_info *dev,
2577 enum ilo_gpe_gen6_command cmd,
2578 int arg)
2579 {
2580 static const struct {
2581 int header;
2582 int body;
2583 } gen6_command_size_table[ILO_GPE_GEN6_COMMAND_COUNT] = {
2584 [ILO_GPE_GEN6_STATE_BASE_ADDRESS] = { 0, 10 },
2585 [ILO_GPE_GEN6_STATE_SIP] = { 0, 2 },
2586 [ILO_GPE_GEN6_3DSTATE_VF_STATISTICS] = { 0, 1 },
2587 [ILO_GPE_GEN6_PIPELINE_SELECT] = { 0, 1 },
2588 [ILO_GPE_GEN6_MEDIA_VFE_STATE] = { 0, 8 },
2589 [ILO_GPE_GEN6_MEDIA_CURBE_LOAD] = { 0, 4 },
2590 [ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD] = { 0, 4 },
2591 [ILO_GPE_GEN6_MEDIA_GATEWAY_STATE] = { 0, 2 },
2592 [ILO_GPE_GEN6_MEDIA_STATE_FLUSH] = { 0, 2 },
2593 [ILO_GPE_GEN6_MEDIA_OBJECT_WALKER] = { 17, 1 },
2594 [ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS] = { 0, 4 },
2595 [ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS] = { 0, 4 },
2596 [ILO_GPE_GEN6_3DSTATE_URB] = { 0, 3 },
2597 [ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS] = { 1, 4 },
2598 [ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS] = { 1, 2 },
2599 [ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER] = { 0, 3 },
2600 [ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS] = { 0, 4 },
2601 [ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS] = { 0, 4 },
2602 [ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS] = { 0, 2 },
2603 [ILO_GPE_GEN6_3DSTATE_VS] = { 0, 6 },
2604 [ILO_GPE_GEN6_3DSTATE_GS] = { 0, 7 },
2605 [ILO_GPE_GEN6_3DSTATE_CLIP] = { 0, 4 },
2606 [ILO_GPE_GEN6_3DSTATE_SF] = { 0, 20 },
2607 [ILO_GPE_GEN6_3DSTATE_WM] = { 0, 9 },
2608 [ILO_GPE_GEN6_3DSTATE_CONSTANT_VS] = { 0, 5 },
2609 [ILO_GPE_GEN6_3DSTATE_CONSTANT_GS] = { 0, 5 },
2610 [ILO_GPE_GEN6_3DSTATE_CONSTANT_PS] = { 0, 5 },
2611 [ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK] = { 0, 2 },
2612 [ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE] = { 0, 4 },
2613 [ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER] = { 0, 7 },
2614 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET] = { 0, 2 },
2615 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN] = { 0, 33 },
2616 [ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE] = { 0, 3 },
2617 [ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS] = { 0, 3 },
2618 [ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX] = { 0, 4 },
2619 [ILO_GPE_GEN6_3DSTATE_MULTISAMPLE] = { 0, 3 },
2620 [ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER] = { 0, 3 },
2621 [ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER] = { 0, 3 },
2622 [ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS] = { 0, 2 },
2623 [ILO_GPE_GEN6_PIPE_CONTROL] = { 0, 5 },
2624 [ILO_GPE_GEN6_3DPRIMITIVE] = { 0, 6 },
2625 };
2626 const int header = gen6_command_size_table[cmd].header;
2627 const int body = gen6_command_size_table[arg].body;
2628 const int count = arg;
2629
2630 ILO_GPE_VALID_GEN(dev, 6, 6);
2631 assert(cmd < ILO_GPE_GEN6_COMMAND_COUNT);
2632
2633 return (likely(count)) ? header + body * count : 0;
2634 }
2635
2636 int
2637 ilo_gpe_gen6_estimate_state_size(const struct ilo_dev_info *dev,
2638 enum ilo_gpe_gen6_state state,
2639 int arg)
2640 {
2641 static const struct {
2642 int alignment;
2643 int body;
2644 bool is_array;
2645 } gen6_state_size_table[ILO_GPE_GEN6_STATE_COUNT] = {
2646 [ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA] = { 8, 8, true },
2647 [ILO_GPE_GEN6_SF_VIEWPORT] = { 8, 8, true },
2648 [ILO_GPE_GEN6_CLIP_VIEWPORT] = { 8, 4, true },
2649 [ILO_GPE_GEN6_CC_VIEWPORT] = { 8, 2, true },
2650 [ILO_GPE_GEN6_COLOR_CALC_STATE] = { 16, 6, false },
2651 [ILO_GPE_GEN6_BLEND_STATE] = { 16, 2, true },
2652 [ILO_GPE_GEN6_DEPTH_STENCIL_STATE] = { 16, 3, false },
2653 [ILO_GPE_GEN6_SCISSOR_RECT] = { 8, 2, true },
2654 [ILO_GPE_GEN6_BINDING_TABLE_STATE] = { 8, 1, true },
2655 [ILO_GPE_GEN6_SURFACE_STATE] = { 8, 6, false },
2656 [ILO_GPE_GEN6_SAMPLER_STATE] = { 8, 4, true },
2657 [ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE] = { 8, 12, false },
2658 [ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER] = { 8, 1, true },
2659 };
2660 const int alignment = gen6_state_size_table[state].alignment;
2661 const int body = gen6_state_size_table[state].body;
2662 const bool is_array = gen6_state_size_table[state].is_array;
2663 const int count = arg;
2664 int estimate;
2665
2666 ILO_GPE_VALID_GEN(dev, 6, 6);
2667 assert(state < ILO_GPE_GEN6_STATE_COUNT);
2668
2669 if (likely(count)) {
2670 if (is_array) {
2671 estimate = (alignment - 1) + body * count;
2672 }
2673 else {
2674 estimate = (alignment - 1) + body;
2675 /* all states are aligned */
2676 if (count > 1)
2677 estimate += util_align_npot(body, alignment) * (count - 1);
2678 }
2679 }
2680 else {
2681 estimate = 0;
2682 }
2683
2684 return estimate;
2685 }