ilo: get rid of GPE tables completely
[mesa.git] / src / gallium / drivers / ilo / ilo_gpe_gen6.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_dual_blend.h"
29 #include "util/u_half.h"
30 #include "brw_defines.h"
31 #include "intel_reg.h"
32
33 #include "ilo_context.h"
34 #include "ilo_format.h"
35 #include "ilo_resource.h"
36 #include "ilo_shader.h"
37 #include "ilo_state.h"
38 #include "ilo_gpe_gen6.h"
39
40 /**
41 * Translate a pipe logicop to the matching hardware logicop.
42 */
43 static int
44 gen6_translate_pipe_logicop(unsigned logicop)
45 {
46 switch (logicop) {
47 case PIPE_LOGICOP_CLEAR: return BRW_LOGICOPFUNCTION_CLEAR;
48 case PIPE_LOGICOP_NOR: return BRW_LOGICOPFUNCTION_NOR;
49 case PIPE_LOGICOP_AND_INVERTED: return BRW_LOGICOPFUNCTION_AND_INVERTED;
50 case PIPE_LOGICOP_COPY_INVERTED: return BRW_LOGICOPFUNCTION_COPY_INVERTED;
51 case PIPE_LOGICOP_AND_REVERSE: return BRW_LOGICOPFUNCTION_AND_REVERSE;
52 case PIPE_LOGICOP_INVERT: return BRW_LOGICOPFUNCTION_INVERT;
53 case PIPE_LOGICOP_XOR: return BRW_LOGICOPFUNCTION_XOR;
54 case PIPE_LOGICOP_NAND: return BRW_LOGICOPFUNCTION_NAND;
55 case PIPE_LOGICOP_AND: return BRW_LOGICOPFUNCTION_AND;
56 case PIPE_LOGICOP_EQUIV: return BRW_LOGICOPFUNCTION_EQUIV;
57 case PIPE_LOGICOP_NOOP: return BRW_LOGICOPFUNCTION_NOOP;
58 case PIPE_LOGICOP_OR_INVERTED: return BRW_LOGICOPFUNCTION_OR_INVERTED;
59 case PIPE_LOGICOP_COPY: return BRW_LOGICOPFUNCTION_COPY;
60 case PIPE_LOGICOP_OR_REVERSE: return BRW_LOGICOPFUNCTION_OR_REVERSE;
61 case PIPE_LOGICOP_OR: return BRW_LOGICOPFUNCTION_OR;
62 case PIPE_LOGICOP_SET: return BRW_LOGICOPFUNCTION_SET;
63 default:
64 assert(!"unknown logicop function");
65 return BRW_LOGICOPFUNCTION_CLEAR;
66 }
67 }
68
69 /**
70 * Translate a pipe blend function to the matching hardware blend function.
71 */
72 static int
73 gen6_translate_pipe_blend(unsigned blend)
74 {
75 switch (blend) {
76 case PIPE_BLEND_ADD: return BRW_BLENDFUNCTION_ADD;
77 case PIPE_BLEND_SUBTRACT: return BRW_BLENDFUNCTION_SUBTRACT;
78 case PIPE_BLEND_REVERSE_SUBTRACT: return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
79 case PIPE_BLEND_MIN: return BRW_BLENDFUNCTION_MIN;
80 case PIPE_BLEND_MAX: return BRW_BLENDFUNCTION_MAX;
81 default:
82 assert(!"unknown blend function");
83 return BRW_BLENDFUNCTION_ADD;
84 };
85 }
86
87 /**
88 * Translate a pipe blend factor to the matching hardware blend factor.
89 */
90 static int
91 gen6_translate_pipe_blendfactor(unsigned blendfactor)
92 {
93 switch (blendfactor) {
94 case PIPE_BLENDFACTOR_ONE: return BRW_BLENDFACTOR_ONE;
95 case PIPE_BLENDFACTOR_SRC_COLOR: return BRW_BLENDFACTOR_SRC_COLOR;
96 case PIPE_BLENDFACTOR_SRC_ALPHA: return BRW_BLENDFACTOR_SRC_ALPHA;
97 case PIPE_BLENDFACTOR_DST_ALPHA: return BRW_BLENDFACTOR_DST_ALPHA;
98 case PIPE_BLENDFACTOR_DST_COLOR: return BRW_BLENDFACTOR_DST_COLOR;
99 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
100 case PIPE_BLENDFACTOR_CONST_COLOR: return BRW_BLENDFACTOR_CONST_COLOR;
101 case PIPE_BLENDFACTOR_CONST_ALPHA: return BRW_BLENDFACTOR_CONST_ALPHA;
102 case PIPE_BLENDFACTOR_SRC1_COLOR: return BRW_BLENDFACTOR_SRC1_COLOR;
103 case PIPE_BLENDFACTOR_SRC1_ALPHA: return BRW_BLENDFACTOR_SRC1_ALPHA;
104 case PIPE_BLENDFACTOR_ZERO: return BRW_BLENDFACTOR_ZERO;
105 case PIPE_BLENDFACTOR_INV_SRC_COLOR: return BRW_BLENDFACTOR_INV_SRC_COLOR;
106 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return BRW_BLENDFACTOR_INV_SRC_ALPHA;
107 case PIPE_BLENDFACTOR_INV_DST_ALPHA: return BRW_BLENDFACTOR_INV_DST_ALPHA;
108 case PIPE_BLENDFACTOR_INV_DST_COLOR: return BRW_BLENDFACTOR_INV_DST_COLOR;
109 case PIPE_BLENDFACTOR_INV_CONST_COLOR: return BRW_BLENDFACTOR_INV_CONST_COLOR;
110 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return BRW_BLENDFACTOR_INV_CONST_ALPHA;
111 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return BRW_BLENDFACTOR_INV_SRC1_COLOR;
112 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return BRW_BLENDFACTOR_INV_SRC1_ALPHA;
113 default:
114 assert(!"unknown blend factor");
115 return BRW_BLENDFACTOR_ONE;
116 };
117 }
118
119 /**
120 * Translate a pipe stencil op to the matching hardware stencil op.
121 */
122 static int
123 gen6_translate_pipe_stencil_op(unsigned stencil_op)
124 {
125 switch (stencil_op) {
126 case PIPE_STENCIL_OP_KEEP: return BRW_STENCILOP_KEEP;
127 case PIPE_STENCIL_OP_ZERO: return BRW_STENCILOP_ZERO;
128 case PIPE_STENCIL_OP_REPLACE: return BRW_STENCILOP_REPLACE;
129 case PIPE_STENCIL_OP_INCR: return BRW_STENCILOP_INCRSAT;
130 case PIPE_STENCIL_OP_DECR: return BRW_STENCILOP_DECRSAT;
131 case PIPE_STENCIL_OP_INCR_WRAP: return BRW_STENCILOP_INCR;
132 case PIPE_STENCIL_OP_DECR_WRAP: return BRW_STENCILOP_DECR;
133 case PIPE_STENCIL_OP_INVERT: return BRW_STENCILOP_INVERT;
134 default:
135 assert(!"unknown stencil op");
136 return BRW_STENCILOP_KEEP;
137 }
138 }
139
140 /**
141 * Translate a pipe texture mipfilter to the matching hardware mipfilter.
142 */
143 static int
144 gen6_translate_tex_mipfilter(unsigned filter)
145 {
146 switch (filter) {
147 case PIPE_TEX_MIPFILTER_NEAREST: return BRW_MIPFILTER_NEAREST;
148 case PIPE_TEX_MIPFILTER_LINEAR: return BRW_MIPFILTER_LINEAR;
149 case PIPE_TEX_MIPFILTER_NONE: return BRW_MIPFILTER_NONE;
150 default:
151 assert(!"unknown mipfilter");
152 return BRW_MIPFILTER_NONE;
153 }
154 }
155
156 /**
157 * Translate a pipe texture filter to the matching hardware mapfilter.
158 */
159 static int
160 gen6_translate_tex_filter(unsigned filter)
161 {
162 switch (filter) {
163 case PIPE_TEX_FILTER_NEAREST: return BRW_MAPFILTER_NEAREST;
164 case PIPE_TEX_FILTER_LINEAR: return BRW_MAPFILTER_LINEAR;
165 default:
166 assert(!"unknown sampler filter");
167 return BRW_MAPFILTER_NEAREST;
168 }
169 }
170
171 /**
172 * Translate a pipe texture coordinate wrapping mode to the matching hardware
173 * wrapping mode.
174 */
175 static int
176 gen6_translate_tex_wrap(unsigned wrap, bool clamp_to_edge)
177 {
178 /* clamp to edge or border? */
179 if (wrap == PIPE_TEX_WRAP_CLAMP) {
180 wrap = (clamp_to_edge) ?
181 PIPE_TEX_WRAP_CLAMP_TO_EDGE : PIPE_TEX_WRAP_CLAMP_TO_BORDER;
182 }
183
184 switch (wrap) {
185 case PIPE_TEX_WRAP_REPEAT: return BRW_TEXCOORDMODE_WRAP;
186 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return BRW_TEXCOORDMODE_CLAMP;
187 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return BRW_TEXCOORDMODE_CLAMP_BORDER;
188 case PIPE_TEX_WRAP_MIRROR_REPEAT: return BRW_TEXCOORDMODE_MIRROR;
189 case PIPE_TEX_WRAP_CLAMP:
190 case PIPE_TEX_WRAP_MIRROR_CLAMP:
191 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
192 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
193 default:
194 assert(!"unknown sampler wrap mode");
195 return BRW_TEXCOORDMODE_WRAP;
196 }
197 }
198
199 /**
200 * Translate a pipe shadow compare function to the matching hardware shadow
201 * function.
202 */
203 static int
204 gen6_translate_shadow_func(unsigned func)
205 {
206 /*
207 * For PIPE_FUNC_x, the reference value is on the left-hand side of the
208 * comparison, and 1.0 is returned when the comparison is true.
209 *
210 * For BRW_PREFILTER_x, the reference value is on the right-hand side of
211 * the comparison, and 0.0 is returned when the comparison is true.
212 */
213 switch (func) {
214 case PIPE_FUNC_NEVER: return BRW_PREFILTER_ALWAYS;
215 case PIPE_FUNC_LESS: return BRW_PREFILTER_LEQUAL;
216 case PIPE_FUNC_EQUAL: return BRW_PREFILTER_NOTEQUAL;
217 case PIPE_FUNC_LEQUAL: return BRW_PREFILTER_LESS;
218 case PIPE_FUNC_GREATER: return BRW_PREFILTER_GEQUAL;
219 case PIPE_FUNC_NOTEQUAL: return BRW_PREFILTER_EQUAL;
220 case PIPE_FUNC_GEQUAL: return BRW_PREFILTER_GREATER;
221 case PIPE_FUNC_ALWAYS: return BRW_PREFILTER_NEVER;
222 default:
223 assert(!"unknown shadow compare function");
224 return BRW_PREFILTER_NEVER;
225 }
226 }
227
228 /**
229 * Translate a pipe DSA test function to the matching hardware compare
230 * function.
231 */
232 static int
233 gen6_translate_dsa_func(unsigned func)
234 {
235 switch (func) {
236 case PIPE_FUNC_NEVER: return BRW_COMPAREFUNCTION_NEVER;
237 case PIPE_FUNC_LESS: return BRW_COMPAREFUNCTION_LESS;
238 case PIPE_FUNC_EQUAL: return BRW_COMPAREFUNCTION_EQUAL;
239 case PIPE_FUNC_LEQUAL: return BRW_COMPAREFUNCTION_LEQUAL;
240 case PIPE_FUNC_GREATER: return BRW_COMPAREFUNCTION_GREATER;
241 case PIPE_FUNC_NOTEQUAL: return BRW_COMPAREFUNCTION_NOTEQUAL;
242 case PIPE_FUNC_GEQUAL: return BRW_COMPAREFUNCTION_GEQUAL;
243 case PIPE_FUNC_ALWAYS: return BRW_COMPAREFUNCTION_ALWAYS;
244 default:
245 assert(!"unknown depth/stencil/alpha test function");
246 return BRW_COMPAREFUNCTION_NEVER;
247 }
248 }
249
250 static void
251 ve_init_cso(const struct ilo_dev_info *dev,
252 const struct pipe_vertex_element *state,
253 unsigned vb_index,
254 struct ilo_ve_cso *cso)
255 {
256 int comp[4] = {
257 BRW_VE1_COMPONENT_STORE_SRC,
258 BRW_VE1_COMPONENT_STORE_SRC,
259 BRW_VE1_COMPONENT_STORE_SRC,
260 BRW_VE1_COMPONENT_STORE_SRC,
261 };
262 int format;
263
264 ILO_GPE_VALID_GEN(dev, 6, 7);
265
266 switch (util_format_get_nr_components(state->src_format)) {
267 case 1: comp[1] = BRW_VE1_COMPONENT_STORE_0;
268 case 2: comp[2] = BRW_VE1_COMPONENT_STORE_0;
269 case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ?
270 BRW_VE1_COMPONENT_STORE_1_INT :
271 BRW_VE1_COMPONENT_STORE_1_FLT;
272 }
273
274 format = ilo_translate_vertex_format(state->src_format);
275
276 STATIC_ASSERT(Elements(cso->payload) >= 2);
277 cso->payload[0] =
278 vb_index << GEN6_VE0_INDEX_SHIFT |
279 GEN6_VE0_VALID |
280 format << BRW_VE0_FORMAT_SHIFT |
281 state->src_offset << BRW_VE0_SRC_OFFSET_SHIFT;
282
283 cso->payload[1] =
284 comp[0] << BRW_VE1_COMPONENT_0_SHIFT |
285 comp[1] << BRW_VE1_COMPONENT_1_SHIFT |
286 comp[2] << BRW_VE1_COMPONENT_2_SHIFT |
287 comp[3] << BRW_VE1_COMPONENT_3_SHIFT;
288 }
289
290 void
291 ilo_gpe_init_ve(const struct ilo_dev_info *dev,
292 unsigned num_states,
293 const struct pipe_vertex_element *states,
294 struct ilo_ve_state *ve)
295 {
296 unsigned i;
297
298 ILO_GPE_VALID_GEN(dev, 6, 7);
299
300 ve->count = num_states;
301 ve->vb_count = 0;
302
303 for (i = 0; i < num_states; i++) {
304 const unsigned pipe_idx = states[i].vertex_buffer_index;
305 const unsigned instance_divisor = states[i].instance_divisor;
306 unsigned hw_idx;
307
308 /*
309 * map the pipe vb to the hardware vb, which has a fixed instance
310 * divisor
311 */
312 for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
313 if (ve->vb_mapping[hw_idx] == pipe_idx &&
314 ve->instance_divisors[hw_idx] == instance_divisor)
315 break;
316 }
317
318 /* create one if there is no matching hardware vb */
319 if (hw_idx >= ve->vb_count) {
320 hw_idx = ve->vb_count++;
321
322 ve->vb_mapping[hw_idx] = pipe_idx;
323 ve->instance_divisors[hw_idx] = instance_divisor;
324 }
325
326 ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]);
327 }
328 }
329
330 void
331 ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev,
332 const struct ilo_shader_state *vs,
333 struct ilo_shader_cso *cso)
334 {
335 int start_grf, vue_read_len, max_threads;
336 uint32_t dw2, dw4, dw5;
337
338 ILO_GPE_VALID_GEN(dev, 6, 7);
339
340 start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG);
341 vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT);
342
343 /*
344 * From the Sandy Bridge PRM, volume 2 part 1, page 135:
345 *
346 * "(Vertex URB Entry Read Length) Specifies the number of pairs of
347 * 128-bit vertex elements to be passed into the payload for each
348 * vertex."
349 *
350 * "It is UNDEFINED to set this field to 0 indicating no Vertex URB
351 * data to be read and passed to the thread."
352 */
353 vue_read_len = (vue_read_len + 1) / 2;
354 if (!vue_read_len)
355 vue_read_len = 1;
356
357 switch (dev->gen) {
358 case ILO_GEN(6):
359 /*
360 * From the Sandy Bridge PRM, volume 1 part 1, page 22:
361 *
362 * "Device # of EUs #Threads/EU
363 * SNB GT2 12 5
364 * SNB GT1 6 4"
365 */
366 max_threads = (dev->gt == 2) ? 60 : 24;
367 break;
368 case ILO_GEN(7):
369 /*
370 * From the Ivy Bridge PRM, volume 1 part 1, page 18:
371 *
372 * "Device # of EUs #Threads/EU
373 * Ivy Bridge (GT2) 16 8
374 * Ivy Bridge (GT1) 6 6"
375 */
376 max_threads = (dev->gt == 2) ? 128 : 36;
377 break;
378 case ILO_GEN(7.5):
379 /* see brwCreateContext() */
380 max_threads = (dev->gt == 2) ? 280 : 70;
381 break;
382 default:
383 max_threads = 1;
384 break;
385 }
386
387 dw2 = (true) ? 0 : GEN6_VS_FLOATING_POINT_MODE_ALT;
388
389 dw4 = start_grf << GEN6_VS_DISPATCH_START_GRF_SHIFT |
390 vue_read_len << GEN6_VS_URB_READ_LENGTH_SHIFT |
391 0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT;
392
393 dw5 = GEN6_VS_STATISTICS_ENABLE |
394 GEN6_VS_ENABLE;
395
396 if (dev->gen >= ILO_GEN(7.5))
397 dw5 |= (max_threads - 1) << HSW_VS_MAX_THREADS_SHIFT;
398 else
399 dw5 |= (max_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT;
400
401 STATIC_ASSERT(Elements(cso->payload) >= 3);
402 cso->payload[0] = dw2;
403 cso->payload[1] = dw4;
404 cso->payload[2] = dw5;
405 }
406
407 void
408 ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev,
409 const struct ilo_shader_state *gs,
410 struct ilo_shader_cso *cso)
411 {
412 int start_grf, vue_read_len, max_threads;
413 uint32_t dw2, dw4, dw5, dw6;
414
415 ILO_GPE_VALID_GEN(dev, 6, 6);
416
417 if (ilo_shader_get_type(gs) == PIPE_SHADER_GEOMETRY) {
418 start_grf = ilo_shader_get_kernel_param(gs,
419 ILO_KERNEL_URB_DATA_START_REG);
420
421 vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
422 }
423 else {
424 start_grf = ilo_shader_get_kernel_param(gs,
425 ILO_KERNEL_VS_GEN6_SO_START_REG);
426
427 vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT);
428 }
429
430 /*
431 * From the Sandy Bridge PRM, volume 2 part 1, page 153:
432 *
433 * "Specifies the amount of URB data read and passed in the thread
434 * payload for each Vertex URB entry, in 256-bit register increments.
435 *
436 * It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
437 * 0 indicating no Vertex URB data to be read and passed to the
438 * thread."
439 */
440 vue_read_len = (vue_read_len + 1) / 2;
441 if (!vue_read_len)
442 vue_read_len = 1;
443
444 /*
445 * From the Sandy Bridge PRM, volume 2 part 1, page 154:
446 *
447 * "Maximum Number of Threads valid range is [0,27] when Rendering
448 * Enabled bit is set."
449 *
450 * From the Sandy Bridge PRM, volume 2 part 1, page 173:
451 *
452 * "Programming Note: If the GS stage is enabled, software must always
453 * allocate at least one GS URB Entry. This is true even if the GS
454 * thread never needs to output vertices to the pipeline, e.g., when
455 * only performing stream output. This is an artifact of the need to
456 * pass the GS thread an initial destination URB handle."
457 *
458 * As such, we always enable rendering, and limit the number of threads.
459 */
460 if (dev->gt == 2) {
461 /* maximum is 60, but limited to 28 */
462 max_threads = 28;
463 }
464 else {
465 /* maximum is 24, but limited to 21 (see brwCreateContext()) */
466 max_threads = 21;
467 }
468
469 dw2 = GEN6_GS_SPF_MODE;
470
471 dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
472 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
473 start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
474
475 dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
476 GEN6_GS_STATISTICS_ENABLE |
477 GEN6_GS_SO_STATISTICS_ENABLE |
478 GEN6_GS_RENDERING_ENABLE;
479
480 /*
481 * we cannot make use of GEN6_GS_REORDER because it will reorder
482 * triangle strips according to D3D rules (triangle 2N+1 uses vertices
483 * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
484 * (2N+2, 2N+1, 2N+3)).
485 */
486 dw6 = GEN6_GS_ENABLE;
487
488 if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY))
489 dw6 |= GEN6_GS_DISCARD_ADJACENCY;
490
491 if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) {
492 const uint32_t svbi_post_inc =
493 ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC);
494
495 dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
496 if (svbi_post_inc) {
497 dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
498 svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
499 }
500 }
501
502 STATIC_ASSERT(Elements(cso->payload) >= 4);
503 cso->payload[0] = dw2;
504 cso->payload[1] = dw4;
505 cso->payload[2] = dw5;
506 cso->payload[3] = dw6;
507 }
508
509 void
510 ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev,
511 const struct pipe_rasterizer_state *state,
512 struct ilo_rasterizer_clip *clip)
513 {
514 uint32_t dw1, dw2, dw3;
515
516 ILO_GPE_VALID_GEN(dev, 6, 7);
517
518 dw1 = GEN6_CLIP_STATISTICS_ENABLE;
519
520 if (dev->gen >= ILO_GEN(7)) {
521 /*
522 * From the Ivy Bridge PRM, volume 2 part 1, page 219:
523 *
524 * "Workaround : Due to Hardware issue "EarlyCull" needs to be
525 * enabled only for the cases where the incoming primitive topology
526 * into the clipper guaranteed to be Trilist."
527 *
528 * What does this mean?
529 */
530 dw1 |= 0 << 19 |
531 GEN7_CLIP_EARLY_CULL;
532
533 if (state->front_ccw)
534 dw1 |= GEN7_CLIP_WINDING_CCW;
535
536 switch (state->cull_face) {
537 case PIPE_FACE_NONE:
538 dw1 |= GEN7_CLIP_CULLMODE_NONE;
539 break;
540 case PIPE_FACE_FRONT:
541 dw1 |= GEN7_CLIP_CULLMODE_FRONT;
542 break;
543 case PIPE_FACE_BACK:
544 dw1 |= GEN7_CLIP_CULLMODE_BACK;
545 break;
546 case PIPE_FACE_FRONT_AND_BACK:
547 dw1 |= GEN7_CLIP_CULLMODE_BOTH;
548 break;
549 }
550 }
551
552 dw2 = GEN6_CLIP_ENABLE |
553 GEN6_CLIP_XY_TEST |
554 state->clip_plane_enable << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
555 GEN6_CLIP_MODE_NORMAL;
556
557 if (state->clip_halfz)
558 dw2 |= GEN6_CLIP_API_D3D;
559 else
560 dw2 |= GEN6_CLIP_API_OGL;
561
562 if (state->depth_clip)
563 dw2 |= GEN6_CLIP_Z_TEST;
564
565 if (state->flatshade_first) {
566 dw2 |= 0 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
567 0 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
568 1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
569 }
570 else {
571 dw2 |= 2 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
572 1 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
573 2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
574 }
575
576 dw3 = 0x1 << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
577 0x7ff << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT;
578
579 clip->payload[0] = dw1;
580 clip->payload[1] = dw2;
581 clip->payload[2] = dw3;
582
583 clip->can_enable_guardband = true;
584
585 /*
586 * There are several reasons that guard band test should be disabled
587 *
588 * - GL wide points (to avoid partially visibie object)
589 * - GL wide or AA lines (to avoid partially visibie object)
590 */
591 if (state->point_size_per_vertex || state->point_size > 1.0f)
592 clip->can_enable_guardband = false;
593 if (state->line_smooth || state->line_width > 1.0f)
594 clip->can_enable_guardband = false;
595 }
596
597 void
598 ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
599 const struct pipe_rasterizer_state *state,
600 struct ilo_rasterizer_sf *sf)
601 {
602 float offset_const, offset_scale, offset_clamp;
603 int line_width, point_width;
604 uint32_t dw1, dw2, dw3;
605
606 ILO_GPE_VALID_GEN(dev, 6, 7);
607
608 /*
609 * Scale the constant term. The minimum representable value used by the HW
610 * is not large enouch to be the minimum resolvable difference.
611 */
612 offset_const = state->offset_units * 2.0f;
613
614 offset_scale = state->offset_scale;
615 offset_clamp = state->offset_clamp;
616
617 /*
618 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
619 *
620 * "This bit (Statistics Enable) should be set whenever clipping is
621 * enabled and the Statistics Enable bit is set in CLIP_STATE. It
622 * should be cleared if clipping is disabled or Statistics Enable in
623 * CLIP_STATE is clear."
624 */
625 dw1 = GEN6_SF_STATISTICS_ENABLE |
626 GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
627
628 /* XXX GEN6 path seems to work fine for GEN7 */
629 if (false && dev->gen >= ILO_GEN(7)) {
630 /*
631 * From the Ivy Bridge PRM, volume 2 part 1, page 258:
632 *
633 * "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
634 * Enable Solid , Global Depth Offset Enable Wireframe, and Global
635 * Depth Offset Enable Point) should be set whenever non zero depth
636 * bias (Slope, Bias) values are used. Setting this bit may have
637 * some degradation of performance for some workloads."
638 */
639 if (state->offset_tri || state->offset_line || state->offset_point) {
640 /* XXX need to scale offset_const according to the depth format */
641 dw1 |= GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS;
642
643 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID |
644 GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME |
645 GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
646 }
647 else {
648 offset_const = 0.0f;
649 offset_scale = 0.0f;
650 offset_clamp = 0.0f;
651 }
652 }
653 else {
654 if (state->offset_tri)
655 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
656 if (state->offset_line)
657 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
658 if (state->offset_point)
659 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
660 }
661
662 switch (state->fill_front) {
663 case PIPE_POLYGON_MODE_FILL:
664 dw1 |= GEN6_SF_FRONT_SOLID;
665 break;
666 case PIPE_POLYGON_MODE_LINE:
667 dw1 |= GEN6_SF_FRONT_WIREFRAME;
668 break;
669 case PIPE_POLYGON_MODE_POINT:
670 dw1 |= GEN6_SF_FRONT_POINT;
671 break;
672 }
673
674 switch (state->fill_back) {
675 case PIPE_POLYGON_MODE_FILL:
676 dw1 |= GEN6_SF_BACK_SOLID;
677 break;
678 case PIPE_POLYGON_MODE_LINE:
679 dw1 |= GEN6_SF_BACK_WIREFRAME;
680 break;
681 case PIPE_POLYGON_MODE_POINT:
682 dw1 |= GEN6_SF_BACK_POINT;
683 break;
684 }
685
686 if (state->front_ccw)
687 dw1 |= GEN6_SF_WINDING_CCW;
688
689 dw2 = 0;
690
691 if (state->line_smooth) {
692 /*
693 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
694 *
695 * "This field (Anti-aliasing Enable) must be disabled if any of the
696 * render targets have integer (UINT or SINT) surface format."
697 *
698 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
699 *
700 * "This field (Hierarchical Depth Buffer Enable) must be disabled
701 * if Anti-aliasing Enable in 3DSTATE_SF is enabled.
702 *
703 * TODO We do not check those yet.
704 */
705 dw2 |= GEN6_SF_LINE_AA_ENABLE |
706 GEN6_SF_LINE_END_CAP_WIDTH_1_0;
707 }
708
709 switch (state->cull_face) {
710 case PIPE_FACE_NONE:
711 dw2 |= GEN6_SF_CULL_NONE;
712 break;
713 case PIPE_FACE_FRONT:
714 dw2 |= GEN6_SF_CULL_FRONT;
715 break;
716 case PIPE_FACE_BACK:
717 dw2 |= GEN6_SF_CULL_BACK;
718 break;
719 case PIPE_FACE_FRONT_AND_BACK:
720 dw2 |= GEN6_SF_CULL_BOTH;
721 break;
722 }
723
724 /*
725 * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
726 * pixels in the minor direction. We have to make the lines slightly
727 * thicker, 0.5 pixel on both sides, so that they intersect that many
728 * pixels are considered into the lines.
729 *
730 * Line width is in U3.7.
731 */
732 line_width = (int) ((state->line_width +
733 (float) state->line_smooth) * 128.0f + 0.5f);
734 line_width = CLAMP(line_width, 0, 1023);
735
736 if (line_width == 128 && !state->line_smooth) {
737 /* use GIQ rules */
738 line_width = 0;
739 }
740
741 dw2 |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
742
743 if (state->scissor)
744 dw2 |= GEN6_SF_SCISSOR_ENABLE;
745
746 dw3 = GEN6_SF_LINE_AA_MODE_TRUE |
747 GEN6_SF_VERTEX_SUBPIXEL_8BITS;
748
749 if (state->line_last_pixel)
750 dw3 |= 1 << 31;
751
752 if (state->flatshade_first) {
753 dw3 |= 0 << GEN6_SF_TRI_PROVOKE_SHIFT |
754 0 << GEN6_SF_LINE_PROVOKE_SHIFT |
755 1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
756 }
757 else {
758 dw3 |= 2 << GEN6_SF_TRI_PROVOKE_SHIFT |
759 1 << GEN6_SF_LINE_PROVOKE_SHIFT |
760 2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
761 }
762
763 if (!state->point_size_per_vertex)
764 dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH;
765
766 /* in U8.3 */
767 point_width = (int) (state->point_size * 8.0f + 0.5f);
768 point_width = CLAMP(point_width, 1, 2047);
769
770 dw3 |= point_width;
771
772 STATIC_ASSERT(Elements(sf->payload) >= 6);
773 sf->payload[0] = dw1;
774 sf->payload[1] = dw2;
775 sf->payload[2] = dw3;
776 sf->payload[3] = fui(offset_const);
777 sf->payload[4] = fui(offset_scale);
778 sf->payload[5] = fui(offset_clamp);
779
780 if (state->multisample) {
781 sf->dw_msaa = GEN6_SF_MSRAST_ON_PATTERN;
782
783 /*
784 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
785 *
786 * "Software must not program a value of 0.0 when running in
787 * MSRASTMODE_ON_xxx modes - zero-width lines are not available
788 * when multisampling rasterization is enabled."
789 */
790 if (!line_width) {
791 line_width = 128; /* 1.0f */
792
793 sf->dw_msaa |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
794 }
795 }
796 else {
797 sf->dw_msaa = 0;
798 }
799 }
800
801 void
802 ilo_gpe_init_rasterizer_wm_gen6(const struct ilo_dev_info *dev,
803 const struct pipe_rasterizer_state *state,
804 struct ilo_rasterizer_wm *wm)
805 {
806 uint32_t dw5, dw6;
807
808 ILO_GPE_VALID_GEN(dev, 6, 6);
809
810 /* only the FF unit states are set, as in GEN7 */
811
812 dw5 = GEN6_WM_LINE_AA_WIDTH_2_0;
813
814 /* same value as in 3DSTATE_SF */
815 if (state->line_smooth)
816 dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0;
817
818 if (state->poly_stipple_enable)
819 dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE;
820 if (state->line_stipple_enable)
821 dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE;
822
823 dw6 = GEN6_WM_POSITION_ZW_PIXEL |
824 GEN6_WM_MSRAST_OFF_PIXEL |
825 GEN6_WM_MSDISPMODE_PERSAMPLE;
826
827 if (state->bottom_edge_rule)
828 dw6 |= GEN6_WM_POINT_RASTRULE_UPPER_RIGHT;
829
830 /*
831 * assertion that makes sure
832 *
833 * dw6 |= wm->dw_msaa_rast | wm->dw_msaa_disp;
834 *
835 * is valid
836 */
837 STATIC_ASSERT(GEN6_WM_MSRAST_OFF_PIXEL == 0 &&
838 GEN6_WM_MSDISPMODE_PERSAMPLE == 0);
839
840 wm->dw_msaa_rast =
841 (state->multisample) ? GEN6_WM_MSRAST_ON_PATTERN : 0;
842 wm->dw_msaa_disp = GEN6_WM_MSDISPMODE_PERPIXEL;
843
844 STATIC_ASSERT(Elements(wm->payload) >= 2);
845 wm->payload[0] = dw5;
846 wm->payload[1] = dw6;
847 }
848
849 void
850 ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev,
851 const struct ilo_shader_state *fs,
852 struct ilo_shader_cso *cso)
853 {
854 int start_grf, input_count, interps, max_threads;
855 uint32_t dw2, dw4, dw5, dw6;
856
857 ILO_GPE_VALID_GEN(dev, 6, 6);
858
859 start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
860 input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
861 interps = ilo_shader_get_kernel_param(fs,
862 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
863
864 /* see brwCreateContext() */
865 max_threads = (dev->gt == 2) ? 80 : 40;
866
867 dw2 = (true) ? 0 : GEN6_WM_FLOATING_POINT_MODE_ALT;
868
869 dw4 = start_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0 |
870 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1 |
871 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2;
872
873 dw5 = (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
874
875 /*
876 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
877 *
878 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
879 * PS kernel or color calculator has the ability to kill (discard)
880 * pixels or samples, other than due to depth or stencil testing.
881 * This bit is required to be ENABLED in the following situations:
882 *
883 * The API pixel shader program contains "killpix" or "discard"
884 * instructions, or other code in the pixel shader kernel that can
885 * cause the final pixel mask to differ from the pixel mask received
886 * on dispatch.
887 *
888 * A sampler with chroma key enabled with kill pixel mode is used by
889 * the pixel shader.
890 *
891 * Any render target has Alpha Test Enable or AlphaToCoverage Enable
892 * enabled.
893 *
894 * The pixel shader kernel generates and outputs oMask.
895 *
896 * Note: As ClipDistance clipping is fully supported in hardware and
897 * therefore not via PS instructions, there should be no need to
898 * ENABLE this bit due to ClipDistance clipping."
899 */
900 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
901 dw5 |= GEN6_WM_KILL_ENABLE;
902
903 /*
904 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
905 *
906 * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
907 * field must be set to disabled."
908 *
909 * TODO This is not checked yet.
910 */
911 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
912 dw5 |= GEN6_WM_COMPUTED_DEPTH;
913
914 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
915 dw5 |= GEN6_WM_USES_SOURCE_DEPTH;
916
917 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
918 dw5 |= GEN6_WM_USES_SOURCE_W;
919
920 /*
921 * TODO set this bit only when
922 *
923 * a) fs writes colors and color is not masked, or
924 * b) fs writes depth, or
925 * c) fs or cc kills
926 */
927 if (true)
928 dw5 |= GEN6_WM_DISPATCH_ENABLE;
929
930 assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
931 dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
932
933 dw6 = input_count << GEN6_WM_NUM_SF_OUTPUTS_SHIFT |
934 GEN6_WM_POSOFFSET_NONE |
935 interps << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
936
937 STATIC_ASSERT(Elements(cso->payload) >= 4);
938 cso->payload[0] = dw2;
939 cso->payload[1] = dw4;
940 cso->payload[2] = dw5;
941 cso->payload[3] = dw6;
942 }
943
944 struct ilo_zs_surface_info {
945 int surface_type;
946 int format;
947
948 struct {
949 struct intel_bo *bo;
950 unsigned stride;
951 enum intel_tiling_mode tiling;
952 uint32_t offset;
953 } zs, stencil, hiz;
954
955 unsigned width, height, depth;
956 unsigned lod, first_layer, num_layers;
957 uint32_t x_offset, y_offset;
958 };
959
960 static void
961 zs_init_info_null(const struct ilo_dev_info *dev,
962 struct ilo_zs_surface_info *info)
963 {
964 ILO_GPE_VALID_GEN(dev, 6, 7);
965
966 memset(info, 0, sizeof(*info));
967
968 info->surface_type = BRW_SURFACE_NULL;
969 info->format = BRW_DEPTHFORMAT_D32_FLOAT;
970 info->width = 1;
971 info->height = 1;
972 info->depth = 1;
973 info->num_layers = 1;
974 }
975
976 static void
977 zs_init_info(const struct ilo_dev_info *dev,
978 const struct ilo_texture *tex,
979 enum pipe_format format,
980 unsigned level,
981 unsigned first_layer, unsigned num_layers,
982 struct ilo_zs_surface_info *info)
983 {
984 const bool rebase_layer = true;
985 struct intel_bo * const hiz_bo = NULL;
986 bool separate_stencil;
987 uint32_t x_offset[3], y_offset[3];
988
989 ILO_GPE_VALID_GEN(dev, 6, 7);
990
991 memset(info, 0, sizeof(*info));
992
993 info->surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
994
995 if (info->surface_type == BRW_SURFACE_CUBE) {
996 /*
997 * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
998 *
999 * "For Other Surfaces (Cube Surfaces):
1000 * This field (Minimum Array Element) is ignored."
1001 *
1002 * "For Other Surfaces (Cube Surfaces):
1003 * This field (Render Target View Extent) is ignored."
1004 *
1005 * As such, we cannot set first_layer and num_layers on cube surfaces.
1006 * To work around that, treat it as a 2D surface.
1007 */
1008 info->surface_type = BRW_SURFACE_2D;
1009 }
1010
1011 if (dev->gen >= ILO_GEN(7)) {
1012 separate_stencil = true;
1013 }
1014 else {
1015 /*
1016 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
1017 *
1018 * "This field (Separate Stencil Buffer Enable) must be set to the
1019 * same value (enabled or disabled) as Hierarchical Depth Buffer
1020 * Enable."
1021 */
1022 separate_stencil = (hiz_bo != NULL);
1023 }
1024
1025 /*
1026 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
1027 *
1028 * "If this field (Hierarchical Depth Buffer Enable) is enabled, the
1029 * Surface Format of the depth buffer cannot be
1030 * D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
1031 * requires the separate stencil buffer."
1032 *
1033 * From the Ironlake PRM, volume 2 part 1, page 330:
1034 *
1035 * "If this field (Separate Stencil Buffer Enable) is disabled, the
1036 * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
1037 *
1038 * There is no similar restriction for GEN6. But when D24_UNORM_X8_UINT
1039 * is indeed used, the depth values output by the fragment shaders will
1040 * be different when read back.
1041 *
1042 * As for GEN7+, separate_stencil is always true.
1043 */
1044 switch (format) {
1045 case PIPE_FORMAT_Z16_UNORM:
1046 info->format = BRW_DEPTHFORMAT_D16_UNORM;
1047 break;
1048 case PIPE_FORMAT_Z32_FLOAT:
1049 info->format = BRW_DEPTHFORMAT_D32_FLOAT;
1050 break;
1051 case PIPE_FORMAT_Z24X8_UNORM:
1052 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1053 info->format = (separate_stencil) ?
1054 BRW_DEPTHFORMAT_D24_UNORM_X8_UINT :
1055 BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
1056 break;
1057 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1058 info->format = (separate_stencil) ?
1059 BRW_DEPTHFORMAT_D32_FLOAT :
1060 BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
1061 break;
1062 case PIPE_FORMAT_S8_UINT:
1063 if (separate_stencil) {
1064 info->format = BRW_DEPTHFORMAT_D32_FLOAT;
1065 break;
1066 }
1067 /* fall through */
1068 default:
1069 assert(!"unsupported depth/stencil format");
1070 zs_init_info_null(dev, info);
1071 return;
1072 break;
1073 }
1074
1075 if (format != PIPE_FORMAT_S8_UINT) {
1076 info->zs.bo = tex->bo;
1077 info->zs.stride = tex->bo_stride;
1078 info->zs.tiling = tex->tiling;
1079
1080 if (rebase_layer) {
1081 info->zs.offset = ilo_texture_get_slice_offset(tex,
1082 level, first_layer, &x_offset[0], &y_offset[0]);
1083 }
1084 }
1085
1086 if (tex->separate_s8 || format == PIPE_FORMAT_S8_UINT) {
1087 const struct ilo_texture *s8_tex =
1088 (tex->separate_s8) ? tex->separate_s8 : tex;
1089
1090 info->stencil.bo = s8_tex->bo;
1091
1092 /*
1093 * From the Sandy Bridge PRM, volume 2 part 1, page 329:
1094 *
1095 * "The pitch must be set to 2x the value computed based on width,
1096 * as the stencil buffer is stored with two rows interleaved."
1097 *
1098 * According to the classic driver, we need to do the same for GEN7+
1099 * even though the Ivy Bridge PRM does not say anything about it.
1100 */
1101 info->stencil.stride = s8_tex->bo_stride * 2;
1102
1103 info->stencil.tiling = s8_tex->tiling;
1104
1105 if (rebase_layer) {
1106 info->stencil.offset = ilo_texture_get_slice_offset(s8_tex,
1107 level, first_layer, &x_offset[1], &y_offset[1]);
1108 }
1109 }
1110
1111 if (hiz_bo) {
1112 info->hiz.bo = hiz_bo;
1113 info->hiz.stride = 0;
1114 info->hiz.tiling = 0;
1115 info->hiz.offset = 0;
1116 x_offset[2] = 0;
1117 y_offset[2] = 0;
1118 }
1119
1120 info->width = tex->base.width0;
1121 info->height = tex->base.height0;
1122 info->depth = (tex->base.target == PIPE_TEXTURE_3D) ?
1123 tex->base.depth0 : num_layers;
1124
1125 info->lod = level;
1126 info->first_layer = first_layer;
1127 info->num_layers = num_layers;
1128
1129 if (rebase_layer) {
1130 /* the size of the layer */
1131 info->width = u_minify(info->width, level);
1132 info->height = u_minify(info->height, level);
1133 if (info->surface_type == BRW_SURFACE_3D)
1134 info->depth = u_minify(info->depth, level);
1135 else
1136 info->depth = 1;
1137
1138 /* no layered rendering */
1139 assert(num_layers == 1);
1140
1141 info->lod = 0;
1142 info->first_layer = 0;
1143 info->num_layers = 1;
1144
1145 /* all three share the same X/Y offsets */
1146 if (info->zs.bo) {
1147 if (info->stencil.bo) {
1148 assert(x_offset[0] == x_offset[1]);
1149 assert(y_offset[0] == y_offset[1]);
1150 }
1151
1152 info->x_offset = x_offset[0];
1153 info->y_offset = y_offset[0];
1154 }
1155 else {
1156 assert(info->stencil.bo);
1157
1158 info->x_offset = x_offset[1];
1159 info->y_offset = y_offset[1];
1160 }
1161
1162 if (info->hiz.bo) {
1163 assert(info->x_offset == x_offset[2]);
1164 assert(info->y_offset == y_offset[2]);
1165 }
1166
1167 /*
1168 * From the Sandy Bridge PRM, volume 2 part 1, page 326:
1169 *
1170 * "The 3 LSBs of both offsets (Depth Coordinate Offset Y and Depth
1171 * Coordinate Offset X) must be zero to ensure correct alignment"
1172 *
1173 * XXX Skip the check for gen6, which seems to be fine. We need to make
1174 * sure that does not happen eventually.
1175 */
1176 if (dev->gen >= ILO_GEN(7)) {
1177 assert((info->x_offset & 7) == 0 && (info->y_offset & 7) == 0);
1178 info->x_offset &= ~7;
1179 info->y_offset &= ~7;
1180 }
1181
1182 info->width += info->x_offset;
1183 info->height += info->y_offset;
1184
1185 /* we have to treat them as 2D surfaces */
1186 if (info->surface_type == BRW_SURFACE_CUBE) {
1187 assert(tex->base.width0 == tex->base.height0);
1188 /* we will set slice_offset to point to the single face */
1189 info->surface_type = BRW_SURFACE_2D;
1190 }
1191 else if (info->surface_type == BRW_SURFACE_1D && info->height > 1) {
1192 assert(tex->base.height0 == 1);
1193 info->surface_type = BRW_SURFACE_2D;
1194 }
1195 }
1196 }
1197
1198 void
1199 ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev,
1200 const struct ilo_texture *tex,
1201 enum pipe_format format,
1202 unsigned level,
1203 unsigned first_layer, unsigned num_layers,
1204 struct ilo_zs_surface *zs)
1205 {
1206 const int max_2d_size = (dev->gen >= ILO_GEN(7)) ? 16384 : 8192;
1207 const int max_array_size = (dev->gen >= ILO_GEN(7)) ? 2048 : 512;
1208 struct ilo_zs_surface_info info;
1209 uint32_t dw1, dw2, dw3, dw4, dw5, dw6;
1210
1211 ILO_GPE_VALID_GEN(dev, 6, 7);
1212
1213 if (tex)
1214 zs_init_info(dev, tex, format, level, first_layer, num_layers, &info);
1215 else
1216 zs_init_info_null(dev, &info);
1217
1218 switch (info.surface_type) {
1219 case BRW_SURFACE_NULL:
1220 break;
1221 case BRW_SURFACE_1D:
1222 assert(info.width <= max_2d_size && info.height == 1 &&
1223 info.depth <= max_array_size);
1224 assert(info.first_layer < max_array_size - 1 &&
1225 info.num_layers <= max_array_size);
1226 break;
1227 case BRW_SURFACE_2D:
1228 assert(info.width <= max_2d_size && info.height <= max_2d_size &&
1229 info.depth <= max_array_size);
1230 assert(info.first_layer < max_array_size - 1 &&
1231 info.num_layers <= max_array_size);
1232 break;
1233 case BRW_SURFACE_3D:
1234 assert(info.width <= 2048 && info.height <= 2048 && info.depth <= 2048);
1235 assert(info.first_layer < 2048 && info.num_layers <= max_array_size);
1236 assert(info.x_offset == 0 && info.y_offset == 0);
1237 break;
1238 case BRW_SURFACE_CUBE:
1239 assert(info.width <= max_2d_size && info.height <= max_2d_size &&
1240 info.depth == 1);
1241 assert(info.first_layer == 0 && info.num_layers == 1);
1242 assert(info.width == info.height);
1243 assert(info.x_offset == 0 && info.y_offset == 0);
1244 break;
1245 default:
1246 assert(!"unexpected depth surface type");
1247 break;
1248 }
1249
1250 dw1 = info.surface_type << 29 |
1251 info.format << 18;
1252
1253 if (info.zs.bo) {
1254 /* required for GEN6+ */
1255 assert(info.zs.tiling == INTEL_TILING_Y);
1256 assert(info.zs.stride > 0 && info.zs.stride < 128 * 1024 &&
1257 info.zs.stride % 128 == 0);
1258 assert(info.width <= info.zs.stride);
1259
1260 dw1 |= (info.zs.stride - 1);
1261 dw2 = info.zs.offset;
1262 }
1263 else {
1264 dw2 = 0;
1265 }
1266
1267 if (dev->gen >= ILO_GEN(7)) {
1268 if (info.zs.bo)
1269 dw1 |= 1 << 28;
1270
1271 if (info.stencil.bo)
1272 dw1 |= 1 << 27;
1273
1274 if (info.hiz.bo)
1275 dw1 |= 1 << 22;
1276
1277 dw3 = (info.height - 1) << 18 |
1278 (info.width - 1) << 4 |
1279 info.lod;
1280
1281 dw4 = (info.depth - 1) << 21 |
1282 info.first_layer << 10;
1283
1284 dw5 = info.y_offset << 16 | info.x_offset;
1285
1286 dw6 = (info.num_layers - 1) << 21;
1287 }
1288 else {
1289 /* always Y-tiled */
1290 dw1 |= 1 << 27 |
1291 1 << 26;
1292
1293 if (info.hiz.bo) {
1294 dw1 |= 1 << 22 |
1295 1 << 21;
1296 }
1297
1298 dw3 = (info.height - 1) << 19 |
1299 (info.width - 1) << 6 |
1300 info.lod << 2 |
1301 BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1;
1302
1303 dw4 = (info.depth - 1) << 21 |
1304 info.first_layer << 10 |
1305 (info.num_layers - 1) << 1;
1306
1307 dw5 = info.y_offset << 16 | info.x_offset;
1308
1309 dw6 = 0;
1310 }
1311
1312 STATIC_ASSERT(Elements(zs->payload) >= 10);
1313
1314 zs->payload[0] = dw1;
1315 zs->payload[1] = dw2;
1316 zs->payload[2] = dw3;
1317 zs->payload[3] = dw4;
1318 zs->payload[4] = dw5;
1319 zs->payload[5] = dw6;
1320
1321 /* do not increment reference count */
1322 zs->bo = info.zs.bo;
1323
1324 /* separate stencil */
1325 if (info.stencil.bo) {
1326 assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 &&
1327 info.stencil.stride % 128 == 0);
1328
1329 zs->payload[6] = info.stencil.stride - 1;
1330 zs->payload[7] = info.stencil.offset;
1331
1332 /* do not increment reference count */
1333 zs->separate_s8_bo = info.stencil.bo;
1334 }
1335 else {
1336 zs->payload[6] = 0;
1337 zs->payload[7] = 0;
1338 zs->separate_s8_bo = NULL;
1339 }
1340
1341 /* hiz */
1342 if (info.hiz.bo) {
1343 zs->payload[8] = info.hiz.stride - 1;
1344 zs->payload[9] = info.hiz.offset;
1345
1346 /* do not increment reference count */
1347 zs->hiz_bo = info.hiz.bo;
1348 }
1349 else {
1350 zs->payload[8] = 0;
1351 zs->payload[9] = 0;
1352 zs->hiz_bo = NULL;
1353 }
1354 }
1355
1356 static void
1357 viewport_get_guardband(const struct ilo_dev_info *dev,
1358 int center_x, int center_y,
1359 int *min_gbx, int *max_gbx,
1360 int *min_gby, int *max_gby)
1361 {
1362 /*
1363 * From the Sandy Bridge PRM, volume 2 part 1, page 234:
1364 *
1365 * "Per-Device Guardband Extents
1366 *
1367 * - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
1368 * - Maximum Post-Clamp Delta (X or Y): 16K"
1369 *
1370 * "In addition, in order to be correctly rendered, objects must have a
1371 * screenspace bounding box not exceeding 8K in the X or Y direction.
1372 * This additional restriction must also be comprehended by software,
1373 * i.e., enforced by use of clipping."
1374 *
1375 * From the Ivy Bridge PRM, volume 2 part 1, page 248:
1376 *
1377 * "Per-Device Guardband Extents
1378 *
1379 * - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
1380 * - Maximum Post-Clamp Delta (X or Y): N/A"
1381 *
1382 * "In addition, in order to be correctly rendered, objects must have a
1383 * screenspace bounding box not exceeding 8K in the X or Y direction.
1384 * This additional restriction must also be comprehended by software,
1385 * i.e., enforced by use of clipping."
1386 *
1387 * Combined, the bounding box of any object can not exceed 8K in both
1388 * width and height.
1389 *
1390 * Below we set the guardband as a squre of length 8K, centered at where
1391 * the viewport is. This makes sure all objects passing the GB test are
1392 * valid to the renderer, and those failing the XY clipping have a
1393 * better chance of passing the GB test.
1394 */
1395 const int max_extent = (dev->gen >= ILO_GEN(7)) ? 32768 : 16384;
1396 const int half_len = 8192 / 2;
1397
1398 /* make sure the guardband is within the valid range */
1399 if (center_x - half_len < -max_extent)
1400 center_x = -max_extent + half_len;
1401 else if (center_x + half_len > max_extent - 1)
1402 center_x = max_extent - half_len;
1403
1404 if (center_y - half_len < -max_extent)
1405 center_y = -max_extent + half_len;
1406 else if (center_y + half_len > max_extent - 1)
1407 center_y = max_extent - half_len;
1408
1409 *min_gbx = (float) (center_x - half_len);
1410 *max_gbx = (float) (center_x + half_len);
1411 *min_gby = (float) (center_y - half_len);
1412 *max_gby = (float) (center_y + half_len);
1413 }
1414
1415 void
1416 ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev,
1417 const struct pipe_viewport_state *state,
1418 struct ilo_viewport_cso *vp)
1419 {
1420 const float scale_x = fabs(state->scale[0]);
1421 const float scale_y = fabs(state->scale[1]);
1422 const float scale_z = fabs(state->scale[2]);
1423 int min_gbx, max_gbx, min_gby, max_gby;
1424
1425 ILO_GPE_VALID_GEN(dev, 6, 7);
1426
1427 viewport_get_guardband(dev,
1428 (int) state->translate[0],
1429 (int) state->translate[1],
1430 &min_gbx, &max_gbx, &min_gby, &max_gby);
1431
1432 /* matrix form */
1433 vp->m00 = state->scale[0];
1434 vp->m11 = state->scale[1];
1435 vp->m22 = state->scale[2];
1436 vp->m30 = state->translate[0];
1437 vp->m31 = state->translate[1];
1438 vp->m32 = state->translate[2];
1439
1440 /* guardband in NDC space */
1441 vp->min_gbx = ((float) min_gbx - state->translate[0]) / scale_x;
1442 vp->max_gbx = ((float) max_gbx - state->translate[0]) / scale_x;
1443 vp->min_gby = ((float) min_gby - state->translate[1]) / scale_y;
1444 vp->max_gby = ((float) max_gby - state->translate[1]) / scale_y;
1445
1446 /* viewport in screen space */
1447 vp->min_x = scale_x * -1.0f + state->translate[0];
1448 vp->max_x = scale_x * 1.0f + state->translate[0];
1449 vp->min_y = scale_y * -1.0f + state->translate[1];
1450 vp->max_y = scale_y * 1.0f + state->translate[1];
1451 vp->min_z = scale_z * -1.0f + state->translate[2];
1452 vp->max_z = scale_z * 1.0f + state->translate[2];
1453 }
1454
1455 static int
1456 gen6_blend_factor_dst_alpha_forced_one(int factor)
1457 {
1458 switch (factor) {
1459 case BRW_BLENDFACTOR_DST_ALPHA:
1460 return BRW_BLENDFACTOR_ONE;
1461 case BRW_BLENDFACTOR_INV_DST_ALPHA:
1462 case BRW_BLENDFACTOR_SRC_ALPHA_SATURATE:
1463 return BRW_BLENDFACTOR_ZERO;
1464 default:
1465 return factor;
1466 }
1467 }
1468
1469 static uint32_t
1470 blend_get_rt_blend_enable(const struct ilo_dev_info *dev,
1471 const struct pipe_rt_blend_state *rt,
1472 bool dst_alpha_forced_one)
1473 {
1474 int rgb_src, rgb_dst, a_src, a_dst;
1475 uint32_t dw;
1476
1477 if (!rt->blend_enable)
1478 return 0;
1479
1480 rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
1481 rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
1482 a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
1483 a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
1484
1485 if (dst_alpha_forced_one) {
1486 rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src);
1487 rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst);
1488 a_src = gen6_blend_factor_dst_alpha_forced_one(a_src);
1489 a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst);
1490 }
1491
1492 dw = 1 << 31 |
1493 gen6_translate_pipe_blend(rt->alpha_func) << 26 |
1494 a_src << 20 |
1495 a_dst << 15 |
1496 gen6_translate_pipe_blend(rt->rgb_func) << 11 |
1497 rgb_src << 5 |
1498 rgb_dst;
1499
1500 if (rt->rgb_func != rt->alpha_func ||
1501 rgb_src != a_src || rgb_dst != a_dst)
1502 dw |= 1 << 30;
1503
1504 return dw;
1505 }
1506
1507 void
1508 ilo_gpe_init_blend(const struct ilo_dev_info *dev,
1509 const struct pipe_blend_state *state,
1510 struct ilo_blend_state *blend)
1511 {
1512 unsigned num_cso, i;
1513
1514 ILO_GPE_VALID_GEN(dev, 6, 7);
1515
1516 if (state->independent_blend_enable) {
1517 num_cso = Elements(blend->cso);
1518 }
1519 else {
1520 memset(blend->cso, 0, sizeof(blend->cso));
1521 num_cso = 1;
1522 }
1523
1524 blend->independent_blend_enable = state->independent_blend_enable;
1525 blend->alpha_to_coverage = state->alpha_to_coverage;
1526 blend->dual_blend = false;
1527
1528 for (i = 0; i < num_cso; i++) {
1529 const struct pipe_rt_blend_state *rt = &state->rt[i];
1530 struct ilo_blend_cso *cso = &blend->cso[i];
1531 bool dual_blend;
1532
1533 cso->payload[0] = 0;
1534 cso->payload[1] = BRW_RENDERTARGET_CLAMPRANGE_FORMAT << 2 |
1535 0x3;
1536
1537 if (!(rt->colormask & PIPE_MASK_A))
1538 cso->payload[1] |= 1 << 27;
1539 if (!(rt->colormask & PIPE_MASK_R))
1540 cso->payload[1] |= 1 << 26;
1541 if (!(rt->colormask & PIPE_MASK_G))
1542 cso->payload[1] |= 1 << 25;
1543 if (!(rt->colormask & PIPE_MASK_B))
1544 cso->payload[1] |= 1 << 24;
1545
1546 if (state->dither)
1547 cso->payload[1] |= 1 << 12;
1548
1549 /*
1550 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
1551 *
1552 * "Color Buffer Blending and Logic Ops must not be enabled
1553 * simultaneously, or behavior is UNDEFINED."
1554 *
1555 * Since state->logicop_enable takes precedence over rt->blend_enable,
1556 * no special care is needed.
1557 */
1558 if (state->logicop_enable) {
1559 cso->dw_logicop = 1 << 22 |
1560 gen6_translate_pipe_logicop(state->logicop_func) << 18;
1561
1562 cso->dw_blend = 0;
1563 cso->dw_blend_dst_alpha_forced_one = 0;
1564
1565 dual_blend = false;
1566 }
1567 else {
1568 cso->dw_logicop = 0;
1569
1570 cso->dw_blend = blend_get_rt_blend_enable(dev, rt, false);
1571 cso->dw_blend_dst_alpha_forced_one =
1572 blend_get_rt_blend_enable(dev, rt, true);
1573
1574 dual_blend = (rt->blend_enable &&
1575 util_blend_state_is_dual(state, i));
1576 }
1577
1578 cso->dw_alpha_mod = 0;
1579
1580 if (state->alpha_to_coverage) {
1581 cso->dw_alpha_mod |= 1 << 31;
1582
1583 if (dev->gen >= ILO_GEN(7))
1584 cso->dw_alpha_mod |= 1 << 29;
1585 }
1586
1587 /*
1588 * From the Sandy Bridge PRM, volume 2 part 1, page 378:
1589 *
1590 * "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
1591 * must be disabled."
1592 */
1593 if (state->alpha_to_one && !dual_blend)
1594 cso->dw_alpha_mod |= 1 << 30;
1595
1596 if (dual_blend)
1597 blend->dual_blend = true;
1598 }
1599 }
1600
1601 void
1602 ilo_gpe_init_dsa(const struct ilo_dev_info *dev,
1603 const struct pipe_depth_stencil_alpha_state *state,
1604 struct ilo_dsa_state *dsa)
1605 {
1606 const struct pipe_depth_state *depth = &state->depth;
1607 const struct pipe_stencil_state *stencil0 = &state->stencil[0];
1608 const struct pipe_stencil_state *stencil1 = &state->stencil[1];
1609 const struct pipe_alpha_state *alpha = &state->alpha;
1610 uint32_t *dw;
1611
1612 ILO_GPE_VALID_GEN(dev, 6, 7);
1613
1614 STATIC_ASSERT(Elements(dsa->payload) >= 3);
1615 dw = dsa->payload;
1616
1617 /*
1618 * From the Sandy Bridge PRM, volume 2 part 1, page 359:
1619 *
1620 * "If the Depth Buffer is either undefined or does not have a surface
1621 * format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
1622 * stencil buffer is disabled, Stencil Test Enable must be DISABLED"
1623 *
1624 * From the Sandy Bridge PRM, volume 2 part 1, page 370:
1625 *
1626 * "This field (Stencil Test Enable) cannot be enabled if
1627 * Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
1628 *
1629 * TODO We do not check these yet.
1630 */
1631 if (stencil0->enabled) {
1632 dw[0] = 1 << 31 |
1633 gen6_translate_dsa_func(stencil0->func) << 28 |
1634 gen6_translate_pipe_stencil_op(stencil0->fail_op) << 25 |
1635 gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 22 |
1636 gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 19;
1637 if (stencil0->writemask)
1638 dw[0] |= 1 << 18;
1639
1640 dw[1] = stencil0->valuemask << 24 |
1641 stencil0->writemask << 16;
1642
1643 if (stencil1->enabled) {
1644 dw[0] |= 1 << 15 |
1645 gen6_translate_dsa_func(stencil1->func) << 12 |
1646 gen6_translate_pipe_stencil_op(stencil1->fail_op) << 9 |
1647 gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 6 |
1648 gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 3;
1649 if (stencil1->writemask)
1650 dw[0] |= 1 << 18;
1651
1652 dw[1] |= stencil1->valuemask << 8 |
1653 stencil1->writemask;
1654 }
1655 }
1656 else {
1657 dw[0] = 0;
1658 dw[1] = 0;
1659 }
1660
1661 /*
1662 * From the Sandy Bridge PRM, volume 2 part 1, page 360:
1663 *
1664 * "Enabling the Depth Test function without defining a Depth Buffer is
1665 * UNDEFINED."
1666 *
1667 * From the Sandy Bridge PRM, volume 2 part 1, page 375:
1668 *
1669 * "A Depth Buffer must be defined before enabling writes to it, or
1670 * operation is UNDEFINED."
1671 *
1672 * TODO We do not check these yet.
1673 */
1674 dw[2] = depth->enabled << 31 |
1675 depth->writemask << 26;
1676 if (depth->enabled)
1677 dw[2] |= gen6_translate_dsa_func(depth->func) << 27;
1678 else
1679 dw[2] |= BRW_COMPAREFUNCTION_ALWAYS << 27;
1680
1681 /* dw_alpha will be ORed to BLEND_STATE */
1682 if (alpha->enabled) {
1683 dsa->dw_alpha = 1 << 16 |
1684 gen6_translate_dsa_func(alpha->func) << 13;
1685 }
1686 else {
1687 dsa->dw_alpha = 0;
1688 }
1689
1690 dsa->alpha_ref = float_to_ubyte(alpha->ref_value);
1691 }
1692
1693 void
1694 ilo_gpe_set_scissor(const struct ilo_dev_info *dev,
1695 unsigned start_slot,
1696 unsigned num_states,
1697 const struct pipe_scissor_state *states,
1698 struct ilo_scissor_state *scissor)
1699 {
1700 unsigned i;
1701
1702 ILO_GPE_VALID_GEN(dev, 6, 7);
1703
1704 for (i = 0; i < num_states; i++) {
1705 uint16_t min_x, min_y, max_x, max_y;
1706
1707 /* both max and min are inclusive in SCISSOR_RECT */
1708 if (states[i].minx < states[i].maxx &&
1709 states[i].miny < states[i].maxy) {
1710 min_x = states[i].minx;
1711 min_y = states[i].miny;
1712 max_x = states[i].maxx - 1;
1713 max_y = states[i].maxy - 1;
1714 }
1715 else {
1716 /* we have to make min greater than max */
1717 min_x = 1;
1718 min_y = 1;
1719 max_x = 0;
1720 max_y = 0;
1721 }
1722
1723 scissor->payload[(start_slot + i) * 2 + 0] = min_y << 16 | min_x;
1724 scissor->payload[(start_slot + i) * 2 + 1] = max_y << 16 | max_x;
1725 }
1726
1727 if (!start_slot && num_states)
1728 scissor->scissor0 = states[0];
1729 }
1730
1731 void
1732 ilo_gpe_set_scissor_null(const struct ilo_dev_info *dev,
1733 struct ilo_scissor_state *scissor)
1734 {
1735 unsigned i;
1736
1737 for (i = 0; i < Elements(scissor->payload); i += 2) {
1738 scissor->payload[i + 0] = 1 << 16 | 1;
1739 scissor->payload[i + 1] = 0;
1740 }
1741 }
1742
1743 void
1744 ilo_gpe_init_view_surface_null_gen6(const struct ilo_dev_info *dev,
1745 unsigned width, unsigned height,
1746 unsigned depth, unsigned level,
1747 struct ilo_view_surface *surf)
1748 {
1749 uint32_t *dw;
1750
1751 ILO_GPE_VALID_GEN(dev, 6, 6);
1752
1753 /*
1754 * From the Sandy Bridge PRM, volume 4 part 1, page 71:
1755 *
1756 * "A null surface will be used in instances where an actual surface is
1757 * not bound. When a write message is generated to a null surface, no
1758 * actual surface is written to. When a read message (including any
1759 * sampling engine message) is generated to a null surface, the result
1760 * is all zeros. Note that a null surface type is allowed to be used
1761 * with all messages, even if it is not specificially indicated as
1762 * supported. All of the remaining fields in surface state are ignored
1763 * for null surfaces, with the following exceptions:
1764 *
1765 * * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
1766 * depth buffer's corresponding state for all render target
1767 * surfaces, including null.
1768 * * Surface Format must be R8G8B8A8_UNORM."
1769 *
1770 * From the Sandy Bridge PRM, volume 4 part 1, page 82:
1771 *
1772 * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
1773 * true"
1774 */
1775
1776 STATIC_ASSERT(Elements(surf->payload) >= 6);
1777 dw = surf->payload;
1778
1779 dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
1780 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT;
1781
1782 dw[1] = 0;
1783
1784 dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
1785 (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
1786 level << BRW_SURFACE_LOD_SHIFT;
1787
1788 dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
1789 BRW_SURFACE_TILED;
1790
1791 dw[4] = 0;
1792 dw[5] = 0;
1793
1794 surf->bo = NULL;
1795 }
1796
1797 void
1798 ilo_gpe_init_view_surface_for_buffer_gen6(const struct ilo_dev_info *dev,
1799 const struct ilo_buffer *buf,
1800 unsigned offset, unsigned size,
1801 unsigned struct_size,
1802 enum pipe_format elem_format,
1803 bool is_rt, bool render_cache_rw,
1804 struct ilo_view_surface *surf)
1805 {
1806 const int elem_size = util_format_get_blocksize(elem_format);
1807 int width, height, depth, pitch;
1808 int surface_format, num_entries;
1809 uint32_t *dw;
1810
1811 ILO_GPE_VALID_GEN(dev, 6, 6);
1812
1813 /*
1814 * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
1815 * structure in a buffer.
1816 */
1817
1818 surface_format = ilo_translate_color_format(elem_format);
1819
1820 num_entries = size / struct_size;
1821 /* see if there is enough space to fit another element */
1822 if (size % struct_size >= elem_size)
1823 num_entries++;
1824
1825 /*
1826 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
1827 *
1828 * "For SURFTYPE_BUFFER render targets, this field (Surface Base
1829 * Address) specifies the base address of first element of the
1830 * surface. The surface is interpreted as a simple array of that
1831 * single element type. The address must be naturally-aligned to the
1832 * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
1833 * must be 16-byte aligned).
1834 *
1835 * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
1836 * the base address of the first element of the surface, computed in
1837 * software by adding the surface base address to the byte offset of
1838 * the element in the buffer."
1839 */
1840 if (is_rt)
1841 assert(offset % elem_size == 0);
1842
1843 /*
1844 * From the Sandy Bridge PRM, volume 4 part 1, page 77:
1845 *
1846 * "For buffer surfaces, the number of entries in the buffer ranges
1847 * from 1 to 2^27."
1848 */
1849 assert(num_entries >= 1 && num_entries <= 1 << 27);
1850
1851 /*
1852 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
1853 *
1854 * "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
1855 * indicates the size of the structure."
1856 */
1857 pitch = struct_size;
1858
1859 pitch--;
1860 num_entries--;
1861 /* bits [6:0] */
1862 width = (num_entries & 0x0000007f);
1863 /* bits [19:7] */
1864 height = (num_entries & 0x000fff80) >> 7;
1865 /* bits [26:20] */
1866 depth = (num_entries & 0x07f00000) >> 20;
1867
1868 STATIC_ASSERT(Elements(surf->payload) >= 6);
1869 dw = surf->payload;
1870
1871 dw[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
1872 surface_format << BRW_SURFACE_FORMAT_SHIFT;
1873 if (render_cache_rw)
1874 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
1875
1876 dw[1] = offset;
1877
1878 dw[2] = height << BRW_SURFACE_HEIGHT_SHIFT |
1879 width << BRW_SURFACE_WIDTH_SHIFT;
1880
1881 dw[3] = depth << BRW_SURFACE_DEPTH_SHIFT |
1882 pitch << BRW_SURFACE_PITCH_SHIFT;
1883
1884 dw[4] = 0;
1885 dw[5] = 0;
1886
1887 /* do not increment reference count */
1888 surf->bo = buf->bo;
1889 }
1890
1891 void
1892 ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev,
1893 const struct ilo_texture *tex,
1894 enum pipe_format format,
1895 unsigned first_level,
1896 unsigned num_levels,
1897 unsigned first_layer,
1898 unsigned num_layers,
1899 bool is_rt, bool render_cache_rw,
1900 struct ilo_view_surface *surf)
1901 {
1902 int surface_type, surface_format;
1903 int width, height, depth, pitch, lod;
1904 unsigned layer_offset, x_offset, y_offset;
1905 uint32_t *dw;
1906
1907 ILO_GPE_VALID_GEN(dev, 6, 6);
1908
1909 surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
1910 assert(surface_type != BRW_SURFACE_BUFFER);
1911
1912 if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
1913 format = PIPE_FORMAT_Z32_FLOAT;
1914
1915 if (is_rt)
1916 surface_format = ilo_translate_render_format(format);
1917 else
1918 surface_format = ilo_translate_texture_format(format);
1919 assert(surface_format >= 0);
1920
1921 width = tex->base.width0;
1922 height = tex->base.height0;
1923 depth = (tex->base.target == PIPE_TEXTURE_3D) ?
1924 tex->base.depth0 : num_layers;
1925 pitch = tex->bo_stride;
1926
1927 if (surface_type == BRW_SURFACE_CUBE) {
1928 /*
1929 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
1930 *
1931 * "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
1932 * range of this field (Depth) is [0,84], indicating the number of
1933 * cube array elements (equal to the number of underlying 2D array
1934 * elements divided by 6). For other surfaces, this field must be
1935 * zero."
1936 *
1937 * When is_rt is true, we treat the texture as a 2D one to avoid the
1938 * restriction.
1939 */
1940 if (is_rt) {
1941 surface_type = BRW_SURFACE_2D;
1942 }
1943 else {
1944 assert(num_layers % 6 == 0);
1945 depth = num_layers / 6;
1946 }
1947 }
1948
1949 /* sanity check the size */
1950 assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
1951 switch (surface_type) {
1952 case BRW_SURFACE_1D:
1953 assert(width <= 8192 && height == 1 && depth <= 512);
1954 assert(first_layer < 512 && num_layers <= 512);
1955 break;
1956 case BRW_SURFACE_2D:
1957 assert(width <= 8192 && height <= 8192 && depth <= 512);
1958 assert(first_layer < 512 && num_layers <= 512);
1959 break;
1960 case BRW_SURFACE_3D:
1961 assert(width <= 2048 && height <= 2048 && depth <= 2048);
1962 assert(first_layer < 2048 && num_layers <= 512);
1963 if (!is_rt)
1964 assert(first_layer == 0);
1965 break;
1966 case BRW_SURFACE_CUBE:
1967 assert(width <= 8192 && height <= 8192 && depth <= 85);
1968 assert(width == height);
1969 assert(first_layer < 512 && num_layers <= 512);
1970 if (is_rt)
1971 assert(first_layer == 0);
1972 break;
1973 default:
1974 assert(!"unexpected surface type");
1975 break;
1976 }
1977
1978 /* non-full array spacing is supported only on GEN7+ */
1979 assert(tex->array_spacing_full);
1980 /* non-interleaved samples are supported only on GEN7+ */
1981 if (tex->base.nr_samples > 1)
1982 assert(tex->interleaved);
1983
1984 if (is_rt) {
1985 /*
1986 * Compute the offset to the layer manually.
1987 *
1988 * For rendering, the hardware requires LOD to be the same for all
1989 * render targets and the depth buffer. We need to compute the offset
1990 * to the layer manually and always set LOD to 0.
1991 */
1992 if (true) {
1993 /* we lose the capability for layered rendering */
1994 assert(num_layers == 1);
1995
1996 layer_offset = ilo_texture_get_slice_offset(tex,
1997 first_level, first_layer, &x_offset, &y_offset);
1998
1999 assert(x_offset % 4 == 0);
2000 assert(y_offset % 2 == 0);
2001 x_offset /= 4;
2002 y_offset /= 2;
2003
2004 /* derive the size for the LOD */
2005 width = u_minify(width, first_level);
2006 height = u_minify(height, first_level);
2007 if (surface_type == BRW_SURFACE_3D)
2008 depth = u_minify(depth, first_level);
2009 else
2010 depth = 1;
2011
2012 first_level = 0;
2013 first_layer = 0;
2014 lod = 0;
2015 }
2016 else {
2017 layer_offset = 0;
2018 x_offset = 0;
2019 y_offset = 0;
2020 }
2021
2022 assert(num_levels == 1);
2023 lod = first_level;
2024 }
2025 else {
2026 layer_offset = 0;
2027 x_offset = 0;
2028 y_offset = 0;
2029
2030 lod = num_levels - 1;
2031 }
2032
2033 /*
2034 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
2035 *
2036 * "Linear render target surface base addresses must be element-size
2037 * aligned, for non-YUV surface formats, or a multiple of 2
2038 * element-sizes for YUV surface formats. Other linear surfaces have
2039 * no alignment requirements (byte alignment is sufficient.)"
2040 *
2041 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
2042 *
2043 * "For linear render target surfaces, the pitch must be a multiple
2044 * of the element size for non-YUV surface formats. Pitch must be a
2045 * multiple of 2 * element size for YUV surface formats."
2046 *
2047 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
2048 *
2049 * "For linear surfaces, this field (X Offset) must be zero"
2050 */
2051 if (tex->tiling == INTEL_TILING_NONE) {
2052 if (is_rt) {
2053 const int elem_size = util_format_get_blocksize(format);
2054 assert(layer_offset % elem_size == 0);
2055 assert(pitch % elem_size == 0);
2056 }
2057
2058 assert(!x_offset);
2059 }
2060
2061 STATIC_ASSERT(Elements(surf->payload) >= 6);
2062 dw = surf->payload;
2063
2064 dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
2065 surface_format << BRW_SURFACE_FORMAT_SHIFT |
2066 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT;
2067
2068 if (surface_type == BRW_SURFACE_CUBE && !is_rt) {
2069 dw[0] |= 1 << 9 |
2070 BRW_SURFACE_CUBEFACE_ENABLES;
2071 }
2072
2073 if (render_cache_rw)
2074 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
2075
2076 dw[1] = layer_offset;
2077
2078 dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
2079 (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
2080 lod << BRW_SURFACE_LOD_SHIFT;
2081
2082 dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
2083 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT |
2084 ilo_gpe_gen6_translate_winsys_tiling(tex->tiling);
2085
2086 dw[4] = first_level << BRW_SURFACE_MIN_LOD_SHIFT |
2087 first_layer << 17 |
2088 (num_layers - 1) << 8 |
2089 ((tex->base.nr_samples > 1) ? BRW_SURFACE_MULTISAMPLECOUNT_4 :
2090 BRW_SURFACE_MULTISAMPLECOUNT_1);
2091
2092 dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT |
2093 y_offset << BRW_SURFACE_Y_OFFSET_SHIFT;
2094 if (tex->valign_4)
2095 dw[5] |= BRW_SURFACE_VERTICAL_ALIGN_ENABLE;
2096
2097 /* do not increment reference count */
2098 surf->bo = tex->bo;
2099 }
2100
2101 static void
2102 sampler_init_border_color_gen6(const struct ilo_dev_info *dev,
2103 const union pipe_color_union *color,
2104 uint32_t *dw, int num_dwords)
2105 {
2106 float rgba[4] = {
2107 color->f[0], color->f[1], color->f[2], color->f[3],
2108 };
2109
2110 ILO_GPE_VALID_GEN(dev, 6, 6);
2111
2112 assert(num_dwords >= 12);
2113
2114 /*
2115 * This state is not documented in the Sandy Bridge PRM, but in the
2116 * Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1.
2117 */
2118
2119 /* IEEE_FP */
2120 dw[1] = fui(rgba[0]);
2121 dw[2] = fui(rgba[1]);
2122 dw[3] = fui(rgba[2]);
2123 dw[4] = fui(rgba[3]);
2124
2125 /* FLOAT_16 */
2126 dw[5] = util_float_to_half(rgba[0]) |
2127 util_float_to_half(rgba[1]) << 16;
2128 dw[6] = util_float_to_half(rgba[2]) |
2129 util_float_to_half(rgba[3]) << 16;
2130
2131 /* clamp to [-1.0f, 1.0f] */
2132 rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f);
2133 rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f);
2134 rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f);
2135 rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f);
2136
2137 /* SNORM16 */
2138 dw[9] = (int16_t) util_iround(rgba[0] * 32767.0f) |
2139 (int16_t) util_iround(rgba[1] * 32767.0f) << 16;
2140 dw[10] = (int16_t) util_iround(rgba[2] * 32767.0f) |
2141 (int16_t) util_iround(rgba[3] * 32767.0f) << 16;
2142
2143 /* SNORM8 */
2144 dw[11] = (int8_t) util_iround(rgba[0] * 127.0f) |
2145 (int8_t) util_iround(rgba[1] * 127.0f) << 8 |
2146 (int8_t) util_iround(rgba[2] * 127.0f) << 16 |
2147 (int8_t) util_iround(rgba[3] * 127.0f) << 24;
2148
2149 /* clamp to [0.0f, 1.0f] */
2150 rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f);
2151 rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f);
2152 rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f);
2153 rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f);
2154
2155 /* UNORM8 */
2156 dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) |
2157 (uint8_t) util_iround(rgba[1] * 255.0f) << 8 |
2158 (uint8_t) util_iround(rgba[2] * 255.0f) << 16 |
2159 (uint8_t) util_iround(rgba[3] * 255.0f) << 24;
2160
2161 /* UNORM16 */
2162 dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) |
2163 (uint16_t) util_iround(rgba[1] * 65535.0f) << 16;
2164 dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) |
2165 (uint16_t) util_iround(rgba[3] * 65535.0f) << 16;
2166 }
2167
2168 void
2169 ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
2170 const struct pipe_sampler_state *state,
2171 struct ilo_sampler_cso *sampler)
2172 {
2173 int mip_filter, min_filter, mag_filter, max_aniso;
2174 int lod_bias, max_lod, min_lod;
2175 int wrap_s, wrap_t, wrap_r, wrap_cube;
2176 bool clamp_is_to_edge;
2177 uint32_t dw0, dw1, dw3;
2178
2179 ILO_GPE_VALID_GEN(dev, 6, 7);
2180
2181 memset(sampler, 0, sizeof(*sampler));
2182
2183 mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter);
2184 min_filter = gen6_translate_tex_filter(state->min_img_filter);
2185 mag_filter = gen6_translate_tex_filter(state->mag_img_filter);
2186
2187 sampler->anisotropic = state->max_anisotropy;
2188
2189 if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16)
2190 max_aniso = state->max_anisotropy / 2 - 1;
2191 else if (state->max_anisotropy > 16)
2192 max_aniso = BRW_ANISORATIO_16;
2193 else
2194 max_aniso = BRW_ANISORATIO_2;
2195
2196 /*
2197 *
2198 * Here is how the hardware calculate per-pixel LOD, from my reading of the
2199 * PRMs:
2200 *
2201 * 1) LOD is set to log2(ratio of texels to pixels) if not specified in
2202 * other ways. The number of texels is measured using level
2203 * SurfMinLod.
2204 * 2) Bias is added to LOD.
2205 * 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
2206 * compared with Base to determine whether magnification or
2207 * minification is needed. (if preclamp is disabled, LOD is compared
2208 * with Base before clamping)
2209 * 4) If magnification is needed, or no mipmapping is requested, LOD is
2210 * set to floor(MinLod).
2211 * 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
2212 *
2213 * With Gallium interface, Base is always zero and
2214 * pipe_sampler_view::u.tex.first_level specifies SurfMinLod.
2215 */
2216 if (dev->gen >= ILO_GEN(7)) {
2217 const float scale = 256.0f;
2218
2219 /* [-16.0, 16.0) in S4.8 */
2220 lod_bias = (int)
2221 (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
2222 lod_bias &= 0x1fff;
2223
2224 /* [0.0, 14.0] in U4.8 */
2225 max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale);
2226 min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale);
2227 }
2228 else {
2229 const float scale = 64.0f;
2230
2231 /* [-16.0, 16.0) in S4.6 */
2232 lod_bias = (int)
2233 (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
2234 lod_bias &= 0x7ff;
2235
2236 /* [0.0, 13.0] in U4.6 */
2237 max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale);
2238 min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale);
2239 }
2240
2241 /*
2242 * We want LOD to be clamped to determine magnification/minification, and
2243 * get set to zero when it is magnification or when mipmapping is disabled.
2244 * The hardware would set LOD to floor(MinLod) and that is a problem when
2245 * MinLod is greater than or equal to 1.0f.
2246 *
2247 * With Base being zero, it is always minification when MinLod is non-zero.
2248 * To achieve our goal, we just need to set MinLod to zero and set
2249 * MagFilter to MinFilter when mipmapping is disabled.
2250 */
2251 if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) {
2252 min_lod = 0;
2253 mag_filter = min_filter;
2254 }
2255
2256 /*
2257 * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
2258 * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering, PIPE_TEX_WRAP_CLAMP
2259 * means PIPE_TEX_WRAP_CLAMP_TO_BORDER while additionally clamping the
2260 * texture coordinates to [0.0, 1.0].
2261 *
2262 * The clamping will be taken care of in the shaders. There are two
2263 * filters here, but let the minification one has a say.
2264 */
2265 clamp_is_to_edge = (state->min_img_filter == PIPE_TEX_FILTER_NEAREST);
2266 if (!clamp_is_to_edge) {
2267 sampler->saturate_s = (state->wrap_s == PIPE_TEX_WRAP_CLAMP);
2268 sampler->saturate_t = (state->wrap_t == PIPE_TEX_WRAP_CLAMP);
2269 sampler->saturate_r = (state->wrap_r == PIPE_TEX_WRAP_CLAMP);
2270 }
2271
2272 /* determine wrap s/t/r */
2273 wrap_s = gen6_translate_tex_wrap(state->wrap_s, clamp_is_to_edge);
2274 wrap_t = gen6_translate_tex_wrap(state->wrap_t, clamp_is_to_edge);
2275 wrap_r = gen6_translate_tex_wrap(state->wrap_r, clamp_is_to_edge);
2276
2277 /*
2278 * From the Sandy Bridge PRM, volume 4 part 1, page 107:
2279 *
2280 * "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
2281 * and TEXCOORDMODE_CUBE settings are valid, and each TC component
2282 * must have the same Address Control mode."
2283 *
2284 * From the Ivy Bridge PRM, volume 4 part 1, page 96:
2285 *
2286 * "This field (Cube Surface Control Mode) must be set to
2287 * CUBECTRLMODE_PROGRAMMED"
2288 *
2289 * Therefore, we cannot use "Cube Surface Control Mode" for semless cube
2290 * map filtering.
2291 */
2292 if (state->seamless_cube_map &&
2293 (state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
2294 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) {
2295 wrap_cube = BRW_TEXCOORDMODE_CUBE;
2296 }
2297 else {
2298 wrap_cube = BRW_TEXCOORDMODE_CLAMP;
2299 }
2300
2301 if (!state->normalized_coords) {
2302 /*
2303 * From the Ivy Bridge PRM, volume 4 part 1, page 98:
2304 *
2305 * "The following state must be set as indicated if this field
2306 * (Non-normalized Coordinate Enable) is enabled:
2307 *
2308 * - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
2309 * TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
2310 * - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
2311 * - Mag Mode Filter must be MAPFILTER_NEAREST or
2312 * MAPFILTER_LINEAR.
2313 * - Min Mode Filter must be MAPFILTER_NEAREST or
2314 * MAPFILTER_LINEAR.
2315 * - Mip Mode Filter must be MIPFILTER_NONE.
2316 * - Min LOD must be 0.
2317 * - Max LOD must be 0.
2318 * - MIP Count must be 0.
2319 * - Surface Min LOD must be 0.
2320 * - Texture LOD Bias must be 0."
2321 */
2322 assert(wrap_s == BRW_TEXCOORDMODE_CLAMP ||
2323 wrap_s == BRW_TEXCOORDMODE_CLAMP_BORDER);
2324 assert(wrap_t == BRW_TEXCOORDMODE_CLAMP ||
2325 wrap_t == BRW_TEXCOORDMODE_CLAMP_BORDER);
2326 assert(wrap_r == BRW_TEXCOORDMODE_CLAMP ||
2327 wrap_r == BRW_TEXCOORDMODE_CLAMP_BORDER);
2328
2329 assert(mag_filter == BRW_MAPFILTER_NEAREST ||
2330 mag_filter == BRW_MAPFILTER_LINEAR);
2331 assert(min_filter == BRW_MAPFILTER_NEAREST ||
2332 min_filter == BRW_MAPFILTER_LINEAR);
2333
2334 /* work around a bug in util_blitter */
2335 mip_filter = BRW_MIPFILTER_NONE;
2336
2337 assert(mip_filter == BRW_MIPFILTER_NONE);
2338 }
2339
2340 if (dev->gen >= ILO_GEN(7)) {
2341 dw0 = 1 << 28 |
2342 mip_filter << 20 |
2343 lod_bias << 1;
2344
2345 sampler->dw_filter = mag_filter << 17 |
2346 min_filter << 14;
2347
2348 sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 |
2349 BRW_MAPFILTER_ANISOTROPIC << 14 |
2350 1;
2351
2352 dw1 = min_lod << 20 |
2353 max_lod << 8;
2354
2355 if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
2356 dw1 |= gen6_translate_shadow_func(state->compare_func) << 1;
2357
2358 dw3 = max_aniso << 19;
2359
2360 /* round the coordinates for linear filtering */
2361 if (min_filter != BRW_MAPFILTER_NEAREST) {
2362 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
2363 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
2364 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
2365 }
2366 if (mag_filter != BRW_MAPFILTER_NEAREST) {
2367 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
2368 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
2369 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
2370 }
2371
2372 if (!state->normalized_coords)
2373 dw3 |= 1 << 10;
2374
2375 sampler->dw_wrap = wrap_s << 6 |
2376 wrap_t << 3 |
2377 wrap_r;
2378
2379 /*
2380 * As noted in the classic i965 driver, the HW may still reference
2381 * wrap_t and wrap_r for 1D textures. We need to set them to a safe
2382 * mode
2383 */
2384 sampler->dw_wrap_1d = wrap_s << 6 |
2385 BRW_TEXCOORDMODE_WRAP << 3 |
2386 BRW_TEXCOORDMODE_WRAP;
2387
2388 sampler->dw_wrap_cube = wrap_cube << 6 |
2389 wrap_cube << 3 |
2390 wrap_cube;
2391
2392 STATIC_ASSERT(Elements(sampler->payload) >= 7);
2393
2394 sampler->payload[0] = dw0;
2395 sampler->payload[1] = dw1;
2396 sampler->payload[2] = dw3;
2397
2398 memcpy(&sampler->payload[3],
2399 state->border_color.ui, sizeof(state->border_color.ui));
2400 }
2401 else {
2402 dw0 = 1 << 28 |
2403 mip_filter << 20 |
2404 lod_bias << 3;
2405
2406 if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
2407 dw0 |= gen6_translate_shadow_func(state->compare_func);
2408
2409 sampler->dw_filter = (min_filter != mag_filter) << 27 |
2410 mag_filter << 17 |
2411 min_filter << 14;
2412
2413 sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 |
2414 BRW_MAPFILTER_ANISOTROPIC << 14;
2415
2416 dw1 = min_lod << 22 |
2417 max_lod << 12;
2418
2419 sampler->dw_wrap = wrap_s << 6 |
2420 wrap_t << 3 |
2421 wrap_r;
2422
2423 sampler->dw_wrap_1d = wrap_s << 6 |
2424 BRW_TEXCOORDMODE_WRAP << 3 |
2425 BRW_TEXCOORDMODE_WRAP;
2426
2427 sampler->dw_wrap_cube = wrap_cube << 6 |
2428 wrap_cube << 3 |
2429 wrap_cube;
2430
2431 dw3 = max_aniso << 19;
2432
2433 /* round the coordinates for linear filtering */
2434 if (min_filter != BRW_MAPFILTER_NEAREST) {
2435 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
2436 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
2437 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
2438 }
2439 if (mag_filter != BRW_MAPFILTER_NEAREST) {
2440 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
2441 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
2442 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
2443 }
2444
2445 if (!state->normalized_coords)
2446 dw3 |= 1;
2447
2448 STATIC_ASSERT(Elements(sampler->payload) >= 15);
2449
2450 sampler->payload[0] = dw0;
2451 sampler->payload[1] = dw1;
2452 sampler->payload[2] = dw3;
2453
2454 sampler_init_border_color_gen6(dev,
2455 &state->border_color, &sampler->payload[3], 12);
2456 }
2457 }
2458
2459 int
2460 ilo_gpe_gen6_estimate_command_size(const struct ilo_dev_info *dev,
2461 enum ilo_gpe_gen6_command cmd,
2462 int arg)
2463 {
2464 static const struct {
2465 int header;
2466 int body;
2467 } gen6_command_size_table[ILO_GPE_GEN6_COMMAND_COUNT] = {
2468 [ILO_GPE_GEN6_STATE_BASE_ADDRESS] = { 0, 10 },
2469 [ILO_GPE_GEN6_STATE_SIP] = { 0, 2 },
2470 [ILO_GPE_GEN6_3DSTATE_VF_STATISTICS] = { 0, 1 },
2471 [ILO_GPE_GEN6_PIPELINE_SELECT] = { 0, 1 },
2472 [ILO_GPE_GEN6_MEDIA_VFE_STATE] = { 0, 8 },
2473 [ILO_GPE_GEN6_MEDIA_CURBE_LOAD] = { 0, 4 },
2474 [ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD] = { 0, 4 },
2475 [ILO_GPE_GEN6_MEDIA_GATEWAY_STATE] = { 0, 2 },
2476 [ILO_GPE_GEN6_MEDIA_STATE_FLUSH] = { 0, 2 },
2477 [ILO_GPE_GEN6_MEDIA_OBJECT_WALKER] = { 17, 1 },
2478 [ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS] = { 0, 4 },
2479 [ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS] = { 0, 4 },
2480 [ILO_GPE_GEN6_3DSTATE_URB] = { 0, 3 },
2481 [ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS] = { 1, 4 },
2482 [ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS] = { 1, 2 },
2483 [ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER] = { 0, 3 },
2484 [ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS] = { 0, 4 },
2485 [ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS] = { 0, 4 },
2486 [ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS] = { 0, 2 },
2487 [ILO_GPE_GEN6_3DSTATE_VS] = { 0, 6 },
2488 [ILO_GPE_GEN6_3DSTATE_GS] = { 0, 7 },
2489 [ILO_GPE_GEN6_3DSTATE_CLIP] = { 0, 4 },
2490 [ILO_GPE_GEN6_3DSTATE_SF] = { 0, 20 },
2491 [ILO_GPE_GEN6_3DSTATE_WM] = { 0, 9 },
2492 [ILO_GPE_GEN6_3DSTATE_CONSTANT_VS] = { 0, 5 },
2493 [ILO_GPE_GEN6_3DSTATE_CONSTANT_GS] = { 0, 5 },
2494 [ILO_GPE_GEN6_3DSTATE_CONSTANT_PS] = { 0, 5 },
2495 [ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK] = { 0, 2 },
2496 [ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE] = { 0, 4 },
2497 [ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER] = { 0, 7 },
2498 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET] = { 0, 2 },
2499 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN] = { 0, 33 },
2500 [ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE] = { 0, 3 },
2501 [ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS] = { 0, 3 },
2502 [ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX] = { 0, 4 },
2503 [ILO_GPE_GEN6_3DSTATE_MULTISAMPLE] = { 0, 3 },
2504 [ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER] = { 0, 3 },
2505 [ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER] = { 0, 3 },
2506 [ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS] = { 0, 2 },
2507 [ILO_GPE_GEN6_PIPE_CONTROL] = { 0, 5 },
2508 [ILO_GPE_GEN6_3DPRIMITIVE] = { 0, 6 },
2509 };
2510 const int header = gen6_command_size_table[cmd].header;
2511 const int body = gen6_command_size_table[arg].body;
2512 const int count = arg;
2513
2514 ILO_GPE_VALID_GEN(dev, 6, 6);
2515 assert(cmd < ILO_GPE_GEN6_COMMAND_COUNT);
2516
2517 return (likely(count)) ? header + body * count : 0;
2518 }
2519
2520 int
2521 ilo_gpe_gen6_estimate_state_size(const struct ilo_dev_info *dev,
2522 enum ilo_gpe_gen6_state state,
2523 int arg)
2524 {
2525 static const struct {
2526 int alignment;
2527 int body;
2528 bool is_array;
2529 } gen6_state_size_table[ILO_GPE_GEN6_STATE_COUNT] = {
2530 [ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA] = { 8, 8, true },
2531 [ILO_GPE_GEN6_SF_VIEWPORT] = { 8, 8, true },
2532 [ILO_GPE_GEN6_CLIP_VIEWPORT] = { 8, 4, true },
2533 [ILO_GPE_GEN6_CC_VIEWPORT] = { 8, 2, true },
2534 [ILO_GPE_GEN6_COLOR_CALC_STATE] = { 16, 6, false },
2535 [ILO_GPE_GEN6_BLEND_STATE] = { 16, 2, true },
2536 [ILO_GPE_GEN6_DEPTH_STENCIL_STATE] = { 16, 3, false },
2537 [ILO_GPE_GEN6_SCISSOR_RECT] = { 8, 2, true },
2538 [ILO_GPE_GEN6_BINDING_TABLE_STATE] = { 8, 1, true },
2539 [ILO_GPE_GEN6_SURFACE_STATE] = { 8, 6, false },
2540 [ILO_GPE_GEN6_SAMPLER_STATE] = { 8, 4, true },
2541 [ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE] = { 8, 12, false },
2542 [ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER] = { 8, 1, true },
2543 };
2544 const int alignment = gen6_state_size_table[state].alignment;
2545 const int body = gen6_state_size_table[state].body;
2546 const bool is_array = gen6_state_size_table[state].is_array;
2547 const int count = arg;
2548 int estimate;
2549
2550 ILO_GPE_VALID_GEN(dev, 6, 6);
2551 assert(state < ILO_GPE_GEN6_STATE_COUNT);
2552
2553 if (likely(count)) {
2554 if (is_array) {
2555 estimate = (alignment - 1) + body * count;
2556 }
2557 else {
2558 estimate = (alignment - 1) + body;
2559 /* all states are aligned */
2560 if (count > 1)
2561 estimate += util_align_npot(body, alignment) * (count - 1);
2562 }
2563 }
2564 else {
2565 estimate = 0;
2566 }
2567
2568 return estimate;
2569 }