ilo: preliminary GEN 7.5 support
[mesa.git] / src / gallium / drivers / ilo / ilo_gpe_gen6.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_dual_blend.h"
29 #include "util/u_half.h"
30 #include "brw_defines.h"
31 #include "intel_reg.h"
32
33 #include "ilo_context.h"
34 #include "ilo_format.h"
35 #include "ilo_resource.h"
36 #include "ilo_shader.h"
37 #include "ilo_state.h"
38 #include "ilo_gpe_gen6.h"
39
40 /**
41 * Translate a pipe logicop to the matching hardware logicop.
42 */
43 static int
44 gen6_translate_pipe_logicop(unsigned logicop)
45 {
46 switch (logicop) {
47 case PIPE_LOGICOP_CLEAR: return BRW_LOGICOPFUNCTION_CLEAR;
48 case PIPE_LOGICOP_NOR: return BRW_LOGICOPFUNCTION_NOR;
49 case PIPE_LOGICOP_AND_INVERTED: return BRW_LOGICOPFUNCTION_AND_INVERTED;
50 case PIPE_LOGICOP_COPY_INVERTED: return BRW_LOGICOPFUNCTION_COPY_INVERTED;
51 case PIPE_LOGICOP_AND_REVERSE: return BRW_LOGICOPFUNCTION_AND_REVERSE;
52 case PIPE_LOGICOP_INVERT: return BRW_LOGICOPFUNCTION_INVERT;
53 case PIPE_LOGICOP_XOR: return BRW_LOGICOPFUNCTION_XOR;
54 case PIPE_LOGICOP_NAND: return BRW_LOGICOPFUNCTION_NAND;
55 case PIPE_LOGICOP_AND: return BRW_LOGICOPFUNCTION_AND;
56 case PIPE_LOGICOP_EQUIV: return BRW_LOGICOPFUNCTION_EQUIV;
57 case PIPE_LOGICOP_NOOP: return BRW_LOGICOPFUNCTION_NOOP;
58 case PIPE_LOGICOP_OR_INVERTED: return BRW_LOGICOPFUNCTION_OR_INVERTED;
59 case PIPE_LOGICOP_COPY: return BRW_LOGICOPFUNCTION_COPY;
60 case PIPE_LOGICOP_OR_REVERSE: return BRW_LOGICOPFUNCTION_OR_REVERSE;
61 case PIPE_LOGICOP_OR: return BRW_LOGICOPFUNCTION_OR;
62 case PIPE_LOGICOP_SET: return BRW_LOGICOPFUNCTION_SET;
63 default:
64 assert(!"unknown logicop function");
65 return BRW_LOGICOPFUNCTION_CLEAR;
66 }
67 }
68
69 /**
70 * Translate a pipe blend function to the matching hardware blend function.
71 */
72 static int
73 gen6_translate_pipe_blend(unsigned blend)
74 {
75 switch (blend) {
76 case PIPE_BLEND_ADD: return BRW_BLENDFUNCTION_ADD;
77 case PIPE_BLEND_SUBTRACT: return BRW_BLENDFUNCTION_SUBTRACT;
78 case PIPE_BLEND_REVERSE_SUBTRACT: return BRW_BLENDFUNCTION_REVERSE_SUBTRACT;
79 case PIPE_BLEND_MIN: return BRW_BLENDFUNCTION_MIN;
80 case PIPE_BLEND_MAX: return BRW_BLENDFUNCTION_MAX;
81 default:
82 assert(!"unknown blend function");
83 return BRW_BLENDFUNCTION_ADD;
84 };
85 }
86
87 /**
88 * Translate a pipe blend factor to the matching hardware blend factor.
89 */
90 static int
91 gen6_translate_pipe_blendfactor(unsigned blendfactor)
92 {
93 switch (blendfactor) {
94 case PIPE_BLENDFACTOR_ONE: return BRW_BLENDFACTOR_ONE;
95 case PIPE_BLENDFACTOR_SRC_COLOR: return BRW_BLENDFACTOR_SRC_COLOR;
96 case PIPE_BLENDFACTOR_SRC_ALPHA: return BRW_BLENDFACTOR_SRC_ALPHA;
97 case PIPE_BLENDFACTOR_DST_ALPHA: return BRW_BLENDFACTOR_DST_ALPHA;
98 case PIPE_BLENDFACTOR_DST_COLOR: return BRW_BLENDFACTOR_DST_COLOR;
99 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE;
100 case PIPE_BLENDFACTOR_CONST_COLOR: return BRW_BLENDFACTOR_CONST_COLOR;
101 case PIPE_BLENDFACTOR_CONST_ALPHA: return BRW_BLENDFACTOR_CONST_ALPHA;
102 case PIPE_BLENDFACTOR_SRC1_COLOR: return BRW_BLENDFACTOR_SRC1_COLOR;
103 case PIPE_BLENDFACTOR_SRC1_ALPHA: return BRW_BLENDFACTOR_SRC1_ALPHA;
104 case PIPE_BLENDFACTOR_ZERO: return BRW_BLENDFACTOR_ZERO;
105 case PIPE_BLENDFACTOR_INV_SRC_COLOR: return BRW_BLENDFACTOR_INV_SRC_COLOR;
106 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return BRW_BLENDFACTOR_INV_SRC_ALPHA;
107 case PIPE_BLENDFACTOR_INV_DST_ALPHA: return BRW_BLENDFACTOR_INV_DST_ALPHA;
108 case PIPE_BLENDFACTOR_INV_DST_COLOR: return BRW_BLENDFACTOR_INV_DST_COLOR;
109 case PIPE_BLENDFACTOR_INV_CONST_COLOR: return BRW_BLENDFACTOR_INV_CONST_COLOR;
110 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return BRW_BLENDFACTOR_INV_CONST_ALPHA;
111 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: return BRW_BLENDFACTOR_INV_SRC1_COLOR;
112 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: return BRW_BLENDFACTOR_INV_SRC1_ALPHA;
113 default:
114 assert(!"unknown blend factor");
115 return BRW_BLENDFACTOR_ONE;
116 };
117 }
118
119 /**
120 * Translate a pipe stencil op to the matching hardware stencil op.
121 */
122 static int
123 gen6_translate_pipe_stencil_op(unsigned stencil_op)
124 {
125 switch (stencil_op) {
126 case PIPE_STENCIL_OP_KEEP: return BRW_STENCILOP_KEEP;
127 case PIPE_STENCIL_OP_ZERO: return BRW_STENCILOP_ZERO;
128 case PIPE_STENCIL_OP_REPLACE: return BRW_STENCILOP_REPLACE;
129 case PIPE_STENCIL_OP_INCR: return BRW_STENCILOP_INCRSAT;
130 case PIPE_STENCIL_OP_DECR: return BRW_STENCILOP_DECRSAT;
131 case PIPE_STENCIL_OP_INCR_WRAP: return BRW_STENCILOP_INCR;
132 case PIPE_STENCIL_OP_DECR_WRAP: return BRW_STENCILOP_DECR;
133 case PIPE_STENCIL_OP_INVERT: return BRW_STENCILOP_INVERT;
134 default:
135 assert(!"unknown stencil op");
136 return BRW_STENCILOP_KEEP;
137 }
138 }
139
140 /**
141 * Translate a pipe texture mipfilter to the matching hardware mipfilter.
142 */
143 static int
144 gen6_translate_tex_mipfilter(unsigned filter)
145 {
146 switch (filter) {
147 case PIPE_TEX_MIPFILTER_NEAREST: return BRW_MIPFILTER_NEAREST;
148 case PIPE_TEX_MIPFILTER_LINEAR: return BRW_MIPFILTER_LINEAR;
149 case PIPE_TEX_MIPFILTER_NONE: return BRW_MIPFILTER_NONE;
150 default:
151 assert(!"unknown mipfilter");
152 return BRW_MIPFILTER_NONE;
153 }
154 }
155
156 /**
157 * Translate a pipe texture filter to the matching hardware mapfilter.
158 */
159 static int
160 gen6_translate_tex_filter(unsigned filter)
161 {
162 switch (filter) {
163 case PIPE_TEX_FILTER_NEAREST: return BRW_MAPFILTER_NEAREST;
164 case PIPE_TEX_FILTER_LINEAR: return BRW_MAPFILTER_LINEAR;
165 default:
166 assert(!"unknown sampler filter");
167 return BRW_MAPFILTER_NEAREST;
168 }
169 }
170
171 /**
172 * Translate a pipe texture coordinate wrapping mode to the matching hardware
173 * wrapping mode.
174 */
175 static int
176 gen6_translate_tex_wrap(unsigned wrap, bool clamp_to_edge)
177 {
178 /* clamp to edge or border? */
179 if (wrap == PIPE_TEX_WRAP_CLAMP) {
180 wrap = (clamp_to_edge) ?
181 PIPE_TEX_WRAP_CLAMP_TO_EDGE : PIPE_TEX_WRAP_CLAMP_TO_BORDER;
182 }
183
184 switch (wrap) {
185 case PIPE_TEX_WRAP_REPEAT: return BRW_TEXCOORDMODE_WRAP;
186 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return BRW_TEXCOORDMODE_CLAMP;
187 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return BRW_TEXCOORDMODE_CLAMP_BORDER;
188 case PIPE_TEX_WRAP_MIRROR_REPEAT: return BRW_TEXCOORDMODE_MIRROR;
189 case PIPE_TEX_WRAP_CLAMP:
190 case PIPE_TEX_WRAP_MIRROR_CLAMP:
191 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
192 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
193 default:
194 assert(!"unknown sampler wrap mode");
195 return BRW_TEXCOORDMODE_WRAP;
196 }
197 }
198
199 /**
200 * Translate a pipe shadow compare function to the matching hardware shadow
201 * function.
202 */
203 static int
204 gen6_translate_shadow_func(unsigned func)
205 {
206 /*
207 * For PIPE_FUNC_x, the reference value is on the left-hand side of the
208 * comparison, and 1.0 is returned when the comparison is true.
209 *
210 * For BRW_PREFILTER_x, the reference value is on the right-hand side of
211 * the comparison, and 0.0 is returned when the comparison is true.
212 */
213 switch (func) {
214 case PIPE_FUNC_NEVER: return BRW_PREFILTER_ALWAYS;
215 case PIPE_FUNC_LESS: return BRW_PREFILTER_LEQUAL;
216 case PIPE_FUNC_EQUAL: return BRW_PREFILTER_NOTEQUAL;
217 case PIPE_FUNC_LEQUAL: return BRW_PREFILTER_LESS;
218 case PIPE_FUNC_GREATER: return BRW_PREFILTER_GEQUAL;
219 case PIPE_FUNC_NOTEQUAL: return BRW_PREFILTER_EQUAL;
220 case PIPE_FUNC_GEQUAL: return BRW_PREFILTER_GREATER;
221 case PIPE_FUNC_ALWAYS: return BRW_PREFILTER_NEVER;
222 default:
223 assert(!"unknown shadow compare function");
224 return BRW_PREFILTER_NEVER;
225 }
226 }
227
228 /**
229 * Translate a pipe DSA test function to the matching hardware compare
230 * function.
231 */
232 static int
233 gen6_translate_dsa_func(unsigned func)
234 {
235 switch (func) {
236 case PIPE_FUNC_NEVER: return BRW_COMPAREFUNCTION_NEVER;
237 case PIPE_FUNC_LESS: return BRW_COMPAREFUNCTION_LESS;
238 case PIPE_FUNC_EQUAL: return BRW_COMPAREFUNCTION_EQUAL;
239 case PIPE_FUNC_LEQUAL: return BRW_COMPAREFUNCTION_LEQUAL;
240 case PIPE_FUNC_GREATER: return BRW_COMPAREFUNCTION_GREATER;
241 case PIPE_FUNC_NOTEQUAL: return BRW_COMPAREFUNCTION_NOTEQUAL;
242 case PIPE_FUNC_GEQUAL: return BRW_COMPAREFUNCTION_GEQUAL;
243 case PIPE_FUNC_ALWAYS: return BRW_COMPAREFUNCTION_ALWAYS;
244 default:
245 assert(!"unknown depth/stencil/alpha test function");
246 return BRW_COMPAREFUNCTION_NEVER;
247 }
248 }
249
250 static void
251 ve_init_cso(const struct ilo_dev_info *dev,
252 const struct pipe_vertex_element *state,
253 unsigned vb_index,
254 struct ilo_ve_cso *cso)
255 {
256 int comp[4] = {
257 BRW_VE1_COMPONENT_STORE_SRC,
258 BRW_VE1_COMPONENT_STORE_SRC,
259 BRW_VE1_COMPONENT_STORE_SRC,
260 BRW_VE1_COMPONENT_STORE_SRC,
261 };
262 int format;
263
264 ILO_GPE_VALID_GEN(dev, 6, 7.5);
265
266 switch (util_format_get_nr_components(state->src_format)) {
267 case 1: comp[1] = BRW_VE1_COMPONENT_STORE_0;
268 case 2: comp[2] = BRW_VE1_COMPONENT_STORE_0;
269 case 3: comp[3] = (util_format_is_pure_integer(state->src_format)) ?
270 BRW_VE1_COMPONENT_STORE_1_INT :
271 BRW_VE1_COMPONENT_STORE_1_FLT;
272 }
273
274 format = ilo_translate_vertex_format(state->src_format);
275
276 STATIC_ASSERT(Elements(cso->payload) >= 2);
277 cso->payload[0] =
278 vb_index << GEN6_VE0_INDEX_SHIFT |
279 GEN6_VE0_VALID |
280 format << BRW_VE0_FORMAT_SHIFT |
281 state->src_offset << BRW_VE0_SRC_OFFSET_SHIFT;
282
283 cso->payload[1] =
284 comp[0] << BRW_VE1_COMPONENT_0_SHIFT |
285 comp[1] << BRW_VE1_COMPONENT_1_SHIFT |
286 comp[2] << BRW_VE1_COMPONENT_2_SHIFT |
287 comp[3] << BRW_VE1_COMPONENT_3_SHIFT;
288 }
289
290 void
291 ilo_gpe_init_ve(const struct ilo_dev_info *dev,
292 unsigned num_states,
293 const struct pipe_vertex_element *states,
294 struct ilo_ve_state *ve)
295 {
296 unsigned i;
297
298 ILO_GPE_VALID_GEN(dev, 6, 7.5);
299
300 ve->count = num_states;
301 ve->vb_count = 0;
302
303 for (i = 0; i < num_states; i++) {
304 const unsigned pipe_idx = states[i].vertex_buffer_index;
305 const unsigned instance_divisor = states[i].instance_divisor;
306 unsigned hw_idx;
307
308 /*
309 * map the pipe vb to the hardware vb, which has a fixed instance
310 * divisor
311 */
312 for (hw_idx = 0; hw_idx < ve->vb_count; hw_idx++) {
313 if (ve->vb_mapping[hw_idx] == pipe_idx &&
314 ve->instance_divisors[hw_idx] == instance_divisor)
315 break;
316 }
317
318 /* create one if there is no matching hardware vb */
319 if (hw_idx >= ve->vb_count) {
320 hw_idx = ve->vb_count++;
321
322 ve->vb_mapping[hw_idx] = pipe_idx;
323 ve->instance_divisors[hw_idx] = instance_divisor;
324 }
325
326 ve_init_cso(dev, &states[i], hw_idx, &ve->cso[i]);
327 }
328 }
329
330 void
331 ilo_gpe_init_vs_cso(const struct ilo_dev_info *dev,
332 const struct ilo_shader_state *vs,
333 struct ilo_shader_cso *cso)
334 {
335 int start_grf, vue_read_len, max_threads;
336 uint32_t dw2, dw4, dw5;
337
338 ILO_GPE_VALID_GEN(dev, 6, 7.5);
339
340 start_grf = ilo_shader_get_kernel_param(vs, ILO_KERNEL_URB_DATA_START_REG);
341 vue_read_len = ilo_shader_get_kernel_param(vs, ILO_KERNEL_INPUT_COUNT);
342
343 /*
344 * From the Sandy Bridge PRM, volume 2 part 1, page 135:
345 *
346 * "(Vertex URB Entry Read Length) Specifies the number of pairs of
347 * 128-bit vertex elements to be passed into the payload for each
348 * vertex."
349 *
350 * "It is UNDEFINED to set this field to 0 indicating no Vertex URB
351 * data to be read and passed to the thread."
352 */
353 vue_read_len = (vue_read_len + 1) / 2;
354 if (!vue_read_len)
355 vue_read_len = 1;
356
357 switch (dev->gen) {
358 case ILO_GEN(6):
359 /*
360 * From the Sandy Bridge PRM, volume 1 part 1, page 22:
361 *
362 * "Device # of EUs #Threads/EU
363 * SNB GT2 12 5
364 * SNB GT1 6 4"
365 */
366 max_threads = (dev->gt == 2) ? 60 : 24;
367 break;
368 case ILO_GEN(7):
369 /*
370 * From the Ivy Bridge PRM, volume 1 part 1, page 18:
371 *
372 * "Device # of EUs #Threads/EU
373 * Ivy Bridge (GT2) 16 8
374 * Ivy Bridge (GT1) 6 6"
375 */
376 max_threads = (dev->gt == 2) ? 128 : 36;
377 break;
378 case ILO_GEN(7.5):
379 /* see brwCreateContext() */
380 max_threads = (dev->gt >= 2) ? 280 : 70;
381 break;
382 default:
383 max_threads = 1;
384 break;
385 }
386
387 dw2 = (true) ? 0 : GEN6_VS_FLOATING_POINT_MODE_ALT;
388
389 dw4 = start_grf << GEN6_VS_DISPATCH_START_GRF_SHIFT |
390 vue_read_len << GEN6_VS_URB_READ_LENGTH_SHIFT |
391 0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT;
392
393 dw5 = GEN6_VS_STATISTICS_ENABLE |
394 GEN6_VS_ENABLE;
395
396 if (dev->gen >= ILO_GEN(7.5))
397 dw5 |= (max_threads - 1) << HSW_VS_MAX_THREADS_SHIFT;
398 else
399 dw5 |= (max_threads - 1) << GEN6_VS_MAX_THREADS_SHIFT;
400
401 STATIC_ASSERT(Elements(cso->payload) >= 3);
402 cso->payload[0] = dw2;
403 cso->payload[1] = dw4;
404 cso->payload[2] = dw5;
405 }
406
407 void
408 ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info *dev,
409 const struct ilo_shader_state *gs,
410 struct ilo_shader_cso *cso)
411 {
412 int start_grf, vue_read_len, max_threads;
413 uint32_t dw2, dw4, dw5, dw6;
414
415 ILO_GPE_VALID_GEN(dev, 6, 6);
416
417 if (ilo_shader_get_type(gs) == PIPE_SHADER_GEOMETRY) {
418 start_grf = ilo_shader_get_kernel_param(gs,
419 ILO_KERNEL_URB_DATA_START_REG);
420
421 vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_INPUT_COUNT);
422 }
423 else {
424 start_grf = ilo_shader_get_kernel_param(gs,
425 ILO_KERNEL_VS_GEN6_SO_START_REG);
426
427 vue_read_len = ilo_shader_get_kernel_param(gs, ILO_KERNEL_OUTPUT_COUNT);
428 }
429
430 /*
431 * From the Sandy Bridge PRM, volume 2 part 1, page 153:
432 *
433 * "Specifies the amount of URB data read and passed in the thread
434 * payload for each Vertex URB entry, in 256-bit register increments.
435 *
436 * It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
437 * 0 indicating no Vertex URB data to be read and passed to the
438 * thread."
439 */
440 vue_read_len = (vue_read_len + 1) / 2;
441 if (!vue_read_len)
442 vue_read_len = 1;
443
444 /*
445 * From the Sandy Bridge PRM, volume 2 part 1, page 154:
446 *
447 * "Maximum Number of Threads valid range is [0,27] when Rendering
448 * Enabled bit is set."
449 *
450 * From the Sandy Bridge PRM, volume 2 part 1, page 173:
451 *
452 * "Programming Note: If the GS stage is enabled, software must always
453 * allocate at least one GS URB Entry. This is true even if the GS
454 * thread never needs to output vertices to the pipeline, e.g., when
455 * only performing stream output. This is an artifact of the need to
456 * pass the GS thread an initial destination URB handle."
457 *
458 * As such, we always enable rendering, and limit the number of threads.
459 */
460 if (dev->gt == 2) {
461 /* maximum is 60, but limited to 28 */
462 max_threads = 28;
463 }
464 else {
465 /* maximum is 24, but limited to 21 (see brwCreateContext()) */
466 max_threads = 21;
467 }
468
469 dw2 = GEN6_GS_SPF_MODE;
470
471 dw4 = vue_read_len << GEN6_GS_URB_READ_LENGTH_SHIFT |
472 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT |
473 start_grf << GEN6_GS_DISPATCH_START_GRF_SHIFT;
474
475 dw5 = (max_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT |
476 GEN6_GS_STATISTICS_ENABLE |
477 GEN6_GS_SO_STATISTICS_ENABLE |
478 GEN6_GS_RENDERING_ENABLE;
479
480 /*
481 * we cannot make use of GEN6_GS_REORDER because it will reorder
482 * triangle strips according to D3D rules (triangle 2N+1 uses vertices
483 * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
484 * (2N+2, 2N+1, 2N+3)).
485 */
486 dw6 = GEN6_GS_ENABLE;
487
488 if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_DISCARD_ADJACENCY))
489 dw6 |= GEN6_GS_DISCARD_ADJACENCY;
490
491 if (ilo_shader_get_kernel_param(gs, ILO_KERNEL_VS_GEN6_SO)) {
492 const uint32_t svbi_post_inc =
493 ilo_shader_get_kernel_param(gs, ILO_KERNEL_GS_GEN6_SVBI_POST_INC);
494
495 dw6 |= GEN6_GS_SVBI_PAYLOAD_ENABLE;
496 if (svbi_post_inc) {
497 dw6 |= GEN6_GS_SVBI_POSTINCREMENT_ENABLE |
498 svbi_post_inc << GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT;
499 }
500 }
501
502 STATIC_ASSERT(Elements(cso->payload) >= 4);
503 cso->payload[0] = dw2;
504 cso->payload[1] = dw4;
505 cso->payload[2] = dw5;
506 cso->payload[3] = dw6;
507 }
508
509 void
510 ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info *dev,
511 const struct pipe_rasterizer_state *state,
512 struct ilo_rasterizer_clip *clip)
513 {
514 uint32_t dw1, dw2, dw3;
515
516 ILO_GPE_VALID_GEN(dev, 6, 7.5);
517
518 dw1 = GEN6_CLIP_STATISTICS_ENABLE;
519
520 if (dev->gen >= ILO_GEN(7)) {
521 /*
522 * From the Ivy Bridge PRM, volume 2 part 1, page 219:
523 *
524 * "Workaround : Due to Hardware issue "EarlyCull" needs to be
525 * enabled only for the cases where the incoming primitive topology
526 * into the clipper guaranteed to be Trilist."
527 *
528 * What does this mean?
529 */
530 dw1 |= 0 << 19 |
531 GEN7_CLIP_EARLY_CULL;
532
533 if (state->front_ccw)
534 dw1 |= GEN7_CLIP_WINDING_CCW;
535
536 switch (state->cull_face) {
537 case PIPE_FACE_NONE:
538 dw1 |= GEN7_CLIP_CULLMODE_NONE;
539 break;
540 case PIPE_FACE_FRONT:
541 dw1 |= GEN7_CLIP_CULLMODE_FRONT;
542 break;
543 case PIPE_FACE_BACK:
544 dw1 |= GEN7_CLIP_CULLMODE_BACK;
545 break;
546 case PIPE_FACE_FRONT_AND_BACK:
547 dw1 |= GEN7_CLIP_CULLMODE_BOTH;
548 break;
549 }
550 }
551
552 dw2 = GEN6_CLIP_ENABLE |
553 GEN6_CLIP_XY_TEST |
554 state->clip_plane_enable << GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT |
555 GEN6_CLIP_MODE_NORMAL;
556
557 if (state->clip_halfz)
558 dw2 |= GEN6_CLIP_API_D3D;
559 else
560 dw2 |= GEN6_CLIP_API_OGL;
561
562 if (state->depth_clip)
563 dw2 |= GEN6_CLIP_Z_TEST;
564
565 if (state->flatshade_first) {
566 dw2 |= 0 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
567 0 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
568 1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
569 }
570 else {
571 dw2 |= 2 << GEN6_CLIP_TRI_PROVOKE_SHIFT |
572 1 << GEN6_CLIP_LINE_PROVOKE_SHIFT |
573 2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT;
574 }
575
576 dw3 = 0x1 << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
577 0x7ff << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT;
578
579 clip->payload[0] = dw1;
580 clip->payload[1] = dw2;
581 clip->payload[2] = dw3;
582
583 clip->can_enable_guardband = true;
584
585 /*
586 * There are several reasons that guard band test should be disabled
587 *
588 * - GL wide points (to avoid partially visibie object)
589 * - GL wide or AA lines (to avoid partially visibie object)
590 */
591 if (state->point_size_per_vertex || state->point_size > 1.0f)
592 clip->can_enable_guardband = false;
593 if (state->line_smooth || state->line_width > 1.0f)
594 clip->can_enable_guardband = false;
595 }
596
597 void
598 ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info *dev,
599 const struct pipe_rasterizer_state *state,
600 struct ilo_rasterizer_sf *sf)
601 {
602 float offset_const, offset_scale, offset_clamp;
603 int line_width, point_width;
604 uint32_t dw1, dw2, dw3;
605
606 ILO_GPE_VALID_GEN(dev, 6, 7.5);
607
608 /*
609 * Scale the constant term. The minimum representable value used by the HW
610 * is not large enouch to be the minimum resolvable difference.
611 */
612 offset_const = state->offset_units * 2.0f;
613
614 offset_scale = state->offset_scale;
615 offset_clamp = state->offset_clamp;
616
617 /*
618 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
619 *
620 * "This bit (Statistics Enable) should be set whenever clipping is
621 * enabled and the Statistics Enable bit is set in CLIP_STATE. It
622 * should be cleared if clipping is disabled or Statistics Enable in
623 * CLIP_STATE is clear."
624 */
625 dw1 = GEN6_SF_STATISTICS_ENABLE |
626 GEN6_SF_VIEWPORT_TRANSFORM_ENABLE;
627
628 /* XXX GEN6 path seems to work fine for GEN7 */
629 if (false && dev->gen >= ILO_GEN(7)) {
630 /*
631 * From the Ivy Bridge PRM, volume 2 part 1, page 258:
632 *
633 * "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
634 * Enable Solid , Global Depth Offset Enable Wireframe, and Global
635 * Depth Offset Enable Point) should be set whenever non zero depth
636 * bias (Slope, Bias) values are used. Setting this bit may have
637 * some degradation of performance for some workloads."
638 */
639 if (state->offset_tri || state->offset_line || state->offset_point) {
640 /* XXX need to scale offset_const according to the depth format */
641 dw1 |= GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS;
642
643 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID |
644 GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME |
645 GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
646 }
647 else {
648 offset_const = 0.0f;
649 offset_scale = 0.0f;
650 offset_clamp = 0.0f;
651 }
652 }
653 else {
654 if (state->offset_tri)
655 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID;
656 if (state->offset_line)
657 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME;
658 if (state->offset_point)
659 dw1 |= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT;
660 }
661
662 switch (state->fill_front) {
663 case PIPE_POLYGON_MODE_FILL:
664 dw1 |= GEN6_SF_FRONT_SOLID;
665 break;
666 case PIPE_POLYGON_MODE_LINE:
667 dw1 |= GEN6_SF_FRONT_WIREFRAME;
668 break;
669 case PIPE_POLYGON_MODE_POINT:
670 dw1 |= GEN6_SF_FRONT_POINT;
671 break;
672 }
673
674 switch (state->fill_back) {
675 case PIPE_POLYGON_MODE_FILL:
676 dw1 |= GEN6_SF_BACK_SOLID;
677 break;
678 case PIPE_POLYGON_MODE_LINE:
679 dw1 |= GEN6_SF_BACK_WIREFRAME;
680 break;
681 case PIPE_POLYGON_MODE_POINT:
682 dw1 |= GEN6_SF_BACK_POINT;
683 break;
684 }
685
686 if (state->front_ccw)
687 dw1 |= GEN6_SF_WINDING_CCW;
688
689 dw2 = 0;
690
691 if (state->line_smooth) {
692 /*
693 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
694 *
695 * "This field (Anti-aliasing Enable) must be disabled if any of the
696 * render targets have integer (UINT or SINT) surface format."
697 *
698 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
699 *
700 * "This field (Hierarchical Depth Buffer Enable) must be disabled
701 * if Anti-aliasing Enable in 3DSTATE_SF is enabled.
702 *
703 * TODO We do not check those yet.
704 */
705 dw2 |= GEN6_SF_LINE_AA_ENABLE |
706 GEN6_SF_LINE_END_CAP_WIDTH_1_0;
707 }
708
709 switch (state->cull_face) {
710 case PIPE_FACE_NONE:
711 dw2 |= GEN6_SF_CULL_NONE;
712 break;
713 case PIPE_FACE_FRONT:
714 dw2 |= GEN6_SF_CULL_FRONT;
715 break;
716 case PIPE_FACE_BACK:
717 dw2 |= GEN6_SF_CULL_BACK;
718 break;
719 case PIPE_FACE_FRONT_AND_BACK:
720 dw2 |= GEN6_SF_CULL_BOTH;
721 break;
722 }
723
724 /*
725 * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
726 * pixels in the minor direction. We have to make the lines slightly
727 * thicker, 0.5 pixel on both sides, so that they intersect that many
728 * pixels are considered into the lines.
729 *
730 * Line width is in U3.7.
731 */
732 line_width = (int) ((state->line_width +
733 (float) state->line_smooth) * 128.0f + 0.5f);
734 line_width = CLAMP(line_width, 0, 1023);
735
736 if (line_width == 128 && !state->line_smooth) {
737 /* use GIQ rules */
738 line_width = 0;
739 }
740
741 dw2 |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
742
743 if (dev->gen >= ILO_GEN(7.5) && state->line_stipple_enable)
744 dw2 |= HSW_SF_LINE_STIPPLE_ENABLE;
745
746 if (state->scissor)
747 dw2 |= GEN6_SF_SCISSOR_ENABLE;
748
749 dw3 = GEN6_SF_LINE_AA_MODE_TRUE |
750 GEN6_SF_VERTEX_SUBPIXEL_8BITS;
751
752 if (state->line_last_pixel)
753 dw3 |= 1 << 31;
754
755 if (state->flatshade_first) {
756 dw3 |= 0 << GEN6_SF_TRI_PROVOKE_SHIFT |
757 0 << GEN6_SF_LINE_PROVOKE_SHIFT |
758 1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
759 }
760 else {
761 dw3 |= 2 << GEN6_SF_TRI_PROVOKE_SHIFT |
762 1 << GEN6_SF_LINE_PROVOKE_SHIFT |
763 2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT;
764 }
765
766 if (!state->point_size_per_vertex)
767 dw3 |= GEN6_SF_USE_STATE_POINT_WIDTH;
768
769 /* in U8.3 */
770 point_width = (int) (state->point_size * 8.0f + 0.5f);
771 point_width = CLAMP(point_width, 1, 2047);
772
773 dw3 |= point_width;
774
775 STATIC_ASSERT(Elements(sf->payload) >= 6);
776 sf->payload[0] = dw1;
777 sf->payload[1] = dw2;
778 sf->payload[2] = dw3;
779 sf->payload[3] = fui(offset_const);
780 sf->payload[4] = fui(offset_scale);
781 sf->payload[5] = fui(offset_clamp);
782
783 if (state->multisample) {
784 sf->dw_msaa = GEN6_SF_MSRAST_ON_PATTERN;
785
786 /*
787 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
788 *
789 * "Software must not program a value of 0.0 when running in
790 * MSRASTMODE_ON_xxx modes - zero-width lines are not available
791 * when multisampling rasterization is enabled."
792 */
793 if (!line_width) {
794 line_width = 128; /* 1.0f */
795
796 sf->dw_msaa |= line_width << GEN6_SF_LINE_WIDTH_SHIFT;
797 }
798 }
799 else {
800 sf->dw_msaa = 0;
801 }
802 }
803
804 void
805 ilo_gpe_init_rasterizer_wm_gen6(const struct ilo_dev_info *dev,
806 const struct pipe_rasterizer_state *state,
807 struct ilo_rasterizer_wm *wm)
808 {
809 uint32_t dw5, dw6;
810
811 ILO_GPE_VALID_GEN(dev, 6, 6);
812
813 /* only the FF unit states are set, as in GEN7 */
814
815 dw5 = GEN6_WM_LINE_AA_WIDTH_2_0;
816
817 /* same value as in 3DSTATE_SF */
818 if (state->line_smooth)
819 dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0;
820
821 if (state->poly_stipple_enable)
822 dw5 |= GEN6_WM_POLYGON_STIPPLE_ENABLE;
823 if (state->line_stipple_enable)
824 dw5 |= GEN6_WM_LINE_STIPPLE_ENABLE;
825
826 dw6 = GEN6_WM_POSITION_ZW_PIXEL |
827 GEN6_WM_MSRAST_OFF_PIXEL |
828 GEN6_WM_MSDISPMODE_PERSAMPLE;
829
830 if (state->bottom_edge_rule)
831 dw6 |= GEN6_WM_POINT_RASTRULE_UPPER_RIGHT;
832
833 /*
834 * assertion that makes sure
835 *
836 * dw6 |= wm->dw_msaa_rast | wm->dw_msaa_disp;
837 *
838 * is valid
839 */
840 STATIC_ASSERT(GEN6_WM_MSRAST_OFF_PIXEL == 0 &&
841 GEN6_WM_MSDISPMODE_PERSAMPLE == 0);
842
843 wm->dw_msaa_rast =
844 (state->multisample) ? GEN6_WM_MSRAST_ON_PATTERN : 0;
845 wm->dw_msaa_disp = GEN6_WM_MSDISPMODE_PERPIXEL;
846
847 STATIC_ASSERT(Elements(wm->payload) >= 2);
848 wm->payload[0] = dw5;
849 wm->payload[1] = dw6;
850 }
851
852 void
853 ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info *dev,
854 const struct ilo_shader_state *fs,
855 struct ilo_shader_cso *cso)
856 {
857 int start_grf, input_count, interps, max_threads;
858 uint32_t dw2, dw4, dw5, dw6;
859
860 ILO_GPE_VALID_GEN(dev, 6, 6);
861
862 start_grf = ilo_shader_get_kernel_param(fs, ILO_KERNEL_URB_DATA_START_REG);
863 input_count = ilo_shader_get_kernel_param(fs, ILO_KERNEL_INPUT_COUNT);
864 interps = ilo_shader_get_kernel_param(fs,
865 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS);
866
867 /* see brwCreateContext() */
868 max_threads = (dev->gt == 2) ? 80 : 40;
869
870 dw2 = (true) ? 0 : GEN6_WM_FLOATING_POINT_MODE_ALT;
871
872 dw4 = start_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0 |
873 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1 |
874 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2;
875
876 dw5 = (max_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
877
878 /*
879 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
880 *
881 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
882 * PS kernel or color calculator has the ability to kill (discard)
883 * pixels or samples, other than due to depth or stencil testing.
884 * This bit is required to be ENABLED in the following situations:
885 *
886 * The API pixel shader program contains "killpix" or "discard"
887 * instructions, or other code in the pixel shader kernel that can
888 * cause the final pixel mask to differ from the pixel mask received
889 * on dispatch.
890 *
891 * A sampler with chroma key enabled with kill pixel mode is used by
892 * the pixel shader.
893 *
894 * Any render target has Alpha Test Enable or AlphaToCoverage Enable
895 * enabled.
896 *
897 * The pixel shader kernel generates and outputs oMask.
898 *
899 * Note: As ClipDistance clipping is fully supported in hardware and
900 * therefore not via PS instructions, there should be no need to
901 * ENABLE this bit due to ClipDistance clipping."
902 */
903 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_USE_KILL))
904 dw5 |= GEN6_WM_KILL_ENABLE;
905
906 /*
907 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
908 *
909 * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
910 * field must be set to disabled."
911 *
912 * TODO This is not checked yet.
913 */
914 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_OUTPUT_Z))
915 dw5 |= GEN6_WM_COMPUTED_DEPTH;
916
917 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_Z))
918 dw5 |= GEN6_WM_USES_SOURCE_DEPTH;
919
920 if (ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_INPUT_W))
921 dw5 |= GEN6_WM_USES_SOURCE_W;
922
923 /*
924 * TODO set this bit only when
925 *
926 * a) fs writes colors and color is not masked, or
927 * b) fs writes depth, or
928 * c) fs or cc kills
929 */
930 if (true)
931 dw5 |= GEN6_WM_DISPATCH_ENABLE;
932
933 assert(!ilo_shader_get_kernel_param(fs, ILO_KERNEL_FS_DISPATCH_16_OFFSET));
934 dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
935
936 dw6 = input_count << GEN6_WM_NUM_SF_OUTPUTS_SHIFT |
937 GEN6_WM_POSOFFSET_NONE |
938 interps << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
939
940 STATIC_ASSERT(Elements(cso->payload) >= 4);
941 cso->payload[0] = dw2;
942 cso->payload[1] = dw4;
943 cso->payload[2] = dw5;
944 cso->payload[3] = dw6;
945 }
946
947 struct ilo_zs_surface_info {
948 int surface_type;
949 int format;
950
951 struct {
952 struct intel_bo *bo;
953 unsigned stride;
954 enum intel_tiling_mode tiling;
955 uint32_t offset;
956 } zs, stencil, hiz;
957
958 unsigned width, height, depth;
959 unsigned lod, first_layer, num_layers;
960 uint32_t x_offset, y_offset;
961 };
962
963 static void
964 zs_init_info_null(const struct ilo_dev_info *dev,
965 struct ilo_zs_surface_info *info)
966 {
967 ILO_GPE_VALID_GEN(dev, 6, 7.5);
968
969 memset(info, 0, sizeof(*info));
970
971 info->surface_type = BRW_SURFACE_NULL;
972 info->format = BRW_DEPTHFORMAT_D32_FLOAT;
973 info->width = 1;
974 info->height = 1;
975 info->depth = 1;
976 info->num_layers = 1;
977 }
978
979 static void
980 zs_init_info(const struct ilo_dev_info *dev,
981 const struct ilo_texture *tex,
982 enum pipe_format format,
983 unsigned level,
984 unsigned first_layer, unsigned num_layers,
985 struct ilo_zs_surface_info *info)
986 {
987 const bool rebase_layer = true;
988 struct intel_bo * const hiz_bo = NULL;
989 bool separate_stencil;
990 uint32_t x_offset[3], y_offset[3];
991
992 ILO_GPE_VALID_GEN(dev, 6, 7.5);
993
994 memset(info, 0, sizeof(*info));
995
996 info->surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
997
998 if (info->surface_type == BRW_SURFACE_CUBE) {
999 /*
1000 * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
1001 *
1002 * "For Other Surfaces (Cube Surfaces):
1003 * This field (Minimum Array Element) is ignored."
1004 *
1005 * "For Other Surfaces (Cube Surfaces):
1006 * This field (Render Target View Extent) is ignored."
1007 *
1008 * As such, we cannot set first_layer and num_layers on cube surfaces.
1009 * To work around that, treat it as a 2D surface.
1010 */
1011 info->surface_type = BRW_SURFACE_2D;
1012 }
1013
1014 if (dev->gen >= ILO_GEN(7)) {
1015 separate_stencil = true;
1016 }
1017 else {
1018 /*
1019 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
1020 *
1021 * "This field (Separate Stencil Buffer Enable) must be set to the
1022 * same value (enabled or disabled) as Hierarchical Depth Buffer
1023 * Enable."
1024 */
1025 separate_stencil = (hiz_bo != NULL);
1026 }
1027
1028 /*
1029 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
1030 *
1031 * "If this field (Hierarchical Depth Buffer Enable) is enabled, the
1032 * Surface Format of the depth buffer cannot be
1033 * D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
1034 * requires the separate stencil buffer."
1035 *
1036 * From the Ironlake PRM, volume 2 part 1, page 330:
1037 *
1038 * "If this field (Separate Stencil Buffer Enable) is disabled, the
1039 * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
1040 *
1041 * There is no similar restriction for GEN6. But when D24_UNORM_X8_UINT
1042 * is indeed used, the depth values output by the fragment shaders will
1043 * be different when read back.
1044 *
1045 * As for GEN7+, separate_stencil is always true.
1046 */
1047 switch (format) {
1048 case PIPE_FORMAT_Z16_UNORM:
1049 info->format = BRW_DEPTHFORMAT_D16_UNORM;
1050 break;
1051 case PIPE_FORMAT_Z32_FLOAT:
1052 info->format = BRW_DEPTHFORMAT_D32_FLOAT;
1053 break;
1054 case PIPE_FORMAT_Z24X8_UNORM:
1055 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1056 info->format = (separate_stencil) ?
1057 BRW_DEPTHFORMAT_D24_UNORM_X8_UINT :
1058 BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
1059 break;
1060 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1061 info->format = (separate_stencil) ?
1062 BRW_DEPTHFORMAT_D32_FLOAT :
1063 BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
1064 break;
1065 case PIPE_FORMAT_S8_UINT:
1066 if (separate_stencil) {
1067 info->format = BRW_DEPTHFORMAT_D32_FLOAT;
1068 break;
1069 }
1070 /* fall through */
1071 default:
1072 assert(!"unsupported depth/stencil format");
1073 zs_init_info_null(dev, info);
1074 return;
1075 break;
1076 }
1077
1078 if (format != PIPE_FORMAT_S8_UINT) {
1079 info->zs.bo = tex->bo;
1080 info->zs.stride = tex->bo_stride;
1081 info->zs.tiling = tex->tiling;
1082
1083 if (rebase_layer) {
1084 info->zs.offset = ilo_texture_get_slice_offset(tex,
1085 level, first_layer, &x_offset[0], &y_offset[0]);
1086 }
1087 }
1088
1089 if (tex->separate_s8 || format == PIPE_FORMAT_S8_UINT) {
1090 const struct ilo_texture *s8_tex =
1091 (tex->separate_s8) ? tex->separate_s8 : tex;
1092
1093 info->stencil.bo = s8_tex->bo;
1094
1095 /*
1096 * From the Sandy Bridge PRM, volume 2 part 1, page 329:
1097 *
1098 * "The pitch must be set to 2x the value computed based on width,
1099 * as the stencil buffer is stored with two rows interleaved."
1100 *
1101 * According to the classic driver, we need to do the same for GEN7+
1102 * even though the Ivy Bridge PRM does not say anything about it.
1103 */
1104 info->stencil.stride = s8_tex->bo_stride * 2;
1105
1106 info->stencil.tiling = s8_tex->tiling;
1107
1108 if (rebase_layer) {
1109 info->stencil.offset = ilo_texture_get_slice_offset(s8_tex,
1110 level, first_layer, &x_offset[1], &y_offset[1]);
1111 }
1112 }
1113
1114 if (hiz_bo) {
1115 info->hiz.bo = hiz_bo;
1116 info->hiz.stride = 0;
1117 info->hiz.tiling = 0;
1118 info->hiz.offset = 0;
1119 x_offset[2] = 0;
1120 y_offset[2] = 0;
1121 }
1122
1123 info->width = tex->base.width0;
1124 info->height = tex->base.height0;
1125 info->depth = (tex->base.target == PIPE_TEXTURE_3D) ?
1126 tex->base.depth0 : num_layers;
1127
1128 info->lod = level;
1129 info->first_layer = first_layer;
1130 info->num_layers = num_layers;
1131
1132 if (rebase_layer) {
1133 /* the size of the layer */
1134 info->width = u_minify(info->width, level);
1135 info->height = u_minify(info->height, level);
1136 if (info->surface_type == BRW_SURFACE_3D)
1137 info->depth = u_minify(info->depth, level);
1138 else
1139 info->depth = 1;
1140
1141 /* no layered rendering */
1142 assert(num_layers == 1);
1143
1144 info->lod = 0;
1145 info->first_layer = 0;
1146 info->num_layers = 1;
1147
1148 /* all three share the same X/Y offsets */
1149 if (info->zs.bo) {
1150 if (info->stencil.bo) {
1151 assert(x_offset[0] == x_offset[1]);
1152 assert(y_offset[0] == y_offset[1]);
1153 }
1154
1155 info->x_offset = x_offset[0];
1156 info->y_offset = y_offset[0];
1157 }
1158 else {
1159 assert(info->stencil.bo);
1160
1161 info->x_offset = x_offset[1];
1162 info->y_offset = y_offset[1];
1163 }
1164
1165 if (info->hiz.bo) {
1166 assert(info->x_offset == x_offset[2]);
1167 assert(info->y_offset == y_offset[2]);
1168 }
1169
1170 /*
1171 * From the Sandy Bridge PRM, volume 2 part 1, page 326:
1172 *
1173 * "The 3 LSBs of both offsets (Depth Coordinate Offset Y and Depth
1174 * Coordinate Offset X) must be zero to ensure correct alignment"
1175 *
1176 * XXX Skip the check for gen6, which seems to be fine. We need to make
1177 * sure that does not happen eventually.
1178 */
1179 if (dev->gen >= ILO_GEN(7)) {
1180 assert((info->x_offset & 7) == 0 && (info->y_offset & 7) == 0);
1181 info->x_offset &= ~7;
1182 info->y_offset &= ~7;
1183 }
1184
1185 info->width += info->x_offset;
1186 info->height += info->y_offset;
1187
1188 /* we have to treat them as 2D surfaces */
1189 if (info->surface_type == BRW_SURFACE_CUBE) {
1190 assert(tex->base.width0 == tex->base.height0);
1191 /* we will set slice_offset to point to the single face */
1192 info->surface_type = BRW_SURFACE_2D;
1193 }
1194 else if (info->surface_type == BRW_SURFACE_1D && info->height > 1) {
1195 assert(tex->base.height0 == 1);
1196 info->surface_type = BRW_SURFACE_2D;
1197 }
1198 }
1199 }
1200
1201 void
1202 ilo_gpe_init_zs_surface(const struct ilo_dev_info *dev,
1203 const struct ilo_texture *tex,
1204 enum pipe_format format,
1205 unsigned level,
1206 unsigned first_layer, unsigned num_layers,
1207 struct ilo_zs_surface *zs)
1208 {
1209 const int max_2d_size = (dev->gen >= ILO_GEN(7)) ? 16384 : 8192;
1210 const int max_array_size = (dev->gen >= ILO_GEN(7)) ? 2048 : 512;
1211 struct ilo_zs_surface_info info;
1212 uint32_t dw1, dw2, dw3, dw4, dw5, dw6;
1213
1214 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1215
1216 if (tex)
1217 zs_init_info(dev, tex, format, level, first_layer, num_layers, &info);
1218 else
1219 zs_init_info_null(dev, &info);
1220
1221 switch (info.surface_type) {
1222 case BRW_SURFACE_NULL:
1223 break;
1224 case BRW_SURFACE_1D:
1225 assert(info.width <= max_2d_size && info.height == 1 &&
1226 info.depth <= max_array_size);
1227 assert(info.first_layer < max_array_size - 1 &&
1228 info.num_layers <= max_array_size);
1229 break;
1230 case BRW_SURFACE_2D:
1231 assert(info.width <= max_2d_size && info.height <= max_2d_size &&
1232 info.depth <= max_array_size);
1233 assert(info.first_layer < max_array_size - 1 &&
1234 info.num_layers <= max_array_size);
1235 break;
1236 case BRW_SURFACE_3D:
1237 assert(info.width <= 2048 && info.height <= 2048 && info.depth <= 2048);
1238 assert(info.first_layer < 2048 && info.num_layers <= max_array_size);
1239 assert(info.x_offset == 0 && info.y_offset == 0);
1240 break;
1241 case BRW_SURFACE_CUBE:
1242 assert(info.width <= max_2d_size && info.height <= max_2d_size &&
1243 info.depth == 1);
1244 assert(info.first_layer == 0 && info.num_layers == 1);
1245 assert(info.width == info.height);
1246 assert(info.x_offset == 0 && info.y_offset == 0);
1247 break;
1248 default:
1249 assert(!"unexpected depth surface type");
1250 break;
1251 }
1252
1253 dw1 = info.surface_type << 29 |
1254 info.format << 18;
1255
1256 if (info.zs.bo) {
1257 /* required for GEN6+ */
1258 assert(info.zs.tiling == INTEL_TILING_Y);
1259 assert(info.zs.stride > 0 && info.zs.stride < 128 * 1024 &&
1260 info.zs.stride % 128 == 0);
1261 assert(info.width <= info.zs.stride);
1262
1263 dw1 |= (info.zs.stride - 1);
1264 dw2 = info.zs.offset;
1265 }
1266 else {
1267 dw2 = 0;
1268 }
1269
1270 if (dev->gen >= ILO_GEN(7)) {
1271 if (info.zs.bo)
1272 dw1 |= 1 << 28;
1273
1274 if (info.stencil.bo)
1275 dw1 |= 1 << 27;
1276
1277 if (info.hiz.bo)
1278 dw1 |= 1 << 22;
1279
1280 dw3 = (info.height - 1) << 18 |
1281 (info.width - 1) << 4 |
1282 info.lod;
1283
1284 dw4 = (info.depth - 1) << 21 |
1285 info.first_layer << 10;
1286
1287 dw5 = info.y_offset << 16 | info.x_offset;
1288
1289 dw6 = (info.num_layers - 1) << 21;
1290 }
1291 else {
1292 /* always Y-tiled */
1293 dw1 |= 1 << 27 |
1294 1 << 26;
1295
1296 if (info.hiz.bo) {
1297 dw1 |= 1 << 22 |
1298 1 << 21;
1299 }
1300
1301 dw3 = (info.height - 1) << 19 |
1302 (info.width - 1) << 6 |
1303 info.lod << 2 |
1304 BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1;
1305
1306 dw4 = (info.depth - 1) << 21 |
1307 info.first_layer << 10 |
1308 (info.num_layers - 1) << 1;
1309
1310 dw5 = info.y_offset << 16 | info.x_offset;
1311
1312 dw6 = 0;
1313 }
1314
1315 STATIC_ASSERT(Elements(zs->payload) >= 10);
1316
1317 zs->payload[0] = dw1;
1318 zs->payload[1] = dw2;
1319 zs->payload[2] = dw3;
1320 zs->payload[3] = dw4;
1321 zs->payload[4] = dw5;
1322 zs->payload[5] = dw6;
1323
1324 /* do not increment reference count */
1325 zs->bo = info.zs.bo;
1326
1327 /* separate stencil */
1328 if (info.stencil.bo) {
1329 assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 &&
1330 info.stencil.stride % 128 == 0);
1331
1332 zs->payload[6] = info.stencil.stride - 1;
1333 zs->payload[7] = info.stencil.offset;
1334
1335 if (dev->gen >= ILO_GEN(7.5))
1336 zs->payload[6] |= HSW_STENCIL_ENABLED;
1337
1338 /* do not increment reference count */
1339 zs->separate_s8_bo = info.stencil.bo;
1340 }
1341 else {
1342 zs->payload[6] = 0;
1343 zs->payload[7] = 0;
1344 zs->separate_s8_bo = NULL;
1345 }
1346
1347 /* hiz */
1348 if (info.hiz.bo) {
1349 zs->payload[8] = info.hiz.stride - 1;
1350 zs->payload[9] = info.hiz.offset;
1351
1352 /* do not increment reference count */
1353 zs->hiz_bo = info.hiz.bo;
1354 }
1355 else {
1356 zs->payload[8] = 0;
1357 zs->payload[9] = 0;
1358 zs->hiz_bo = NULL;
1359 }
1360 }
1361
1362 static void
1363 viewport_get_guardband(const struct ilo_dev_info *dev,
1364 int center_x, int center_y,
1365 int *min_gbx, int *max_gbx,
1366 int *min_gby, int *max_gby)
1367 {
1368 /*
1369 * From the Sandy Bridge PRM, volume 2 part 1, page 234:
1370 *
1371 * "Per-Device Guardband Extents
1372 *
1373 * - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
1374 * - Maximum Post-Clamp Delta (X or Y): 16K"
1375 *
1376 * "In addition, in order to be correctly rendered, objects must have a
1377 * screenspace bounding box not exceeding 8K in the X or Y direction.
1378 * This additional restriction must also be comprehended by software,
1379 * i.e., enforced by use of clipping."
1380 *
1381 * From the Ivy Bridge PRM, volume 2 part 1, page 248:
1382 *
1383 * "Per-Device Guardband Extents
1384 *
1385 * - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
1386 * - Maximum Post-Clamp Delta (X or Y): N/A"
1387 *
1388 * "In addition, in order to be correctly rendered, objects must have a
1389 * screenspace bounding box not exceeding 8K in the X or Y direction.
1390 * This additional restriction must also be comprehended by software,
1391 * i.e., enforced by use of clipping."
1392 *
1393 * Combined, the bounding box of any object can not exceed 8K in both
1394 * width and height.
1395 *
1396 * Below we set the guardband as a squre of length 8K, centered at where
1397 * the viewport is. This makes sure all objects passing the GB test are
1398 * valid to the renderer, and those failing the XY clipping have a
1399 * better chance of passing the GB test.
1400 */
1401 const int max_extent = (dev->gen >= ILO_GEN(7)) ? 32768 : 16384;
1402 const int half_len = 8192 / 2;
1403
1404 /* make sure the guardband is within the valid range */
1405 if (center_x - half_len < -max_extent)
1406 center_x = -max_extent + half_len;
1407 else if (center_x + half_len > max_extent - 1)
1408 center_x = max_extent - half_len;
1409
1410 if (center_y - half_len < -max_extent)
1411 center_y = -max_extent + half_len;
1412 else if (center_y + half_len > max_extent - 1)
1413 center_y = max_extent - half_len;
1414
1415 *min_gbx = (float) (center_x - half_len);
1416 *max_gbx = (float) (center_x + half_len);
1417 *min_gby = (float) (center_y - half_len);
1418 *max_gby = (float) (center_y + half_len);
1419 }
1420
1421 void
1422 ilo_gpe_set_viewport_cso(const struct ilo_dev_info *dev,
1423 const struct pipe_viewport_state *state,
1424 struct ilo_viewport_cso *vp)
1425 {
1426 const float scale_x = fabs(state->scale[0]);
1427 const float scale_y = fabs(state->scale[1]);
1428 const float scale_z = fabs(state->scale[2]);
1429 int min_gbx, max_gbx, min_gby, max_gby;
1430
1431 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1432
1433 viewport_get_guardband(dev,
1434 (int) state->translate[0],
1435 (int) state->translate[1],
1436 &min_gbx, &max_gbx, &min_gby, &max_gby);
1437
1438 /* matrix form */
1439 vp->m00 = state->scale[0];
1440 vp->m11 = state->scale[1];
1441 vp->m22 = state->scale[2];
1442 vp->m30 = state->translate[0];
1443 vp->m31 = state->translate[1];
1444 vp->m32 = state->translate[2];
1445
1446 /* guardband in NDC space */
1447 vp->min_gbx = ((float) min_gbx - state->translate[0]) / scale_x;
1448 vp->max_gbx = ((float) max_gbx - state->translate[0]) / scale_x;
1449 vp->min_gby = ((float) min_gby - state->translate[1]) / scale_y;
1450 vp->max_gby = ((float) max_gby - state->translate[1]) / scale_y;
1451
1452 /* viewport in screen space */
1453 vp->min_x = scale_x * -1.0f + state->translate[0];
1454 vp->max_x = scale_x * 1.0f + state->translate[0];
1455 vp->min_y = scale_y * -1.0f + state->translate[1];
1456 vp->max_y = scale_y * 1.0f + state->translate[1];
1457 vp->min_z = scale_z * -1.0f + state->translate[2];
1458 vp->max_z = scale_z * 1.0f + state->translate[2];
1459 }
1460
1461 static int
1462 gen6_blend_factor_dst_alpha_forced_one(int factor)
1463 {
1464 switch (factor) {
1465 case BRW_BLENDFACTOR_DST_ALPHA:
1466 return BRW_BLENDFACTOR_ONE;
1467 case BRW_BLENDFACTOR_INV_DST_ALPHA:
1468 case BRW_BLENDFACTOR_SRC_ALPHA_SATURATE:
1469 return BRW_BLENDFACTOR_ZERO;
1470 default:
1471 return factor;
1472 }
1473 }
1474
1475 static uint32_t
1476 blend_get_rt_blend_enable(const struct ilo_dev_info *dev,
1477 const struct pipe_rt_blend_state *rt,
1478 bool dst_alpha_forced_one)
1479 {
1480 int rgb_src, rgb_dst, a_src, a_dst;
1481 uint32_t dw;
1482
1483 if (!rt->blend_enable)
1484 return 0;
1485
1486 rgb_src = gen6_translate_pipe_blendfactor(rt->rgb_src_factor);
1487 rgb_dst = gen6_translate_pipe_blendfactor(rt->rgb_dst_factor);
1488 a_src = gen6_translate_pipe_blendfactor(rt->alpha_src_factor);
1489 a_dst = gen6_translate_pipe_blendfactor(rt->alpha_dst_factor);
1490
1491 if (dst_alpha_forced_one) {
1492 rgb_src = gen6_blend_factor_dst_alpha_forced_one(rgb_src);
1493 rgb_dst = gen6_blend_factor_dst_alpha_forced_one(rgb_dst);
1494 a_src = gen6_blend_factor_dst_alpha_forced_one(a_src);
1495 a_dst = gen6_blend_factor_dst_alpha_forced_one(a_dst);
1496 }
1497
1498 dw = 1 << 31 |
1499 gen6_translate_pipe_blend(rt->alpha_func) << 26 |
1500 a_src << 20 |
1501 a_dst << 15 |
1502 gen6_translate_pipe_blend(rt->rgb_func) << 11 |
1503 rgb_src << 5 |
1504 rgb_dst;
1505
1506 if (rt->rgb_func != rt->alpha_func ||
1507 rgb_src != a_src || rgb_dst != a_dst)
1508 dw |= 1 << 30;
1509
1510 return dw;
1511 }
1512
1513 void
1514 ilo_gpe_init_blend(const struct ilo_dev_info *dev,
1515 const struct pipe_blend_state *state,
1516 struct ilo_blend_state *blend)
1517 {
1518 unsigned num_cso, i;
1519
1520 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1521
1522 if (state->independent_blend_enable) {
1523 num_cso = Elements(blend->cso);
1524 }
1525 else {
1526 memset(blend->cso, 0, sizeof(blend->cso));
1527 num_cso = 1;
1528 }
1529
1530 blend->independent_blend_enable = state->independent_blend_enable;
1531 blend->alpha_to_coverage = state->alpha_to_coverage;
1532 blend->dual_blend = false;
1533
1534 for (i = 0; i < num_cso; i++) {
1535 const struct pipe_rt_blend_state *rt = &state->rt[i];
1536 struct ilo_blend_cso *cso = &blend->cso[i];
1537 bool dual_blend;
1538
1539 cso->payload[0] = 0;
1540 cso->payload[1] = BRW_RENDERTARGET_CLAMPRANGE_FORMAT << 2 |
1541 0x3;
1542
1543 if (!(rt->colormask & PIPE_MASK_A))
1544 cso->payload[1] |= 1 << 27;
1545 if (!(rt->colormask & PIPE_MASK_R))
1546 cso->payload[1] |= 1 << 26;
1547 if (!(rt->colormask & PIPE_MASK_G))
1548 cso->payload[1] |= 1 << 25;
1549 if (!(rt->colormask & PIPE_MASK_B))
1550 cso->payload[1] |= 1 << 24;
1551
1552 if (state->dither)
1553 cso->payload[1] |= 1 << 12;
1554
1555 /*
1556 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
1557 *
1558 * "Color Buffer Blending and Logic Ops must not be enabled
1559 * simultaneously, or behavior is UNDEFINED."
1560 *
1561 * Since state->logicop_enable takes precedence over rt->blend_enable,
1562 * no special care is needed.
1563 */
1564 if (state->logicop_enable) {
1565 cso->dw_logicop = 1 << 22 |
1566 gen6_translate_pipe_logicop(state->logicop_func) << 18;
1567
1568 cso->dw_blend = 0;
1569 cso->dw_blend_dst_alpha_forced_one = 0;
1570
1571 dual_blend = false;
1572 }
1573 else {
1574 cso->dw_logicop = 0;
1575
1576 cso->dw_blend = blend_get_rt_blend_enable(dev, rt, false);
1577 cso->dw_blend_dst_alpha_forced_one =
1578 blend_get_rt_blend_enable(dev, rt, true);
1579
1580 dual_blend = (rt->blend_enable &&
1581 util_blend_state_is_dual(state, i));
1582 }
1583
1584 cso->dw_alpha_mod = 0;
1585
1586 if (state->alpha_to_coverage) {
1587 cso->dw_alpha_mod |= 1 << 31;
1588
1589 if (dev->gen >= ILO_GEN(7))
1590 cso->dw_alpha_mod |= 1 << 29;
1591 }
1592
1593 /*
1594 * From the Sandy Bridge PRM, volume 2 part 1, page 378:
1595 *
1596 * "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
1597 * must be disabled."
1598 */
1599 if (state->alpha_to_one && !dual_blend)
1600 cso->dw_alpha_mod |= 1 << 30;
1601
1602 if (dual_blend)
1603 blend->dual_blend = true;
1604 }
1605 }
1606
1607 void
1608 ilo_gpe_init_dsa(const struct ilo_dev_info *dev,
1609 const struct pipe_depth_stencil_alpha_state *state,
1610 struct ilo_dsa_state *dsa)
1611 {
1612 const struct pipe_depth_state *depth = &state->depth;
1613 const struct pipe_stencil_state *stencil0 = &state->stencil[0];
1614 const struct pipe_stencil_state *stencil1 = &state->stencil[1];
1615 const struct pipe_alpha_state *alpha = &state->alpha;
1616 uint32_t *dw;
1617
1618 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1619
1620 STATIC_ASSERT(Elements(dsa->payload) >= 3);
1621 dw = dsa->payload;
1622
1623 /*
1624 * From the Sandy Bridge PRM, volume 2 part 1, page 359:
1625 *
1626 * "If the Depth Buffer is either undefined or does not have a surface
1627 * format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
1628 * stencil buffer is disabled, Stencil Test Enable must be DISABLED"
1629 *
1630 * From the Sandy Bridge PRM, volume 2 part 1, page 370:
1631 *
1632 * "This field (Stencil Test Enable) cannot be enabled if
1633 * Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
1634 *
1635 * TODO We do not check these yet.
1636 */
1637 if (stencil0->enabled) {
1638 dw[0] = 1 << 31 |
1639 gen6_translate_dsa_func(stencil0->func) << 28 |
1640 gen6_translate_pipe_stencil_op(stencil0->fail_op) << 25 |
1641 gen6_translate_pipe_stencil_op(stencil0->zfail_op) << 22 |
1642 gen6_translate_pipe_stencil_op(stencil0->zpass_op) << 19;
1643 if (stencil0->writemask)
1644 dw[0] |= 1 << 18;
1645
1646 dw[1] = stencil0->valuemask << 24 |
1647 stencil0->writemask << 16;
1648
1649 if (stencil1->enabled) {
1650 dw[0] |= 1 << 15 |
1651 gen6_translate_dsa_func(stencil1->func) << 12 |
1652 gen6_translate_pipe_stencil_op(stencil1->fail_op) << 9 |
1653 gen6_translate_pipe_stencil_op(stencil1->zfail_op) << 6 |
1654 gen6_translate_pipe_stencil_op(stencil1->zpass_op) << 3;
1655 if (stencil1->writemask)
1656 dw[0] |= 1 << 18;
1657
1658 dw[1] |= stencil1->valuemask << 8 |
1659 stencil1->writemask;
1660 }
1661 }
1662 else {
1663 dw[0] = 0;
1664 dw[1] = 0;
1665 }
1666
1667 /*
1668 * From the Sandy Bridge PRM, volume 2 part 1, page 360:
1669 *
1670 * "Enabling the Depth Test function without defining a Depth Buffer is
1671 * UNDEFINED."
1672 *
1673 * From the Sandy Bridge PRM, volume 2 part 1, page 375:
1674 *
1675 * "A Depth Buffer must be defined before enabling writes to it, or
1676 * operation is UNDEFINED."
1677 *
1678 * TODO We do not check these yet.
1679 */
1680 dw[2] = depth->enabled << 31 |
1681 depth->writemask << 26;
1682 if (depth->enabled)
1683 dw[2] |= gen6_translate_dsa_func(depth->func) << 27;
1684 else
1685 dw[2] |= BRW_COMPAREFUNCTION_ALWAYS << 27;
1686
1687 /* dw_alpha will be ORed to BLEND_STATE */
1688 if (alpha->enabled) {
1689 dsa->dw_alpha = 1 << 16 |
1690 gen6_translate_dsa_func(alpha->func) << 13;
1691 }
1692 else {
1693 dsa->dw_alpha = 0;
1694 }
1695
1696 dsa->alpha_ref = float_to_ubyte(alpha->ref_value);
1697 }
1698
1699 void
1700 ilo_gpe_set_scissor(const struct ilo_dev_info *dev,
1701 unsigned start_slot,
1702 unsigned num_states,
1703 const struct pipe_scissor_state *states,
1704 struct ilo_scissor_state *scissor)
1705 {
1706 unsigned i;
1707
1708 ILO_GPE_VALID_GEN(dev, 6, 7.5);
1709
1710 for (i = 0; i < num_states; i++) {
1711 uint16_t min_x, min_y, max_x, max_y;
1712
1713 /* both max and min are inclusive in SCISSOR_RECT */
1714 if (states[i].minx < states[i].maxx &&
1715 states[i].miny < states[i].maxy) {
1716 min_x = states[i].minx;
1717 min_y = states[i].miny;
1718 max_x = states[i].maxx - 1;
1719 max_y = states[i].maxy - 1;
1720 }
1721 else {
1722 /* we have to make min greater than max */
1723 min_x = 1;
1724 min_y = 1;
1725 max_x = 0;
1726 max_y = 0;
1727 }
1728
1729 scissor->payload[(start_slot + i) * 2 + 0] = min_y << 16 | min_x;
1730 scissor->payload[(start_slot + i) * 2 + 1] = max_y << 16 | max_x;
1731 }
1732
1733 if (!start_slot && num_states)
1734 scissor->scissor0 = states[0];
1735 }
1736
1737 void
1738 ilo_gpe_set_scissor_null(const struct ilo_dev_info *dev,
1739 struct ilo_scissor_state *scissor)
1740 {
1741 unsigned i;
1742
1743 for (i = 0; i < Elements(scissor->payload); i += 2) {
1744 scissor->payload[i + 0] = 1 << 16 | 1;
1745 scissor->payload[i + 1] = 0;
1746 }
1747 }
1748
1749 void
1750 ilo_gpe_init_view_surface_null_gen6(const struct ilo_dev_info *dev,
1751 unsigned width, unsigned height,
1752 unsigned depth, unsigned level,
1753 struct ilo_view_surface *surf)
1754 {
1755 uint32_t *dw;
1756
1757 ILO_GPE_VALID_GEN(dev, 6, 6);
1758
1759 /*
1760 * From the Sandy Bridge PRM, volume 4 part 1, page 71:
1761 *
1762 * "A null surface will be used in instances where an actual surface is
1763 * not bound. When a write message is generated to a null surface, no
1764 * actual surface is written to. When a read message (including any
1765 * sampling engine message) is generated to a null surface, the result
1766 * is all zeros. Note that a null surface type is allowed to be used
1767 * with all messages, even if it is not specificially indicated as
1768 * supported. All of the remaining fields in surface state are ignored
1769 * for null surfaces, with the following exceptions:
1770 *
1771 * * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
1772 * depth buffer's corresponding state for all render target
1773 * surfaces, including null.
1774 * * Surface Format must be R8G8B8A8_UNORM."
1775 *
1776 * From the Sandy Bridge PRM, volume 4 part 1, page 82:
1777 *
1778 * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
1779 * true"
1780 */
1781
1782 STATIC_ASSERT(Elements(surf->payload) >= 6);
1783 dw = surf->payload;
1784
1785 dw[0] = BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
1786 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT;
1787
1788 dw[1] = 0;
1789
1790 dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
1791 (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
1792 level << BRW_SURFACE_LOD_SHIFT;
1793
1794 dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
1795 BRW_SURFACE_TILED;
1796
1797 dw[4] = 0;
1798 dw[5] = 0;
1799
1800 surf->bo = NULL;
1801 }
1802
1803 void
1804 ilo_gpe_init_view_surface_for_buffer_gen6(const struct ilo_dev_info *dev,
1805 const struct ilo_buffer *buf,
1806 unsigned offset, unsigned size,
1807 unsigned struct_size,
1808 enum pipe_format elem_format,
1809 bool is_rt, bool render_cache_rw,
1810 struct ilo_view_surface *surf)
1811 {
1812 const int elem_size = util_format_get_blocksize(elem_format);
1813 int width, height, depth, pitch;
1814 int surface_format, num_entries;
1815 uint32_t *dw;
1816
1817 ILO_GPE_VALID_GEN(dev, 6, 6);
1818
1819 /*
1820 * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
1821 * structure in a buffer.
1822 */
1823
1824 surface_format = ilo_translate_color_format(elem_format);
1825
1826 num_entries = size / struct_size;
1827 /* see if there is enough space to fit another element */
1828 if (size % struct_size >= elem_size)
1829 num_entries++;
1830
1831 /*
1832 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
1833 *
1834 * "For SURFTYPE_BUFFER render targets, this field (Surface Base
1835 * Address) specifies the base address of first element of the
1836 * surface. The surface is interpreted as a simple array of that
1837 * single element type. The address must be naturally-aligned to the
1838 * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
1839 * must be 16-byte aligned).
1840 *
1841 * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
1842 * the base address of the first element of the surface, computed in
1843 * software by adding the surface base address to the byte offset of
1844 * the element in the buffer."
1845 */
1846 if (is_rt)
1847 assert(offset % elem_size == 0);
1848
1849 /*
1850 * From the Sandy Bridge PRM, volume 4 part 1, page 77:
1851 *
1852 * "For buffer surfaces, the number of entries in the buffer ranges
1853 * from 1 to 2^27."
1854 */
1855 assert(num_entries >= 1 && num_entries <= 1 << 27);
1856
1857 /*
1858 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
1859 *
1860 * "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
1861 * indicates the size of the structure."
1862 */
1863 pitch = struct_size;
1864
1865 pitch--;
1866 num_entries--;
1867 /* bits [6:0] */
1868 width = (num_entries & 0x0000007f);
1869 /* bits [19:7] */
1870 height = (num_entries & 0x000fff80) >> 7;
1871 /* bits [26:20] */
1872 depth = (num_entries & 0x07f00000) >> 20;
1873
1874 STATIC_ASSERT(Elements(surf->payload) >= 6);
1875 dw = surf->payload;
1876
1877 dw[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
1878 surface_format << BRW_SURFACE_FORMAT_SHIFT;
1879 if (render_cache_rw)
1880 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
1881
1882 dw[1] = offset;
1883
1884 dw[2] = height << BRW_SURFACE_HEIGHT_SHIFT |
1885 width << BRW_SURFACE_WIDTH_SHIFT;
1886
1887 dw[3] = depth << BRW_SURFACE_DEPTH_SHIFT |
1888 pitch << BRW_SURFACE_PITCH_SHIFT;
1889
1890 dw[4] = 0;
1891 dw[5] = 0;
1892
1893 /* do not increment reference count */
1894 surf->bo = buf->bo;
1895 }
1896
1897 void
1898 ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info *dev,
1899 const struct ilo_texture *tex,
1900 enum pipe_format format,
1901 unsigned first_level,
1902 unsigned num_levels,
1903 unsigned first_layer,
1904 unsigned num_layers,
1905 bool is_rt, bool render_cache_rw,
1906 struct ilo_view_surface *surf)
1907 {
1908 int surface_type, surface_format;
1909 int width, height, depth, pitch, lod;
1910 unsigned layer_offset, x_offset, y_offset;
1911 uint32_t *dw;
1912
1913 ILO_GPE_VALID_GEN(dev, 6, 6);
1914
1915 surface_type = ilo_gpe_gen6_translate_texture(tex->base.target);
1916 assert(surface_type != BRW_SURFACE_BUFFER);
1917
1918 if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT && tex->separate_s8)
1919 format = PIPE_FORMAT_Z32_FLOAT;
1920
1921 if (is_rt)
1922 surface_format = ilo_translate_render_format(format);
1923 else
1924 surface_format = ilo_translate_texture_format(format);
1925 assert(surface_format >= 0);
1926
1927 width = tex->base.width0;
1928 height = tex->base.height0;
1929 depth = (tex->base.target == PIPE_TEXTURE_3D) ?
1930 tex->base.depth0 : num_layers;
1931 pitch = tex->bo_stride;
1932
1933 if (surface_type == BRW_SURFACE_CUBE) {
1934 /*
1935 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
1936 *
1937 * "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
1938 * range of this field (Depth) is [0,84], indicating the number of
1939 * cube array elements (equal to the number of underlying 2D array
1940 * elements divided by 6). For other surfaces, this field must be
1941 * zero."
1942 *
1943 * When is_rt is true, we treat the texture as a 2D one to avoid the
1944 * restriction.
1945 */
1946 if (is_rt) {
1947 surface_type = BRW_SURFACE_2D;
1948 }
1949 else {
1950 assert(num_layers % 6 == 0);
1951 depth = num_layers / 6;
1952 }
1953 }
1954
1955 /* sanity check the size */
1956 assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
1957 switch (surface_type) {
1958 case BRW_SURFACE_1D:
1959 assert(width <= 8192 && height == 1 && depth <= 512);
1960 assert(first_layer < 512 && num_layers <= 512);
1961 break;
1962 case BRW_SURFACE_2D:
1963 assert(width <= 8192 && height <= 8192 && depth <= 512);
1964 assert(first_layer < 512 && num_layers <= 512);
1965 break;
1966 case BRW_SURFACE_3D:
1967 assert(width <= 2048 && height <= 2048 && depth <= 2048);
1968 assert(first_layer < 2048 && num_layers <= 512);
1969 if (!is_rt)
1970 assert(first_layer == 0);
1971 break;
1972 case BRW_SURFACE_CUBE:
1973 assert(width <= 8192 && height <= 8192 && depth <= 85);
1974 assert(width == height);
1975 assert(first_layer < 512 && num_layers <= 512);
1976 if (is_rt)
1977 assert(first_layer == 0);
1978 break;
1979 default:
1980 assert(!"unexpected surface type");
1981 break;
1982 }
1983
1984 /* non-full array spacing is supported only on GEN7+ */
1985 assert(tex->array_spacing_full);
1986 /* non-interleaved samples are supported only on GEN7+ */
1987 if (tex->base.nr_samples > 1)
1988 assert(tex->interleaved);
1989
1990 if (is_rt) {
1991 /*
1992 * Compute the offset to the layer manually.
1993 *
1994 * For rendering, the hardware requires LOD to be the same for all
1995 * render targets and the depth buffer. We need to compute the offset
1996 * to the layer manually and always set LOD to 0.
1997 */
1998 if (true) {
1999 /* we lose the capability for layered rendering */
2000 assert(num_layers == 1);
2001
2002 layer_offset = ilo_texture_get_slice_offset(tex,
2003 first_level, first_layer, &x_offset, &y_offset);
2004
2005 assert(x_offset % 4 == 0);
2006 assert(y_offset % 2 == 0);
2007 x_offset /= 4;
2008 y_offset /= 2;
2009
2010 /* derive the size for the LOD */
2011 width = u_minify(width, first_level);
2012 height = u_minify(height, first_level);
2013 if (surface_type == BRW_SURFACE_3D)
2014 depth = u_minify(depth, first_level);
2015 else
2016 depth = 1;
2017
2018 first_level = 0;
2019 first_layer = 0;
2020 lod = 0;
2021 }
2022 else {
2023 layer_offset = 0;
2024 x_offset = 0;
2025 y_offset = 0;
2026 }
2027
2028 assert(num_levels == 1);
2029 lod = first_level;
2030 }
2031 else {
2032 layer_offset = 0;
2033 x_offset = 0;
2034 y_offset = 0;
2035
2036 lod = num_levels - 1;
2037 }
2038
2039 /*
2040 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
2041 *
2042 * "Linear render target surface base addresses must be element-size
2043 * aligned, for non-YUV surface formats, or a multiple of 2
2044 * element-sizes for YUV surface formats. Other linear surfaces have
2045 * no alignment requirements (byte alignment is sufficient.)"
2046 *
2047 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
2048 *
2049 * "For linear render target surfaces, the pitch must be a multiple
2050 * of the element size for non-YUV surface formats. Pitch must be a
2051 * multiple of 2 * element size for YUV surface formats."
2052 *
2053 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
2054 *
2055 * "For linear surfaces, this field (X Offset) must be zero"
2056 */
2057 if (tex->tiling == INTEL_TILING_NONE) {
2058 if (is_rt) {
2059 const int elem_size = util_format_get_blocksize(format);
2060 assert(layer_offset % elem_size == 0);
2061 assert(pitch % elem_size == 0);
2062 }
2063
2064 assert(!x_offset);
2065 }
2066
2067 STATIC_ASSERT(Elements(surf->payload) >= 6);
2068 dw = surf->payload;
2069
2070 dw[0] = surface_type << BRW_SURFACE_TYPE_SHIFT |
2071 surface_format << BRW_SURFACE_FORMAT_SHIFT |
2072 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT;
2073
2074 if (surface_type == BRW_SURFACE_CUBE && !is_rt) {
2075 dw[0] |= 1 << 9 |
2076 BRW_SURFACE_CUBEFACE_ENABLES;
2077 }
2078
2079 if (render_cache_rw)
2080 dw[0] |= BRW_SURFACE_RC_READ_WRITE;
2081
2082 dw[1] = layer_offset;
2083
2084 dw[2] = (height - 1) << BRW_SURFACE_HEIGHT_SHIFT |
2085 (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
2086 lod << BRW_SURFACE_LOD_SHIFT;
2087
2088 dw[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
2089 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT |
2090 ilo_gpe_gen6_translate_winsys_tiling(tex->tiling);
2091
2092 dw[4] = first_level << BRW_SURFACE_MIN_LOD_SHIFT |
2093 first_layer << 17 |
2094 (num_layers - 1) << 8 |
2095 ((tex->base.nr_samples > 1) ? BRW_SURFACE_MULTISAMPLECOUNT_4 :
2096 BRW_SURFACE_MULTISAMPLECOUNT_1);
2097
2098 dw[5] = x_offset << BRW_SURFACE_X_OFFSET_SHIFT |
2099 y_offset << BRW_SURFACE_Y_OFFSET_SHIFT;
2100 if (tex->valign_4)
2101 dw[5] |= BRW_SURFACE_VERTICAL_ALIGN_ENABLE;
2102
2103 /* do not increment reference count */
2104 surf->bo = tex->bo;
2105 }
2106
2107 static void
2108 sampler_init_border_color_gen6(const struct ilo_dev_info *dev,
2109 const union pipe_color_union *color,
2110 uint32_t *dw, int num_dwords)
2111 {
2112 float rgba[4] = {
2113 color->f[0], color->f[1], color->f[2], color->f[3],
2114 };
2115
2116 ILO_GPE_VALID_GEN(dev, 6, 6);
2117
2118 assert(num_dwords >= 12);
2119
2120 /*
2121 * This state is not documented in the Sandy Bridge PRM, but in the
2122 * Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1.
2123 */
2124
2125 /* IEEE_FP */
2126 dw[1] = fui(rgba[0]);
2127 dw[2] = fui(rgba[1]);
2128 dw[3] = fui(rgba[2]);
2129 dw[4] = fui(rgba[3]);
2130
2131 /* FLOAT_16 */
2132 dw[5] = util_float_to_half(rgba[0]) |
2133 util_float_to_half(rgba[1]) << 16;
2134 dw[6] = util_float_to_half(rgba[2]) |
2135 util_float_to_half(rgba[3]) << 16;
2136
2137 /* clamp to [-1.0f, 1.0f] */
2138 rgba[0] = CLAMP(rgba[0], -1.0f, 1.0f);
2139 rgba[1] = CLAMP(rgba[1], -1.0f, 1.0f);
2140 rgba[2] = CLAMP(rgba[2], -1.0f, 1.0f);
2141 rgba[3] = CLAMP(rgba[3], -1.0f, 1.0f);
2142
2143 /* SNORM16 */
2144 dw[9] = (int16_t) util_iround(rgba[0] * 32767.0f) |
2145 (int16_t) util_iround(rgba[1] * 32767.0f) << 16;
2146 dw[10] = (int16_t) util_iround(rgba[2] * 32767.0f) |
2147 (int16_t) util_iround(rgba[3] * 32767.0f) << 16;
2148
2149 /* SNORM8 */
2150 dw[11] = (int8_t) util_iround(rgba[0] * 127.0f) |
2151 (int8_t) util_iround(rgba[1] * 127.0f) << 8 |
2152 (int8_t) util_iround(rgba[2] * 127.0f) << 16 |
2153 (int8_t) util_iround(rgba[3] * 127.0f) << 24;
2154
2155 /* clamp to [0.0f, 1.0f] */
2156 rgba[0] = CLAMP(rgba[0], 0.0f, 1.0f);
2157 rgba[1] = CLAMP(rgba[1], 0.0f, 1.0f);
2158 rgba[2] = CLAMP(rgba[2], 0.0f, 1.0f);
2159 rgba[3] = CLAMP(rgba[3], 0.0f, 1.0f);
2160
2161 /* UNORM8 */
2162 dw[0] = (uint8_t) util_iround(rgba[0] * 255.0f) |
2163 (uint8_t) util_iround(rgba[1] * 255.0f) << 8 |
2164 (uint8_t) util_iround(rgba[2] * 255.0f) << 16 |
2165 (uint8_t) util_iround(rgba[3] * 255.0f) << 24;
2166
2167 /* UNORM16 */
2168 dw[7] = (uint16_t) util_iround(rgba[0] * 65535.0f) |
2169 (uint16_t) util_iround(rgba[1] * 65535.0f) << 16;
2170 dw[8] = (uint16_t) util_iround(rgba[2] * 65535.0f) |
2171 (uint16_t) util_iround(rgba[3] * 65535.0f) << 16;
2172 }
2173
2174 void
2175 ilo_gpe_init_sampler_cso(const struct ilo_dev_info *dev,
2176 const struct pipe_sampler_state *state,
2177 struct ilo_sampler_cso *sampler)
2178 {
2179 int mip_filter, min_filter, mag_filter, max_aniso;
2180 int lod_bias, max_lod, min_lod;
2181 int wrap_s, wrap_t, wrap_r, wrap_cube;
2182 bool clamp_is_to_edge;
2183 uint32_t dw0, dw1, dw3;
2184
2185 ILO_GPE_VALID_GEN(dev, 6, 7.5);
2186
2187 memset(sampler, 0, sizeof(*sampler));
2188
2189 mip_filter = gen6_translate_tex_mipfilter(state->min_mip_filter);
2190 min_filter = gen6_translate_tex_filter(state->min_img_filter);
2191 mag_filter = gen6_translate_tex_filter(state->mag_img_filter);
2192
2193 sampler->anisotropic = state->max_anisotropy;
2194
2195 if (state->max_anisotropy >= 2 && state->max_anisotropy <= 16)
2196 max_aniso = state->max_anisotropy / 2 - 1;
2197 else if (state->max_anisotropy > 16)
2198 max_aniso = BRW_ANISORATIO_16;
2199 else
2200 max_aniso = BRW_ANISORATIO_2;
2201
2202 /*
2203 *
2204 * Here is how the hardware calculate per-pixel LOD, from my reading of the
2205 * PRMs:
2206 *
2207 * 1) LOD is set to log2(ratio of texels to pixels) if not specified in
2208 * other ways. The number of texels is measured using level
2209 * SurfMinLod.
2210 * 2) Bias is added to LOD.
2211 * 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
2212 * compared with Base to determine whether magnification or
2213 * minification is needed. (if preclamp is disabled, LOD is compared
2214 * with Base before clamping)
2215 * 4) If magnification is needed, or no mipmapping is requested, LOD is
2216 * set to floor(MinLod).
2217 * 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
2218 *
2219 * With Gallium interface, Base is always zero and
2220 * pipe_sampler_view::u.tex.first_level specifies SurfMinLod.
2221 */
2222 if (dev->gen >= ILO_GEN(7)) {
2223 const float scale = 256.0f;
2224
2225 /* [-16.0, 16.0) in S4.8 */
2226 lod_bias = (int)
2227 (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
2228 lod_bias &= 0x1fff;
2229
2230 /* [0.0, 14.0] in U4.8 */
2231 max_lod = (int) (CLAMP(state->max_lod, 0.0f, 14.0f) * scale);
2232 min_lod = (int) (CLAMP(state->min_lod, 0.0f, 14.0f) * scale);
2233 }
2234 else {
2235 const float scale = 64.0f;
2236
2237 /* [-16.0, 16.0) in S4.6 */
2238 lod_bias = (int)
2239 (CLAMP(state->lod_bias, -16.0f, 15.9f) * scale);
2240 lod_bias &= 0x7ff;
2241
2242 /* [0.0, 13.0] in U4.6 */
2243 max_lod = (int) (CLAMP(state->max_lod, 0.0f, 13.0f) * scale);
2244 min_lod = (int) (CLAMP(state->min_lod, 0.0f, 13.0f) * scale);
2245 }
2246
2247 /*
2248 * We want LOD to be clamped to determine magnification/minification, and
2249 * get set to zero when it is magnification or when mipmapping is disabled.
2250 * The hardware would set LOD to floor(MinLod) and that is a problem when
2251 * MinLod is greater than or equal to 1.0f.
2252 *
2253 * With Base being zero, it is always minification when MinLod is non-zero.
2254 * To achieve our goal, we just need to set MinLod to zero and set
2255 * MagFilter to MinFilter when mipmapping is disabled.
2256 */
2257 if (state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE && min_lod) {
2258 min_lod = 0;
2259 mag_filter = min_filter;
2260 }
2261
2262 /*
2263 * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
2264 * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering, PIPE_TEX_WRAP_CLAMP
2265 * means PIPE_TEX_WRAP_CLAMP_TO_BORDER while additionally clamping the
2266 * texture coordinates to [0.0, 1.0].
2267 *
2268 * The clamping will be taken care of in the shaders. There are two
2269 * filters here, but let the minification one has a say.
2270 */
2271 clamp_is_to_edge = (state->min_img_filter == PIPE_TEX_FILTER_NEAREST);
2272 if (!clamp_is_to_edge) {
2273 sampler->saturate_s = (state->wrap_s == PIPE_TEX_WRAP_CLAMP);
2274 sampler->saturate_t = (state->wrap_t == PIPE_TEX_WRAP_CLAMP);
2275 sampler->saturate_r = (state->wrap_r == PIPE_TEX_WRAP_CLAMP);
2276 }
2277
2278 /* determine wrap s/t/r */
2279 wrap_s = gen6_translate_tex_wrap(state->wrap_s, clamp_is_to_edge);
2280 wrap_t = gen6_translate_tex_wrap(state->wrap_t, clamp_is_to_edge);
2281 wrap_r = gen6_translate_tex_wrap(state->wrap_r, clamp_is_to_edge);
2282
2283 /*
2284 * From the Sandy Bridge PRM, volume 4 part 1, page 107:
2285 *
2286 * "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
2287 * and TEXCOORDMODE_CUBE settings are valid, and each TC component
2288 * must have the same Address Control mode."
2289 *
2290 * From the Ivy Bridge PRM, volume 4 part 1, page 96:
2291 *
2292 * "This field (Cube Surface Control Mode) must be set to
2293 * CUBECTRLMODE_PROGRAMMED"
2294 *
2295 * Therefore, we cannot use "Cube Surface Control Mode" for semless cube
2296 * map filtering.
2297 */
2298 if (state->seamless_cube_map &&
2299 (state->min_img_filter != PIPE_TEX_FILTER_NEAREST ||
2300 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST)) {
2301 wrap_cube = BRW_TEXCOORDMODE_CUBE;
2302 }
2303 else {
2304 wrap_cube = BRW_TEXCOORDMODE_CLAMP;
2305 }
2306
2307 if (!state->normalized_coords) {
2308 /*
2309 * From the Ivy Bridge PRM, volume 4 part 1, page 98:
2310 *
2311 * "The following state must be set as indicated if this field
2312 * (Non-normalized Coordinate Enable) is enabled:
2313 *
2314 * - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
2315 * TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
2316 * - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
2317 * - Mag Mode Filter must be MAPFILTER_NEAREST or
2318 * MAPFILTER_LINEAR.
2319 * - Min Mode Filter must be MAPFILTER_NEAREST or
2320 * MAPFILTER_LINEAR.
2321 * - Mip Mode Filter must be MIPFILTER_NONE.
2322 * - Min LOD must be 0.
2323 * - Max LOD must be 0.
2324 * - MIP Count must be 0.
2325 * - Surface Min LOD must be 0.
2326 * - Texture LOD Bias must be 0."
2327 */
2328 assert(wrap_s == BRW_TEXCOORDMODE_CLAMP ||
2329 wrap_s == BRW_TEXCOORDMODE_CLAMP_BORDER);
2330 assert(wrap_t == BRW_TEXCOORDMODE_CLAMP ||
2331 wrap_t == BRW_TEXCOORDMODE_CLAMP_BORDER);
2332 assert(wrap_r == BRW_TEXCOORDMODE_CLAMP ||
2333 wrap_r == BRW_TEXCOORDMODE_CLAMP_BORDER);
2334
2335 assert(mag_filter == BRW_MAPFILTER_NEAREST ||
2336 mag_filter == BRW_MAPFILTER_LINEAR);
2337 assert(min_filter == BRW_MAPFILTER_NEAREST ||
2338 min_filter == BRW_MAPFILTER_LINEAR);
2339
2340 /* work around a bug in util_blitter */
2341 mip_filter = BRW_MIPFILTER_NONE;
2342
2343 assert(mip_filter == BRW_MIPFILTER_NONE);
2344 }
2345
2346 if (dev->gen >= ILO_GEN(7)) {
2347 dw0 = 1 << 28 |
2348 mip_filter << 20 |
2349 lod_bias << 1;
2350
2351 sampler->dw_filter = mag_filter << 17 |
2352 min_filter << 14;
2353
2354 sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 |
2355 BRW_MAPFILTER_ANISOTROPIC << 14 |
2356 1;
2357
2358 dw1 = min_lod << 20 |
2359 max_lod << 8;
2360
2361 if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
2362 dw1 |= gen6_translate_shadow_func(state->compare_func) << 1;
2363
2364 dw3 = max_aniso << 19;
2365
2366 /* round the coordinates for linear filtering */
2367 if (min_filter != BRW_MAPFILTER_NEAREST) {
2368 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
2369 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
2370 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
2371 }
2372 if (mag_filter != BRW_MAPFILTER_NEAREST) {
2373 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
2374 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
2375 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
2376 }
2377
2378 if (!state->normalized_coords)
2379 dw3 |= 1 << 10;
2380
2381 sampler->dw_wrap = wrap_s << 6 |
2382 wrap_t << 3 |
2383 wrap_r;
2384
2385 /*
2386 * As noted in the classic i965 driver, the HW may still reference
2387 * wrap_t and wrap_r for 1D textures. We need to set them to a safe
2388 * mode
2389 */
2390 sampler->dw_wrap_1d = wrap_s << 6 |
2391 BRW_TEXCOORDMODE_WRAP << 3 |
2392 BRW_TEXCOORDMODE_WRAP;
2393
2394 sampler->dw_wrap_cube = wrap_cube << 6 |
2395 wrap_cube << 3 |
2396 wrap_cube;
2397
2398 STATIC_ASSERT(Elements(sampler->payload) >= 7);
2399
2400 sampler->payload[0] = dw0;
2401 sampler->payload[1] = dw1;
2402 sampler->payload[2] = dw3;
2403
2404 memcpy(&sampler->payload[3],
2405 state->border_color.ui, sizeof(state->border_color.ui));
2406 }
2407 else {
2408 dw0 = 1 << 28 |
2409 mip_filter << 20 |
2410 lod_bias << 3;
2411
2412 if (state->compare_mode != PIPE_TEX_COMPARE_NONE)
2413 dw0 |= gen6_translate_shadow_func(state->compare_func);
2414
2415 sampler->dw_filter = (min_filter != mag_filter) << 27 |
2416 mag_filter << 17 |
2417 min_filter << 14;
2418
2419 sampler->dw_filter_aniso = BRW_MAPFILTER_ANISOTROPIC << 17 |
2420 BRW_MAPFILTER_ANISOTROPIC << 14;
2421
2422 dw1 = min_lod << 22 |
2423 max_lod << 12;
2424
2425 sampler->dw_wrap = wrap_s << 6 |
2426 wrap_t << 3 |
2427 wrap_r;
2428
2429 sampler->dw_wrap_1d = wrap_s << 6 |
2430 BRW_TEXCOORDMODE_WRAP << 3 |
2431 BRW_TEXCOORDMODE_WRAP;
2432
2433 sampler->dw_wrap_cube = wrap_cube << 6 |
2434 wrap_cube << 3 |
2435 wrap_cube;
2436
2437 dw3 = max_aniso << 19;
2438
2439 /* round the coordinates for linear filtering */
2440 if (min_filter != BRW_MAPFILTER_NEAREST) {
2441 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
2442 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
2443 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN) << 13;
2444 }
2445 if (mag_filter != BRW_MAPFILTER_NEAREST) {
2446 dw3 |= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
2447 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
2448 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG) << 13;
2449 }
2450
2451 if (!state->normalized_coords)
2452 dw3 |= 1;
2453
2454 STATIC_ASSERT(Elements(sampler->payload) >= 15);
2455
2456 sampler->payload[0] = dw0;
2457 sampler->payload[1] = dw1;
2458 sampler->payload[2] = dw3;
2459
2460 sampler_init_border_color_gen6(dev,
2461 &state->border_color, &sampler->payload[3], 12);
2462 }
2463 }
2464
2465 int
2466 ilo_gpe_gen6_estimate_command_size(const struct ilo_dev_info *dev,
2467 enum ilo_gpe_gen6_command cmd,
2468 int arg)
2469 {
2470 static const struct {
2471 int header;
2472 int body;
2473 } gen6_command_size_table[ILO_GPE_GEN6_COMMAND_COUNT] = {
2474 [ILO_GPE_GEN6_STATE_BASE_ADDRESS] = { 0, 10 },
2475 [ILO_GPE_GEN6_STATE_SIP] = { 0, 2 },
2476 [ILO_GPE_GEN6_3DSTATE_VF_STATISTICS] = { 0, 1 },
2477 [ILO_GPE_GEN6_PIPELINE_SELECT] = { 0, 1 },
2478 [ILO_GPE_GEN6_MEDIA_VFE_STATE] = { 0, 8 },
2479 [ILO_GPE_GEN6_MEDIA_CURBE_LOAD] = { 0, 4 },
2480 [ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD] = { 0, 4 },
2481 [ILO_GPE_GEN6_MEDIA_GATEWAY_STATE] = { 0, 2 },
2482 [ILO_GPE_GEN6_MEDIA_STATE_FLUSH] = { 0, 2 },
2483 [ILO_GPE_GEN6_MEDIA_OBJECT_WALKER] = { 17, 1 },
2484 [ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS] = { 0, 4 },
2485 [ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS] = { 0, 4 },
2486 [ILO_GPE_GEN6_3DSTATE_URB] = { 0, 3 },
2487 [ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS] = { 1, 4 },
2488 [ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS] = { 1, 2 },
2489 [ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER] = { 0, 3 },
2490 [ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS] = { 0, 4 },
2491 [ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS] = { 0, 4 },
2492 [ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS] = { 0, 2 },
2493 [ILO_GPE_GEN6_3DSTATE_VS] = { 0, 6 },
2494 [ILO_GPE_GEN6_3DSTATE_GS] = { 0, 7 },
2495 [ILO_GPE_GEN6_3DSTATE_CLIP] = { 0, 4 },
2496 [ILO_GPE_GEN6_3DSTATE_SF] = { 0, 20 },
2497 [ILO_GPE_GEN6_3DSTATE_WM] = { 0, 9 },
2498 [ILO_GPE_GEN6_3DSTATE_CONSTANT_VS] = { 0, 5 },
2499 [ILO_GPE_GEN6_3DSTATE_CONSTANT_GS] = { 0, 5 },
2500 [ILO_GPE_GEN6_3DSTATE_CONSTANT_PS] = { 0, 5 },
2501 [ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK] = { 0, 2 },
2502 [ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE] = { 0, 4 },
2503 [ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER] = { 0, 7 },
2504 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET] = { 0, 2 },
2505 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN] = { 0, 33 },
2506 [ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE] = { 0, 3 },
2507 [ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS] = { 0, 3 },
2508 [ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX] = { 0, 4 },
2509 [ILO_GPE_GEN6_3DSTATE_MULTISAMPLE] = { 0, 3 },
2510 [ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER] = { 0, 3 },
2511 [ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER] = { 0, 3 },
2512 [ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS] = { 0, 2 },
2513 [ILO_GPE_GEN6_PIPE_CONTROL] = { 0, 5 },
2514 [ILO_GPE_GEN6_3DPRIMITIVE] = { 0, 6 },
2515 };
2516 const int header = gen6_command_size_table[cmd].header;
2517 const int body = gen6_command_size_table[arg].body;
2518 const int count = arg;
2519
2520 ILO_GPE_VALID_GEN(dev, 6, 6);
2521 assert(cmd < ILO_GPE_GEN6_COMMAND_COUNT);
2522
2523 return (likely(count)) ? header + body * count : 0;
2524 }
2525
2526 int
2527 ilo_gpe_gen6_estimate_state_size(const struct ilo_dev_info *dev,
2528 enum ilo_gpe_gen6_state state,
2529 int arg)
2530 {
2531 static const struct {
2532 int alignment;
2533 int body;
2534 bool is_array;
2535 } gen6_state_size_table[ILO_GPE_GEN6_STATE_COUNT] = {
2536 [ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA] = { 8, 8, true },
2537 [ILO_GPE_GEN6_SF_VIEWPORT] = { 8, 8, true },
2538 [ILO_GPE_GEN6_CLIP_VIEWPORT] = { 8, 4, true },
2539 [ILO_GPE_GEN6_CC_VIEWPORT] = { 8, 2, true },
2540 [ILO_GPE_GEN6_COLOR_CALC_STATE] = { 16, 6, false },
2541 [ILO_GPE_GEN6_BLEND_STATE] = { 16, 2, true },
2542 [ILO_GPE_GEN6_DEPTH_STENCIL_STATE] = { 16, 3, false },
2543 [ILO_GPE_GEN6_SCISSOR_RECT] = { 8, 2, true },
2544 [ILO_GPE_GEN6_BINDING_TABLE_STATE] = { 8, 1, true },
2545 [ILO_GPE_GEN6_SURFACE_STATE] = { 8, 6, false },
2546 [ILO_GPE_GEN6_SAMPLER_STATE] = { 8, 4, true },
2547 [ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE] = { 8, 12, false },
2548 [ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER] = { 8, 1, true },
2549 };
2550 const int alignment = gen6_state_size_table[state].alignment;
2551 const int body = gen6_state_size_table[state].body;
2552 const bool is_array = gen6_state_size_table[state].is_array;
2553 const int count = arg;
2554 int estimate;
2555
2556 ILO_GPE_VALID_GEN(dev, 6, 6);
2557 assert(state < ILO_GPE_GEN6_STATE_COUNT);
2558
2559 if (likely(count)) {
2560 if (is_array) {
2561 estimate = (alignment - 1) + body * count;
2562 }
2563 else {
2564 estimate = (alignment - 1) + body;
2565 /* all states are aligned */
2566 if (count > 1)
2567 estimate += util_align_npot(body, alignment) * (count - 1);
2568 }
2569 }
2570 else {
2571 estimate = 0;
2572 }
2573
2574 return estimate;
2575 }