/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2012-2013 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 *    Chia-I Wu <olv@lunarg.com>
 */

28 #include "util/u_dual_blend.h"
29 #include "util/u_framebuffer.h"
30 #include "util/u_half.h"
31 #include "brw_defines.h"
32 #include "intel_reg.h"
34 #include "ilo_context.h"
35 #include "ilo_format.h"
36 #include "ilo_resource.h"
37 #include "ilo_shader.h"
38 #include "ilo_state.h"
39 #include "ilo_gpe_gen6.h"
42 * Translate a pipe logicop to the matching hardware logicop.
45 gen6_translate_pipe_logicop(unsigned logicop
)
48 case PIPE_LOGICOP_CLEAR
: return BRW_LOGICOPFUNCTION_CLEAR
;
49 case PIPE_LOGICOP_NOR
: return BRW_LOGICOPFUNCTION_NOR
;
50 case PIPE_LOGICOP_AND_INVERTED
: return BRW_LOGICOPFUNCTION_AND_INVERTED
;
51 case PIPE_LOGICOP_COPY_INVERTED
: return BRW_LOGICOPFUNCTION_COPY_INVERTED
;
52 case PIPE_LOGICOP_AND_REVERSE
: return BRW_LOGICOPFUNCTION_AND_REVERSE
;
53 case PIPE_LOGICOP_INVERT
: return BRW_LOGICOPFUNCTION_INVERT
;
54 case PIPE_LOGICOP_XOR
: return BRW_LOGICOPFUNCTION_XOR
;
55 case PIPE_LOGICOP_NAND
: return BRW_LOGICOPFUNCTION_NAND
;
56 case PIPE_LOGICOP_AND
: return BRW_LOGICOPFUNCTION_AND
;
57 case PIPE_LOGICOP_EQUIV
: return BRW_LOGICOPFUNCTION_EQUIV
;
58 case PIPE_LOGICOP_NOOP
: return BRW_LOGICOPFUNCTION_NOOP
;
59 case PIPE_LOGICOP_OR_INVERTED
: return BRW_LOGICOPFUNCTION_OR_INVERTED
;
60 case PIPE_LOGICOP_COPY
: return BRW_LOGICOPFUNCTION_COPY
;
61 case PIPE_LOGICOP_OR_REVERSE
: return BRW_LOGICOPFUNCTION_OR_REVERSE
;
62 case PIPE_LOGICOP_OR
: return BRW_LOGICOPFUNCTION_OR
;
63 case PIPE_LOGICOP_SET
: return BRW_LOGICOPFUNCTION_SET
;
65 assert(!"unknown logicop function");
66 return BRW_LOGICOPFUNCTION_CLEAR
;
71 * Translate a pipe blend function to the matching hardware blend function.
74 gen6_translate_pipe_blend(unsigned blend
)
77 case PIPE_BLEND_ADD
: return BRW_BLENDFUNCTION_ADD
;
78 case PIPE_BLEND_SUBTRACT
: return BRW_BLENDFUNCTION_SUBTRACT
;
79 case PIPE_BLEND_REVERSE_SUBTRACT
: return BRW_BLENDFUNCTION_REVERSE_SUBTRACT
;
80 case PIPE_BLEND_MIN
: return BRW_BLENDFUNCTION_MIN
;
81 case PIPE_BLEND_MAX
: return BRW_BLENDFUNCTION_MAX
;
83 assert(!"unknown blend function");
84 return BRW_BLENDFUNCTION_ADD
;
89 * Translate a pipe blend factor to the matching hardware blend factor.
92 gen6_translate_pipe_blendfactor(unsigned blendfactor
)
94 switch (blendfactor
) {
95 case PIPE_BLENDFACTOR_ONE
: return BRW_BLENDFACTOR_ONE
;
96 case PIPE_BLENDFACTOR_SRC_COLOR
: return BRW_BLENDFACTOR_SRC_COLOR
;
97 case PIPE_BLENDFACTOR_SRC_ALPHA
: return BRW_BLENDFACTOR_SRC_ALPHA
;
98 case PIPE_BLENDFACTOR_DST_ALPHA
: return BRW_BLENDFACTOR_DST_ALPHA
;
99 case PIPE_BLENDFACTOR_DST_COLOR
: return BRW_BLENDFACTOR_DST_COLOR
;
100 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
: return BRW_BLENDFACTOR_SRC_ALPHA_SATURATE
;
101 case PIPE_BLENDFACTOR_CONST_COLOR
: return BRW_BLENDFACTOR_CONST_COLOR
;
102 case PIPE_BLENDFACTOR_CONST_ALPHA
: return BRW_BLENDFACTOR_CONST_ALPHA
;
103 case PIPE_BLENDFACTOR_SRC1_COLOR
: return BRW_BLENDFACTOR_SRC1_COLOR
;
104 case PIPE_BLENDFACTOR_SRC1_ALPHA
: return BRW_BLENDFACTOR_SRC1_ALPHA
;
105 case PIPE_BLENDFACTOR_ZERO
: return BRW_BLENDFACTOR_ZERO
;
106 case PIPE_BLENDFACTOR_INV_SRC_COLOR
: return BRW_BLENDFACTOR_INV_SRC_COLOR
;
107 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
: return BRW_BLENDFACTOR_INV_SRC_ALPHA
;
108 case PIPE_BLENDFACTOR_INV_DST_ALPHA
: return BRW_BLENDFACTOR_INV_DST_ALPHA
;
109 case PIPE_BLENDFACTOR_INV_DST_COLOR
: return BRW_BLENDFACTOR_INV_DST_COLOR
;
110 case PIPE_BLENDFACTOR_INV_CONST_COLOR
: return BRW_BLENDFACTOR_INV_CONST_COLOR
;
111 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
: return BRW_BLENDFACTOR_INV_CONST_ALPHA
;
112 case PIPE_BLENDFACTOR_INV_SRC1_COLOR
: return BRW_BLENDFACTOR_INV_SRC1_COLOR
;
113 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA
: return BRW_BLENDFACTOR_INV_SRC1_ALPHA
;
115 assert(!"unknown blend factor");
116 return BRW_BLENDFACTOR_ONE
;
121 * Translate a pipe stencil op to the matching hardware stencil op.
124 gen6_translate_pipe_stencil_op(unsigned stencil_op
)
126 switch (stencil_op
) {
127 case PIPE_STENCIL_OP_KEEP
: return BRW_STENCILOP_KEEP
;
128 case PIPE_STENCIL_OP_ZERO
: return BRW_STENCILOP_ZERO
;
129 case PIPE_STENCIL_OP_REPLACE
: return BRW_STENCILOP_REPLACE
;
130 case PIPE_STENCIL_OP_INCR
: return BRW_STENCILOP_INCRSAT
;
131 case PIPE_STENCIL_OP_DECR
: return BRW_STENCILOP_DECRSAT
;
132 case PIPE_STENCIL_OP_INCR_WRAP
: return BRW_STENCILOP_INCR
;
133 case PIPE_STENCIL_OP_DECR_WRAP
: return BRW_STENCILOP_DECR
;
134 case PIPE_STENCIL_OP_INVERT
: return BRW_STENCILOP_INVERT
;
136 assert(!"unknown stencil op");
137 return BRW_STENCILOP_KEEP
;
142 * Translate a pipe texture mipfilter to the matching hardware mipfilter.
145 gen6_translate_tex_mipfilter(unsigned filter
)
148 case PIPE_TEX_MIPFILTER_NEAREST
: return BRW_MIPFILTER_NEAREST
;
149 case PIPE_TEX_MIPFILTER_LINEAR
: return BRW_MIPFILTER_LINEAR
;
150 case PIPE_TEX_MIPFILTER_NONE
: return BRW_MIPFILTER_NONE
;
152 assert(!"unknown mipfilter");
153 return BRW_MIPFILTER_NONE
;
158 * Translate a pipe texture filter to the matching hardware mapfilter.
161 gen6_translate_tex_filter(unsigned filter
)
164 case PIPE_TEX_FILTER_NEAREST
: return BRW_MAPFILTER_NEAREST
;
165 case PIPE_TEX_FILTER_LINEAR
: return BRW_MAPFILTER_LINEAR
;
167 assert(!"unknown sampler filter");
168 return BRW_MAPFILTER_NEAREST
;
173 * Translate a pipe texture coordinate wrapping mode to the matching hardware
177 gen6_translate_tex_wrap(unsigned wrap
, bool clamp_to_edge
)
179 /* clamp to edge or border? */
180 if (wrap
== PIPE_TEX_WRAP_CLAMP
) {
181 wrap
= (clamp_to_edge
) ?
182 PIPE_TEX_WRAP_CLAMP_TO_EDGE
: PIPE_TEX_WRAP_CLAMP_TO_BORDER
;
186 case PIPE_TEX_WRAP_REPEAT
: return BRW_TEXCOORDMODE_WRAP
;
187 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
: return BRW_TEXCOORDMODE_CLAMP
;
188 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
: return BRW_TEXCOORDMODE_CLAMP_BORDER
;
189 case PIPE_TEX_WRAP_MIRROR_REPEAT
: return BRW_TEXCOORDMODE_MIRROR
;
190 case PIPE_TEX_WRAP_CLAMP
:
191 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
192 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
193 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
195 assert(!"unknown sampler wrap mode");
196 return BRW_TEXCOORDMODE_WRAP
;
201 * Translate a pipe shadow compare function to the matching hardware shadow
205 gen6_translate_shadow_func(unsigned func
)
208 * For PIPE_FUNC_x, the reference value is on the left-hand side of the
209 * comparison, and 1.0 is returned when the comparison is true.
211 * For BRW_PREFILTER_x, the reference value is on the right-hand side of
212 * the comparison, and 0.0 is returned when the comparison is true.
215 case PIPE_FUNC_NEVER
: return BRW_PREFILTER_ALWAYS
;
216 case PIPE_FUNC_LESS
: return BRW_PREFILTER_LEQUAL
;
217 case PIPE_FUNC_EQUAL
: return BRW_PREFILTER_NOTEQUAL
;
218 case PIPE_FUNC_LEQUAL
: return BRW_PREFILTER_LESS
;
219 case PIPE_FUNC_GREATER
: return BRW_PREFILTER_GEQUAL
;
220 case PIPE_FUNC_NOTEQUAL
: return BRW_PREFILTER_EQUAL
;
221 case PIPE_FUNC_GEQUAL
: return BRW_PREFILTER_GREATER
;
222 case PIPE_FUNC_ALWAYS
: return BRW_PREFILTER_NEVER
;
224 assert(!"unknown shadow compare function");
225 return BRW_PREFILTER_NEVER
;
230 * Translate a pipe DSA test function to the matching hardware compare
234 gen6_translate_dsa_func(unsigned func
)
237 case PIPE_FUNC_NEVER
: return BRW_COMPAREFUNCTION_NEVER
;
238 case PIPE_FUNC_LESS
: return BRW_COMPAREFUNCTION_LESS
;
239 case PIPE_FUNC_EQUAL
: return BRW_COMPAREFUNCTION_EQUAL
;
240 case PIPE_FUNC_LEQUAL
: return BRW_COMPAREFUNCTION_LEQUAL
;
241 case PIPE_FUNC_GREATER
: return BRW_COMPAREFUNCTION_GREATER
;
242 case PIPE_FUNC_NOTEQUAL
: return BRW_COMPAREFUNCTION_NOTEQUAL
;
243 case PIPE_FUNC_GEQUAL
: return BRW_COMPAREFUNCTION_GEQUAL
;
244 case PIPE_FUNC_ALWAYS
: return BRW_COMPAREFUNCTION_ALWAYS
;
246 assert(!"unknown depth/stencil/alpha test function");
247 return BRW_COMPAREFUNCTION_NEVER
;
252 ve_init_cso(const struct ilo_dev_info
*dev
,
253 const struct pipe_vertex_element
*state
,
255 struct ilo_ve_cso
*cso
)
258 BRW_VE1_COMPONENT_STORE_SRC
,
259 BRW_VE1_COMPONENT_STORE_SRC
,
260 BRW_VE1_COMPONENT_STORE_SRC
,
261 BRW_VE1_COMPONENT_STORE_SRC
,
265 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
267 switch (util_format_get_nr_components(state
->src_format
)) {
268 case 1: comp
[1] = BRW_VE1_COMPONENT_STORE_0
;
269 case 2: comp
[2] = BRW_VE1_COMPONENT_STORE_0
;
270 case 3: comp
[3] = (util_format_is_pure_integer(state
->src_format
)) ?
271 BRW_VE1_COMPONENT_STORE_1_INT
:
272 BRW_VE1_COMPONENT_STORE_1_FLT
;
275 format
= ilo_translate_vertex_format(state
->src_format
);
277 STATIC_ASSERT(Elements(cso
->payload
) >= 2);
279 vb_index
<< GEN6_VE0_INDEX_SHIFT
|
281 format
<< BRW_VE0_FORMAT_SHIFT
|
282 state
->src_offset
<< BRW_VE0_SRC_OFFSET_SHIFT
;
285 comp
[0] << BRW_VE1_COMPONENT_0_SHIFT
|
286 comp
[1] << BRW_VE1_COMPONENT_1_SHIFT
|
287 comp
[2] << BRW_VE1_COMPONENT_2_SHIFT
|
288 comp
[3] << BRW_VE1_COMPONENT_3_SHIFT
;
292 ilo_gpe_init_ve(const struct ilo_dev_info
*dev
,
294 const struct pipe_vertex_element
*states
,
295 struct ilo_ve_state
*ve
)
299 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
301 ve
->count
= num_states
;
304 for (i
= 0; i
< num_states
; i
++) {
305 const unsigned pipe_idx
= states
[i
].vertex_buffer_index
;
306 const unsigned instance_divisor
= states
[i
].instance_divisor
;
310 * map the pipe vb to the hardware vb, which has a fixed instance
313 for (hw_idx
= 0; hw_idx
< ve
->vb_count
; hw_idx
++) {
314 if (ve
->vb_mapping
[hw_idx
] == pipe_idx
&&
315 ve
->instance_divisors
[hw_idx
] == instance_divisor
)
319 /* create one if there is no matching hardware vb */
320 if (hw_idx
>= ve
->vb_count
) {
321 hw_idx
= ve
->vb_count
++;
323 ve
->vb_mapping
[hw_idx
] = pipe_idx
;
324 ve
->instance_divisors
[hw_idx
] = instance_divisor
;
327 ve_init_cso(dev
, &states
[i
], hw_idx
, &ve
->cso
[i
]);
332 ilo_gpe_init_vs_cso(const struct ilo_dev_info
*dev
,
333 const struct ilo_shader_state
*vs
,
334 struct ilo_shader_cso
*cso
)
336 int start_grf
, vue_read_len
, max_threads
;
337 uint32_t dw2
, dw4
, dw5
;
339 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
341 start_grf
= ilo_shader_get_kernel_param(vs
, ILO_KERNEL_URB_DATA_START_REG
);
342 vue_read_len
= ilo_shader_get_kernel_param(vs
, ILO_KERNEL_INPUT_COUNT
);
345 * From the Sandy Bridge PRM, volume 2 part 1, page 135:
347 * "(Vertex URB Entry Read Length) Specifies the number of pairs of
348 * 128-bit vertex elements to be passed into the payload for each
351 * "It is UNDEFINED to set this field to 0 indicating no Vertex URB
352 * data to be read and passed to the thread."
354 vue_read_len
= (vue_read_len
+ 1) / 2;
361 * From the Sandy Bridge PRM, volume 1 part 1, page 22:
363 * "Device # of EUs #Threads/EU
367 max_threads
= (dev
->gt
== 2) ? 60 : 24;
371 * From the Ivy Bridge PRM, volume 1 part 1, page 18:
373 * "Device # of EUs #Threads/EU
374 * Ivy Bridge (GT2) 16 8
375 * Ivy Bridge (GT1) 6 6"
377 max_threads
= (dev
->gt
== 2) ? 128 : 36;
380 /* see brwCreateContext() */
381 max_threads
= (dev
->gt
>= 2) ? 280 : 70;
388 dw2
= (true) ? 0 : GEN6_VS_FLOATING_POINT_MODE_ALT
;
390 dw4
= start_grf
<< GEN6_VS_DISPATCH_START_GRF_SHIFT
|
391 vue_read_len
<< GEN6_VS_URB_READ_LENGTH_SHIFT
|
392 0 << GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT
;
394 dw5
= GEN6_VS_STATISTICS_ENABLE
|
397 if (dev
->gen
>= ILO_GEN(7.5))
398 dw5
|= (max_threads
- 1) << HSW_VS_MAX_THREADS_SHIFT
;
400 dw5
|= (max_threads
- 1) << GEN6_VS_MAX_THREADS_SHIFT
;
402 STATIC_ASSERT(Elements(cso
->payload
) >= 3);
403 cso
->payload
[0] = dw2
;
404 cso
->payload
[1] = dw4
;
405 cso
->payload
[2] = dw5
;
409 ilo_gpe_init_gs_cso_gen6(const struct ilo_dev_info
*dev
,
410 const struct ilo_shader_state
*gs
,
411 struct ilo_shader_cso
*cso
)
413 int start_grf
, vue_read_len
, max_threads
;
414 uint32_t dw2
, dw4
, dw5
, dw6
;
416 ILO_GPE_VALID_GEN(dev
, 6, 6);
418 if (ilo_shader_get_type(gs
) == PIPE_SHADER_GEOMETRY
) {
419 start_grf
= ilo_shader_get_kernel_param(gs
,
420 ILO_KERNEL_URB_DATA_START_REG
);
422 vue_read_len
= ilo_shader_get_kernel_param(gs
, ILO_KERNEL_INPUT_COUNT
);
425 start_grf
= ilo_shader_get_kernel_param(gs
,
426 ILO_KERNEL_VS_GEN6_SO_START_REG
);
428 vue_read_len
= ilo_shader_get_kernel_param(gs
, ILO_KERNEL_OUTPUT_COUNT
);
432 * From the Sandy Bridge PRM, volume 2 part 1, page 153:
434 * "Specifies the amount of URB data read and passed in the thread
435 * payload for each Vertex URB entry, in 256-bit register increments.
437 * It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
438 * 0 indicating no Vertex URB data to be read and passed to the
441 vue_read_len
= (vue_read_len
+ 1) / 2;
446 * From the Sandy Bridge PRM, volume 2 part 1, page 154:
448 * "Maximum Number of Threads valid range is [0,27] when Rendering
449 * Enabled bit is set."
451 * From the Sandy Bridge PRM, volume 2 part 1, page 173:
453 * "Programming Note: If the GS stage is enabled, software must always
454 * allocate at least one GS URB Entry. This is true even if the GS
455 * thread never needs to output vertices to the pipeline, e.g., when
456 * only performing stream output. This is an artifact of the need to
457 * pass the GS thread an initial destination URB handle."
459 * As such, we always enable rendering, and limit the number of threads.
462 /* maximum is 60, but limited to 28 */
466 /* maximum is 24, but limited to 21 (see brwCreateContext()) */
470 dw2
= GEN6_GS_SPF_MODE
;
472 dw4
= vue_read_len
<< GEN6_GS_URB_READ_LENGTH_SHIFT
|
473 0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT
|
474 start_grf
<< GEN6_GS_DISPATCH_START_GRF_SHIFT
;
476 dw5
= (max_threads
- 1) << GEN6_GS_MAX_THREADS_SHIFT
|
477 GEN6_GS_STATISTICS_ENABLE
|
478 GEN6_GS_SO_STATISTICS_ENABLE
|
479 GEN6_GS_RENDERING_ENABLE
;
482 * we cannot make use of GEN6_GS_REORDER because it will reorder
483 * triangle strips according to D3D rules (triangle 2N+1 uses vertices
484 * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
485 * (2N+2, 2N+1, 2N+3)).
487 dw6
= GEN6_GS_ENABLE
;
489 if (ilo_shader_get_kernel_param(gs
, ILO_KERNEL_GS_DISCARD_ADJACENCY
))
490 dw6
|= GEN6_GS_DISCARD_ADJACENCY
;
492 if (ilo_shader_get_kernel_param(gs
, ILO_KERNEL_VS_GEN6_SO
)) {
493 const uint32_t svbi_post_inc
=
494 ilo_shader_get_kernel_param(gs
, ILO_KERNEL_GS_GEN6_SVBI_POST_INC
);
496 dw6
|= GEN6_GS_SVBI_PAYLOAD_ENABLE
;
498 dw6
|= GEN6_GS_SVBI_POSTINCREMENT_ENABLE
|
499 svbi_post_inc
<< GEN6_GS_SVBI_POSTINCREMENT_VALUE_SHIFT
;
503 STATIC_ASSERT(Elements(cso
->payload
) >= 4);
504 cso
->payload
[0] = dw2
;
505 cso
->payload
[1] = dw4
;
506 cso
->payload
[2] = dw5
;
507 cso
->payload
[3] = dw6
;
511 ilo_gpe_init_rasterizer_clip(const struct ilo_dev_info
*dev
,
512 const struct pipe_rasterizer_state
*state
,
513 struct ilo_rasterizer_clip
*clip
)
515 uint32_t dw1
, dw2
, dw3
;
517 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
519 dw1
= GEN6_CLIP_STATISTICS_ENABLE
;
521 if (dev
->gen
>= ILO_GEN(7)) {
523 * From the Ivy Bridge PRM, volume 2 part 1, page 219:
525 * "Workaround : Due to Hardware issue "EarlyCull" needs to be
526 * enabled only for the cases where the incoming primitive topology
527 * into the clipper guaranteed to be Trilist."
529 * What does this mean?
532 GEN7_CLIP_EARLY_CULL
;
534 if (state
->front_ccw
)
535 dw1
|= GEN7_CLIP_WINDING_CCW
;
537 switch (state
->cull_face
) {
539 dw1
|= GEN7_CLIP_CULLMODE_NONE
;
541 case PIPE_FACE_FRONT
:
542 dw1
|= GEN7_CLIP_CULLMODE_FRONT
;
545 dw1
|= GEN7_CLIP_CULLMODE_BACK
;
547 case PIPE_FACE_FRONT_AND_BACK
:
548 dw1
|= GEN7_CLIP_CULLMODE_BOTH
;
553 dw2
= GEN6_CLIP_ENABLE
|
555 state
->clip_plane_enable
<< GEN6_USER_CLIP_CLIP_DISTANCES_SHIFT
|
556 GEN6_CLIP_MODE_NORMAL
;
558 if (state
->clip_halfz
)
559 dw2
|= GEN6_CLIP_API_D3D
;
561 dw2
|= GEN6_CLIP_API_OGL
;
563 if (state
->depth_clip
)
564 dw2
|= GEN6_CLIP_Z_TEST
;
566 if (state
->flatshade_first
) {
567 dw2
|= 0 << GEN6_CLIP_TRI_PROVOKE_SHIFT
|
568 0 << GEN6_CLIP_LINE_PROVOKE_SHIFT
|
569 1 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT
;
572 dw2
|= 2 << GEN6_CLIP_TRI_PROVOKE_SHIFT
|
573 1 << GEN6_CLIP_LINE_PROVOKE_SHIFT
|
574 2 << GEN6_CLIP_TRIFAN_PROVOKE_SHIFT
;
577 dw3
= 0x1 << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT
|
578 0x7ff << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT
;
580 clip
->payload
[0] = dw1
;
581 clip
->payload
[1] = dw2
;
582 clip
->payload
[2] = dw3
;
584 clip
->can_enable_guardband
= true;
587 * There are several reasons that guard band test should be disabled
589 * - GL wide points (to avoid partially visibie object)
590 * - GL wide or AA lines (to avoid partially visibie object)
592 if (state
->point_size_per_vertex
|| state
->point_size
> 1.0f
)
593 clip
->can_enable_guardband
= false;
594 if (state
->line_smooth
|| state
->line_width
> 1.0f
)
595 clip
->can_enable_guardband
= false;
599 ilo_gpe_init_rasterizer_sf(const struct ilo_dev_info
*dev
,
600 const struct pipe_rasterizer_state
*state
,
601 struct ilo_rasterizer_sf
*sf
)
603 float offset_const
, offset_scale
, offset_clamp
;
604 int line_width
, point_width
;
605 uint32_t dw1
, dw2
, dw3
;
607 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
610 * Scale the constant term. The minimum representable value used by the HW
611 * is not large enouch to be the minimum resolvable difference.
613 offset_const
= state
->offset_units
* 2.0f
;
615 offset_scale
= state
->offset_scale
;
616 offset_clamp
= state
->offset_clamp
;
619 * From the Sandy Bridge PRM, volume 2 part 1, page 248:
621 * "This bit (Statistics Enable) should be set whenever clipping is
622 * enabled and the Statistics Enable bit is set in CLIP_STATE. It
623 * should be cleared if clipping is disabled or Statistics Enable in
624 * CLIP_STATE is clear."
626 dw1
= GEN6_SF_STATISTICS_ENABLE
|
627 GEN6_SF_VIEWPORT_TRANSFORM_ENABLE
;
629 /* XXX GEN6 path seems to work fine for GEN7 */
630 if (false && dev
->gen
>= ILO_GEN(7)) {
632 * From the Ivy Bridge PRM, volume 2 part 1, page 258:
634 * "This bit (Legacy Global Depth Bias Enable, Global Depth Offset
635 * Enable Solid , Global Depth Offset Enable Wireframe, and Global
636 * Depth Offset Enable Point) should be set whenever non zero depth
637 * bias (Slope, Bias) values are used. Setting this bit may have
638 * some degradation of performance for some workloads."
640 if (state
->offset_tri
|| state
->offset_line
|| state
->offset_point
) {
641 /* XXX need to scale offset_const according to the depth format */
642 dw1
|= GEN6_SF_LEGACY_GLOBAL_DEPTH_BIAS
;
644 dw1
|= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID
|
645 GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME
|
646 GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT
;
655 if (state
->offset_tri
)
656 dw1
|= GEN6_SF_GLOBAL_DEPTH_OFFSET_SOLID
;
657 if (state
->offset_line
)
658 dw1
|= GEN6_SF_GLOBAL_DEPTH_OFFSET_WIREFRAME
;
659 if (state
->offset_point
)
660 dw1
|= GEN6_SF_GLOBAL_DEPTH_OFFSET_POINT
;
663 switch (state
->fill_front
) {
664 case PIPE_POLYGON_MODE_FILL
:
665 dw1
|= GEN6_SF_FRONT_SOLID
;
667 case PIPE_POLYGON_MODE_LINE
:
668 dw1
|= GEN6_SF_FRONT_WIREFRAME
;
670 case PIPE_POLYGON_MODE_POINT
:
671 dw1
|= GEN6_SF_FRONT_POINT
;
675 switch (state
->fill_back
) {
676 case PIPE_POLYGON_MODE_FILL
:
677 dw1
|= GEN6_SF_BACK_SOLID
;
679 case PIPE_POLYGON_MODE_LINE
:
680 dw1
|= GEN6_SF_BACK_WIREFRAME
;
682 case PIPE_POLYGON_MODE_POINT
:
683 dw1
|= GEN6_SF_BACK_POINT
;
687 if (state
->front_ccw
)
688 dw1
|= GEN6_SF_WINDING_CCW
;
692 if (state
->line_smooth
) {
694 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
696 * "This field (Anti-aliasing Enable) must be disabled if any of the
697 * render targets have integer (UINT or SINT) surface format."
699 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
701 * "This field (Hierarchical Depth Buffer Enable) must be disabled
702 * if Anti-aliasing Enable in 3DSTATE_SF is enabled.
704 * TODO We do not check those yet.
706 dw2
|= GEN6_SF_LINE_AA_ENABLE
|
707 GEN6_SF_LINE_END_CAP_WIDTH_1_0
;
710 switch (state
->cull_face
) {
712 dw2
|= GEN6_SF_CULL_NONE
;
714 case PIPE_FACE_FRONT
:
715 dw2
|= GEN6_SF_CULL_FRONT
;
718 dw2
|= GEN6_SF_CULL_BACK
;
720 case PIPE_FACE_FRONT_AND_BACK
:
721 dw2
|= GEN6_SF_CULL_BOTH
;
726 * Smooth lines should intersect ceil(line_width) or (ceil(line_width) + 1)
727 * pixels in the minor direction. We have to make the lines slightly
728 * thicker, 0.5 pixel on both sides, so that they intersect that many
729 * pixels are considered into the lines.
731 * Line width is in U3.7.
733 line_width
= (int) ((state
->line_width
+
734 (float) state
->line_smooth
) * 128.0f
+ 0.5f
);
735 line_width
= CLAMP(line_width
, 0, 1023);
737 if (line_width
== 128 && !state
->line_smooth
) {
742 dw2
|= line_width
<< GEN6_SF_LINE_WIDTH_SHIFT
;
744 if (dev
->gen
>= ILO_GEN(7.5) && state
->line_stipple_enable
)
745 dw2
|= HSW_SF_LINE_STIPPLE_ENABLE
;
748 dw2
|= GEN6_SF_SCISSOR_ENABLE
;
750 dw3
= GEN6_SF_LINE_AA_MODE_TRUE
|
751 GEN6_SF_VERTEX_SUBPIXEL_8BITS
;
753 if (state
->line_last_pixel
)
756 if (state
->flatshade_first
) {
757 dw3
|= 0 << GEN6_SF_TRI_PROVOKE_SHIFT
|
758 0 << GEN6_SF_LINE_PROVOKE_SHIFT
|
759 1 << GEN6_SF_TRIFAN_PROVOKE_SHIFT
;
762 dw3
|= 2 << GEN6_SF_TRI_PROVOKE_SHIFT
|
763 1 << GEN6_SF_LINE_PROVOKE_SHIFT
|
764 2 << GEN6_SF_TRIFAN_PROVOKE_SHIFT
;
767 if (!state
->point_size_per_vertex
)
768 dw3
|= GEN6_SF_USE_STATE_POINT_WIDTH
;
771 point_width
= (int) (state
->point_size
* 8.0f
+ 0.5f
);
772 point_width
= CLAMP(point_width
, 1, 2047);
776 STATIC_ASSERT(Elements(sf
->payload
) >= 6);
777 sf
->payload
[0] = dw1
;
778 sf
->payload
[1] = dw2
;
779 sf
->payload
[2] = dw3
;
780 sf
->payload
[3] = fui(offset_const
);
781 sf
->payload
[4] = fui(offset_scale
);
782 sf
->payload
[5] = fui(offset_clamp
);
784 if (state
->multisample
) {
785 sf
->dw_msaa
= GEN6_SF_MSRAST_ON_PATTERN
;
788 * From the Sandy Bridge PRM, volume 2 part 1, page 251:
790 * "Software must not program a value of 0.0 when running in
791 * MSRASTMODE_ON_xxx modes - zero-width lines are not available
792 * when multisampling rasterization is enabled."
795 line_width
= 128; /* 1.0f */
797 sf
->dw_msaa
|= line_width
<< GEN6_SF_LINE_WIDTH_SHIFT
;
806 ilo_gpe_init_rasterizer_wm_gen6(const struct ilo_dev_info
*dev
,
807 const struct pipe_rasterizer_state
*state
,
808 struct ilo_rasterizer_wm
*wm
)
812 ILO_GPE_VALID_GEN(dev
, 6, 6);
814 /* only the FF unit states are set, as in GEN7 */
816 dw5
= GEN6_WM_LINE_AA_WIDTH_2_0
;
818 /* same value as in 3DSTATE_SF */
819 if (state
->line_smooth
)
820 dw5
|= GEN6_WM_LINE_END_CAP_AA_WIDTH_1_0
;
822 if (state
->poly_stipple_enable
)
823 dw5
|= GEN6_WM_POLYGON_STIPPLE_ENABLE
;
824 if (state
->line_stipple_enable
)
825 dw5
|= GEN6_WM_LINE_STIPPLE_ENABLE
;
827 dw6
= GEN6_WM_POSITION_ZW_PIXEL
|
828 GEN6_WM_MSRAST_OFF_PIXEL
|
829 GEN6_WM_MSDISPMODE_PERSAMPLE
;
831 if (state
->bottom_edge_rule
)
832 dw6
|= GEN6_WM_POINT_RASTRULE_UPPER_RIGHT
;
835 * assertion that makes sure
837 * dw6 |= wm->dw_msaa_rast | wm->dw_msaa_disp;
841 STATIC_ASSERT(GEN6_WM_MSRAST_OFF_PIXEL
== 0 &&
842 GEN6_WM_MSDISPMODE_PERSAMPLE
== 0);
845 (state
->multisample
) ? GEN6_WM_MSRAST_ON_PATTERN
: 0;
846 wm
->dw_msaa_disp
= GEN6_WM_MSDISPMODE_PERPIXEL
;
848 STATIC_ASSERT(Elements(wm
->payload
) >= 2);
849 wm
->payload
[0] = dw5
;
850 wm
->payload
[1] = dw6
;
854 ilo_gpe_init_fs_cso_gen6(const struct ilo_dev_info
*dev
,
855 const struct ilo_shader_state
*fs
,
856 struct ilo_shader_cso
*cso
)
858 int start_grf
, input_count
, interps
, max_threads
;
859 uint32_t dw2
, dw4
, dw5
, dw6
;
861 ILO_GPE_VALID_GEN(dev
, 6, 6);
863 start_grf
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_URB_DATA_START_REG
);
864 input_count
= ilo_shader_get_kernel_param(fs
, ILO_KERNEL_INPUT_COUNT
);
865 interps
= ilo_shader_get_kernel_param(fs
,
866 ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS
);
868 /* see brwCreateContext() */
869 max_threads
= (dev
->gt
== 2) ? 80 : 40;
871 dw2
= (true) ? 0 : GEN6_WM_FLOATING_POINT_MODE_ALT
;
873 dw4
= start_grf
<< GEN6_WM_DISPATCH_START_GRF_SHIFT_0
|
874 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_1
|
875 0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2
;
877 dw5
= (max_threads
- 1) << GEN6_WM_MAX_THREADS_SHIFT
;
880 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
882 * "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
883 * PS kernel or color calculator has the ability to kill (discard)
884 * pixels or samples, other than due to depth or stencil testing.
885 * This bit is required to be ENABLED in the following situations:
887 * The API pixel shader program contains "killpix" or "discard"
888 * instructions, or other code in the pixel shader kernel that can
889 * cause the final pixel mask to differ from the pixel mask received
892 * A sampler with chroma key enabled with kill pixel mode is used by
895 * Any render target has Alpha Test Enable or AlphaToCoverage Enable
898 * The pixel shader kernel generates and outputs oMask.
900 * Note: As ClipDistance clipping is fully supported in hardware and
901 * therefore not via PS instructions, there should be no need to
902 * ENABLE this bit due to ClipDistance clipping."
904 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_USE_KILL
))
905 dw5
|= GEN6_WM_KILL_ENABLE
;
908 * From the Sandy Bridge PRM, volume 2 part 1, page 275:
910 * "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
911 * field must be set to disabled."
913 * TODO This is not checked yet.
915 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_OUTPUT_Z
))
916 dw5
|= GEN6_WM_COMPUTED_DEPTH
;
918 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_INPUT_Z
))
919 dw5
|= GEN6_WM_USES_SOURCE_DEPTH
;
921 if (ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_INPUT_W
))
922 dw5
|= GEN6_WM_USES_SOURCE_W
;
925 * TODO set this bit only when
927 * a) fs writes colors and color is not masked, or
928 * b) fs writes depth, or
932 dw5
|= GEN6_WM_DISPATCH_ENABLE
;
934 assert(!ilo_shader_get_kernel_param(fs
, ILO_KERNEL_FS_DISPATCH_16_OFFSET
));
935 dw5
|= GEN6_WM_8_DISPATCH_ENABLE
;
937 dw6
= input_count
<< GEN6_WM_NUM_SF_OUTPUTS_SHIFT
|
938 GEN6_WM_POSOFFSET_NONE
|
939 interps
<< GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT
;
941 STATIC_ASSERT(Elements(cso
->payload
) >= 4);
942 cso
->payload
[0] = dw2
;
943 cso
->payload
[1] = dw4
;
944 cso
->payload
[2] = dw5
;
945 cso
->payload
[3] = dw6
;
948 struct ilo_zs_surface_info
{
955 enum intel_tiling_mode tiling
;
959 unsigned width
, height
, depth
;
960 unsigned lod
, first_layer
, num_layers
;
961 uint32_t x_offset
, y_offset
;
965 zs_init_info_null(const struct ilo_dev_info
*dev
,
966 struct ilo_zs_surface_info
*info
)
968 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
970 memset(info
, 0, sizeof(*info
));
972 info
->surface_type
= BRW_SURFACE_NULL
;
973 info
->format
= BRW_DEPTHFORMAT_D32_FLOAT
;
977 info
->num_layers
= 1;
981 zs_init_info(const struct ilo_dev_info
*dev
,
982 const struct ilo_texture
*tex
,
983 enum pipe_format format
, unsigned level
,
984 unsigned first_layer
, unsigned num_layers
,
985 bool offset_to_layer
, struct ilo_zs_surface_info
*info
)
987 uint32_t x_offset
[3], y_offset
[3];
988 bool separate_stencil
;
990 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
992 memset(info
, 0, sizeof(*info
));
994 info
->surface_type
= ilo_gpe_gen6_translate_texture(tex
->base
.target
);
996 if (info
->surface_type
== BRW_SURFACE_CUBE
) {
998 * From the Sandy Bridge PRM, volume 2 part 1, page 325-326:
1000 * "For Other Surfaces (Cube Surfaces):
1001 * This field (Minimum Array Element) is ignored."
1003 * "For Other Surfaces (Cube Surfaces):
1004 * This field (Render Target View Extent) is ignored."
1006 * As such, we cannot set first_layer and num_layers on cube surfaces.
1007 * To work around that, treat it as a 2D surface.
1009 info
->surface_type
= BRW_SURFACE_2D
;
1012 if (dev
->gen
>= ILO_GEN(7)) {
1013 separate_stencil
= true;
1017 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
1019 * "This field (Separate Stencil Buffer Enable) must be set to the
1020 * same value (enabled or disabled) as Hierarchical Depth Buffer
1024 ilo_texture_can_enable_hiz(tex
, level
, first_layer
, num_layers
);
1028 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
1030 * "If this field (Hierarchical Depth Buffer Enable) is enabled, the
1031 * Surface Format of the depth buffer cannot be
1032 * D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
1033 * requires the separate stencil buffer."
1035 * From the Ironlake PRM, volume 2 part 1, page 330:
1037 * "If this field (Separate Stencil Buffer Enable) is disabled, the
1038 * Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
1040 * There is no similar restriction for GEN6. But when D24_UNORM_X8_UINT
1041 * is indeed used, the depth values output by the fragment shaders will
1042 * be different when read back.
1044 * As for GEN7+, separate_stencil is always true.
1047 case PIPE_FORMAT_Z16_UNORM
:
1048 info
->format
= BRW_DEPTHFORMAT_D16_UNORM
;
1050 case PIPE_FORMAT_Z32_FLOAT
:
1051 info
->format
= BRW_DEPTHFORMAT_D32_FLOAT
;
1053 case PIPE_FORMAT_Z24X8_UNORM
:
1054 case PIPE_FORMAT_Z24_UNORM_S8_UINT
:
1055 info
->format
= (separate_stencil
) ?
1056 BRW_DEPTHFORMAT_D24_UNORM_X8_UINT
:
1057 BRW_DEPTHFORMAT_D24_UNORM_S8_UINT
;
1059 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
:
1060 info
->format
= (separate_stencil
) ?
1061 BRW_DEPTHFORMAT_D32_FLOAT
:
1062 BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT
;
1064 case PIPE_FORMAT_S8_UINT
:
1065 if (separate_stencil
) {
1066 info
->format
= BRW_DEPTHFORMAT_D32_FLOAT
;
1071 assert(!"unsupported depth/stencil format");
1072 zs_init_info_null(dev
, info
);
1077 if (format
!= PIPE_FORMAT_S8_UINT
) {
1078 info
->zs
.bo
= tex
->bo
;
1079 info
->zs
.stride
= tex
->bo_stride
;
1080 info
->zs
.tiling
= tex
->tiling
;
1082 if (offset_to_layer
) {
1083 info
->zs
.offset
= ilo_texture_get_slice_offset(tex
,
1084 level
, first_layer
, &x_offset
[0], &y_offset
[0]);
1088 if (tex
->separate_s8
|| format
== PIPE_FORMAT_S8_UINT
) {
1089 const struct ilo_texture
*s8_tex
=
1090 (tex
->separate_s8
) ? tex
->separate_s8
: tex
;
1092 info
->stencil
.bo
= s8_tex
->bo
;
1095 * From the Sandy Bridge PRM, volume 2 part 1, page 329:
1097 * "The pitch must be set to 2x the value computed based on width,
1098 * as the stencil buffer is stored with two rows interleaved."
1100 * According to the classic driver, we need to do the same for GEN7+
1101 * even though the Ivy Bridge PRM does not say anything about it.
1103 info
->stencil
.stride
= s8_tex
->bo_stride
* 2;
1105 info
->stencil
.tiling
= s8_tex
->tiling
;
1107 if (offset_to_layer
) {
1108 info
->stencil
.offset
= ilo_texture_get_slice_offset(s8_tex
,
1109 level
, first_layer
, &x_offset
[1], &y_offset
[1]);
1113 if (ilo_texture_can_enable_hiz(tex
, level
, first_layer
, num_layers
)) {
1114 info
->hiz
.bo
= tex
->hiz
.bo
;
1115 info
->hiz
.stride
= tex
->hiz
.bo_stride
;
1116 info
->hiz
.tiling
= INTEL_TILING_Y
;
1119 * Layer offsetting is used on GEN6 only. And on GEN6, HiZ is enabled
1120 * only when the depth buffer is non-mipmapped and non-array, making
1121 * layer offsetting no-op.
1123 if (offset_to_layer
) {
1124 assert(level
== 0 && first_layer
== 0 && num_layers
== 1);
1126 info
->hiz
.offset
= 0;
1132 info
->width
= tex
->base
.width0
;
1133 info
->height
= tex
->base
.height0
;
1134 info
->depth
= (tex
->base
.target
== PIPE_TEXTURE_3D
) ?
1135 tex
->base
.depth0
: num_layers
;
1138 info
->first_layer
= first_layer
;
1139 info
->num_layers
= num_layers
;
1141 if (offset_to_layer
) {
1142 /* the size of the layer */
1143 info
->width
= u_minify(info
->width
, level
);
1144 info
->height
= u_minify(info
->height
, level
);
1145 if (info
->surface_type
== BRW_SURFACE_3D
)
1146 info
->depth
= u_minify(info
->depth
, level
);
1150 /* no layered rendering */
1151 assert(num_layers
== 1);
1154 info
->first_layer
= 0;
1155 info
->num_layers
= 1;
1157 /* all three share the same X/Y offsets */
1159 if (info
->stencil
.bo
) {
1160 assert(x_offset
[0] == x_offset
[1]);
1161 assert(y_offset
[0] == y_offset
[1]);
1164 info
->x_offset
= x_offset
[0];
1165 info
->y_offset
= y_offset
[0];
1168 assert(info
->stencil
.bo
);
1170 info
->x_offset
= x_offset
[1];
1171 info
->y_offset
= y_offset
[1];
1175 assert(info
->x_offset
== x_offset
[2]);
1176 assert(info
->y_offset
== y_offset
[2]);
1180 * From the Sandy Bridge PRM, volume 2 part 1, page 326:
1182 * "The 3 LSBs of both offsets (Depth Coordinate Offset Y and Depth
1183 * Coordinate Offset X) must be zero to ensure correct alignment"
1185 * XXX Skip the check for gen6, which seems to be fine. We need to make
1186 * sure that does not happen eventually.
1188 if (dev
->gen
>= ILO_GEN(7)) {
1189 assert((info
->x_offset
& 7) == 0 && (info
->y_offset
& 7) == 0);
1190 info
->x_offset
&= ~7;
1191 info
->y_offset
&= ~7;
1194 info
->width
+= info
->x_offset
;
1195 info
->height
+= info
->y_offset
;
1197 /* we have to treat them as 2D surfaces */
1198 if (info
->surface_type
== BRW_SURFACE_CUBE
) {
1199 assert(tex
->base
.width0
== tex
->base
.height0
);
1200 /* we will set slice_offset to point to the single face */
1201 info
->surface_type
= BRW_SURFACE_2D
;
1203 else if (info
->surface_type
== BRW_SURFACE_1D
&& info
->height
> 1) {
1204 assert(tex
->base
.height0
== 1);
1205 info
->surface_type
= BRW_SURFACE_2D
;
/*
 * Initialize a depth/stencil surface state object.
 *
 * The surface parameters are gathered into a local ilo_zs_surface_info by
 * zs_init_info() or zs_init_info_null(), sanity-checked with asserts against
 * per-surface-type size limits (the 2D and array limits depend on dev->gen),
 * and then packed into dw1..dw6, which are stored in zs->payload[0..5].
 * For a separate stencil bo, its stride minus one and its offset go to
 * zs->payload[6] and zs->payload[7], with HSW_STENCIL_ENABLED ORed into
 * payload[6] on GEN7.5+.  The HiZ stride minus one and offset go to
 * zs->payload[8] and zs->payload[9].  zs->bo, zs->separate_s8_bo, and
 * zs->hiz_bo are copied from the info without taking references.
 *
 * NOTE(review): lines are missing from this listing (among them the
 * condition choosing between zs_init_info() and zs_init_info_null(), parts
 * of the dword expressions, and the conditions guarding some of the stencil
 * and HiZ stores), so this description covers only what is visible.
 */
1211 ilo_gpe_init_zs_surface(const struct ilo_dev_info
*dev
,
1212 const struct ilo_texture
*tex
,
1213 enum pipe_format format
, unsigned level
,
1214 unsigned first_layer
, unsigned num_layers
,
1215 bool offset_to_layer
, struct ilo_zs_surface
*zs
)
1217 const int max_2d_size
= (dev
->gen
>= ILO_GEN(7)) ? 16384 : 8192;
1218 const int max_array_size
= (dev
->gen
>= ILO_GEN(7)) ? 2048 : 512;
1219 struct ilo_zs_surface_info info
;
1220 uint32_t dw1
, dw2
, dw3
, dw4
, dw5
, dw6
;
1222 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1225 zs_init_info(dev
, tex
, format
, level
, first_layer
, num_layers
,
1226 offset_to_layer
, &info
);
1229 zs_init_info_null(dev
, &info
);
1232 switch (info
.surface_type
) {
1233 case BRW_SURFACE_NULL
:
1235 case BRW_SURFACE_1D
:
1236 assert(info
.width
<= max_2d_size
&& info
.height
== 1 &&
1237 info
.depth
<= max_array_size
);
1238 assert(info
.first_layer
< max_array_size
- 1 &&
1239 info
.num_layers
<= max_array_size
);
1241 case BRW_SURFACE_2D
:
1242 assert(info
.width
<= max_2d_size
&& info
.height
<= max_2d_size
&&
1243 info
.depth
<= max_array_size
);
1244 assert(info
.first_layer
< max_array_size
- 1 &&
1245 info
.num_layers
<= max_array_size
);
1247 case BRW_SURFACE_3D
:
1248 assert(info
.width
<= 2048 && info
.height
<= 2048 && info
.depth
<= 2048);
1249 assert(info
.first_layer
< 2048 && info
.num_layers
<= max_array_size
);
1250 assert(info
.x_offset
== 0 && info
.y_offset
== 0);
1252 case BRW_SURFACE_CUBE
:
1253 assert(info
.width
<= max_2d_size
&& info
.height
<= max_2d_size
&&
1255 assert(info
.first_layer
== 0 && info
.num_layers
== 1);
1256 assert(info
.width
== info
.height
);
1257 assert(info
.x_offset
== 0 && info
.y_offset
== 0);
1260 assert(!"unexpected depth surface type");
1264 dw1
= info
.surface_type
<< 29 |
1268 /* required for GEN6+ */
1269 assert(info
.zs
.tiling
== INTEL_TILING_Y
);
1270 assert(info
.zs
.stride
> 0 && info
.zs
.stride
< 128 * 1024 &&
1271 info
.zs
.stride
% 128 == 0);
1272 assert(info
.width
<= info
.zs
.stride
);
1274 dw1
|= (info
.zs
.stride
- 1);
1275 dw2
= info
.zs
.offset
;
1281 if (dev
->gen
>= ILO_GEN(7)) {
1285 if (info
.stencil
.bo
)
1291 dw3
= (info
.height
- 1) << 18 |
1292 (info
.width
- 1) << 4 |
1295 dw4
= (info
.depth
- 1) << 21 |
1296 info
.first_layer
<< 10;
1298 dw5
= info
.y_offset
<< 16 | info
.x_offset
;
1300 dw6
= (info
.num_layers
- 1) << 21;
1303 /* always Y-tiled */
1312 dw3
= (info
.height
- 1) << 19 |
1313 (info
.width
- 1) << 6 |
1315 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< 1;
1317 dw4
= (info
.depth
- 1) << 21 |
1318 info
.first_layer
<< 10 |
1319 (info
.num_layers
- 1) << 1;
1321 dw5
= info
.y_offset
<< 16 | info
.x_offset
;
1326 STATIC_ASSERT(Elements(zs
->payload
) >= 10);
1328 zs
->payload
[0] = dw1
;
1329 zs
->payload
[1] = dw2
;
1330 zs
->payload
[2] = dw3
;
1331 zs
->payload
[3] = dw4
;
1332 zs
->payload
[4] = dw5
;
1333 zs
->payload
[5] = dw6
;
1335 /* do not increment reference count */
1336 zs
->bo
= info
.zs
.bo
;
1338 /* separate stencil */
1339 if (info
.stencil
.bo
) {
1340 assert(info
.stencil
.stride
> 0 && info
.stencil
.stride
< 128 * 1024 &&
1341 info
.stencil
.stride
% 128 == 0);
1343 zs
->payload
[6] = info
.stencil
.stride
- 1;
1344 zs
->payload
[7] = info
.stencil
.offset
;
1346 if (dev
->gen
>= ILO_GEN(7.5))
1347 zs
->payload
[6] |= HSW_STENCIL_ENABLED
;
1349 /* do not increment reference count */
1350 zs
->separate_s8_bo
= info
.stencil
.bo
;
1355 zs
->separate_s8_bo
= NULL
;
1360 zs
->payload
[8] = info
.hiz
.stride
- 1;
1361 zs
->payload
[9] = info
.hiz
.offset
;
1363 /* do not increment reference count */
1364 zs
->hiz_bo
= info
.hiz
.bo
;
1374 viewport_get_guardband(const struct ilo_dev_info
*dev
,
1375 int center_x
, int center_y
,
1376 int *min_gbx
, int *max_gbx
,
1377 int *min_gby
, int *max_gby
)
1380 * From the Sandy Bridge PRM, volume 2 part 1, page 234:
1382 * "Per-Device Guardband Extents
1384 * - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
1385 * - Maximum Post-Clamp Delta (X or Y): 16K"
1387 * "In addition, in order to be correctly rendered, objects must have a
1388 * screenspace bounding box not exceeding 8K in the X or Y direction.
1389 * This additional restriction must also be comprehended by software,
1390 * i.e., enforced by use of clipping."
1392 * From the Ivy Bridge PRM, volume 2 part 1, page 248:
1394 * "Per-Device Guardband Extents
1396 * - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
1397 * - Maximum Post-Clamp Delta (X or Y): N/A"
1399 * "In addition, in order to be correctly rendered, objects must have a
1400 * screenspace bounding box not exceeding 8K in the X or Y direction.
1401 * This additional restriction must also be comprehended by software,
1402 * i.e., enforced by use of clipping."
1404 * Combined, the bounding box of any object can not exceed 8K in both
1407 * Below we set the guardband as a squre of length 8K, centered at where
1408 * the viewport is. This makes sure all objects passing the GB test are
1409 * valid to the renderer, and those failing the XY clipping have a
1410 * better chance of passing the GB test.
1412 const int max_extent
= (dev
->gen
>= ILO_GEN(7)) ? 32768 : 16384;
1413 const int half_len
= 8192 / 2;
1415 /* make sure the guardband is within the valid range */
1416 if (center_x
- half_len
< -max_extent
)
1417 center_x
= -max_extent
+ half_len
;
1418 else if (center_x
+ half_len
> max_extent
- 1)
1419 center_x
= max_extent
- half_len
;
1421 if (center_y
- half_len
< -max_extent
)
1422 center_y
= -max_extent
+ half_len
;
1423 else if (center_y
+ half_len
> max_extent
- 1)
1424 center_y
= max_extent
- half_len
;
1426 *min_gbx
= (float) (center_x
- half_len
);
1427 *max_gbx
= (float) (center_x
+ half_len
);
1428 *min_gby
= (float) (center_y
- half_len
);
1429 *max_gby
= (float) (center_y
+ half_len
);
1433 ilo_gpe_set_viewport_cso(const struct ilo_dev_info
*dev
,
1434 const struct pipe_viewport_state
*state
,
1435 struct ilo_viewport_cso
*vp
)
1437 const float scale_x
= fabs(state
->scale
[0]);
1438 const float scale_y
= fabs(state
->scale
[1]);
1439 const float scale_z
= fabs(state
->scale
[2]);
1440 int min_gbx
, max_gbx
, min_gby
, max_gby
;
1442 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1444 viewport_get_guardband(dev
,
1445 (int) state
->translate
[0],
1446 (int) state
->translate
[1],
1447 &min_gbx
, &max_gbx
, &min_gby
, &max_gby
);
1450 vp
->m00
= state
->scale
[0];
1451 vp
->m11
= state
->scale
[1];
1452 vp
->m22
= state
->scale
[2];
1453 vp
->m30
= state
->translate
[0];
1454 vp
->m31
= state
->translate
[1];
1455 vp
->m32
= state
->translate
[2];
1457 /* guardband in NDC space */
1458 vp
->min_gbx
= ((float) min_gbx
- state
->translate
[0]) / scale_x
;
1459 vp
->max_gbx
= ((float) max_gbx
- state
->translate
[0]) / scale_x
;
1460 vp
->min_gby
= ((float) min_gby
- state
->translate
[1]) / scale_y
;
1461 vp
->max_gby
= ((float) max_gby
- state
->translate
[1]) / scale_y
;
1463 /* viewport in screen space */
1464 vp
->min_x
= scale_x
* -1.0f
+ state
->translate
[0];
1465 vp
->max_x
= scale_x
* 1.0f
+ state
->translate
[0];
1466 vp
->min_y
= scale_y
* -1.0f
+ state
->translate
[1];
1467 vp
->max_y
= scale_y
* 1.0f
+ state
->translate
[1];
1468 vp
->min_z
= scale_z
* -1.0f
+ state
->translate
[2];
1469 vp
->max_z
= scale_z
* 1.0f
+ state
->translate
[2];
/*
 * Remap a hardware blend factor for the case where destination alpha is
 * forced to one: BRW_BLENDFACTOR_DST_ALPHA becomes BRW_BLENDFACTOR_ONE, and
 * both BRW_BLENDFACTOR_INV_DST_ALPHA and BRW_BLENDFACTOR_SRC_ALPHA_SATURATE
 * become BRW_BLENDFACTOR_ZERO.
 *
 * NOTE(review): the switch header and the handling of all other factors are
 * not visible in this listing; presumably they are returned unchanged --
 * confirm against the full source.
 */
1473 gen6_blend_factor_dst_alpha_forced_one(int factor
)
1476 case BRW_BLENDFACTOR_DST_ALPHA
:
1477 return BRW_BLENDFACTOR_ONE
;
1478 case BRW_BLENDFACTOR_INV_DST_ALPHA
:
1479 case BRW_BLENDFACTOR_SRC_ALPHA_SATURATE
:
1480 return BRW_BLENDFACTOR_ZERO
;
/*
 * Compute the blend dword for one render target.
 *
 * The four pipe blend factors are translated with
 * gen6_translate_pipe_blendfactor().  When dst_alpha_forced_one is true,
 * each translated factor is further remapped through
 * gen6_blend_factor_dst_alpha_forced_one().  The translated alpha blend
 * function is placed at bit 26 and the translated RGB blend function at
 * bit 11 of the result.
 *
 * NOTE(review): several lines are missing from this listing: the value
 * returned when rt->blend_enable is false, the remaining fields of the
 * dword, and what is done when the RGB and alpha functions or factors
 * differ.  Confirm against the full source.
 */
1487 blend_get_rt_blend_enable(const struct ilo_dev_info
*dev
,
1488 const struct pipe_rt_blend_state
*rt
,
1489 bool dst_alpha_forced_one
)
1491 int rgb_src
, rgb_dst
, a_src
, a_dst
;
1494 if (!rt
->blend_enable
)
1497 rgb_src
= gen6_translate_pipe_blendfactor(rt
->rgb_src_factor
);
1498 rgb_dst
= gen6_translate_pipe_blendfactor(rt
->rgb_dst_factor
);
1499 a_src
= gen6_translate_pipe_blendfactor(rt
->alpha_src_factor
);
1500 a_dst
= gen6_translate_pipe_blendfactor(rt
->alpha_dst_factor
);
1502 if (dst_alpha_forced_one
) {
1503 rgb_src
= gen6_blend_factor_dst_alpha_forced_one(rgb_src
);
1504 rgb_dst
= gen6_blend_factor_dst_alpha_forced_one(rgb_dst
);
1505 a_src
= gen6_blend_factor_dst_alpha_forced_one(a_src
);
1506 a_dst
= gen6_blend_factor_dst_alpha_forced_one(a_dst
);
1510 gen6_translate_pipe_blend(rt
->alpha_func
) << 26 |
1513 gen6_translate_pipe_blend(rt
->rgb_func
) << 11 |
1517 if (rt
->rgb_func
!= rt
->alpha_func
||
1518 rgb_src
!= a_src
|| rgb_dst
!= a_dst
)
/*
 * Initialize an ilo_blend_state from a pipe blend state.
 *
 * independent_blend_enable and alpha_to_coverage are copied to blend, and
 * blend->dual_blend starts out false.  For each of the first num_cso render
 * targets, the matching blend->cso[] entry is filled:
 *
 *  - payload[0] is cleared and payload[1] starts from
 *    BRW_RENDERTARGET_CLAMPRANGE_FORMAT << 2;
 *  - bits 27, 26, 25, and 24 of payload[1] are set when the A, R, G, and B
 *    channels, respectively, are absent from rt->colormask;
 *  - with state->logicop_enable, dw_logicop is bit 22 plus the translated
 *    logicop function at bit 18; otherwise dw_logicop is zero and dw_blend /
 *    dw_blend_dst_alpha_forced_one come from blend_get_rt_blend_enable()
 *    called with dst_alpha_forced_one false and true;
 *  - dw_alpha_mod gets bit 31 (and additionally bit 29 on GEN7+) when
 *    alpha-to-coverage is on, and bit 30 when alpha-to-one is on and the
 *    render target does not use dual-source blending.
 *
 * NOTE(review): lines are missing from this listing (e.g. how num_cso is
 * chosen when independent blending is off, the rest of the payload[1]
 * expression, the condition guarding the bit-12 store, and the condition
 * under which blend->dual_blend becomes true).  Confirm against the full
 * source.
 */
1525 ilo_gpe_init_blend(const struct ilo_dev_info
*dev
,
1526 const struct pipe_blend_state
*state
,
1527 struct ilo_blend_state
*blend
)
1529 unsigned num_cso
, i
;
1531 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1533 if (state
->independent_blend_enable
) {
1534 num_cso
= Elements(blend
->cso
);
1537 memset(blend
->cso
, 0, sizeof(blend
->cso
));
1541 blend
->independent_blend_enable
= state
->independent_blend_enable
;
1542 blend
->alpha_to_coverage
= state
->alpha_to_coverage
;
1543 blend
->dual_blend
= false;
1545 for (i
= 0; i
< num_cso
; i
++) {
1546 const struct pipe_rt_blend_state
*rt
= &state
->rt
[i
];
1547 struct ilo_blend_cso
*cso
= &blend
->cso
[i
];
1550 cso
->payload
[0] = 0;
1551 cso
->payload
[1] = BRW_RENDERTARGET_CLAMPRANGE_FORMAT
<< 2 |
1554 if (!(rt
->colormask
& PIPE_MASK_A
))
1555 cso
->payload
[1] |= 1 << 27;
1556 if (!(rt
->colormask
& PIPE_MASK_R
))
1557 cso
->payload
[1] |= 1 << 26;
1558 if (!(rt
->colormask
& PIPE_MASK_G
))
1559 cso
->payload
[1] |= 1 << 25;
1560 if (!(rt
->colormask
& PIPE_MASK_B
))
1561 cso
->payload
[1] |= 1 << 24;
1564 cso
->payload
[1] |= 1 << 12;
1567 * From the Sandy Bridge PRM, volume 2 part 1, page 365:
1569 * "Color Buffer Blending and Logic Ops must not be enabled
1570 * simultaneously, or behavior is UNDEFINED."
1572 * Since state->logicop_enable takes precedence over rt->blend_enable,
1573 * no special care is needed.
1575 if (state
->logicop_enable
) {
1576 cso
->dw_logicop
= 1 << 22 |
1577 gen6_translate_pipe_logicop(state
->logicop_func
) << 18;
1580 cso
->dw_blend_dst_alpha_forced_one
= 0;
1585 cso
->dw_logicop
= 0;
1587 cso
->dw_blend
= blend_get_rt_blend_enable(dev
, rt
, false);
1588 cso
->dw_blend_dst_alpha_forced_one
=
1589 blend_get_rt_blend_enable(dev
, rt
, true);
1591 dual_blend
= (rt
->blend_enable
&&
1592 util_blend_state_is_dual(state
, i
));
1595 cso
->dw_alpha_mod
= 0;
1597 if (state
->alpha_to_coverage
) {
1598 cso
->dw_alpha_mod
|= 1 << 31;
1600 if (dev
->gen
>= ILO_GEN(7))
1601 cso
->dw_alpha_mod
|= 1 << 29;
1605 * From the Sandy Bridge PRM, volume 2 part 1, page 378:
1607 * "If Dual Source Blending is enabled, this bit (AlphaToOne Enable)
1608 * must be disabled."
1610 if (state
->alpha_to_one
&& !dual_blend
)
1611 cso
->dw_alpha_mod
|= 1 << 30;
1614 blend
->dual_blend
= true;
/*
 * Initialize an ilo_dsa_state from a pipe depth/stencil/alpha state.
 *
 * Visible behavior:
 *  - for an enabled front stencil (stencil[0]), the translated compare
 *    function and fail/zfail/zpass ops are packed at bits 28, 25, 22, and
 *    19, and dw[1] holds valuemask << 24 | writemask << 16;
 *  - for an enabled back stencil (stencil[1]), the same fields are packed
 *    at bits 12, 9, 6, and 3, and valuemask << 8 | writemask is ORed into
 *    dw[1];
 *  - dw[2] holds depth->enabled at bit 31 and depth->writemask at bit 26,
 *    with either the translated depth function or
 *    BRW_COMPAREFUNCTION_ALWAYS at bit 27;
 *  - with alpha test enabled, dsa->dw_alpha is bit 16 plus the translated
 *    alpha function at bit 13; dsa->alpha_ref is the reference value
 *    converted with float_to_ubyte().
 *
 * NOTE(review): lines are missing from this listing (the declaration and
 * setup of dw, the left-hand sides of the stencil expressions, the actions
 * taken when a stencil writemask is non-zero, the branches for disabled
 * stencil/alpha, and the condition selecting between the two depth
 * functions).  Confirm against the full source.
 */
1619 ilo_gpe_init_dsa(const struct ilo_dev_info
*dev
,
1620 const struct pipe_depth_stencil_alpha_state
*state
,
1621 struct ilo_dsa_state
*dsa
)
1623 const struct pipe_depth_state
*depth
= &state
->depth
;
1624 const struct pipe_stencil_state
*stencil0
= &state
->stencil
[0];
1625 const struct pipe_stencil_state
*stencil1
= &state
->stencil
[1];
1626 const struct pipe_alpha_state
*alpha
= &state
->alpha
;
1629 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1631 STATIC_ASSERT(Elements(dsa
->payload
) >= 3);
1635 * From the Sandy Bridge PRM, volume 2 part 1, page 359:
1637 * "If the Depth Buffer is either undefined or does not have a surface
1638 * format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
1639 * stencil buffer is disabled, Stencil Test Enable must be DISABLED"
1641 * From the Sandy Bridge PRM, volume 2 part 1, page 370:
1643 * "This field (Stencil Test Enable) cannot be enabled if
1644 * Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
1646 * TODO We do not check these yet.
1648 if (stencil0
->enabled
) {
1650 gen6_translate_dsa_func(stencil0
->func
) << 28 |
1651 gen6_translate_pipe_stencil_op(stencil0
->fail_op
) << 25 |
1652 gen6_translate_pipe_stencil_op(stencil0
->zfail_op
) << 22 |
1653 gen6_translate_pipe_stencil_op(stencil0
->zpass_op
) << 19;
1654 if (stencil0
->writemask
)
1657 dw
[1] = stencil0
->valuemask
<< 24 |
1658 stencil0
->writemask
<< 16;
1660 if (stencil1
->enabled
) {
1662 gen6_translate_dsa_func(stencil1
->func
) << 12 |
1663 gen6_translate_pipe_stencil_op(stencil1
->fail_op
) << 9 |
1664 gen6_translate_pipe_stencil_op(stencil1
->zfail_op
) << 6 |
1665 gen6_translate_pipe_stencil_op(stencil1
->zpass_op
) << 3;
1666 if (stencil1
->writemask
)
1669 dw
[1] |= stencil1
->valuemask
<< 8 |
1670 stencil1
->writemask
;
1679 * From the Sandy Bridge PRM, volume 2 part 1, page 360:
1681 * "Enabling the Depth Test function without defining a Depth Buffer is
1684 * From the Sandy Bridge PRM, volume 2 part 1, page 375:
1686 * "A Depth Buffer must be defined before enabling writes to it, or
1687 * operation is UNDEFINED."
1689 * TODO We do not check these yet.
1691 dw
[2] = depth
->enabled
<< 31 |
1692 depth
->writemask
<< 26;
1694 dw
[2] |= gen6_translate_dsa_func(depth
->func
) << 27;
1696 dw
[2] |= BRW_COMPAREFUNCTION_ALWAYS
<< 27;
1698 /* dw_alpha will be ORed to BLEND_STATE */
1699 if (alpha
->enabled
) {
1700 dsa
->dw_alpha
= 1 << 16 |
1701 gen6_translate_dsa_func(alpha
->func
) << 13;
1707 dsa
->alpha_ref
= float_to_ubyte(alpha
->ref_value
);
1711 ilo_gpe_set_scissor(const struct ilo_dev_info
*dev
,
1712 unsigned start_slot
,
1713 unsigned num_states
,
1714 const struct pipe_scissor_state
*states
,
1715 struct ilo_scissor_state
*scissor
)
1719 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
1721 for (i
= 0; i
< num_states
; i
++) {
1722 uint16_t min_x
, min_y
, max_x
, max_y
;
1724 /* both max and min are inclusive in SCISSOR_RECT */
1725 if (states
[i
].minx
< states
[i
].maxx
&&
1726 states
[i
].miny
< states
[i
].maxy
) {
1727 min_x
= states
[i
].minx
;
1728 min_y
= states
[i
].miny
;
1729 max_x
= states
[i
].maxx
- 1;
1730 max_y
= states
[i
].maxy
- 1;
1733 /* we have to make min greater than max */
1740 scissor
->payload
[(start_slot
+ i
) * 2 + 0] = min_y
<< 16 | min_x
;
1741 scissor
->payload
[(start_slot
+ i
) * 2 + 1] = max_y
<< 16 | max_x
;
1744 if (!start_slot
&& num_states
)
1745 scissor
->scissor0
= states
[0];
1749 ilo_gpe_set_scissor_null(const struct ilo_dev_info
*dev
,
1750 struct ilo_scissor_state
*scissor
)
1754 for (i
= 0; i
< Elements(scissor
->payload
); i
+= 2) {
1755 scissor
->payload
[i
+ 0] = 1 << 16 | 1;
1756 scissor
->payload
[i
+ 1] = 0;
/*
 * Initialize a GEN6 null view surface of the given width, height, depth,
 * and LOD.
 *
 * dw[0] selects BRW_SURFACE_NULL with format
 * BRW_SURFACEFORMAT_B8G8R8A8_UNORM; dw[2] carries height - 1, width - 1, and
 * level; dw[3] carries depth - 1.  The size and LOD are programmed because,
 * per the PRM text quoted below, they must match the depth buffer even for
 * null render targets.
 *
 * NOTE(review): the quoted PRM text asks for R8G8B8A8_UNORM while the code
 * uses B8G8R8A8_UNORM -- confirm this is intended.  Lines are missing from
 * this listing (the declaration and setup of dw, the remaining dwords, and
 * the rest of the dw[3] expression).
 */
1761 ilo_gpe_init_view_surface_null_gen6(const struct ilo_dev_info
*dev
,
1762 unsigned width
, unsigned height
,
1763 unsigned depth
, unsigned level
,
1764 struct ilo_view_surface
*surf
)
1768 ILO_GPE_VALID_GEN(dev
, 6, 6);
1771 * From the Sandy Bridge PRM, volume 4 part 1, page 71:
1773 * "A null surface will be used in instances where an actual surface is
1774 * not bound. When a write message is generated to a null surface, no
1775 * actual surface is written to. When a read message (including any
1776 * sampling engine message) is generated to a null surface, the result
1777 * is all zeros. Note that a null surface type is allowed to be used
1778 * with all messages, even if it is not specificially indicated as
1779 * supported. All of the remaining fields in surface state are ignored
1780 * for null surfaces, with the following exceptions:
1782 * * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
1783 * depth buffer's corresponding state for all render target
1784 * surfaces, including null.
1785 * * Surface Format must be R8G8B8A8_UNORM."
1787 * From the Sandy Bridge PRM, volume 4 part 1, page 82:
1789 * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
1793 STATIC_ASSERT(Elements(surf
->payload
) >= 6);
1796 dw
[0] = BRW_SURFACE_NULL
<< BRW_SURFACE_TYPE_SHIFT
|
1797 BRW_SURFACEFORMAT_B8G8R8A8_UNORM
<< BRW_SURFACE_FORMAT_SHIFT
;
1801 dw
[2] = (height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
|
1802 (width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
1803 level
<< BRW_SURFACE_LOD_SHIFT
;
1805 dw
[3] = (depth
- 1) << BRW_SURFACE_DEPTH_SHIFT
|
/*
 * Initialize a GEN6 view surface of type BRW_SURFACE_BUFFER.
 *
 * The buffer range is treated as an array of structures of struct_size
 * bytes, of which one element of format elem_format is viewed.  The number
 * of entries is size / struct_size; the listing then tests whether the
 * leftover bytes can hold one more element.  The entry count is asserted to
 * be within [1, 1 << 27] and is split across the width (bits 6:0), height
 * (bits 19:7), and depth (bits 26:20) fields.  The pitch field carries
 * struct_size.  BRW_SURFACE_RC_READ_WRITE is set in dw[0] when
 * render_cache_rw is true.
 *
 * NOTE(review): lines are missing from this listing (the action taken when
 * one more element fits, any adjustment of num_entries and pitch before
 * they are encoded, the declaration and setup of dw, dw[1], dw[4], dw[5],
 * and the bo assignment).  Confirm against the full source.
 */
1815 ilo_gpe_init_view_surface_for_buffer_gen6(const struct ilo_dev_info
*dev
,
1816 const struct ilo_buffer
*buf
,
1817 unsigned offset
, unsigned size
,
1818 unsigned struct_size
,
1819 enum pipe_format elem_format
,
1820 bool is_rt
, bool render_cache_rw
,
1821 struct ilo_view_surface
*surf
)
1823 const int elem_size
= util_format_get_blocksize(elem_format
);
1824 int width
, height
, depth
, pitch
;
1825 int surface_format
, num_entries
;
1828 ILO_GPE_VALID_GEN(dev
, 6, 6);
1831 * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
1832 * structure in a buffer.
1835 surface_format
= ilo_translate_color_format(elem_format
);
1837 num_entries
= size
/ struct_size
;
1838 /* see if there is enough space to fit another element */
1839 if (size
% struct_size
>= elem_size
)
1843 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
1845 * "For SURFTYPE_BUFFER render targets, this field (Surface Base
1846 * Address) specifies the base address of first element of the
1847 * surface. The surface is interpreted as a simple array of that
1848 * single element type. The address must be naturally-aligned to the
1849 * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
1850 * must be 16-byte aligned).
1852 * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
1853 * the base address of the first element of the surface, computed in
1854 * software by adding the surface base address to the byte offset of
1855 * the element in the buffer."
1858 assert(offset
% elem_size
== 0);
1861 * From the Sandy Bridge PRM, volume 4 part 1, page 77:
1863 * "For buffer surfaces, the number of entries in the buffer ranges
1866 assert(num_entries
>= 1 && num_entries
<= 1 << 27);
1869 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
1871 * "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
1872 * indicates the size of the structure."
1874 pitch
= struct_size
;
1879 width
= (num_entries
& 0x0000007f);
1881 height
= (num_entries
& 0x000fff80) >> 7;
1883 depth
= (num_entries
& 0x07f00000) >> 20;
1885 STATIC_ASSERT(Elements(surf
->payload
) >= 6);
1888 dw
[0] = BRW_SURFACE_BUFFER
<< BRW_SURFACE_TYPE_SHIFT
|
1889 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
;
1890 if (render_cache_rw
)
1891 dw
[0] |= BRW_SURFACE_RC_READ_WRITE
;
1895 dw
[2] = height
<< BRW_SURFACE_HEIGHT_SHIFT
|
1896 width
<< BRW_SURFACE_WIDTH_SHIFT
;
1898 dw
[3] = depth
<< BRW_SURFACE_DEPTH_SHIFT
|
1899 pitch
<< BRW_SURFACE_PITCH_SHIFT
;
1904 /* do not increment reference count */
/*
 * Initialize a GEN6 view surface for a range of levels and layers of a
 * texture.
 *
 * Visible behavior:
 *  - the surface type comes from ilo_gpe_gen6_translate_texture(); a
 *    Z32_FLOAT_S8X24_UINT view of a texture with a separate stencil is
 *    viewed as Z32_FLOAT;
 *  - width, height, and pitch come from the texture; depth is depth0 for 3D
 *    textures and num_layers otherwise (num_layers / 6 in the cube path);
 *  - sizes and layer ranges are asserted against per-surface-type limits;
 *  - with offset_to_layer, the surface must be a single-layer render
 *    target: the slice offset and X/Y offsets come from
 *    ilo_texture_get_slice_offset(), and width/height are minified to
 *    first_level;
 *  - for linear (INTEL_TILING_NONE) textures, the layer offset and pitch
 *    are asserted to be multiples of the element size;
 *  - the dwords pack type/format/mip layout (dw[0]), the layer offset
 *    (dw[1]), height - 1, width - 1, and LOD (dw[2]), depth - 1, pitch - 1,
 *    and tiling (dw[3]), min LOD, num_layers - 1, and the sample count
 *    (dw[4]), and the X/Y offsets (dw[5]).
 *
 * NOTE(review): lines are missing from this listing (the is_rt conditions
 * that choose between render and texture formats and between the cube
 * paths, the non-offset_to_layer branch, the declaration and setup of dw,
 * parts of the dw[0] and dw[4] expressions, the condition guarding
 * BRW_SURFACE_VERTICAL_ALIGN_ENABLE, and the bo assignment).  Confirm
 * against the full source.
 */
1909 ilo_gpe_init_view_surface_for_texture_gen6(const struct ilo_dev_info
*dev
,
1910 const struct ilo_texture
*tex
,
1911 enum pipe_format format
,
1912 unsigned first_level
,
1913 unsigned num_levels
,
1914 unsigned first_layer
,
1915 unsigned num_layers
,
1916 bool is_rt
, bool offset_to_layer
,
1917 struct ilo_view_surface
*surf
)
1919 int surface_type
, surface_format
;
1920 int width
, height
, depth
, pitch
, lod
;
1921 unsigned layer_offset
, x_offset
, y_offset
;
1924 ILO_GPE_VALID_GEN(dev
, 6, 6);
1926 surface_type
= ilo_gpe_gen6_translate_texture(tex
->base
.target
);
1927 assert(surface_type
!= BRW_SURFACE_BUFFER
);
1929 if (format
== PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
&& tex
->separate_s8
)
1930 format
= PIPE_FORMAT_Z32_FLOAT
;
1933 surface_format
= ilo_translate_render_format(format
);
1935 surface_format
= ilo_translate_texture_format(format
);
1936 assert(surface_format
>= 0);
1938 width
= tex
->base
.width0
;
1939 height
= tex
->base
.height0
;
1940 depth
= (tex
->base
.target
== PIPE_TEXTURE_3D
) ?
1941 tex
->base
.depth0
: num_layers
;
1942 pitch
= tex
->bo_stride
;
1944 if (surface_type
== BRW_SURFACE_CUBE
) {
1946 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
1948 * "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
1949 * range of this field (Depth) is [0,84], indicating the number of
1950 * cube array elements (equal to the number of underlying 2D array
1951 * elements divided by 6). For other surfaces, this field must be
1954 * When is_rt is true, we treat the texture as a 2D one to avoid the
1958 surface_type
= BRW_SURFACE_2D
;
1961 assert(num_layers
% 6 == 0);
1962 depth
= num_layers
/ 6;
1966 /* sanity check the size */
1967 assert(width
>= 1 && height
>= 1 && depth
>= 1 && pitch
>= 1);
1968 switch (surface_type
) {
1969 case BRW_SURFACE_1D
:
1970 assert(width
<= 8192 && height
== 1 && depth
<= 512);
1971 assert(first_layer
< 512 && num_layers
<= 512);
1973 case BRW_SURFACE_2D
:
1974 assert(width
<= 8192 && height
<= 8192 && depth
<= 512);
1975 assert(first_layer
< 512 && num_layers
<= 512);
1977 case BRW_SURFACE_3D
:
1978 assert(width
<= 2048 && height
<= 2048 && depth
<= 2048);
1979 assert(first_layer
< 2048 && num_layers
<= 512);
1981 assert(first_layer
== 0);
1983 case BRW_SURFACE_CUBE
:
1984 assert(width
<= 8192 && height
<= 8192 && depth
<= 85);
1985 assert(width
== height
);
1986 assert(first_layer
< 512 && num_layers
<= 512);
1988 assert(first_layer
== 0);
1991 assert(!"unexpected surface type");
1995 /* non-full array spacing is supported only on GEN7+ */
1996 assert(tex
->array_spacing_full
);
1997 /* non-interleaved samples are supported only on GEN7+ */
1998 if (tex
->base
.nr_samples
> 1)
1999 assert(tex
->interleaved
);
2002 assert(num_levels
== 1);
2006 lod
= num_levels
- 1;
2010 * Offset to the layer. When rendering, the hardware requires LOD and
2011 * Depth to be the same for all render targets and the depth buffer. We
2012 * need to offset to the layer manually and always set LOD and Depth to 0.
2014 if (offset_to_layer
) {
2015 /* we lose the capability for layered rendering */
2016 assert(is_rt
&& num_layers
== 1);
2018 layer_offset
= ilo_texture_get_slice_offset(tex
,
2019 first_level
, first_layer
, &x_offset
, &y_offset
);
2021 assert(x_offset
% 4 == 0);
2022 assert(y_offset
% 2 == 0);
2026 /* derive the size for the LOD */
2027 width
= u_minify(width
, first_level
);
2028 height
= u_minify(height
, first_level
);
2043 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
2045 * "Linear render target surface base addresses must be element-size
2046 * aligned, for non-YUV surface formats, or a multiple of 2
2047 * element-sizes for YUV surface formats. Other linear surfaces have
2048 * no alignment requirements (byte alignment is sufficient.)"
2050 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
2052 * "For linear render target surfaces, the pitch must be a multiple
2053 * of the element size for non-YUV surface formats. Pitch must be a
2054 * multiple of 2 * element size for YUV surface formats."
2056 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
2058 * "For linear surfaces, this field (X Offset) must be zero"
2060 if (tex
->tiling
== INTEL_TILING_NONE
) {
2062 const int elem_size
= util_format_get_blocksize(format
);
2063 assert(layer_offset
% elem_size
== 0);
2064 assert(pitch
% elem_size
== 0);
2070 STATIC_ASSERT(Elements(surf
->payload
) >= 6);
2073 dw
[0] = surface_type
<< BRW_SURFACE_TYPE_SHIFT
|
2074 surface_format
<< BRW_SURFACE_FORMAT_SHIFT
|
2075 BRW_SURFACE_MIPMAPLAYOUT_BELOW
<< BRW_SURFACE_MIPLAYOUT_SHIFT
;
2077 if (surface_type
== BRW_SURFACE_CUBE
&& !is_rt
) {
2079 BRW_SURFACE_CUBEFACE_ENABLES
;
2083 dw
[0] |= BRW_SURFACE_RC_READ_WRITE
;
2085 dw
[1] = layer_offset
;
2087 dw
[2] = (height
- 1) << BRW_SURFACE_HEIGHT_SHIFT
|
2088 (width
- 1) << BRW_SURFACE_WIDTH_SHIFT
|
2089 lod
<< BRW_SURFACE_LOD_SHIFT
;
2091 dw
[3] = (depth
- 1) << BRW_SURFACE_DEPTH_SHIFT
|
2092 (pitch
- 1) << BRW_SURFACE_PITCH_SHIFT
|
2093 ilo_gpe_gen6_translate_winsys_tiling(tex
->tiling
);
2095 dw
[4] = first_level
<< BRW_SURFACE_MIN_LOD_SHIFT
|
2097 (num_layers
- 1) << 8 |
2098 ((tex
->base
.nr_samples
> 1) ? BRW_SURFACE_MULTISAMPLECOUNT_4
:
2099 BRW_SURFACE_MULTISAMPLECOUNT_1
);
2101 dw
[5] = x_offset
<< BRW_SURFACE_X_OFFSET_SHIFT
|
2102 y_offset
<< BRW_SURFACE_Y_OFFSET_SHIFT
;
2104 dw
[5] |= BRW_SURFACE_VERTICAL_ALIGN_ENABLE
;
2106 /* do not increment reference count */
2111 sampler_init_border_color_gen6(const struct ilo_dev_info
*dev
,
2112 const union pipe_color_union
*color
,
2113 uint32_t *dw
, int num_dwords
)
2116 color
->f
[0], color
->f
[1], color
->f
[2], color
->f
[3],
2119 ILO_GPE_VALID_GEN(dev
, 6, 6);
2121 assert(num_dwords
>= 12);
2124 * This state is not documented in the Sandy Bridge PRM, but in the
2125 * Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1.
2129 dw
[1] = fui(rgba
[0]);
2130 dw
[2] = fui(rgba
[1]);
2131 dw
[3] = fui(rgba
[2]);
2132 dw
[4] = fui(rgba
[3]);
2135 dw
[5] = util_float_to_half(rgba
[0]) |
2136 util_float_to_half(rgba
[1]) << 16;
2137 dw
[6] = util_float_to_half(rgba
[2]) |
2138 util_float_to_half(rgba
[3]) << 16;
2140 /* clamp to [-1.0f, 1.0f] */
2141 rgba
[0] = CLAMP(rgba
[0], -1.0f
, 1.0f
);
2142 rgba
[1] = CLAMP(rgba
[1], -1.0f
, 1.0f
);
2143 rgba
[2] = CLAMP(rgba
[2], -1.0f
, 1.0f
);
2144 rgba
[3] = CLAMP(rgba
[3], -1.0f
, 1.0f
);
2147 dw
[9] = (int16_t) util_iround(rgba
[0] * 32767.0f
) |
2148 (int16_t) util_iround(rgba
[1] * 32767.0f
) << 16;
2149 dw
[10] = (int16_t) util_iround(rgba
[2] * 32767.0f
) |
2150 (int16_t) util_iround(rgba
[3] * 32767.0f
) << 16;
2153 dw
[11] = (int8_t) util_iround(rgba
[0] * 127.0f
) |
2154 (int8_t) util_iround(rgba
[1] * 127.0f
) << 8 |
2155 (int8_t) util_iround(rgba
[2] * 127.0f
) << 16 |
2156 (int8_t) util_iround(rgba
[3] * 127.0f
) << 24;
2158 /* clamp to [0.0f, 1.0f] */
2159 rgba
[0] = CLAMP(rgba
[0], 0.0f
, 1.0f
);
2160 rgba
[1] = CLAMP(rgba
[1], 0.0f
, 1.0f
);
2161 rgba
[2] = CLAMP(rgba
[2], 0.0f
, 1.0f
);
2162 rgba
[3] = CLAMP(rgba
[3], 0.0f
, 1.0f
);
2165 dw
[0] = (uint8_t) util_iround(rgba
[0] * 255.0f
) |
2166 (uint8_t) util_iround(rgba
[1] * 255.0f
) << 8 |
2167 (uint8_t) util_iround(rgba
[2] * 255.0f
) << 16 |
2168 (uint8_t) util_iround(rgba
[3] * 255.0f
) << 24;
2171 dw
[7] = (uint16_t) util_iround(rgba
[0] * 65535.0f
) |
2172 (uint16_t) util_iround(rgba
[1] * 65535.0f
) << 16;
2173 dw
[8] = (uint16_t) util_iround(rgba
[2] * 65535.0f
) |
2174 (uint16_t) util_iround(rgba
[3] * 65535.0f
) << 16;
2178 ilo_gpe_init_sampler_cso(const struct ilo_dev_info
*dev
,
2179 const struct pipe_sampler_state
*state
,
2180 struct ilo_sampler_cso
*sampler
)
2182 int mip_filter
, min_filter
, mag_filter
, max_aniso
;
2183 int lod_bias
, max_lod
, min_lod
;
2184 int wrap_s
, wrap_t
, wrap_r
, wrap_cube
;
2185 bool clamp_is_to_edge
;
2186 uint32_t dw0
, dw1
, dw3
;
2188 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2190 memset(sampler
, 0, sizeof(*sampler
));
2192 mip_filter
= gen6_translate_tex_mipfilter(state
->min_mip_filter
);
2193 min_filter
= gen6_translate_tex_filter(state
->min_img_filter
);
2194 mag_filter
= gen6_translate_tex_filter(state
->mag_img_filter
);
2196 sampler
->anisotropic
= state
->max_anisotropy
;
2198 if (state
->max_anisotropy
>= 2 && state
->max_anisotropy
<= 16)
2199 max_aniso
= state
->max_anisotropy
/ 2 - 1;
2200 else if (state
->max_anisotropy
> 16)
2201 max_aniso
= BRW_ANISORATIO_16
;
2203 max_aniso
= BRW_ANISORATIO_2
;
2207 * Here is how the hardware calculates per-pixel LOD, from my reading of the
2210 * 1) LOD is set to log2(ratio of texels to pixels) if not specified in
2211 * other ways. The number of texels is measured using level
2213 * 2) Bias is added to LOD.
2214 * 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
2215 * compared with Base to determine whether magnification or
2216 * minification is needed. (if preclamp is disabled, LOD is compared
2217 * with Base before clamping)
2218 * 4) If magnification is needed, or no mipmapping is requested, LOD is
2219 * set to floor(MinLod).
2220 * 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
2222 * With Gallium interface, Base is always zero and
2223 * pipe_sampler_view::u.tex.first_level specifies SurfMinLod.
2225 if (dev
->gen
>= ILO_GEN(7)) {
2226 const float scale
= 256.0f
;
2228 /* [-16.0, 16.0) in S4.8 */
2230 (CLAMP(state
->lod_bias
, -16.0f
, 15.9f
) * scale
);
2233 /* [0.0, 14.0] in U4.8 */
2234 max_lod
= (int) (CLAMP(state
->max_lod
, 0.0f
, 14.0f
) * scale
);
2235 min_lod
= (int) (CLAMP(state
->min_lod
, 0.0f
, 14.0f
) * scale
);
2238 const float scale
= 64.0f
;
2240 /* [-16.0, 16.0) in S4.6 */
2242 (CLAMP(state
->lod_bias
, -16.0f
, 15.9f
) * scale
);
2245 /* [0.0, 13.0] in U4.6 */
2246 max_lod
= (int) (CLAMP(state
->max_lod
, 0.0f
, 13.0f
) * scale
);
2247 min_lod
= (int) (CLAMP(state
->min_lod
, 0.0f
, 13.0f
) * scale
);
2251 * We want LOD to be clamped to determine magnification/minification, and
2252 * get set to zero when it is magnification or when mipmapping is disabled.
2253 * The hardware would set LOD to floor(MinLod) and that is a problem when
2254 * MinLod is greater than or equal to 1.0f.
2256 * With Base being zero, it is always minification when MinLod is non-zero.
2257 * To achieve our goal, we just need to set MinLod to zero and set
2258 * MagFilter to MinFilter when mipmapping is disabled.
2260 if (state
->min_mip_filter
== PIPE_TEX_MIPFILTER_NONE
&& min_lod
) {
2262 mag_filter
= min_filter
;
2266 * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
2267 * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering, PIPE_TEX_WRAP_CLAMP
2268 * means PIPE_TEX_WRAP_CLAMP_TO_BORDER while additionally clamping the
2269 * texture coordinates to [0.0, 1.0].
2271 * The clamping will be taken care of in the shaders. There are two
2272 * filters here, but let the minification one have a say.
2274 clamp_is_to_edge
= (state
->min_img_filter
== PIPE_TEX_FILTER_NEAREST
);
2275 if (!clamp_is_to_edge
) {
2276 sampler
->saturate_s
= (state
->wrap_s
== PIPE_TEX_WRAP_CLAMP
);
2277 sampler
->saturate_t
= (state
->wrap_t
== PIPE_TEX_WRAP_CLAMP
);
2278 sampler
->saturate_r
= (state
->wrap_r
== PIPE_TEX_WRAP_CLAMP
);
2281 /* determine wrap s/t/r */
2282 wrap_s
= gen6_translate_tex_wrap(state
->wrap_s
, clamp_is_to_edge
);
2283 wrap_t
= gen6_translate_tex_wrap(state
->wrap_t
, clamp_is_to_edge
);
2284 wrap_r
= gen6_translate_tex_wrap(state
->wrap_r
, clamp_is_to_edge
);
2287 * From the Sandy Bridge PRM, volume 4 part 1, page 107:
2289 * "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
2290 * and TEXCOORDMODE_CUBE settings are valid, and each TC component
2291 * must have the same Address Control mode."
2293 * From the Ivy Bridge PRM, volume 4 part 1, page 96:
2295 * "This field (Cube Surface Control Mode) must be set to
2296 * CUBECTRLMODE_PROGRAMMED"
2298 * Therefore, we cannot use "Cube Surface Control Mode" for seamless cube
2301 if (state
->seamless_cube_map
&&
2302 (state
->min_img_filter
!= PIPE_TEX_FILTER_NEAREST
||
2303 state
->mag_img_filter
!= PIPE_TEX_FILTER_NEAREST
)) {
2304 wrap_cube
= BRW_TEXCOORDMODE_CUBE
;
2307 wrap_cube
= BRW_TEXCOORDMODE_CLAMP
;
2310 if (!state
->normalized_coords
) {
2312 * From the Ivy Bridge PRM, volume 4 part 1, page 98:
2314 * "The following state must be set as indicated if this field
2315 * (Non-normalized Coordinate Enable) is enabled:
2317 * - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
2318 * TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
2319 * - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
2320 * - Mag Mode Filter must be MAPFILTER_NEAREST or
2322 * - Min Mode Filter must be MAPFILTER_NEAREST or
2324 * - Mip Mode Filter must be MIPFILTER_NONE.
2325 * - Min LOD must be 0.
2326 * - Max LOD must be 0.
2327 * - MIP Count must be 0.
2328 * - Surface Min LOD must be 0.
2329 * - Texture LOD Bias must be 0."
2331 assert(wrap_s
== BRW_TEXCOORDMODE_CLAMP
||
2332 wrap_s
== BRW_TEXCOORDMODE_CLAMP_BORDER
);
2333 assert(wrap_t
== BRW_TEXCOORDMODE_CLAMP
||
2334 wrap_t
== BRW_TEXCOORDMODE_CLAMP_BORDER
);
2335 assert(wrap_r
== BRW_TEXCOORDMODE_CLAMP
||
2336 wrap_r
== BRW_TEXCOORDMODE_CLAMP_BORDER
);
2338 assert(mag_filter
== BRW_MAPFILTER_NEAREST
||
2339 mag_filter
== BRW_MAPFILTER_LINEAR
);
2340 assert(min_filter
== BRW_MAPFILTER_NEAREST
||
2341 min_filter
== BRW_MAPFILTER_LINEAR
);
2343 /* work around a bug in util_blitter */
2344 mip_filter
= BRW_MIPFILTER_NONE
;
2346 assert(mip_filter
== BRW_MIPFILTER_NONE
);
2349 if (dev
->gen
>= ILO_GEN(7)) {
2354 sampler
->dw_filter
= mag_filter
<< 17 |
2357 sampler
->dw_filter_aniso
= BRW_MAPFILTER_ANISOTROPIC
<< 17 |
2358 BRW_MAPFILTER_ANISOTROPIC
<< 14 |
2361 dw1
= min_lod
<< 20 |
2364 if (state
->compare_mode
!= PIPE_TEX_COMPARE_NONE
)
2365 dw1
|= gen6_translate_shadow_func(state
->compare_func
) << 1;
2367 dw3
= max_aniso
<< 19;
2369 /* round the coordinates for linear filtering */
2370 if (min_filter
!= BRW_MAPFILTER_NEAREST
) {
2371 dw3
|= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN
|
2372 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN
|
2373 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN
) << 13;
2375 if (mag_filter
!= BRW_MAPFILTER_NEAREST
) {
2376 dw3
|= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG
|
2377 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG
|
2378 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG
) << 13;
2381 if (!state
->normalized_coords
)
2384 sampler
->dw_wrap
= wrap_s
<< 6 |
2389 * As noted in the classic i965 driver, the HW may still reference
2390 * wrap_t and wrap_r for 1D textures. We need to set them to a safe
2393 sampler
->dw_wrap_1d
= wrap_s
<< 6 |
2394 BRW_TEXCOORDMODE_WRAP
<< 3 |
2395 BRW_TEXCOORDMODE_WRAP
;
2397 sampler
->dw_wrap_cube
= wrap_cube
<< 6 |
2401 STATIC_ASSERT(Elements(sampler
->payload
) >= 7);
2403 sampler
->payload
[0] = dw0
;
2404 sampler
->payload
[1] = dw1
;
2405 sampler
->payload
[2] = dw3
;
2407 memcpy(&sampler
->payload
[3],
2408 state
->border_color
.ui
, sizeof(state
->border_color
.ui
));
2415 if (state
->compare_mode
!= PIPE_TEX_COMPARE_NONE
)
2416 dw0
|= gen6_translate_shadow_func(state
->compare_func
);
2418 sampler
->dw_filter
= (min_filter
!= mag_filter
) << 27 |
2422 sampler
->dw_filter_aniso
= BRW_MAPFILTER_ANISOTROPIC
<< 17 |
2423 BRW_MAPFILTER_ANISOTROPIC
<< 14;
2425 dw1
= min_lod
<< 22 |
2428 sampler
->dw_wrap
= wrap_s
<< 6 |
2432 sampler
->dw_wrap_1d
= wrap_s
<< 6 |
2433 BRW_TEXCOORDMODE_WRAP
<< 3 |
2434 BRW_TEXCOORDMODE_WRAP
;
2436 sampler
->dw_wrap_cube
= wrap_cube
<< 6 |
2440 dw3
= max_aniso
<< 19;
2442 /* round the coordinates for linear filtering */
2443 if (min_filter
!= BRW_MAPFILTER_NEAREST
) {
2444 dw3
|= (BRW_ADDRESS_ROUNDING_ENABLE_U_MIN
|
2445 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN
|
2446 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN
) << 13;
2448 if (mag_filter
!= BRW_MAPFILTER_NEAREST
) {
2449 dw3
|= (BRW_ADDRESS_ROUNDING_ENABLE_U_MAG
|
2450 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG
|
2451 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG
) << 13;
2454 if (!state
->normalized_coords
)
2457 STATIC_ASSERT(Elements(sampler
->payload
) >= 15);
2459 sampler
->payload
[0] = dw0
;
2460 sampler
->payload
[1] = dw1
;
2461 sampler
->payload
[2] = dw3
;
2463 sampler_init_border_color_gen6(dev
,
2464 &state
->border_color
, &sampler
->payload
[3], 12);
2469 ilo_gpe_set_fb(const struct ilo_dev_info
*dev
,
2470 const struct pipe_framebuffer_state
*state
,
2471 struct ilo_fb_state
*fb
)
2473 const struct pipe_surface
*first
;
2474 unsigned num_surfaces
, first_idx
;
2476 ILO_GPE_VALID_GEN(dev
, 6, 7.5);
2478 util_copy_framebuffer_state(&fb
->state
, state
);
2480 ilo_gpe_init_view_surface_null(dev
,
2481 state
->width
, state
->height
,
2482 1, 0, &fb
->null_rt
);
2485 for (first_idx
= 0; first_idx
< state
->nr_cbufs
; first_idx
++) {
2486 if (state
->cbufs
[first_idx
]) {
2487 first
= state
->cbufs
[first_idx
];
2492 first
= state
->zsbuf
;
2494 fb
->num_samples
= (first
) ? first
->texture
->nr_samples
: 1;
2495 if (!fb
->num_samples
)
2496 fb
->num_samples
= 1;
2498 fb
->offset_to_layers
= false;
2501 * The PRMs list several restrictions when the framebuffer has more than
2502 * one surface, but it seems they are lifted on GEN7+.
2504 num_surfaces
= state
->nr_cbufs
+ !!state
->zsbuf
;
2506 if (dev
->gen
< ILO_GEN(7) && num_surfaces
> 1) {
2507 const unsigned first_depth
=
2508 (first
->texture
->target
== PIPE_TEXTURE_3D
) ?
2509 first
->texture
->depth0
:
2510 first
->u
.tex
.last_layer
- first
->u
.tex
.first_layer
+ 1;
2511 bool has_3d_target
= (first
->texture
->target
== PIPE_TEXTURE_3D
);
2514 for (i
= first_idx
+ 1; i
< num_surfaces
; i
++) {
2515 const struct pipe_surface
*surf
=
2516 (i
< state
->nr_cbufs
) ? state
->cbufs
[i
] : state
->zsbuf
;
2522 depth
= (surf
->texture
->target
== PIPE_TEXTURE_3D
) ?
2523 surf
->texture
->depth0
:
2524 surf
->u
.tex
.last_layer
- surf
->u
.tex
.first_layer
+ 1;
2526 has_3d_target
|= (surf
->texture
->target
== PIPE_TEXTURE_3D
);
2529 * From the Sandy Bridge PRM, volume 4 part 1, page 79:
2531 * "The LOD of a render target must be the same as the LOD of the
2532 * other render target(s) and of the depth buffer (defined in
2533 * 3DSTATE_DEPTH_BUFFER)."
2535 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
2537 * "The Depth of a render target must be the same as the Depth of
2538 * the other render target(s) and of the depth buffer (defined
2539 * in 3DSTATE_DEPTH_BUFFER)."
2541 if (surf
->u
.tex
.level
!= first
->u
.tex
.level
||
2542 depth
!= first_depth
) {
2543 fb
->offset_to_layers
= true;
2548 * From the Sandy Bridge PRM, volume 4 part 1, page 77:
2550 * "The Height of a render target must be the same as the Height
2551 * of the other render targets and the depth buffer (defined in
2552 * 3DSTATE_DEPTH_BUFFER), unless Surface Type is SURFTYPE_1D or
2553 * SURFTYPE_2D with Depth = 0 (non-array) and LOD = 0 (non-mip
2556 * From the Sandy Bridge PRM, volume 4 part 1, page 78:
2558 * "The Width of a render target must be the same as the Width of
2559 * the other render target(s) and the depth buffer (defined in
2560 * 3DSTATE_DEPTH_BUFFER), unless Surface Type is SURFTYPE_1D or
2561 * SURFTYPE_2D with Depth = 0 (non-array) and LOD = 0 (non-mip
2564 if (surf
->texture
->width0
!= first
->texture
->width0
||
2565 surf
->texture
->height0
!= first
->texture
->height0
) {
2566 if (has_3d_target
|| first
->u
.tex
.level
|| first_depth
> 1) {
2567 fb
->offset_to_layers
= true;
2576 ilo_gpe_gen6_estimate_command_size(const struct ilo_dev_info
*dev
,
2577 enum ilo_gpe_gen6_command cmd
,
2580 static const struct {
2583 } gen6_command_size_table
[ILO_GPE_GEN6_COMMAND_COUNT
] = {
2584 [ILO_GPE_GEN6_STATE_BASE_ADDRESS
] = { 0, 10 },
2585 [ILO_GPE_GEN6_STATE_SIP
] = { 0, 2 },
2586 [ILO_GPE_GEN6_3DSTATE_VF_STATISTICS
] = { 0, 1 },
2587 [ILO_GPE_GEN6_PIPELINE_SELECT
] = { 0, 1 },
2588 [ILO_GPE_GEN6_MEDIA_VFE_STATE
] = { 0, 8 },
2589 [ILO_GPE_GEN6_MEDIA_CURBE_LOAD
] = { 0, 4 },
2590 [ILO_GPE_GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD
] = { 0, 4 },
2591 [ILO_GPE_GEN6_MEDIA_GATEWAY_STATE
] = { 0, 2 },
2592 [ILO_GPE_GEN6_MEDIA_STATE_FLUSH
] = { 0, 2 },
2593 [ILO_GPE_GEN6_MEDIA_OBJECT_WALKER
] = { 17, 1 },
2594 [ILO_GPE_GEN6_3DSTATE_BINDING_TABLE_POINTERS
] = { 0, 4 },
2595 [ILO_GPE_GEN6_3DSTATE_SAMPLER_STATE_POINTERS
] = { 0, 4 },
2596 [ILO_GPE_GEN6_3DSTATE_URB
] = { 0, 3 },
2597 [ILO_GPE_GEN6_3DSTATE_VERTEX_BUFFERS
] = { 1, 4 },
2598 [ILO_GPE_GEN6_3DSTATE_VERTEX_ELEMENTS
] = { 1, 2 },
2599 [ILO_GPE_GEN6_3DSTATE_INDEX_BUFFER
] = { 0, 3 },
2600 [ILO_GPE_GEN6_3DSTATE_VIEWPORT_STATE_POINTERS
] = { 0, 4 },
2601 [ILO_GPE_GEN6_3DSTATE_CC_STATE_POINTERS
] = { 0, 4 },
2602 [ILO_GPE_GEN6_3DSTATE_SCISSOR_STATE_POINTERS
] = { 0, 2 },
2603 [ILO_GPE_GEN6_3DSTATE_VS
] = { 0, 6 },
2604 [ILO_GPE_GEN6_3DSTATE_GS
] = { 0, 7 },
2605 [ILO_GPE_GEN6_3DSTATE_CLIP
] = { 0, 4 },
2606 [ILO_GPE_GEN6_3DSTATE_SF
] = { 0, 20 },
2607 [ILO_GPE_GEN6_3DSTATE_WM
] = { 0, 9 },
2608 [ILO_GPE_GEN6_3DSTATE_CONSTANT_VS
] = { 0, 5 },
2609 [ILO_GPE_GEN6_3DSTATE_CONSTANT_GS
] = { 0, 5 },
2610 [ILO_GPE_GEN6_3DSTATE_CONSTANT_PS
] = { 0, 5 },
2611 [ILO_GPE_GEN6_3DSTATE_SAMPLE_MASK
] = { 0, 2 },
2612 [ILO_GPE_GEN6_3DSTATE_DRAWING_RECTANGLE
] = { 0, 4 },
2613 [ILO_GPE_GEN6_3DSTATE_DEPTH_BUFFER
] = { 0, 7 },
2614 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_OFFSET
] = { 0, 2 },
2615 [ILO_GPE_GEN6_3DSTATE_POLY_STIPPLE_PATTERN
] = { 0, 33 },
2616 [ILO_GPE_GEN6_3DSTATE_LINE_STIPPLE
] = { 0, 3 },
2617 [ILO_GPE_GEN6_3DSTATE_AA_LINE_PARAMETERS
] = { 0, 3 },
2618 [ILO_GPE_GEN6_3DSTATE_GS_SVB_INDEX
] = { 0, 4 },
2619 [ILO_GPE_GEN6_3DSTATE_MULTISAMPLE
] = { 0, 3 },
2620 [ILO_GPE_GEN6_3DSTATE_STENCIL_BUFFER
] = { 0, 3 },
2621 [ILO_GPE_GEN6_3DSTATE_HIER_DEPTH_BUFFER
] = { 0, 3 },
2622 [ILO_GPE_GEN6_3DSTATE_CLEAR_PARAMS
] = { 0, 2 },
2623 [ILO_GPE_GEN6_PIPE_CONTROL
] = { 0, 5 },
2624 [ILO_GPE_GEN6_3DPRIMITIVE
] = { 0, 6 },
2626 const int header
= gen6_command_size_table
[cmd
].header
;
2627 const int body
= gen6_command_size_table
[arg
].body
;
2628 const int count
= arg
;
2630 ILO_GPE_VALID_GEN(dev
, 6, 6);
2631 assert(cmd
< ILO_GPE_GEN6_COMMAND_COUNT
);
2633 return (likely(count
)) ? header
+ body
* count
: 0;
2637 ilo_gpe_gen6_estimate_state_size(const struct ilo_dev_info
*dev
,
2638 enum ilo_gpe_gen6_state state
,
2641 static const struct {
2645 } gen6_state_size_table
[ILO_GPE_GEN6_STATE_COUNT
] = {
2646 [ILO_GPE_GEN6_INTERFACE_DESCRIPTOR_DATA
] = { 8, 8, true },
2647 [ILO_GPE_GEN6_SF_VIEWPORT
] = { 8, 8, true },
2648 [ILO_GPE_GEN6_CLIP_VIEWPORT
] = { 8, 4, true },
2649 [ILO_GPE_GEN6_CC_VIEWPORT
] = { 8, 2, true },
2650 [ILO_GPE_GEN6_COLOR_CALC_STATE
] = { 16, 6, false },
2651 [ILO_GPE_GEN6_BLEND_STATE
] = { 16, 2, true },
2652 [ILO_GPE_GEN6_DEPTH_STENCIL_STATE
] = { 16, 3, false },
2653 [ILO_GPE_GEN6_SCISSOR_RECT
] = { 8, 2, true },
2654 [ILO_GPE_GEN6_BINDING_TABLE_STATE
] = { 8, 1, true },
2655 [ILO_GPE_GEN6_SURFACE_STATE
] = { 8, 6, false },
2656 [ILO_GPE_GEN6_SAMPLER_STATE
] = { 8, 4, true },
2657 [ILO_GPE_GEN6_SAMPLER_BORDER_COLOR_STATE
] = { 8, 12, false },
2658 [ILO_GPE_GEN6_PUSH_CONSTANT_BUFFER
] = { 8, 1, true },
2660 const int alignment
= gen6_state_size_table
[state
].alignment
;
2661 const int body
= gen6_state_size_table
[state
].body
;
2662 const bool is_array
= gen6_state_size_table
[state
].is_array
;
2663 const int count
= arg
;
2666 ILO_GPE_VALID_GEN(dev
, 6, 6);
2667 assert(state
< ILO_GPE_GEN6_STATE_COUNT
);
2669 if (likely(count
)) {
2671 estimate
= (alignment
- 1) + body
* count
;
2674 estimate
= (alignment
- 1) + body
;
2675 /* all states are aligned */
2677 estimate
+= util_align_npot(body
, alignment
) * (count
- 1);