/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2012-2014 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */

#include "genhw/genhw.h"
#include "util/u_dual_blend.h"
#include "util/u_framebuffer.h"
#include "util/u_half.h"
#include "util/u_resource.h"

#include "ilo_format.h"
#include "ilo_state_3d.h"
#include "../ilo_resource.h"
#include "../ilo_shader.h"

40 ve_init_cso(const struct ilo_dev
*dev
,
41 const struct pipe_vertex_element
*state
,
43 struct ilo_ve_cso
*cso
)
46 GEN6_VFCOMP_STORE_SRC
,
47 GEN6_VFCOMP_STORE_SRC
,
48 GEN6_VFCOMP_STORE_SRC
,
49 GEN6_VFCOMP_STORE_SRC
,
53 ILO_DEV_ASSERT(dev
, 6, 8);
55 switch (util_format_get_nr_components(state
->src_format
)) {
56 case 1: comp
[1] = GEN6_VFCOMP_STORE_0
;
57 case 2: comp
[2] = GEN6_VFCOMP_STORE_0
;
58 case 3: comp
[3] = (util_format_is_pure_integer(state
->src_format
)) ?
59 GEN6_VFCOMP_STORE_1_INT
:
60 GEN6_VFCOMP_STORE_1_FP
;
63 format
= ilo_format_translate_vertex(dev
, state
->src_format
);
65 STATIC_ASSERT(Elements(cso
->payload
) >= 2);
67 vb_index
<< GEN6_VE_DW0_VB_INDEX__SHIFT
|
69 format
<< GEN6_VE_DW0_FORMAT__SHIFT
|
70 state
->src_offset
<< GEN6_VE_DW0_VB_OFFSET__SHIFT
;
73 comp
[0] << GEN6_VE_DW1_COMP0__SHIFT
|
74 comp
[1] << GEN6_VE_DW1_COMP1__SHIFT
|
75 comp
[2] << GEN6_VE_DW1_COMP2__SHIFT
|
76 comp
[3] << GEN6_VE_DW1_COMP3__SHIFT
;
80 ilo_gpe_init_ve(const struct ilo_dev
*dev
,
82 const struct pipe_vertex_element
*states
,
83 struct ilo_ve_state
*ve
)
87 ILO_DEV_ASSERT(dev
, 6, 8);
89 ve
->count
= num_states
;
92 for (i
= 0; i
< num_states
; i
++) {
93 const unsigned pipe_idx
= states
[i
].vertex_buffer_index
;
94 const unsigned instance_divisor
= states
[i
].instance_divisor
;
98 * map the pipe vb to the hardware vb, which has a fixed instance
101 for (hw_idx
= 0; hw_idx
< ve
->vb_count
; hw_idx
++) {
102 if (ve
->vb_mapping
[hw_idx
] == pipe_idx
&&
103 ve
->instance_divisors
[hw_idx
] == instance_divisor
)
107 /* create one if there is no matching hardware vb */
108 if (hw_idx
>= ve
->vb_count
) {
109 hw_idx
= ve
->vb_count
++;
111 ve
->vb_mapping
[hw_idx
] = pipe_idx
;
112 ve
->instance_divisors
[hw_idx
] = instance_divisor
;
115 ve_init_cso(dev
, &states
[i
], hw_idx
, &ve
->cso
[i
]);
120 ilo_gpe_set_ve_edgeflag(const struct ilo_dev
*dev
,
121 struct ilo_ve_cso
*cso
)
125 ILO_DEV_ASSERT(dev
, 6, 8);
128 * From the Sandy Bridge PRM, volume 2 part 1, page 94:
130 * "- This bit (Edge Flag Enable) must only be ENABLED on the last
131 * valid VERTEX_ELEMENT structure.
133 * - When set, Component 0 Control must be set to VFCOMP_STORE_SRC,
134 * and Component 1-3 Control must be set to VFCOMP_NOSTORE.
136 * - The Source Element Format must be set to the UINT format.
138 * - [DevSNB]: Edge Flags are not supported for QUADLIST
139 * primitives. Software may elect to convert QUADLIST primitives
140 * to some set of corresponding edge-flag-supported primitive
141 * types (e.g., POLYGONs) prior to submission to the 3D pipeline."
143 cso
->payload
[0] |= GEN6_VE_DW0_EDGE_FLAG_ENABLE
;
146 * Edge flags have format GEN6_FORMAT_R8_USCALED when defined via
147 * glEdgeFlagPointer(), and format GEN6_FORMAT_R32_FLOAT when defined
148 * via glEdgeFlag(), as can be seen in vbo_attrib_tmp.h.
150 * Since all the hardware cares about is whether the flags are zero or not,
151 * we can treat them as the corresponding _UINT formats.
153 format
= GEN_EXTRACT(cso
->payload
[0], GEN6_VE_DW0_FORMAT
);
154 cso
->payload
[0] &= ~GEN6_VE_DW0_FORMAT__MASK
;
157 case GEN6_FORMAT_R32_FLOAT
:
158 format
= GEN6_FORMAT_R32_UINT
;
160 case GEN6_FORMAT_R8_USCALED
:
161 format
= GEN6_FORMAT_R8_UINT
;
167 cso
->payload
[0] |= GEN_SHIFT32(format
, GEN6_VE_DW0_FORMAT
);
170 GEN6_VFCOMP_STORE_SRC
<< GEN6_VE_DW1_COMP0__SHIFT
|
171 GEN6_VFCOMP_NOSTORE
<< GEN6_VE_DW1_COMP1__SHIFT
|
172 GEN6_VFCOMP_NOSTORE
<< GEN6_VE_DW1_COMP2__SHIFT
|
173 GEN6_VFCOMP_NOSTORE
<< GEN6_VE_DW1_COMP3__SHIFT
;
177 ilo_gpe_init_ve_nosrc(const struct ilo_dev
*dev
,
178 int comp0
, int comp1
, int comp2
, int comp3
,
179 struct ilo_ve_cso
*cso
)
181 ILO_DEV_ASSERT(dev
, 6, 8);
183 STATIC_ASSERT(Elements(cso
->payload
) >= 2);
185 assert(comp0
!= GEN6_VFCOMP_STORE_SRC
&&
186 comp1
!= GEN6_VFCOMP_STORE_SRC
&&
187 comp2
!= GEN6_VFCOMP_STORE_SRC
&&
188 comp3
!= GEN6_VFCOMP_STORE_SRC
);
190 cso
->payload
[0] = GEN6_VE_DW0_VALID
;
192 comp0
<< GEN6_VE_DW1_COMP0__SHIFT
|
193 comp1
<< GEN6_VE_DW1_COMP1__SHIFT
|
194 comp2
<< GEN6_VE_DW1_COMP2__SHIFT
|
195 comp3
<< GEN6_VE_DW1_COMP3__SHIFT
;
199 ilo_gpe_init_vs_cso(const struct ilo_dev
*dev
,
200 const struct ilo_shader_state
*vs
,
201 struct ilo_shader_cso
*cso
)
203 int start_grf
, vue_read_len
, sampler_count
, max_threads
;
204 uint32_t dw2
, dw4
, dw5
;
206 ILO_DEV_ASSERT(dev
, 6, 8);
208 start_grf
= ilo_shader_get_kernel_param(vs
, ILO_KERNEL_URB_DATA_START_REG
);
209 vue_read_len
= ilo_shader_get_kernel_param(vs
, ILO_KERNEL_INPUT_COUNT
);
210 sampler_count
= ilo_shader_get_kernel_param(vs
, ILO_KERNEL_SAMPLER_COUNT
);
213 * From the Sandy Bridge PRM, volume 2 part 1, page 135:
215 * "(Vertex URB Entry Read Length) Specifies the number of pairs of
216 * 128-bit vertex elements to be passed into the payload for each
219 * "It is UNDEFINED to set this field to 0 indicating no Vertex URB
220 * data to be read and passed to the thread."
222 vue_read_len
= (vue_read_len
+ 1) / 2;
226 max_threads
= dev
->thread_count
;
227 if (ilo_dev_gen(dev
) == ILO_GEN(7.5) && dev
->gt
== 2)
230 dw2
= (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT
;
231 dw2
|= ((sampler_count
+ 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
;
233 dw4
= start_grf
<< GEN6_VS_DW4_URB_GRF_START__SHIFT
|
234 vue_read_len
<< GEN6_VS_DW4_URB_READ_LEN__SHIFT
|
235 0 << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT
;
237 dw5
= GEN6_VS_DW5_STATISTICS
|
238 GEN6_VS_DW5_VS_ENABLE
;
240 if (ilo_dev_gen(dev
) >= ILO_GEN(7.5))
241 dw5
|= (max_threads
- 1) << GEN75_VS_DW5_MAX_THREADS__SHIFT
;
243 dw5
|= (max_threads
- 1) << GEN6_VS_DW5_MAX_THREADS__SHIFT
;
245 STATIC_ASSERT(Elements(cso
->payload
) >= 3);
246 cso
->payload
[0] = dw2
;
247 cso
->payload
[1] = dw4
;
248 cso
->payload
[2] = dw5
;
252 gs_init_cso_gen6(const struct ilo_dev
*dev
,
253 const struct ilo_shader_state
*gs
,
254 struct ilo_shader_cso
*cso
)
256 int start_grf
, vue_read_len
, max_threads
;
257 uint32_t dw2
, dw4
, dw5
, dw6
;
259 ILO_DEV_ASSERT(dev
, 6, 6);
261 if (ilo_shader_get_type(gs
) == PIPE_SHADER_GEOMETRY
) {
262 start_grf
= ilo_shader_get_kernel_param(gs
,
263 ILO_KERNEL_URB_DATA_START_REG
);
265 vue_read_len
= ilo_shader_get_kernel_param(gs
, ILO_KERNEL_INPUT_COUNT
);
268 start_grf
= ilo_shader_get_kernel_param(gs
,
269 ILO_KERNEL_VS_GEN6_SO_START_REG
);
271 vue_read_len
= ilo_shader_get_kernel_param(gs
, ILO_KERNEL_OUTPUT_COUNT
);
275 * From the Sandy Bridge PRM, volume 2 part 1, page 153:
277 * "Specifies the amount of URB data read and passed in the thread
278 * payload for each Vertex URB entry, in 256-bit register increments.
280 * It is UNDEFINED to set this field (Vertex URB Entry Read Length) to
281 * 0 indicating no Vertex URB data to be read and passed to the
284 vue_read_len
= (vue_read_len
+ 1) / 2;
289 * From the Sandy Bridge PRM, volume 2 part 1, page 154:
291 * "Maximum Number of Threads valid range is [0,27] when Rendering
292 * Enabled bit is set."
294 * From the Sandy Bridge PRM, volume 2 part 1, page 173:
296 * "Programming Note: If the GS stage is enabled, software must always
297 * allocate at least one GS URB Entry. This is true even if the GS
298 * thread never needs to output vertices to the pipeline, e.g., when
299 * only performing stream output. This is an artifact of the need to
300 * pass the GS thread an initial destination URB handle."
302 * As such, we always enable rendering, and limit the number of threads.
305 /* maximum is 60, but limited to 28 */
309 /* maximum is 24, but limited to 21 (see brwCreateContext()) */
313 dw2
= GEN6_THREADDISP_SPF
;
315 dw4
= vue_read_len
<< GEN6_GS_DW4_URB_READ_LEN__SHIFT
|
316 0 << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT
|
317 start_grf
<< GEN6_GS_DW4_URB_GRF_START__SHIFT
;
319 dw5
= (max_threads
- 1) << GEN6_GS_DW5_MAX_THREADS__SHIFT
|
320 GEN6_GS_DW5_STATISTICS
|
321 GEN6_GS_DW5_SO_STATISTICS
|
322 GEN6_GS_DW5_RENDER_ENABLE
;
325 * we cannot make use of GEN6_GS_REORDER because it will reorder
326 * triangle strips according to D3D rules (triangle 2N+1 uses vertices
327 * (2N+1, 2N+3, 2N+2)), instead of GL rules (triangle 2N+1 uses vertices
328 * (2N+2, 2N+1, 2N+3)).
330 dw6
= GEN6_GS_DW6_GS_ENABLE
;
332 if (ilo_shader_get_kernel_param(gs
, ILO_KERNEL_GS_DISCARD_ADJACENCY
))
333 dw6
|= GEN6_GS_DW6_DISCARD_ADJACENCY
;
335 if (ilo_shader_get_kernel_param(gs
, ILO_KERNEL_VS_GEN6_SO
)) {
336 const uint32_t svbi_post_inc
=
337 ilo_shader_get_kernel_param(gs
, ILO_KERNEL_GS_GEN6_SVBI_POST_INC
);
339 dw6
|= GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE
;
341 dw6
|= GEN6_GS_DW6_SVBI_POST_INC_ENABLE
|
342 svbi_post_inc
<< GEN6_GS_DW6_SVBI_POST_INC_VAL__SHIFT
;
346 STATIC_ASSERT(Elements(cso
->payload
) >= 4);
347 cso
->payload
[0] = dw2
;
348 cso
->payload
[1] = dw4
;
349 cso
->payload
[2] = dw5
;
350 cso
->payload
[3] = dw6
;
354 gs_init_cso_gen7(const struct ilo_dev
*dev
,
355 const struct ilo_shader_state
*gs
,
356 struct ilo_shader_cso
*cso
)
358 int start_grf
, vue_read_len
, sampler_count
, max_threads
;
359 uint32_t dw2
, dw4
, dw5
;
361 ILO_DEV_ASSERT(dev
, 7, 7.5);
363 start_grf
= ilo_shader_get_kernel_param(gs
, ILO_KERNEL_URB_DATA_START_REG
);
364 vue_read_len
= ilo_shader_get_kernel_param(gs
, ILO_KERNEL_INPUT_COUNT
);
365 sampler_count
= ilo_shader_get_kernel_param(gs
, ILO_KERNEL_SAMPLER_COUNT
);
368 vue_read_len
= (vue_read_len
+ 1) / 2;
370 switch (ilo_dev_gen(dev
)) {
372 max_threads
= (dev
->gt
>= 2) ? 256 : 70;
375 max_threads
= (dev
->gt
== 2) ? 128 : 36;
382 dw2
= (true) ? 0 : GEN6_THREADDISP_FP_MODE_ALT
;
383 dw2
|= ((sampler_count
+ 3) / 4) << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT
;
385 dw4
= vue_read_len
<< GEN7_GS_DW4_URB_READ_LEN__SHIFT
|
386 GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES
|
387 0 << GEN7_GS_DW4_URB_READ_OFFSET__SHIFT
|
388 start_grf
<< GEN7_GS_DW4_URB_GRF_START__SHIFT
;
390 dw5
= (max_threads
- 1) << GEN7_GS_DW5_MAX_THREADS__SHIFT
|
391 GEN7_GS_DW5_STATISTICS
|
392 GEN7_GS_DW5_GS_ENABLE
;
394 STATIC_ASSERT(Elements(cso
->payload
) >= 3);
395 cso
->payload
[0] = dw2
;
396 cso
->payload
[1] = dw4
;
397 cso
->payload
[2] = dw5
;
/**
 * Pre-compute the geometry shader state dwords, dispatching on the device
 * generation: GEN7 and later use gs_init_cso_gen7(), earlier devices use
 * gs_init_cso_gen6().
 *
 * \param dev  device description
 * \param gs   shader to derive the state from
 * \param cso  destination payload
 */
void
ilo_gpe_init_gs_cso(const struct ilo_dev *dev,
                    const struct ilo_shader_state *gs,
                    struct ilo_shader_cso *cso)
{
   if (ilo_dev_gen(dev) >= ILO_GEN(7))
      gs_init_cso_gen7(dev, gs, cso);
   else
      gs_init_cso_gen6(dev, gs, cso);
}

412 view_init_null_gen6(const struct ilo_dev
*dev
,
413 unsigned width
, unsigned height
,
414 unsigned depth
, unsigned level
,
415 struct ilo_view_surface
*surf
)
419 ILO_DEV_ASSERT(dev
, 6, 6);
421 assert(width
>= 1 && height
>= 1 && depth
>= 1);
424 * From the Sandy Bridge PRM, volume 4 part 1, page 71:
426 * "A null surface will be used in instances where an actual surface is
427 * not bound. When a write message is generated to a null surface, no
428 * actual surface is written to. When a read message (including any
429 * sampling engine message) is generated to a null surface, the result
430 * is all zeros. Note that a null surface type is allowed to be used
431 * with all messages, even if it is not specificially indicated as
432 * supported. All of the remaining fields in surface state are ignored
433 * for null surfaces, with the following exceptions:
435 * * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
436 * depth buffer's corresponding state for all render target
437 * surfaces, including null.
438 * * Surface Format must be R8G8B8A8_UNORM."
440 * From the Sandy Bridge PRM, volume 4 part 1, page 82:
442 * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
446 STATIC_ASSERT(Elements(surf
->payload
) >= 6);
449 dw
[0] = GEN6_SURFTYPE_NULL
<< GEN6_SURFACE_DW0_TYPE__SHIFT
|
450 GEN6_FORMAT_B8G8R8A8_UNORM
<< GEN6_SURFACE_DW0_FORMAT__SHIFT
;
454 dw
[2] = (height
- 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT
|
455 (width
- 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT
|
456 level
<< GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT
;
458 dw
[3] = (depth
- 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT
|
466 view_init_for_buffer_gen6(const struct ilo_dev
*dev
,
467 const struct ilo_buffer
*buf
,
468 unsigned offset
, unsigned size
,
469 unsigned struct_size
,
470 enum pipe_format elem_format
,
471 bool is_rt
, bool render_cache_rw
,
472 struct ilo_view_surface
*surf
)
474 const int elem_size
= util_format_get_blocksize(elem_format
);
475 int width
, height
, depth
, pitch
;
476 int surface_format
, num_entries
;
479 ILO_DEV_ASSERT(dev
, 6, 6);
482 * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
483 * structure in a buffer.
486 surface_format
= ilo_format_translate_color(dev
, elem_format
);
488 num_entries
= size
/ struct_size
;
489 /* see if there is enough space to fit another element */
490 if (size
% struct_size
>= elem_size
)
494 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
496 * "For SURFTYPE_BUFFER render targets, this field (Surface Base
497 * Address) specifies the base address of first element of the
498 * surface. The surface is interpreted as a simple array of that
499 * single element type. The address must be naturally-aligned to the
500 * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
501 * must be 16-byte aligned).
503 * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
504 * the base address of the first element of the surface, computed in
505 * software by adding the surface base address to the byte offset of
506 * the element in the buffer."
509 assert(offset
% elem_size
== 0);
512 * From the Sandy Bridge PRM, volume 4 part 1, page 77:
514 * "For buffer surfaces, the number of entries in the buffer ranges
517 assert(num_entries
>= 1 && num_entries
<= 1 << 27);
520 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
522 * "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
523 * indicates the size of the structure."
530 width
= (num_entries
& 0x0000007f);
532 height
= (num_entries
& 0x000fff80) >> 7;
534 depth
= (num_entries
& 0x07f00000) >> 20;
536 STATIC_ASSERT(Elements(surf
->payload
) >= 6);
539 dw
[0] = GEN6_SURFTYPE_BUFFER
<< GEN6_SURFACE_DW0_TYPE__SHIFT
|
540 surface_format
<< GEN6_SURFACE_DW0_FORMAT__SHIFT
;
542 dw
[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW
;
546 dw
[2] = height
<< GEN6_SURFACE_DW2_HEIGHT__SHIFT
|
547 width
<< GEN6_SURFACE_DW2_WIDTH__SHIFT
;
549 dw
[3] = depth
<< GEN6_SURFACE_DW3_DEPTH__SHIFT
|
550 pitch
<< GEN6_SURFACE_DW3_PITCH__SHIFT
;
557 view_init_for_texture_gen6(const struct ilo_dev
*dev
,
558 const struct ilo_texture
*tex
,
559 enum pipe_format format
,
560 unsigned first_level
,
562 unsigned first_layer
,
565 struct ilo_view_surface
*surf
)
567 int surface_type
, surface_format
;
568 int width
, height
, depth
, pitch
, lod
;
571 ILO_DEV_ASSERT(dev
, 6, 6);
573 surface_type
= ilo_gpe_gen6_translate_texture(tex
->base
.target
);
574 assert(surface_type
!= GEN6_SURFTYPE_BUFFER
);
576 if (format
== PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
&& tex
->separate_s8
)
577 format
= PIPE_FORMAT_Z32_FLOAT
;
580 surface_format
= ilo_format_translate_render(dev
, format
);
582 surface_format
= ilo_format_translate_texture(dev
, format
);
583 assert(surface_format
>= 0);
585 width
= tex
->image
.width0
;
586 height
= tex
->image
.height0
;
587 depth
= (tex
->base
.target
== PIPE_TEXTURE_3D
) ?
588 tex
->base
.depth0
: num_layers
;
589 pitch
= tex
->image
.bo_stride
;
591 if (surface_type
== GEN6_SURFTYPE_CUBE
) {
593 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
595 * "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
596 * range of this field (Depth) is [0,84], indicating the number of
597 * cube array elements (equal to the number of underlying 2D array
598 * elements divided by 6). For other surfaces, this field must be
601 * When is_rt is true, we treat the texture as a 2D one to avoid the
605 surface_type
= GEN6_SURFTYPE_2D
;
608 assert(num_layers
% 6 == 0);
609 depth
= num_layers
/ 6;
613 /* sanity check the size */
614 assert(width
>= 1 && height
>= 1 && depth
>= 1 && pitch
>= 1);
615 switch (surface_type
) {
616 case GEN6_SURFTYPE_1D
:
617 assert(width
<= 8192 && height
== 1 && depth
<= 512);
618 assert(first_layer
< 512 && num_layers
<= 512);
620 case GEN6_SURFTYPE_2D
:
621 assert(width
<= 8192 && height
<= 8192 && depth
<= 512);
622 assert(first_layer
< 512 && num_layers
<= 512);
624 case GEN6_SURFTYPE_3D
:
625 assert(width
<= 2048 && height
<= 2048 && depth
<= 2048);
626 assert(first_layer
< 2048 && num_layers
<= 512);
628 assert(first_layer
== 0);
630 case GEN6_SURFTYPE_CUBE
:
631 assert(width
<= 8192 && height
<= 8192 && depth
<= 85);
632 assert(width
== height
);
633 assert(first_layer
< 512 && num_layers
<= 512);
635 assert(first_layer
== 0);
638 assert(!"unexpected surface type");
642 /* non-full array spacing is supported only on GEN7+ */
643 assert(tex
->image
.walk
!= ILO_IMAGE_WALK_LOD
);
644 /* non-interleaved samples are supported only on GEN7+ */
645 if (tex
->base
.nr_samples
> 1)
646 assert(tex
->image
.interleaved_samples
);
649 assert(num_levels
== 1);
653 lod
= num_levels
- 1;
657 * From the Sandy Bridge PRM, volume 4 part 1, page 76:
659 * "Linear render target surface base addresses must be element-size
660 * aligned, for non-YUV surface formats, or a multiple of 2
661 * element-sizes for YUV surface formats. Other linear surfaces have
662 * no alignment requirements (byte alignment is sufficient.)"
664 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
666 * "For linear render target surfaces, the pitch must be a multiple
667 * of the element size for non-YUV surface formats. Pitch must be a
668 * multiple of 2 * element size for YUV surface formats."
670 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
672 * "For linear surfaces, this field (X Offset) must be zero"
674 if (tex
->image
.tiling
== GEN6_TILING_NONE
) {
676 const int elem_size
= util_format_get_blocksize(format
);
677 assert(pitch
% elem_size
== 0);
681 STATIC_ASSERT(Elements(surf
->payload
) >= 6);
684 dw
[0] = surface_type
<< GEN6_SURFACE_DW0_TYPE__SHIFT
|
685 surface_format
<< GEN6_SURFACE_DW0_FORMAT__SHIFT
|
686 GEN6_SURFACE_DW0_MIPLAYOUT_BELOW
;
688 if (surface_type
== GEN6_SURFTYPE_CUBE
&& !is_rt
) {
690 GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK
;
694 dw
[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW
;
698 dw
[2] = (height
- 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT
|
699 (width
- 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT
|
700 lod
<< GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT
;
702 assert(tex
->image
.tiling
!= GEN8_TILING_W
);
703 dw
[3] = (depth
- 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT
|
704 (pitch
- 1) << GEN6_SURFACE_DW3_PITCH__SHIFT
|
707 dw
[4] = first_level
<< GEN6_SURFACE_DW4_MIN_LOD__SHIFT
|
709 (num_layers
- 1) << 8 |
710 ((tex
->base
.nr_samples
> 1) ? GEN6_SURFACE_DW4_MULTISAMPLECOUNT_4
:
711 GEN6_SURFACE_DW4_MULTISAMPLECOUNT_1
);
715 assert(tex
->image
.align_j
== 2 || tex
->image
.align_j
== 4);
716 if (tex
->image
.align_j
== 4)
717 dw
[5] |= GEN6_SURFACE_DW5_VALIGN_4
;
721 view_init_null_gen7(const struct ilo_dev
*dev
,
722 unsigned width
, unsigned height
,
723 unsigned depth
, unsigned level
,
724 struct ilo_view_surface
*surf
)
728 ILO_DEV_ASSERT(dev
, 7, 8);
730 assert(width
>= 1 && height
>= 1 && depth
>= 1);
733 * From the Ivy Bridge PRM, volume 4 part 1, page 62:
735 * "A null surface is used in instances where an actual surface is not
736 * bound. When a write message is generated to a null surface, no
737 * actual surface is written to. When a read message (including any
738 * sampling engine message) is generated to a null surface, the result
739 * is all zeros. Note that a null surface type is allowed to be used
740 * with all messages, even if it is not specificially indicated as
741 * supported. All of the remaining fields in surface state are ignored
742 * for null surfaces, with the following exceptions:
744 * * Width, Height, Depth, LOD, and Render Target View Extent fields
745 * must match the depth buffer's corresponding state for all render
746 * target surfaces, including null.
747 * * All sampling engine and data port messages support null surfaces
748 * with the above behavior, even if not mentioned as specifically
749 * supported, except for the following:
750 * * Data Port Media Block Read/Write messages.
751 * * The Surface Type of a surface used as a render target (accessed
752 * via the Data Port's Render Target Write message) must be the same
753 * as the Surface Type of all other render targets and of the depth
754 * buffer (defined in 3DSTATE_DEPTH_BUFFER), unless either the depth
755 * buffer or render targets are SURFTYPE_NULL."
757 * From the Ivy Bridge PRM, volume 4 part 1, page 65:
759 * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
763 STATIC_ASSERT(Elements(surf
->payload
) >= 13);
766 dw
[0] = GEN6_SURFTYPE_NULL
<< GEN7_SURFACE_DW0_TYPE__SHIFT
|
767 GEN6_FORMAT_B8G8R8A8_UNORM
<< GEN7_SURFACE_DW0_FORMAT__SHIFT
;
769 if (ilo_dev_gen(dev
) >= ILO_GEN(8))
770 dw
[0] |= GEN6_TILING_X
<< GEN8_SURFACE_DW0_TILING__SHIFT
;
772 dw
[0] |= GEN6_TILING_X
<< GEN7_SURFACE_DW0_TILING__SHIFT
;
776 dw
[2] = GEN_SHIFT32(height
- 1, GEN7_SURFACE_DW2_HEIGHT
) |
777 GEN_SHIFT32(width
- 1, GEN7_SURFACE_DW2_WIDTH
);
779 dw
[3] = GEN_SHIFT32(depth
- 1, GEN7_SURFACE_DW3_DEPTH
);
787 if (ilo_dev_gen(dev
) >= ILO_GEN(8))
788 memset(&dw
[8], 0, sizeof(*dw
) * (13 - 8));
792 view_init_for_buffer_gen7(const struct ilo_dev
*dev
,
793 const struct ilo_buffer
*buf
,
794 unsigned offset
, unsigned size
,
795 unsigned struct_size
,
796 enum pipe_format elem_format
,
797 bool is_rt
, bool render_cache_rw
,
798 struct ilo_view_surface
*surf
)
800 const bool typed
= (elem_format
!= PIPE_FORMAT_NONE
);
801 const bool structured
= (!typed
&& struct_size
> 1);
802 const int elem_size
= (typed
) ?
803 util_format_get_blocksize(elem_format
) : 1;
804 int width
, height
, depth
, pitch
;
805 int surface_type
, surface_format
, num_entries
;
808 ILO_DEV_ASSERT(dev
, 7, 8);
810 surface_type
= (structured
) ? GEN7_SURFTYPE_STRBUF
: GEN6_SURFTYPE_BUFFER
;
812 surface_format
= (typed
) ?
813 ilo_format_translate_color(dev
, elem_format
) : GEN6_FORMAT_RAW
;
815 num_entries
= size
/ struct_size
;
816 /* see if there is enough space to fit another element */
817 if (size
% struct_size
>= elem_size
&& !structured
)
821 * From the Ivy Bridge PRM, volume 4 part 1, page 67:
823 * "For SURFTYPE_BUFFER render targets, this field (Surface Base
824 * Address) specifies the base address of first element of the
825 * surface. The surface is interpreted as a simple array of that
826 * single element type. The address must be naturally-aligned to the
827 * element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
828 * must be 16-byte aligned)
830 * For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
831 * the base address of the first element of the surface, computed in
832 * software by adding the surface base address to the byte offset of
833 * the element in the buffer."
836 assert(offset
% elem_size
== 0);
839 * From the Ivy Bridge PRM, volume 4 part 1, page 68:
841 * "For typed buffer and structured buffer surfaces, the number of
842 * entries in the buffer ranges from 1 to 2^27. For raw buffer
843 * surfaces, the number of entries in the buffer is the number of
844 * bytes which can range from 1 to 2^30."
846 assert(num_entries
>= 1 &&
847 num_entries
<= 1 << ((typed
|| structured
) ? 27 : 30));
850 * From the Ivy Bridge PRM, volume 4 part 1, page 69:
852 * "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be
853 * 11 if the Surface Format is RAW (the size of the buffer must be a
854 * multiple of 4 bytes)."
856 * From the Ivy Bridge PRM, volume 4 part 1, page 70:
858 * "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this
859 * field (Surface Pitch) indicates the size of the structure."
861 * "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch
862 * must be a multiple of 4 bytes."
865 assert(struct_size
% 4 == 0);
867 assert(num_entries
% 4 == 0);
874 width
= (num_entries
& 0x0000007f);
876 height
= (num_entries
& 0x001fff80) >> 7;
878 depth
= (num_entries
& 0x7fe00000) >> 21;
879 /* limit to [26:21] */
880 if (typed
|| structured
)
883 STATIC_ASSERT(Elements(surf
->payload
) >= 13);
886 dw
[0] = surface_type
<< GEN7_SURFACE_DW0_TYPE__SHIFT
|
887 surface_format
<< GEN7_SURFACE_DW0_FORMAT__SHIFT
;
889 dw
[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW
;
891 if (ilo_dev_gen(dev
) >= ILO_GEN(8)) {
893 memset(&dw
[9], 0, sizeof(*dw
) * (13 - 9));
898 dw
[2] = GEN_SHIFT32(height
, GEN7_SURFACE_DW2_HEIGHT
) |
899 GEN_SHIFT32(width
, GEN7_SURFACE_DW2_WIDTH
);
901 dw
[3] = GEN_SHIFT32(depth
, GEN7_SURFACE_DW3_DEPTH
) |
910 if (ilo_dev_gen(dev
) >= ILO_GEN(7.5)) {
911 dw
[7] |= GEN_SHIFT32(GEN75_SCS_RED
, GEN75_SURFACE_DW7_SCS_R
) |
912 GEN_SHIFT32(GEN75_SCS_GREEN
, GEN75_SURFACE_DW7_SCS_G
) |
913 GEN_SHIFT32(GEN75_SCS_BLUE
, GEN75_SURFACE_DW7_SCS_B
) |
914 GEN_SHIFT32(GEN75_SCS_ALPHA
, GEN75_SURFACE_DW7_SCS_A
);
919 view_init_for_texture_gen7(const struct ilo_dev
*dev
,
920 const struct ilo_texture
*tex
,
921 enum pipe_format format
,
922 unsigned first_level
,
924 unsigned first_layer
,
927 struct ilo_view_surface
*surf
)
929 int surface_type
, surface_format
;
930 int width
, height
, depth
, pitch
, lod
;
933 ILO_DEV_ASSERT(dev
, 7, 8);
935 surface_type
= ilo_gpe_gen6_translate_texture(tex
->base
.target
);
936 assert(surface_type
!= GEN6_SURFTYPE_BUFFER
);
938 if (format
== PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
&& tex
->separate_s8
)
939 format
= PIPE_FORMAT_Z32_FLOAT
;
942 surface_format
= ilo_format_translate_render(dev
, format
);
944 surface_format
= ilo_format_translate_texture(dev
, format
);
945 assert(surface_format
>= 0);
947 width
= tex
->image
.width0
;
948 height
= tex
->image
.height0
;
949 depth
= (tex
->base
.target
== PIPE_TEXTURE_3D
) ?
950 tex
->base
.depth0
: num_layers
;
951 pitch
= tex
->image
.bo_stride
;
953 if (surface_type
== GEN6_SURFTYPE_CUBE
) {
955 * From the Ivy Bridge PRM, volume 4 part 1, page 70:
957 * "For SURFTYPE_CUBE:For Sampling Engine Surfaces, the range of
958 * this field is [0,340], indicating the number of cube array
959 * elements (equal to the number of underlying 2D array elements
960 * divided by 6). For other surfaces, this field must be zero."
962 * When is_rt is true, we treat the texture as a 2D one to avoid the
966 surface_type
= GEN6_SURFTYPE_2D
;
969 assert(num_layers
% 6 == 0);
970 depth
= num_layers
/ 6;
974 /* sanity check the size */
975 assert(width
>= 1 && height
>= 1 && depth
>= 1 && pitch
>= 1);
976 assert(first_layer
< 2048 && num_layers
<= 2048);
977 switch (surface_type
) {
978 case GEN6_SURFTYPE_1D
:
979 assert(width
<= 16384 && height
== 1 && depth
<= 2048);
981 case GEN6_SURFTYPE_2D
:
982 assert(width
<= 16384 && height
<= 16384 && depth
<= 2048);
984 case GEN6_SURFTYPE_3D
:
985 assert(width
<= 2048 && height
<= 2048 && depth
<= 2048);
987 assert(first_layer
== 0);
989 case GEN6_SURFTYPE_CUBE
:
990 assert(width
<= 16384 && height
<= 16384 && depth
<= 86);
991 assert(width
== height
);
993 assert(first_layer
== 0);
996 assert(!"unexpected surface type");
1001 assert(num_levels
== 1);
1005 lod
= num_levels
- 1;
1009 * From the Ivy Bridge PRM, volume 4 part 1, page 68:
1011 * "The Base Address for linear render target surfaces and surfaces
1012 * accessed with the typed surface read/write data port messages must
1013 * be element-size aligned, for non-YUV surface formats, or a multiple
1014 * of 2 element-sizes for YUV surface formats. Other linear surfaces
1015 * have no alignment requirements (byte alignment is sufficient)."
1017 * From the Ivy Bridge PRM, volume 4 part 1, page 70:
1019 * "For linear render target surfaces and surfaces accessed with the
1020 * typed data port messages, the pitch must be a multiple of the
1021 * element size for non-YUV surface formats. Pitch must be a multiple
1022 * of 2 * element size for YUV surface formats. For linear surfaces
1023 * with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple
1024 * of 4 bytes.For other linear surfaces, the pitch can be any multiple
1027 * From the Ivy Bridge PRM, volume 4 part 1, page 74:
1029 * "For linear surfaces, this field (X Offset) must be zero."
1031 if (tex
->image
.tiling
== GEN6_TILING_NONE
) {
1033 const int elem_size
= util_format_get_blocksize(format
);
1034 assert(pitch
% elem_size
== 0);
1038 STATIC_ASSERT(Elements(surf
->payload
) >= 13);
1041 dw
[0] = surface_type
<< GEN7_SURFACE_DW0_TYPE__SHIFT
|
1042 surface_format
<< GEN7_SURFACE_DW0_FORMAT__SHIFT
;
1045 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
1047 * "If this field (Surface Array) is enabled, the Surface Type must be
1048 * SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is
1049 * disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or
1050 * SURFTYPE_CUBE, the Depth field must be set to zero."
1052 * For non-3D sampler surfaces, resinfo (the sampler message) always
1053 * returns zero for the number of layers when this field is not set.
1055 if (surface_type
!= GEN6_SURFTYPE_3D
) {
1056 if (util_resource_is_array_texture(&tex
->base
))
1057 dw
[0] |= GEN7_SURFACE_DW0_IS_ARRAY
;
1062 if (ilo_dev_gen(dev
) >= ILO_GEN(8)) {
1063 switch (tex
->image
.align_j
) {
1065 dw
[0] |= GEN7_SURFACE_DW0_VALIGN_4
;
1068 dw
[0] |= GEN8_SURFACE_DW0_VALIGN_8
;
1071 dw
[0] |= GEN8_SURFACE_DW0_VALIGN_16
;
1074 assert(!"unsupported valign");
1078 switch (tex
->image
.align_i
) {
1080 dw
[0] |= GEN8_SURFACE_DW0_HALIGN_4
;
1083 dw
[0] |= GEN8_SURFACE_DW0_HALIGN_8
;
1086 dw
[0] |= GEN8_SURFACE_DW0_HALIGN_16
;
1089 assert(!"unsupported halign");
1093 dw
[0] |= tex
->image
.tiling
<< GEN8_SURFACE_DW0_TILING__SHIFT
;
1095 assert(tex
->image
.align_i
== 4 || tex
->image
.align_i
== 8);
1096 assert(tex
->image
.align_j
== 2 || tex
->image
.align_j
== 4);
1098 if (tex
->image
.align_j
== 4)
1099 dw
[0] |= GEN7_SURFACE_DW0_VALIGN_4
;
1101 if (tex
->image
.align_i
== 8)
1102 dw
[0] |= GEN7_SURFACE_DW0_HALIGN_8
;
1104 assert(tex
->image
.tiling
!= GEN8_TILING_W
);
1105 dw
[0] |= tex
->image
.tiling
<< GEN7_SURFACE_DW0_TILING__SHIFT
;
1107 if (tex
->image
.walk
== ILO_IMAGE_WALK_LOD
)
1108 dw
[0] |= GEN7_SURFACE_DW0_ARYSPC_LOD0
;
1110 dw
[0] |= GEN7_SURFACE_DW0_ARYSPC_FULL
;
1114 dw
[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW
;
1116 if (surface_type
== GEN6_SURFTYPE_CUBE
&& !is_rt
)
1117 dw
[0] |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK
;
1119 if (ilo_dev_gen(dev
) >= ILO_GEN(8)) {
1120 assert(tex
->image
.walk_layer_height
% 4 == 0);
1121 dw
[1] = tex
->image
.walk_layer_height
/ 4;
1126 dw
[2] = GEN_SHIFT32(height
- 1, GEN7_SURFACE_DW2_HEIGHT
) |
1127 GEN_SHIFT32(width
- 1, GEN7_SURFACE_DW2_WIDTH
);
1129 dw
[3] = GEN_SHIFT32(depth
- 1, GEN7_SURFACE_DW3_DEPTH
) |
1132 dw
[4] = first_layer
<< 18 |
1133 (num_layers
- 1) << 7;
1136 * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL
1137 * means the samples are interleaved. The layouts are the same when the
1138 * number of samples is 1.
1140 if (tex
->image
.interleaved_samples
&& tex
->base
.nr_samples
> 1) {
1142 dw
[4] |= GEN7_SURFACE_DW4_MSFMT_DEPTH_STENCIL
;
1145 dw
[4] |= GEN7_SURFACE_DW4_MSFMT_MSS
;
1148 switch (tex
->base
.nr_samples
) {
1152 dw
[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_1
;
1155 dw
[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_2
;
1158 dw
[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_4
;
1161 dw
[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_8
;
1164 dw
[4] |= GEN8_SURFACE_DW4_MULTISAMPLECOUNT_16
;
1168 dw
[5] = GEN_SHIFT32(first_level
, GEN7_SURFACE_DW5_MIN_LOD
) |
1174 if (ilo_dev_gen(dev
) >= ILO_GEN(7.5)) {
1175 dw
[7] |= GEN_SHIFT32(GEN75_SCS_RED
, GEN75_SURFACE_DW7_SCS_R
) |
1176 GEN_SHIFT32(GEN75_SCS_GREEN
, GEN75_SURFACE_DW7_SCS_G
) |
1177 GEN_SHIFT32(GEN75_SCS_BLUE
, GEN75_SURFACE_DW7_SCS_B
) |
1178 GEN_SHIFT32(GEN75_SCS_ALPHA
, GEN75_SURFACE_DW7_SCS_A
);
1181 if (ilo_dev_gen(dev
) >= ILO_GEN(8))
1182 memset(&dw
[8], 0, sizeof(*dw
) * (13 - 8));
/*
 * Set up surf through the null-surface helpers, forwarding width, height,
 * depth and level unchanged.
 *
 * view_init_null_gen7() is used when the device generation is 7 or newer.
 * NOTE(review): view_init_null_gen6() looks like the fallback for older
 * generations, but the line joining the two calls is not visible here;
 * confirm against the full file.
 */
1186 ilo_gpe_init_view_surface_null(const struct ilo_dev
*dev
,
1187 unsigned width
, unsigned height
,
1188 unsigned depth
, unsigned level
,
1189 struct ilo_view_surface
*surf
)
1191 if (ilo_dev_gen(dev
) >= ILO_GEN(7)) {
1192 view_init_null_gen7(dev
,
1193 width
, height
, depth
, level
, surf
);
1195 view_init_null_gen6(dev
,
1196 width
, height
, depth
, level
, surf
);
/* the scanout flag is always cleared on this path */
1200 surf
->scanout
= false;
/*
 * Set up surf as a view of a buffer.
 *
 * dev, buf, offset, size, struct_size, elem_format, is_rt and
 * render_cache_rw are forwarded unchanged to the generation-specific helper:
 * view_init_for_buffer_gen7() when the device generation is 7 or newer.
 * NOTE(review): view_init_for_buffer_gen6() looks like the fallback for
 * older generations, but the line joining the two calls is not visible
 * here; confirm against the full file.
 *
 * The scanout flag is always cleared on this path.
 */
1204 ilo_gpe_init_view_surface_for_buffer(const struct ilo_dev
*dev
,
1205 const struct ilo_buffer
*buf
,
1206 unsigned offset
, unsigned size
,
1207 unsigned struct_size
,
1208 enum pipe_format elem_format
,
1209 bool is_rt
, bool render_cache_rw
,
1210 struct ilo_view_surface
*surf
)
1212 if (ilo_dev_gen(dev
) >= ILO_GEN(7)) {
1213 view_init_for_buffer_gen7(dev
, buf
, offset
, size
,
1214 struct_size
, elem_format
, is_rt
, render_cache_rw
, surf
);
1216 view_init_for_buffer_gen6(dev
, buf
, offset
, size
,
1217 struct_size
, elem_format
, is_rt
, render_cache_rw
, surf
);
1220 /* do not increment reference count */
1222 surf
->scanout
= false;
/*
 * Set up surf as a view of a texture.
 *
 * dev, tex, format and the level/layer range are forwarded to
 * view_init_for_texture_gen7() when the device generation is 7 or newer.
 * NOTE(review): view_init_for_texture_gen6() looks like the fallback for
 * older generations, and both calls take further arguments that are not
 * visible here; confirm against the full file.
 *
 * Afterwards surf->bo is pointed at tex->image.bo without taking a
 * reference, and surf->scanout is derived from the texture's bind flags.
 */
1226 ilo_gpe_init_view_surface_for_texture(const struct ilo_dev
*dev
,
1227 const struct ilo_texture
*tex
,
1228 enum pipe_format format
,
1229 unsigned first_level
,
1230 unsigned num_levels
,
1231 unsigned first_layer
,
1232 unsigned num_layers
,
1234 struct ilo_view_surface
*surf
)
1236 if (ilo_dev_gen(dev
) >= ILO_GEN(7)) {
1237 view_init_for_texture_gen7(dev
, tex
, format
,
1238 first_level
, num_levels
, first_layer
, num_layers
,
1241 view_init_for_texture_gen6(dev
, tex
, format
,
1242 first_level
, num_levels
, first_layer
, num_layers
,
1246 /* do not increment reference count */
1247 surf
->bo
= tex
->image
.bo
;
1249 /* assume imported RTs are scanouts */
/*
 * scanout is set when the texture is bound with PIPE_BIND_SCANOUT, or when
 * it is imported and bound with PIPE_BIND_RENDER_TARGET.
 */
1250 surf
->scanout
= ((tex
->base
.bind
& PIPE_BIND_SCANOUT
) ||
1251 (tex
->imported
&& (tex
->base
.bind
& PIPE_BIND_RENDER_TARGET
)));
1255 sampler_init_border_color_gen6(const struct ilo_dev
*dev
,
1256 const union pipe_color_union
*color
,
1257 uint32_t *dw
, int num_dwords
)
1260 color
->f
[0], color
->f
[1], color
->f
[2], color
->f
[3],
1263 ILO_DEV_ASSERT(dev
, 6, 6);
1265 assert(num_dwords
>= 12);
1268 * This state is not documented in the Sandy Bridge PRM, but in the
1269 * Ironlake PRM. SNORM8 seems to be in DW11 instead of DW1.
1273 dw
[1] = fui(rgba
[0]);
1274 dw
[2] = fui(rgba
[1]);
1275 dw
[3] = fui(rgba
[2]);
1276 dw
[4] = fui(rgba
[3]);
1279 dw
[5] = util_float_to_half(rgba
[0]) |
1280 util_float_to_half(rgba
[1]) << 16;
1281 dw
[6] = util_float_to_half(rgba
[2]) |
1282 util_float_to_half(rgba
[3]) << 16;
1284 /* clamp to [-1.0f, 1.0f] */
1285 rgba
[0] = CLAMP(rgba
[0], -1.0f
, 1.0f
);
1286 rgba
[1] = CLAMP(rgba
[1], -1.0f
, 1.0f
);
1287 rgba
[2] = CLAMP(rgba
[2], -1.0f
, 1.0f
);
1288 rgba
[3] = CLAMP(rgba
[3], -1.0f
, 1.0f
);
1291 dw
[9] = (int16_t) util_iround(rgba
[0] * 32767.0f
) |
1292 (int16_t) util_iround(rgba
[1] * 32767.0f
) << 16;
1293 dw
[10] = (int16_t) util_iround(rgba
[2] * 32767.0f
) |
1294 (int16_t) util_iround(rgba
[3] * 32767.0f
) << 16;
1297 dw
[11] = (int8_t) util_iround(rgba
[0] * 127.0f
) |
1298 (int8_t) util_iround(rgba
[1] * 127.0f
) << 8 |
1299 (int8_t) util_iround(rgba
[2] * 127.0f
) << 16 |
1300 (int8_t) util_iround(rgba
[3] * 127.0f
) << 24;
1302 /* clamp to [0.0f, 1.0f] */
1303 rgba
[0] = CLAMP(rgba
[0], 0.0f
, 1.0f
);
1304 rgba
[1] = CLAMP(rgba
[1], 0.0f
, 1.0f
);
1305 rgba
[2] = CLAMP(rgba
[2], 0.0f
, 1.0f
);
1306 rgba
[3] = CLAMP(rgba
[3], 0.0f
, 1.0f
);
1309 dw
[0] = (uint8_t) util_iround(rgba
[0] * 255.0f
) |
1310 (uint8_t) util_iround(rgba
[1] * 255.0f
) << 8 |
1311 (uint8_t) util_iround(rgba
[2] * 255.0f
) << 16 |
1312 (uint8_t) util_iround(rgba
[3] * 255.0f
) << 24;
1315 dw
[7] = (uint16_t) util_iround(rgba
[0] * 65535.0f
) |
1316 (uint16_t) util_iround(rgba
[1] * 65535.0f
) << 16;
1317 dw
[8] = (uint16_t) util_iround(rgba
[2] * 65535.0f
) |
1318 (uint16_t) util_iround(rgba
[3] * 65535.0f
) << 16;
1322 * Translate a pipe texture mipfilter to the matching hardware mipfilter.
/*
 * PIPE_TEX_MIPFILTER_{NEAREST,LINEAR,NONE} map one-to-one onto
 * GEN6_MIPFILTER_{NEAREST,LINEAR,NONE}.
 *
 * NOTE(review): the assert and the final GEN6_MIPFILTER_NONE return
 * presumably handle any other value; the enclosing switch and default label
 * are not visible here.
 */
1325 gen6_translate_tex_mipfilter(unsigned filter
)
1328 case PIPE_TEX_MIPFILTER_NEAREST
: return GEN6_MIPFILTER_NEAREST
;
1329 case PIPE_TEX_MIPFILTER_LINEAR
: return GEN6_MIPFILTER_LINEAR
;
1330 case PIPE_TEX_MIPFILTER_NONE
: return GEN6_MIPFILTER_NONE
;
1332 assert(!"unknown mipfilter");
1333 return GEN6_MIPFILTER_NONE
;
1338 * Translate a pipe texture filter to the matching hardware mapfilter.
/*
 * PIPE_TEX_FILTER_NEAREST and PIPE_TEX_FILTER_LINEAR map onto
 * GEN6_MAPFILTER_NEAREST and GEN6_MAPFILTER_LINEAR.
 *
 * NOTE(review): the assert and the final GEN6_MAPFILTER_NEAREST return
 * presumably handle any other value; the enclosing switch and default label
 * are not visible here.
 */
1341 gen6_translate_tex_filter(unsigned filter
)
1344 case PIPE_TEX_FILTER_NEAREST
: return GEN6_MAPFILTER_NEAREST
;
1345 case PIPE_TEX_FILTER_LINEAR
: return GEN6_MAPFILTER_LINEAR
;
1347 assert(!"unknown sampler filter");
1348 return GEN6_MAPFILTER_NEAREST
;
1353 * Translate a pipe texture coordinate wrapping mode to the matching hardware
/*
 * Mapping implemented below:
 *
 *   PIPE_TEX_WRAP_CLAMP           -> GEN8_TEXCOORDMODE_HALF_BORDER
 *   PIPE_TEX_WRAP_REPEAT          -> GEN6_TEXCOORDMODE_WRAP
 *   PIPE_TEX_WRAP_CLAMP_TO_EDGE   -> GEN6_TEXCOORDMODE_CLAMP
 *   PIPE_TEX_WRAP_CLAMP_TO_BORDER -> GEN6_TEXCOORDMODE_CLAMP_BORDER
 *   PIPE_TEX_WRAP_MIRROR_REPEAT   -> GEN6_TEXCOORDMODE_MIRROR
 *
 * PIPE_TEX_WRAP_CLAMP yields a Gen8 mode on every generation;
 * ilo_gpe_init_sampler_cso() replaces it with CLAMP or CLAMP_BORDER when the
 * device is older than Gen8.
 *
 * The three PIPE_TEX_WRAP_MIRROR_CLAMP* labels carry no return of their own,
 * so they reach the assert and the GEN6_TEXCOORDMODE_WRAP fallback below.
 */
1357 gen6_translate_tex_wrap(unsigned wrap
)
1360 case PIPE_TEX_WRAP_CLAMP
: return GEN8_TEXCOORDMODE_HALF_BORDER
;
1361 case PIPE_TEX_WRAP_REPEAT
: return GEN6_TEXCOORDMODE_WRAP
;
1362 case PIPE_TEX_WRAP_CLAMP_TO_EDGE
: return GEN6_TEXCOORDMODE_CLAMP
;
1363 case PIPE_TEX_WRAP_CLAMP_TO_BORDER
: return GEN6_TEXCOORDMODE_CLAMP_BORDER
;
1364 case PIPE_TEX_WRAP_MIRROR_REPEAT
: return GEN6_TEXCOORDMODE_MIRROR
;
1365 case PIPE_TEX_WRAP_MIRROR_CLAMP
:
1366 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE
:
1367 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER
:
1369 assert(!"unknown sampler wrap mode");
1370 return GEN6_TEXCOORDMODE_WRAP
;
1375 * Translate a pipe shadow compare function to the matching hardware shadow
/*
 * The table below is deliberately not an identity mapping.  Per the notes
 * inside the function, the two conventions differ both in which side the
 * reference value sits on and in which result (1.0 vs 0.0) means "true", so
 * each PIPE_FUNC_x maps to the GEN6_COMPAREFUNCTION_x that is its logical
 * negation with the operands swapped (e.g. LESS -> LEQUAL, NEVER -> ALWAYS).
 *
 * NOTE(review): the assert and the final GEN6_COMPAREFUNCTION_NEVER return
 * presumably handle any other value; the enclosing switch and default label
 * are not visible here.
 */
1379 gen6_translate_shadow_func(unsigned func
)
1382 * For PIPE_FUNC_x, the reference value is on the left-hand side of the
1383 * comparison, and 1.0 is returned when the comparison is true.
1385 * For GEN6_COMPAREFUNCTION_x, the reference value is on the right-hand side of
1386 * the comparison, and 0.0 is returned when the comparison is true.
1389 case PIPE_FUNC_NEVER
: return GEN6_COMPAREFUNCTION_ALWAYS
;
1390 case PIPE_FUNC_LESS
: return GEN6_COMPAREFUNCTION_LEQUAL
;
1391 case PIPE_FUNC_EQUAL
: return GEN6_COMPAREFUNCTION_NOTEQUAL
;
1392 case PIPE_FUNC_LEQUAL
: return GEN6_COMPAREFUNCTION_LESS
;
1393 case PIPE_FUNC_GREATER
: return GEN6_COMPAREFUNCTION_GEQUAL
;
1394 case PIPE_FUNC_NOTEQUAL
: return GEN6_COMPAREFUNCTION_EQUAL
;
1395 case PIPE_FUNC_GEQUAL
: return GEN6_COMPAREFUNCTION_GREATER
;
1396 case PIPE_FUNC_ALWAYS
: return GEN6_COMPAREFUNCTION_NEVER
;
1398 assert(!"unknown shadow compare function");
1399 return GEN6_COMPAREFUNCTION_NEVER
;
1404 ilo_gpe_init_sampler_cso(const struct ilo_dev
*dev
,
1405 const struct pipe_sampler_state
*state
,
1406 struct ilo_sampler_cso
*sampler
)
1408 int mip_filter
, min_filter
, mag_filter
, max_aniso
;
1409 int lod_bias
, max_lod
, min_lod
;
1410 int wrap_s
, wrap_t
, wrap_r
, wrap_cube
;
1411 uint32_t dw0
, dw1
, dw3
;
1413 ILO_DEV_ASSERT(dev
, 6, 8);
1415 memset(sampler
, 0, sizeof(*sampler
));
1417 mip_filter
= gen6_translate_tex_mipfilter(state
->min_mip_filter
);
1418 min_filter
= gen6_translate_tex_filter(state
->min_img_filter
);
1419 mag_filter
= gen6_translate_tex_filter(state
->mag_img_filter
);
1421 sampler
->anisotropic
= state
->max_anisotropy
;
1423 if (state
->max_anisotropy
>= 2 && state
->max_anisotropy
<= 16)
1424 max_aniso
= state
->max_anisotropy
/ 2 - 1;
1425 else if (state
->max_anisotropy
> 16)
1426 max_aniso
= GEN6_ANISORATIO_16
;
1428 max_aniso
= GEN6_ANISORATIO_2
;
1432 * Here is how the hardware calculates per-pixel LOD, from my reading of the
1435 * 1) LOD is set to log2(ratio of texels to pixels) if not specified in
1436 * other ways. The number of texels is measured using level
1438 * 2) Bias is added to LOD.
1439 * 3) LOD is clamped to [MinLod, MaxLod], and the clamped value is
1440 * compared with Base to determine whether magnification or
1441 * minification is needed. (if preclamp is disabled, LOD is compared
1442 * with Base before clamping)
1443 * 4) If magnification is needed, or no mipmapping is requested, LOD is
1444 * set to floor(MinLod).
1445 * 5) LOD is clamped to [0, MIPCnt], and SurfMinLod is added to LOD.
1447 * With Gallium interface, Base is always zero and
1448 * pipe_sampler_view::u.tex.first_level specifies SurfMinLod.
1450 if (ilo_dev_gen(dev
) >= ILO_GEN(7)) {
1451 const float scale
= 256.0f
;
1453 /* [-16.0, 16.0) in S4.8 */
1455 (CLAMP(state
->lod_bias
, -16.0f
, 15.9f
) * scale
);
1458 /* [0.0, 14.0] in U4.8 */
1459 max_lod
= (int) (CLAMP(state
->max_lod
, 0.0f
, 14.0f
) * scale
);
1460 min_lod
= (int) (CLAMP(state
->min_lod
, 0.0f
, 14.0f
) * scale
);
1463 const float scale
= 64.0f
;
1465 /* [-16.0, 16.0) in S4.6 */
1467 (CLAMP(state
->lod_bias
, -16.0f
, 15.9f
) * scale
);
1470 /* [0.0, 13.0] in U4.6 */
1471 max_lod
= (int) (CLAMP(state
->max_lod
, 0.0f
, 13.0f
) * scale
);
1472 min_lod
= (int) (CLAMP(state
->min_lod
, 0.0f
, 13.0f
) * scale
);
1476 * We want LOD to be clamped to determine magnification/minification, and
1477 * get set to zero when it is magnification or when mipmapping is disabled.
1478 * The hardware would set LOD to floor(MinLod) and that is a problem when
1479 * MinLod is greater than or equal to 1.0f.
1481 * With Base being zero, it is always minification when MinLod is non-zero.
1482 * To achieve our goal, we just need to set MinLod to zero and set
1483 * MagFilter to MinFilter when mipmapping is disabled.
1485 if (state
->min_mip_filter
== PIPE_TEX_MIPFILTER_NONE
&& min_lod
) {
1487 mag_filter
= min_filter
;
1490 /* determine wrap s/t/r */
1491 wrap_s
= gen6_translate_tex_wrap(state
->wrap_s
);
1492 wrap_t
= gen6_translate_tex_wrap(state
->wrap_t
);
1493 wrap_r
= gen6_translate_tex_wrap(state
->wrap_r
);
1494 if (ilo_dev_gen(dev
) < ILO_GEN(8)) {
1496 * For nearest filtering, PIPE_TEX_WRAP_CLAMP means
1497 * PIPE_TEX_WRAP_CLAMP_TO_EDGE; for linear filtering,
1498 * PIPE_TEX_WRAP_CLAMP means PIPE_TEX_WRAP_CLAMP_TO_BORDER while
1499 * additionally clamping the texture coordinates to [0.0, 1.0].
1501 * PIPE_TEX_WRAP_CLAMP is not supported natively until Gen8. The
1502 * clamping has to be taken care of in the shaders. There are two
1503 * filters here, but let the minification one have a say.
1505 const bool clamp_is_to_edge
=
1506 (state
->min_img_filter
== PIPE_TEX_FILTER_NEAREST
);
1508 if (clamp_is_to_edge
) {
1509 if (wrap_s
== GEN8_TEXCOORDMODE_HALF_BORDER
)
1510 wrap_s
= GEN6_TEXCOORDMODE_CLAMP
;
1511 if (wrap_t
== GEN8_TEXCOORDMODE_HALF_BORDER
)
1512 wrap_t
= GEN6_TEXCOORDMODE_CLAMP
;
1513 if (wrap_r
== GEN8_TEXCOORDMODE_HALF_BORDER
)
1514 wrap_r
= GEN6_TEXCOORDMODE_CLAMP
;
1516 if (wrap_s
== GEN8_TEXCOORDMODE_HALF_BORDER
) {
1517 wrap_s
= GEN6_TEXCOORDMODE_CLAMP_BORDER
;
1518 sampler
->saturate_s
= true;
1520 if (wrap_t
== GEN8_TEXCOORDMODE_HALF_BORDER
) {
1521 wrap_t
= GEN6_TEXCOORDMODE_CLAMP_BORDER
;
1522 sampler
->saturate_t
= true;
1524 if (wrap_r
== GEN8_TEXCOORDMODE_HALF_BORDER
) {
1525 wrap_r
= GEN6_TEXCOORDMODE_CLAMP_BORDER
;
1526 sampler
->saturate_r
= true;
1532 * From the Sandy Bridge PRM, volume 4 part 1, page 107:
1534 * "When using cube map texture coordinates, only TEXCOORDMODE_CLAMP
1535 * and TEXCOORDMODE_CUBE settings are valid, and each TC component
1536 * must have the same Address Control mode."
1538 * From the Ivy Bridge PRM, volume 4 part 1, page 96:
1540 * "This field (Cube Surface Control Mode) must be set to
1541 * CUBECTRLMODE_PROGRAMMED"
1543 * Therefore, we cannot use "Cube Surface Control Mode" for seamless cube
1546 if (state
->seamless_cube_map
&&
1547 (state
->min_img_filter
!= PIPE_TEX_FILTER_NEAREST
||
1548 state
->mag_img_filter
!= PIPE_TEX_FILTER_NEAREST
)) {
1549 wrap_cube
= GEN6_TEXCOORDMODE_CUBE
;
1552 wrap_cube
= GEN6_TEXCOORDMODE_CLAMP
;
1555 if (!state
->normalized_coords
) {
1557 * From the Ivy Bridge PRM, volume 4 part 1, page 98:
1559 * "The following state must be set as indicated if this field
1560 * (Non-normalized Coordinate Enable) is enabled:
1562 * - TCX/Y/Z Address Control Mode must be TEXCOORDMODE_CLAMP,
1563 * TEXCOORDMODE_HALF_BORDER, or TEXCOORDMODE_CLAMP_BORDER.
1564 * - Surface Type must be SURFTYPE_2D or SURFTYPE_3D.
1565 * - Mag Mode Filter must be MAPFILTER_NEAREST or
1567 * - Min Mode Filter must be MAPFILTER_NEAREST or
1569 * - Mip Mode Filter must be MIPFILTER_NONE.
1570 * - Min LOD must be 0.
1571 * - Max LOD must be 0.
1572 * - MIP Count must be 0.
1573 * - Surface Min LOD must be 0.
1574 * - Texture LOD Bias must be 0."
1576 assert(wrap_s
== GEN6_TEXCOORDMODE_CLAMP
||
1577 wrap_s
== GEN6_TEXCOORDMODE_CLAMP_BORDER
);
1578 assert(wrap_t
== GEN6_TEXCOORDMODE_CLAMP
||
1579 wrap_t
== GEN6_TEXCOORDMODE_CLAMP_BORDER
);
1580 assert(wrap_r
== GEN6_TEXCOORDMODE_CLAMP
||
1581 wrap_r
== GEN6_TEXCOORDMODE_CLAMP_BORDER
);
1583 assert(mag_filter
== GEN6_MAPFILTER_NEAREST
||
1584 mag_filter
== GEN6_MAPFILTER_LINEAR
);
1585 assert(min_filter
== GEN6_MAPFILTER_NEAREST
||
1586 min_filter
== GEN6_MAPFILTER_LINEAR
);
1588 /* work around a bug in util_blitter */
1589 mip_filter
= GEN6_MIPFILTER_NONE
;
1591 assert(mip_filter
== GEN6_MIPFILTER_NONE
);
1594 if (ilo_dev_gen(dev
) >= ILO_GEN(7)) {
1599 sampler
->dw_filter
= mag_filter
<< 17 |
1602 sampler
->dw_filter_aniso
= GEN6_MAPFILTER_ANISOTROPIC
<< 17 |
1603 GEN6_MAPFILTER_ANISOTROPIC
<< 14 |
1606 dw1
= min_lod
<< 20 |
1609 if (state
->compare_mode
!= PIPE_TEX_COMPARE_NONE
)
1610 dw1
|= gen6_translate_shadow_func(state
->compare_func
) << 1;
1612 dw3
= max_aniso
<< 19;
1614 /* round the coordinates for linear filtering */
1615 if (min_filter
!= GEN6_MAPFILTER_NEAREST
) {
1616 dw3
|= (GEN6_SAMPLER_DW3_U_MIN_ROUND
|
1617 GEN6_SAMPLER_DW3_V_MIN_ROUND
|
1618 GEN6_SAMPLER_DW3_R_MIN_ROUND
);
1620 if (mag_filter
!= GEN6_MAPFILTER_NEAREST
) {
1621 dw3
|= (GEN6_SAMPLER_DW3_U_MAG_ROUND
|
1622 GEN6_SAMPLER_DW3_V_MAG_ROUND
|
1623 GEN6_SAMPLER_DW3_R_MAG_ROUND
);
1626 if (!state
->normalized_coords
)
1629 sampler
->dw_wrap
= wrap_s
<< 6 |
1634 * As noted in the classic i965 driver, the HW may still reference
1635 * wrap_t and wrap_r for 1D textures. We need to set them to a safe
1638 sampler
->dw_wrap_1d
= wrap_s
<< 6 |
1639 GEN6_TEXCOORDMODE_WRAP
<< 3 |
1640 GEN6_TEXCOORDMODE_WRAP
;
1642 sampler
->dw_wrap_cube
= wrap_cube
<< 6 |
1646 STATIC_ASSERT(Elements(sampler
->payload
) >= 7);
1648 sampler
->payload
[0] = dw0
;
1649 sampler
->payload
[1] = dw1
;
1650 sampler
->payload
[2] = dw3
;
1652 memcpy(&sampler
->payload
[3],
1653 state
->border_color
.ui
, sizeof(state
->border_color
.ui
));
1660 if (state
->compare_mode
!= PIPE_TEX_COMPARE_NONE
)
1661 dw0
|= gen6_translate_shadow_func(state
->compare_func
);
1663 sampler
->dw_filter
= (min_filter
!= mag_filter
) << 27 |
1667 sampler
->dw_filter_aniso
= GEN6_MAPFILTER_ANISOTROPIC
<< 17 |
1668 GEN6_MAPFILTER_ANISOTROPIC
<< 14;
1670 dw1
= min_lod
<< 22 |
1673 sampler
->dw_wrap
= wrap_s
<< 6 |
1677 sampler
->dw_wrap_1d
= wrap_s
<< 6 |
1678 GEN6_TEXCOORDMODE_WRAP
<< 3 |
1679 GEN6_TEXCOORDMODE_WRAP
;
1681 sampler
->dw_wrap_cube
= wrap_cube
<< 6 |
1685 dw3
= max_aniso
<< 19;
1687 /* round the coordinates for linear filtering */
1688 if (min_filter
!= GEN6_MAPFILTER_NEAREST
) {
1689 dw3
|= (GEN6_SAMPLER_DW3_U_MIN_ROUND
|
1690 GEN6_SAMPLER_DW3_V_MIN_ROUND
|
1691 GEN6_SAMPLER_DW3_R_MIN_ROUND
);
1693 if (mag_filter
!= GEN6_MAPFILTER_NEAREST
) {
1694 dw3
|= (GEN6_SAMPLER_DW3_U_MAG_ROUND
|
1695 GEN6_SAMPLER_DW3_V_MAG_ROUND
|
1696 GEN6_SAMPLER_DW3_R_MAG_ROUND
);
1699 if (!state
->normalized_coords
)
1702 STATIC_ASSERT(Elements(sampler
->payload
) >= 15);
1704 sampler
->payload
[0] = dw0
;
1705 sampler
->payload
[1] = dw1
;
1706 sampler
->payload
[2] = dw3
;
1708 sampler_init_border_color_gen6(dev
,
1709 &state
->border_color
, &sampler
->payload
[3], 12);