2 * Copyright © 2017 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 #include "anv_private.h"
27 #include "nir/nir_builder.h"
31 nir_ssa_def
*image_size
;
32 nir_tex_instr
*origin_tex
;
33 nir_deref_instr
*tex_deref
;
34 struct anv_ycbcr_conversion
*conversion
;
38 y_range(nir_builder
*b
,
39 nir_ssa_def
*y_channel
,
41 VkSamplerYcbcrRangeKHR range
)
44 case VK_SAMPLER_YCBCR_RANGE_ITU_FULL
:
46 case VK_SAMPLER_YCBCR_RANGE_ITU_NARROW
:
49 nir_fmul(b
, y_channel
,
50 nir_imm_float(b
, pow(2, bpc
) - 1)),
51 nir_imm_float(b
, -16.0f
* pow(2, bpc
- 8))),
52 nir_frcp(b
, nir_imm_float(b
, 219.0f
* pow(2, bpc
- 8))));
54 unreachable("missing Ycbcr range");
60 chroma_range(nir_builder
*b
,
61 nir_ssa_def
*chroma_channel
,
63 VkSamplerYcbcrRangeKHR range
)
66 case VK_SAMPLER_YCBCR_RANGE_ITU_FULL
:
67 return nir_fadd(b
, chroma_channel
,
68 nir_imm_float(b
, -pow(2, bpc
- 1) / (pow(2, bpc
) - 1.0f
)));
69 case VK_SAMPLER_YCBCR_RANGE_ITU_NARROW
:
72 nir_fmul(b
, chroma_channel
,
73 nir_imm_float(b
, pow(2, bpc
) - 1)),
74 nir_imm_float(b
, -128.0f
* pow(2, bpc
- 8))),
75 nir_frcp(b
, nir_imm_float(b
, 224.0f
* pow(2, bpc
- 8))));
77 unreachable("missing Ycbcr range");
82 static const nir_const_value
*
83 ycbcr_model_to_rgb_matrix(VkSamplerYcbcrModelConversionKHR model
)
86 case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601
: {
87 static const nir_const_value bt601
[3] = {
88 { .f32
= { 1.402f
, 1.0f
, 0.0f
, 0.0f
} },
89 { .f32
= { -0.714136286201022f
, 1.0f
, -0.344136286201022f
, 0.0f
} },
90 { .f32
= { 0.0f
, 1.0f
, 1.772f
, 0.0f
} }
95 case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709
: {
96 static const nir_const_value bt709
[3] = {
97 { .f32
= { 1.5748031496063f
, 1.0f
, 0.0, 0.0f
} },
98 { .f32
= { -0.468125209181067f
, 1.0f
, -0.187327487470334f
, 0.0f
} },
99 { .f32
= { 0.0f
, 1.0f
, 1.85563184264242f
, 0.0f
} }
104 case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020
: {
105 static const nir_const_value bt2020
[3] = {
106 { .f32
= { 1.4746f
, 1.0f
, 0.0f
, 0.0f
} },
107 { .f32
= { -0.571353126843658f
, 1.0f
, -0.164553126843658f
, 0.0f
} },
108 { .f32
= { 0.0f
, 1.0f
, 1.8814f
, 0.0f
} }
114 unreachable("missing Ycbcr model");
120 convert_ycbcr(struct ycbcr_state
*state
,
121 nir_ssa_def
*raw_channels
,
124 nir_builder
*b
= state
->builder
;
125 struct anv_ycbcr_conversion
*conversion
= state
->conversion
;
127 nir_ssa_def
*expanded_channels
=
129 chroma_range(b
, nir_channel(b
, raw_channels
, 0),
130 bpcs
[0], conversion
->ycbcr_range
),
131 y_range(b
, nir_channel(b
, raw_channels
, 1),
132 bpcs
[1], conversion
->ycbcr_range
),
133 chroma_range(b
, nir_channel(b
, raw_channels
, 2),
134 bpcs
[2], conversion
->ycbcr_range
),
135 nir_imm_float(b
, 1.0f
));
137 if (conversion
->ycbcr_model
== VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY
)
138 return expanded_channels
;
140 const nir_const_value
*conversion_matrix
=
141 ycbcr_model_to_rgb_matrix(conversion
->ycbcr_model
);
143 nir_ssa_def
*converted_channels
[] = {
144 nir_fdot4(b
, expanded_channels
, nir_build_imm(b
, 4, 32, conversion_matrix
[0])),
145 nir_fdot4(b
, expanded_channels
, nir_build_imm(b
, 4, 32, conversion_matrix
[1])),
146 nir_fdot4(b
, expanded_channels
, nir_build_imm(b
, 4, 32, conversion_matrix
[2]))
150 converted_channels
[0], converted_channels
[1],
151 converted_channels
[2], nir_imm_float(b
, 1.0f
));
154 /* TODO: we should probably replace this with a push constant/uniform. */
156 get_texture_size(struct ycbcr_state
*state
, nir_deref_instr
*texture
)
158 if (state
->image_size
)
159 return state
->image_size
;
161 nir_builder
*b
= state
->builder
;
162 const struct glsl_type
*type
= texture
->type
;
163 nir_tex_instr
*tex
= nir_tex_instr_create(b
->shader
, 1);
165 tex
->op
= nir_texop_txs
;
166 tex
->sampler_dim
= glsl_get_sampler_dim(type
);
167 tex
->is_array
= glsl_sampler_type_is_array(type
);
168 tex
->is_shadow
= glsl_sampler_type_is_shadow(type
);
169 tex
->dest_type
= nir_type_int
;
171 tex
->src
[0].src_type
= nir_tex_src_texture_deref
;
172 tex
->src
[0].src
= nir_src_for_ssa(&texture
->dest
.ssa
);
174 nir_ssa_dest_init(&tex
->instr
, &tex
->dest
,
175 nir_tex_instr_dest_size(tex
), 32, NULL
);
176 nir_builder_instr_insert(b
, &tex
->instr
);
178 state
->image_size
= nir_i2f32(b
, &tex
->dest
.ssa
);
180 return state
->image_size
;
184 implicit_downsampled_coord(nir_builder
*b
,
186 nir_ssa_def
*max_value
,
192 nir_imm_float(b
, 1.0f
),
194 nir_imm_float(b
, div_scale
),
199 implicit_downsampled_coords(struct ycbcr_state
*state
,
200 nir_ssa_def
*old_coords
,
201 const struct anv_format_plane
*plane_format
)
203 nir_builder
*b
= state
->builder
;
204 struct anv_ycbcr_conversion
*conversion
= state
->conversion
;
205 nir_ssa_def
*image_size
= get_texture_size(state
, state
->tex_deref
);
206 nir_ssa_def
*comp
[4] = { NULL
, };
209 for (c
= 0; c
< ARRAY_SIZE(conversion
->chroma_offsets
); c
++) {
210 if (plane_format
->denominator_scales
[c
] > 1 &&
211 conversion
->chroma_offsets
[c
] == VK_CHROMA_LOCATION_COSITED_EVEN
) {
212 comp
[c
] = implicit_downsampled_coord(b
,
213 nir_channel(b
, old_coords
, c
),
214 nir_channel(b
, image_size
, c
),
215 plane_format
->denominator_scales
[c
]);
217 comp
[c
] = nir_channel(b
, old_coords
, c
);
221 /* Leave other coordinates untouched */
222 for (; c
< old_coords
->num_components
; c
++)
223 comp
[c
] = nir_channel(b
, old_coords
, c
);
225 return nir_vec(b
, comp
, old_coords
->num_components
);
229 create_plane_tex_instr_implicit(struct ycbcr_state
*state
,
232 nir_builder
*b
= state
->builder
;
233 struct anv_ycbcr_conversion
*conversion
= state
->conversion
;
234 const struct anv_format_plane
*plane_format
=
235 &conversion
->format
->planes
[plane
];
236 nir_tex_instr
*old_tex
= state
->origin_tex
;
237 nir_tex_instr
*tex
= nir_tex_instr_create(b
->shader
, old_tex
->num_srcs
+ 1);
239 for (uint32_t i
= 0; i
< old_tex
->num_srcs
; i
++) {
240 tex
->src
[i
].src_type
= old_tex
->src
[i
].src_type
;
242 switch (old_tex
->src
[i
].src_type
) {
243 case nir_tex_src_coord
:
244 if (plane_format
->has_chroma
&& conversion
->chroma_reconstruction
) {
245 assert(old_tex
->src
[i
].src
.is_ssa
);
247 nir_src_for_ssa(implicit_downsampled_coords(state
,
248 old_tex
->src
[i
].src
.ssa
,
254 nir_src_copy(&tex
->src
[i
].src
, &old_tex
->src
[i
].src
, tex
);
258 tex
->src
[tex
->num_srcs
- 1].src
= nir_src_for_ssa(nir_imm_int(b
, plane
));
259 tex
->src
[tex
->num_srcs
- 1].src_type
= nir_tex_src_plane
;
261 tex
->sampler_dim
= old_tex
->sampler_dim
;
262 tex
->dest_type
= old_tex
->dest_type
;
264 tex
->op
= old_tex
->op
;
265 tex
->coord_components
= old_tex
->coord_components
;
266 tex
->is_new_style_shadow
= old_tex
->is_new_style_shadow
;
267 tex
->component
= old_tex
->component
;
269 tex
->texture_index
= old_tex
->texture_index
;
270 tex
->texture_array_size
= old_tex
->texture_array_size
;
271 tex
->sampler_index
= old_tex
->sampler_index
;
273 nir_ssa_dest_init(&tex
->instr
, &tex
->dest
,
274 old_tex
->dest
.ssa
.num_components
,
275 nir_dest_bit_size(old_tex
->dest
), NULL
);
276 nir_builder_instr_insert(b
, &tex
->instr
);
278 return &tex
->dest
.ssa
;
282 channel_to_component(enum isl_channel_select channel
)
285 case ISL_CHANNEL_SELECT_RED
:
287 case ISL_CHANNEL_SELECT_GREEN
:
289 case ISL_CHANNEL_SELECT_BLUE
:
291 case ISL_CHANNEL_SELECT_ALPHA
:
294 unreachable("invalid channel");
299 static enum isl_channel_select
300 swizzle_channel(struct isl_swizzle swizzle
, unsigned channel
)
312 unreachable("invalid channel");
318 try_lower_tex_ycbcr(struct anv_pipeline_layout
*layout
,
319 nir_builder
*builder
,
322 int deref_src_idx
= nir_tex_instr_src_index(tex
, nir_tex_src_texture_deref
);
323 assert(deref_src_idx
>= 0);
324 nir_deref_instr
*deref
= nir_src_as_deref(tex
->src
[deref_src_idx
].src
);
326 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
327 const struct anv_descriptor_set_layout
*set_layout
=
328 layout
->set
[var
->data
.descriptor_set
].layout
;
329 const struct anv_descriptor_set_binding_layout
*binding
=
330 &set_layout
->binding
[var
->data
.binding
];
332 /* For the following instructions, we don't apply any change and let the
333 * instruction apply to the first plane.
335 if (tex
->op
== nir_texop_txs
||
336 tex
->op
== nir_texop_query_levels
||
337 tex
->op
== nir_texop_lod
)
340 if (binding
->immutable_samplers
== NULL
)
343 unsigned texture_index
= tex
->texture_index
;
344 if (deref
->deref_type
!= nir_deref_type_var
) {
345 assert(deref
->deref_type
== nir_deref_type_array
);
346 nir_const_value
*const_index
= nir_src_as_const_value(deref
->arr
.index
);
349 size_t hw_binding_size
=
350 anv_descriptor_set_binding_layout_get_hw_size(binding
);
351 texture_index
+= MIN2(const_index
->u32
[0], hw_binding_size
- 1);
353 const struct anv_sampler
*sampler
=
354 binding
->immutable_samplers
[texture_index
];
356 if (sampler
->conversion
== NULL
)
359 struct ycbcr_state state
= {
363 .conversion
= sampler
->conversion
,
366 builder
->cursor
= nir_before_instr(&tex
->instr
);
368 const struct anv_format
*format
= state
.conversion
->format
;
369 const struct isl_format_layout
*y_isl_layout
= NULL
;
370 for (uint32_t p
= 0; p
< format
->n_planes
; p
++) {
371 if (!format
->planes
[p
].has_chroma
)
372 y_isl_layout
= isl_format_get_layout(format
->planes
[p
].isl_format
);
374 assert(y_isl_layout
!= NULL
);
375 uint8_t y_bpc
= y_isl_layout
->channels_array
[0].bits
;
377 /* |ycbcr_comp| holds components in the order : Cr-Y-Cb */
378 nir_ssa_def
*ycbcr_comp
[5] = { NULL
, NULL
, NULL
,
379 /* Use extra 2 channels for following swizzle */
380 nir_imm_float(builder
, 1.0f
),
381 nir_imm_float(builder
, 0.0f
),
383 uint8_t ycbcr_bpcs
[5];
384 memset(ycbcr_bpcs
, y_bpc
, sizeof(ycbcr_bpcs
));
386 /* Go through all the planes and gather the samples into a |ycbcr_comp|
387 * while applying a swizzle required by the spec:
389 * R, G, B should respectively map to Cr, Y, Cb
391 for (uint32_t p
= 0; p
< format
->n_planes
; p
++) {
392 const struct anv_format_plane
*plane_format
= &format
->planes
[p
];
393 nir_ssa_def
*plane_sample
= create_plane_tex_instr_implicit(&state
, p
);
395 for (uint32_t pc
= 0; pc
< 4; pc
++) {
396 enum isl_channel_select ycbcr_swizzle
=
397 swizzle_channel(plane_format
->ycbcr_swizzle
, pc
);
398 if (ycbcr_swizzle
== ISL_CHANNEL_SELECT_ZERO
)
401 unsigned ycbcr_component
= channel_to_component(ycbcr_swizzle
);
402 ycbcr_comp
[ycbcr_component
] = nir_channel(builder
, plane_sample
, pc
);
404 /* Also compute the number of bits for each component. */
405 const struct isl_format_layout
*isl_layout
=
406 isl_format_get_layout(plane_format
->isl_format
);
407 ycbcr_bpcs
[ycbcr_component
] = isl_layout
->channels_array
[pc
].bits
;
411 /* Now remaps components to the order specified by the conversion. */
412 nir_ssa_def
*swizzled_comp
[4] = { NULL
, };
413 uint32_t swizzled_bpcs
[4] = { 0, };
415 for (uint32_t i
= 0; i
< ARRAY_SIZE(state
.conversion
->mapping
); i
++) {
416 /* Maps to components in |ycbcr_comp| */
417 static const uint32_t swizzle_mapping
[] = {
418 [VK_COMPONENT_SWIZZLE_ZERO
] = 4,
419 [VK_COMPONENT_SWIZZLE_ONE
] = 3,
420 [VK_COMPONENT_SWIZZLE_R
] = 0,
421 [VK_COMPONENT_SWIZZLE_G
] = 1,
422 [VK_COMPONENT_SWIZZLE_B
] = 2,
423 [VK_COMPONENT_SWIZZLE_A
] = 3,
425 const VkComponentSwizzle m
= state
.conversion
->mapping
[i
];
427 if (m
== VK_COMPONENT_SWIZZLE_IDENTITY
) {
428 swizzled_comp
[i
] = ycbcr_comp
[i
];
429 swizzled_bpcs
[i
] = ycbcr_bpcs
[i
];
431 swizzled_comp
[i
] = ycbcr_comp
[swizzle_mapping
[m
]];
432 swizzled_bpcs
[i
] = ycbcr_bpcs
[swizzle_mapping
[m
]];
436 nir_ssa_def
*result
= nir_vec(builder
, swizzled_comp
, 4);
437 if (state
.conversion
->ycbcr_model
!= VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY
)
438 result
= convert_ycbcr(&state
, result
, swizzled_bpcs
);
440 nir_ssa_def_rewrite_uses(&tex
->dest
.ssa
, nir_src_for_ssa(result
));
441 nir_instr_remove(&tex
->instr
);
447 anv_nir_lower_ycbcr_textures(nir_shader
*shader
,
448 struct anv_pipeline_layout
*layout
)
450 bool progress
= false;
452 nir_foreach_function(function
, shader
) {
456 bool function_progress
= false;
458 nir_builder_init(&builder
, function
->impl
);
460 nir_foreach_block(block
, function
->impl
) {
461 nir_foreach_instr_safe(instr
, block
) {
462 if (instr
->type
!= nir_instr_type_tex
)
465 nir_tex_instr
*tex
= nir_instr_as_tex(instr
);
466 function_progress
|= try_lower_tex_ycbcr(layout
, &builder
, tex
);
470 if (function_progress
) {
471 nir_metadata_preserve(function
->impl
,
472 nir_metadata_block_index
|
473 nir_metadata_dominance
);
476 progress
|= function_progress
;