/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <math.h>

#include "anv_private.h"
#include "nir/nir_builder.h"
31 nir_ssa_def
*image_size
;
32 nir_tex_instr
*origin_tex
;
33 nir_deref_instr
*tex_deref
;
34 struct anv_ycbcr_conversion
*conversion
;
38 y_range(nir_builder
*b
,
39 nir_ssa_def
*y_channel
,
41 VkSamplerYcbcrRange range
)
44 case VK_SAMPLER_YCBCR_RANGE_ITU_FULL
:
46 case VK_SAMPLER_YCBCR_RANGE_ITU_NARROW
:
49 nir_fmul(b
, y_channel
,
50 nir_imm_float(b
, pow(2, bpc
) - 1)),
51 nir_imm_float(b
, -16.0f
* pow(2, bpc
- 8))),
52 nir_frcp(b
, nir_imm_float(b
, 219.0f
* pow(2, bpc
- 8))));
54 unreachable("missing Ycbcr range");
60 chroma_range(nir_builder
*b
,
61 nir_ssa_def
*chroma_channel
,
63 VkSamplerYcbcrRange range
)
66 case VK_SAMPLER_YCBCR_RANGE_ITU_FULL
:
67 return nir_fadd(b
, chroma_channel
,
68 nir_imm_float(b
, -pow(2, bpc
- 1) / (pow(2, bpc
) - 1.0f
)));
69 case VK_SAMPLER_YCBCR_RANGE_ITU_NARROW
:
72 nir_fmul(b
, chroma_channel
,
73 nir_imm_float(b
, pow(2, bpc
) - 1)),
74 nir_imm_float(b
, -128.0f
* pow(2, bpc
- 8))),
75 nir_frcp(b
, nir_imm_float(b
, 224.0f
* pow(2, bpc
- 8))));
77 unreachable("missing Ycbcr range");
82 typedef struct nir_const_value_3_4
{
83 nir_const_value v
[3][4];
84 } nir_const_value_3_4
;
86 static const nir_const_value_3_4
*
87 ycbcr_model_to_rgb_matrix(VkSamplerYcbcrModelConversion model
)
90 case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601
: {
91 static const nir_const_value_3_4 bt601
= { {
92 { { .f32
= 1.402f
}, { .f32
= 1.0f
}, { .f32
= 0.0f
}, { .f32
= 0.0f
} },
93 { { .f32
= -0.714136286201022f
}, { .f32
= 1.0f
}, { .f32
= -0.344136286201022f
}, { .f32
= 0.0f
} },
94 { { .f32
= 0.0f
}, { .f32
= 1.0f
}, { .f32
= 1.772f
}, { .f32
= 0.0f
} },
99 case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709
: {
100 static const nir_const_value_3_4 bt709
= { {
101 { { .f32
= 1.5748031496063f
}, { .f32
= 1.0f
}, { .f32
= 0.0f
}, { .f32
= 0.0f
} },
102 { { .f32
= -0.468125209181067f
}, { .f32
= 1.0f
}, { .f32
= -0.187327487470334f
}, { .f32
= 0.0f
} },
103 { { .f32
= 0.0f
}, { .f32
= 1.0f
}, { .f32
= 1.85563184264242f
}, { .f32
= 0.0f
} },
108 case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020
: {
109 static const nir_const_value_3_4 bt2020
= { {
110 { { .f32
= 1.4746f
}, { .f32
= 1.0f
}, { .f32
= 0.0f
}, { .f32
= 0.0f
} },
111 { { .f32
= -0.571353126843658f
}, { .f32
= 1.0f
}, { .f32
= -0.164553126843658f
}, { .f32
= 0.0f
} },
112 { { .f32
= 0.0f
}, { .f32
= 1.0f
}, { .f32
= 1.8814f
}, { .f32
= 0.0f
} },
118 unreachable("missing Ycbcr model");
124 convert_ycbcr(struct ycbcr_state
*state
,
125 nir_ssa_def
*raw_channels
,
128 nir_builder
*b
= state
->builder
;
129 struct anv_ycbcr_conversion
*conversion
= state
->conversion
;
131 nir_ssa_def
*expanded_channels
=
133 chroma_range(b
, nir_channel(b
, raw_channels
, 0),
134 bpcs
[0], conversion
->ycbcr_range
),
135 y_range(b
, nir_channel(b
, raw_channels
, 1),
136 bpcs
[1], conversion
->ycbcr_range
),
137 chroma_range(b
, nir_channel(b
, raw_channels
, 2),
138 bpcs
[2], conversion
->ycbcr_range
),
139 nir_imm_float(b
, 1.0f
));
141 if (conversion
->ycbcr_model
== VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY
)
142 return expanded_channels
;
144 const nir_const_value_3_4
*conversion_matrix
=
145 ycbcr_model_to_rgb_matrix(conversion
->ycbcr_model
);
147 nir_ssa_def
*converted_channels
[] = {
148 nir_fdot4(b
, expanded_channels
, nir_build_imm(b
, 4, 32, conversion_matrix
->v
[0])),
149 nir_fdot4(b
, expanded_channels
, nir_build_imm(b
, 4, 32, conversion_matrix
->v
[1])),
150 nir_fdot4(b
, expanded_channels
, nir_build_imm(b
, 4, 32, conversion_matrix
->v
[2]))
154 converted_channels
[0], converted_channels
[1],
155 converted_channels
[2], nir_imm_float(b
, 1.0f
));
158 /* TODO: we should probably replace this with a push constant/uniform. */
160 get_texture_size(struct ycbcr_state
*state
, nir_deref_instr
*texture
)
162 if (state
->image_size
)
163 return state
->image_size
;
165 nir_builder
*b
= state
->builder
;
166 const struct glsl_type
*type
= texture
->type
;
167 nir_tex_instr
*tex
= nir_tex_instr_create(b
->shader
, 1);
169 tex
->op
= nir_texop_txs
;
170 tex
->sampler_dim
= glsl_get_sampler_dim(type
);
171 tex
->is_array
= glsl_sampler_type_is_array(type
);
172 tex
->is_shadow
= glsl_sampler_type_is_shadow(type
);
173 tex
->dest_type
= nir_type_int
;
175 tex
->src
[0].src_type
= nir_tex_src_texture_deref
;
176 tex
->src
[0].src
= nir_src_for_ssa(&texture
->dest
.ssa
);
178 nir_ssa_dest_init(&tex
->instr
, &tex
->dest
,
179 nir_tex_instr_dest_size(tex
), 32, NULL
);
180 nir_builder_instr_insert(b
, &tex
->instr
);
182 state
->image_size
= nir_i2f32(b
, &tex
->dest
.ssa
);
184 return state
->image_size
;
188 implicit_downsampled_coord(nir_builder
*b
,
190 nir_ssa_def
*max_value
,
196 nir_imm_float(b
, 1.0f
),
198 nir_imm_float(b
, div_scale
),
203 implicit_downsampled_coords(struct ycbcr_state
*state
,
204 nir_ssa_def
*old_coords
,
205 const struct anv_format_plane
*plane_format
)
207 nir_builder
*b
= state
->builder
;
208 struct anv_ycbcr_conversion
*conversion
= state
->conversion
;
209 nir_ssa_def
*image_size
= get_texture_size(state
, state
->tex_deref
);
210 nir_ssa_def
*comp
[4] = { NULL
, };
213 for (c
= 0; c
< ARRAY_SIZE(conversion
->chroma_offsets
); c
++) {
214 if (plane_format
->denominator_scales
[c
] > 1 &&
215 conversion
->chroma_offsets
[c
] == VK_CHROMA_LOCATION_COSITED_EVEN
) {
216 comp
[c
] = implicit_downsampled_coord(b
,
217 nir_channel(b
, old_coords
, c
),
218 nir_channel(b
, image_size
, c
),
219 plane_format
->denominator_scales
[c
]);
221 comp
[c
] = nir_channel(b
, old_coords
, c
);
225 /* Leave other coordinates untouched */
226 for (; c
< old_coords
->num_components
; c
++)
227 comp
[c
] = nir_channel(b
, old_coords
, c
);
229 return nir_vec(b
, comp
, old_coords
->num_components
);
233 create_plane_tex_instr_implicit(struct ycbcr_state
*state
,
236 nir_builder
*b
= state
->builder
;
237 struct anv_ycbcr_conversion
*conversion
= state
->conversion
;
238 const struct anv_format_plane
*plane_format
=
239 &conversion
->format
->planes
[plane
];
240 nir_tex_instr
*old_tex
= state
->origin_tex
;
241 nir_tex_instr
*tex
= nir_tex_instr_create(b
->shader
, old_tex
->num_srcs
+ 1);
243 for (uint32_t i
= 0; i
< old_tex
->num_srcs
; i
++) {
244 tex
->src
[i
].src_type
= old_tex
->src
[i
].src_type
;
246 switch (old_tex
->src
[i
].src_type
) {
247 case nir_tex_src_coord
:
248 if (plane_format
->has_chroma
&& conversion
->chroma_reconstruction
) {
249 assert(old_tex
->src
[i
].src
.is_ssa
);
251 nir_src_for_ssa(implicit_downsampled_coords(state
,
252 old_tex
->src
[i
].src
.ssa
,
258 nir_src_copy(&tex
->src
[i
].src
, &old_tex
->src
[i
].src
, tex
);
262 tex
->src
[tex
->num_srcs
- 1].src
= nir_src_for_ssa(nir_imm_int(b
, plane
));
263 tex
->src
[tex
->num_srcs
- 1].src_type
= nir_tex_src_plane
;
265 tex
->sampler_dim
= old_tex
->sampler_dim
;
266 tex
->dest_type
= old_tex
->dest_type
;
268 tex
->op
= old_tex
->op
;
269 tex
->coord_components
= old_tex
->coord_components
;
270 tex
->is_new_style_shadow
= old_tex
->is_new_style_shadow
;
271 tex
->component
= old_tex
->component
;
273 tex
->texture_index
= old_tex
->texture_index
;
274 tex
->texture_array_size
= old_tex
->texture_array_size
;
275 tex
->sampler_index
= old_tex
->sampler_index
;
276 tex
->is_array
= old_tex
->is_array
;
278 nir_ssa_dest_init(&tex
->instr
, &tex
->dest
,
279 old_tex
->dest
.ssa
.num_components
,
280 nir_dest_bit_size(old_tex
->dest
), NULL
);
281 nir_builder_instr_insert(b
, &tex
->instr
);
283 return &tex
->dest
.ssa
;
287 channel_to_component(enum isl_channel_select channel
)
290 case ISL_CHANNEL_SELECT_RED
:
292 case ISL_CHANNEL_SELECT_GREEN
:
294 case ISL_CHANNEL_SELECT_BLUE
:
296 case ISL_CHANNEL_SELECT_ALPHA
:
299 unreachable("invalid channel");
304 static enum isl_channel_select
305 swizzle_channel(struct isl_swizzle swizzle
, unsigned channel
)
317 unreachable("invalid channel");
323 try_lower_tex_ycbcr(struct anv_pipeline_layout
*layout
,
324 nir_builder
*builder
,
327 int deref_src_idx
= nir_tex_instr_src_index(tex
, nir_tex_src_texture_deref
);
328 assert(deref_src_idx
>= 0);
329 nir_deref_instr
*deref
= nir_src_as_deref(tex
->src
[deref_src_idx
].src
);
331 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
332 const struct anv_descriptor_set_layout
*set_layout
=
333 layout
->set
[var
->data
.descriptor_set
].layout
;
334 const struct anv_descriptor_set_binding_layout
*binding
=
335 &set_layout
->binding
[var
->data
.binding
];
337 /* For the following instructions, we don't apply any change and let the
338 * instruction apply to the first plane.
340 if (tex
->op
== nir_texop_txs
||
341 tex
->op
== nir_texop_query_levels
||
342 tex
->op
== nir_texop_lod
)
345 if (binding
->immutable_samplers
== NULL
)
348 assert(tex
->texture_index
== 0);
349 unsigned array_index
= 0;
350 if (deref
->deref_type
!= nir_deref_type_var
) {
351 assert(deref
->deref_type
== nir_deref_type_array
);
352 if (!nir_src_is_const(deref
->arr
.index
))
354 array_index
= nir_src_as_uint(deref
->arr
.index
);
355 array_index
= MIN2(array_index
, binding
->array_size
- 1);
357 const struct anv_sampler
*sampler
= binding
->immutable_samplers
[array_index
];
359 if (sampler
->conversion
== NULL
)
362 struct ycbcr_state state
= {
366 .conversion
= sampler
->conversion
,
369 builder
->cursor
= nir_before_instr(&tex
->instr
);
371 const struct anv_format
*format
= state
.conversion
->format
;
372 const struct isl_format_layout
*y_isl_layout
= NULL
;
373 for (uint32_t p
= 0; p
< format
->n_planes
; p
++) {
374 if (!format
->planes
[p
].has_chroma
)
375 y_isl_layout
= isl_format_get_layout(format
->planes
[p
].isl_format
);
377 assert(y_isl_layout
!= NULL
);
378 uint8_t y_bpc
= y_isl_layout
->channels_array
[0].bits
;
380 /* |ycbcr_comp| holds components in the order : Cr-Y-Cb */
381 nir_ssa_def
*zero
= nir_imm_float(builder
, 0.0f
);
382 nir_ssa_def
*one
= nir_imm_float(builder
, 1.0f
);
383 /* Use extra 2 channels for following swizzle */
384 nir_ssa_def
*ycbcr_comp
[5] = { zero
, zero
, zero
, one
, zero
};
386 uint8_t ycbcr_bpcs
[5];
387 memset(ycbcr_bpcs
, y_bpc
, sizeof(ycbcr_bpcs
));
389 /* Go through all the planes and gather the samples into a |ycbcr_comp|
390 * while applying a swizzle required by the spec:
392 * R, G, B should respectively map to Cr, Y, Cb
394 for (uint32_t p
= 0; p
< format
->n_planes
; p
++) {
395 const struct anv_format_plane
*plane_format
= &format
->planes
[p
];
396 nir_ssa_def
*plane_sample
= create_plane_tex_instr_implicit(&state
, p
);
398 for (uint32_t pc
= 0; pc
< 4; pc
++) {
399 enum isl_channel_select ycbcr_swizzle
=
400 swizzle_channel(plane_format
->ycbcr_swizzle
, pc
);
401 if (ycbcr_swizzle
== ISL_CHANNEL_SELECT_ZERO
)
404 unsigned ycbcr_component
= channel_to_component(ycbcr_swizzle
);
405 ycbcr_comp
[ycbcr_component
] = nir_channel(builder
, plane_sample
, pc
);
407 /* Also compute the number of bits for each component. */
408 const struct isl_format_layout
*isl_layout
=
409 isl_format_get_layout(plane_format
->isl_format
);
410 ycbcr_bpcs
[ycbcr_component
] = isl_layout
->channels_array
[pc
].bits
;
414 /* Now remaps components to the order specified by the conversion. */
415 nir_ssa_def
*swizzled_comp
[4] = { NULL
, };
416 uint32_t swizzled_bpcs
[4] = { 0, };
418 for (uint32_t i
= 0; i
< ARRAY_SIZE(state
.conversion
->mapping
); i
++) {
419 /* Maps to components in |ycbcr_comp| */
420 static const uint32_t swizzle_mapping
[] = {
421 [VK_COMPONENT_SWIZZLE_ZERO
] = 4,
422 [VK_COMPONENT_SWIZZLE_ONE
] = 3,
423 [VK_COMPONENT_SWIZZLE_R
] = 0,
424 [VK_COMPONENT_SWIZZLE_G
] = 1,
425 [VK_COMPONENT_SWIZZLE_B
] = 2,
426 [VK_COMPONENT_SWIZZLE_A
] = 3,
428 const VkComponentSwizzle m
= state
.conversion
->mapping
[i
];
430 if (m
== VK_COMPONENT_SWIZZLE_IDENTITY
) {
431 swizzled_comp
[i
] = ycbcr_comp
[i
];
432 swizzled_bpcs
[i
] = ycbcr_bpcs
[i
];
434 swizzled_comp
[i
] = ycbcr_comp
[swizzle_mapping
[m
]];
435 swizzled_bpcs
[i
] = ycbcr_bpcs
[swizzle_mapping
[m
]];
439 nir_ssa_def
*result
= nir_vec(builder
, swizzled_comp
, 4);
440 if (state
.conversion
->ycbcr_model
!= VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY
)
441 result
= convert_ycbcr(&state
, result
, swizzled_bpcs
);
443 nir_ssa_def_rewrite_uses(&tex
->dest
.ssa
, nir_src_for_ssa(result
));
444 nir_instr_remove(&tex
->instr
);
/* Shader-level entry point of the pass: for every function of `shader`,
 * walks every instruction of every block and hands each texture instruction
 * to try_lower_tex_ycbcr(). When a function reports progress, its
 * block-index and dominance metadata are preserved, and the per-function
 * result is accumulated into `progress`.
 *
 * NOTE(review): this listing is missing several lines of the definition
 * (return type, braces, the declaration of `builder`, the statement that
 * follows the instruction-type check, and the end of the function) -
 * confirm against the complete file before editing the code below.
 */
450 anv_nir_lower_ycbcr_textures(nir_shader
*shader
,
451 struct anv_pipeline_layout
*layout
)
453 bool progress
= false;
455 nir_foreach_function(function
, shader
) {
459 bool function_progress
= false;
461 nir_builder_init(&builder
, function
->impl
);
463 nir_foreach_block(block
, function
->impl
) {
464 nir_foreach_instr_safe(instr
, block
) {
465 if (instr
->type
!= nir_instr_type_tex
)
468 nir_tex_instr
*tex
= nir_instr_as_tex(instr
);
469 function_progress
|= try_lower_tex_ycbcr(layout
, &builder
, tex
);
473 if (function_progress
) {
474 nir_metadata_preserve(function
->impl
,
475 nir_metadata_block_index
|
476 nir_metadata_dominance
);
479 progress
|= function_progress
;