/*
 * Copyright © 2018 Intel Corporation
 * Copyright © 2018 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
25 #include "v3d_compiler.h"
26 #include "compiler/nir/nir_builder.h"
27 #include "compiler/nir/nir_format_convert.h"
/** @file v3d_nir_lower_image_load_store.c
 *
 * Performs any necessary lowering of GL_ARB_shader_image_load_store
 * operations.
 *
 * On V3D 4.x, we just need to do format conversion for stores such that the
 * GPU can effectively memcpy the arguments (in increments of 32-bit words)
 * into the texel.  Loads are the same as texturing, where we may need to
 * unpack from 16-bit ints or floats.
 *
 * On V3D 3.x, to implement image load store we would need to do manual tiling
 * calculations and load/store using the TMU general memory access path.
 */
44 v3d_gl_format_is_return_32(GLenum format
)
59 case GL_R11F_G11F_B10F
:
89 unreachable("Invalid image format");
93 /* Packs a 32-bit vector of colors in the range [0, (1 << bits[i]) - 1] to a
94 * 32-bit SSA value, with as many channels as necessary to store all the bits
97 pack_bits(nir_builder
*b
, nir_ssa_def
*color
, const unsigned *bits
,
98 int num_components
, bool mask
)
100 nir_ssa_def
*results
[4];
102 for (int i
= 0; i
< num_components
; i
++) {
103 nir_ssa_def
*chan
= nir_channel(b
, color
, i
);
105 /* Channels being stored shouldn't cross a 32-bit boundary. */
106 assert((offset
& ~31) == ((offset
+ bits
[i
] - 1) & ~31));
109 chan
= nir_iand(b
, chan
,
110 nir_imm_int(b
, (1 << bits
[i
]) - 1));
113 if (offset
% 32 == 0) {
114 results
[offset
/ 32] = chan
;
116 results
[offset
/ 32] =
117 nir_ior(b
, results
[offset
/ 32],
119 nir_imm_int(b
, offset
% 32)));
124 return nir_vec(b
, results
, DIV_ROUND_UP(offset
, 32));
128 pack_unorm(nir_builder
*b
, nir_ssa_def
*color
, const unsigned *bits
,
131 color
= nir_channels(b
, color
, (1 << num_components
) - 1);
132 color
= nir_format_float_to_unorm(b
, color
, bits
);
133 return pack_bits(b
, color
, bits
, color
->num_components
, false);
137 pack_snorm(nir_builder
*b
, nir_ssa_def
*color
, const unsigned *bits
,
140 color
= nir_channels(b
, color
, (1 << num_components
) - 1);
141 color
= nir_format_float_to_snorm(b
, color
, bits
);
142 return pack_bits(b
, color
, bits
, color
->num_components
, true);
146 pack_uint(nir_builder
*b
, nir_ssa_def
*color
, const unsigned *bits
,
149 color
= nir_channels(b
, color
, (1 << num_components
) - 1);
150 color
= nir_format_clamp_uint(b
, color
, bits
);
151 return pack_bits(b
, color
, bits
, num_components
, false);
155 pack_sint(nir_builder
*b
, nir_ssa_def
*color
, const unsigned *bits
,
158 color
= nir_channels(b
, color
, (1 << num_components
) - 1);
159 color
= nir_format_clamp_uint(b
, color
, bits
);
160 return pack_bits(b
, color
, bits
, num_components
, true);
164 pack_half(nir_builder
*b
, nir_ssa_def
*color
, const unsigned *bits
,
167 color
= nir_channels(b
, color
, (1 << num_components
) - 1);
168 color
= nir_format_float_to_half(b
, color
);
169 return pack_bits(b
, color
, bits
, color
->num_components
, false);
173 v3d_nir_lower_image_store(nir_builder
*b
, nir_intrinsic_instr
*instr
)
175 nir_variable
*var
= nir_intrinsic_get_var(instr
, 0);
176 GLenum format
= var
->data
.image
.format
;
177 static const unsigned bits_8
[4] = {8, 8, 8, 8};
178 static const unsigned bits_16
[4] = {16, 16, 16, 16};
179 static const unsigned bits_1010102
[4] = {10, 10, 10, 2};
181 b
->cursor
= nir_before_instr(&instr
->instr
);
183 nir_ssa_def
*unformatted
= nir_ssa_for_src(b
, instr
->src
[3], 4);
184 nir_ssa_def
*formatted
= NULL
;
189 /* For 4-component 32-bit components, there's no packing to be
197 /* For other 32-bit components, just reduce the size of
200 formatted
= nir_channels(b
, unformatted
, 1);
205 formatted
= nir_channels(b
, unformatted
, 2);
209 formatted
= pack_unorm(b
, unformatted
, bits_8
, 1);
212 formatted
= pack_unorm(b
, unformatted
, bits_8
, 2);
215 formatted
= pack_unorm(b
, unformatted
, bits_8
, 4);
219 formatted
= pack_snorm(b
, unformatted
, bits_8
, 1);
222 formatted
= pack_snorm(b
, unformatted
, bits_8
, 2);
225 formatted
= pack_snorm(b
, unformatted
, bits_8
, 4);
229 formatted
= pack_unorm(b
, unformatted
, bits_16
, 1);
232 formatted
= pack_unorm(b
, unformatted
, bits_16
, 2);
235 formatted
= pack_unorm(b
, unformatted
, bits_16
, 4);
239 formatted
= pack_snorm(b
, unformatted
, bits_16
, 1);
242 formatted
= pack_snorm(b
, unformatted
, bits_16
, 2);
244 case GL_RGBA16_SNORM
:
245 formatted
= pack_snorm(b
, unformatted
, bits_16
, 4);
249 formatted
= pack_half(b
, unformatted
, bits_16
, 1);
252 formatted
= pack_half(b
, unformatted
, bits_16
, 2);
255 formatted
= pack_half(b
, unformatted
, bits_16
, 4);
259 formatted
= pack_uint(b
, unformatted
, bits_8
, 1);
262 formatted
= pack_sint(b
, unformatted
, bits_8
, 1);
265 formatted
= pack_uint(b
, unformatted
, bits_8
, 2);
268 formatted
= pack_sint(b
, unformatted
, bits_8
, 2);
271 formatted
= pack_uint(b
, unformatted
, bits_8
, 4);
274 formatted
= pack_sint(b
, unformatted
, bits_8
, 4);
278 formatted
= pack_uint(b
, unformatted
, bits_16
, 1);
281 formatted
= pack_sint(b
, unformatted
, bits_16
, 1);
284 formatted
= pack_uint(b
, unformatted
, bits_16
, 2);
287 formatted
= pack_sint(b
, unformatted
, bits_16
, 2);
290 formatted
= pack_uint(b
, unformatted
, bits_16
, 4);
293 formatted
= pack_sint(b
, unformatted
, bits_16
, 4);
296 case GL_R11F_G11F_B10F
:
297 formatted
= nir_format_pack_11f11f10f(b
, unformatted
);
300 formatted
= nir_format_pack_r9g9b9e5(b
, unformatted
);
304 formatted
= pack_unorm(b
, unformatted
, bits_1010102
, 4);
308 formatted
= pack_uint(b
, unformatted
, bits_1010102
, 4);
312 unreachable("bad format");
315 nir_instr_rewrite_src(&instr
->instr
, &instr
->src
[3],
316 nir_src_for_ssa(formatted
));
317 instr
->num_components
= formatted
->num_components
;
321 v3d_nir_lower_image_load(nir_builder
*b
, nir_intrinsic_instr
*instr
)
323 static const unsigned bits16
[] = {16, 16, 16, 16};
324 nir_variable
*var
= nir_intrinsic_get_var(instr
, 0);
325 const struct glsl_type
*sampler_type
= glsl_without_array(var
->type
);
326 enum glsl_base_type base_type
=
327 glsl_get_sampler_result_type(sampler_type
);
329 if (v3d_gl_format_is_return_32(var
->data
.image
.format
))
332 b
->cursor
= nir_after_instr(&instr
->instr
);
334 assert(instr
->dest
.is_ssa
);
335 nir_ssa_def
*result
= &instr
->dest
.ssa
;
336 if (base_type
== GLSL_TYPE_FLOAT
) {
337 nir_ssa_def
*rg
= nir_channel(b
, result
, 0);
338 nir_ssa_def
*ba
= nir_channel(b
, result
, 1);
340 nir_unpack_half_2x16_split_x(b
, rg
),
341 nir_unpack_half_2x16_split_y(b
, rg
),
342 nir_unpack_half_2x16_split_x(b
, ba
),
343 nir_unpack_half_2x16_split_y(b
, ba
));
344 } else if (base_type
== GLSL_TYPE_INT
) {
345 result
= nir_format_unpack_sint(b
, result
, bits16
, 4);
347 assert(base_type
== GLSL_TYPE_UINT
);
348 result
= nir_format_unpack_uint(b
, result
, bits16
, 4);
351 nir_ssa_def_rewrite_uses_after(&instr
->dest
.ssa
, nir_src_for_ssa(result
),
352 result
->parent_instr
);
356 v3d_nir_lower_image_load_store(nir_shader
*s
)
358 nir_foreach_function(function
, s
) {
363 nir_builder_init(&b
, function
->impl
);
365 nir_foreach_block(block
, function
->impl
) {
366 nir_foreach_instr_safe(instr
, block
) {
367 if (instr
->type
!= nir_instr_type_intrinsic
)
370 nir_intrinsic_instr
*intr
=
371 nir_instr_as_intrinsic(instr
);
373 switch (intr
->intrinsic
) {
374 case nir_intrinsic_image_deref_load
:
375 v3d_nir_lower_image_load(&b
, intr
);
377 case nir_intrinsic_image_deref_store
:
378 v3d_nir_lower_image_store(&b
, intr
);
386 nir_metadata_preserve(function
->impl
,
387 nir_metadata_block_index
|
388 nir_metadata_dominance
);