/*
 * Copyright © 2018 Intel Corporation
 * Copyright © 2018 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3d_compiler.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"

/** @file v3d_nir_lower_image_load_store.c
 *
 * Performs any necessary lowering of GL_ARB_shader_image_load_store
 * operations.
 *
 * On V3D 4.x, we just need to do format conversion for stores such that the
 * GPU can effectively memcpy the arguments (in increments of 32-bit words)
 * into the texel.  Loads are the same as texturing, where we may need to
 * unpack from 16-bit ints or floats.
 *
 * On V3D 3.x, to implement image load store we would need to do manual tiling
 * calculations and load/store using the TMU general memory access path.
 */

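/* As a rough sketch of the store path for one concrete case (an rgba8 image;
 * unorm8() below is illustrative shorthand, not an actual helper), the vec4
 * color source of the store gets replaced with a single 32-bit word:
 *
 *    word = unorm8(r) | unorm8(g) << 8 | unorm8(b) << 16 | unorm8(a) << 24
 *
 * so the TMU can write the data out without knowing the format.
 */
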
/* Returns true if the TMU returns 32 bits per channel for this image format.
 * Formats failing this test (8-bit, plus 16-bit float and integer) come back
 * as pairs of 16-bit values packed into 32-bit words instead, and loads from
 * them need the unpacking done in v3d_nir_lower_image_load() below.
 */
bool
v3d_gl_format_is_return_32(enum pipe_format format)
{
        const struct util_format_description *desc =
                util_format_description(format);
        const struct util_format_channel_description *chan = &desc->channel[0];

        return chan->size > 16 || (chan->size == 16 && chan->normalized);
}

/* Packs a 32-bit-per-channel vector of colors, where channel i holds a value
 * in the range [0, (1 << bits[i]) - 1], into as many 32-bit words as are
 * needed to hold all the bits.  If "mask" is set, each channel is masked
 * down to its bit width before being ORed into its word.
 */
static nir_ssa_def *
pack_bits(nir_builder *b, nir_ssa_def *color, const unsigned *bits,
          int num_components, bool mask)
{
        nir_ssa_def *results[4];
        int offset = 0;
        for (int i = 0; i < num_components; i++) {
                nir_ssa_def *chan = nir_channel(b, color, i);

                /* Channels being stored shouldn't cross a 32-bit boundary. */
                assert((offset & ~31) == ((offset + bits[i] - 1) & ~31));

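                /* Signed values arrive sign-extended to 32 bits; without the
                 * mask, the copies of the sign bit would clobber the other
                 * channels ORed into the same word.
                 */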
                if (mask) {
                        chan = nir_iand(b, chan,
                                        nir_imm_int(b, (1 << bits[i]) - 1));
                }

                if (offset % 32 == 0) {
                        results[offset / 32] = chan;
                } else {
                        results[offset / 32] =
                                nir_ior(b, results[offset / 32],
                                        nir_ishl(b, chan,
                                                 nir_imm_int(b, offset % 32)));
                }
                offset += bits[i];
        }

        return nir_vec(b, results, DIV_ROUND_UP(offset, 32));
}

static void
v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr)
{
        nir_variable *var = nir_intrinsic_get_var(instr, 0);
        enum pipe_format format = var->data.image.format;
        const struct util_format_description *desc =
                util_format_description(format);
        const struct util_format_channel_description *r_chan = &desc->channel[0];
        unsigned num_components = util_format_get_nr_components(format);

        b->cursor = nir_before_instr(&instr->instr);

        nir_ssa_def *color = nir_channels(b,
                                          nir_ssa_for_src(b, instr->src[3], 4),
                                          (1 << num_components) - 1);
        nir_ssa_def *formatted = NULL;

        if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
                formatted = nir_format_pack_11f11f10f(b, color);
        } else if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
                formatted = nir_format_pack_r9g9b9e5(b, color);
        } else if (r_chan->size == 32) {
                /* For 32-bit formats, we just have to move the vector
                 * across (possibly reducing the number of channels).
                 */
                formatted = color;
        } else {
                static const unsigned bits_8[4] = {8, 8, 8, 8};
                static const unsigned bits_16[4] = {16, 16, 16, 16};
                static const unsigned bits_1010102[4] = {10, 10, 10, 2};
                const unsigned *bits;

                switch (r_chan->size) {
                case 8:
                        bits = bits_8;
                        break;
                case 10:
                        bits = bits_1010102;
                        break;
                case 16:
                        bits = bits_16;
                        break;
                default:
                        unreachable("unrecognized bits");
                }

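                /* Only the signed paths (sint clamp and snorm conversion) set
                 * pack_mask: their results can be negative, so pack_bits()
                 * has to strip the sign extension.  Unsigned results are
                 * already confined to their bit range.
                 */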
                bool pack_mask = false;
                if (r_chan->pure_integer &&
                    r_chan->type == UTIL_FORMAT_TYPE_SIGNED) {
                        formatted = nir_format_clamp_sint(b, color, bits);
                        pack_mask = true;
                } else if (r_chan->pure_integer &&
                           r_chan->type == UTIL_FORMAT_TYPE_UNSIGNED) {
                        formatted = nir_format_clamp_uint(b, color, bits);
                } else if (r_chan->normalized &&
                           r_chan->type == UTIL_FORMAT_TYPE_SIGNED) {
                        formatted = nir_format_float_to_snorm(b, color, bits);
                        pack_mask = true;
                } else if (r_chan->normalized &&
                           r_chan->type == UTIL_FORMAT_TYPE_UNSIGNED) {
                        formatted = nir_format_float_to_unorm(b, color, bits);
                } else {
                        assert(r_chan->size == 16);
                        assert(r_chan->type == UTIL_FORMAT_TYPE_FLOAT);
                        formatted = nir_format_float_to_half(b, color);
                }

                formatted = pack_bits(b, formatted, bits, num_components,
                                      pack_mask);
        }

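        /* Replace the color source with the packed words and shrink the
         * store to the number of 32-bit words actually being written.
         */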
        nir_instr_rewrite_src(&instr->instr, &instr->src[3],
                              nir_src_for_ssa(formatted));
        instr->num_components = formatted->num_components;
}

static void
v3d_nir_lower_image_load(nir_builder *b, nir_intrinsic_instr *instr)
{
        static const unsigned bits16[] = {16, 16, 16, 16};
        nir_variable *var = nir_intrinsic_get_var(instr, 0);
        const struct glsl_type *sampler_type = glsl_without_array(var->type);
        enum glsl_base_type base_type =
                glsl_get_sampler_result_type(sampler_type);

        if (v3d_gl_format_is_return_32(var->data.image.format))
                return;

        b->cursor = nir_after_instr(&instr->instr);

        assert(instr->dest.is_ssa);
        nir_ssa_def *result = &instr->dest.ssa;
        if (base_type == GLSL_TYPE_FLOAT) {
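                /* The 16-bit return path packs two half-floats per 32-bit
                 * word: .x holds r and g, .y holds b and a.
                 */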
                nir_ssa_def *rg = nir_channel(b, result, 0);
                nir_ssa_def *ba = nir_channel(b, result, 1);
                result = nir_vec4(b,
                                  nir_unpack_half_2x16_split_x(b, rg),
                                  nir_unpack_half_2x16_split_y(b, rg),
                                  nir_unpack_half_2x16_split_x(b, ba),
                                  nir_unpack_half_2x16_split_y(b, ba));
        } else if (base_type == GLSL_TYPE_INT) {
                result = nir_format_unpack_sint(b, result, bits16, 4);
        } else {
                assert(base_type == GLSL_TYPE_UINT);
                result = nir_format_unpack_uint(b, result, bits16, 4);
        }

        nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, nir_src_for_ssa(result),
                                       result->parent_instr);
}

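/* Pass entry point: walks every intrinsic in the shader and lowers image
 * loads and stores as described at the top of the file.
 */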
void
v3d_nir_lower_image_load_store(nir_shader *s)
{
        nir_foreach_function(function, s) {
                if (!function->impl)
                        continue;

                nir_builder b;
                nir_builder_init(&b, function->impl);

                nir_foreach_block(block, function->impl) {
                        nir_foreach_instr_safe(instr, block) {
                                if (instr->type != nir_instr_type_intrinsic)
                                        continue;

                                nir_intrinsic_instr *intr =
                                        nir_instr_as_intrinsic(instr);

                                switch (intr->intrinsic) {
                                case nir_intrinsic_image_deref_load:
                                        v3d_nir_lower_image_load(&b, intr);
                                        break;
                                case nir_intrinsic_image_deref_store:
                                        v3d_nir_lower_image_store(&b, intr);
                                        break;
                                default:
                                        break;
                                }
                        }
                }

                nir_metadata_preserve(function->impl,
                                      nir_metadata_block_index |
                                      nir_metadata_dominance);
        }
}