/*
 * Copyright © 2018 Intel Corporation
 * Copyright © 2018 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3d_compiler.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"

/** @file v3d_nir_lower_image_load_store.c
 *
 * Performs any necessary lowering of GL_ARB_shader_image_load_store
 * operations.
 *
 * On V3D 4.x, we just need to do format conversion for stores such that the
 * GPU can effectively memcpy the arguments (in increments of 32-bit words)
 * into the texel.  Loads are the same as texturing, where we may need to
 * unpack from 16-bit ints or floats.
 *
 * On V3D 3.x, to implement image load store we would need to do manual tiling
 * calculations and load/store using the TMU general memory access path.
 */

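/* As a concrete illustration (not part of the pass itself): a vec4 f32
 * store to an rgba16f image has each channel converted to a 16-bit half
 * with nir_format_float_to_half(), and pack_bits() below then ORs the
 * halves together pairwise, so the store intrinsic ends up sourcing two
 * 32-bit words that the TMU can copy directly into the texel.
 */
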
/* Returns true if the TMU returns 32 bits per channel for this image
 * format, rather than 16-bit values that need unpacking.
 */
bool
v3d_gl_format_is_return_32(enum pipe_format format)
{
        const struct util_format_description *desc =
                util_format_description(format);
        const struct util_format_channel_description *chan = &desc->channel[0];

        return chan->size > 16 || (chan->size == 16 && chan->normalized);
}

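/* How the check above classifies some common formats, derived from the
 * channel-0 size/normalized test (for illustration):
 *
 *   PIPE_FORMAT_R32G32B32A32_FLOAT: size 32           -> 32-bit returns
 *   PIPE_FORMAT_R16G16B16A16_UNORM: size 16, norm     -> 32-bit returns
 *   PIPE_FORMAT_R16G16B16A16_FLOAT: size 16, not norm -> 16-bit returns
 *   PIPE_FORMAT_R8G8B8A8_UNORM:     size 8            -> 16-bit returns
 */
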
/* Packs a vector of 32-bit color channels, each in the range
 * [0, (1 << bits[i]) - 1], into a 32-bit-per-channel SSA value with as
 * many channels as necessary to store all the bits.
 */
static nir_ssa_def *
pack_bits(nir_builder *b, nir_ssa_def *color, const unsigned *bits,
          int num_components, bool mask)
{
        nir_ssa_def *results[4];
        int offset = 0;
        for (int i = 0; i < num_components; i++) {
                nir_ssa_def *chan = nir_channel(b, color, i);

                /* Channels being stored shouldn't cross a 32-bit boundary. */
                assert((offset & ~31) == ((offset + bits[i] - 1) & ~31));

                if (mask) {
                        /* Mask off high bits (e.g. sign extension) so that
                         * ORing channels together can't corrupt neighboring
                         * fields.
                         */
                        chan = nir_iand(b, chan,
                                        nir_imm_int(b, (1 << bits[i]) - 1));
                }

                if (offset % 32 == 0) {
                        results[offset / 32] = chan;
                } else {
                        results[offset / 32] =
                                nir_ior(b, results[offset / 32],
                                        nir_ishl(b, chan,
                                                 nir_imm_int(b, offset % 32)));
                }
                offset += bits[i];
        }

        return nir_vec(b, results, DIV_ROUND_UP(offset, 32));
}

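/* Worked example of pack_bits() (illustrative): with bits = {10, 10, 10, 2}
 * for an rgb10_a2 color, the loop produces
 *
 *   i=0: offset  0: results[0]  = r
 *   i=1: offset 10: results[0] |= g << 10
 *   i=2: offset 20: results[0] |= b << 20
 *   i=3: offset 30: results[0] |= a << 30
 *
 * and offset ends at 32, so DIV_ROUND_UP(32, 32) == 1 and a single
 * 32-bit channel is returned.
 */
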
static void
v3d_nir_lower_image_store(nir_builder *b, nir_intrinsic_instr *instr)
{
        enum pipe_format format = nir_intrinsic_format(instr);
        const struct util_format_description *desc =
                util_format_description(format);
        const struct util_format_channel_description *r_chan = &desc->channel[0];
        unsigned num_components = util_format_get_nr_components(format);

        b->cursor = nir_before_instr(&instr->instr);

        nir_ssa_def *color = nir_channels(b,
                                          nir_ssa_for_src(b, instr->src[3], 4),
                                          (1 << num_components) - 1);
        nir_ssa_def *formatted = NULL;

        if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
                formatted = nir_format_pack_11f11f10f(b, color);
        } else if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
                formatted = nir_format_pack_r9g9b9e5(b, color);
        } else if (r_chan->size == 32) {
                /* For 32-bit formats, we just have to move the vector
                 * across (possibly reducing the number of channels).
                 */
                formatted = color;
        } else {
                static const unsigned bits_8[4] = {8, 8, 8, 8};
                static const unsigned bits_16[4] = {16, 16, 16, 16};
                static const unsigned bits_1010102[4] = {10, 10, 10, 2};
                const unsigned *bits;

                switch (r_chan->size) {
                case 8:
                        bits = bits_8;
                        break;
                case 10:
                        bits = bits_1010102;
                        break;
                case 16:
                        bits = bits_16;
                        break;
                default:
                        unreachable("unrecognized bits");
                }

                /* Signed results come back sign-extended in their 32-bit
                 * values, so pack_bits() has to mask each channel down to
                 * its field width before merging channels into a word.
                 */
                bool pack_mask = false;
                if (r_chan->pure_integer &&
                    r_chan->type == UTIL_FORMAT_TYPE_SIGNED) {
                        formatted = nir_format_clamp_sint(b, color, bits);
                        pack_mask = true;
                } else if (r_chan->pure_integer &&
                           r_chan->type == UTIL_FORMAT_TYPE_UNSIGNED) {
                        formatted = nir_format_clamp_uint(b, color, bits);
                } else if (r_chan->normalized &&
                           r_chan->type == UTIL_FORMAT_TYPE_SIGNED) {
                        formatted = nir_format_float_to_snorm(b, color, bits);
                        pack_mask = true;
                } else if (r_chan->normalized &&
                           r_chan->type == UTIL_FORMAT_TYPE_UNSIGNED) {
                        formatted = nir_format_float_to_unorm(b, color, bits);
                } else {
                        assert(r_chan->size == 16);
                        assert(r_chan->type == UTIL_FORMAT_TYPE_FLOAT);
                        formatted = nir_format_float_to_half(b, color);
                }

                formatted = pack_bits(b, formatted, bits, num_components,
                                      pack_mask);
        }

        nir_instr_rewrite_src(&instr->instr, &instr->src[3],
                              nir_src_for_ssa(formatted));
        instr->num_components = formatted->num_components;
}

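/* Example result of the store lowering (illustrative): for an rgba8ui
 * image, the vec4 source is clamped to [0, 255] per channel with
 * nir_format_clamp_uint(), packed by pack_bits() into one 32-bit word,
 * and the intrinsic's num_components drops from 4 to 1.
 */
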
static void
v3d_nir_lower_image_load(nir_builder *b, nir_intrinsic_instr *instr)
{
        static const unsigned bits16[] = {16, 16, 16, 16};
        enum pipe_format format = nir_intrinsic_format(instr);

        if (v3d_gl_format_is_return_32(format))
                return;

        b->cursor = nir_after_instr(&instr->instr);

        assert(instr->dest.is_ssa);
        nir_ssa_def *result = &instr->dest.ssa;
        if (util_format_is_pure_uint(format)) {
                result = nir_format_unpack_uint(b, result, bits16, 4);
        } else if (util_format_is_pure_sint(format)) {
                result = nir_format_unpack_sint(b, result, bits16, 4);
        } else {
                /* Four half-floats arrive packed as two 32-bit words:
                 * r/g in the first, b/a in the second.
                 */
                nir_ssa_def *rg = nir_channel(b, result, 0);
                nir_ssa_def *ba = nir_channel(b, result, 1);
                result = nir_vec4(b,
                                  nir_unpack_half_2x16_split_x(b, rg),
                                  nir_unpack_half_2x16_split_y(b, rg),
                                  nir_unpack_half_2x16_split_x(b, ba),
                                  nir_unpack_half_2x16_split_y(b, ba));
        }

        nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, nir_src_for_ssa(result),
                                       result->parent_instr);
}

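/* Example result of the load lowering (illustrative): for an rgba8
 * (unorm) image the TMU returns four half-floats packed into two 32-bit
 * words, so the code above splits each word with
 * nir_unpack_half_2x16_split_{x,y}() to rebuild the vec4 the shader
 * expects.  For rgba8ui/rgba8i the returns are 16-bit integers instead,
 * unpacked with nir_format_unpack_{uint,sint}().
 */
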
void
v3d_nir_lower_image_load_store(nir_shader *s)
{
        nir_foreach_function(function, s) {
                if (!function->impl)
                        continue;

                nir_builder b;
                nir_builder_init(&b, function->impl);

                nir_foreach_block(block, function->impl) {
                        nir_foreach_instr_safe(instr, block) {
                                if (instr->type != nir_instr_type_intrinsic)
                                        continue;

                                nir_intrinsic_instr *intr =
                                        nir_instr_as_intrinsic(instr);

                                switch (intr->intrinsic) {
                                case nir_intrinsic_image_load:
                                        v3d_nir_lower_image_load(&b, intr);
                                        break;
                                case nir_intrinsic_image_store:
                                        v3d_nir_lower_image_store(&b, intr);
                                        break;
                                default:
                                        break;
                                }
                        }
                }

                nir_metadata_preserve(function->impl,
                                      nir_metadata_block_index |
                                      nir_metadata_dominance);
        }
}
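
/* Minimal sketch of how a driver might invoke this pass (a hypothetical
 * call site, assuming the usual NIR pass macros):
 *
 *   NIR_PASS_V(s, v3d_nir_lower_image_load_store);
 *
 * It needs to run before the backend turns image intrinsics into TMU
 * operations, since it rewrites the intrinsics' sources and destinations
 * in place.
 */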