nir: move data.image.access to data.access
[mesa.git] src/intel/compiler/brw_nir_lower_image_load_store.c
/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "isl/isl.h"

#include "brw_nir.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"

/* The higher compiler layers use the GL enums for image formats even if
 * they come in from SPIR-V or Vulkan.  We need to turn them into an ISL
 * enum before we can use them.
 */
static enum isl_format
isl_format_for_gl_format(uint32_t gl_format)
{
   switch (gl_format) {
   case GL_R8:             return ISL_FORMAT_R8_UNORM;
   case GL_R8_SNORM:       return ISL_FORMAT_R8_SNORM;
   case GL_R8UI:           return ISL_FORMAT_R8_UINT;
   case GL_R8I:            return ISL_FORMAT_R8_SINT;
   case GL_RG8:            return ISL_FORMAT_R8G8_UNORM;
   case GL_RG8_SNORM:      return ISL_FORMAT_R8G8_SNORM;
   case GL_RG8UI:          return ISL_FORMAT_R8G8_UINT;
   case GL_RG8I:           return ISL_FORMAT_R8G8_SINT;
   case GL_RGBA8:          return ISL_FORMAT_R8G8B8A8_UNORM;
   case GL_RGBA8_SNORM:    return ISL_FORMAT_R8G8B8A8_SNORM;
   case GL_RGBA8UI:        return ISL_FORMAT_R8G8B8A8_UINT;
   case GL_RGBA8I:         return ISL_FORMAT_R8G8B8A8_SINT;
   case GL_R11F_G11F_B10F: return ISL_FORMAT_R11G11B10_FLOAT;
   case GL_RGB10_A2:       return ISL_FORMAT_R10G10B10A2_UNORM;
   case GL_RGB10_A2UI:     return ISL_FORMAT_R10G10B10A2_UINT;
   case GL_R16:            return ISL_FORMAT_R16_UNORM;
   case GL_R16_SNORM:      return ISL_FORMAT_R16_SNORM;
   case GL_R16F:           return ISL_FORMAT_R16_FLOAT;
   case GL_R16UI:          return ISL_FORMAT_R16_UINT;
   case GL_R16I:           return ISL_FORMAT_R16_SINT;
   case GL_RG16:           return ISL_FORMAT_R16G16_UNORM;
   case GL_RG16_SNORM:     return ISL_FORMAT_R16G16_SNORM;
   case GL_RG16F:          return ISL_FORMAT_R16G16_FLOAT;
   case GL_RG16UI:         return ISL_FORMAT_R16G16_UINT;
   case GL_RG16I:          return ISL_FORMAT_R16G16_SINT;
   case GL_RGBA16:         return ISL_FORMAT_R16G16B16A16_UNORM;
   case GL_RGBA16_SNORM:   return ISL_FORMAT_R16G16B16A16_SNORM;
   case GL_RGBA16F:        return ISL_FORMAT_R16G16B16A16_FLOAT;
   case GL_RGBA16UI:       return ISL_FORMAT_R16G16B16A16_UINT;
   case GL_RGBA16I:        return ISL_FORMAT_R16G16B16A16_SINT;
   case GL_R32F:           return ISL_FORMAT_R32_FLOAT;
   case GL_R32UI:          return ISL_FORMAT_R32_UINT;
   case GL_R32I:           return ISL_FORMAT_R32_SINT;
   case GL_RG32F:          return ISL_FORMAT_R32G32_FLOAT;
   case GL_RG32UI:         return ISL_FORMAT_R32G32_UINT;
   case GL_RG32I:          return ISL_FORMAT_R32G32_SINT;
   case GL_RGBA32F:        return ISL_FORMAT_R32G32B32A32_FLOAT;
   case GL_RGBA32UI:       return ISL_FORMAT_R32G32B32A32_UINT;
   case GL_RGBA32I:        return ISL_FORMAT_R32G32B32A32_SINT;
   case GL_NONE:           return ISL_FORMAT_UNSUPPORTED;
   default:
      assert(!"Invalid image format");
      return ISL_FORMAT_UNSUPPORTED;
   }
}

static nir_ssa_def *
_load_image_param(nir_builder *b, nir_deref_instr *deref, unsigned offset)
{
   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(b->shader,
                                 nir_intrinsic_image_deref_load_param_intel);
   load->src[0] = nir_src_for_ssa(&deref->dest.ssa);
   nir_intrinsic_set_base(load, offset / 4);

   switch (offset) {
   case BRW_IMAGE_PARAM_OFFSET_OFFSET:
   case BRW_IMAGE_PARAM_SWIZZLING_OFFSET:
      load->num_components = 2;
      break;
   case BRW_IMAGE_PARAM_TILING_OFFSET:
   case BRW_IMAGE_PARAM_SIZE_OFFSET:
      load->num_components = 3;
      break;
   case BRW_IMAGE_PARAM_STRIDE_OFFSET:
      load->num_components = 4;
      break;
   default:
      unreachable("Invalid param offset");
   }
   nir_ssa_dest_init(&load->instr, &load->dest,
                     load->num_components, 32, NULL);

   nir_builder_instr_insert(b, &load->instr);
   return &load->dest.ssa;
}

#define load_image_param(b, d, o) \
   _load_image_param(b, d, BRW_IMAGE_PARAM_##o##_OFFSET)
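
/* For example, load_image_param(b, deref, SIZE) expands to
 * _load_image_param(b, deref, BRW_IMAGE_PARAM_SIZE_OFFSET) and, per the
 * switch above, yields a 3-component vector.
 */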

static nir_ssa_def *
image_coord_is_in_bounds(nir_builder *b, nir_deref_instr *deref,
                         nir_ssa_def *coord)
{
   nir_ssa_def *size = load_image_param(b, deref, SIZE);
   nir_ssa_def *cmp = nir_ilt(b, coord, size);

   unsigned coord_comps = glsl_get_sampler_coordinate_components(deref->type);
   nir_ssa_def *in_bounds = nir_imm_true(b);
   for (unsigned i = 0; i < coord_comps; i++)
      in_bounds = nir_iand(b, in_bounds, nir_channel(b, cmp, i));

   return in_bounds;
}
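
/* For a 2-D image this reduces to (x < size.x) && (y < size.y), with the
 * size taken from the image parameter block uploaded by the driver.
 */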

/** Calculate the offset in memory of the texel given by \p coord.
 *
 * This is meant to be used with untyped surface messages to access a tiled
 * surface, which involves manually taking the tiling and swizzling modes of
 * the surface into account, so it will hopefully not happen very often.
 *
 * The tiling algorithm implemented here matches either the X or Y tiling
 * layouts supported by the hardware depending on the tiling coefficients
 * passed to the program as uniforms.  See Volume 1 Part 2 Section 4.5
 * "Address Tiling Function" of the IVB PRM for an in-depth explanation of
 * the hardware tiling format.
 */
static nir_ssa_def *
image_address(nir_builder *b, const struct gen_device_info *devinfo,
              nir_deref_instr *deref, nir_ssa_def *coord)
{
   if (glsl_get_sampler_dim(deref->type) == GLSL_SAMPLER_DIM_1D &&
       glsl_sampler_type_is_array(deref->type)) {
      /* It's easier if 1D arrays are treated like 2D arrays */
      coord = nir_vec3(b, nir_channel(b, coord, 0),
                          nir_imm_int(b, 0),
                          nir_channel(b, coord, 1));
   } else {
      unsigned dims = glsl_get_sampler_coordinate_components(deref->type);
      coord = nir_channels(b, coord, (1 << dims) - 1);
   }

   nir_ssa_def *offset = load_image_param(b, deref, OFFSET);
   nir_ssa_def *tiling = load_image_param(b, deref, TILING);
   nir_ssa_def *stride = load_image_param(b, deref, STRIDE);

   /* Shift the coordinates by the fixed surface offset.  It may be non-zero
    * if the image is a single slice of a higher-dimensional surface, or if a
    * non-zero mipmap level of the surface is bound to the pipeline.  The
    * offset needs to be applied here rather than at surface state set-up
    * time because the desired slice-level may start mid-tile, so simply
    * shifting the surface base address wouldn't give a well-formed tiled
    * surface in the general case.
    */
   nir_ssa_def *xypos = (coord->num_components == 1) ?
                        nir_vec2(b, coord, nir_imm_int(b, 0)) :
                        nir_channels(b, coord, 0x3);
   xypos = nir_iadd(b, xypos, offset);

   /* The layout of 3-D textures in memory is sort-of like a tiling
    * format.  At each miplevel, the slices are arranged in rows of
    * 2^level slices per row.  The slice row is stored in tmp.y and
    * the slice within the row is stored in tmp.x.
    *
    * The layout of 2-D array textures and cubemaps is much simpler:
    * Depending on whether the ARYSPC_LOD0 layout is in use it will be
    * stored in memory as an array of slices, each one being a 2-D
    * arrangement of miplevels, or as a 2-D arrangement of miplevels,
    * each one being an array of slices.  In either case the separation
    * between slices of the same LOD is equal to the qpitch value
    * provided as stride.w.
    *
    * This code can be made to handle both 2-D array and 3-D textures
    * by passing in the miplevel as tile.z for 3-D textures and 0 in
    * tile.z for 2-D array textures.
    *
    * See Volume 1 Part 1 of the Gen7 PRM, sections 6.18.4.7 "Surface
    * Arrays" and 6.18.6 "3D Surfaces" for a more extensive discussion
    * of the hardware 3D texture and 2D array layouts.
    */
   if (coord->num_components > 2) {
      /* Decompose z into a major (tmp.y) and a minor (tmp.x)
       * index.
       */
      nir_ssa_def *z = nir_channel(b, coord, 2);
      nir_ssa_def *z_x = nir_ubfe(b, z, nir_imm_int(b, 0),
                                  nir_channel(b, tiling, 2));
      nir_ssa_def *z_y = nir_ushr(b, z, nir_channel(b, tiling, 2));

      /* Take into account the horizontal (tmp.x) and vertical (tmp.y)
       * slice offset.
       */
      xypos = nir_iadd(b, xypos, nir_imul(b, nir_vec2(b, z_x, z_y),
                                          nir_channels(b, stride, 0xc)));
   }

   nir_ssa_def *addr;
   if (coord->num_components > 1) {
      /* Calculate the major/minor x and y indices.  In order to
       * accommodate both X and Y tiling, the Y-major tiling format is
       * treated as being a bunch of narrow X-tiles placed next to each
       * other.  This means that the tile width for Y-tiling is actually
       * the width of one sub-column of the Y-major tile where each 4K
       * tile has 8 512B sub-columns.
       *
       * The major Y value is the row of tiles in which the pixel lives.
       * The major X value is the tile sub-column in which the pixel
       * lives; for X tiling, this is the same as the tile column, for Y
       * tiling, each tile has 8 sub-columns.  The minor X and Y indices
       * are the position within the sub-column.
       */

      /* Calculate the minor x and y indices. */
      nir_ssa_def *minor = nir_ubfe(b, xypos, nir_imm_int(b, 0),
                                    nir_channels(b, tiling, 0x3));
      nir_ssa_def *major = nir_ushr(b, xypos, nir_channels(b, tiling, 0x3));

      /* Calculate the texel index from the start of the tile row and the
       * vertical coordinate of the row.
       * Equivalent to:
       *   tmp.x = (major.x << tile.y << tile.x) +
       *           (minor.y << tile.x) + minor.x
       *   tmp.y = major.y << tile.y
       */
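      /* A worked example (a sketch; it assumes the tiling coefficients are
       * the log2 tile dimensions in elements, e.g. tile.x = 7 and
       * tile.y = 3 for a 128-element-wide, 8-row tile): a texel at
       * (x = 300, y = 10) gives major = (2, 1) and minor = (44, 2), so
       * tmp.x = (((2 << 3) + 2) << 7) + 44 = 2348 and tmp.y = 1 << 3 = 8.
       */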
      nir_ssa_def *idx_x, *idx_y;
      idx_x = nir_ishl(b, nir_channel(b, major, 0), nir_channel(b, tiling, 1));
      idx_x = nir_iadd(b, idx_x, nir_channel(b, minor, 1));
      idx_x = nir_ishl(b, idx_x, nir_channel(b, tiling, 0));
      idx_x = nir_iadd(b, idx_x, nir_channel(b, minor, 0));
      idx_y = nir_ishl(b, nir_channel(b, major, 1), nir_channel(b, tiling, 1));

      /* Add it to the start of the tile row. */
      nir_ssa_def *idx;
      idx = nir_imul(b, idx_y, nir_channel(b, stride, 1));
      idx = nir_iadd(b, idx, idx_x);

      /* Multiply by the Bpp value. */
      addr = nir_imul(b, idx, nir_channel(b, stride, 0));

      if (devinfo->gen < 8 && !devinfo->is_baytrail) {
         /* Take into account the two dynamically specified shifts.  Both are
          * used to implement swizzling of X-tiled surfaces.  For Y-tiled
          * surfaces only one bit needs to be XOR-ed with bit 6 of the memory
          * address, so a swz value of 0xff (actually interpreted as 31 by
          * the hardware) will be provided to cause the relevant bit of tmp.y
          * to be zero and turn the first XOR into the identity.  For linear
          * surfaces or platforms lacking address swizzling both shifts will
          * be 0xff, causing the relevant bits of both tmp.x and tmp.y to be
          * zero, which effectively disables swizzling.
          */
         nir_ssa_def *swizzle = load_image_param(b, deref, SWIZZLING);
         nir_ssa_def *shift0 = nir_ushr(b, addr, nir_channel(b, swizzle, 0));
         nir_ssa_def *shift1 = nir_ushr(b, addr, nir_channel(b, swizzle, 1));

         /* XOR tmp.x and tmp.y with bit 6 of the memory address. */
         nir_ssa_def *bit = nir_iand(b, nir_ixor(b, shift0, shift1),
                                     nir_imm_int(b, 1 << 6));
         addr = nir_ixor(b, addr, bit);
      }
   } else {
      /* Multiply by the Bpp/stride value.  Note that the y coordinate may
       * be non-zero even if the image is one-dimensional because a vertical
       * offset may have been applied above to select a non-zero slice or
       * level of a higher-dimensional texture.
       */
      nir_ssa_def *idx;
      idx = nir_imul(b, nir_channel(b, xypos, 1), nir_channel(b, stride, 1));
      idx = nir_iadd(b, nir_channel(b, xypos, 0), idx);
      addr = nir_imul(b, idx, nir_channel(b, stride, 0));
   }

   return addr;
}

struct format_info {
   const struct isl_format_layout *fmtl;
   unsigned chans;
   unsigned bits[4];
};

static struct format_info
get_format_info(enum isl_format fmt)
{
   const struct isl_format_layout *fmtl = isl_format_get_layout(fmt);

   return (struct format_info) {
      .fmtl = fmtl,
      .chans = isl_format_get_num_channels(fmt),
      .bits = {
         fmtl->channels.r.bits,
         fmtl->channels.g.bits,
         fmtl->channels.b.bits,
         fmtl->channels.a.bits
      },
   };
}
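
/* For example, get_format_info(ISL_FORMAT_R10G10B10A2_UINT) yields
 * chans = 4 and bits = {10, 10, 10, 2}.
 */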

static nir_ssa_def *
convert_color_for_load(nir_builder *b, const struct gen_device_info *devinfo,
                       nir_ssa_def *color,
                       enum isl_format image_fmt, enum isl_format lower_fmt,
                       unsigned dest_components)
{
   if (image_fmt == lower_fmt)
      goto expand_vec;

   if (image_fmt == ISL_FORMAT_R11G11B10_FLOAT) {
      assert(lower_fmt == ISL_FORMAT_R32_UINT);
      color = nir_format_unpack_11f11f10f(b, color);
      goto expand_vec;
   }

   struct format_info image = get_format_info(image_fmt);
   struct format_info lower = get_format_info(lower_fmt);

   const bool needs_sign_extension =
      isl_format_has_snorm_channel(image_fmt) ||
      isl_format_has_sint_channel(image_fmt);

   /* We only check the red channel to detect if we need to pack/unpack */
   assert(image.bits[0] != lower.bits[0] ||
          memcmp(image.bits, lower.bits, sizeof(image.bits)) == 0);

   if (image.bits[0] != lower.bits[0] && lower_fmt == ISL_FORMAT_R32_UINT) {
      if (needs_sign_extension)
         color = nir_format_unpack_sint(b, color, image.bits, image.chans);
      else
         color = nir_format_unpack_uint(b, color, image.bits, image.chans);
   } else {
      /* All these formats are homogeneous */
      for (unsigned i = 1; i < image.chans; i++)
         assert(image.bits[i] == image.bits[0]);

      /* On IVB, we rely on the undocumented behavior that typed reads from
       * surfaces of the unsupported R8 and R16 formats return useful data in
       * their least significant bits.  However, the data in the high bits is
       * garbage so we have to discard it.
       */
      if (devinfo->gen == 7 && !devinfo->is_haswell &&
          (lower_fmt == ISL_FORMAT_R16_UINT ||
           lower_fmt == ISL_FORMAT_R8_UINT))
         color = nir_format_mask_uvec(b, color, lower.bits);

      if (image.bits[0] != lower.bits[0]) {
         color = nir_format_bitcast_uvec_unmasked(b, color, lower.bits[0],
                                                  image.bits[0]);
      }

      if (needs_sign_extension)
         color = nir_format_sign_extend_ivec(b, color, image.bits);
   }

   switch (image.fmtl->channels.r.type) {
   case ISL_UNORM:
      assert(isl_format_has_uint_channel(lower_fmt));
      color = nir_format_unorm_to_float(b, color, image.bits);
      break;

   case ISL_SNORM:
      assert(isl_format_has_uint_channel(lower_fmt));
      color = nir_format_snorm_to_float(b, color, image.bits);
      break;

   case ISL_SFLOAT:
      if (image.bits[0] == 16)
         color = nir_unpack_half_2x16_split_x(b, color);
      break;

   case ISL_UINT:
   case ISL_SINT:
      break;

   default:
      unreachable("Invalid image channel type");
   }

expand_vec:
   assert(dest_components == 1 || dest_components == 4);
   assert(color->num_components <= dest_components);
   if (color->num_components == dest_components)
      return color;

   nir_ssa_def *comps[4];
   for (unsigned i = 0; i < color->num_components; i++)
      comps[i] = nir_channel(b, color, i);

   for (unsigned i = color->num_components; i < 3; i++)
      comps[i] = nir_imm_int(b, 0);

   if (color->num_components < 4) {
      if (isl_format_has_int_channel(image_fmt))
         comps[3] = nir_imm_int(b, 1);
      else
         comps[3] = nir_imm_float(b, 1);
   }

   return nir_vec(b, comps, dest_components);
}
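
/* A sketch of the path above: an image declared as GL_RGBA8 but lowered to
 * ISL_FORMAT_R32_UINT (a common case on Gen7) takes the
 * nir_format_unpack_uint() branch with bits = {8, 8, 8, 8}, converts with
 * nir_format_unorm_to_float() and then returns through the expand_vec path.
 */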

static bool
lower_image_load_instr(nir_builder *b,
                       const struct gen_device_info *devinfo,
                       nir_intrinsic_instr *intrin)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   const enum isl_format image_fmt =
      isl_format_for_gl_format(var->data.image.format);

   if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) {
      const enum isl_format lower_fmt =
         isl_lower_storage_image_format(devinfo, image_fmt);
      const unsigned dest_components = intrin->num_components;

      /* Use an undef to hold the uses of the load while we do the color
       * conversion.
       */
      nir_ssa_def *placeholder = nir_ssa_undef(b, 4, 32);
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(placeholder));

      intrin->num_components = isl_format_get_num_channels(lower_fmt);
      intrin->dest.ssa.num_components = intrin->num_components;

      b->cursor = nir_after_instr(&intrin->instr);

      nir_ssa_def *color = convert_color_for_load(b, devinfo,
                                                  &intrin->dest.ssa,
                                                  image_fmt, lower_fmt,
                                                  dest_components);

      nir_ssa_def_rewrite_uses(placeholder, nir_src_for_ssa(color));
      nir_instr_remove(placeholder->parent_instr);
   } else {
      const struct isl_format_layout *image_fmtl =
         isl_format_get_layout(image_fmt);
      /* We have a matching typed format for everything 32b and below */
      assert(image_fmtl->bpb == 64 || image_fmtl->bpb == 128);
      enum isl_format raw_fmt = (image_fmtl->bpb == 64) ?
                                ISL_FORMAT_R32G32_UINT :
                                ISL_FORMAT_R32G32B32A32_UINT;
      const unsigned dest_components = intrin->num_components;

      b->cursor = nir_instr_remove(&intrin->instr);

      nir_ssa_def *coord = intrin->src[1].ssa;

      nir_ssa_def *do_load = image_coord_is_in_bounds(b, deref, coord);
      if (devinfo->gen == 7 && !devinfo->is_haswell) {
         /* Check whether the first stride component (i.e. the Bpp value)
          * is greater than four, which on Gen7 indicates that a surface of
          * type RAW has been bound for untyped access.  Reading or writing
          * to a surface of type other than RAW using untyped surface
          * messages causes a hang on IVB and VLV.
          */
         nir_ssa_def *stride = load_image_param(b, deref, STRIDE);
         nir_ssa_def *is_raw =
            nir_ilt(b, nir_imm_int(b, 4), nir_channel(b, stride, 0));
         do_load = nir_iand(b, do_load, is_raw);
      }
      nir_push_if(b, do_load);

      nir_ssa_def *addr = image_address(b, devinfo, deref, coord);
      nir_intrinsic_instr *load =
         nir_intrinsic_instr_create(b->shader,
                                    nir_intrinsic_image_deref_load_raw_intel);
      load->src[0] = nir_src_for_ssa(&deref->dest.ssa);
      load->src[1] = nir_src_for_ssa(addr);
      load->num_components = image_fmtl->bpb / 32;
      nir_ssa_dest_init(&load->instr, &load->dest,
                        load->num_components, 32, NULL);
      nir_builder_instr_insert(b, &load->instr);

      nir_push_else(b, NULL);

      nir_ssa_def *zero = nir_imm_zero(b, load->num_components, 32);

      nir_pop_if(b, NULL);

      nir_ssa_def *value = nir_if_phi(b, &load->dest.ssa, zero);

      nir_ssa_def *color = convert_color_for_load(b, devinfo, value,
                                                  image_fmt, raw_fmt,
                                                  dest_components);

      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(color));
   }

   return true;
}

static nir_ssa_def *
convert_color_for_store(nir_builder *b, const struct gen_device_info *devinfo,
                        nir_ssa_def *color,
                        enum isl_format image_fmt, enum isl_format lower_fmt)
{
   struct format_info image = get_format_info(image_fmt);
   struct format_info lower = get_format_info(lower_fmt);

   color = nir_channels(b, color, (1 << image.chans) - 1);

   if (image_fmt == lower_fmt)
      return color;

   if (image_fmt == ISL_FORMAT_R11G11B10_FLOAT) {
      assert(lower_fmt == ISL_FORMAT_R32_UINT);
      return nir_format_pack_11f11f10f(b, color);
   }

   switch (image.fmtl->channels.r.type) {
   case ISL_UNORM:
      assert(isl_format_has_uint_channel(lower_fmt));
      color = nir_format_float_to_unorm(b, color, image.bits);
      break;

   case ISL_SNORM:
      assert(isl_format_has_uint_channel(lower_fmt));
      color = nir_format_float_to_snorm(b, color, image.bits);
      break;

   case ISL_SFLOAT:
      if (image.bits[0] == 16)
         color = nir_format_float_to_half(b, color);
      break;

   case ISL_UINT:
      color = nir_format_clamp_uint(b, color, image.bits);
      break;

   case ISL_SINT:
      color = nir_format_clamp_sint(b, color, image.bits);
      break;

   default:
      unreachable("Invalid image channel type");
   }

   if (image.bits[0] < 32 &&
       (isl_format_has_snorm_channel(image_fmt) ||
        isl_format_has_sint_channel(image_fmt)))
      color = nir_format_mask_uvec(b, color, image.bits);

   if (image.bits[0] != lower.bits[0] && lower_fmt == ISL_FORMAT_R32_UINT) {
      color = nir_format_pack_uint(b, color, image.bits, image.chans);
   } else {
      /* All these formats are homogeneous */
      for (unsigned i = 1; i < image.chans; i++)
         assert(image.bits[i] == image.bits[0]);

      if (image.bits[0] != lower.bits[0]) {
         color = nir_format_bitcast_uvec_unmasked(b, color, image.bits[0],
                                                  lower.bits[0]);
      }
   }

   return color;
}
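
/* The store path mirrors the load path: e.g. a GL_RGBA8 image lowered to
 * ISL_FORMAT_R32_UINT converts with nir_format_float_to_unorm() and then
 * packs the four 8-bit channels into a single dword with
 * nir_format_pack_uint().
 */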

static bool
lower_image_store_instr(nir_builder *b,
                        const struct gen_device_info *devinfo,
                        nir_intrinsic_instr *intrin)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   /* For write-only surfaces, we trust that the hardware can just do the
    * conversion for us.
    */
   if (var->data.access & ACCESS_NON_READABLE)
      return false;

   const enum isl_format image_fmt =
      isl_format_for_gl_format(var->data.image.format);

   if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt)) {
      const enum isl_format lower_fmt =
         isl_lower_storage_image_format(devinfo, image_fmt);

      /* Color conversion goes before the store */
      b->cursor = nir_before_instr(&intrin->instr);

      nir_ssa_def *color = convert_color_for_store(b, devinfo,
                                                   intrin->src[3].ssa,
                                                   image_fmt, lower_fmt);
      intrin->num_components = isl_format_get_num_channels(lower_fmt);
      nir_instr_rewrite_src(&intrin->instr, &intrin->src[3],
                            nir_src_for_ssa(color));
   } else {
      const struct isl_format_layout *image_fmtl =
         isl_format_get_layout(image_fmt);
      /* We have a matching typed format for everything 32b and below */
      assert(image_fmtl->bpb == 64 || image_fmtl->bpb == 128);
      enum isl_format raw_fmt = (image_fmtl->bpb == 64) ?
                                ISL_FORMAT_R32G32_UINT :
                                ISL_FORMAT_R32G32B32A32_UINT;

      b->cursor = nir_instr_remove(&intrin->instr);

      nir_ssa_def *coord = intrin->src[1].ssa;

      nir_ssa_def *do_store = image_coord_is_in_bounds(b, deref, coord);
      if (devinfo->gen == 7 && !devinfo->is_haswell) {
         /* Check whether the first stride component (i.e. the Bpp value)
          * is greater than four, which on Gen7 indicates that a surface of
          * type RAW has been bound for untyped access.  Reading or writing
          * to a surface of type other than RAW using untyped surface
          * messages causes a hang on IVB and VLV.
          */
         nir_ssa_def *stride = load_image_param(b, deref, STRIDE);
         nir_ssa_def *is_raw =
            nir_ilt(b, nir_imm_int(b, 4), nir_channel(b, stride, 0));
         do_store = nir_iand(b, do_store, is_raw);
      }
      nir_push_if(b, do_store);

      nir_ssa_def *addr = image_address(b, devinfo, deref, coord);
      nir_ssa_def *color = convert_color_for_store(b, devinfo,
                                                   intrin->src[3].ssa,
                                                   image_fmt, raw_fmt);

      nir_intrinsic_instr *store =
         nir_intrinsic_instr_create(b->shader,
                                    nir_intrinsic_image_deref_store_raw_intel);
      store->src[0] = nir_src_for_ssa(&deref->dest.ssa);
      store->src[1] = nir_src_for_ssa(addr);
      store->src[2] = nir_src_for_ssa(color);
      store->num_components = image_fmtl->bpb / 32;
      nir_builder_instr_insert(b, &store->instr);

      nir_pop_if(b, NULL);
   }

   return true;
}

static bool
lower_image_atomic_instr(nir_builder *b,
                         const struct gen_device_info *devinfo,
                         nir_intrinsic_instr *intrin)
{
   if (devinfo->is_haswell || devinfo->gen >= 8)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

   b->cursor = nir_instr_remove(&intrin->instr);

   /* Use an undef to hold the uses of the atomic's result while we wrap it
    * in the image-bound check below.
    */
   nir_ssa_def *placeholder = nir_ssa_undef(b, 4, 32);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(placeholder));

   /* Check the first component of the size field to find out if the
    * image is bound.  Necessary on IVB for typed atomics because
    * they don't seem to respect null surfaces and will happily
    * corrupt or read random memory when no image is bound.
    */
   nir_ssa_def *size = load_image_param(b, deref, SIZE);
   nir_ssa_def *zero = nir_imm_int(b, 0);
   nir_push_if(b, nir_ine(b, nir_channel(b, size, 0), zero));

   nir_builder_instr_insert(b, &intrin->instr);

   nir_pop_if(b, NULL);

   nir_ssa_def *result = nir_if_phi(b, &intrin->dest.ssa, zero);
   nir_ssa_def_rewrite_uses(placeholder, nir_src_for_ssa(result));

   return true;
}

static bool
lower_image_size_instr(nir_builder *b,
                       const struct gen_device_info *devinfo,
                       nir_intrinsic_instr *intrin)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   /* For write-only images, we have an actual image surface so we fall back
    * and let the back-end emit a TXS for this.
    */
   if (var->data.access & ACCESS_NON_READABLE)
      return false;

   /* If we have a matching typed format, then we have an actual image
    * surface so we fall back and let the back-end emit a TXS for this.
    */
   const enum isl_format image_fmt =
      isl_format_for_gl_format(var->data.image.format);
   if (isl_has_matching_typed_storage_image_format(devinfo, image_fmt))
      return false;

   b->cursor = nir_instr_remove(&intrin->instr);

   nir_ssa_def *size = load_image_param(b, deref, SIZE);

   nir_ssa_def *comps[4] = { NULL, NULL, NULL, NULL };

   enum glsl_sampler_dim dim = glsl_get_sampler_dim(deref->type);
   unsigned coord_comps = glsl_get_sampler_coordinate_components(deref->type);
   for (unsigned c = 0; c < coord_comps; c++) {
      if (c == 2 && dim == GLSL_SAMPLER_DIM_CUBE) {
         comps[2] = nir_idiv(b, nir_channel(b, size, 2), nir_imm_int(b, 6));
      } else {
         comps[c] = nir_channel(b, size, c);
      }
   }

   for (unsigned c = coord_comps; c < intrin->dest.ssa.num_components; ++c)
      comps[c] = nir_imm_int(b, 1);

   nir_ssa_def *vec = nir_vec(b, comps, intrin->dest.ssa.num_components);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(vec));

   return true;
}

bool
brw_nir_lower_image_load_store(nir_shader *shader,
                               const struct gen_device_info *devinfo)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl == NULL)
         continue;

      nir_foreach_block_safe(block, function->impl) {
         nir_builder b;
         nir_builder_init(&b, function->impl);

         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_image_deref_load:
               if (lower_image_load_instr(&b, devinfo, intrin))
                  progress = true;
               break;

            case nir_intrinsic_image_deref_store:
               if (lower_image_store_instr(&b, devinfo, intrin))
                  progress = true;
               break;

            case nir_intrinsic_image_deref_atomic_add:
            case nir_intrinsic_image_deref_atomic_imin:
            case nir_intrinsic_image_deref_atomic_umin:
            case nir_intrinsic_image_deref_atomic_imax:
            case nir_intrinsic_image_deref_atomic_umax:
            case nir_intrinsic_image_deref_atomic_and:
            case nir_intrinsic_image_deref_atomic_or:
            case nir_intrinsic_image_deref_atomic_xor:
            case nir_intrinsic_image_deref_atomic_exchange:
            case nir_intrinsic_image_deref_atomic_comp_swap:
               if (lower_image_atomic_instr(&b, devinfo, intrin))
                  progress = true;
               break;

            case nir_intrinsic_image_deref_size:
               if (lower_image_size_instr(&b, devinfo, intrin))
                  progress = true;
               break;

            default:
               /* Nothing to do */
               break;
            }
         }
      }

      if (progress)
         nir_metadata_preserve(function->impl, nir_metadata_none);
   }

   return progress;
}
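
/* Usage sketch (a hypothetical call site; the real callers live in the
 * driver code):
 *
 *    NIR_PASS(progress, nir, brw_nir_lower_image_load_store, devinfo);
 */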