/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
27 #include "compiler/nir/nir_builder.h"
28 #include "compiler/nir/nir_format_convert.h"
30 /* The higher compiler layers use the GL enums for image formats even if
31 * they come in from SPIR-V or Vulkan. We need to turn them into an ISL
32 * enum before we can use them.
34 static enum isl_format
35 isl_format_for_gl_format(uint32_t gl_format
)
38 case GL_R8
: return ISL_FORMAT_R8_UNORM
;
39 case GL_R8_SNORM
: return ISL_FORMAT_R8_SNORM
;
40 case GL_R8UI
: return ISL_FORMAT_R8_UINT
;
41 case GL_R8I
: return ISL_FORMAT_R8_SINT
;
42 case GL_RG8
: return ISL_FORMAT_R8G8_UNORM
;
43 case GL_RG8_SNORM
: return ISL_FORMAT_R8G8_SNORM
;
44 case GL_RG8UI
: return ISL_FORMAT_R8G8_UINT
;
45 case GL_RG8I
: return ISL_FORMAT_R8G8_SINT
;
46 case GL_RGBA8
: return ISL_FORMAT_R8G8B8A8_UNORM
;
47 case GL_RGBA8_SNORM
: return ISL_FORMAT_R8G8B8A8_SNORM
;
48 case GL_RGBA8UI
: return ISL_FORMAT_R8G8B8A8_UINT
;
49 case GL_RGBA8I
: return ISL_FORMAT_R8G8B8A8_SINT
;
50 case GL_R11F_G11F_B10F
: return ISL_FORMAT_R11G11B10_FLOAT
;
51 case GL_RGB10_A2
: return ISL_FORMAT_R10G10B10A2_UNORM
;
52 case GL_RGB10_A2UI
: return ISL_FORMAT_R10G10B10A2_UINT
;
53 case GL_R16
: return ISL_FORMAT_R16_UNORM
;
54 case GL_R16_SNORM
: return ISL_FORMAT_R16_SNORM
;
55 case GL_R16F
: return ISL_FORMAT_R16_FLOAT
;
56 case GL_R16UI
: return ISL_FORMAT_R16_UINT
;
57 case GL_R16I
: return ISL_FORMAT_R16_SINT
;
58 case GL_RG16
: return ISL_FORMAT_R16G16_UNORM
;
59 case GL_RG16_SNORM
: return ISL_FORMAT_R16G16_SNORM
;
60 case GL_RG16F
: return ISL_FORMAT_R16G16_FLOAT
;
61 case GL_RG16UI
: return ISL_FORMAT_R16G16_UINT
;
62 case GL_RG16I
: return ISL_FORMAT_R16G16_SINT
;
63 case GL_RGBA16
: return ISL_FORMAT_R16G16B16A16_UNORM
;
64 case GL_RGBA16_SNORM
: return ISL_FORMAT_R16G16B16A16_SNORM
;
65 case GL_RGBA16F
: return ISL_FORMAT_R16G16B16A16_FLOAT
;
66 case GL_RGBA16UI
: return ISL_FORMAT_R16G16B16A16_UINT
;
67 case GL_RGBA16I
: return ISL_FORMAT_R16G16B16A16_SINT
;
68 case GL_R32F
: return ISL_FORMAT_R32_FLOAT
;
69 case GL_R32UI
: return ISL_FORMAT_R32_UINT
;
70 case GL_R32I
: return ISL_FORMAT_R32_SINT
;
71 case GL_RG32F
: return ISL_FORMAT_R32G32_FLOAT
;
72 case GL_RG32UI
: return ISL_FORMAT_R32G32_UINT
;
73 case GL_RG32I
: return ISL_FORMAT_R32G32_SINT
;
74 case GL_RGBA32F
: return ISL_FORMAT_R32G32B32A32_FLOAT
;
75 case GL_RGBA32UI
: return ISL_FORMAT_R32G32B32A32_UINT
;
76 case GL_RGBA32I
: return ISL_FORMAT_R32G32B32A32_SINT
;
77 case GL_NONE
: return ISL_FORMAT_UNSUPPORTED
;
79 assert(!"Invalid image format");
80 return ISL_FORMAT_UNSUPPORTED
;
85 _load_image_param(nir_builder
*b
, nir_deref_instr
*deref
, unsigned offset
)
87 nir_intrinsic_instr
*load
=
88 nir_intrinsic_instr_create(b
->shader
,
89 nir_intrinsic_image_deref_load_param_intel
);
90 load
->src
[0] = nir_src_for_ssa(&deref
->dest
.ssa
);
91 nir_intrinsic_set_base(load
, offset
/ 4);
94 case BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET
:
95 load
->num_components
= 1;
97 case BRW_IMAGE_PARAM_OFFSET_OFFSET
:
98 case BRW_IMAGE_PARAM_SWIZZLING_OFFSET
:
99 load
->num_components
= 2;
101 case BRW_IMAGE_PARAM_TILING_OFFSET
:
102 case BRW_IMAGE_PARAM_SIZE_OFFSET
:
103 load
->num_components
= 3;
105 case BRW_IMAGE_PARAM_STRIDE_OFFSET
:
106 load
->num_components
= 4;
109 unreachable("Invalid param offset");
111 nir_ssa_dest_init(&load
->instr
, &load
->dest
,
112 load
->num_components
, 32, NULL
);
114 nir_builder_instr_insert(b
, &load
->instr
);
115 return &load
->dest
.ssa
;
/* Convenience wrapper: load the image param field named \p o, i.e. the one
 * at BRW_IMAGE_PARAM_<o>_OFFSET (e.g. load_image_param(b, d, SIZE)).
 */
#define load_image_param(b, d, o) \
   _load_image_param(b, d, BRW_IMAGE_PARAM_##o##_OFFSET)
122 image_coord_is_in_bounds(nir_builder
*b
, nir_deref_instr
*deref
,
125 nir_ssa_def
*size
= load_image_param(b
, deref
, SIZE
);
126 nir_ssa_def
*cmp
= nir_ilt(b
, coord
, size
);
128 unsigned coord_comps
= glsl_get_sampler_coordinate_components(deref
->type
);
129 nir_ssa_def
*in_bounds
= nir_imm_int(b
, NIR_TRUE
);
130 for (unsigned i
= 0; i
< coord_comps
; i
++)
131 in_bounds
= nir_iand(b
, in_bounds
, nir_channel(b
, cmp
, i
));
136 /** Calculate the offset in memory of the texel given by \p coord.
138 * This is meant to be used with untyped surface messages to access a tiled
139 * surface, what involves taking into account the tiling and swizzling modes
140 * of the surface manually so it will hopefully not happen very often.
142 * The tiling algorithm implemented here matches either the X or Y tiling
143 * layouts supported by the hardware depending on the tiling coefficients
144 * passed to the program as uniforms. See Volume 1 Part 2 Section 4.5
145 * "Address Tiling Function" of the IVB PRM for an in-depth explanation of
146 * the hardware tiling format.
149 image_address(nir_builder
*b
, const struct gen_device_info
*devinfo
,
150 nir_deref_instr
*deref
, nir_ssa_def
*coord
)
152 if (glsl_get_sampler_dim(deref
->type
) == GLSL_SAMPLER_DIM_1D
&&
153 glsl_sampler_type_is_array(deref
->type
)) {
154 /* It's easier if 1D arrays are treated like 2D arrays */
155 coord
= nir_vec3(b
, nir_channel(b
, coord
, 0),
157 nir_channel(b
, coord
, 1));
159 unsigned dims
= glsl_get_sampler_coordinate_components(deref
->type
);
160 coord
= nir_channels(b
, coord
, (1 << dims
) - 1);
163 nir_ssa_def
*offset
= load_image_param(b
, deref
, OFFSET
);
164 nir_ssa_def
*tiling
= load_image_param(b
, deref
, TILING
);
165 nir_ssa_def
*stride
= load_image_param(b
, deref
, STRIDE
);
167 /* Shift the coordinates by the fixed surface offset. It may be non-zero
168 * if the image is a single slice of a higher-dimensional surface, or if a
169 * non-zero mipmap level of the surface is bound to the pipeline. The
170 * offset needs to be applied here rather than at surface state set-up time
171 * because the desired slice-level may start mid-tile, so simply shifting
172 * the surface base address wouldn't give a well-formed tiled surface in
175 nir_ssa_def
*xypos
= (coord
->num_components
== 1) ?
176 nir_vec2(b
, coord
, nir_imm_int(b
, 0)) :
177 nir_channels(b
, coord
, 0x3);
178 xypos
= nir_iadd(b
, xypos
, offset
);
180 /* The layout of 3-D textures in memory is sort-of like a tiling
181 * format. At each miplevel, the slices are arranged in rows of
182 * 2^level slices per row. The slice row is stored in tmp.y and
183 * the slice within the row is stored in tmp.x.
185 * The layout of 2-D array textures and cubemaps is much simpler:
186 * Depending on whether the ARYSPC_LOD0 layout is in use it will be
187 * stored in memory as an array of slices, each one being a 2-D
188 * arrangement of miplevels, or as a 2D arrangement of miplevels,
189 * each one being an array of slices. In either case the separation
190 * between slices of the same LOD is equal to the qpitch value
191 * provided as stride.w.
193 * This code can be made to handle either 2D arrays and 3D textures
194 * by passing in the miplevel as tile.z for 3-D textures and 0 in
195 * tile.z for 2-D array textures.
197 * See Volume 1 Part 1 of the Gen7 PRM, sections 6.18.4.7 "Surface
198 * Arrays" and 6.18.6 "3D Surfaces" for a more extensive discussion
199 * of the hardware 3D texture and 2D array layouts.
201 if (coord
->num_components
> 2) {
202 /* Decompose z into a major (tmp.y) and a minor (tmp.x)
205 nir_ssa_def
*z
= nir_channel(b
, coord
, 2);
206 nir_ssa_def
*z_x
= nir_ubfe(b
, z
, nir_imm_int(b
, 0),
207 nir_channel(b
, tiling
, 2));
208 nir_ssa_def
*z_y
= nir_ushr(b
, z
, nir_channel(b
, tiling
, 2));
210 /* Take into account the horizontal (tmp.x) and vertical (tmp.y)
213 xypos
= nir_iadd(b
, xypos
, nir_imul(b
, nir_vec2(b
, z_x
, z_y
),
214 nir_channels(b
, stride
, 0xc)));
218 if (coord
->num_components
> 1) {
219 /* Calculate the major/minor x and y indices. In order to
220 * accommodate both X and Y tiling, the Y-major tiling format is
221 * treated as being a bunch of narrow X-tiles placed next to each
222 * other. This means that the tile width for Y-tiling is actually
223 * the width of one sub-column of the Y-major tile where each 4K
224 * tile has 8 512B sub-columns.
226 * The major Y value is the row of tiles in which the pixel lives.
227 * The major X value is the tile sub-column in which the pixel
228 * lives; for X tiling, this is the same as the tile column, for Y
229 * tiling, each tile has 8 sub-columns. The minor X and Y indices
230 * are the position within the sub-column.
233 /* Calculate the minor x and y indices. */
234 nir_ssa_def
*minor
= nir_ubfe(b
, xypos
, nir_imm_int(b
, 0),
235 nir_channels(b
, tiling
, 0x3));
236 nir_ssa_def
*major
= nir_ushr(b
, xypos
, nir_channels(b
, tiling
, 0x3));
238 /* Calculate the texel index from the start of the tile row and the
239 * vertical coordinate of the row.
241 * tmp.x = (major.x << tile.y << tile.x) +
242 * (minor.y << tile.x) + minor.x
243 * tmp.y = major.y << tile.y
245 nir_ssa_def
*idx_x
, *idx_y
;
246 idx_x
= nir_ishl(b
, nir_channel(b
, major
, 0), nir_channel(b
, tiling
, 1));
247 idx_x
= nir_iadd(b
, idx_x
, nir_channel(b
, minor
, 1));
248 idx_x
= nir_ishl(b
, idx_x
, nir_channel(b
, tiling
, 0));
249 idx_x
= nir_iadd(b
, idx_x
, nir_channel(b
, minor
, 0));
250 idx_y
= nir_ishl(b
, nir_channel(b
, major
, 1), nir_channel(b
, tiling
, 1));
252 /* Add it to the start of the tile row. */
254 idx
= nir_imul(b
, idx_y
, nir_channel(b
, stride
, 1));
255 idx
= nir_iadd(b
, idx
, idx_x
);
257 /* Multiply by the Bpp value. */
258 addr
= nir_imul(b
, idx
, nir_channel(b
, stride
, 0));
260 if (devinfo
->gen
< 8 && !devinfo
->is_baytrail
) {
261 /* Take into account the two dynamically specified shifts. Both are
262 * used to implement swizzling of X-tiled surfaces. For Y-tiled
263 * surfaces only one bit needs to be XOR-ed with bit 6 of the memory
264 * address, so a swz value of 0xff (actually interpreted as 31 by the
265 * hardware) will be provided to cause the relevant bit of tmp.y to
266 * be zero and turn the first XOR into the identity. For linear
267 * surfaces or platforms lacking address swizzling both shifts will
268 * be 0xff causing the relevant bits of both tmp.x and .y to be zero,
269 * what effectively disables swizzling.
271 nir_ssa_def
*swizzle
= load_image_param(b
, deref
, SWIZZLING
);
272 nir_ssa_def
*shift0
= nir_ushr(b
, addr
, nir_channel(b
, swizzle
, 0));
273 nir_ssa_def
*shift1
= nir_ushr(b
, addr
, nir_channel(b
, swizzle
, 1));
275 /* XOR tmp.x and tmp.y with bit 6 of the memory address. */
276 nir_ssa_def
*bit
= nir_iand(b
, nir_ixor(b
, shift0
, shift1
),
277 nir_imm_int(b
, 1 << 6));
278 addr
= nir_ixor(b
, addr
, bit
);
281 /* Multiply by the Bpp/stride value. Note that the addr.y may be
282 * non-zero even if the image is one-dimensional because a vertical
283 * offset may have been applied above to select a non-zero slice or
284 * level of a higher-dimensional texture.
287 idx
= nir_imul(b
, nir_channel(b
, xypos
, 1), nir_channel(b
, stride
, 1));
288 idx
= nir_iadd(b
, nir_channel(b
, xypos
, 0), idx
);
289 addr
= nir_imul(b
, idx
, nir_channel(b
, stride
, 0));
/* Small value type summarizing an ISL format for the pack/unpack helpers:
 * the full layout, the channel count, and the per-channel bit widths in
 * RGBA order (used by bits[0]..bits[3] accesses below).
 */
struct format_info {
   const struct isl_format_layout *fmtl;
   unsigned chans;
   unsigned bits[4];
};
301 static struct format_info
302 get_format_info(enum isl_format fmt
)
304 const struct isl_format_layout
*fmtl
= isl_format_get_layout(fmt
);
306 return (struct format_info
) {
308 .chans
= isl_format_get_num_channels(fmt
),
310 fmtl
->channels
.r
.bits
,
311 fmtl
->channels
.g
.bits
,
312 fmtl
->channels
.b
.bits
,
313 fmtl
->channels
.a
.bits
319 nir_zero_vec(nir_builder
*b
, unsigned num_components
)
322 memset(&v
, 0, sizeof(v
));
324 return nir_build_imm(b
, num_components
, 32, v
);
328 convert_color_for_load(nir_builder
*b
, const struct gen_device_info
*devinfo
,
330 enum isl_format image_fmt
, enum isl_format lower_fmt
,
331 unsigned dest_components
)
333 if (image_fmt
== lower_fmt
)
336 if (image_fmt
== ISL_FORMAT_R11G11B10_FLOAT
) {
337 assert(lower_fmt
== ISL_FORMAT_R32_UINT
);
338 color
= nir_format_unpack_11f11f10f(b
, color
);
342 struct format_info image
= get_format_info(image_fmt
);
343 struct format_info lower
= get_format_info(lower_fmt
);
345 const bool needs_sign_extension
=
346 isl_format_has_snorm_channel(image_fmt
) ||
347 isl_format_has_sint_channel(image_fmt
);
349 /* We only check the red channel to detect if we need to pack/unpack */
350 assert(image
.bits
[0] != lower
.bits
[0] ||
351 memcmp(image
.bits
, lower
.bits
, sizeof(image
.bits
)) == 0);
353 if (image
.bits
[0] != lower
.bits
[0] && lower_fmt
== ISL_FORMAT_R32_UINT
) {
354 if (needs_sign_extension
)
355 color
= nir_format_unpack_sint(b
, color
, image
.bits
, image
.chans
);
357 color
= nir_format_unpack_uint(b
, color
, image
.bits
, image
.chans
);
359 /* All these formats are homogeneous */
360 for (unsigned i
= 1; i
< image
.chans
; i
++)
361 assert(image
.bits
[i
] == image
.bits
[0]);
363 /* On IVB, we rely on the undocumented behavior that typed reads from
364 * surfaces of the unsupported R8 and R16 formats return useful data in
365 * their least significant bits. However, the data in the high bits is
366 * garbage so we have to discard it.
368 if (devinfo
->gen
== 7 && !devinfo
->is_haswell
&&
369 (lower_fmt
== ISL_FORMAT_R16_UINT
||
370 lower_fmt
== ISL_FORMAT_R8_UINT
))
371 color
= nir_format_mask_uvec(b
, color
, lower
.bits
);
373 if (image
.bits
[0] != lower
.bits
[0]) {
374 color
= nir_format_bitcast_uvec_unmasked(b
, color
, lower
.bits
[0],
378 if (needs_sign_extension
)
379 color
= nir_format_sign_extend_ivec(b
, color
, image
.bits
);
382 switch (image
.fmtl
->channels
.r
.type
) {
384 assert(isl_format_has_uint_channel(lower_fmt
));
385 color
= nir_format_unorm_to_float(b
, color
, image
.bits
);
389 assert(isl_format_has_uint_channel(lower_fmt
));
390 color
= nir_format_snorm_to_float(b
, color
, image
.bits
);
394 if (image
.bits
[0] == 16)
395 color
= nir_unpack_half_2x16_split_x(b
, color
);
403 unreachable("Invalid image channel type");
407 assert(dest_components
== 1 || dest_components
== 4);
408 assert(color
->num_components
<= dest_components
);
409 if (color
->num_components
== dest_components
)
412 nir_ssa_def
*comps
[4];
413 for (unsigned i
= 0; i
< color
->num_components
; i
++)
414 comps
[i
] = nir_channel(b
, color
, i
);
416 for (unsigned i
= color
->num_components
; i
< 3; i
++)
417 comps
[i
] = nir_imm_int(b
, 0);
419 if (color
->num_components
< 4) {
420 if (isl_format_has_int_channel(image_fmt
))
421 comps
[3] = nir_imm_int(b
, 1);
423 comps
[3] = nir_imm_float(b
, 1);
426 return nir_vec(b
, comps
, dest_components
);
430 lower_image_load_instr(nir_builder
*b
,
431 const struct gen_device_info
*devinfo
,
432 nir_intrinsic_instr
*intrin
)
434 nir_deref_instr
*deref
= nir_src_as_deref(intrin
->src
[0]);
435 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
436 const enum isl_format image_fmt
=
437 isl_format_for_gl_format(var
->data
.image
.format
);
439 if (isl_has_matching_typed_storage_image_format(devinfo
, image_fmt
)) {
440 const enum isl_format lower_fmt
=
441 isl_lower_storage_image_format(devinfo
, image_fmt
);
442 const unsigned dest_components
= intrin
->num_components
;
444 /* Use an undef to hold the uses of the load while we do the color
447 nir_ssa_def
*placeholder
= nir_ssa_undef(b
, 4, 32);
448 nir_ssa_def_rewrite_uses(&intrin
->dest
.ssa
, nir_src_for_ssa(placeholder
));
450 intrin
->num_components
= isl_format_get_num_channels(lower_fmt
);
451 intrin
->dest
.ssa
.num_components
= intrin
->num_components
;
453 b
->cursor
= nir_after_instr(&intrin
->instr
);
455 nir_ssa_def
*color
= convert_color_for_load(b
, devinfo
,
457 image_fmt
, lower_fmt
,
460 nir_ssa_def_rewrite_uses(placeholder
, nir_src_for_ssa(color
));
461 nir_instr_remove(placeholder
->parent_instr
);
463 const struct isl_format_layout
*image_fmtl
=
464 isl_format_get_layout(image_fmt
);
465 /* We have a matching typed format for everything 32b and below */
466 assert(image_fmtl
->bpb
== 64 || image_fmtl
->bpb
== 128);
467 enum isl_format raw_fmt
= (image_fmtl
->bpb
== 64) ?
468 ISL_FORMAT_R32G32_UINT
:
469 ISL_FORMAT_R32G32B32A32_UINT
;
470 const unsigned dest_components
= intrin
->num_components
;
472 b
->cursor
= nir_instr_remove(&intrin
->instr
);
474 nir_ssa_def
*coord
= intrin
->src
[1].ssa
;
476 nir_ssa_def
*do_load
= image_coord_is_in_bounds(b
, deref
, coord
);
477 if (devinfo
->gen
== 7 && !devinfo
->is_haswell
) {
478 /* Check whether the first stride component (i.e. the Bpp value)
479 * is greater than four, what on Gen7 indicates that a surface of
480 * type RAW has been bound for untyped access. Reading or writing
481 * to a surface of type other than RAW using untyped surface
482 * messages causes a hang on IVB and VLV.
484 nir_ssa_def
*stride
= load_image_param(b
, deref
, STRIDE
);
485 nir_ssa_def
*is_raw
=
486 nir_ilt(b
, nir_imm_int(b
, 4), nir_channel(b
, stride
, 0));
487 do_load
= nir_iand(b
, do_load
, is_raw
);
489 nir_push_if(b
, do_load
);
491 nir_ssa_def
*addr
= image_address(b
, devinfo
, deref
, coord
);
492 nir_intrinsic_instr
*load
=
493 nir_intrinsic_instr_create(b
->shader
,
494 nir_intrinsic_image_deref_load_raw_intel
);
495 load
->src
[0] = nir_src_for_ssa(&deref
->dest
.ssa
);
496 load
->src
[1] = nir_src_for_ssa(addr
);
497 load
->num_components
= image_fmtl
->bpb
/ 32;
498 nir_ssa_dest_init(&load
->instr
, &load
->dest
,
499 load
->num_components
, 32, NULL
);
500 nir_builder_instr_insert(b
, &load
->instr
);
502 nir_push_else(b
, NULL
);
504 nir_ssa_def
*zero
= nir_zero_vec(b
, load
->num_components
);
508 nir_ssa_def
*value
= nir_if_phi(b
, &load
->dest
.ssa
, zero
);
510 nir_ssa_def
*color
= convert_color_for_load(b
, devinfo
, value
,
514 nir_ssa_def_rewrite_uses(&intrin
->dest
.ssa
, nir_src_for_ssa(color
));
521 convert_color_for_store(nir_builder
*b
, const struct gen_device_info
*devinfo
,
523 enum isl_format image_fmt
, enum isl_format lower_fmt
)
525 struct format_info image
= get_format_info(image_fmt
);
526 struct format_info lower
= get_format_info(lower_fmt
);
528 color
= nir_channels(b
, color
, (1 << image
.chans
) - 1);
530 if (image_fmt
== lower_fmt
)
533 if (image_fmt
== ISL_FORMAT_R11G11B10_FLOAT
) {
534 assert(lower_fmt
== ISL_FORMAT_R32_UINT
);
535 return nir_format_pack_11f11f10f(b
, color
);
538 switch (image
.fmtl
->channels
.r
.type
) {
540 assert(isl_format_has_uint_channel(lower_fmt
));
541 color
= nir_format_float_to_unorm(b
, color
, image
.bits
);
545 assert(isl_format_has_uint_channel(lower_fmt
));
546 color
= nir_format_float_to_snorm(b
, color
, image
.bits
);
550 if (image
.bits
[0] == 16) {
551 nir_ssa_def
*f16comps
[4];
552 for (unsigned i
= 0; i
< image
.chans
; i
++) {
553 f16comps
[i
] = nir_pack_half_2x16_split(b
, nir_channel(b
, color
, i
),
554 nir_imm_float(b
, 0));
556 color
= nir_vec(b
, f16comps
, image
.chans
);
561 if (image
.bits
[0] < 32) {
563 for (unsigned i
= 0; i
< image
.chans
; i
++) {
564 assert(image
.bits
[i
] < 32);
565 max
.u32
[i
] = (1u << image
.bits
[i
]) - 1;
567 color
= nir_umin(b
, color
, nir_build_imm(b
, image
.chans
, 32, max
));
572 if (image
.bits
[0] < 32) {
573 nir_const_value min
, max
;
574 for (unsigned i
= 0; i
< image
.chans
; i
++) {
575 assert(image
.bits
[i
] < 32);
576 max
.i32
[i
] = (1 << (image
.bits
[i
] - 1)) - 1;
577 min
.i32
[i
] = -(1 << (image
.bits
[i
] - 1));
579 color
= nir_imin(b
, color
, nir_build_imm(b
, image
.chans
, 32, max
));
580 color
= nir_imax(b
, color
, nir_build_imm(b
, image
.chans
, 32, min
));
585 unreachable("Invalid image channel type");
588 if (image
.bits
[0] < 32 &&
589 (isl_format_has_snorm_channel(image_fmt
) ||
590 isl_format_has_sint_channel(image_fmt
)))
591 color
= nir_format_mask_uvec(b
, color
, image
.bits
);
593 if (image
.bits
[0] != lower
.bits
[0] && lower_fmt
== ISL_FORMAT_R32_UINT
) {
594 color
= nir_format_pack_uint(b
, color
, image
.bits
, image
.chans
);
596 /* All these formats are homogeneous */
597 for (unsigned i
= 1; i
< image
.chans
; i
++)
598 assert(image
.bits
[i
] == image
.bits
[0]);
600 if (image
.bits
[0] != lower
.bits
[0]) {
601 color
= nir_format_bitcast_uvec_unmasked(b
, color
, image
.bits
[0],
610 lower_image_store_instr(nir_builder
*b
,
611 const struct gen_device_info
*devinfo
,
612 nir_intrinsic_instr
*intrin
)
614 nir_deref_instr
*deref
= nir_src_as_deref(intrin
->src
[0]);
615 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
617 /* For write-only surfaces, we trust that the hardware can just do the
620 if (var
->data
.image
.access
& ACCESS_NON_READABLE
)
623 const enum isl_format image_fmt
=
624 isl_format_for_gl_format(var
->data
.image
.format
);
626 if (isl_has_matching_typed_storage_image_format(devinfo
, image_fmt
)) {
627 const enum isl_format lower_fmt
=
628 isl_lower_storage_image_format(devinfo
, image_fmt
);
630 /* Color conversion goes before the store */
631 b
->cursor
= nir_before_instr(&intrin
->instr
);
633 nir_ssa_def
*color
= convert_color_for_store(b
, devinfo
,
635 image_fmt
, lower_fmt
);
636 intrin
->num_components
= isl_format_get_num_channels(lower_fmt
);
637 nir_instr_rewrite_src(&intrin
->instr
, &intrin
->src
[3],
638 nir_src_for_ssa(color
));
640 const struct isl_format_layout
*image_fmtl
=
641 isl_format_get_layout(image_fmt
);
642 /* We have a matching typed format for everything 32b and below */
643 assert(image_fmtl
->bpb
== 64 || image_fmtl
->bpb
== 128);
644 enum isl_format raw_fmt
= (image_fmtl
->bpb
== 64) ?
645 ISL_FORMAT_R32G32_UINT
:
646 ISL_FORMAT_R32G32B32A32_UINT
;
648 b
->cursor
= nir_instr_remove(&intrin
->instr
);
650 nir_ssa_def
*coord
= intrin
->src
[1].ssa
;
652 nir_ssa_def
*do_store
= image_coord_is_in_bounds(b
, deref
, coord
);
653 if (devinfo
->gen
== 7 && !devinfo
->is_haswell
) {
654 /* Check whether the first stride component (i.e. the Bpp value)
655 * is greater than four, what on Gen7 indicates that a surface of
656 * type RAW has been bound for untyped access. Reading or writing
657 * to a surface of type other than RAW using untyped surface
658 * messages causes a hang on IVB and VLV.
660 nir_ssa_def
*stride
= load_image_param(b
, deref
, STRIDE
);
661 nir_ssa_def
*is_raw
=
662 nir_ilt(b
, nir_imm_int(b
, 4), nir_channel(b
, stride
, 0));
663 do_store
= nir_iand(b
, do_store
, is_raw
);
665 nir_push_if(b
, do_store
);
667 nir_ssa_def
*addr
= image_address(b
, devinfo
, deref
, coord
);
668 nir_ssa_def
*color
= convert_color_for_store(b
, devinfo
,
672 nir_intrinsic_instr
*store
=
673 nir_intrinsic_instr_create(b
->shader
,
674 nir_intrinsic_image_deref_store_raw_intel
);
675 store
->src
[0] = nir_src_for_ssa(&deref
->dest
.ssa
);
676 store
->src
[1] = nir_src_for_ssa(addr
);
677 store
->src
[2] = nir_src_for_ssa(color
);
678 store
->num_components
= image_fmtl
->bpb
/ 32;
679 nir_builder_instr_insert(b
, &store
->instr
);
688 lower_image_atomic_instr(nir_builder
*b
,
689 const struct gen_device_info
*devinfo
,
690 nir_intrinsic_instr
*intrin
)
692 if (devinfo
->is_haswell
|| devinfo
->gen
>= 8)
695 nir_deref_instr
*deref
= nir_src_as_deref(intrin
->src
[0]);
697 b
->cursor
= nir_instr_remove(&intrin
->instr
);
699 /* Use an undef to hold the uses of the load conversion. */
700 nir_ssa_def
*placeholder
= nir_ssa_undef(b
, 4, 32);
701 nir_ssa_def_rewrite_uses(&intrin
->dest
.ssa
, nir_src_for_ssa(placeholder
));
703 /* Check the first component of the size field to find out if the
704 * image is bound. Necessary on IVB for typed atomics because
705 * they don't seem to respect null surfaces and will happily
706 * corrupt or read random memory when no image is bound.
708 nir_ssa_def
*size
= load_image_param(b
, deref
, SIZE
);
709 nir_ssa_def
*zero
= nir_imm_int(b
, 0);
710 nir_push_if(b
, nir_ine(b
, nir_channel(b
, size
, 0), zero
));
712 nir_builder_instr_insert(b
, &intrin
->instr
);
716 nir_ssa_def
*result
= nir_if_phi(b
, &intrin
->dest
.ssa
, zero
);
717 nir_ssa_def_rewrite_uses(placeholder
, nir_src_for_ssa(result
));
723 lower_image_size_instr(nir_builder
*b
,
724 const struct gen_device_info
*devinfo
,
725 nir_intrinsic_instr
*intrin
)
727 nir_deref_instr
*deref
= nir_src_as_deref(intrin
->src
[0]);
728 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
730 /* For write-only images, we have an actual image surface so we fall back
731 * and let the back-end emit a TXS for this.
733 if (var
->data
.image
.access
& ACCESS_NON_READABLE
)
736 /* If we have a matching typed format, then we have an actual image surface
737 * so we fall back and let the back-end emit a TXS for this.
739 const enum isl_format image_fmt
=
740 isl_format_for_gl_format(var
->data
.image
.format
);
741 if (isl_has_matching_typed_storage_image_format(devinfo
, image_fmt
))
744 b
->cursor
= nir_instr_remove(&intrin
->instr
);
746 nir_ssa_def
*size
= load_image_param(b
, deref
, SIZE
);
748 nir_ssa_def
*comps
[4] = { NULL
, NULL
, NULL
, NULL
};
750 enum glsl_sampler_dim dim
= glsl_get_sampler_dim(deref
->type
);
751 unsigned coord_comps
= glsl_get_sampler_coordinate_components(deref
->type
);
752 for (unsigned c
= 0; c
< coord_comps
; c
++) {
753 if (c
== 2 && dim
== GLSL_SAMPLER_DIM_CUBE
) {
754 comps
[2] = nir_idiv(b
, nir_channel(b
, size
, 2), nir_imm_int(b
, 6));
756 comps
[c
] = nir_channel(b
, size
, c
);
760 for (unsigned c
= coord_comps
; c
< intrin
->dest
.ssa
.num_components
; ++c
)
761 comps
[c
] = nir_imm_int(b
, 1);
763 nir_ssa_def
*vec
= nir_vec(b
, comps
, intrin
->dest
.ssa
.num_components
);
764 nir_ssa_def_rewrite_uses(&intrin
->dest
.ssa
, nir_src_for_ssa(vec
));
770 brw_nir_lower_image_load_store(nir_shader
*shader
,
771 const struct gen_device_info
*devinfo
)
773 bool progress
= false;
775 nir_foreach_function(function
, shader
) {
776 if (function
->impl
== NULL
)
779 nir_foreach_block_safe(block
, function
->impl
) {
781 nir_builder_init(&b
, function
->impl
);
783 nir_foreach_instr_safe(instr
, block
) {
784 if (instr
->type
!= nir_instr_type_intrinsic
)
787 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
788 switch (intrin
->intrinsic
) {
789 case nir_intrinsic_image_deref_load
:
790 if (lower_image_load_instr(&b
, devinfo
, intrin
))
794 case nir_intrinsic_image_deref_store
:
795 if (lower_image_store_instr(&b
, devinfo
, intrin
))
799 case nir_intrinsic_image_deref_atomic_add
:
800 case nir_intrinsic_image_deref_atomic_min
:
801 case nir_intrinsic_image_deref_atomic_max
:
802 case nir_intrinsic_image_deref_atomic_and
:
803 case nir_intrinsic_image_deref_atomic_or
:
804 case nir_intrinsic_image_deref_atomic_xor
:
805 case nir_intrinsic_image_deref_atomic_exchange
:
806 case nir_intrinsic_image_deref_atomic_comp_swap
:
807 if (lower_image_atomic_instr(&b
, devinfo
, intrin
))
811 case nir_intrinsic_image_deref_size
:
812 if (lower_image_size_instr(&b
, devinfo
, intrin
))
823 nir_metadata_preserve(function
->impl
, nir_metadata_block_index
|
824 nir_metadata_dominance
);
831 brw_nir_rewrite_image_intrinsic(nir_intrinsic_instr
*intrin
,
834 nir_deref_instr
*deref
= nir_src_as_deref(intrin
->src
[0]);
835 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
837 switch (intrin
->intrinsic
) {
839 case nir_intrinsic_image_deref_##op: \
840 intrin->intrinsic = nir_intrinsic_image_##op; \
850 CASE(atomic_exchange
)
851 CASE(atomic_comp_swap
)
856 CASE(store_raw_intel
)
859 unreachable("Unhanded image intrinsic");
862 nir_intrinsic_set_image_dim(intrin
, glsl_get_sampler_dim(deref
->type
));
863 nir_intrinsic_set_image_array(intrin
, glsl_sampler_type_is_array(deref
->type
));
864 nir_intrinsic_set_access(intrin
, var
->data
.image
.access
);
865 nir_intrinsic_set_format(intrin
, var
->data
.image
.format
);
867 nir_instr_rewrite_src(&intrin
->instr
, &intrin
->src
[0],
868 nir_src_for_ssa(index
));