/*
 * Copyright © 2013-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "isl/isl.h"
#include "brw_fs_surface_builder.h"
#include "brw_fs.h"

using namespace brw;

namespace brw {
   namespace surface_access {
      namespace {
         /**
          * Generate a logical send opcode for a surface message and return
          * the result.
          */
         fs_reg
         emit_send(const fs_builder &bld, enum opcode opcode,
                   const fs_reg &addr, const fs_reg &src, const fs_reg &surface,
                   unsigned dims, unsigned arg, unsigned rsize,
                   brw_predicate pred = BRW_PREDICATE_NONE)
         {
            /* Reduce the dynamically uniform surface index to a single
             * scalar.
             */
            const fs_reg usurface = bld.emit_uniformize(surface);
            const fs_reg srcs[] = {
               addr, src, usurface, brw_imm_ud(dims), brw_imm_ud(arg)
            };
            const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, rsize);
            fs_inst *inst = bld.emit(opcode, dst, srcs, ARRAY_SIZE(srcs));

            inst->size_written = rsize * dst.component_size(inst->exec_size);
            inst->predicate = pred;
            return dst;
         }
      }
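
      /* For illustration (not in the original source): a typical call from
       * this file, an untyped read of two 32-bit components at a
       * one-dimensional address predicated on f0.0, looks like
       *
       *    emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL,
       *              addr, fs_reg(), surface, 1, 2, 2, BRW_PREDICATE_NORMAL);
       *
       * i.e. \p arg carries the component count for reads and writes (and
       * the atomic op for atomics), while \p rsize gives the number of
       * return components to allocate.
       */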

      /**
       * Emit an untyped surface read opcode. \p dims determines the number
       * of components of the address and \p size the number of components of
       * the returned value.
       */
      fs_reg
      emit_untyped_read(const fs_builder &bld,
                        const fs_reg &surface, const fs_reg &addr,
                        unsigned dims, unsigned size,
                        brw_predicate pred)
      {
         return emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL,
                          addr, fs_reg(), surface, dims, size, size, pred);
      }

      /**
       * Emit an untyped surface write opcode. \p dims determines the number
       * of components of the address and \p size the number of components of
       * the argument.
       */
      void
      emit_untyped_write(const fs_builder &bld, const fs_reg &surface,
                         const fs_reg &addr, const fs_reg &src,
                         unsigned dims, unsigned size,
                         brw_predicate pred)
      {
         emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL,
                   addr, src, surface, dims, size, 0, pred);
      }

      /**
       * Emit an untyped surface atomic opcode. \p dims determines the number
       * of components of the address and \p rsize the number of components of
       * the returned value (either zero or one).
       */
      fs_reg
      emit_untyped_atomic(const fs_builder &bld,
                          const fs_reg &surface, const fs_reg &addr,
                          const fs_reg &src0, const fs_reg &src1,
                          unsigned dims, unsigned rsize, unsigned op,
                          brw_predicate pred)
      {
         /* FINISHME: Factor out this frequently recurring pattern into a
          * helper function.
          */
         const unsigned n = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
         const fs_reg srcs[] = { src0, src1 };
         const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, n);
         bld.LOAD_PAYLOAD(tmp, srcs, n, 0);

         return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL,
                          addr, tmp, surface, dims, op, rsize, pred);
      }
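
      /* A minimal sketch of the helper the FINISHME above asks for, under
       * the hypothetical name emit_atomic_payload() (not part of this file):
       * it would count the present sources and gather them into a fresh
       * VGRF, reducing each atomic emitter to a single emit_send() call:
       *
       *    fs_reg
       *    emit_atomic_payload(const fs_builder &bld, brw_reg_type type,
       *                        const fs_reg &src0, const fs_reg &src1)
       *    {
       *       const unsigned n = (src0.file != BAD_FILE) +
       *                          (src1.file != BAD_FILE);
       *       const fs_reg srcs[] = { src0, src1 };
       *       const fs_reg tmp = bld.vgrf(type, n);
       *       bld.LOAD_PAYLOAD(tmp, srcs, n, 0);
       *       return tmp;
       *    }
       */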

      /**
       * Emit an untyped surface atomic float opcode. \p dims determines the
       * number of components of the address and \p rsize the number of
       * components of the returned value (either zero or one).
       */
      fs_reg
      emit_untyped_atomic_float(const fs_builder &bld,
                                const fs_reg &surface, const fs_reg &addr,
                                const fs_reg &src0, const fs_reg &src1,
                                unsigned dims, unsigned rsize, unsigned op,
                                brw_predicate pred)
      {
         /* FINISHME: Factor out this frequently recurring pattern into a
          * helper function.
          */
         const unsigned n = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
         const fs_reg srcs[] = { src0, src1 };
         const fs_reg tmp = bld.vgrf(src0.type, n);
         bld.LOAD_PAYLOAD(tmp, srcs, n, 0);

         return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL,
                          addr, tmp, surface, dims, op, rsize, pred);
      }

      /**
       * Emit a typed surface read opcode. \p dims determines the number of
       * components of the address and \p size the number of components of the
       * returned value.
       */
      fs_reg
      emit_typed_read(const fs_builder &bld, const fs_reg &surface,
                      const fs_reg &addr, unsigned dims, unsigned size)
      {
         return emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL,
                          addr, fs_reg(), surface, dims, size, size);
      }

      /**
       * Emit a typed surface write opcode. \p dims determines the number of
       * components of the address and \p size the number of components of the
       * argument.
       */
      void
      emit_typed_write(const fs_builder &bld, const fs_reg &surface,
                       const fs_reg &addr, const fs_reg &src,
                       unsigned dims, unsigned size)
      {
         emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
                   addr, src, surface, dims, size, 0);
      }

      /**
       * Emit a typed surface atomic opcode. \p dims determines the number of
       * components of the address and \p rsize the number of components of
       * the returned value (either zero or one).
       */
      fs_reg
      emit_typed_atomic(const fs_builder &bld, const fs_reg &surface,
                        const fs_reg &addr,
                        const fs_reg &src0, const fs_reg &src1,
                        unsigned dims, unsigned rsize, unsigned op,
                        brw_predicate pred)
      {
         /* FINISHME: Factor out this frequently recurring pattern into a
          * helper function.
          */
         const unsigned n = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
         const fs_reg srcs[] = { src0, src1 };
         const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, n);
         bld.LOAD_PAYLOAD(tmp, srcs, n, 0);

         return emit_send(bld, SHADER_OPCODE_TYPED_ATOMIC_LOGICAL,
                          addr, tmp, surface, dims, op, rsize);
      }

      fs_reg
      emit_byte_scattered_read(const fs_builder &bld,
                               const fs_reg &surface, const fs_reg &addr,
                               unsigned dims, unsigned size,
                               unsigned bit_size, brw_predicate pred)
      {
         return emit_send(bld, SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL,
                          addr, fs_reg(), surface, dims, bit_size, size, pred);
      }

      void
      emit_byte_scattered_write(const fs_builder &bld, const fs_reg &surface,
                                const fs_reg &addr, const fs_reg &src,
                                unsigned dims,
                                unsigned bit_size, brw_predicate pred)
      {
         emit_send(bld, SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL,
                   addr, src, surface, dims, bit_size, 0, pred);
      }
   }
}

namespace {
   namespace image_format_info {
      /* The higher compiler layers use the GL enums for image formats even if
       * they come in from SPIR-V or Vulkan. We need to turn them into an ISL
       * enum before we can use them.
       */
      static enum isl_format
      isl_format_for_gl_format(uint32_t gl_format)
      {
         switch (gl_format) {
         case GL_R8:             return ISL_FORMAT_R8_UNORM;
         case GL_R8_SNORM:       return ISL_FORMAT_R8_SNORM;
         case GL_R8UI:           return ISL_FORMAT_R8_UINT;
         case GL_R8I:            return ISL_FORMAT_R8_SINT;
         case GL_RG8:            return ISL_FORMAT_R8G8_UNORM;
         case GL_RG8_SNORM:      return ISL_FORMAT_R8G8_SNORM;
         case GL_RG8UI:          return ISL_FORMAT_R8G8_UINT;
         case GL_RG8I:           return ISL_FORMAT_R8G8_SINT;
         case GL_RGBA8:          return ISL_FORMAT_R8G8B8A8_UNORM;
         case GL_RGBA8_SNORM:    return ISL_FORMAT_R8G8B8A8_SNORM;
         case GL_RGBA8UI:        return ISL_FORMAT_R8G8B8A8_UINT;
         case GL_RGBA8I:         return ISL_FORMAT_R8G8B8A8_SINT;
         case GL_R11F_G11F_B10F: return ISL_FORMAT_R11G11B10_FLOAT;
         case GL_RGB10_A2:       return ISL_FORMAT_R10G10B10A2_UNORM;
         case GL_RGB10_A2UI:     return ISL_FORMAT_R10G10B10A2_UINT;
         case GL_R16:            return ISL_FORMAT_R16_UNORM;
         case GL_R16_SNORM:      return ISL_FORMAT_R16_SNORM;
         case GL_R16F:           return ISL_FORMAT_R16_FLOAT;
         case GL_R16UI:          return ISL_FORMAT_R16_UINT;
         case GL_R16I:           return ISL_FORMAT_R16_SINT;
         case GL_RG16:           return ISL_FORMAT_R16G16_UNORM;
         case GL_RG16_SNORM:     return ISL_FORMAT_R16G16_SNORM;
         case GL_RG16F:          return ISL_FORMAT_R16G16_FLOAT;
         case GL_RG16UI:         return ISL_FORMAT_R16G16_UINT;
         case GL_RG16I:          return ISL_FORMAT_R16G16_SINT;
         case GL_RGBA16:         return ISL_FORMAT_R16G16B16A16_UNORM;
         case GL_RGBA16_SNORM:   return ISL_FORMAT_R16G16B16A16_SNORM;
         case GL_RGBA16F:        return ISL_FORMAT_R16G16B16A16_FLOAT;
         case GL_RGBA16UI:       return ISL_FORMAT_R16G16B16A16_UINT;
         case GL_RGBA16I:        return ISL_FORMAT_R16G16B16A16_SINT;
         case GL_R32F:           return ISL_FORMAT_R32_FLOAT;
         case GL_R32UI:          return ISL_FORMAT_R32_UINT;
         case GL_R32I:           return ISL_FORMAT_R32_SINT;
         case GL_RG32F:          return ISL_FORMAT_R32G32_FLOAT;
         case GL_RG32UI:         return ISL_FORMAT_R32G32_UINT;
         case GL_RG32I:          return ISL_FORMAT_R32G32_SINT;
         case GL_RGBA32F:        return ISL_FORMAT_R32G32B32A32_FLOAT;
         case GL_RGBA32UI:       return ISL_FORMAT_R32G32B32A32_UINT;
         case GL_RGBA32I:        return ISL_FORMAT_R32G32B32A32_SINT;
         case GL_NONE:           return ISL_FORMAT_UNSUPPORTED;
         default:
            assert(!"Invalid image format");
            return ISL_FORMAT_UNSUPPORTED;
         }
      }

      /**
       * Simple 4-tuple of scalars used to pass around per-color component
       * values.
       */
      struct color_u {
         color_u(unsigned x = 0) : r(x), g(x), b(x), a(x)
         {
         }

         color_u(unsigned r, unsigned g, unsigned b, unsigned a) :
            r(r), g(g), b(b), a(a)
         {
         }

         unsigned
         operator[](unsigned i) const
         {
            const unsigned xs[] = { r, g, b, a };
            return xs[i];
         }

         unsigned r, g, b, a;
      };

      /**
       * Return the per-channel bitfield widths for a given image format.
       */
      inline color_u
      get_bit_widths(isl_format format)
      {
         const isl_format_layout *fmtl = isl_format_get_layout(format);

         return color_u(fmtl->channels.r.bits,
                        fmtl->channels.g.bits,
                        fmtl->channels.b.bits,
                        fmtl->channels.a.bits);
      }

      /**
       * Return the per-channel bitfield shifts for a given image format.
       */
      inline color_u
      get_bit_shifts(isl_format format)
      {
         const color_u widths = get_bit_widths(format);
         return color_u(0, widths.r, widths.r + widths.g,
                        widths.r + widths.g + widths.b);
      }
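
      /* As a worked example (illustration only, not in the original source):
       * for ISL_FORMAT_R10G10B10A2_UNORM, get_bit_widths() returns
       * (10, 10, 10, 2) and get_bit_shifts() returns (0, 10, 20, 30), i.e.
       * red occupies bits [9:0] of the packed dword, green bits [19:10],
       * blue bits [29:20] and alpha bits [31:30].
       */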

      /**
       * Return true if all present components have the same bit width.
       */
      inline bool
      is_homogeneous(isl_format format)
      {
         const color_u widths = get_bit_widths(format);
         return ((widths.g == 0 || widths.g == widths.r) &&
                 (widths.b == 0 || widths.b == widths.r) &&
                 (widths.a == 0 || widths.a == widths.r));
      }

      /**
       * Return true if the format conversion boils down to a trivial copy.
       */
      inline bool
      is_conversion_trivial(const gen_device_info *devinfo, isl_format format)
      {
         return (get_bit_widths(format).r == 32 && is_homogeneous(format)) ||
                format == isl_lower_storage_image_format(devinfo, format);
      }

      /**
       * Return true if the hardware natively supports some format with
       * compatible bitfield layout, but possibly different data types.
       */
      inline bool
      has_supported_bit_layout(const gen_device_info *devinfo,
                               isl_format format)
      {
         const color_u widths = get_bit_widths(format);
         const color_u lower_widths = get_bit_widths(
            isl_lower_storage_image_format(devinfo, format));

         return (widths.r == lower_widths.r &&
                 widths.g == lower_widths.g &&
                 widths.b == lower_widths.b &&
                 widths.a == lower_widths.a);
      }

      /**
       * Return true if we are required to spread individual components over
       * several components of the format used by the hardware (RG32 and
       * friends implemented as RGBA16UI).
       */
      inline bool
      has_split_bit_layout(const gen_device_info *devinfo, isl_format format)
      {
         const isl_format lower_format =
            isl_lower_storage_image_format(devinfo, format);

         return (isl_format_get_num_channels(format) <
                 isl_format_get_num_channels(lower_format));
      }

      /**
       * Return true if the hardware returns garbage in the unused high bits
       * of each component. This may happen on IVB because we rely on the
       * undocumented behavior that typed reads from surfaces of the
       * unsupported R8 and R16 formats return useful data in their least
       * significant bits.
       */
      inline bool
      has_undefined_high_bits(const gen_device_info *devinfo,
                              isl_format format)
      {
         const isl_format lower_format =
            isl_lower_storage_image_format(devinfo, format);

         return (devinfo->gen == 7 && !devinfo->is_haswell &&
                 (lower_format == ISL_FORMAT_R16_UINT ||
                  lower_format == ISL_FORMAT_R8_UINT));
      }

      /**
       * Return true if the format represents values as signed integers
       * requiring sign extension when unpacking.
       */
      inline bool
      needs_sign_extension(isl_format format)
      {
         return isl_format_has_snorm_channel(format) ||
                isl_format_has_sint_channel(format);
      }
   }

   namespace image_validity {
      /**
       * Check whether the bound image is suitable for untyped access.
       */
      static brw_predicate
      emit_untyped_image_check(const fs_builder &bld, const fs_reg &image,
                               brw_predicate pred)
      {
         const gen_device_info *devinfo = bld.shader->devinfo;
         const fs_reg stride = offset(image, bld, BRW_IMAGE_PARAM_STRIDE_OFFSET);

         if (devinfo->gen == 7 && !devinfo->is_haswell) {
            /* Check whether the first stride component (i.e. the Bpp value)
             * is greater than four, which on Gen7 indicates that a surface of
             * type RAW has been bound for untyped access. Reading or writing
             * to a surface of a type other than RAW using untyped surface
             * messages causes a hang on IVB and VLV.
             */
            set_predicate(pred,
                          bld.CMP(bld.null_reg_ud(), stride, brw_imm_d(4),
                                  BRW_CONDITIONAL_G));

            return BRW_PREDICATE_NORMAL;
         } else {
            /* More recent generations handle the format mismatch
             * gracefully.
             */
            return pred;
         }
      }

      /**
       * Check whether there is an image bound at the given index and write
       * the comparison result to f0.0. Returns an appropriate predication
       * mode to use on subsequent image operations.
       */
      static brw_predicate
      emit_typed_atomic_check(const fs_builder &bld, const fs_reg &image)
      {
         const gen_device_info *devinfo = bld.shader->devinfo;
         const fs_reg size = offset(image, bld, BRW_IMAGE_PARAM_SIZE_OFFSET);

         if (devinfo->gen == 7 && !devinfo->is_haswell) {
            /* Check the first component of the size field to find out if the
             * image is bound. Necessary on IVB for typed atomics because
             * they don't seem to respect null surfaces and will happily
             * corrupt or read random memory when no image is bound.
             */
            bld.CMP(bld.null_reg_ud(),
                    retype(size, BRW_REGISTER_TYPE_UD),
                    brw_imm_d(0), BRW_CONDITIONAL_NZ);

            return BRW_PREDICATE_NORMAL;
         } else {
            /* More recent platforms implement compliant behavior when a null
             * surface is bound.
             */
            return BRW_PREDICATE_NONE;
         }
      }

      /**
       * Check whether the provided coordinates are within the image bounds
       * and write the comparison result to f0.0. Returns an appropriate
       * predication mode to use on subsequent image operations.
       */
      static brw_predicate
      emit_bounds_check(const fs_builder &bld, const fs_reg &image,
                        const fs_reg &addr, unsigned dims)
      {
         const fs_reg size = offset(image, bld, BRW_IMAGE_PARAM_SIZE_OFFSET);

         for (unsigned c = 0; c < dims; ++c)
            set_predicate(c == 0 ? BRW_PREDICATE_NONE : BRW_PREDICATE_NORMAL,
                          bld.CMP(bld.null_reg_ud(),
                                  offset(retype(addr, BRW_REGISTER_TYPE_UD), bld, c),
                                  offset(size, bld, c),
                                  BRW_CONDITIONAL_L));

         return BRW_PREDICATE_NORMAL;
      }
   }

   namespace image_coordinates {
      /**
       * Return the total number of coordinates needed to address a texel of
       * the surface, which may be more than the sum of \p surf_dims and \p
       * arr_dims if padding is required.
       */
      static unsigned
      num_image_coordinates(const fs_builder &bld,
                            unsigned surf_dims, unsigned arr_dims,
                            isl_format format)
      {
         /* HSW in vec4 mode and our software coordinate handling for untyped
          * reads want the array index to be at the Z component.
          */
         const bool array_index_at_z =
            format != ISL_FORMAT_UNSUPPORTED &&
            !isl_has_matching_typed_storage_image_format(
               bld.shader->devinfo, format);
         const unsigned zero_dims =
            ((surf_dims == 1 && arr_dims == 1 && array_index_at_z) ? 1 : 0);

         return surf_dims + zero_dims + arr_dims;
      }
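
      /* For example (illustration only): a 1-D array image whose format
       * lacks a matching typed storage format has surf_dims == 1 and
       * arr_dims == 1; since the array index must be passed in Z, a zero Y
       * coordinate gets inserted and num_image_coordinates() returns 3.
       */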

      /**
       * Transform image coordinates into the form expected by the
       * implementation.
       */
      static fs_reg
      emit_image_coordinates(const fs_builder &bld, const fs_reg &addr,
                             unsigned surf_dims, unsigned arr_dims,
                             isl_format format)
      {
         const unsigned dims =
            num_image_coordinates(bld, surf_dims, arr_dims, format);

         if (dims > surf_dims + arr_dims) {
            assert(surf_dims == 1 && arr_dims == 1 && dims == 3);
            /* The array index is required to be passed in as the Z component,
             * so insert a zero at the Y component to shift it to the right
             * position.
             *
             * FINISHME: Factor out this frequently recurring pattern into a
             * helper function.
             */
            const fs_reg srcs[] = { addr, brw_imm_d(0), offset(addr, bld, 1) };
            const fs_reg dst = bld.vgrf(addr.type, dims);
            bld.LOAD_PAYLOAD(dst, srcs, dims, 0);
            return dst;
         } else {
            return addr;
         }
      }

      /**
       * Calculate the offset in memory of the texel given by \p coord.
       *
       * This is meant to be used with untyped surface messages to access a
       * tiled surface, which involves taking the tiling and swizzling modes
       * of the surface into account manually, so it will hopefully not
       * happen very often.
       *
       * The tiling algorithm implemented here matches either the X or Y
       * tiling layouts supported by the hardware depending on the tiling
       * coefficients passed to the program as uniforms. See Volume 1 Part 2
       * Section 4.5 "Address Tiling Function" of the IVB PRM for an in-depth
       * explanation of the hardware tiling format.
       */
      static fs_reg
      emit_address_calculation(const fs_builder &bld, const fs_reg &image,
                               const fs_reg &coord, unsigned dims)
      {
         const gen_device_info *devinfo = bld.shader->devinfo;
         const fs_reg off = offset(image, bld, BRW_IMAGE_PARAM_OFFSET_OFFSET);
         const fs_reg stride = offset(image, bld, BRW_IMAGE_PARAM_STRIDE_OFFSET);
         const fs_reg tile = offset(image, bld, BRW_IMAGE_PARAM_TILING_OFFSET);
         const fs_reg swz = offset(image, bld, BRW_IMAGE_PARAM_SWIZZLING_OFFSET);
         const fs_reg addr = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
         const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
         const fs_reg minor = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
         const fs_reg major = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD);

         /* Shift the coordinates by the fixed surface offset. It may be
          * non-zero if the image is a single slice of a higher-dimensional
          * surface, or if a non-zero mipmap level of the surface is bound to
          * the pipeline. The offset needs to be applied here rather than at
          * surface state set-up time because the desired slice-level may
          * start mid-tile, so simply shifting the surface base address
          * wouldn't give a well-formed tiled surface in the general case.
          */
         for (unsigned c = 0; c < 2; ++c)
            bld.ADD(offset(addr, bld, c), offset(off, bld, c),
                    (c < dims ?
                     offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, c) :
                     fs_reg(brw_imm_d(0))));

         /* The layout of 3-D textures in memory is sort-of like a tiling
          * format. At each miplevel, the slices are arranged in rows of
          * 2^level slices per row. The slice row is stored in tmp.y and
          * the slice within the row is stored in tmp.x.
          *
          * The layout of 2-D array textures and cubemaps is much simpler:
          * Depending on whether the ARYSPC_LOD0 layout is in use, it will be
          * stored in memory either as an array of slices, each one being a
          * 2-D arrangement of miplevels, or as a 2-D arrangement of
          * miplevels, each one being an array of slices. In either case the
          * separation between slices of the same LOD is equal to the qpitch
          * value provided as stride.w.
          *
          * This code can be made to handle both 2-D array and 3-D textures
          * by passing in the miplevel as tile.z for 3-D textures and 0 in
          * tile.z for 2-D array textures.
          *
          * See Volume 1 Part 1 of the Gen7 PRM, sections 6.18.4.7 "Surface
          * Arrays" and 6.18.6 "3D Surfaces" for a more extensive discussion
          * of the hardware 3D texture and 2D array layouts.
          */
         if (dims > 2) {
            /* Decompose z into a major (tmp.y) and a minor (tmp.x)
             * index.
             */
            bld.BFE(offset(tmp, bld, 0), offset(tile, bld, 2), brw_imm_d(0),
                    offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, 2));
            bld.SHR(offset(tmp, bld, 1),
                    offset(retype(coord, BRW_REGISTER_TYPE_UD), bld, 2),
                    offset(tile, bld, 2));

            /* Take into account the horizontal (tmp.x) and vertical (tmp.y)
             * slice offset.
             */
            for (unsigned c = 0; c < 2; ++c) {
               bld.MUL(offset(tmp, bld, c),
                       offset(stride, bld, 2 + c), offset(tmp, bld, c));
               bld.ADD(offset(addr, bld, c),
                       offset(addr, bld, c), offset(tmp, bld, c));
            }
         }

         if (dims > 1) {
            /* Calculate the major/minor x and y indices. In order to
             * accommodate both X and Y tiling, the Y-major tiling format is
             * treated as being a bunch of narrow X-tiles placed next to each
             * other. This means that the tile width for Y-tiling is actually
             * the width of one sub-column of the Y-major tile where each 4K
             * tile has 8 512B sub-columns.
             *
             * The major Y value is the row of tiles in which the pixel lives.
             * The major X value is the tile sub-column in which the pixel
             * lives; for X tiling, this is the same as the tile column, for Y
             * tiling, each tile has 8 sub-columns. The minor X and Y indices
             * are the position within the sub-column.
             */
            for (unsigned c = 0; c < 2; ++c) {
               /* Calculate the minor x and y indices. */
               bld.BFE(offset(minor, bld, c), offset(tile, bld, c),
                       brw_imm_d(0), offset(addr, bld, c));

               /* Calculate the major x and y indices. */
               bld.SHR(offset(major, bld, c),
                       offset(addr, bld, c), offset(tile, bld, c));
            }

            /* Calculate the texel index from the start of the tile row and
             * the vertical coordinate of the row.
             * Equivalent to:
             *   tmp.x = (major.x << tile.y << tile.x) +
             *           (minor.y << tile.x) + minor.x
             *   tmp.y = major.y << tile.y
             */
            bld.SHL(tmp, major, offset(tile, bld, 1));
            bld.ADD(tmp, tmp, offset(minor, bld, 1));
            bld.SHL(tmp, tmp, offset(tile, bld, 0));
            bld.ADD(tmp, tmp, minor);
            bld.SHL(offset(tmp, bld, 1),
                    offset(major, bld, 1), offset(tile, bld, 1));

            /* Add it to the start of the tile row. */
            bld.MUL(offset(tmp, bld, 1),
                    offset(tmp, bld, 1), offset(stride, bld, 1));
            bld.ADD(tmp, tmp, offset(tmp, bld, 1));

            /* Multiply by the Bpp value. */
            bld.MUL(dst, tmp, stride);

            if (devinfo->gen < 8 && !devinfo->is_baytrail) {
               /* Take into account the two dynamically specified shifts.
                * Both are needed to implement swizzling of X-tiled surfaces.
                * For Y-tiled surfaces only one bit needs to be XOR-ed with
                * bit 6 of the memory address, so a swz value of 0xff
                * (actually interpreted as 31 by the hardware) will be
                * provided to cause the relevant bit of tmp.y to be zero and
                * turn the first XOR into the identity. For linear surfaces
                * or platforms lacking address swizzling both shifts will be
                * 0xff, causing the relevant bits of both tmp.x and tmp.y to
                * be zero, which effectively disables swizzling.
                */
               for (unsigned c = 0; c < 2; ++c)
                  bld.SHR(offset(tmp, bld, c), dst, offset(swz, bld, c));

               /* XOR tmp.x and tmp.y with bit 6 of the memory address. */
               bld.XOR(tmp, tmp, offset(tmp, bld, 1));
               bld.AND(tmp, tmp, brw_imm_d(1 << 6));
               bld.XOR(dst, dst, tmp);
            }

         } else {
            /* Multiply by the Bpp/stride value. Note that the addr.y may be
             * non-zero even if the image is one-dimensional because a
             * vertical offset may have been applied above to select a
             * non-zero slice or level of a higher-dimensional texture.
             */
            bld.MUL(offset(addr, bld, 1),
                    offset(addr, bld, 1), offset(stride, bld, 1));
            bld.ADD(addr, addr, offset(addr, bld, 1));
            bld.MUL(dst, addr, stride);
         }

         return dst;
      }
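
      /* A worked numeric example (hypothetical coefficients, illustration
       * only): with tile.x == 7 and tile.y == 3, i.e. tiles 128 texels wide
       * and 8 rows tall, the texel at addr == (300, 10) decomposes into
       * minor == (300 & 127, 10 & 7) == (44, 2) and major == (300 >> 7,
       * 10 >> 3) == (2, 1). The code above then computes
       *
       *    tmp.x = (((2 << 3) + 2) << 7) + 44 = 2348
       *    tmp.y = (1 << 3) * stride.y
       *
       * and returns the byte offset (2348 + tmp.y) * Bpp, modulo the
       * optional address swizzling applied afterwards.
       */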
   }

   namespace image_format_conversion {
      using image_format_info::color_u;

      namespace {
         /**
          * Maximum representable value in an unsigned integer with the given
          * number of bits.
          */
         inline unsigned
         scale(unsigned n)
         {
            return (1 << n) - 1;
         }
      }
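
      /* E.g. scale(8) == 255 and scale(16) == 65535, the normalization
       * constants for 8- and 16-bit UNORM/UINT channels. (Illustrative
       * note, not in the original source.)
       */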

      /**
       * Pack the vector \p src in a bitfield given the per-component bit
       * shifts and widths. Note that bitfield components are not allowed to
       * cross 32-bit boundaries.
       */
      static fs_reg
      emit_pack(const fs_builder &bld, const fs_reg &src,
                const color_u &shifts, const color_u &widths)
      {
         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
         bool seen[4] = {};

         for (unsigned c = 0; c < 4; ++c) {
            if (widths[c]) {
               const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD);

               /* Shift each component left to the correct bitfield position. */
               bld.SHL(tmp, offset(src, bld, c), brw_imm_ud(shifts[c] % 32));

               /* Add everything up. */
               if (seen[shifts[c] / 32]) {
                  bld.OR(offset(dst, bld, shifts[c] / 32),
                         offset(dst, bld, shifts[c] / 32), tmp);
               } else {
                  bld.MOV(offset(dst, bld, shifts[c] / 32), tmp);
                  seen[shifts[c] / 32] = true;
               }
            }
         }

         return dst;
      }
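
      /* Worked example (illustration only): packing an RGBA8 color with
       * shifts (0, 8, 16, 24) and widths (8, 8, 8, 8) produces the single
       * dword dst.x = r | (g << 8) | (b << 16) | (a << 24), while an RG32
       * color with shifts (0, 32) lands in two separate dwords dst.x and
       * dst.y because shifts[c] / 32 selects the destination component.
       */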

      /**
       * Unpack a vector from the bitfield \p src given the per-component bit
       * shifts and widths. Note that bitfield components are not allowed to
       * cross 32-bit boundaries.
       */
      static fs_reg
      emit_unpack(const fs_builder &bld, const fs_reg &src,
                  const color_u &shifts, const color_u &widths)
      {
         const fs_reg dst = bld.vgrf(src.type, 4);

         for (unsigned c = 0; c < 4; ++c) {
            if (widths[c]) {
               /* Shift left to discard the most significant bits. */
               bld.SHL(offset(dst, bld, c),
                       offset(src, bld, shifts[c] / 32),
                       brw_imm_ud(32 - shifts[c] % 32 - widths[c]));

               /* Shift back to the least significant bits using an arithmetic
                * shift to get sign extension on signed types.
                */
               bld.ASR(offset(dst, bld, c),
                       offset(dst, bld, c), brw_imm_ud(32 - widths[c]));
            }
         }

         return dst;
      }
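
      /* Worked example (illustration only): unpacking the green channel of
       * an RGBA8 bitfield (shift 8, width 8) shifts left by 32 - 8 - 8 == 16
       * so that bits [15:8] become the top byte, then arithmetic-shifts
       * right by 32 - 8 == 24, leaving the channel in bits [7:0],
       * sign-extended when dst is of a signed type.
       */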

      /**
       * Convert an integer vector into another integer vector of the
       * specified bit widths, properly handling overflow.
       */
      static fs_reg
      emit_convert_to_integer(const fs_builder &bld, const fs_reg &src,
                              const color_u &widths, bool is_signed)
      {
         const unsigned s = (is_signed ? 1 : 0);
         const fs_reg dst = bld.vgrf(
            is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD, 4);
         assert(src.type == dst.type);

         for (unsigned c = 0; c < 4; ++c) {
            if (widths[c]) {
               /* Clamp to the maximum value. */
               bld.emit_minmax(offset(dst, bld, c), offset(src, bld, c),
                               brw_imm_d((int)scale(widths[c] - s)),
                               BRW_CONDITIONAL_L);

               /* Clamp to the minimum value. */
               if (is_signed)
                  bld.emit_minmax(offset(dst, bld, c), offset(dst, bld, c),
                                  brw_imm_d(-(int)scale(widths[c] - s) - 1),
                                  BRW_CONDITIONAL_GE);

               /* Mask off all but the bits we actually want. Otherwise, if
                * we pass a negative number into the hardware when it's
                * expecting something like UINT8, it will happily clamp it to
                * +255 for us.
                */
               if (is_signed && widths[c] < 32)
                  bld.AND(offset(dst, bld, c), offset(dst, bld, c),
                          brw_imm_d(scale(widths[c])));
            }
         }

         return dst;
      }
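
      /* Worked example (illustration only): for an RGBA8I store, each
       * component is clamped to [-scale(7) - 1, scale(7)] == [-128, 127]
       * and then masked with scale(8) == 0xff, so e.g. -2 becomes 0xfe,
       * which the hardware reinterprets as the intended signed byte.
       */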

      /**
       * Convert a normalized fixed-point vector of the specified signedness
       * and bit widths into a floating point vector.
       */
      static fs_reg
      emit_convert_from_scaled(const fs_builder &bld, const fs_reg &src,
                               const color_u &widths, bool is_signed)
      {
         const unsigned s = (is_signed ? 1 : 0);
         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_F, 4);

         for (unsigned c = 0; c < 4; ++c) {
            if (widths[c]) {
               /* Convert to float. */
               bld.MOV(offset(dst, bld, c), offset(src, bld, c));

               /* Divide by the normalization constants. */
               bld.MUL(offset(dst, bld, c), offset(dst, bld, c),
                       brw_imm_f(1.0f / scale(widths[c] - s)));

               /* Clamp to the minimum value. */
               if (is_signed)
                  bld.emit_minmax(offset(dst, bld, c),
                                  offset(dst, bld, c), brw_imm_f(-1.0f),
                                  BRW_CONDITIONAL_GE);
            }
         }
         return dst;
      }
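
      /* Worked example (illustration only): an SNORM8 value of -128 maps to
       * -128 / scale(7) == -128 / 127 ≈ -1.008, which the final clamp snaps
       * to -1.0, matching the GL convention that both -127 and -128
       * represent -1.0 in 8-bit SNORM.
       */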

      /**
       * Convert a floating-point vector into a normalized fixed-point vector
       * of the specified signedness and bit widths.
       */
      static fs_reg
      emit_convert_to_scaled(const fs_builder &bld, const fs_reg &src,
                             const color_u &widths, bool is_signed)
      {
         const unsigned s = (is_signed ? 1 : 0);
         const fs_reg dst = bld.vgrf(
            is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD, 4);
         const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);

         for (unsigned c = 0; c < 4; ++c) {
            if (widths[c]) {
               /* Clamp the normalized floating-point argument. */
               if (is_signed) {
                  bld.emit_minmax(offset(fdst, bld, c), offset(src, bld, c),
                                  brw_imm_f(-1.0f), BRW_CONDITIONAL_GE);

                  bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
                                  brw_imm_f(1.0f), BRW_CONDITIONAL_L);
               } else {
                  set_saturate(true, bld.MOV(offset(fdst, bld, c),
                                             offset(src, bld, c)));
               }

               /* Multiply by the normalization constants. */
               bld.MUL(offset(fdst, bld, c), offset(fdst, bld, c),
                       brw_imm_f((float)scale(widths[c] - s)));

               /* Convert to integer. */
               bld.RNDE(offset(fdst, bld, c), offset(fdst, bld, c));
               bld.MOV(offset(dst, bld, c), offset(fdst, bld, c));

               /* Mask off all but the bits we actually want. Otherwise, if
                * we pass a negative number into the hardware when it's
                * expecting something like UINT8, it will happily clamp it to
                * +255 for us.
                */
               if (is_signed && widths[c] < 32)
                  bld.AND(offset(dst, bld, c), offset(dst, bld, c),
                          brw_imm_d(scale(widths[c])));
            }
         }

         return dst;
      }
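
      /* Worked example (illustration only): storing 0.3 to a UNORM8 channel
       * saturates to 0.3, multiplies by scale(8) == 255 giving 76.5, and
       * RNDE rounds half to even, yielding 76; the final MOV then converts
       * the rounded float to the integer bit pattern written to memory.
       */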

      /**
       * Convert a floating point vector of the specified bit widths into a
       * 32-bit floating point vector.
       */
      static fs_reg
      emit_convert_from_float(const fs_builder &bld, const fs_reg &src,
                              const color_u &widths)
      {
         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
         const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);

         for (unsigned c = 0; c < 4; ++c) {
            if (widths[c]) {
               bld.MOV(offset(dst, bld, c), offset(src, bld, c));

               /* Extend 10-bit and 11-bit floating point numbers to 15 bits.
                * This works because they have a 5-bit exponent just like the
                * 16-bit floating point format, and they have no sign bit.
                */
               if (widths[c] < 16)
                  bld.SHL(offset(dst, bld, c),
                          offset(dst, bld, c), brw_imm_ud(15 - widths[c]));

               /* Convert to 32-bit floating point. */
               bld.F16TO32(offset(fdst, bld, c), offset(dst, bld, c));
            }
         }

         return fdst;
      }
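
      /* Worked example (illustration only): an 11-bit float channel of
       * R11G11B10_FLOAT has a 5-bit exponent and a 6-bit mantissa; shifting
       * it left by 15 - 11 == 4 aligns the exponent and mantissa with the
       * low 15 bits of a half-float (adding four zero mantissa bits), so
       * F16TO32 then yields the exact 32-bit value.
       */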

      /**
       * Convert a vector into a floating point vector of the specified bit
       * widths.
       */
      static fs_reg
      emit_convert_to_float(const fs_builder &bld, const fs_reg &src,
                            const color_u &widths)
      {
         const fs_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
         const fs_reg fdst = retype(dst, BRW_REGISTER_TYPE_F);

         for (unsigned c = 0; c < 4; ++c) {
            if (widths[c]) {
               bld.MOV(offset(fdst, bld, c), offset(src, bld, c));

               /* Clamp to the minimum value. */
               if (widths[c] < 16)
                  bld.emit_minmax(offset(fdst, bld, c), offset(fdst, bld, c),
                                  brw_imm_f(0.0f), BRW_CONDITIONAL_GE);

               /* Convert to 16-bit floating-point. */
               bld.F32TO16(offset(dst, bld, c), offset(fdst, bld, c));

               /* Discard the least significant bits to get floating point
                * numbers of the requested width. This works because the
                * 10-bit and 11-bit floating point formats have a 5-bit
                * exponent just like the 16-bit format, and they have no sign
                * bit.
                */
               if (widths[c] < 16)
                  bld.SHR(offset(dst, bld, c), offset(dst, bld, c),
                          brw_imm_ud(15 - widths[c]));
            }
         }

         return dst;
      }

      /**
       * Fill missing components of a vector with 0, 0, 0, 1.
       */
      static fs_reg
      emit_pad(const fs_builder &bld, const fs_reg &src,
               const color_u &widths)
      {
         const fs_reg dst = bld.vgrf(src.type, 4);
         const unsigned pad[] = { 0, 0, 0, 1 };

         for (unsigned c = 0; c < 4; ++c)
            bld.MOV(offset(dst, bld, c),
                    widths[c] ? offset(src, bld, c)
                              : fs_reg(brw_imm_ud(pad[c])));

         return dst;
      }
   }
}

namespace brw {
   namespace image_access {
      /**
       * Load a vector from a surface of the given format and dimensionality
       * at the given coordinates. \p surf_dims and \p arr_dims give the
       * number of non-array and array coordinates of the image respectively.
       */
      fs_reg
      emit_image_load(const fs_builder &bld,
                      const fs_reg &image, const fs_reg &addr,
                      unsigned surf_dims, unsigned arr_dims,
                      unsigned gl_format)
      {
         using namespace image_format_info;
         using namespace image_format_conversion;
         using namespace image_validity;
         using namespace image_coordinates;
         using namespace surface_access;
         const gen_device_info *devinfo = bld.shader->devinfo;
         const isl_format format = isl_format_for_gl_format(gl_format);
         const isl_format lower_format =
            isl_lower_storage_image_format(devinfo, format);
         fs_reg tmp;

         /* Transform the image coordinates into actual surface coordinates. */
         const fs_reg saddr =
            emit_image_coordinates(bld, addr, surf_dims, arr_dims, format);
         const unsigned dims =
            num_image_coordinates(bld, surf_dims, arr_dims, format);

         if (isl_has_matching_typed_storage_image_format(devinfo, format)) {
            /* Hopefully we get here most of the time... */
            tmp = emit_typed_read(bld, image, saddr, dims,
                                  isl_format_get_num_channels(lower_format));
         } else {
            /* Untyped surface reads return 32 bits of the surface per
             * component, without any sort of unpacking or type conversion,
             */
            const unsigned size = isl_format_get_layout(format)->bpb / 32;
            /* they don't properly handle out of bounds access, so we have to
             * check manually if the coordinates are valid and predicate the
             * surface read on the result,
             */
            const brw_predicate pred =
               emit_untyped_image_check(bld, image,
                                        emit_bounds_check(bld, image,
                                                          saddr, dims));

            /* and they don't know about surface coordinates, so we need to
             * convert them to a raw memory offset.
             */
            const fs_reg laddr = emit_address_calculation(bld, image, saddr, dims);

            tmp = emit_untyped_read(bld, image, laddr, 1, size, pred);

            /* An out of bounds surface access should give zero as result. */
            for (unsigned c = 0; c < size; ++c)
               set_predicate(pred, bld.SEL(offset(tmp, bld, c),
                                           offset(tmp, bld, c), brw_imm_d(0)));
         }

         /* Set the register type to D instead of UD if the data type is
          * represented as a signed integer in memory so that sign extension
          * is handled correctly by unpack.
          */
         if (needs_sign_extension(format))
            tmp = retype(tmp, BRW_REGISTER_TYPE_D);

         if (!has_supported_bit_layout(devinfo, format)) {
            /* Unpack individual vector components from the bitfield if the
             * hardware is unable to do it for us.
             */
            if (has_split_bit_layout(devinfo, format))
               tmp = emit_pack(bld, tmp, get_bit_shifts(lower_format),
                               get_bit_widths(lower_format));
            else
               tmp = emit_unpack(bld, tmp, get_bit_shifts(format),
                                 get_bit_widths(format));

         } else if ((needs_sign_extension(format) &&
                     !is_conversion_trivial(devinfo, format)) ||
                    has_undefined_high_bits(devinfo, format)) {
            /* Perform a trivial unpack even though the bit layout matches in
             * order to get the most significant bits of each component
             * initialized properly.
             */
            tmp = emit_unpack(bld, tmp, color_u(0, 32, 64, 96),
                              get_bit_widths(format));
         }

         if (!isl_format_has_int_channel(format)) {
            if (is_conversion_trivial(devinfo, format)) {
               /* Just need to cast the vector to the target type. */
               tmp = retype(tmp, BRW_REGISTER_TYPE_F);
            } else {
               /* Do the right sort of type conversion to float. */
               if (isl_format_has_float_channel(format))
                  tmp = emit_convert_from_float(
                     bld, tmp, get_bit_widths(format));
               else
                  tmp = emit_convert_from_scaled(
                     bld, tmp, get_bit_widths(format),
                     isl_format_has_snorm_channel(format));
            }
         }

         /* Initialize missing components of the result. */
         return emit_pad(bld, tmp, get_bit_widths(format));
      }

      /**
       * Store a vector in a surface of the given format and dimensionality at
       * the given coordinates. \p surf_dims and \p arr_dims give the number
       * of non-array and array coordinates of the image respectively.
       */
      void
      emit_image_store(const fs_builder &bld, const fs_reg &image,
                       const fs_reg &addr, const fs_reg &src,
                       unsigned surf_dims, unsigned arr_dims,
                       unsigned gl_format)
      {
         using namespace image_format_info;
         using namespace image_format_conversion;
         using namespace image_validity;
         using namespace image_coordinates;
         using namespace surface_access;
         const isl_format format = isl_format_for_gl_format(gl_format);
         const gen_device_info *devinfo = bld.shader->devinfo;

         /* Transform the image coordinates into actual surface coordinates. */
         const fs_reg saddr =
            emit_image_coordinates(bld, addr, surf_dims, arr_dims, format);
         const unsigned dims =
            num_image_coordinates(bld, surf_dims, arr_dims, format);

         if (gl_format == GL_NONE) {
            /* We don't know what the format is, but that's fine because it
             * implies write-only access, and typed surface writes are always
             * able to take care of type conversion and packing for us.
             */
            emit_typed_write(bld, image, saddr, src, dims, 4);

         } else {
            const isl_format lower_format =
               isl_lower_storage_image_format(devinfo, format);
            fs_reg tmp = src;

            if (!is_conversion_trivial(devinfo, format)) {
               /* Do the right sort of type conversion. */
               if (isl_format_has_float_channel(format))
                  tmp = emit_convert_to_float(bld, tmp, get_bit_widths(format));

               else if (isl_format_has_int_channel(format))
                  tmp = emit_convert_to_integer(bld, tmp, get_bit_widths(format),
                                                isl_format_has_sint_channel(format));

               else
                  tmp = emit_convert_to_scaled(bld, tmp, get_bit_widths(format),
                                               isl_format_has_snorm_channel(format));
            }

            /* We're down to bit manipulation at this point. */
            tmp = retype(tmp, BRW_REGISTER_TYPE_UD);

            if (!has_supported_bit_layout(devinfo, format)) {
               /* Pack the vector components into a bitfield if the hardware
                * is unable to do it for us.
                */
               if (has_split_bit_layout(devinfo, format))
                  tmp = emit_unpack(bld, tmp, get_bit_shifts(lower_format),
                                    get_bit_widths(lower_format));

               else
                  tmp = emit_pack(bld, tmp, get_bit_shifts(format),
                                  get_bit_widths(format));
            }

            if (isl_has_matching_typed_storage_image_format(devinfo, format)) {
               /* Hopefully we get here most of the time... */
               emit_typed_write(bld, image, saddr, tmp, dims,
                                isl_format_get_num_channels(lower_format));

            } else {
               /* Untyped surface writes store 32 bits of the surface per
                * component, without any sort of packing or type conversion,
                */
               const unsigned size = isl_format_get_layout(format)->bpb / 32;

               /* they don't properly handle out of bounds access, so we have
                * to check manually if the coordinates are valid and predicate
                * the surface write on the result,
                */
               const brw_predicate pred =
                  emit_untyped_image_check(bld, image,
                                           emit_bounds_check(bld, image,
                                                             saddr, dims));

               /* and, phew, they don't know about surface coordinates, so we
                * need to convert them to a raw memory offset.
                */
               const fs_reg laddr = emit_address_calculation(
                  bld, image, saddr, dims);

               emit_untyped_write(bld, image, laddr, tmp, 1, size, pred);
            }
         }
      }

      /**
       * Perform an atomic read-modify-write operation in a surface of the
       * given dimensionality at the given coordinates. \p surf_dims and \p
       * arr_dims give the number of non-array and array coordinates of the
       * image respectively. Main building block of the imageAtomic GLSL
       * built-ins.
       */
      fs_reg
      emit_image_atomic(const fs_builder &bld,
                        const fs_reg &image, const fs_reg &addr,
                        const fs_reg &src0, const fs_reg &src1,
                        unsigned surf_dims, unsigned arr_dims,
                        unsigned rsize, unsigned op)
      {
         using namespace image_validity;
         using namespace image_coordinates;
         using namespace surface_access;
         /* Avoid performing an atomic operation on an unbound surface. */
         const brw_predicate pred = emit_typed_atomic_check(bld, image);

         /* Transform the image coordinates into actual surface coordinates. */
         const fs_reg saddr =
            emit_image_coordinates(bld, addr, surf_dims, arr_dims,
                                   ISL_FORMAT_R32_UINT);
         const unsigned dims =
            num_image_coordinates(bld, surf_dims, arr_dims,
                                  ISL_FORMAT_R32_UINT);

         /* Thankfully we can do without untyped atomics here. */
         const fs_reg tmp = emit_typed_atomic(bld, image, saddr, src0, src1,
                                              dims, rsize, op, pred);

         /* An unbound surface access should give zero as result. */
         if (rsize && pred)
            set_predicate(pred, bld.SEL(tmp, tmp, brw_imm_d(0)));

         return retype(tmp, src0.type);
      }
   }
}