/*
 * Copyright © 2013-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "brw_vec4_surface_builder.h"
29 namespace array_utils
{
31 * Copy one every \p src_stride logical components of the argument into
32 * one every \p dst_stride logical components of the result.
35 emit_stride(const vec4_builder
&bld
, const src_reg
&src
, unsigned size
,
36 unsigned dst_stride
, unsigned src_stride
)
38 if (src_stride
== 1 && dst_stride
== 1) {
41 const dst_reg dst
= bld
.vgrf(src
.type
,
42 DIV_ROUND_UP(size
* dst_stride
, 4));
44 for (unsigned i
= 0; i
< size
; ++i
)
45 bld
.MOV(writemask(offset(dst
, 8, i
* dst_stride
/ 4),
46 1 << (i
* dst_stride
% 4)),
47 swizzle(offset(src
, 8, i
* src_stride
/ 4),
48 brw_swizzle_for_mask(1 << (i
* src_stride
% 4))));
55 * Convert a VEC4 into an array of registers with the layout expected by
56 * the recipient shared unit. If \p has_simd4x2 is true the argument is
57 * left unmodified in SIMD4x2 form, otherwise it will be rearranged into
61 emit_insert(const vec4_builder
&bld
, const src_reg
&src
,
62 unsigned n
, bool has_simd4x2
)
64 if (src
.file
== BAD_FILE
|| n
== 0) {
68 /* Pad unused components with zeroes. */
69 const unsigned mask
= (1 << n
) - 1;
70 const dst_reg tmp
= bld
.vgrf(src
.type
);
72 bld
.MOV(writemask(tmp
, mask
), src
);
74 bld
.MOV(writemask(tmp
, ~mask
), brw_imm_d(0));
76 return emit_stride(bld
, src_reg(tmp
), n
, has_simd4x2
? 1 : 4, 1);
83 namespace surface_access
{
85 using namespace array_utils
;
88 * Generate a send opcode for a surface message and return the
92 emit_send(const vec4_builder
&bld
, enum opcode op
,
93 const src_reg
&header
,
94 const src_reg
&addr
, unsigned addr_sz
,
95 const src_reg
&src
, unsigned src_sz
,
96 const src_reg
&surface
,
97 unsigned arg
, unsigned ret_sz
,
98 brw_predicate pred
= BRW_PREDICATE_NONE
)
100 /* Calculate the total number of components of the payload. */
101 const unsigned header_sz
= (header
.file
== BAD_FILE
? 0 : 1);
102 const unsigned sz
= header_sz
+ addr_sz
+ src_sz
;
104 /* Construct the payload. */
105 const dst_reg payload
= bld
.vgrf(BRW_REGISTER_TYPE_UD
, sz
);
109 bld
.exec_all().MOV(offset(payload
, 8, n
++),
110 retype(header
, BRW_REGISTER_TYPE_UD
));
112 for (unsigned i
= 0; i
< addr_sz
; i
++)
113 bld
.MOV(offset(payload
, 8, n
++),
114 offset(retype(addr
, BRW_REGISTER_TYPE_UD
), 8, i
));
116 for (unsigned i
= 0; i
< src_sz
; i
++)
117 bld
.MOV(offset(payload
, 8, n
++),
118 offset(retype(src
, BRW_REGISTER_TYPE_UD
), 8, i
));
120 /* Reduce the dynamically uniform surface index to a single
123 const src_reg usurface
= bld
.emit_uniformize(surface
);
125 /* Emit the message send instruction. */
126 const dst_reg dst
= bld
.vgrf(BRW_REGISTER_TYPE_UD
, ret_sz
);
127 vec4_instruction
*inst
=
128 bld
.emit(op
, dst
, src_reg(payload
), usurface
, brw_imm_ud(arg
));
130 inst
->size_written
= ret_sz
* REG_SIZE
;
131 inst
->header_size
= header_sz
;
132 inst
->predicate
= pred
;
139 * Emit an untyped surface read opcode. \p dims determines the number
140 * of components of the address and \p size the number of components of
141 * the returned value.
144 emit_untyped_read(const vec4_builder
&bld
,
145 const src_reg
&surface
, const src_reg
&addr
,
146 unsigned dims
, unsigned size
,
149 return emit_send(bld
, VEC4_OPCODE_UNTYPED_SURFACE_READ
, src_reg(),
150 emit_insert(bld
, addr
, dims
, true), 1,
152 surface
, size
, 1, pred
);
156 * Emit an untyped surface write opcode. \p dims determines the number
157 * of components of the address and \p size the number of components of
161 emit_untyped_write(const vec4_builder
&bld
, const src_reg
&surface
,
162 const src_reg
&addr
, const src_reg
&src
,
163 unsigned dims
, unsigned size
,
166 const bool has_simd4x2
= (bld
.shader
->devinfo
->gen
>= 8 ||
167 bld
.shader
->devinfo
->is_haswell
);
168 emit_send(bld
, VEC4_OPCODE_UNTYPED_SURFACE_WRITE
, src_reg(),
169 emit_insert(bld
, addr
, dims
, has_simd4x2
),
170 has_simd4x2
? 1 : dims
,
171 emit_insert(bld
, src
, size
, has_simd4x2
),
172 has_simd4x2
? 1 : size
,
173 surface
, size
, 0, pred
);
177 * Emit an untyped surface atomic opcode. \p dims determines the number
178 * of components of the address and \p rsize the number of components of
179 * the returned value (either zero or one).
182 emit_untyped_atomic(const vec4_builder
&bld
,
183 const src_reg
&surface
, const src_reg
&addr
,
184 const src_reg
&src0
, const src_reg
&src1
,
185 unsigned dims
, unsigned rsize
, unsigned op
,
188 const bool has_simd4x2
= (bld
.shader
->devinfo
->gen
>= 8 ||
189 bld
.shader
->devinfo
->is_haswell
);
191 /* Zip the components of both sources, they are represented as the X
192 * and Y components of the same vector.
194 const unsigned size
= (src0
.file
!= BAD_FILE
) + (src1
.file
!= BAD_FILE
);
195 const dst_reg srcs
= bld
.vgrf(BRW_REGISTER_TYPE_UD
);
198 bld
.MOV(writemask(srcs
, WRITEMASK_X
),
199 swizzle(src0
, BRW_SWIZZLE_XXXX
));
203 bld
.MOV(writemask(srcs
, WRITEMASK_Y
),
204 swizzle(src1
, BRW_SWIZZLE_XXXX
));
207 return emit_send(bld
, VEC4_OPCODE_UNTYPED_ATOMIC
, src_reg(),
208 emit_insert(bld
, addr
, dims
, has_simd4x2
),
209 has_simd4x2
? 1 : dims
,
210 emit_insert(bld
, src_reg(srcs
), size
, has_simd4x2
),
211 has_simd4x2
&& size
? 1 : size
,
212 surface
, op
, rsize
, pred
);