/*
 * Copyright © 2013-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "brw_vec4_surface_builder.h"
29 namespace array_utils
{
31 * Copy one every \p src_stride logical components of the argument into
32 * one every \p dst_stride logical components of the result.
35 emit_stride(const vec4_builder
&bld
, const src_reg
&src
, unsigned size
,
36 unsigned dst_stride
, unsigned src_stride
)
38 if (src_stride
== 1 && dst_stride
== 1) {
41 const dst_reg dst
= bld
.vgrf(src
.type
,
42 DIV_ROUND_UP(size
* dst_stride
, 4));
44 for (unsigned i
= 0; i
< size
; ++i
)
45 bld
.MOV(writemask(offset(dst
, 8, i
* dst_stride
/ 4),
46 1 << (i
* dst_stride
% 4)),
47 swizzle(offset(src
, 8, i
* src_stride
/ 4),
48 brw_swizzle_for_mask(1 << (i
* src_stride
% 4))));
55 * Convert a VEC4 into an array of registers with the layout expected by
56 * the recipient shared unit. If \p has_simd4x2 is true the argument is
57 * left unmodified in SIMD4x2 form, otherwise it will be rearranged into
61 emit_insert(const vec4_builder
&bld
, const src_reg
&src
,
62 unsigned n
, bool has_simd4x2
)
64 if (src
.file
== BAD_FILE
|| n
== 0) {
68 /* Pad unused components with zeroes. */
69 const unsigned mask
= (1 << n
) - 1;
70 const dst_reg tmp
= bld
.vgrf(src
.type
);
72 bld
.MOV(writemask(tmp
, mask
), src
);
74 bld
.MOV(writemask(tmp
, ~mask
), brw_imm_d(0));
76 return emit_stride(bld
, src_reg(tmp
), n
, has_simd4x2
? 1 : 4, 1);
81 * Convert an array of registers back into a VEC4 according to the
82 * layout expected from some shared unit. If \p has_simd4x2 is true the
83 * argument is left unmodified in SIMD4x2 form, otherwise it will be
84 * rearranged from SIMD8 form.
87 emit_extract(const vec4_builder
&bld
, const src_reg src
,
88 unsigned n
, bool has_simd4x2
)
90 if (src
.file
== BAD_FILE
|| n
== 0) {
94 return emit_stride(bld
, src
, n
, 1, has_simd4x2
? 1 : 4);
101 namespace surface_access
{
103 using namespace array_utils
;
106 * Generate a send opcode for a surface message and return the
110 emit_send(const vec4_builder
&bld
, enum opcode op
,
111 const src_reg
&header
,
112 const src_reg
&addr
, unsigned addr_sz
,
113 const src_reg
&src
, unsigned src_sz
,
114 const src_reg
&surface
,
115 unsigned arg
, unsigned ret_sz
,
116 brw_predicate pred
= BRW_PREDICATE_NONE
)
118 /* Calculate the total number of components of the payload. */
119 const unsigned header_sz
= (header
.file
== BAD_FILE
? 0 : 1);
120 const unsigned sz
= header_sz
+ addr_sz
+ src_sz
;
122 /* Construct the payload. */
123 const dst_reg payload
= bld
.vgrf(BRW_REGISTER_TYPE_UD
, sz
);
127 bld
.exec_all().MOV(offset(payload
, 8, n
++),
128 retype(header
, BRW_REGISTER_TYPE_UD
));
130 for (unsigned i
= 0; i
< addr_sz
; i
++)
131 bld
.MOV(offset(payload
, 8, n
++),
132 offset(retype(addr
, BRW_REGISTER_TYPE_UD
), 8, i
));
134 for (unsigned i
= 0; i
< src_sz
; i
++)
135 bld
.MOV(offset(payload
, 8, n
++),
136 offset(retype(src
, BRW_REGISTER_TYPE_UD
), 8, i
));
138 /* Reduce the dynamically uniform surface index to a single
141 const src_reg usurface
= bld
.emit_uniformize(surface
);
143 /* Emit the message send instruction. */
144 const dst_reg dst
= bld
.vgrf(BRW_REGISTER_TYPE_UD
, ret_sz
);
145 vec4_instruction
*inst
=
146 bld
.emit(op
, dst
, src_reg(payload
), usurface
, brw_imm_ud(arg
));
148 inst
->size_written
= ret_sz
* REG_SIZE
;
149 inst
->header_size
= header_sz
;
150 inst
->predicate
= pred
;
157 * Emit an untyped surface read opcode. \p dims determines the number
158 * of components of the address and \p size the number of components of
159 * the returned value.
162 emit_untyped_read(const vec4_builder
&bld
,
163 const src_reg
&surface
, const src_reg
&addr
,
164 unsigned dims
, unsigned size
,
167 return emit_send(bld
, SHADER_OPCODE_UNTYPED_SURFACE_READ
, src_reg(),
168 emit_insert(bld
, addr
, dims
, true), 1,
170 surface
, size
, 1, pred
);
174 * Emit an untyped surface write opcode. \p dims determines the number
175 * of components of the address and \p size the number of components of
179 emit_untyped_write(const vec4_builder
&bld
, const src_reg
&surface
,
180 const src_reg
&addr
, const src_reg
&src
,
181 unsigned dims
, unsigned size
,
184 const bool has_simd4x2
= (bld
.shader
->devinfo
->gen
>= 8 ||
185 bld
.shader
->devinfo
->is_haswell
);
186 emit_send(bld
, SHADER_OPCODE_UNTYPED_SURFACE_WRITE
, src_reg(),
187 emit_insert(bld
, addr
, dims
, has_simd4x2
),
188 has_simd4x2
? 1 : dims
,
189 emit_insert(bld
, src
, size
, has_simd4x2
),
190 has_simd4x2
? 1 : size
,
191 surface
, size
, 0, pred
);
195 * Emit an untyped surface atomic opcode. \p dims determines the number
196 * of components of the address and \p rsize the number of components of
197 * the returned value (either zero or one).
200 emit_untyped_atomic(const vec4_builder
&bld
,
201 const src_reg
&surface
, const src_reg
&addr
,
202 const src_reg
&src0
, const src_reg
&src1
,
203 unsigned dims
, unsigned rsize
, unsigned op
,
206 const bool has_simd4x2
= (bld
.shader
->devinfo
->gen
>= 8 ||
207 bld
.shader
->devinfo
->is_haswell
);
209 /* Zip the components of both sources, they are represented as the X
210 * and Y components of the same vector.
212 const unsigned size
= (src0
.file
!= BAD_FILE
) + (src1
.file
!= BAD_FILE
);
213 const dst_reg srcs
= bld
.vgrf(BRW_REGISTER_TYPE_UD
);
216 bld
.MOV(writemask(srcs
, WRITEMASK_X
),
217 swizzle(src0
, BRW_SWIZZLE_XXXX
));
221 bld
.MOV(writemask(srcs
, WRITEMASK_Y
),
222 swizzle(src1
, BRW_SWIZZLE_XXXX
));
225 return emit_send(bld
, SHADER_OPCODE_UNTYPED_ATOMIC
, src_reg(),
226 emit_insert(bld
, addr
, dims
, has_simd4x2
),
227 has_simd4x2
? 1 : dims
,
228 emit_insert(bld
, src_reg(srcs
), size
, has_simd4x2
),
229 has_simd4x2
&& size
? 1 : size
,
230 surface
, op
, rsize
, pred
);
235 * Initialize the header present in typed surface messages.
238 emit_typed_message_header(const vec4_builder
&bld
)
240 const vec4_builder ubld
= bld
.exec_all();
241 const dst_reg dst
= bld
.vgrf(BRW_REGISTER_TYPE_UD
);
243 ubld
.MOV(dst
, brw_imm_d(0));
245 if (bld
.shader
->devinfo
->gen
== 7 &&
246 !bld
.shader
->devinfo
->is_haswell
) {
247 /* The sample mask is used on IVB for the SIMD8 messages that
248 * have no SIMD4x2 variant. We only use the two X channels
249 * in that case, mask everything else out.
251 ubld
.MOV(writemask(dst
, WRITEMASK_W
), brw_imm_d(0x11));
259 * Emit a typed surface read opcode. \p dims determines the number of
260 * components of the address and \p size the number of components of the
264 emit_typed_read(const vec4_builder
&bld
, const src_reg
&surface
,
265 const src_reg
&addr
, unsigned dims
, unsigned size
)
267 const bool has_simd4x2
= (bld
.shader
->devinfo
->gen
>= 8 ||
268 bld
.shader
->devinfo
->is_haswell
);
270 emit_send(bld
, SHADER_OPCODE_TYPED_SURFACE_READ
,
271 emit_typed_message_header(bld
),
272 emit_insert(bld
, addr
, dims
, has_simd4x2
),
273 has_simd4x2
? 1 : dims
,
276 has_simd4x2
? 1 : size
);
278 return emit_extract(bld
, tmp
, size
, has_simd4x2
);
282 * Emit a typed surface write opcode. \p dims determines the number of
283 * components of the address and \p size the number of components of the
287 emit_typed_write(const vec4_builder
&bld
, const src_reg
&surface
,
288 const src_reg
&addr
, const src_reg
&src
,
289 unsigned dims
, unsigned size
)
291 const bool has_simd4x2
= (bld
.shader
->devinfo
->gen
>= 8 ||
292 bld
.shader
->devinfo
->is_haswell
);
293 emit_send(bld
, SHADER_OPCODE_TYPED_SURFACE_WRITE
,
294 emit_typed_message_header(bld
),
295 emit_insert(bld
, addr
, dims
, has_simd4x2
),
296 has_simd4x2
? 1 : dims
,
297 emit_insert(bld
, src
, size
, has_simd4x2
),
298 has_simd4x2
? 1 : size
,
303 * Emit a typed surface atomic opcode. \p dims determines the number of
304 * components of the address and \p rsize the number of components of
305 * the returned value (either zero or one).
308 emit_typed_atomic(const vec4_builder
&bld
,
309 const src_reg
&surface
, const src_reg
&addr
,
310 const src_reg
&src0
, const src_reg
&src1
,
311 unsigned dims
, unsigned rsize
, unsigned op
,
314 const bool has_simd4x2
= (bld
.shader
->devinfo
->gen
>= 8 ||
315 bld
.shader
->devinfo
->is_haswell
);
317 /* Zip the components of both sources, they are represented as the X
318 * and Y components of the same vector.
320 const unsigned size
= (src0
.file
!= BAD_FILE
) + (src1
.file
!= BAD_FILE
);
321 const dst_reg srcs
= bld
.vgrf(BRW_REGISTER_TYPE_UD
);
324 bld
.MOV(writemask(srcs
, WRITEMASK_X
), src0
);
326 bld
.MOV(writemask(srcs
, WRITEMASK_Y
), src1
);
328 return emit_send(bld
, SHADER_OPCODE_TYPED_ATOMIC
,
329 emit_typed_message_header(bld
),
330 emit_insert(bld
, addr
, dims
, has_simd4x2
),
331 has_simd4x2
? 1 : dims
,
332 emit_insert(bld
, src_reg(srcs
), size
, has_simd4x2
),
333 has_simd4x2
? 1 : size
,
334 surface
, op
, rsize
, pred
);