i965/vec4: Fix swizzles on atomic sources.
[mesa.git] / src / intel / compiler / brw_vec4_surface_builder.cpp
1 /*
2 * Copyright © 2013-2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_vec4_surface_builder.h"
25
26 using namespace brw;
27
28 namespace {
29 namespace array_utils {
30 /**
31 * Copy one every \p src_stride logical components of the argument into
32 * one every \p dst_stride logical components of the result.
33 */
34 static src_reg
35 emit_stride(const vec4_builder &bld, const src_reg &src, unsigned size,
36 unsigned dst_stride, unsigned src_stride)
37 {
38 if (src_stride == 1 && dst_stride == 1) {
39 return src;
40 } else {
41 const dst_reg dst = bld.vgrf(src.type,
42 DIV_ROUND_UP(size * dst_stride, 4));
43
44 for (unsigned i = 0; i < size; ++i)
45 bld.MOV(writemask(offset(dst, 8, i * dst_stride / 4),
46 1 << (i * dst_stride % 4)),
47 swizzle(offset(src, 8, i * src_stride / 4),
48 brw_swizzle_for_mask(1 << (i * src_stride % 4))));
49
50 return src_reg(dst);
51 }
52 }
53
54 /**
55 * Convert a VEC4 into an array of registers with the layout expected by
56 * the recipient shared unit. If \p has_simd4x2 is true the argument is
57 * left unmodified in SIMD4x2 form, otherwise it will be rearranged into
58 * a SIMD8 vector.
59 */
60 static src_reg
61 emit_insert(const vec4_builder &bld, const src_reg &src,
62 unsigned n, bool has_simd4x2)
63 {
64 if (src.file == BAD_FILE || n == 0) {
65 return src_reg();
66
67 } else {
68 /* Pad unused components with zeroes. */
69 const unsigned mask = (1 << n) - 1;
70 const dst_reg tmp = bld.vgrf(src.type);
71
72 bld.MOV(writemask(tmp, mask), src);
73 if (n < 4)
74 bld.MOV(writemask(tmp, ~mask), brw_imm_d(0));
75
76 return emit_stride(bld, src_reg(tmp), n, has_simd4x2 ? 1 : 4, 1);
77 }
78 }
79
80 /**
81 * Convert an array of registers back into a VEC4 according to the
82 * layout expected from some shared unit. If \p has_simd4x2 is true the
83 * argument is left unmodified in SIMD4x2 form, otherwise it will be
84 * rearranged from SIMD8 form.
85 */
86 static src_reg
87 emit_extract(const vec4_builder &bld, const src_reg src,
88 unsigned n, bool has_simd4x2)
89 {
90 if (src.file == BAD_FILE || n == 0) {
91 return src_reg();
92
93 } else {
94 return emit_stride(bld, src, n, 1, has_simd4x2 ? 1 : 4);
95 }
96 }
97 }
98 }
99
100 namespace brw {
101 namespace surface_access {
102 namespace {
103 using namespace array_utils;
104
105 /**
106 * Generate a send opcode for a surface message and return the
107 * result.
108 */
109 src_reg
110 emit_send(const vec4_builder &bld, enum opcode op,
111 const src_reg &header,
112 const src_reg &addr, unsigned addr_sz,
113 const src_reg &src, unsigned src_sz,
114 const src_reg &surface,
115 unsigned arg, unsigned ret_sz,
116 brw_predicate pred = BRW_PREDICATE_NONE)
117 {
118 /* Calculate the total number of components of the payload. */
119 const unsigned header_sz = (header.file == BAD_FILE ? 0 : 1);
120 const unsigned sz = header_sz + addr_sz + src_sz;
121
122 /* Construct the payload. */
123 const dst_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
124 unsigned n = 0;
125
126 if (header_sz)
127 bld.exec_all().MOV(offset(payload, 8, n++),
128 retype(header, BRW_REGISTER_TYPE_UD));
129
130 for (unsigned i = 0; i < addr_sz; i++)
131 bld.MOV(offset(payload, 8, n++),
132 offset(retype(addr, BRW_REGISTER_TYPE_UD), 8, i));
133
134 for (unsigned i = 0; i < src_sz; i++)
135 bld.MOV(offset(payload, 8, n++),
136 offset(retype(src, BRW_REGISTER_TYPE_UD), 8, i));
137
138 /* Reduce the dynamically uniform surface index to a single
139 * scalar.
140 */
141 const src_reg usurface = bld.emit_uniformize(surface);
142
143 /* Emit the message send instruction. */
144 const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, ret_sz);
145 vec4_instruction *inst =
146 bld.emit(op, dst, src_reg(payload), usurface, brw_imm_ud(arg));
147 inst->mlen = sz;
148 inst->size_written = ret_sz * REG_SIZE;
149 inst->header_size = header_sz;
150 inst->predicate = pred;
151
152 return src_reg(dst);
153 }
154 }
155
156 /**
157 * Emit an untyped surface read opcode. \p dims determines the number
158 * of components of the address and \p size the number of components of
159 * the returned value.
160 */
161 src_reg
162 emit_untyped_read(const vec4_builder &bld,
163 const src_reg &surface, const src_reg &addr,
164 unsigned dims, unsigned size,
165 brw_predicate pred)
166 {
167 return emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_READ, src_reg(),
168 emit_insert(bld, addr, dims, true), 1,
169 src_reg(), 0,
170 surface, size, 1, pred);
171 }
172
173 /**
174 * Emit an untyped surface write opcode. \p dims determines the number
175 * of components of the address and \p size the number of components of
176 * the argument.
177 */
178 void
179 emit_untyped_write(const vec4_builder &bld, const src_reg &surface,
180 const src_reg &addr, const src_reg &src,
181 unsigned dims, unsigned size,
182 brw_predicate pred)
183 {
184 const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
185 bld.shader->devinfo->is_haswell);
186 emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(),
187 emit_insert(bld, addr, dims, has_simd4x2),
188 has_simd4x2 ? 1 : dims,
189 emit_insert(bld, src, size, has_simd4x2),
190 has_simd4x2 ? 1 : size,
191 surface, size, 0, pred);
192 }
193
194 /**
195 * Emit an untyped surface atomic opcode. \p dims determines the number
196 * of components of the address and \p rsize the number of components of
197 * the returned value (either zero or one).
198 */
199 src_reg
200 emit_untyped_atomic(const vec4_builder &bld,
201 const src_reg &surface, const src_reg &addr,
202 const src_reg &src0, const src_reg &src1,
203 unsigned dims, unsigned rsize, unsigned op,
204 brw_predicate pred)
205 {
206 const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
207 bld.shader->devinfo->is_haswell);
208
209 /* Zip the components of both sources, they are represented as the X
210 * and Y components of the same vector.
211 */
212 const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
213 const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);
214
215 if (size >= 1) {
216 bld.MOV(writemask(srcs, WRITEMASK_X),
217 swizzle(src0, BRW_SWIZZLE_XXXX));
218 }
219
220 if (size >= 2) {
221 bld.MOV(writemask(srcs, WRITEMASK_Y),
222 swizzle(src1, BRW_SWIZZLE_XXXX));
223 }
224
225 return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC, src_reg(),
226 emit_insert(bld, addr, dims, has_simd4x2),
227 has_simd4x2 ? 1 : dims,
228 emit_insert(bld, src_reg(srcs), size, has_simd4x2),
229 has_simd4x2 && size ? 1 : size,
230 surface, op, rsize, pred);
231 }
232
233 namespace {
234 /**
235 * Initialize the header present in typed surface messages.
236 */
237 src_reg
238 emit_typed_message_header(const vec4_builder &bld)
239 {
240 const vec4_builder ubld = bld.exec_all();
241 const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD);
242
243 ubld.MOV(dst, brw_imm_d(0));
244
245 if (bld.shader->devinfo->gen == 7 &&
246 !bld.shader->devinfo->is_haswell) {
247 /* The sample mask is used on IVB for the SIMD8 messages that
248 * have no SIMD4x2 variant. We only use the two X channels
249 * in that case, mask everything else out.
250 */
251 ubld.MOV(writemask(dst, WRITEMASK_W), brw_imm_d(0x11));
252 }
253
254 return src_reg(dst);
255 }
256 }
257
258 /**
259 * Emit a typed surface read opcode. \p dims determines the number of
260 * components of the address and \p size the number of components of the
261 * returned value.
262 */
263 src_reg
264 emit_typed_read(const vec4_builder &bld, const src_reg &surface,
265 const src_reg &addr, unsigned dims, unsigned size)
266 {
267 const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
268 bld.shader->devinfo->is_haswell);
269 const src_reg tmp =
270 emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_READ,
271 emit_typed_message_header(bld),
272 emit_insert(bld, addr, dims, has_simd4x2),
273 has_simd4x2 ? 1 : dims,
274 src_reg(), 0,
275 surface, size,
276 has_simd4x2 ? 1 : size);
277
278 return emit_extract(bld, tmp, size, has_simd4x2);
279 }
280
281 /**
282 * Emit a typed surface write opcode. \p dims determines the number of
283 * components of the address and \p size the number of components of the
284 * argument.
285 */
286 void
287 emit_typed_write(const vec4_builder &bld, const src_reg &surface,
288 const src_reg &addr, const src_reg &src,
289 unsigned dims, unsigned size)
290 {
291 const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
292 bld.shader->devinfo->is_haswell);
293 emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_WRITE,
294 emit_typed_message_header(bld),
295 emit_insert(bld, addr, dims, has_simd4x2),
296 has_simd4x2 ? 1 : dims,
297 emit_insert(bld, src, size, has_simd4x2),
298 has_simd4x2 ? 1 : size,
299 surface, size, 0);
300 }
301
302 /**
303 * Emit a typed surface atomic opcode. \p dims determines the number of
304 * components of the address and \p rsize the number of components of
305 * the returned value (either zero or one).
306 */
307 src_reg
308 emit_typed_atomic(const vec4_builder &bld,
309 const src_reg &surface, const src_reg &addr,
310 const src_reg &src0, const src_reg &src1,
311 unsigned dims, unsigned rsize, unsigned op,
312 brw_predicate pred)
313 {
314 const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 ||
315 bld.shader->devinfo->is_haswell);
316
317 /* Zip the components of both sources, they are represented as the X
318 * and Y components of the same vector.
319 */
320 const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
321 const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD);
322
323 if (size >= 1)
324 bld.MOV(writemask(srcs, WRITEMASK_X), src0);
325 if (size >= 2)
326 bld.MOV(writemask(srcs, WRITEMASK_Y), src1);
327
328 return emit_send(bld, SHADER_OPCODE_TYPED_ATOMIC,
329 emit_typed_message_header(bld),
330 emit_insert(bld, addr, dims, has_simd4x2),
331 has_simd4x2 ? 1 : dims,
332 emit_insert(bld, src_reg(srcs), size, has_simd4x2),
333 has_simd4x2 ? 1 : size,
334 surface, op, rsize, pred);
335 }
336 }
337 }