src/compiler/nir/nir_opt_find_array_copies.c
/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"

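/* Callback for nir_foreach_ssa_def that assigns each def the next index
 * from the running counter passed in via state.
 */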
static bool
index_ssa_def_cb(nir_ssa_def *def, void *state)
{
   unsigned *index = (unsigned *) state;
   def->index = (*index)++;

   return true;
}

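/* If src comes from a load_deref, returns the deref being loaded from;
 * otherwise returns NULL.  Loads whose SSA index is below first_valid_load
 * are rejected: a store to some other variable may have happened since the
 * load, so the loaded value can no longer be trusted to match the current
 * contents of the source.
 */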
static nir_deref_instr *
get_deref_for_load_src(nir_src src, unsigned first_valid_load)
{
   if (!src.is_ssa)
      return NULL;

   if (src.ssa->parent_instr->type != nir_instr_type_intrinsic)
      return NULL;

   nir_intrinsic_instr *load = nir_instr_as_intrinsic(src.ssa->parent_instr);
   if (load->intrinsic != nir_intrinsic_load_deref)
      return NULL;

   if (load->dest.ssa.index < first_valid_load)
      return NULL;

   return nir_src_as_deref(load->src[0]);
}

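/* State for the array-copy state machine.  As we walk the instructions in a
 * block, we try to prove that a run of consecutive element copies writes
 * every element of one array from the corresponding element of another.
 */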
struct match_state {
   /* Index of the next array element we expect to see copied, or 0 if no
    * copy is in progress.
    */
   unsigned next_array_idx;

   /* Length of the array we're copying */
   unsigned array_len;

   /* Index into the deref path to the array we think is being copied */
   int src_deref_array_idx;
   int dst_deref_array_idx;

   /* Deref paths of the first load/store pair or copy */
   nir_deref_path first_src_path;
   nir_deref_path first_dst_path;
};

static void
match_state_init(struct match_state *state)
{
   state->next_array_idx = 0;
   state->array_len = 0;
   state->src_deref_array_idx = -1;
   state->dst_deref_array_idx = -1;
}

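/* The deref paths are only initialized once the first element copy has been
 * recorded (next_array_idx > 0), so only finish them in that case.
 */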
static void
match_state_finish(struct match_state *state)
{
   if (state->next_array_idx > 0) {
      nir_deref_path_finish(&state->first_src_path);
      nir_deref_path_finish(&state->first_dst_path);
   }
}

static void
match_state_reset(struct match_state *state)
{
   match_state_finish(state);
   match_state_init(state);
}

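/* Attempts to match deref against base_path.  The two match if their paths
 * are identical except that, at exactly one array deref, the base path has
 * constant index 0 where deref has constant index arr_idx.  On the first
 * call, *path_array_idx is -1 and gets set to the position of that array
 * deref; on later calls the altered index must occur at the same position.
 * Returns true on a successful match.
 */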
static bool
try_match_deref(nir_deref_path *base_path, int *path_array_idx,
                nir_deref_instr *deref, int arr_idx, void *mem_ctx)
{
   nir_deref_path deref_path;
   nir_deref_path_init(&deref_path, deref, mem_ctx);

   bool found = false;
   for (int i = 0; ; i++) {
      nir_deref_instr *b = base_path->path[i];
      nir_deref_instr *d = deref_path.path[i];
      /* They have to be the same length */
      if ((b == NULL) != (d == NULL))
         goto fail;

      if (b == NULL)
         break;

      /* This can happen if one is a deref_array and the other a wildcard */
      if (b->deref_type != d->deref_type)
         goto fail;

      switch (b->deref_type) {
      case nir_deref_type_var:
         if (b->var != d->var)
            goto fail;
         continue;

      case nir_deref_type_array:
         assert(b->arr.index.is_ssa && d->arr.index.is_ssa);
         const bool const_b_idx = nir_src_is_const(b->arr.index);
         const bool const_d_idx = nir_src_is_const(d->arr.index);
         const unsigned b_idx = const_b_idx ? nir_src_as_uint(b->arr.index) : 0;
         const unsigned d_idx = const_d_idx ? nir_src_as_uint(d->arr.index) : 0;

         /* If we don't have an index into the path yet or if this entry in
          * the path is at the array index, see if this is a candidate. We're
          * looking for an index which is zero in the base deref and arr_idx
          * in the search deref.
          */
         if ((*path_array_idx < 0 || *path_array_idx == i) &&
             const_b_idx && b_idx == 0 &&
             const_d_idx && d_idx == arr_idx) {
            *path_array_idx = i;
            continue;
         }

         /* We're at the array index but not a candidate */
         if (*path_array_idx == i)
            goto fail;

         /* If we're not the path array index, we must match exactly. We
          * could probably just compare SSA values and trust in copy
          * propagation but doing it ourselves means this pass can run a bit
          * earlier.
          */
         if (b->arr.index.ssa == d->arr.index.ssa ||
             (const_b_idx && const_d_idx && b_idx == d_idx))
            continue;

         goto fail;

      case nir_deref_type_array_wildcard:
         continue;

      case nir_deref_type_struct:
         if (b->strct.index != d->strct.index)
            goto fail;
         continue;

      default:
         unreachable("Invalid deref type in a path");
      }
   }

   /* If we got here without failing, we've matched. However, it isn't an
    * array match unless we found an altered array index.
    */
   found = *path_array_idx > 0;

fail:
   nir_deref_path_finish(&deref_path);
   return found;
}

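/* Rebuilds the deref chain described by path, replacing the array deref at
 * wildcard_idx with an array wildcard and re-emitting everything below it.
 */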
static nir_deref_instr *
build_wildcard_deref(nir_builder *b, nir_deref_path *path,
                     unsigned wildcard_idx)
{
   assert(path->path[wildcard_idx]->deref_type == nir_deref_type_array);

   nir_deref_instr *tail =
      nir_build_deref_array_wildcard(b, path->path[wildcard_idx - 1]);

   for (unsigned i = wildcard_idx + 1; path->path[i]; i++)
      tail = nir_build_deref_follower(b, tail, path->path[i]);

   return tail;
}

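/* Scans the instructions in a block, in order, looking for a run of element
 * copies that together writes every element of one array from the matching
 * elements of another.  When a complete copy is found, an equivalent
 * wildcard copy_deref is emitted after the final element copy.
 */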
static bool
opt_find_array_copies_block(nir_builder *b, nir_block *block,
                            unsigned *num_ssa_defs, void *mem_ctx)
{
   bool progress = false;

   struct match_state s;
   match_state_init(&s);

   nir_variable *dst_var = NULL;
   unsigned prev_dst_var_last_write = *num_ssa_defs;
   unsigned dst_var_last_write = *num_ssa_defs;

   nir_foreach_instr(instr, block) {
      /* Index the SSA defs before we do anything else. */
      nir_foreach_ssa_def(instr, index_ssa_def_cb, num_ssa_defs);

      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      if (intrin->intrinsic != nir_intrinsic_copy_deref &&
          intrin->intrinsic != nir_intrinsic_store_deref)
         continue;

      nir_deref_instr *dst_deref = nir_src_as_deref(intrin->src[0]);

      /* The destination must be local. If we see a non-local store, we
       * continue on because it won't affect local stores or read-only
       * variables.
       */
      if (dst_deref->mode != nir_var_function_temp)
         continue;

      /* We keep track of the SSA indices where the two last-written
       * variables are written. The prev_dst_var_last_write tells us when
       * the last store_deref to something other than dst happened. If the
       * SSA def index from a load is greater than or equal to this number
       * then we know it happened afterwards and no writes to anything other
       * than dst occur between the load and the current instruction.
       */
      if (nir_deref_instr_get_variable(dst_deref) != dst_var) {
         prev_dst_var_last_write = dst_var_last_write;
         dst_var = nir_deref_instr_get_variable(dst_deref);
      }
      dst_var_last_write = *num_ssa_defs;

      /* If it's a full variable store or copy, reset. This will trigger
       * eventually because we'll fail to match an array element. However,
       * it's a cheap early-exit.
       */
      if (dst_deref->deref_type == nir_deref_type_var)
         goto reset;

      nir_deref_instr *src_deref;
      if (intrin->intrinsic == nir_intrinsic_copy_deref) {
         src_deref = nir_src_as_deref(intrin->src[1]);
      } else {
         assert(intrin->intrinsic == nir_intrinsic_store_deref);
         src_deref = get_deref_for_load_src(intrin->src[1],
                                            prev_dst_var_last_write);

         /* We can only handle full writes */
         if (nir_intrinsic_write_mask(intrin) !=
             (1 << glsl_get_components(dst_deref->type)) - 1)
            goto reset;
      }

      /* If we didn't find a valid src, then we have an unknown store and it
       * could mess things up.
       */
      if (src_deref == NULL)
         goto reset;

      /* The source must be either local or something that's guaranteed to be
       * read-only.
       */
      const nir_variable_mode read_only_modes =
         nir_var_shader_in | nir_var_uniform | nir_var_system_value;
      if (!(src_deref->mode & (nir_var_function_temp | read_only_modes)))
         goto reset;

      /* If we don't yet have an active copy, then make this instruction the
       * active copy.
       */
      if (s.next_array_idx == 0) {
         /* We can't combine a copy if there is any chance the source and
          * destination will end up aliasing. Just bail if they're the same
          * variable.
          */
         if (nir_deref_instr_get_variable(src_deref) == dst_var)
            goto reset;

         /* The load/store pair is enough to guarantee the same bit size and
          * number of components but a copy_var requires the actual types to
          * match.
          */
         if (dst_deref->type != src_deref->type)
            continue;

         /* The first time we see a store, we don't know which array in the
          * deref path is the one being copied so we just record the paths
          * as-is and continue. On the next iteration, it will try to match
          * based on which array index changed.
          */
         nir_deref_path_init(&s.first_dst_path, dst_deref, mem_ctx);
         nir_deref_path_init(&s.first_src_path, src_deref, mem_ctx);
         s.next_array_idx = 1;
         continue;
      }

      if (!try_match_deref(&s.first_dst_path, &s.dst_deref_array_idx,
                           dst_deref, s.next_array_idx, mem_ctx) ||
          !try_match_deref(&s.first_src_path, &s.src_deref_array_idx,
                           src_deref, s.next_array_idx, mem_ctx))
         goto reset;

      if (s.next_array_idx == 1) {
         /* This is our first non-trivial match. We now have indices into
          * the search paths so we can do a couple more checks.
          */
         assert(s.dst_deref_array_idx > 0 && s.src_deref_array_idx > 0);
         const struct glsl_type *dst_arr_type =
            s.first_dst_path.path[s.dst_deref_array_idx - 1]->type;
         const struct glsl_type *src_arr_type =
            s.first_src_path.path[s.src_deref_array_idx - 1]->type;

         assert(glsl_type_is_array(dst_arr_type) ||
                glsl_type_is_matrix(dst_arr_type));
         assert(glsl_type_is_array(src_arr_type) ||
                glsl_type_is_matrix(src_arr_type));

         /* They must be the same length */
         s.array_len = glsl_get_length(dst_arr_type);
         if (s.array_len != glsl_get_length(src_arr_type))
            goto reset;
      }

      s.next_array_idx++;

      if (s.next_array_idx == s.array_len) {
         /* Hooray, we found a copy! */
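         /* Note that the original per-element loads and stores are left in
          * place; they become redundant with the wildcard copy and are left
          * for later passes such as copy propagation and dead-code
          * elimination to clean up.
          */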
         b->cursor = nir_after_instr(instr);
         nir_copy_deref(b, build_wildcard_deref(b, &s.first_dst_path,
                                                s.dst_deref_array_idx),
                           build_wildcard_deref(b, &s.first_src_path,
                                                s.src_deref_array_idx));
         match_state_reset(&s);
         progress = true;
      }

      continue;

   reset:
      match_state_reset(&s);
   }

   return progress;
}

static bool
opt_find_array_copies_impl(nir_function_impl *impl)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   bool progress = false;

   void *mem_ctx = ralloc_context(NULL);

   /* We re-index the SSA defs as we go; it makes it easier to handle
    * resetting the state machine.
    */
   unsigned num_ssa_defs = 0;

   nir_foreach_block(block, impl) {
      if (opt_find_array_copies_block(&b, block, &num_ssa_defs, mem_ctx))
         progress = true;
   }

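   /* Every SSA def in the shader was just re-indexed starting from zero, so
    * the old allocation count is stale; record the new one.
    */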
   impl->ssa_alloc = num_ssa_defs;

   ralloc_free(mem_ctx);

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   }

   return progress;
}

/**
 * This peephole optimization looks for a series of load/store_deref or
 * copy_deref instructions that copy an array from one variable to another
 * and turns it into a copy_deref that copies the entire array. The pattern
 * it looks for is extremely specific, but it's good enough to pick up on the
 * input array copies in DXVK and should also be able to pick up the sequence
 * generated by spirv_to_nir for an OpLoad of a large composite followed by
 * OpStore.
 *
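 * For example, with a three-element array it effectively rewrites
 *
 *    dst[0] = src[0];
 *    dst[1] = src[1];
 *    dst[2] = src[2];
 *
 * (written here as illustrative pseudocode; the actual input is a sequence
 * of load_deref/store_deref or copy_deref intrinsics) into a single
 * copy_deref from src[*] to dst[*] using array wildcard derefs.
 *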
 * TODO: Use a hash table approach to support out-of-order and interleaved
 * copies.
 */
bool
nir_opt_find_array_copies(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl && opt_find_array_copies_impl(function->impl))
         progress = true;
   }

   return progress;
}