/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "nir_builder.h"
#include "nir_deref.h"
29 index_ssa_def_cb(nir_ssa_def
*def
, void *state
)
31 unsigned *index
= (unsigned *) state
;
32 def
->index
= (*index
)++;
37 static nir_deref_instr
*
38 get_deref_for_load_src(nir_src src
, unsigned first_valid_load
)
43 if (src
.ssa
->parent_instr
->type
!= nir_instr_type_intrinsic
)
46 nir_intrinsic_instr
*load
= nir_instr_as_intrinsic(src
.ssa
->parent_instr
);
47 if (load
->intrinsic
!= nir_intrinsic_load_deref
)
50 if (load
->dest
.ssa
.index
< first_valid_load
)
53 return nir_src_as_deref(load
->src
[0]);
57 /* Index into the array of the last copy or -1 for no ongoing copy. */
58 unsigned next_array_idx
;
60 /* Length of the array we're copying */
63 /* Index into the deref path to the array we think is being copied */
64 int src_deref_array_idx
;
65 int dst_deref_array_idx
;
67 /* Deref paths of the first load/store pair or copy */
68 nir_deref_path first_src_path
;
69 nir_deref_path first_dst_path
;
73 match_state_init(struct match_state
*state
)
75 state
->next_array_idx
= 0;
77 state
->src_deref_array_idx
= -1;
78 state
->dst_deref_array_idx
= -1;
82 match_state_finish(struct match_state
*state
)
84 if (state
->next_array_idx
> 0) {
85 nir_deref_path_finish(&state
->first_src_path
);
86 nir_deref_path_finish(&state
->first_dst_path
);
/* Abandons any in-progress match: frees the recorded deref paths and
 * returns `state` to its initial empty state.
 */
static void
match_state_reset(struct match_state *state)
{
   match_state_finish(state);
   match_state_init(state);
}
98 try_match_deref(nir_deref_path
*base_path
, int *path_array_idx
,
99 nir_deref_instr
*deref
, int arr_idx
, void *mem_ctx
)
101 nir_deref_path deref_path
;
102 nir_deref_path_init(&deref_path
, deref
, mem_ctx
);
105 for (int i
= 0; ; i
++) {
106 nir_deref_instr
*b
= base_path
->path
[i
];
107 nir_deref_instr
*d
= deref_path
.path
[i
];
108 /* They have to be the same length */
109 if ((b
== NULL
) != (d
== NULL
))
115 /* This can happen if one is a deref_array and the other a wildcard */
116 if (b
->deref_type
!= d
->deref_type
)
119 switch (b
->deref_type
) {
120 case nir_deref_type_var
:
121 if (b
->var
!= d
->var
)
125 case nir_deref_type_array
:
126 assert(b
->arr
.index
.is_ssa
&& d
->arr
.index
.is_ssa
);
127 const bool const_b_idx
= nir_src_is_const(b
->arr
.index
);
128 const bool const_d_idx
= nir_src_is_const(d
->arr
.index
);
129 const unsigned b_idx
= const_b_idx
? nir_src_as_uint(b
->arr
.index
) : 0;
130 const unsigned d_idx
= const_d_idx
? nir_src_as_uint(d
->arr
.index
) : 0;
132 /* If we don't have an index into the path yet or if this entry in
133 * the path is at the array index, see if this is a candidate. We're
134 * looking for an index which is zero in the base deref and arr_idx
135 * in the search deref.
137 if ((*path_array_idx
< 0 || *path_array_idx
== i
) &&
138 const_b_idx
&& b_idx
== 0 &&
139 const_d_idx
&& d_idx
== arr_idx
) {
144 /* We're at the array index but not a candidate */
145 if (*path_array_idx
== i
)
148 /* If we're not the path array index, we must match exactly. We
149 * could probably just compare SSA values and trust in copy
150 * propagation but doing it ourselves means this pass can run a bit
153 if (b
->arr
.index
.ssa
== d
->arr
.index
.ssa
||
154 (const_b_idx
&& const_d_idx
&& b_idx
== d_idx
))
159 case nir_deref_type_array_wildcard
:
162 case nir_deref_type_struct
:
163 if (b
->strct
.index
!= d
->strct
.index
)
168 unreachable("Invalid deref type in a path");
172 /* If we got here without failing, we've matched. However, it isn't an
173 * array match unless we found an altered array index.
175 found
= *path_array_idx
> 0;
178 nir_deref_path_finish(&deref_path
);
182 static nir_deref_instr
*
183 build_wildcard_deref(nir_builder
*b
, nir_deref_path
*path
,
184 unsigned wildcard_idx
)
186 assert(path
->path
[wildcard_idx
]->deref_type
== nir_deref_type_array
);
188 nir_deref_instr
*tail
=
189 nir_build_deref_array_wildcard(b
, path
->path
[wildcard_idx
- 1]);
191 for (unsigned i
= wildcard_idx
+ 1; path
->path
[i
]; i
++)
192 tail
= nir_build_deref_follower(b
, tail
, path
->path
[i
]);
198 opt_find_array_copies_block(nir_builder
*b
, nir_block
*block
,
199 unsigned *num_ssa_defs
, void *mem_ctx
)
201 bool progress
= false;
203 struct match_state s
;
204 match_state_init(&s
);
206 nir_variable
*dst_var
= NULL
;
207 unsigned prev_dst_var_last_write
= *num_ssa_defs
;
208 unsigned dst_var_last_write
= *num_ssa_defs
;
210 nir_foreach_instr(instr
, block
) {
211 /* Index the SSA defs before we do anything else. */
212 nir_foreach_ssa_def(instr
, index_ssa_def_cb
, num_ssa_defs
);
214 if (instr
->type
!= nir_instr_type_intrinsic
)
217 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
218 if (intrin
->intrinsic
!= nir_intrinsic_copy_deref
&&
219 intrin
->intrinsic
!= nir_intrinsic_store_deref
)
222 nir_deref_instr
*dst_deref
= nir_src_as_deref(intrin
->src
[0]);
224 /* The destination must be local. If we see a non-local store, we
225 * continue on because it won't affect local stores or read-only
228 if (dst_deref
->mode
!= nir_var_function
)
231 /* We keep track of the SSA indices where the two last-written
232 * variables are written. The prev_dst_var_last_write tells us when
233 * the last store_deref to something other than dst happened. If the
234 * SSA def index from a load is greater than or equal to this number
235 * then we know it happened afterwards and no writes to anything other
236 * than dst occur between the load and the current instruction.
238 if (nir_deref_instr_get_variable(dst_deref
) != dst_var
) {
239 prev_dst_var_last_write
= dst_var_last_write
;
240 dst_var
= nir_deref_instr_get_variable(dst_deref
);
242 dst_var_last_write
= *num_ssa_defs
;
244 /* If it's a full variable store or copy, reset. This will trigger
245 * eventually because we'll fail to match an array element. However,
246 * it's a cheap early-exit.
248 if (dst_deref
->deref_type
== nir_deref_type_var
)
251 nir_deref_instr
*src_deref
;
252 if (intrin
->intrinsic
== nir_intrinsic_copy_deref
) {
253 src_deref
= nir_src_as_deref(intrin
->src
[1]);
255 assert(intrin
->intrinsic
== nir_intrinsic_store_deref
);
256 src_deref
= get_deref_for_load_src(intrin
->src
[1],
257 prev_dst_var_last_write
);
259 /* We can only handle full writes */
260 if (nir_intrinsic_write_mask(intrin
) !=
261 (1 << glsl_get_components(dst_deref
->type
)) - 1)
265 /* If we didn't find a valid src, then we have an unknown store and it
266 * could mess things up.
268 if (src_deref
== NULL
)
271 /* The source must be either local or something that's guaranteed to be
274 const nir_variable_mode read_only_modes
=
275 nir_var_shader_in
| nir_var_uniform
| nir_var_system_value
;
276 if (!(src_deref
->mode
& (nir_var_function
| read_only_modes
)))
279 /* If we don't yet have an active copy, then make this instruction the
282 if (s
.next_array_idx
== 0) {
283 /* We can't combine a copy if there is any chance the source and
284 * destination will end up aliasing. Just bail if they're the same
287 if (nir_deref_instr_get_variable(src_deref
) == dst_var
)
290 /* The load/store pair is enough to guarantee the same bit size and
291 * number of components but a copy_var requires the actual types to
294 if (dst_deref
->type
!= src_deref
->type
)
297 /* The first time we see a store, we don't know which array in the
298 * deref path is the one being copied so we just record the paths
299 * as-is and continue. On the next iteration, it will try to match
300 * based on which array index changed.
302 nir_deref_path_init(&s
.first_dst_path
, dst_deref
, mem_ctx
);
303 nir_deref_path_init(&s
.first_src_path
, src_deref
, mem_ctx
);
304 s
.next_array_idx
= 1;
308 if (!try_match_deref(&s
.first_dst_path
, &s
.dst_deref_array_idx
,
309 dst_deref
, s
.next_array_idx
, mem_ctx
) ||
310 !try_match_deref(&s
.first_src_path
, &s
.src_deref_array_idx
,
311 src_deref
, s
.next_array_idx
, mem_ctx
))
314 if (s
.next_array_idx
== 1) {
315 /* This is our first non-trivial match. We now have indices into
316 * the search paths so we can do a couple more checks.
318 assert(s
.dst_deref_array_idx
> 0 && s
.src_deref_array_idx
> 0);
319 const struct glsl_type
*dst_arr_type
=
320 s
.first_dst_path
.path
[s
.dst_deref_array_idx
- 1]->type
;
321 const struct glsl_type
*src_arr_type
=
322 s
.first_src_path
.path
[s
.src_deref_array_idx
- 1]->type
;
324 assert(glsl_type_is_array(dst_arr_type
) ||
325 glsl_type_is_matrix(dst_arr_type
));
326 assert(glsl_type_is_array(src_arr_type
) ||
327 glsl_type_is_matrix(src_arr_type
));
329 /* They must be the same length */
330 s
.array_len
= glsl_get_length(dst_arr_type
);
331 if (s
.array_len
!= glsl_get_length(src_arr_type
))
337 if (s
.next_array_idx
== s
.array_len
) {
338 /* Hooray, We found a copy! */
339 b
->cursor
= nir_after_instr(instr
);
340 nir_copy_deref(b
, build_wildcard_deref(b
, &s
.first_dst_path
,
341 s
.dst_deref_array_idx
),
342 build_wildcard_deref(b
, &s
.first_src_path
,
343 s
.src_deref_array_idx
));
344 match_state_reset(&s
);
351 match_state_reset(&s
);
358 opt_find_array_copies_impl(nir_function_impl
*impl
)
361 nir_builder_init(&b
, impl
);
363 bool progress
= false;
365 void *mem_ctx
= ralloc_context(NULL
);
367 /* We re-index the SSA defs as we go; it makes it easier to handle
368 * resetting the state machine.
370 unsigned num_ssa_defs
= 0;
372 nir_foreach_block(block
, impl
) {
373 if (opt_find_array_copies_block(&b
, block
, &num_ssa_defs
, mem_ctx
))
377 impl
->ssa_alloc
= num_ssa_defs
;
379 ralloc_free(mem_ctx
);
382 nir_metadata_preserve(impl
, nir_metadata_block_index
|
383 nir_metadata_dominance
);
/*
 * This peephole optimization looks for a series of load/store_deref or
 * copy_deref instructions that copy an array from one variable to another and
 * turns it into a copy_deref that copies the entire array.  The pattern it
 * looks for is extremely specific but it's good enough to pick up on the
 * input array copies in DXVK and should also be able to pick up the sequence
 * generated by spirv_to_nir for a OpLoad of a large composite followed by
 * OpStore.
 *
 * TODO: Use a hash table approach to support out-of-order and interleaved
 * copies.
 */
402 nir_opt_find_array_copies(nir_shader
*shader
)
404 bool progress
= false;
406 nir_foreach_function(function
, shader
) {
407 if (function
->impl
&& opt_find_array_copies_impl(function
->impl
))