/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_nir.h"
#include "nir_builder.h"
#include "compiler/brw_nir.h"
#include "util/mesa-sha1.h"

void
anv_nir_compute_push_layout(const struct anv_physical_device *pdevice,
                            bool robust_buffer_access,
                            nir_shader *nir,
                            struct brw_stage_prog_data *prog_data,
                            struct anv_pipeline_bind_map *map,
                            void *mem_ctx)
{
   const struct brw_compiler *compiler = pdevice->compiler;
   memset(map->push_ranges, 0, sizeof(map->push_ranges));

   bool has_const_ubo = false;
   unsigned push_start = UINT_MAX, push_end = 0;
   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_load_ubo:
               if (nir_src_is_const(intrin->src[0]) &&
                   nir_src_is_const(intrin->src[1]))
                  has_const_ubo = true;
               break;

            case nir_intrinsic_load_push_constant: {
               unsigned base = nir_intrinsic_base(intrin);
               unsigned range = nir_intrinsic_range(intrin);
               push_start = MIN2(push_start, base);
               push_end = MAX2(push_end, base + range);
               break;
            }

            default:
               break;
            }
         }
      }
   }

   const bool has_push_intrinsic = push_start <= push_end;
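   /* Note the sentinel trick above: push_start stays at UINT_MAX when no
    * load_push_constant intrinsic was seen, so push_start <= push_end can
    * only hold once at least one intrinsic has pulled push_start down to a
    * real base offset.
    */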

   const bool push_ubo_ranges =
      (pdevice->info.gen >= 8 || pdevice->info.is_haswell) &&
      has_const_ubo && nir->info.stage != MESA_SHADER_COMPUTE;

   if (push_ubo_ranges && robust_buffer_access) {
      /* We can't on-the-fly adjust our push ranges because doing so would
       * mess up the layout in the shader.  When robustBufferAccess is
       * enabled, we have to manually bounds check our pushed UBO accesses.
       */
      const uint32_t ubo_size_start =
         offsetof(struct anv_push_constants, push_ubo_sizes);
      const uint32_t ubo_size_end = ubo_size_start + (4 * sizeof(uint32_t));
      push_start = MIN2(push_start, ubo_size_start);
      push_end = MAX2(push_end, ubo_size_end);
   }
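   /* A note on the sizing above: push_ubo_sizes is assumed here to hold four
    * dwords, one per potential pushed UBO range (matching the four entries of
    * prog_data->ubo_ranges); push_range_idx_map below records which dword
    * belongs to which range.
    */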

   if (nir->info.stage == MESA_SHADER_COMPUTE) {
      /* For compute shaders, we always have to have the subgroup ID.  The
       * back-end compiler will "helpfully" add it for us in the last push
       * constant slot.  Yes, there is an off-by-one error here but that's
       * because the back-end will add it so we want to claim the number of
       * push constants one dword less than the full amount including
       * gl_SubgroupId.
       */
      assert(push_end <= offsetof(struct anv_push_constants, cs.subgroup_id));
      push_end = offsetof(struct anv_push_constants, cs.subgroup_id);
   }

   /* Align push_start down to a 32B boundary and make it no larger than
    * push_end (no push constants is indicated by push_start = UINT_MAX).
    */
   push_start = MIN2(push_start, push_end);
   push_start = align_down_u32(push_start, 32);
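   /* For example (illustrative values only): push_start = 36 and
    * push_end = 72 become push_start = 32, so the pushed window covers
    * bytes 32..71 and the base subtraction below stays 32B-aligned.
    */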

   /* For vec4 our push data size needs to be aligned to a vec4 and for
    * scalar, it needs to be aligned to a DWORD.
    */
   const unsigned align = compiler->scalar_stage[nir->info.stage] ? 4 : 16;
   nir->num_uniforms = ALIGN(push_end - push_start, align);
   prog_data->nr_params = nir->num_uniforms / 4;
   prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params);

   struct anv_push_range push_constant_range = {
      .set = ANV_DESCRIPTOR_SET_PUSH_CONSTANTS,
      .start = push_start / 32,
      .length = DIV_ROUND_UP(push_end - push_start, 32),
   };
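   /* anv_push_range counts in 32B GRF units: with the hypothetical numbers
    * from the example above, push_start = 32 and push_end = 72 give
    * .start = 1 and .length = DIV_ROUND_UP(40, 32) = 2 registers.
    */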

   /* Mapping from brw_ubo_range to anv_push_range */
   int push_range_idx_map[4] = { -1, -1, -1, -1 };

   if (push_ubo_ranges) {
      brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);

      /* We can push at most 64 registers worth of data.  The back-end
       * compiler would do this fixup for us but we'd like to calculate
       * the push constant layout ourselves.
       */
      unsigned total_push_regs = push_constant_range.length;
      for (unsigned i = 0; i < 4; i++) {
         if (total_push_regs + prog_data->ubo_ranges[i].length > 64)
            prog_data->ubo_ranges[i].length = 64 - total_push_regs;
         total_push_regs += prog_data->ubo_ranges[i].length;
      }
      assert(total_push_regs <= 64);
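      /* As a worked example (illustrative only): if push_constant_range
       * takes 2 registers and the analysis produced ranges of length
       * 24, 24, 24, and 24, the loop above clamps them to 24, 24, 14, and 0
       * so the running total never exceeds 64.
       */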

      int n = 0;

      if (push_constant_range.length > 0)
         map->push_ranges[n++] = push_constant_range;

      for (int i = 0; i < 4; i++) {
         struct brw_ubo_range *ubo_range = &prog_data->ubo_ranges[i];
         if (ubo_range->length == 0)
            continue;

         if (n >= 4 || (n == 3 && compiler->constant_buffer_0_is_relative)) {
            memset(ubo_range, 0, sizeof(*ubo_range));
            continue;
         }

         const struct anv_pipeline_binding *binding =
            &map->surface_to_descriptor[ubo_range->block];

         push_range_idx_map[i] = n;
         map->push_ranges[n++] = (struct anv_push_range) {
            .set = binding->set,
            .index = binding->index,
            .dynamic_offset_index = binding->dynamic_offset_index,
            .start = ubo_range->start,
            .length = ubo_range->length,
         };
      }
   } else {
      /* For Ivy Bridge, the push constants packets have a different
       * rule that would require us to iterate in the other direction
       * and possibly mess around with dynamic state base address.
       * Don't bother; just emit regular push constants at n = 0.
       *
       * In the compute case, we don't have multiple push ranges so it's
       * better to just provide one in push_ranges[0].
       */
      map->push_ranges[0] = push_constant_range;
   }

   if (has_push_intrinsic || (push_ubo_ranges && robust_buffer_access)) {
      nir_foreach_function(function, nir) {
         if (!function->impl)
            continue;

         nir_builder b;
         nir_builder_init(&b, function->impl);

         nir_foreach_block(block, function->impl) {
            nir_foreach_instr_safe(instr, block) {
               if (instr->type != nir_instr_type_intrinsic)
                  continue;

               nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
               switch (intrin->intrinsic) {
               case nir_intrinsic_load_ubo: {
                  if (!robust_buffer_access)
                     break;

                  if (!nir_src_is_const(intrin->src[0]) ||
                      !nir_src_is_const(intrin->src[1]))
                     break;

                  uint32_t index = nir_src_as_uint(intrin->src[0]);
                  uint64_t offset = nir_src_as_uint(intrin->src[1]);
                  uint32_t size = intrin->num_components *
                                  (intrin->dest.ssa.bit_size / 8);

                  int ubo_range_idx = -1;
                  for (unsigned i = 0; i < 4; i++) {
                     if (prog_data->ubo_ranges[i].length > 0 &&
                         prog_data->ubo_ranges[i].block == index) {
                        ubo_range_idx = i;
                        break;
                     }
                  }

                  if (ubo_range_idx < 0)
                     break;

                  const struct brw_ubo_range *range =
                     &prog_data->ubo_ranges[ubo_range_idx];
                  const uint32_t range_end =
                     (range->start + range->length) * 32;

                  if (range_end < offset || offset + size <= range->start)
                     break;

                  b.cursor = nir_after_instr(&intrin->instr);

                  assert(push_range_idx_map[ubo_range_idx] >= 0);
                  const uint32_t ubo_size_offset =
                     offsetof(struct anv_push_constants, push_ubo_sizes) +
                     push_range_idx_map[ubo_range_idx] * sizeof(uint32_t);

                  nir_intrinsic_instr *load_size =
                     nir_intrinsic_instr_create(b.shader,
                                                nir_intrinsic_load_uniform);
                  load_size->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
                  nir_intrinsic_set_base(load_size,
                                         ubo_size_offset - push_start);
                  nir_intrinsic_set_range(load_size, 4);
                  nir_intrinsic_set_type(load_size, nir_type_uint32);
                  load_size->num_components = 1;
                  nir_ssa_dest_init(&load_size->instr, &load_size->dest,
                                    1, 32, NULL);
                  nir_builder_instr_insert(&b, &load_size->instr);
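                  /* The load_uniform above reads the size dword for this
                   * range out of the pushed anv_push_constants data; its
                   * base is relative to push_start to match the rebasing
                   * applied to load_push_constant intrinsics below.
                   */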

                  /* Do the size checks per-component.  Thanks to scalar block
                   * layout, we could end up with a single vector straddling a
                   * 32B boundary.
                   *
                   * We align up to 32B so that we can get better CSE.
                   *
                   * We check
                   *
                   *    offset + size - 1 < push_ubo_sizes[i]
                   *
                   * rather than
                   *
                   *    offset + size <= push_ubo_sizes[i]
                   *
                   * because it properly returns OOB for the case where
                   * offset + size == 0.
                   */
                  nir_const_value last_byte_const[NIR_MAX_VEC_COMPONENTS];
                  for (unsigned c = 0; c < intrin->dest.ssa.num_components; c++) {
                     assert(intrin->dest.ssa.bit_size % 8 == 0);
                     const unsigned comp_size_B = intrin->dest.ssa.bit_size / 8;
                     const uint32_t comp_last_byte =
                        align_u32(offset + (c + 1) * comp_size_B,
                                  ANV_UBO_BOUNDS_CHECK_ALIGNMENT) - 1;
                     last_byte_const[c] =
                        nir_const_value_for_uint(comp_last_byte, 32);
                  }
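                  /* Illustrative example, assuming a 32B bounds-check
                   * alignment: a vec4 of 32-bit values at offset 20 yields
                   * per-component last bytes of align(24, 32) - 1 = 31, 31,
                   * 31, and align(36, 32) - 1 = 63; the repeated 31s are
                   * what the alignment buys us in CSE terms.
                   */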

                  nir_ssa_def *last_byte =
                     nir_build_imm(&b, intrin->dest.ssa.num_components, 32,
                                   last_byte_const);
                  nir_ssa_def *in_bounds =
                     nir_ult(&b, last_byte, &load_size->dest.ssa);

                  nir_ssa_def *zero =
                     nir_imm_zero(&b, intrin->dest.ssa.num_components,
                                  intrin->dest.ssa.bit_size);
                  nir_ssa_def *value =
                     nir_bcsel(&b, in_bounds, &intrin->dest.ssa, zero);
                  nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
                                                 nir_src_for_ssa(value),
                                                 value->parent_instr);
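                  /* Rewriting uses only *after* value->parent_instr is what
                   * keeps the bcsel itself intact: its own read of the
                   * original load result must not be redirected to the
                   * bcsel's result.
                   */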
                  break;
               }

               case nir_intrinsic_load_push_constant:
                  intrin->intrinsic = nir_intrinsic_load_uniform;
                  nir_intrinsic_set_base(intrin,
                                         nir_intrinsic_base(intrin) -
                                         push_start);
                  break;

               default:
                  break;
               }
            }
         }
      }
   }

   /* Now that we're done computing the push constant portion of the
    * bind map, hash it.  This lets us quickly determine if the actual
    * mapping has changed and not just a no-op pipeline change.
    */
   _mesa_sha1_compute(map->push_ranges,
                      sizeof(map->push_ranges),
                      map->push_sha1);
}

void
anv_nir_validate_push_layout(struct brw_stage_prog_data *prog_data,
                             struct anv_pipeline_bind_map *map)
{
#ifndef NDEBUG
   unsigned prog_data_push_size = DIV_ROUND_UP(prog_data->nr_params, 8);
   for (unsigned i = 0; i < 4; i++)
      prog_data_push_size += prog_data->ubo_ranges[i].length;
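   /* prog_data->nr_params counts 32-bit params, so DIV_ROUND_UP(nr_params, 8)
    * converts dwords to 32B registers; ubo_ranges[i].length is already in
    * registers.
    */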

   unsigned bind_map_push_size = 0;
   for (unsigned i = 0; i < 4; i++)
      bind_map_push_size += map->push_ranges[i].length;

   /* We could go through everything again but it should be enough to assert
    * that they push the same number of registers.  This should alert us if
    * the back-end compiler decides to re-arrange stuff or shrink a range.
    */
   assert(prog_data_push_size == bind_map_push_size);
#endif
}