/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
25 #include "nir_builder.h"
26 #include "compiler/brw_nir.h"
27 #include "util/mesa-sha1.h"
30 anv_nir_compute_push_layout(const struct anv_physical_device
*pdevice
,
31 bool robust_buffer_access
,
33 struct brw_stage_prog_data
*prog_data
,
34 struct anv_pipeline_bind_map
*map
,
37 const struct brw_compiler
*compiler
= pdevice
->compiler
;
38 memset(map
->push_ranges
, 0, sizeof(map
->push_ranges
));
40 bool has_const_ubo
= false;
41 unsigned push_start
= UINT_MAX
, push_end
= 0;
42 nir_foreach_function(function
, nir
) {
46 nir_foreach_block(block
, function
->impl
) {
47 nir_foreach_instr(instr
, block
) {
48 if (instr
->type
!= nir_instr_type_intrinsic
)
51 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
52 switch (intrin
->intrinsic
) {
53 case nir_intrinsic_load_ubo
:
54 if (nir_src_is_const(intrin
->src
[0]) &&
55 nir_src_is_const(intrin
->src
[1]))
59 case nir_intrinsic_load_push_constant
: {
60 unsigned base
= nir_intrinsic_base(intrin
);
61 unsigned range
= nir_intrinsic_range(intrin
);
62 push_start
= MIN2(push_start
, base
);
63 push_end
= MAX2(push_end
, base
+ range
);
74 const bool has_push_intrinsic
= push_start
<= push_end
;
76 const bool push_ubo_ranges
=
77 (pdevice
->info
.gen
>= 8 || pdevice
->info
.is_haswell
) &&
78 has_const_ubo
&& nir
->info
.stage
!= MESA_SHADER_COMPUTE
;
80 if (push_ubo_ranges
&& robust_buffer_access
) {
81 /* We can't on-the-fly adjust our push ranges because doing so would
82 * mess up the layout in the shader. When robustBufferAccess is
83 * enabled, we push a mask into the shader indicating which pushed
84 * registers are valid and we zero out the invalid ones at the top of
87 const uint32_t push_reg_mask_start
=
88 offsetof(struct anv_push_constants
, push_reg_mask
);
89 const uint32_t push_reg_mask_end
= push_reg_mask_start
+ sizeof(uint64_t);
90 push_start
= MIN2(push_start
, push_reg_mask_start
);
91 push_end
= MAX2(push_end
, push_reg_mask_end
);
94 if (nir
->info
.stage
== MESA_SHADER_COMPUTE
) {
95 /* For compute shaders, we always have to have the subgroup ID. The
96 * back-end compiler will "helpfully" add it for us in the last push
97 * constant slot. Yes, there is an off-by-one error here but that's
98 * because the back-end will add it so we want to claim the number of
99 * push constants one dword less than the full amount including
102 assert(push_end
<= offsetof(struct anv_push_constants
, cs
.subgroup_id
));
103 push_end
= offsetof(struct anv_push_constants
, cs
.subgroup_id
);
106 /* Align push_start down to a 32B boundary and make it no larger than
107 * push_end (no push constants is indicated by push_start = UINT_MAX).
109 push_start
= MIN2(push_start
, push_end
);
110 push_start
= align_down_u32(push_start
, 32);
112 /* For vec4 our push data size needs to be aligned to a vec4 and for
113 * scalar, it needs to be aligned to a DWORD.
115 const unsigned align
= compiler
->scalar_stage
[nir
->info
.stage
] ? 4 : 16;
116 nir
->num_uniforms
= ALIGN(push_end
- push_start
, align
);
117 prog_data
->nr_params
= nir
->num_uniforms
/ 4;
118 prog_data
->param
= rzalloc_array(mem_ctx
, uint32_t, prog_data
->nr_params
);
120 struct anv_push_range push_constant_range
= {
121 .set
= ANV_DESCRIPTOR_SET_PUSH_CONSTANTS
,
122 .start
= push_start
/ 32,
123 .length
= DIV_ROUND_UP(push_end
- push_start
, 32),
126 if (has_push_intrinsic
) {
127 nir_foreach_function(function
, nir
) {
131 nir_foreach_block(block
, function
->impl
) {
132 nir_foreach_instr_safe(instr
, block
) {
133 if (instr
->type
!= nir_instr_type_intrinsic
)
136 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
137 switch (intrin
->intrinsic
) {
138 case nir_intrinsic_load_push_constant
:
139 intrin
->intrinsic
= nir_intrinsic_load_uniform
;
140 nir_intrinsic_set_base(intrin
,
141 nir_intrinsic_base(intrin
) -
153 if (push_ubo_ranges
) {
154 brw_nir_analyze_ubo_ranges(compiler
, nir
, NULL
, prog_data
->ubo_ranges
);
156 /* We can push at most 64 registers worth of data. The back-end
157 * compiler would do this fixup for us but we'd like to calculate
158 * the push constant layout ourselves.
160 unsigned total_push_regs
= push_constant_range
.length
;
161 for (unsigned i
= 0; i
< 4; i
++) {
162 if (total_push_regs
+ prog_data
->ubo_ranges
[i
].length
> 64)
163 prog_data
->ubo_ranges
[i
].length
= 64 - total_push_regs
;
164 total_push_regs
+= prog_data
->ubo_ranges
[i
].length
;
166 assert(total_push_regs
<= 64);
170 if (push_constant_range
.length
> 0)
171 map
->push_ranges
[n
++] = push_constant_range
;
173 if (robust_buffer_access
) {
174 const uint32_t push_reg_mask_offset
=
175 offsetof(struct anv_push_constants
, push_reg_mask
);
176 assert(push_reg_mask_offset
>= push_start
);
177 prog_data
->push_reg_mask_param
=
178 (push_reg_mask_offset
- push_start
) / 4;
181 unsigned range_start_reg
= push_constant_range
.length
;
183 for (int i
= 0; i
< 4; i
++) {
184 struct brw_ubo_range
*ubo_range
= &prog_data
->ubo_ranges
[i
];
185 if (ubo_range
->length
== 0)
188 if (n
>= 4 || (n
== 3 && compiler
->constant_buffer_0_is_relative
)) {
189 memset(ubo_range
, 0, sizeof(*ubo_range
));
193 const struct anv_pipeline_binding
*binding
=
194 &map
->surface_to_descriptor
[ubo_range
->block
];
196 map
->push_ranges
[n
++] = (struct anv_push_range
) {
198 .index
= binding
->index
,
199 .dynamic_offset_index
= binding
->dynamic_offset_index
,
200 .start
= ubo_range
->start
,
201 .length
= ubo_range
->length
,
204 /* We only bother to shader-zero pushed client UBOs */
205 if (binding
->set
< MAX_SETS
&& robust_buffer_access
) {
206 prog_data
->zero_push_reg
|= BITFIELD64_RANGE(range_start_reg
,
210 range_start_reg
+= ubo_range
->length
;
213 /* For Ivy Bridge, the push constants packets have a different
214 * rule that would require us to iterate in the other direction
215 * and possibly mess around with dynamic state base address.
216 * Don't bother; just emit regular push constants at n = 0.
218 * In the compute case, we don't have multiple push ranges so it's
219 * better to just provide one in push_ranges[0].
221 map
->push_ranges
[0] = push_constant_range
;
224 /* Now that we're done computing the push constant portion of the
225 * bind map, hash it. This lets us quickly determine if the actual
226 * mapping has changed and not just a no-op pipeline change.
228 _mesa_sha1_compute(map
->push_ranges
,
229 sizeof(map
->push_ranges
),
234 anv_nir_validate_push_layout(struct brw_stage_prog_data
*prog_data
,
235 struct anv_pipeline_bind_map
*map
)
238 unsigned prog_data_push_size
= DIV_ROUND_UP(prog_data
->nr_params
, 8);
239 for (unsigned i
= 0; i
< 4; i
++)
240 prog_data_push_size
+= prog_data
->ubo_ranges
[i
].length
;
242 unsigned bind_map_push_size
= 0;
243 for (unsigned i
= 0; i
< 4; i
++)
244 bind_map_push_size
+= map
->push_ranges
[i
].length
;
246 /* We could go through everything again but it should be enough to assert
247 * that they push the same number of registers. This should alert us if
248 * the back-end compiler decides to re-arrange stuff or shrink a range.
250 assert(prog_data_push_size
== bind_map_push_size
);