anv: Rework push constant handling
[mesa.git] src/intel/vulkan/anv_nir_compute_push_layout.c
/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_nir.h"
#include "compiler/brw_nir.h"

void
anv_nir_compute_push_layout(const struct anv_physical_device *pdevice,
                            nir_shader *nir,
                            struct brw_stage_prog_data *prog_data,
                            struct anv_pipeline_bind_map *map,
                            void *mem_ctx)
{
   memset(map->push_ranges, 0, sizeof(map->push_ranges));

   unsigned push_start = UINT_MAX, push_end = 0;
   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_push_constant)
               continue;

            unsigned base = nir_intrinsic_base(intrin);
            unsigned range = nir_intrinsic_range(intrin);
            push_start = MIN2(push_start, base);
            push_end = MAX2(push_end, base + range);
         }
      }
   }

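   /* At this point, [push_start, push_end) bounds every push constant byte
    * the shader actually loads.  For example, loads at (base 64, range 16)
    * and (base 96, range 8) produce push_start = 64 and push_end = 104.
    */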
   const bool has_push_intrinsic = push_start <= push_end;

   if (nir->info.stage == MESA_SHADER_COMPUTE) {
      /* For compute shaders, we always have to have the subgroup ID.  The
       * back-end compiler will "helpfully" add it for us in the last push
       * constant slot.  There is an intentional off-by-one here: because
       * the back-end appends gl_SubgroupId itself, we claim one dword less
       * than the full amount of push constants so it has a slot to land in.
       */
      assert(push_end <= offsetof(struct anv_push_constants, cs.subgroup_id));
      push_end = offsetof(struct anv_push_constants, cs.subgroup_id);
   }

   /* Align push_start down to a 32B boundary and make it no larger than
    * push_end (no push constants is indicated by push_start = UINT_MAX).
    */
   push_start = MIN2(push_start, push_end);
   push_start &= ~31u;
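
   /* For example, a single load at (base 100, range 8) gives push_start =
    * MIN2(100, 108) = 100, which then aligns down to 96; the shader will
    * push data starting at offset 96.
    */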

   if (has_push_intrinsic) {
      nir_foreach_function(function, nir) {
         if (!function->impl)
            continue;

         nir_foreach_block(block, function->impl) {
            nir_foreach_instr(instr, block) {
               if (instr->type != nir_instr_type_intrinsic)
                  continue;

               nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
               if (intrin->intrinsic != nir_intrinsic_load_push_constant)
                  continue;

               intrin->intrinsic = nir_intrinsic_load_uniform;
               nir_intrinsic_set_base(intrin,
                                      nir_intrinsic_base(intrin) -
                                      push_start);
            }
         }
      }
   }
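
   /* Every push constant load is now an ordinary uniform load whose base is
    * relative to push_start: e.g. with push_start = 96, a load that was at
    * base 100 becomes a load_uniform with base 4.
    */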

   /* For vec4 our push data size needs to be aligned to a vec4 and for
    * scalar, it needs to be aligned to a DWORD.
    */
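   /* num_uniforms is counted in bytes here while nr_params is in dwords,
    * so e.g. 20 bytes of push data in a scalar stage yields num_uniforms =
    * 20 and nr_params = 5.
    */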
   const unsigned align =
      pdevice->compiler->scalar_stage[nir->info.stage] ? 4 : 16;
   nir->num_uniforms = ALIGN(push_end - push_start, align);
   prog_data->nr_params = nir->num_uniforms / 4;
   prog_data->param = ralloc_array(mem_ctx, uint32_t, prog_data->nr_params);

   struct anv_push_range push_constant_range = {
      .set = ANV_DESCRIPTOR_SET_PUSH_CONSTANTS,
      .start = push_start / 32,
      .length = DIV_ROUND_UP(push_end - push_start, 32),
   };

   if ((pdevice->info.gen >= 8 || pdevice->info.is_haswell) &&
       nir->info.stage != MESA_SHADER_COMPUTE) {
      brw_nir_analyze_ubo_ranges(pdevice->compiler, nir, NULL,
                                 prog_data->ubo_ranges);

      /* We can push at most 64 registers worth of data.  The back-end
       * compiler would do this fixup for us but we'd like to calculate
       * the push constant layout ourselves.
       */
      unsigned total_push_regs = push_constant_range.length;
      for (unsigned i = 0; i < 4; i++) {
         if (total_push_regs + prog_data->ubo_ranges[i].length > 64)
            prog_data->ubo_ranges[i].length = 64 - total_push_regs;
         total_push_regs += prog_data->ubo_ranges[i].length;
      }
      assert(total_push_regs <= 64);
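
      /* For example, with push_constant_range.length = 2 and UBO range
       * lengths of {40, 30, 10, 0}, the second range is clamped to 22 and
       * the third to 0, for a total of exactly 64 registers.
       */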

      /* The Skylake PRM contains the following restriction:
       *
       *    "The driver must ensure The following case does not occur
       *     without a flush to the 3D engine: 3DSTATE_CONSTANT_* with
       *     buffer 3 read length equal to zero committed followed by a
       *     3DSTATE_CONSTANT_* with buffer 0 read length not equal to
       *     zero committed."
       *
       * To avoid this, we program the buffers in the highest slots.
       * This way, slot 0 is only used if slot 3 is also used.
       */
      int n = 3;

      for (int i = 3; i >= 0; i--) {
         const struct brw_ubo_range *ubo_range = &prog_data->ubo_ranges[i];
         if (ubo_range->length == 0)
            continue;

         const struct anv_pipeline_binding *binding =
            &map->surface_to_descriptor[ubo_range->block];

         map->push_ranges[n--] = (struct anv_push_range) {
            .set = binding->set,
            .index = binding->index,
            .dynamic_offset_index = binding->dynamic_offset_index,
            .start = ubo_range->start,
            .length = ubo_range->length,
         };
      }

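      /* The push constant range, if any, takes the next slot down.  With
       * two pushed UBO ranges, for example, the final layout is: slots 3
       * and 2 hold the UBO ranges, slot 1 holds the push constants, and
       * slot 0 is unused.
       */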
      if (push_constant_range.length > 0)
         map->push_ranges[n--] = push_constant_range;
   } else {
      /* For Ivy Bridge, the push constants packets have a different
       * rule that would require us to iterate in the other direction
       * and possibly mess around with dynamic state base address.
       * Don't bother; just emit regular push constants at n = 0.
       *
       * In the compute case, we don't have multiple push ranges so it's
       * better to just provide one in push_ranges[0].
       */
      map->push_ranges[0] = push_constant_range;
   }
}

void
anv_nir_validate_push_layout(struct brw_stage_prog_data *prog_data,
                             struct anv_pipeline_bind_map *map)
{
#ifndef NDEBUG
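   /* Both sizes below are counted in 32B registers.  nr_params is in
    * dwords, so divide by 8 (rounding up) to get registers.
    */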
   unsigned prog_data_push_size = DIV_ROUND_UP(prog_data->nr_params, 8);
   for (unsigned i = 0; i < 4; i++)
      prog_data_push_size += prog_data->ubo_ranges[i].length;

   unsigned bind_map_push_size = 0;
   for (unsigned i = 0; i < 4; i++)
      bind_map_push_size += map->push_ranges[i].length;

   /* We could go through everything again but it should be enough to assert
    * that they push the same number of registers.  This should alert us if
    * the back-end compiler decides to re-arrange stuff or shrink a range.
    */
   assert(prog_data_push_size == bind_map_push_size);
#endif
}