/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "anv_nir.h"

#include "compiler/brw_nir.h"
28 anv_nir_compute_push_layout(const struct anv_physical_device
*pdevice
,
30 struct brw_stage_prog_data
*prog_data
,
31 struct anv_pipeline_bind_map
*map
,
34 memset(map
->push_ranges
, 0, sizeof(map
->push_ranges
));
36 unsigned push_start
= UINT_MAX
, push_end
= 0;
37 nir_foreach_function(function
, nir
) {
41 nir_foreach_block(block
, function
->impl
) {
42 nir_foreach_instr(instr
, block
) {
43 if (instr
->type
!= nir_instr_type_intrinsic
)
46 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
47 if (intrin
->intrinsic
!= nir_intrinsic_load_push_constant
)
50 unsigned base
= nir_intrinsic_base(intrin
);
51 unsigned range
= nir_intrinsic_range(intrin
);
52 push_start
= MIN2(push_start
, base
);
53 push_end
= MAX2(push_end
, base
+ range
);
58 const bool has_push_intrinsic
= push_start
<= push_end
;
60 if (nir
->info
.stage
== MESA_SHADER_COMPUTE
) {
61 /* For compute shaders, we always have to have the subgroup ID. The
62 * back-end compiler will "helpfully" add it for us in the last push
63 * constant slot. Yes, there is an off-by-one error here but that's
64 * because the back-end will add it so we want to claim the number of
65 * push constants one dword less than the full amount including
68 assert(push_end
<= offsetof(struct anv_push_constants
, cs
.subgroup_id
));
69 push_end
= offsetof(struct anv_push_constants
, cs
.subgroup_id
);
72 /* Align push_start down to a 32B boundary and make it no larger than
73 * push_end (no push constants is indicated by push_start = UINT_MAX).
75 push_start
= MIN2(push_start
, push_end
);
78 if (has_push_intrinsic
) {
79 nir_foreach_function(function
, nir
) {
83 nir_foreach_block(block
, function
->impl
) {
84 nir_foreach_instr(instr
, block
) {
85 if (instr
->type
!= nir_instr_type_intrinsic
)
88 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
89 if (intrin
->intrinsic
!= nir_intrinsic_load_push_constant
)
92 intrin
->intrinsic
= nir_intrinsic_load_uniform
;
93 nir_intrinsic_set_base(intrin
,
94 nir_intrinsic_base(intrin
) -
101 /* For vec4 our push data size needs to be aligned to a vec4 and for
102 * scalar, it needs to be aligned to a DWORD.
104 const unsigned align
=
105 pdevice
->compiler
->scalar_stage
[nir
->info
.stage
] ? 4 : 16;
106 nir
->num_uniforms
= ALIGN(push_end
- push_start
, align
);
107 prog_data
->nr_params
= nir
->num_uniforms
/ 4;
108 prog_data
->param
= ralloc_array(mem_ctx
, uint32_t, prog_data
->nr_params
);
110 struct anv_push_range push_constant_range
= {
111 .set
= ANV_DESCRIPTOR_SET_PUSH_CONSTANTS
,
112 .start
= push_start
/ 32,
113 .length
= DIV_ROUND_UP(push_end
- push_start
, 32),
116 if ((pdevice
->info
.gen
>= 8 || pdevice
->info
.is_haswell
) &&
117 nir
->info
.stage
!= MESA_SHADER_COMPUTE
) {
118 brw_nir_analyze_ubo_ranges(pdevice
->compiler
, nir
, NULL
,
119 prog_data
->ubo_ranges
);
121 /* We can push at most 64 registers worth of data. The back-end
122 * compiler would do this fixup for us but we'd like to calculate
123 * the push constant layout ourselves.
125 unsigned total_push_regs
= push_constant_range
.length
;
126 for (unsigned i
= 0; i
< 4; i
++) {
127 if (total_push_regs
+ prog_data
->ubo_ranges
[i
].length
> 64)
128 prog_data
->ubo_ranges
[i
].length
= 64 - total_push_regs
;
129 total_push_regs
+= prog_data
->ubo_ranges
[i
].length
;
131 assert(total_push_regs
<= 64);
133 /* The Skylake PRM contains the following restriction:
135 * "The driver must ensure The following case does not occur
136 * without a flush to the 3D engine: 3DSTATE_CONSTANT_* with
137 * buffer 3 read length equal to zero committed followed by a
138 * 3DSTATE_CONSTANT_* with buffer 0 read length not equal to
141 * To avoid this, we program the buffers in the highest slots.
142 * This way, slot 0 is only used if slot 3 is also used.
146 for (int i
= 3; i
>= 0; i
--) {
147 const struct brw_ubo_range
*ubo_range
= &prog_data
->ubo_ranges
[i
];
148 if (ubo_range
->length
== 0)
151 const struct anv_pipeline_binding
*binding
=
152 &map
->surface_to_descriptor
[ubo_range
->block
];
154 map
->push_ranges
[n
--] = (struct anv_push_range
) {
156 .index
= binding
->index
,
157 .dynamic_offset_index
= binding
->dynamic_offset_index
,
158 .start
= ubo_range
->start
,
159 .length
= ubo_range
->length
,
163 if (push_constant_range
.length
> 0)
164 map
->push_ranges
[n
--] = push_constant_range
;
166 /* For Ivy Bridge, the push constants packets have a different
167 * rule that would require us to iterate in the other direction
168 * and possibly mess around with dynamic state base address.
169 * Don't bother; just emit regular push constants at n = 0.
171 * In the compute case, we don't have multiple push ranges so it's
172 * better to just provide one in push_ranges[0].
174 map
->push_ranges
[0] = push_constant_range
;
179 anv_nir_validate_push_layout(struct brw_stage_prog_data
*prog_data
,
180 struct anv_pipeline_bind_map
*map
)
183 unsigned prog_data_push_size
= DIV_ROUND_UP(prog_data
->nr_params
, 8);
184 for (unsigned i
= 0; i
< 4; i
++)
185 prog_data_push_size
+= prog_data
->ubo_ranges
[i
].length
;
187 unsigned bind_map_push_size
= 0;
188 for (unsigned i
= 0; i
< 4; i
++)
189 bind_map_push_size
+= map
->push_ranges
[i
].length
;
191 /* We could go through everything again but it should be enough to assert
192 * that they push the same number of registers. This should alert us if
193 * the back-end compiler decides to re-arrange stuff or shrink a range.
195 assert(prog_data_push_size
== bind_map_push_size
);