/*
 * Copyright © 2015 Intel Corporation
 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "compiler/brw_nir.h"
#include "compiler/glsl/ir_uniform.h"
#include "compiler/nir/nir_builder.h"
#include "brw_program.h"
30 brw_nir_setup_glsl_builtin_uniform(nir_variable
*var
,
31 const struct gl_program
*prog
,
32 struct brw_stage_prog_data
*stage_prog_data
,
35 const nir_state_slot
*const slots
= var
->state_slots
;
36 assert(var
->state_slots
!= NULL
);
38 unsigned uniform_index
= var
->data
.driver_location
/ 4;
39 for (unsigned int i
= 0; i
< var
->num_state_slots
; i
++) {
40 /* This state reference has already been setup by ir_to_mesa, but we'll
41 * get the same index back here.
43 int index
= _mesa_add_state_reference(prog
->Parameters
,
46 /* Add each of the unique swizzles of the element as a parameter.
47 * This'll end up matching the expected layout of the
48 * array/matrix/structure we're trying to fill in.
51 for (unsigned j
= 0; j
< 4; j
++) {
52 int swiz
= GET_SWZ(slots
[i
].swizzle
, j
);
54 /* If we hit a pair of identical swizzles, this means we've hit the
55 * end of the builtin variable. In scalar mode, we should just quit
56 * and move on to the next one. In vec4, we need to continue and pad
57 * it out to 4 components.
59 if (swiz
== last_swiz
&& is_scalar
)
64 stage_prog_data
->param
[uniform_index
++] =
65 BRW_PARAM_PARAMETER(index
, swiz
);
71 setup_vec4_image_param(uint32_t *params
, uint32_t idx
,
72 unsigned offset
, unsigned n
)
74 assert(offset
% sizeof(uint32_t) == 0);
75 for (unsigned i
= 0; i
< n
; ++i
)
76 params
[i
] = BRW_PARAM_IMAGE(idx
, offset
/ sizeof(uint32_t) + i
);
78 for (unsigned i
= n
; i
< 4; ++i
)
79 params
[i
] = BRW_PARAM_BUILTIN_ZERO
;
83 brw_setup_image_uniform_values(nir_variable
*var
,
84 struct brw_stage_prog_data
*prog_data
)
86 unsigned param_start_index
= var
->data
.driver_location
/ 4;
87 uint32_t *param
= &prog_data
->param
[param_start_index
];
88 unsigned num_images
= MAX2(1, var
->type
->arrays_of_arrays_size());
90 for (unsigned i
= 0; i
< num_images
; i
++) {
91 const unsigned image_idx
= var
->data
.binding
+ i
;
93 /* Upload the brw_image_param structure. The order is expected to match
94 * the BRW_IMAGE_PARAM_*_OFFSET defines.
96 setup_vec4_image_param(param
+ BRW_IMAGE_PARAM_OFFSET_OFFSET
,
98 offsetof(brw_image_param
, offset
), 2);
99 setup_vec4_image_param(param
+ BRW_IMAGE_PARAM_SIZE_OFFSET
,
101 offsetof(brw_image_param
, size
), 3);
102 setup_vec4_image_param(param
+ BRW_IMAGE_PARAM_STRIDE_OFFSET
,
104 offsetof(brw_image_param
, stride
), 4);
105 setup_vec4_image_param(param
+ BRW_IMAGE_PARAM_TILING_OFFSET
,
107 offsetof(brw_image_param
, tiling
), 3);
108 setup_vec4_image_param(param
+ BRW_IMAGE_PARAM_SWIZZLING_OFFSET
,
110 offsetof(brw_image_param
, swizzling
), 2);
111 param
+= BRW_IMAGE_PARAM_SIZE
;
116 count_uniform_storage_slots(const struct glsl_type
*type
)
118 /* gl_uniform_storage can cope with one level of array, so if the
119 * type is a composite type or an array where each element occupies
120 * more than one slot than we need to recursively process it.
122 if (glsl_type_is_struct_or_ifc(type
)) {
123 unsigned location_count
= 0;
125 for (unsigned i
= 0; i
< glsl_get_length(type
); i
++) {
126 const struct glsl_type
*field_type
= glsl_get_struct_field(type
, i
);
128 location_count
+= count_uniform_storage_slots(field_type
);
131 return location_count
;
134 if (glsl_type_is_array(type
)) {
135 const struct glsl_type
*element_type
= glsl_get_array_element(type
);
137 if (glsl_type_is_array(element_type
) ||
138 glsl_type_is_struct_or_ifc(element_type
)) {
139 unsigned element_count
= count_uniform_storage_slots(element_type
);
140 return element_count
* glsl_get_length(type
);
148 brw_nir_setup_glsl_uniform(gl_shader_stage stage
, nir_variable
*var
,
149 const struct gl_program
*prog
,
150 struct brw_stage_prog_data
*stage_prog_data
,
153 if (var
->type
->without_array()->is_sampler())
156 if (var
->type
->without_array()->is_image()) {
157 brw_setup_image_uniform_values(var
, stage_prog_data
);
161 /* The data for our (non-builtin) uniforms is stored in a series of
162 * gl_uniform_storage structs for each subcomponent that
163 * glGetUniformLocation() could name. We know it's been set up in the same
164 * order we'd walk the type, so walk the list of storage that matches the
165 * range of slots covered by this variable.
167 unsigned uniform_index
= var
->data
.driver_location
/ 4;
168 unsigned num_slots
= count_uniform_storage_slots(var
->type
);
169 for (unsigned u
= 0; u
< num_slots
; u
++) {
170 struct gl_uniform_storage
*storage
=
171 &prog
->sh
.data
->UniformStorage
[var
->data
.location
+ u
];
173 /* We already handled samplers and images via the separate top-level
174 * variables created by gl_nir_lower_samplers_as_deref(), but they're
175 * still part of the structure's storage, and so we'll see them while
176 * walking it to set up the other regular fields. Just skip over them.
178 if (storage
->builtin
||
179 storage
->type
->is_sampler() ||
180 storage
->type
->is_image())
183 gl_constant_value
*components
= storage
->storage
;
184 unsigned vector_count
= (MAX2(storage
->array_elements
, 1) *
185 storage
->type
->matrix_columns
);
186 unsigned vector_size
= storage
->type
->vector_elements
;
187 unsigned max_vector_size
= 4;
188 if (storage
->type
->base_type
== GLSL_TYPE_DOUBLE
||
189 storage
->type
->base_type
== GLSL_TYPE_UINT64
||
190 storage
->type
->base_type
== GLSL_TYPE_INT64
) {
196 for (unsigned s
= 0; s
< vector_count
; s
++) {
198 for (i
= 0; i
< vector_size
; i
++) {
199 uint32_t idx
= components
- prog
->sh
.data
->UniformDataSlots
;
200 stage_prog_data
->param
[uniform_index
++] = BRW_PARAM_UNIFORM(idx
);
205 /* Pad out with zeros if needed (only needed for vec4) */
206 for (; i
< max_vector_size
; i
++) {
207 stage_prog_data
->param
[uniform_index
++] =
208 BRW_PARAM_BUILTIN_ZERO
;
216 brw_nir_setup_glsl_uniforms(void *mem_ctx
, nir_shader
*shader
,
217 const struct gl_program
*prog
,
218 struct brw_stage_prog_data
*stage_prog_data
,
221 unsigned nr_params
= shader
->num_uniforms
/ 4;
222 stage_prog_data
->nr_params
= nr_params
;
223 stage_prog_data
->param
= rzalloc_array(mem_ctx
, uint32_t, nr_params
);
225 nir_foreach_variable(var
, &shader
->uniforms
) {
226 /* UBO's, atomics and samplers don't take up space in the
228 if (var
->interface_type
!= NULL
|| var
->type
->contains_atomic())
231 if (var
->num_state_slots
> 0) {
232 brw_nir_setup_glsl_builtin_uniform(var
, prog
, stage_prog_data
,
235 brw_nir_setup_glsl_uniform(shader
->info
.stage
, var
, prog
,
236 stage_prog_data
, is_scalar
);
242 brw_nir_setup_arb_uniforms(void *mem_ctx
, nir_shader
*shader
,
243 struct gl_program
*prog
,
244 struct brw_stage_prog_data
*stage_prog_data
)
246 struct gl_program_parameter_list
*plist
= prog
->Parameters
;
248 unsigned nr_params
= plist
->NumParameters
* 4;
249 stage_prog_data
->nr_params
= nr_params
;
250 stage_prog_data
->param
= rzalloc_array(mem_ctx
, uint32_t, nr_params
);
252 /* For ARB programs, prog_to_nir generates a single "parameters" variable
253 * for all uniform data. There may be additional sampler variables, and
254 * an extra uniform from nir_lower_wpos_ytransform.
257 for (unsigned p
= 0; p
< plist
->NumParameters
; p
++) {
258 /* Parameters should be either vec4 uniforms or single component
259 * constants; matrices and other larger types should have been broken
262 assert(plist
->Parameters
[p
].Size
<= 4);
265 for (i
= 0; i
< plist
->Parameters
[p
].Size
; i
++)
266 stage_prog_data
->param
[4 * p
+ i
] = BRW_PARAM_PARAMETER(p
, i
);
268 stage_prog_data
->param
[4 * p
+ i
] = BRW_PARAM_BUILTIN_ZERO
;
273 get_aoa_deref_offset(nir_builder
*b
,
274 nir_deref_instr
*deref
,
277 unsigned array_size
= elem_size
;
278 nir_ssa_def
*offset
= nir_imm_int(b
, 0);
280 while (deref
->deref_type
!= nir_deref_type_var
) {
281 assert(deref
->deref_type
== nir_deref_type_array
);
283 /* This level's element size is the previous level's array size */
284 nir_ssa_def
*index
= nir_ssa_for_src(b
, deref
->arr
.index
, 1);
285 assert(deref
->arr
.index
.ssa
);
286 offset
= nir_iadd(b
, offset
,
287 nir_imul(b
, index
, nir_imm_int(b
, array_size
)));
289 deref
= nir_deref_instr_parent(deref
);
290 assert(glsl_type_is_array(deref
->type
));
291 array_size
*= glsl_get_length(deref
->type
);
294 /* Accessing an invalid surface index with the dataport can result in a
295 * hang. According to the spec "if the index used to select an individual
296 * element is negative or greater than or equal to the size of the array,
297 * the results of the operation are undefined but may not lead to
298 * termination" -- which is one of the possible outcomes of the hang.
299 * Clamp the index to prevent access outside of the array bounds.
301 return nir_umin(b
, offset
, nir_imm_int(b
, array_size
- elem_size
));
305 brw_nir_lower_gl_images(nir_shader
*shader
,
306 const struct gl_program
*prog
)
308 /* We put image uniforms at the end */
309 nir_foreach_variable(var
, &shader
->uniforms
) {
310 if (!var
->type
->contains_image())
313 /* GL Only allows arrays of arrays of images */
314 assert(var
->type
->without_array()->is_image());
315 const unsigned num_images
= MAX2(1, var
->type
->arrays_of_arrays_size());
317 var
->data
.driver_location
= shader
->num_uniforms
;
318 shader
->num_uniforms
+= num_images
* BRW_IMAGE_PARAM_SIZE
* 4;
321 nir_function_impl
*impl
= nir_shader_get_entrypoint(shader
);
324 nir_builder_init(&b
, impl
);
326 nir_foreach_block(block
, impl
) {
327 nir_foreach_instr_safe(instr
, block
) {
328 if (instr
->type
!= nir_instr_type_intrinsic
)
331 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
332 switch (intrin
->intrinsic
) {
333 case nir_intrinsic_image_deref_load
:
334 case nir_intrinsic_image_deref_store
:
335 case nir_intrinsic_image_deref_atomic_add
:
336 case nir_intrinsic_image_deref_atomic_min
:
337 case nir_intrinsic_image_deref_atomic_max
:
338 case nir_intrinsic_image_deref_atomic_and
:
339 case nir_intrinsic_image_deref_atomic_or
:
340 case nir_intrinsic_image_deref_atomic_xor
:
341 case nir_intrinsic_image_deref_atomic_exchange
:
342 case nir_intrinsic_image_deref_atomic_comp_swap
:
343 case nir_intrinsic_image_deref_size
:
344 case nir_intrinsic_image_deref_samples
:
345 case nir_intrinsic_image_deref_load_raw_intel
:
346 case nir_intrinsic_image_deref_store_raw_intel
: {
347 nir_deref_instr
*deref
= nir_src_as_deref(intrin
->src
[0]);
348 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
350 struct gl_uniform_storage
*storage
=
351 &prog
->sh
.data
->UniformStorage
[var
->data
.location
];
352 const unsigned image_var_idx
=
353 storage
->opaque
[shader
->info
.stage
].index
;
355 b
.cursor
= nir_before_instr(&intrin
->instr
);
356 nir_ssa_def
*index
= nir_iadd(&b
, nir_imm_int(&b
, image_var_idx
),
357 get_aoa_deref_offset(&b
, deref
, 1));
358 nir_rewrite_image_intrinsic(intrin
, index
, false);
362 case nir_intrinsic_image_deref_load_param_intel
: {
363 nir_deref_instr
*deref
= nir_src_as_deref(intrin
->src
[0]);
364 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
365 const unsigned num_images
=
366 MAX2(1, var
->type
->arrays_of_arrays_size());
368 b
.cursor
= nir_instr_remove(&intrin
->instr
);
370 const unsigned param
= nir_intrinsic_base(intrin
);
371 nir_ssa_def
*offset
=
372 get_aoa_deref_offset(&b
, deref
, BRW_IMAGE_PARAM_SIZE
* 4);
373 offset
= nir_iadd(&b
, offset
, nir_imm_int(&b
, param
* 16));
375 nir_intrinsic_instr
*load
=
376 nir_intrinsic_instr_create(b
.shader
,
377 nir_intrinsic_load_uniform
);
378 nir_intrinsic_set_base(load
, var
->data
.driver_location
);
379 nir_intrinsic_set_range(load
, num_images
* BRW_IMAGE_PARAM_SIZE
* 4);
380 load
->src
[0] = nir_src_for_ssa(offset
);
381 load
->num_components
= intrin
->dest
.ssa
.num_components
;
382 nir_ssa_dest_init(&load
->instr
, &load
->dest
,
383 intrin
->dest
.ssa
.num_components
,
384 intrin
->dest
.ssa
.bit_size
, NULL
);
385 nir_builder_instr_insert(&b
, &load
->instr
);
387 nir_ssa_def_rewrite_uses(&intrin
->dest
.ssa
,
388 nir_src_for_ssa(&load
->dest
.ssa
));