/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "compiler/brw_nir.h"
25 #include "compiler/glsl/ir_uniform.h"
26 #include "compiler/nir/nir_builder.h"
27 #include "brw_program.h"
30 brw_nir_setup_glsl_builtin_uniform(nir_variable
*var
,
31 const struct gl_program
*prog
,
32 struct brw_stage_prog_data
*stage_prog_data
,
35 const nir_state_slot
*const slots
= var
->state_slots
;
36 assert(var
->state_slots
!= NULL
);
38 unsigned uniform_index
= var
->data
.driver_location
/ 4;
39 for (unsigned int i
= 0; i
< var
->num_state_slots
; i
++) {
40 /* This state reference has already been setup by ir_to_mesa, but we'll
41 * get the same index back here.
43 int index
= _mesa_add_state_reference(prog
->Parameters
,
46 /* Add each of the unique swizzles of the element as a parameter.
47 * This'll end up matching the expected layout of the
48 * array/matrix/structure we're trying to fill in.
51 for (unsigned j
= 0; j
< 4; j
++) {
52 int swiz
= GET_SWZ(slots
[i
].swizzle
, j
);
54 /* If we hit a pair of identical swizzles, this means we've hit the
55 * end of the builtin variable. In scalar mode, we should just quit
56 * and move on to the next one. In vec4, we need to continue and pad
57 * it out to 4 components.
59 if (swiz
== last_swiz
&& is_scalar
)
64 stage_prog_data
->param
[uniform_index
++] =
65 BRW_PARAM_PARAMETER(index
, swiz
);
71 setup_vec4_image_param(uint32_t *params
, uint32_t idx
,
72 unsigned offset
, unsigned n
)
74 assert(offset
% sizeof(uint32_t) == 0);
75 for (unsigned i
= 0; i
< n
; ++i
)
76 params
[i
] = BRW_PARAM_IMAGE(idx
, offset
/ sizeof(uint32_t) + i
);
78 for (unsigned i
= n
; i
< 4; ++i
)
79 params
[i
] = BRW_PARAM_BUILTIN_ZERO
;
83 brw_setup_image_uniform_values(gl_shader_stage stage
,
84 struct brw_stage_prog_data
*stage_prog_data
,
85 unsigned param_start_index
,
86 const gl_uniform_storage
*storage
)
88 uint32_t *param
= &stage_prog_data
->param
[param_start_index
];
90 for (unsigned i
= 0; i
< MAX2(storage
->array_elements
, 1); i
++) {
91 const unsigned image_idx
= storage
->opaque
[stage
].index
+ i
;
93 /* Upload the brw_image_param structure. The order is expected to match
94 * the BRW_IMAGE_PARAM_*_OFFSET defines.
96 setup_vec4_image_param(param
+ BRW_IMAGE_PARAM_OFFSET_OFFSET
,
98 offsetof(brw_image_param
, offset
), 2);
99 setup_vec4_image_param(param
+ BRW_IMAGE_PARAM_SIZE_OFFSET
,
101 offsetof(brw_image_param
, size
), 3);
102 setup_vec4_image_param(param
+ BRW_IMAGE_PARAM_STRIDE_OFFSET
,
104 offsetof(brw_image_param
, stride
), 4);
105 setup_vec4_image_param(param
+ BRW_IMAGE_PARAM_TILING_OFFSET
,
107 offsetof(brw_image_param
, tiling
), 3);
108 setup_vec4_image_param(param
+ BRW_IMAGE_PARAM_SWIZZLING_OFFSET
,
110 offsetof(brw_image_param
, swizzling
), 2);
111 param
+= BRW_IMAGE_PARAM_SIZE
;
/* Count how many gl_uniform_storage slots a uniform of `type` occupies.
 *
 * gl_uniform_storage can cope with one level of array, so if the type is a
 * composite type or an array where each element occupies more than one slot
 * we need to recursively process it.
 */
static unsigned
count_uniform_storage_slots(const struct glsl_type *type)
{
   if (glsl_type_is_struct(type)) {
      /* Sum the slot counts of every struct member. */
      unsigned location_count = 0;
      for (unsigned i = 0; i < glsl_get_length(type); i++) {
         const struct glsl_type *field_type = glsl_get_struct_field(type, i);
         location_count += count_uniform_storage_slots(field_type);
      }
      return location_count;
   }

   if (glsl_type_is_array(type)) {
      const struct glsl_type *element_type = glsl_get_array_element(type);

      /* Only arrays of structs or arrays need recursion; an array of
       * scalars/vectors/matrices is a single storage slot.
       */
      if (glsl_type_is_array(element_type) ||
          glsl_type_is_struct(element_type)) {
         unsigned element_count = count_uniform_storage_slots(element_type);
         return element_count * glsl_get_length(type);
      }
   }

   /* Scalars, vectors, matrices, and simple arrays occupy one slot. */
   return 1;
}
148 brw_nir_setup_glsl_uniform(gl_shader_stage stage
, nir_variable
*var
,
149 const struct gl_program
*prog
,
150 struct brw_stage_prog_data
*stage_prog_data
,
153 /* The data for our (non-builtin) uniforms is stored in a series of
154 * gl_uniform_storage structs for each subcomponent that
155 * glGetUniformLocation() could name. We know it's been set up in the same
156 * order we'd walk the type, so walk the list of storage that matches the
157 * range of slots covered by this variable.
159 unsigned uniform_index
= var
->data
.driver_location
/ 4;
160 unsigned num_slots
= count_uniform_storage_slots(var
->type
);
161 for (unsigned u
= 0; u
< num_slots
; u
++) {
162 struct gl_uniform_storage
*storage
=
163 &prog
->sh
.data
->UniformStorage
[var
->data
.location
+ u
];
165 if (storage
->builtin
|| storage
->type
->is_sampler())
168 if (storage
->type
->is_image()) {
169 brw_setup_image_uniform_values(stage
, stage_prog_data
,
170 uniform_index
, storage
);
172 BRW_IMAGE_PARAM_SIZE
* MAX2(storage
->array_elements
, 1);
174 gl_constant_value
*components
= storage
->storage
;
175 unsigned vector_count
= (MAX2(storage
->array_elements
, 1) *
176 storage
->type
->matrix_columns
);
177 unsigned vector_size
= storage
->type
->vector_elements
;
178 unsigned max_vector_size
= 4;
179 if (storage
->type
->base_type
== GLSL_TYPE_DOUBLE
||
180 storage
->type
->base_type
== GLSL_TYPE_UINT64
||
181 storage
->type
->base_type
== GLSL_TYPE_INT64
) {
187 for (unsigned s
= 0; s
< vector_count
; s
++) {
189 for (i
= 0; i
< vector_size
; i
++) {
190 uint32_t idx
= components
- prog
->sh
.data
->UniformDataSlots
;
191 stage_prog_data
->param
[uniform_index
++] = BRW_PARAM_UNIFORM(idx
);
196 /* Pad out with zeros if needed (only needed for vec4) */
197 for (; i
< max_vector_size
; i
++) {
198 stage_prog_data
->param
[uniform_index
++] =
199 BRW_PARAM_BUILTIN_ZERO
;
208 brw_nir_setup_glsl_uniforms(void *mem_ctx
, nir_shader
*shader
,
209 const struct gl_program
*prog
,
210 struct brw_stage_prog_data
*stage_prog_data
,
213 unsigned nr_params
= shader
->num_uniforms
/ 4;
214 stage_prog_data
->nr_params
= nr_params
;
215 stage_prog_data
->param
= rzalloc_array(mem_ctx
, uint32_t, nr_params
);
217 nir_foreach_variable(var
, &shader
->uniforms
) {
218 /* UBO's, atomics and samplers don't take up space in the
220 if (var
->interface_type
!= NULL
|| var
->type
->contains_atomic())
223 if (var
->num_state_slots
> 0) {
224 brw_nir_setup_glsl_builtin_uniform(var
, prog
, stage_prog_data
,
227 brw_nir_setup_glsl_uniform(shader
->info
.stage
, var
, prog
,
228 stage_prog_data
, is_scalar
);
234 brw_nir_setup_arb_uniforms(void *mem_ctx
, nir_shader
*shader
,
235 struct gl_program
*prog
,
236 struct brw_stage_prog_data
*stage_prog_data
)
238 struct gl_program_parameter_list
*plist
= prog
->Parameters
;
240 unsigned nr_params
= plist
->NumParameters
* 4;
241 stage_prog_data
->nr_params
= nr_params
;
242 stage_prog_data
->param
= rzalloc_array(mem_ctx
, uint32_t, nr_params
);
244 /* For ARB programs, prog_to_nir generates a single "parameters" variable
245 * for all uniform data. There may be additional sampler variables, and
246 * an extra uniform from nir_lower_wpos_ytransform.
249 for (unsigned p
= 0; p
< plist
->NumParameters
; p
++) {
250 /* Parameters should be either vec4 uniforms or single component
251 * constants; matrices and other larger types should have been broken
254 assert(plist
->Parameters
[p
].Size
<= 4);
257 for (i
= 0; i
< plist
->Parameters
[p
].Size
; i
++)
258 stage_prog_data
->param
[4 * p
+ i
] = BRW_PARAM_PARAMETER(p
, i
);
260 stage_prog_data
->param
[4 * p
+ i
] = BRW_PARAM_BUILTIN_ZERO
;
265 get_aoa_deref_offset(nir_builder
*b
,
266 nir_deref_instr
*deref
,
269 unsigned array_size
= elem_size
;
270 nir_ssa_def
*offset
= nir_imm_int(b
, 0);
272 while (deref
->deref_type
!= nir_deref_type_var
) {
273 assert(deref
->deref_type
== nir_deref_type_array
);
275 /* This level's element size is the previous level's array size */
276 nir_ssa_def
*index
= nir_ssa_for_src(b
, deref
->arr
.index
, 1);
277 assert(deref
->arr
.index
.ssa
);
278 offset
= nir_iadd(b
, offset
,
279 nir_imul(b
, index
, nir_imm_int(b
, array_size
)));
281 deref
= nir_deref_instr_parent(deref
);
282 assert(glsl_type_is_array(deref
->type
));
283 array_size
*= glsl_get_length(deref
->type
);
286 /* Accessing an invalid surface index with the dataport can result in a
287 * hang. According to the spec "if the index used to select an individual
288 * element is negative or greater than or equal to the size of the array,
289 * the results of the operation are undefined but may not lead to
290 * termination" -- which is one of the possible outcomes of the hang.
291 * Clamp the index to prevent access outside of the array bounds.
293 return nir_umin(b
, offset
, nir_imm_int(b
, array_size
- elem_size
));
297 brw_nir_lower_gl_images(nir_shader
*shader
,
298 const struct gl_program
*prog
)
300 /* We put image uniforms at the end */
301 nir_foreach_variable(var
, &shader
->uniforms
) {
302 if (!var
->type
->contains_image())
305 /* GL Only allows arrays of arrays of images */
306 assert(var
->type
->without_array()->is_image());
307 const unsigned num_images
= MAX2(1, var
->type
->arrays_of_arrays_size());
309 var
->data
.driver_location
= shader
->num_uniforms
;
310 shader
->num_uniforms
+= num_images
* BRW_IMAGE_PARAM_SIZE
* 4;
313 nir_function_impl
*impl
= nir_shader_get_entrypoint(shader
);
316 nir_builder_init(&b
, impl
);
318 nir_foreach_block(block
, impl
) {
319 nir_foreach_instr_safe(instr
, block
) {
320 if (instr
->type
!= nir_instr_type_intrinsic
)
323 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
324 switch (intrin
->intrinsic
) {
325 case nir_intrinsic_image_deref_load
:
326 case nir_intrinsic_image_deref_store
:
327 case nir_intrinsic_image_deref_atomic_add
:
328 case nir_intrinsic_image_deref_atomic_min
:
329 case nir_intrinsic_image_deref_atomic_max
:
330 case nir_intrinsic_image_deref_atomic_and
:
331 case nir_intrinsic_image_deref_atomic_or
:
332 case nir_intrinsic_image_deref_atomic_xor
:
333 case nir_intrinsic_image_deref_atomic_exchange
:
334 case nir_intrinsic_image_deref_atomic_comp_swap
:
335 case nir_intrinsic_image_deref_size
:
336 case nir_intrinsic_image_deref_samples
:
337 case nir_intrinsic_image_deref_load_raw_intel
:
338 case nir_intrinsic_image_deref_store_raw_intel
: {
339 nir_deref_instr
*deref
= nir_src_as_deref(intrin
->src
[0]);
340 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
342 struct gl_uniform_storage
*storage
=
343 &prog
->sh
.data
->UniformStorage
[var
->data
.location
];
344 const unsigned image_var_idx
=
345 storage
->opaque
[shader
->info
.stage
].index
;
347 b
.cursor
= nir_before_instr(&intrin
->instr
);
348 nir_ssa_def
*index
= nir_iadd(&b
, nir_imm_int(&b
, image_var_idx
),
349 get_aoa_deref_offset(&b
, deref
, 1));
350 brw_nir_rewrite_image_intrinsic(intrin
, index
);
354 case nir_intrinsic_image_deref_load_param_intel
: {
355 nir_deref_instr
*deref
= nir_src_as_deref(intrin
->src
[0]);
356 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
357 const unsigned num_images
=
358 MAX2(1, var
->type
->arrays_of_arrays_size());
360 b
.cursor
= nir_instr_remove(&intrin
->instr
);
362 const unsigned param
= nir_intrinsic_base(intrin
);
363 nir_ssa_def
*offset
=
364 get_aoa_deref_offset(&b
, deref
, BRW_IMAGE_PARAM_SIZE
* 4);
365 offset
= nir_iadd(&b
, offset
, nir_imm_int(&b
, param
* 16));
367 nir_intrinsic_instr
*load
=
368 nir_intrinsic_instr_create(b
.shader
,
369 nir_intrinsic_load_uniform
);
370 nir_intrinsic_set_base(load
, var
->data
.driver_location
);
371 nir_intrinsic_set_range(load
, num_images
* BRW_IMAGE_PARAM_SIZE
* 4);
372 load
->src
[0] = nir_src_for_ssa(offset
);
373 load
->num_components
= intrin
->dest
.ssa
.num_components
;
374 nir_ssa_dest_init(&load
->instr
, &load
->dest
,
375 intrin
->dest
.ssa
.num_components
,
376 intrin
->dest
.ssa
.bit_size
, NULL
);
377 nir_builder_instr_insert(&b
, &load
->instr
);
379 nir_ssa_def_rewrite_uses(&intrin
->dest
.ssa
,
380 nir_src_for_ssa(&load
->dest
.ssa
));