2 * Copyright © 2015 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "compiler/brw_nir.h"
25 #include "compiler/glsl/ir_uniform.h"
26 #include "compiler/nir/nir_builder.h"
27 #include "brw_program.h"
30 brw_nir_setup_glsl_builtin_uniform(nir_variable
*var
,
31 const struct gl_program
*prog
,
32 struct brw_stage_prog_data
*stage_prog_data
,
35 const nir_state_slot
*const slots
= var
->state_slots
;
36 assert(var
->state_slots
!= NULL
);
38 unsigned uniform_index
= var
->data
.driver_location
/ 4;
39 for (unsigned int i
= 0; i
< var
->num_state_slots
; i
++) {
40 /* This state reference has already been setup by ir_to_mesa, but we'll
41 * get the same index back here.
43 int index
= _mesa_add_state_reference(prog
->Parameters
,
46 /* Add each of the unique swizzles of the element as a parameter.
47 * This'll end up matching the expected layout of the
48 * array/matrix/structure we're trying to fill in.
51 for (unsigned j
= 0; j
< 4; j
++) {
52 int swiz
= GET_SWZ(slots
[i
].swizzle
, j
);
54 /* If we hit a pair of identical swizzles, this means we've hit the
55 * end of the builtin variable. In scalar mode, we should just quit
56 * and move on to the next one. In vec4, we need to continue and pad
57 * it out to 4 components.
59 if (swiz
== last_swiz
&& is_scalar
)
64 stage_prog_data
->param
[uniform_index
++] =
65 BRW_PARAM_PARAMETER(index
, swiz
);
71 setup_vec4_image_param(uint32_t *params
, uint32_t idx
,
72 unsigned offset
, unsigned n
)
74 assert(offset
% sizeof(uint32_t) == 0);
75 for (unsigned i
= 0; i
< n
; ++i
)
76 params
[i
] = BRW_PARAM_IMAGE(idx
, offset
/ sizeof(uint32_t) + i
);
78 for (unsigned i
= n
; i
< 4; ++i
)
79 params
[i
] = BRW_PARAM_BUILTIN_ZERO
;
83 brw_setup_image_uniform_values(gl_shader_stage stage
,
84 struct brw_stage_prog_data
*stage_prog_data
,
85 unsigned param_start_index
,
86 const gl_uniform_storage
*storage
)
88 uint32_t *param
= &stage_prog_data
->param
[param_start_index
];
90 for (unsigned i
= 0; i
< MAX2(storage
->array_elements
, 1); i
++) {
91 const unsigned image_idx
= storage
->opaque
[stage
].index
+ i
;
93 /* Upload the brw_image_param structure. The order is expected to match
94 * the BRW_IMAGE_PARAM_*_OFFSET defines.
96 setup_vec4_image_param(param
+ BRW_IMAGE_PARAM_OFFSET_OFFSET
,
98 offsetof(brw_image_param
, offset
), 2);
99 setup_vec4_image_param(param
+ BRW_IMAGE_PARAM_SIZE_OFFSET
,
101 offsetof(brw_image_param
, size
), 3);
102 setup_vec4_image_param(param
+ BRW_IMAGE_PARAM_STRIDE_OFFSET
,
104 offsetof(brw_image_param
, stride
), 4);
105 setup_vec4_image_param(param
+ BRW_IMAGE_PARAM_TILING_OFFSET
,
107 offsetof(brw_image_param
, tiling
), 3);
108 setup_vec4_image_param(param
+ BRW_IMAGE_PARAM_SWIZZLING_OFFSET
,
110 offsetof(brw_image_param
, swizzling
), 2);
111 param
+= BRW_IMAGE_PARAM_SIZE
;
113 brw_mark_surface_used(
115 stage_prog_data
->binding_table
.image_start
+ image_idx
);
120 count_uniform_storage_slots(const struct glsl_type
*type
)
122 /* gl_uniform_storage can cope with one level of array, so if the
123 * type is a composite type or an array where each element occupies
124 * more than one slot than we need to recursively process it.
126 if (glsl_type_is_struct(type
)) {
127 unsigned location_count
= 0;
129 for (unsigned i
= 0; i
< glsl_get_length(type
); i
++) {
130 const struct glsl_type
*field_type
= glsl_get_struct_field(type
, i
);
132 location_count
+= count_uniform_storage_slots(field_type
);
135 return location_count
;
138 if (glsl_type_is_array(type
)) {
139 const struct glsl_type
*element_type
= glsl_get_array_element(type
);
141 if (glsl_type_is_array(element_type
) ||
142 glsl_type_is_struct(element_type
)) {
143 unsigned element_count
= count_uniform_storage_slots(element_type
);
144 return element_count
* glsl_get_length(type
);
152 brw_nir_setup_glsl_uniform(gl_shader_stage stage
, nir_variable
*var
,
153 const struct gl_program
*prog
,
154 struct brw_stage_prog_data
*stage_prog_data
,
157 /* The data for our (non-builtin) uniforms is stored in a series of
158 * gl_uniform_storage structs for each subcomponent that
159 * glGetUniformLocation() could name. We know it's been set up in the same
160 * order we'd walk the type, so walk the list of storage that matches the
161 * range of slots covered by this variable.
163 unsigned uniform_index
= var
->data
.driver_location
/ 4;
164 unsigned num_slots
= count_uniform_storage_slots(var
->type
);
165 for (unsigned u
= 0; u
< num_slots
; u
++) {
166 struct gl_uniform_storage
*storage
=
167 &prog
->sh
.data
->UniformStorage
[var
->data
.location
+ u
];
169 if (storage
->builtin
|| storage
->type
->is_sampler())
172 if (storage
->type
->is_image()) {
173 brw_setup_image_uniform_values(stage
, stage_prog_data
,
174 uniform_index
, storage
);
176 BRW_IMAGE_PARAM_SIZE
* MAX2(storage
->array_elements
, 1);
178 gl_constant_value
*components
= storage
->storage
;
179 unsigned vector_count
= (MAX2(storage
->array_elements
, 1) *
180 storage
->type
->matrix_columns
);
181 unsigned vector_size
= storage
->type
->vector_elements
;
182 unsigned max_vector_size
= 4;
183 if (storage
->type
->base_type
== GLSL_TYPE_DOUBLE
||
184 storage
->type
->base_type
== GLSL_TYPE_UINT64
||
185 storage
->type
->base_type
== GLSL_TYPE_INT64
) {
191 for (unsigned s
= 0; s
< vector_count
; s
++) {
193 for (i
= 0; i
< vector_size
; i
++) {
194 uint32_t idx
= components
- prog
->sh
.data
->UniformDataSlots
;
195 stage_prog_data
->param
[uniform_index
++] = BRW_PARAM_UNIFORM(idx
);
200 /* Pad out with zeros if needed (only needed for vec4) */
201 for (; i
< max_vector_size
; i
++) {
202 stage_prog_data
->param
[uniform_index
++] =
203 BRW_PARAM_BUILTIN_ZERO
;
212 brw_nir_setup_glsl_uniforms(void *mem_ctx
, nir_shader
*shader
,
213 const struct gl_program
*prog
,
214 struct brw_stage_prog_data
*stage_prog_data
,
217 unsigned nr_params
= shader
->num_uniforms
/ 4;
218 stage_prog_data
->nr_params
= nr_params
;
219 stage_prog_data
->param
= rzalloc_array(mem_ctx
, uint32_t, nr_params
);
221 nir_foreach_variable(var
, &shader
->uniforms
) {
222 /* UBO's, atomics and samplers don't take up space in the
224 if (var
->interface_type
!= NULL
|| var
->type
->contains_atomic())
227 if (var
->num_state_slots
> 0) {
228 brw_nir_setup_glsl_builtin_uniform(var
, prog
, stage_prog_data
,
231 brw_nir_setup_glsl_uniform(shader
->info
.stage
, var
, prog
,
232 stage_prog_data
, is_scalar
);
238 brw_nir_setup_arb_uniforms(void *mem_ctx
, nir_shader
*shader
,
239 struct gl_program
*prog
,
240 struct brw_stage_prog_data
*stage_prog_data
)
242 struct gl_program_parameter_list
*plist
= prog
->Parameters
;
244 unsigned nr_params
= plist
->NumParameters
* 4;
245 stage_prog_data
->nr_params
= nr_params
;
246 stage_prog_data
->param
= rzalloc_array(mem_ctx
, uint32_t, nr_params
);
248 /* For ARB programs, prog_to_nir generates a single "parameters" variable
249 * for all uniform data. nir_lower_wpos_ytransform may also create an
250 * additional variable.
252 assert(shader
->uniforms
.length() <= 2);
254 for (unsigned p
= 0; p
< plist
->NumParameters
; p
++) {
255 /* Parameters should be either vec4 uniforms or single component
256 * constants; matrices and other larger types should have been broken
259 assert(plist
->Parameters
[p
].Size
<= 4);
262 for (i
= 0; i
< plist
->Parameters
[p
].Size
; i
++)
263 stage_prog_data
->param
[4 * p
+ i
] = BRW_PARAM_PARAMETER(p
, i
);
265 stage_prog_data
->param
[4 * p
+ i
] = BRW_PARAM_BUILTIN_ZERO
;
270 get_aoa_deref_offset(nir_builder
*b
,
271 nir_deref_instr
*deref
,
274 unsigned array_size
= elem_size
;
275 nir_ssa_def
*offset
= nir_imm_int(b
, 0);
277 while (deref
->deref_type
!= nir_deref_type_var
) {
278 assert(deref
->deref_type
== nir_deref_type_array
);
280 /* This level's element size is the previous level's array size */
281 nir_ssa_def
*index
= nir_ssa_for_src(b
, deref
->arr
.index
, 1);
282 assert(deref
->arr
.index
.ssa
);
283 offset
= nir_iadd(b
, offset
,
284 nir_imul(b
, index
, nir_imm_int(b
, array_size
)));
286 deref
= nir_deref_instr_parent(deref
);
287 assert(glsl_type_is_array(deref
->type
));
288 array_size
*= glsl_get_length(deref
->type
);
291 /* Accessing an invalid surface index with the dataport can result in a
292 * hang. According to the spec "if the index used to select an individual
293 * element is negative or greater than or equal to the size of the array,
294 * the results of the operation are undefined but may not lead to
295 * termination" -- which is one of the possible outcomes of the hang.
296 * Clamp the index to prevent access outside of the array bounds.
298 return nir_umin(b
, offset
, nir_imm_int(b
, array_size
- elem_size
));
302 brw_nir_lower_gl_images(nir_shader
*shader
,
303 const struct gl_program
*prog
)
305 /* We put image uniforms at the end */
306 nir_foreach_variable(var
, &shader
->uniforms
) {
307 if (!var
->type
->contains_image())
310 /* GL Only allows arrays of arrays of images */
311 assert(var
->type
->without_array()->is_image());
312 const unsigned num_images
= MAX2(1, var
->type
->arrays_of_arrays_size());
314 var
->data
.driver_location
= shader
->num_uniforms
;
315 shader
->num_uniforms
+= num_images
* BRW_IMAGE_PARAM_SIZE
* 4;
318 nir_function_impl
*impl
= nir_shader_get_entrypoint(shader
);
321 nir_builder_init(&b
, impl
);
323 nir_foreach_block(block
, impl
) {
324 nir_foreach_instr_safe(instr
, block
) {
325 if (instr
->type
!= nir_instr_type_intrinsic
)
328 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
329 switch (intrin
->intrinsic
) {
330 case nir_intrinsic_image_deref_load
:
331 case nir_intrinsic_image_deref_store
:
332 case nir_intrinsic_image_deref_atomic_add
:
333 case nir_intrinsic_image_deref_atomic_min
:
334 case nir_intrinsic_image_deref_atomic_max
:
335 case nir_intrinsic_image_deref_atomic_and
:
336 case nir_intrinsic_image_deref_atomic_or
:
337 case nir_intrinsic_image_deref_atomic_xor
:
338 case nir_intrinsic_image_deref_atomic_exchange
:
339 case nir_intrinsic_image_deref_atomic_comp_swap
:
340 case nir_intrinsic_image_deref_size
:
341 case nir_intrinsic_image_deref_samples
:
342 case nir_intrinsic_image_deref_load_raw_intel
:
343 case nir_intrinsic_image_deref_store_raw_intel
: {
344 nir_deref_instr
*deref
= nir_src_as_deref(intrin
->src
[0]);
345 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
347 struct gl_uniform_storage
*storage
=
348 &prog
->sh
.data
->UniformStorage
[var
->data
.location
];
349 const unsigned image_var_idx
=
350 storage
->opaque
[shader
->info
.stage
].index
;
352 b
.cursor
= nir_before_instr(&intrin
->instr
);
353 nir_ssa_def
*index
= nir_iadd(&b
, nir_imm_int(&b
, image_var_idx
),
354 get_aoa_deref_offset(&b
, deref
, 1));
355 brw_nir_rewrite_image_intrinsic(intrin
, index
);
359 case nir_intrinsic_image_deref_load_param_intel
: {
360 nir_deref_instr
*deref
= nir_src_as_deref(intrin
->src
[0]);
361 nir_variable
*var
= nir_deref_instr_get_variable(deref
);
362 const unsigned num_images
=
363 MAX2(1, var
->type
->arrays_of_arrays_size());
365 b
.cursor
= nir_instr_remove(&intrin
->instr
);
367 const unsigned param
= nir_intrinsic_base(intrin
);
368 nir_ssa_def
*offset
=
369 get_aoa_deref_offset(&b
, deref
, BRW_IMAGE_PARAM_SIZE
* 4);
370 offset
= nir_iadd(&b
, offset
, nir_imm_int(&b
, param
* 16));
372 nir_intrinsic_instr
*load
=
373 nir_intrinsic_instr_create(b
.shader
,
374 nir_intrinsic_load_uniform
);
375 nir_intrinsic_set_base(load
, var
->data
.driver_location
);
376 nir_intrinsic_set_range(load
, num_images
* BRW_IMAGE_PARAM_SIZE
* 4);
377 load
->src
[0] = nir_src_for_ssa(offset
);
378 load
->num_components
= intrin
->dest
.ssa
.num_components
;
379 nir_ssa_dest_init(&load
->instr
, &load
->dest
,
380 intrin
->dest
.ssa
.num_components
,
381 intrin
->dest
.ssa
.bit_size
, NULL
);
382 nir_builder_instr_insert(&b
, &load
->instr
);
384 nir_ssa_def_rewrite_uses(&intrin
->dest
.ssa
,
385 nir_src_for_ssa(&load
->dest
.ssa
));