/*
 * Copyright (c) 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_shared_reference.cpp
 *
 * IR lower pass to replace dereferences of compute shader shared variables
 * with intrinsic function calls.
 *
 * This relieves drivers of the responsibility of allocating space for the
 * shared variables in the shared memory region.
 */
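/*
 * Illustrative sketch (not part of the original file): at the GLSL level the
 * transformation performed by this pass roughly corresponds to turning
 *
 *    shared float data[64];
 *    ...
 *    float x = data[gl_LocalInvocationIndex];
 *
 * into a load through an internal intrinsic that takes a byte offset into
 * the shared region instead of a variable reference, conceptually
 *
 *    float x = __intrinsic_load_shared(<offset of data> +
 *                                      4u * gl_LocalInvocationIndex);
 *
 * The exact IR is produced by setup_buffer_access()/emit_access() below;
 * the GLSL-style snippet is only meant to convey the idea.
 */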
#include "lower_buffer_access.h"
#include "ir_builder.h"
#include "linker.h"
#include "main/macros.h"
#include "util/list.h"
#include "glsl_parser_extras.h"
#include "main/mtypes.h"
using namespace ir_builder;

namespace {

struct var_offset {
   struct list_head node;
   const ir_variable *var;
   unsigned offset;
};
class lower_shared_reference_visitor :
      public lower_buffer_access::lower_buffer_access {
public:

   lower_shared_reference_visitor(struct gl_linked_shader *shader)
      : list_ctx(ralloc_context(NULL)), shader(shader), shared_size(0u)
   {
      list_inithead(&var_offsets);
   }

   ~lower_shared_reference_visitor()
   {
      ralloc_free(list_ctx);
   }

   enum {
      shared_load_access,
      shared_store_access,
      shared_atomic_access,
   } buffer_access_type;
   void insert_buffer_access(void *mem_ctx, ir_dereference *deref,
                             const glsl_type *type, ir_rvalue *offset,
                             unsigned mask, int channel);

   void handle_rvalue(ir_rvalue **rvalue);
   ir_visitor_status visit_enter(ir_assignment *ir);
   void handle_assignment(ir_assignment *ir);

   ir_call *lower_shared_atomic_intrinsic(ir_call *ir);
   ir_call *check_for_shared_atomic_intrinsic(ir_call *ir);
   ir_visitor_status visit_enter(ir_call *ir);

   unsigned get_shared_offset(const ir_variable *);

   ir_call *shared_load(void *mem_ctx, const struct glsl_type *type,
                        ir_rvalue *offset);
   ir_call *shared_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset,
                         unsigned write_mask);

   void *list_ctx;
   struct gl_linked_shader *shader;
   struct list_head var_offsets;
   unsigned shared_size;
   bool progress;
};
unsigned
lower_shared_reference_visitor::get_shared_offset(const ir_variable *var)
{
   list_for_each_entry(var_offset, var_entry, &var_offsets, node) {
      if (var_entry->var == var)
         return var_entry->offset;
   }

   struct var_offset *new_entry = rzalloc(list_ctx, struct var_offset);
   list_add(&new_entry->node, &var_offsets);
   new_entry->var = var;

   unsigned var_align = var->type->std430_base_alignment(false);
   new_entry->offset = glsl_align(shared_size, var_align);

   unsigned var_size = var->type->std430_size(false);
   shared_size = new_entry->offset + var_size;

   return new_entry->offset;
}
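/*
 * Worked example (illustrative, not from the original file): with
 *
 *    shared float a;        // std430: align 4, size 4
 *    shared vec4  b[10];    // std430: align 16, size 160
 *
 * and the variables first seen in that order, get_shared_offset() places
 * "a" at offset 0 and "b" at offset 16 (4 rounded up to the vec4 alignment),
 * leaving shared_size == 176.  Offsets stay stable because each variable is
 * entered into var_offsets the first time it is seen.
 */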
void
lower_shared_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   if (!*rvalue)
      return;

   ir_dereference *deref = (*rvalue)->as_dereference();
   if (!deref)
      return;

   ir_variable *var = deref->variable_referenced();
   if (!var || var->data.mode != ir_var_shader_shared)
      return;

   buffer_access_type = shared_load_access;

   void *mem_ctx = ralloc_parent(shader->ir);

   ir_rvalue *offset = NULL;
   unsigned const_offset = get_shared_offset(var);
   bool row_major;
   const glsl_type *matrix_type;
   assert(var->get_interface_type() == NULL);
   const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430;

   setup_buffer_access(mem_ctx, deref,
                       &offset, &const_offset,
                       &row_major, &matrix_type, NULL, packing);

   /* Now that we've calculated the offset to the start of the
    * dereference, walk over the type and emit loads into a temporary.
    */
   const glsl_type *type = (*rvalue)->type;
   ir_variable *load_var = new(mem_ctx) ir_variable(type,
                                                    "shared_load_temp",
                                                    ir_var_temporary);
   base_ir->insert_before(load_var);

   ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
                                                       "shared_load_temp_offset",
                                                       ir_var_temporary);
   base_ir->insert_before(load_offset);
   base_ir->insert_before(assign(load_offset, offset));

   deref = new(mem_ctx) ir_dereference_variable(load_var);

   emit_access(mem_ctx, false, deref, load_offset, const_offset, row_major,
               matrix_type, packing, 0);

   *rvalue = deref;

   progress = true;
}
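/*
 * Illustrative sketch (not from the original file): for a read of a shared
 * float "s", the rewrite performed above is roughly
 *
 *    uint  shared_load_temp_offset = <dynamic part of the offset>;
 *    float shared_load_temp;            // filled via the load intrinsic
 *    ... = shared_load_temp;            // original rvalue now reads the temp
 *
 * emit_access() is what actually emits the intrinsic load(s); for aggregate
 * types it walks over the members and emits one load per scalar/vector slot.
 */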
void
lower_shared_reference_visitor::handle_assignment(ir_assignment *ir)
{
   if (!ir || !ir->lhs)
      return;

   ir_rvalue *rvalue = ir->lhs->as_rvalue();
   if (!rvalue)
      return;

   ir_dereference *deref = ir->lhs->as_dereference();
   if (!deref)
      return;

   ir_variable *var = ir->lhs->variable_referenced();
   if (!var || var->data.mode != ir_var_shader_shared)
      return;

   buffer_access_type = shared_store_access;

   /* We have a write to a shared variable, so declare a temporary and rewrite
    * the assignment so that the temporary is the LHS.
    */
   void *mem_ctx = ralloc_parent(shader->ir);

   const glsl_type *type = rvalue->type;
   ir_variable *store_var = new(mem_ctx) ir_variable(type,
                                                     "shared_store_temp",
                                                     ir_var_temporary);
   base_ir->insert_before(store_var);
   ir->lhs = new(mem_ctx) ir_dereference_variable(store_var);

   ir_rvalue *offset = NULL;
   unsigned const_offset = get_shared_offset(var);
   bool row_major;
   const glsl_type *matrix_type;
   assert(var->get_interface_type() == NULL);
   const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430;

   setup_buffer_access(mem_ctx, deref,
                       &offset, &const_offset,
                       &row_major, &matrix_type, NULL, packing);

   deref = new(mem_ctx) ir_dereference_variable(store_var);

   ir_variable *store_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
                                                        "shared_store_temp_offset",
                                                        ir_var_temporary);
   base_ir->insert_before(store_offset);
   base_ir->insert_before(assign(store_offset, offset));

   /* Now we have to write the value assigned to the temporary back to memory */
   emit_access(mem_ctx, true, deref, store_offset, const_offset, row_major,
               matrix_type, packing, ir->write_mask);

   progress = true;
}
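/*
 * Illustrative sketch (not from the original file): for a write such as
 * "s = value" where "s" is a shared float, the rewrite above is roughly
 *
 *    uint  shared_store_temp_offset = <dynamic part of the offset>;
 *    float shared_store_temp = value;   // the assignment now targets the temp
 *    __intrinsic_store_shared(offset, shared_store_temp, write_mask);
 *
 * The store intrinsic itself is emitted by emit_access() through
 * insert_buffer_access()/shared_store() below.
 */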
ir_visitor_status
lower_shared_reference_visitor::visit_enter(ir_assignment *ir)
{
   handle_assignment(ir);
   return rvalue_visit(ir);
}
void
lower_shared_reference_visitor::insert_buffer_access(void *mem_ctx,
                                                     ir_dereference *deref,
                                                     const glsl_type *type,
                                                     ir_rvalue *offset,
                                                     unsigned mask,
                                                     int /* channel */)
{
   if (buffer_access_type == shared_store_access) {
      ir_call *store = shared_store(mem_ctx, deref, offset, mask);
      base_ir->insert_after(store);
   } else {
      ir_call *load = shared_load(mem_ctx, type, offset);
      base_ir->insert_before(load);
      ir_rvalue *value = load->return_deref->as_rvalue()->clone(mem_ctx, NULL);
      base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
                                    value));
   }
}
static bool
compute_shader_enabled(const _mesa_glsl_parse_state *state)
{
   return state->stage == MESA_SHADER_COMPUTE;
}
ir_call *
lower_shared_reference_visitor::shared_store(void *mem_ctx,
                                             ir_rvalue *deref,
                                             ir_rvalue *offset,
                                             unsigned write_mask)
{
   exec_list sig_params;

   ir_variable *offset_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset", ir_var_function_in);
   sig_params.push_tail(offset_ref);

   ir_variable *val_ref = new(mem_ctx)
      ir_variable(deref->type, "value", ir_var_function_in);
   sig_params.push_tail(val_ref);

   ir_variable *writemask_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "write_mask", ir_var_function_in);
   sig_params.push_tail(writemask_ref);

   ir_function_signature *sig = new(mem_ctx)
      ir_function_signature(glsl_type::void_type, compute_shader_enabled);
   sig->replace_parameters(&sig_params);
   sig->intrinsic_id = ir_intrinsic_shared_store;

   ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_shared");
   f->add_signature(sig);

   exec_list call_params;
   call_params.push_tail(offset->clone(mem_ctx, NULL));
   call_params.push_tail(deref->clone(mem_ctx, NULL));
   call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
   return new(mem_ctx) ir_call(sig, NULL, &call_params);
}
ir_call *
lower_shared_reference_visitor::shared_load(void *mem_ctx,
                                            const struct glsl_type *type,
                                            ir_rvalue *offset)
{
   exec_list sig_params;

   ir_variable *offset_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset_ref", ir_var_function_in);
   sig_params.push_tail(offset_ref);

   ir_function_signature *sig =
      new(mem_ctx) ir_function_signature(type, compute_shader_enabled);
   sig->replace_parameters(&sig_params);
   sig->intrinsic_id = ir_intrinsic_shared_load;

   ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_shared");
   f->add_signature(sig);

   ir_variable *result = new(mem_ctx)
      ir_variable(type, "shared_load_result", ir_var_temporary);
   base_ir->insert_before(result);
   ir_dereference_variable *deref_result = new(mem_ctx)
      ir_dereference_variable(result);

   exec_list call_params;
   call_params.push_tail(offset->clone(mem_ctx, NULL));

   return new(mem_ctx) ir_call(sig, deref_result, &call_params);
}
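/*
 * Sketch of the synthesized calls (illustrative, not from the original
 * file).  shared_store() and shared_load() above build, respectively,
 *
 *    __intrinsic_store_shared(uint offset, <type> value, uint write_mask)
 *    <type> __intrinsic_load_shared(uint offset)
 *
 * with intrinsic_id set to ir_intrinsic_shared_store/ir_intrinsic_shared_load
 * so that later stages can identify them by id rather than by name.
 */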
/* Lowers the intrinsic call to a new internal intrinsic that replaces the
 * access to the shared variable in the first parameter with an offset. This
 * involves creating the new internal intrinsic (i.e. the new function
 * signature).
 */
ir_call *
lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir)
{
   /* Shared atomics usually have 2 parameters, the shared variable and an
    * integer argument. The exception is CompSwap, which has an additional
    * integer parameter.
    */
   int param_count = ir->actual_parameters.length();
   assert(param_count == 2 || param_count == 3);

   /* First argument must be a scalar integer shared variable */
   exec_node *param = ir->actual_parameters.get_head();
   ir_instruction *inst = (ir_instruction *) param;
   assert(inst->ir_type == ir_type_dereference_variable ||
          inst->ir_type == ir_type_dereference_array ||
          inst->ir_type == ir_type_dereference_record ||
          inst->ir_type == ir_type_swizzle);

   ir_rvalue *deref = (ir_rvalue *) inst;
   assert(deref->type->is_scalar() &&
          (deref->type->is_integer_32() || deref->type->is_float()));

   ir_variable *var = deref->variable_referenced();

   /* Compute the offset to the start of the dereference. */
   void *mem_ctx = ralloc_parent(shader->ir);

   ir_rvalue *offset = NULL;
   unsigned const_offset = get_shared_offset(var);
   bool row_major;
   const glsl_type *matrix_type;
   assert(var->get_interface_type() == NULL);
   const enum glsl_interface_packing packing = GLSL_INTERFACE_PACKING_STD430;
   buffer_access_type = shared_atomic_access;

   setup_buffer_access(mem_ctx, deref,
                       &offset, &const_offset,
                       &row_major, &matrix_type, NULL, packing);

   assert(matrix_type == NULL);

   ir_rvalue *deref_offset =
      add(offset, new(mem_ctx) ir_constant(const_offset));

   /* Create the new internal function signature that will take an offset
    * instead of a shared variable
    */
   exec_list sig_params;
   ir_variable *sig_param = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset", ir_var_function_in);
   sig_params.push_tail(sig_param);

   const glsl_type *type = deref->type->get_scalar_type();
   sig_param = new(mem_ctx)
      ir_variable(type, "data1", ir_var_function_in);
   sig_params.push_tail(sig_param);

   if (param_count == 3) {
      sig_param = new(mem_ctx)
         ir_variable(type, "data2", ir_var_function_in);
      sig_params.push_tail(sig_param);
   }

   ir_function_signature *sig =
      new(mem_ctx) ir_function_signature(deref->type,
                                         compute_shader_enabled);
   sig->replace_parameters(&sig_params);

   assert(ir->callee->intrinsic_id >= ir_intrinsic_generic_load);
   assert(ir->callee->intrinsic_id <= ir_intrinsic_generic_atomic_comp_swap);
   sig->intrinsic_id = MAP_INTRINSIC_TO_TYPE(ir->callee->intrinsic_id, shared);

   char func_name[64];
   sprintf(func_name, "%s_shared", ir->callee_name());
   ir_function *f = new(mem_ctx) ir_function(func_name);
   f->add_signature(sig);

   /* Now, create the call to the internal intrinsic */
   exec_list call_params;
   call_params.push_tail(deref_offset);
   param = ir->actual_parameters.get_head()->get_next();
   ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
   call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
   if (param_count == 3) {
      param = param->get_next();
      param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
      call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
   }
   ir_dereference_variable *return_deref =
      ir->return_deref->clone(mem_ctx, NULL);
   return new(mem_ctx) ir_call(sig, return_deref, &call_params);
}
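/*
 * Illustrative example (not from the original file): a GLSL call such as
 *
 *    shared uint counter;
 *    ...
 *    uint old = atomicAdd(counter, 1u);
 *
 * reaches this pass as a generic atomic intrinsic whose first parameter is
 * the dereference of "counter".  The code above replaces it with a call
 * whose first parameter is the byte offset of "counter" within the shared
 * region, targeting a function named by appending "_shared" to the original
 * intrinsic name (e.g. something like __intrinsic_atomic_add_shared), with a
 * return type matching the dereferenced type.
 */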
ir_call *
lower_shared_reference_visitor::check_for_shared_atomic_intrinsic(ir_call *ir)
{
   exec_list& params = ir->actual_parameters;

   if (params.length() < 2 || params.length() > 3)
      return ir;

   ir_rvalue *rvalue =
      ((ir_instruction *) params.get_head())->as_rvalue();
   if (!rvalue)
      return ir;

   ir_variable *var = rvalue->variable_referenced();
   if (!var || var->data.mode != ir_var_shader_shared)
      return ir;

   const enum ir_intrinsic_id id = ir->callee->intrinsic_id;
   if (id == ir_intrinsic_generic_atomic_add ||
       id == ir_intrinsic_generic_atomic_min ||
       id == ir_intrinsic_generic_atomic_max ||
       id == ir_intrinsic_generic_atomic_and ||
       id == ir_intrinsic_generic_atomic_or ||
       id == ir_intrinsic_generic_atomic_xor ||
       id == ir_intrinsic_generic_atomic_exchange ||
       id == ir_intrinsic_generic_atomic_comp_swap) {
      return lower_shared_atomic_intrinsic(ir);
   }

   return ir;
}
ir_visitor_status
lower_shared_reference_visitor::visit_enter(ir_call *ir)
{
   ir_call *new_ir = check_for_shared_atomic_intrinsic(ir);
   if (new_ir != ir) {
      progress = true;
      base_ir->replace_with(new_ir);
      return visit_continue_with_parent;
   }

   return rvalue_visit(ir);
}
} /* unnamed namespace */
void
lower_shared_reference(struct gl_context *ctx,
                       struct gl_shader_program *prog,
                       struct gl_linked_shader *shader)
{
   if (shader->Stage != MESA_SHADER_COMPUTE)
      return;

   lower_shared_reference_visitor v(shader);

   /* Loop over the instructions lowering references, because taking a deref
    * of a shared-variable array using a shared-variable dereference as the
    * index will produce a collection of instructions, all of which have
    * cloned shared-variable dereferences for that array index.
    */
   do {
      v.progress = false;
      visit_list_elements(&v, shader->ir);
   } while (v.progress);

   prog->Comp.SharedSize = v.shared_size;
   /* Section 19.1 (Compute Shader Variables) of the OpenGL 4.5 (Core Profile)
    * specification says:
    *
    *   "There is a limit to the total size of all variables declared as
    *    shared in a single program object. This limit, expressed in units of
    *    basic machine units, may be queried as the value of
    *    MAX_COMPUTE_SHARED_MEMORY_SIZE."
    */
   if (prog->Comp.SharedSize > ctx->Const.MaxComputeSharedMemorySize) {
      linker_error(prog, "Too much shared memory used (%u/%u)\n",
                   prog->Comp.SharedSize,
                   ctx->Const.MaxComputeSharedMemorySize);
   }
}