/*
 * Copyright (c) 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_shared_reference.cpp
 *
 * IR lower pass to replace dereferences of compute shader shared variables
 * with intrinsic function calls.
 *
 * This relieves drivers of the responsibility of allocating space for the
 * shared variables in the shared memory region.
 */

34 #include "lower_buffer_access.h"
35 #include "ir_builder.h"
36 #include "main/macros.h"
37 #include "util/list.h"
38 #include "glsl_parser_extras.h"
40 using namespace ir_builder
;
45 struct list_head node
;
46 const ir_variable
*var
;
50 class lower_shared_reference_visitor
:
51 public lower_buffer_access::lower_buffer_access
{
54 lower_shared_reference_visitor(struct gl_shader
*shader
)
55 : list_ctx(ralloc_context(NULL
)), shader(shader
), shared_size(0u)
57 list_inithead(&var_offsets
);
60 ~lower_shared_reference_visitor()
62 ralloc_free(list_ctx
);
71 void insert_buffer_access(void *mem_ctx
, ir_dereference
*deref
,
72 const glsl_type
*type
, ir_rvalue
*offset
,
73 unsigned mask
, int channel
);
75 void handle_rvalue(ir_rvalue
**rvalue
);
76 ir_visitor_status
visit_enter(ir_assignment
*ir
);
77 void handle_assignment(ir_assignment
*ir
);
79 unsigned get_shared_offset(const ir_variable
*);
81 ir_call
*shared_load(void *mem_ctx
, const struct glsl_type
*type
,
83 ir_call
*shared_store(void *mem_ctx
, ir_rvalue
*deref
, ir_rvalue
*offset
,
87 struct gl_shader
*shader
;
88 struct list_head var_offsets
;
94 lower_shared_reference_visitor::get_shared_offset(const ir_variable
*var
)
96 list_for_each_entry(var_offset
, var_entry
, &var_offsets
, node
) {
97 if (var_entry
->var
== var
)
98 return var_entry
->offset
;
101 struct var_offset
*new_entry
= rzalloc(list_ctx
, struct var_offset
);
102 list_add(&new_entry
->node
, &var_offsets
);
103 new_entry
->var
= var
;
105 unsigned var_align
= var
->type
->std430_base_alignment(false);
106 new_entry
->offset
= glsl_align(shared_size
, var_align
);
108 unsigned var_size
= var
->type
->std430_size(false);
109 shared_size
= new_entry
->offset
+ var_size
;
111 return new_entry
->offset
;
115 lower_shared_reference_visitor::handle_rvalue(ir_rvalue
**rvalue
)
120 ir_dereference
*deref
= (*rvalue
)->as_dereference();
124 ir_variable
*var
= deref
->variable_referenced();
125 if (!var
|| var
->data
.mode
!= ir_var_shader_shared
)
128 buffer_access_type
= shared_load_access
;
130 void *mem_ctx
= ralloc_parent(shader
->ir
);
132 ir_rvalue
*offset
= NULL
;
133 unsigned const_offset
= get_shared_offset(var
);
136 assert(var
->get_interface_type() == NULL
);
137 const unsigned packing
= GLSL_INTERFACE_PACKING_STD430
;
139 setup_buffer_access(mem_ctx
, var
, deref
,
140 &offset
, &const_offset
,
141 &row_major
, &matrix_columns
, packing
);
143 /* Now that we've calculated the offset to the start of the
144 * dereference, walk over the type and emit loads into a temporary.
146 const glsl_type
*type
= (*rvalue
)->type
;
147 ir_variable
*load_var
= new(mem_ctx
) ir_variable(type
,
150 base_ir
->insert_before(load_var
);
152 ir_variable
*load_offset
= new(mem_ctx
) ir_variable(glsl_type::uint_type
,
153 "shared_load_temp_offset",
155 base_ir
->insert_before(load_offset
);
156 base_ir
->insert_before(assign(load_offset
, offset
));
158 deref
= new(mem_ctx
) ir_dereference_variable(load_var
);
160 emit_access(mem_ctx
, false, deref
, load_offset
, const_offset
, row_major
,
161 matrix_columns
, packing
, 0);
169 lower_shared_reference_visitor::handle_assignment(ir_assignment
*ir
)
174 ir_rvalue
*rvalue
= ir
->lhs
->as_rvalue();
178 ir_dereference
*deref
= ir
->lhs
->as_dereference();
182 ir_variable
*var
= ir
->lhs
->variable_referenced();
183 if (!var
|| var
->data
.mode
!= ir_var_shader_shared
)
186 buffer_access_type
= shared_store_access
;
188 /* We have a write to a shared variable, so declare a temporary and rewrite
189 * the assignment so that the temporary is the LHS.
191 void *mem_ctx
= ralloc_parent(shader
->ir
);
193 const glsl_type
*type
= rvalue
->type
;
194 ir_variable
*store_var
= new(mem_ctx
) ir_variable(type
,
197 base_ir
->insert_before(store_var
);
198 ir
->lhs
= new(mem_ctx
) ir_dereference_variable(store_var
);
200 ir_rvalue
*offset
= NULL
;
201 unsigned const_offset
= get_shared_offset(var
);
204 assert(var
->get_interface_type() == NULL
);
205 const unsigned packing
= GLSL_INTERFACE_PACKING_STD430
;
207 setup_buffer_access(mem_ctx
, var
, deref
,
208 &offset
, &const_offset
,
209 &row_major
, &matrix_columns
, packing
);
211 deref
= new(mem_ctx
) ir_dereference_variable(store_var
);
213 ir_variable
*store_offset
= new(mem_ctx
) ir_variable(glsl_type::uint_type
,
214 "shared_store_temp_offset",
216 base_ir
->insert_before(store_offset
);
217 base_ir
->insert_before(assign(store_offset
, offset
));
219 /* Now we have to write the value assigned to the temporary back to memory */
220 emit_access(mem_ctx
, true, deref
, store_offset
, const_offset
, row_major
,
221 matrix_columns
, packing
, ir
->write_mask
);
227 lower_shared_reference_visitor::visit_enter(ir_assignment
*ir
)
229 handle_assignment(ir
);
230 return rvalue_visit(ir
);
234 lower_shared_reference_visitor::insert_buffer_access(void *mem_ctx
,
235 ir_dereference
*deref
,
236 const glsl_type
*type
,
241 if (buffer_access_type
== shared_store_access
) {
242 ir_call
*store
= shared_store(mem_ctx
, deref
, offset
, mask
);
243 base_ir
->insert_after(store
);
245 ir_call
*load
= shared_load(mem_ctx
, type
, offset
);
246 base_ir
->insert_before(load
);
247 ir_rvalue
*value
= load
->return_deref
->as_rvalue()->clone(mem_ctx
, NULL
);
248 base_ir
->insert_before(assign(deref
->clone(mem_ctx
, NULL
),
254 compute_shader_enabled(const _mesa_glsl_parse_state
*state
)
256 return state
->stage
== MESA_SHADER_COMPUTE
;
260 lower_shared_reference_visitor::shared_store(void *mem_ctx
,
265 exec_list sig_params
;
267 ir_variable
*offset_ref
= new(mem_ctx
)
268 ir_variable(glsl_type::uint_type
, "offset" , ir_var_function_in
);
269 sig_params
.push_tail(offset_ref
);
271 ir_variable
*val_ref
= new(mem_ctx
)
272 ir_variable(deref
->type
, "value" , ir_var_function_in
);
273 sig_params
.push_tail(val_ref
);
275 ir_variable
*writemask_ref
= new(mem_ctx
)
276 ir_variable(glsl_type::uint_type
, "write_mask" , ir_var_function_in
);
277 sig_params
.push_tail(writemask_ref
);
279 ir_function_signature
*sig
= new(mem_ctx
)
280 ir_function_signature(glsl_type::void_type
, compute_shader_enabled
);
282 sig
->replace_parameters(&sig_params
);
283 sig
->is_intrinsic
= true;
285 ir_function
*f
= new(mem_ctx
) ir_function("__intrinsic_store_shared");
286 f
->add_signature(sig
);
288 exec_list call_params
;
289 call_params
.push_tail(offset
->clone(mem_ctx
, NULL
));
290 call_params
.push_tail(deref
->clone(mem_ctx
, NULL
));
291 call_params
.push_tail(new(mem_ctx
) ir_constant(write_mask
));
292 return new(mem_ctx
) ir_call(sig
, NULL
, &call_params
);
296 lower_shared_reference_visitor::shared_load(void *mem_ctx
,
297 const struct glsl_type
*type
,
300 exec_list sig_params
;
302 ir_variable
*offset_ref
= new(mem_ctx
)
303 ir_variable(glsl_type::uint_type
, "offset_ref" , ir_var_function_in
);
304 sig_params
.push_tail(offset_ref
);
306 ir_function_signature
*sig
=
307 new(mem_ctx
) ir_function_signature(type
, compute_shader_enabled
);
309 sig
->replace_parameters(&sig_params
);
310 sig
->is_intrinsic
= true;
312 ir_function
*f
= new(mem_ctx
) ir_function("__intrinsic_load_shared");
313 f
->add_signature(sig
);
315 ir_variable
*result
= new(mem_ctx
)
316 ir_variable(type
, "shared_load_result", ir_var_temporary
);
317 base_ir
->insert_before(result
);
318 ir_dereference_variable
*deref_result
= new(mem_ctx
)
319 ir_dereference_variable(result
);
321 exec_list call_params
;
322 call_params
.push_tail(offset
->clone(mem_ctx
, NULL
));
324 return new(mem_ctx
) ir_call(sig
, deref_result
, &call_params
);
} /* unnamed namespace */

330 lower_shared_reference(struct gl_shader
*shader
, unsigned *shared_size
)
332 if (shader
->Stage
!= MESA_SHADER_COMPUTE
)
335 lower_shared_reference_visitor
v(shader
);
337 /* Loop over the instructions lowering references, because we take a deref
338 * of an shared variable array using a shared variable dereference as the
339 * index will produce a collection of instructions all of which have cloned
340 * shared variable dereferences for that array index.
344 visit_list_elements(&v
, shader
->ir
);
345 } while (v
.progress
);
347 *shared_size
= v
.shared_size
;