63c55c0969426cc6b5e5321df4e78d56679fcb9a
[mesa.git] / src / glsl / nir / nir_lower_variables_scalar.c
1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Connor Abbott (cwabbott0@gmail.com)
25 *
26 */
27
28 /*
29 * This lowering pass converts references to variables with loads/stores to
30 * registers or inputs/outputs. We assume that structure splitting has already
31 * been run, or else structures with indirect references can't be split. We
32 * also assume that this pass will be consumed by a scalar backend, so we pack
33 * things more tightly.
34 */
35
36 #include "nir.h"
37
38 static unsigned
39 type_size(const struct glsl_type *type)
40 {
41 unsigned int size, i;
42
43 switch (glsl_get_base_type(type)) {
44 case GLSL_TYPE_UINT:
45 case GLSL_TYPE_INT:
46 case GLSL_TYPE_FLOAT:
47 case GLSL_TYPE_BOOL:
48 return glsl_get_components(type);
49 case GLSL_TYPE_ARRAY:
50 return type_size(glsl_get_array_element(type)) * glsl_get_length(type);
51 case GLSL_TYPE_STRUCT:
52 size = 0;
53 for (i = 0; i < glsl_get_length(type); i++) {
54 size += type_size(glsl_get_struct_elem_type(type, i));
55 }
56 return size;
57 case GLSL_TYPE_SAMPLER:
58 return 0;
59 case GLSL_TYPE_ATOMIC_UINT:
60 return 0;
61 case GLSL_TYPE_INTERFACE:
62 return 0;
63 case GLSL_TYPE_IMAGE:
64 return 0;
65 case GLSL_TYPE_VOID:
66 case GLSL_TYPE_ERROR:
67 unreachable("not reached");
68 }
69
70 return 0;
71 }
72
73 /*
74 * for inputs, outputs, and uniforms, assigns starting locations for variables
75 */
76
77 static void
78 assign_var_locations(struct hash_table *ht)
79 {
80 unsigned location = 0;
81
82 struct hash_entry *entry;
83 hash_table_foreach(ht, entry) {
84 nir_variable *var = (nir_variable *) entry->data;
85
86 /*
87 * UBO's have their own address spaces, so don't count them towards the
88 * number of global uniforms
89 */
90 if (var->data.mode == nir_var_uniform && var->interface_type != NULL)
91 continue;
92
93 var->data.driver_location = location;
94 location += type_size(var->type);
95 }
96 }
97
/* Assigns driver locations for all three IO-like variable tables of the
 * shader (inputs, outputs, and uniforms), each starting from location 0.
 */
static void
assign_var_locations_shader(nir_shader *shader)
{
   assign_var_locations(shader->inputs);
   assign_var_locations(shader->outputs);
   assign_var_locations(shader->uniforms);
}
105
106 static void
107 init_reg(nir_variable *var, nir_register *reg, struct hash_table *ht,
108 bool add_names)
109 {
110 if (!glsl_type_is_scalar(var->type) &&
111 !glsl_type_is_vector(var->type)) {
112 reg->is_packed = true;
113 reg->num_components = 1;
114 reg->num_array_elems = type_size(var->type);
115 } else {
116 reg->num_components = glsl_get_components(var->type);
117 }
118 if (add_names)
119 reg->name = ralloc_strdup(reg, var->name);
120 _mesa_hash_table_insert(ht, var, reg);
121 }
122
123 static struct hash_table *
124 init_var_ht(nir_shader *shader, bool lower_globals, bool lower_io,
125 bool add_names)
126 {
127 struct hash_table *ht = _mesa_hash_table_create(NULL,
128 _mesa_hash_pointer,
129 _mesa_key_pointer_equal);
130
131 if (lower_globals) {
132 foreach_list_typed(nir_variable, var, node, &shader->globals) {
133 nir_register *reg = nir_global_reg_create(shader);
134 init_reg(var, reg, ht, add_names);
135 }
136 }
137
138 if (lower_io) {
139 struct hash_entry *entry;
140 hash_table_foreach(shader->outputs, entry) {
141 nir_variable *var = (nir_variable *) entry->data;
142 nir_register *reg = nir_global_reg_create(shader);
143 init_reg(var, reg, ht, add_names);
144 }
145 }
146
147 nir_foreach_overload(shader, overload) {
148 if (overload->impl) {
149 nir_function_impl *impl = overload->impl;
150
151 foreach_list_typed(nir_variable, var, node, &impl->locals) {
152 nir_register *reg = nir_local_reg_create(impl);
153 init_reg(var, reg, ht, add_names);
154 }
155 }
156 }
157
158 return ht;
159 }
160
161 static bool
162 deref_has_indirect(nir_deref_var *deref_var)
163 {
164 nir_deref *deref = &deref_var->deref;
165
166 while (deref->child != NULL) {
167 deref = deref->child;
168 if (deref->deref_type == nir_deref_type_array) {
169 nir_deref_array *deref_array = nir_deref_as_array(deref);
170 if (deref_array->has_indirect)
171 return true;
172 }
173 }
174
175 return false;
176 }
177
/*
 * Computes the constant part of a deref chain's offset (in scalar slots) and
 * returns it.  For array derefs with indirect indices, emits the ALU
 * instructions that compute the variable part of the offset and stores the
 * resulting source in *indirect.  Emitted instructions are inserted before
 * "instr".  When native_integers is false, the offset math uses float ops
 * (fmul/fadd) instead of integer ops (imul/iadd).
 *
 * "indirect" may be NULL only when the chain has no indirect accesses
 * (callers check with deref_has_indirect() first).
 */
static unsigned
get_deref_offset(nir_deref_var *deref_var, nir_instr *instr,
                 nir_function_impl *impl, bool native_integers,
                 nir_src *indirect)
{
   void *mem_ctx = ralloc_parent(instr);

   /* true until the first indirect term is emitted; subsequent terms must
    * be accumulated into *indirect with an add
    */
   bool first_indirect = true;

   unsigned base_offset = 0;
   nir_deref *deref = &deref_var->deref;
   while (deref->child != NULL) {
      const struct glsl_type *parent_type = deref->type;
      deref = deref->child;

      if (deref->deref_type == nir_deref_type_array) {
         nir_deref_array *deref_array = nir_deref_as_array(deref);
         /* size of one array element, in scalar slots */
         unsigned size = type_size(deref->type);

         base_offset += size * deref_array->base_offset;

         if (deref_array->has_indirect) {
            nir_src src;
            if (size == 1) {
               /* stride 1: the index itself is the offset term */
               src = deref_array->indirect;
            } else {
               /* temp1 = size * deref_array->indirect */

               nir_register *const_reg = nir_local_reg_create(impl);
               const_reg->num_components = 1;

               /* materialize the element size as a constant... */
               nir_load_const_instr *load_const =
                  nir_load_const_instr_create(mem_ctx);
               load_const->dest.reg.reg = const_reg;
               load_const->num_components = 1;
               load_const->value.u[0] = size;
               nir_instr_insert_before(instr, &load_const->instr);

               nir_register *reg = nir_local_reg_create(impl);
               reg->num_components = 1;

               /* ...and multiply it by the indirect index */
               nir_op op;
               if (native_integers)
                  op = nir_op_imul;
               else
                  op = nir_op_fmul;
               nir_alu_instr *mul_instr = nir_alu_instr_create(mem_ctx, op);
               mul_instr->dest.write_mask = 1;
               mul_instr->dest.dest.reg.reg = reg;
               mul_instr->src[0].src = deref_array->indirect;
               mul_instr->src[1].src.reg.reg = const_reg;
               nir_instr_insert_before(instr, &mul_instr->instr);

               src.is_ssa = false;
               src.reg.reg = reg;
               src.reg.base_offset = 0;
               src.reg.indirect = NULL;
            }

            if (!first_indirect) {
               /* temp2 = indirect + temp1 */

               nir_register *reg = nir_local_reg_create(impl);
               reg->num_components = 1;

               nir_op op;
               if (native_integers)
                  op = nir_op_iadd;
               else
                  op = nir_op_fadd;
               nir_alu_instr *add_instr = nir_alu_instr_create(mem_ctx, op);
               add_instr->dest.write_mask = 1;
               add_instr->dest.dest.reg.reg = reg;
               add_instr->src[0].src = *indirect;
               add_instr->src[1].src = src;
               nir_instr_insert_before(instr, &add_instr->instr);

               src.is_ssa = false;
               src.reg.reg = reg;
               src.reg.base_offset = 0;
               src.reg.indirect = NULL;
            }

            /* indirect = tempX */
            *indirect = src;
            first_indirect = false;
         }
      } else {
         /* struct member: add up the sizes of all fields preceding the one
          * being accessed
          */
         nir_deref_struct *deref_struct = nir_deref_as_struct(deref);

         unsigned i = 0;
         while(strcmp(glsl_get_struct_elem_name(parent_type, i),
                      deref_struct->elem) != 0) {
            base_offset += type_size(glsl_get_struct_elem_type(parent_type, i));
            i++;
         }
      }
   }

   return base_offset;
}
279
280 /*
281 * We cannot convert variables used in calls, so remove them from the hash
282 * table.
283 */
284
285 static bool
286 remove_call_vars_cb(nir_block *block, void *state)
287 {
288 struct hash_table *ht = (struct hash_table *) state;
289
290 nir_foreach_instr(block, instr) {
291 if (instr->type == nir_instr_type_call) {
292 nir_call_instr *call = nir_instr_as_call(instr);
293 if (call->return_deref) {
294 struct hash_entry *entry =
295 _mesa_hash_table_search(ht, call->return_deref->var);
296 if (entry)
297 _mesa_hash_table_remove(ht, entry);
298 }
299
300 for (unsigned i = 0; i < call->num_params; i++) {
301 struct hash_entry *entry =
302 _mesa_hash_table_search(ht, call->params[i]->var);
303 if (entry)
304 _mesa_hash_table_remove(ht, entry);
305 }
306 }
307 }
308
309 return true;
310 }
311
312 static void
313 remove_local_vars(nir_function_impl *impl, struct hash_table *ht)
314 {
315 if (impl->return_var) {
316 struct hash_entry *entry =
317 _mesa_hash_table_search(ht, impl->return_var);
318
319 if (entry)
320 _mesa_hash_table_remove(ht, entry);
321 }
322
323 for (unsigned i = 0; i < impl->num_params; i++) {
324 struct hash_entry *entry =
325 _mesa_hash_table_search(ht, impl->params[i]);
326 if (entry)
327 _mesa_hash_table_remove(ht, entry);
328 }
329
330 nir_foreach_block(impl, remove_call_vars_cb, ht);
331 }
332
333 static void
334 remove_local_vars_shader(nir_shader *shader, struct hash_table *ht)
335 {
336 nir_foreach_overload(shader, overload) {
337 if (overload->impl)
338 remove_local_vars(overload->impl, ht);
339 }
340 }
341
342 static nir_deref *
343 get_deref_tail(nir_deref *deref)
344 {
345 while (deref->child != NULL)
346 deref = deref->child;
347 return deref;
348 }
349
350 /* helper for reg_const_load which emits a single instruction */
351 static void
352 reg_const_load_single_instr(nir_reg_dest reg, nir_constant *constant,
353 unsigned num_components, unsigned offset,
354 nir_function_impl *impl, void *mem_ctx)
355 {
356 nir_load_const_instr *instr = nir_load_const_instr_create(mem_ctx);
357 instr->num_components = num_components;
358 for (unsigned i = 0; i < num_components; i++) {
359 instr->value.u[i] = constant->value.u[i + offset];
360 }
361 instr->dest.reg = reg;
362 instr->dest.reg.base_offset += offset;
363
364 nir_instr_insert_before_cf_list(&impl->body, &instr->instr);
365 }
366
/* Loads a constant value into a (possibly packed) register, recursing over
 * the constant's type.  Matrices are loaded one column per instruction;
 * structs and arrays recurse into their members at the appropriate packed
 * base offset.
 */
static void
reg_const_load(nir_reg_dest reg, nir_constant *constant,
               const struct glsl_type *type, nir_function_impl *impl,
               void *mem_ctx)
{
   unsigned offset = 0;
   const struct glsl_type *subtype;
   unsigned subtype_size;

   switch (glsl_get_base_type(type)) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_BOOL:
      if (glsl_type_is_matrix(type)) {
         /* one load_const per column */
         for (unsigned i = 0; i < glsl_get_matrix_columns(type); i++) {
            reg_const_load_single_instr(reg, constant,
                                        glsl_get_vector_elements(type),
                                        i * glsl_get_vector_elements(type),
                                        impl, mem_ctx);
         }
      } else {
         reg_const_load_single_instr(reg, constant,
                                     glsl_get_vector_elements(type), 0,
                                     impl, mem_ctx);
      }
      break;

   case GLSL_TYPE_STRUCT:
      /* recurse into each field, accumulating its packed offset */
      for (unsigned i = 0; i < glsl_get_length(type); i++) {
         const struct glsl_type *field = glsl_get_struct_elem_type(type, i);
         nir_reg_dest new_reg = reg;
         new_reg.base_offset += offset;
         reg_const_load(new_reg, constant->elements[i], field, impl,
                        mem_ctx);
         offset += type_size(field);
      }
      break;

   case GLSL_TYPE_ARRAY:
      /* all elements share the same size, so offsets are i * element size */
      subtype = glsl_get_array_element(type);
      subtype_size = type_size(subtype);
      for (unsigned i = 0; i < glsl_get_length(type); i++) {
         nir_reg_dest new_reg = reg;
         new_reg.base_offset += subtype_size * i;
         reg_const_load(new_reg, constant->elements[i], subtype, impl,
                        mem_ctx);
      }
      break;

   default:
      /* other base types cannot have constant initializers here */
      assert(0);
      break;
   }
}
423
/* Recursively emits a register <-> dereference block copy.  Walks "type",
 * building an extended deref chain for each scalar/vector leaf and emitting
 * one load_var/store_var intrinsic per leaf.  "var_dest" selects direction:
 * true means reg -> variable (store_var), false means variable -> reg
 * (load_var).  All emitted instructions are inserted before "after".
 */
static void
var_reg_block_copy_impl(nir_reg_src reg, nir_deref_var *deref_head,
                        nir_src *predicate, const struct glsl_type *type,
                        nir_instr *after, bool var_dest, void *mem_ctx)
{
   unsigned offset;

   switch (glsl_get_base_type(type)) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_BOOL:
      if (glsl_type_is_matrix(type)) {
         /* matrices: recurse column by column */
         for (unsigned i = 0; i < glsl_get_matrix_columns(type); i++) {
            nir_deref_array *deref_array = nir_deref_array_create(mem_ctx);
            deref_array->base_offset = i;
            deref_array->deref.type = glsl_get_column_type(type);

            /* clone the deref chain and append the column deref */
            nir_deref_var *new_deref_head =
               nir_deref_as_var(nir_copy_deref(mem_ctx, &deref_head->deref));
            get_deref_tail(&new_deref_head->deref)->child =
               &deref_array->deref;

            nir_reg_src new_reg = reg;
            new_reg.base_offset += i * glsl_get_vector_elements(type);

            var_reg_block_copy_impl(new_reg, new_deref_head, predicate,
                                    glsl_get_column_type(type), after,
                                    var_dest, mem_ctx);
         }
      } else {
         if (var_dest) {
            /* reg -> var: store_var of the matching vector width */
            nir_intrinsic_op op;
            switch (glsl_get_vector_elements(type)) {
            case 1: op = nir_intrinsic_store_var_vec1; break;
            case 2: op = nir_intrinsic_store_var_vec2; break;
            case 3: op = nir_intrinsic_store_var_vec3; break;
            case 4: op = nir_intrinsic_store_var_vec4; break;
            default: assert(0); break;
            }

            nir_intrinsic_instr *store =
               nir_intrinsic_instr_create(mem_ctx, op);
            store->variables[0] = deref_head;
            store->src[0].reg.reg = reg.reg;
            store->src[0].reg.base_offset = reg.base_offset;
            if (reg.indirect) {
               store->src[0].reg.indirect = ralloc(mem_ctx, nir_src);
               *store->src[0].reg.indirect = *reg.indirect;
            }

            if (predicate) {
               store->has_predicate = true;
               store->predicate = nir_src_copy(*predicate, mem_ctx);
            }

            nir_instr_insert_before(after, &store->instr);
         } else {
            /* var -> reg: load_var of the matching vector width */
            nir_intrinsic_op op;
            switch (glsl_get_vector_elements(type)) {
            case 1: op = nir_intrinsic_load_var_vec1; break;
            case 2: op = nir_intrinsic_load_var_vec2; break;
            case 3: op = nir_intrinsic_load_var_vec3; break;
            case 4: op = nir_intrinsic_load_var_vec4; break;
            default: assert(0); break;
            }

            nir_intrinsic_instr *load =
               nir_intrinsic_instr_create(mem_ctx, op);
            load->variables[0] = deref_head;
            load->dest.reg.reg = reg.reg;
            load->dest.reg.base_offset = reg.base_offset;
            if (reg.indirect) {
               load->dest.reg.indirect = ralloc(mem_ctx, nir_src);
               *load->dest.reg.indirect = *reg.indirect;
            }

            if (predicate) {
               load->has_predicate = true;
               load->predicate = nir_src_copy(*predicate, mem_ctx);
            }

            nir_instr_insert_before(after, &load->instr);
         }
      }
      break;

   case GLSL_TYPE_STRUCT:
      /* recurse into each field at its accumulated packed offset */
      offset = 0;
      for (unsigned i = 0; i < glsl_get_length(type); i++) {
         const struct glsl_type *field_type =
            glsl_get_struct_elem_type(type, i);
         const char *field_name = glsl_get_struct_elem_name(type, i);

         nir_deref_struct *deref_struct =
            nir_deref_struct_create(mem_ctx, field_name);
         deref_struct->deref.type = field_type;
         deref_struct->elem = field_name;

         nir_deref_var *new_deref_head =
            nir_deref_as_var(nir_copy_deref(mem_ctx, &deref_head->deref));
         get_deref_tail(&new_deref_head->deref)->child =
            &deref_struct->deref;

         nir_reg_src new_reg = reg;
         new_reg.base_offset += offset;

         var_reg_block_copy_impl(new_reg, new_deref_head, predicate,
                                 field_type, after, var_dest, mem_ctx);

         offset += type_size(field_type);
      }
      break;

   case GLSL_TYPE_ARRAY:
      /* recurse into each element; offsets are i * element size */
      for (unsigned i = 0; i < glsl_get_length(type);
           i++) {
         const struct glsl_type *elem_type = glsl_get_array_element(type);

         nir_deref_array *deref_array = nir_deref_array_create(mem_ctx);
         deref_array->base_offset = i;
         deref_array->deref.type = elem_type;

         nir_deref_var *new_deref_head =
            nir_deref_as_var(nir_copy_deref(mem_ctx, &deref_head->deref));
         get_deref_tail(&new_deref_head->deref)->child =
            &deref_array->deref;

         nir_reg_src new_reg = reg;
         new_reg.base_offset += i * type_size(elem_type);

         var_reg_block_copy_impl(new_reg, new_deref_head, predicate,
                                 elem_type, after, var_dest, mem_ctx);
      }
      break;

   default:
      break;
   }
}
565
566 static nir_intrinsic_op
567 get_load_op(nir_variable_mode mode, bool indirect, unsigned num_components)
568 {
569 if (indirect) {
570 switch (mode) {
571 case nir_var_shader_in:
572 switch (num_components) {
573 case 1: return nir_intrinsic_load_input_vec1_indirect;
574 case 2: return nir_intrinsic_load_input_vec2_indirect;
575 case 3: return nir_intrinsic_load_input_vec3_indirect;
576 case 4: return nir_intrinsic_load_input_vec4_indirect;
577 default: assert(0); break;
578 }
579 break;
580
581 case nir_var_uniform:
582 switch (num_components) {
583 case 1: return nir_intrinsic_load_uniform_vec1_indirect;
584 case 2: return nir_intrinsic_load_uniform_vec2_indirect;
585 case 3: return nir_intrinsic_load_uniform_vec3_indirect;
586 case 4: return nir_intrinsic_load_uniform_vec4_indirect;
587 default: assert(0); break;
588 }
589 break;
590
591 default:
592 assert(0);
593 break;
594 }
595 } else {
596 switch (mode) {
597 case nir_var_shader_in:
598 switch (num_components) {
599 case 1: return nir_intrinsic_load_input_vec1;
600 case 2: return nir_intrinsic_load_input_vec2;
601 case 3: return nir_intrinsic_load_input_vec3;
602 case 4: return nir_intrinsic_load_input_vec4;
603 default: assert(0); break;
604 }
605 break;
606
607 case nir_var_uniform:
608 switch (num_components) {
609 case 1: return nir_intrinsic_load_uniform_vec1;
610 case 2: return nir_intrinsic_load_uniform_vec2;
611 case 3: return nir_intrinsic_load_uniform_vec3;
612 case 4: return nir_intrinsic_load_uniform_vec4;
613 default: assert(0); break;
614 }
615 break;
616
617 default:
618 assert(0);
619 break;
620 }
621 }
622
623 return nir_intrinsic_load_input_vec1;
624 }
625
626 /* emits an input -> reg block copy */
627
628 static void
629 reg_input_block_copy(nir_reg_dest dest, unsigned src_index, nir_src *indirect,
630 nir_src *predicate, unsigned size,
631 unsigned num_components, nir_variable_mode mode,
632 nir_instr *after, void *mem_ctx)
633 {
634 nir_intrinsic_op op = get_load_op(mode, indirect != NULL, num_components);
635
636 nir_intrinsic_instr *load = nir_intrinsic_instr_create(mem_ctx, op);
637 load->const_index[0] = src_index;
638 load->const_index[1] = size;
639 if (indirect)
640 load->src[0] = *indirect;
641 if (predicate) {
642 load->has_predicate = true;
643 load->predicate = nir_src_copy(*predicate, mem_ctx);
644 }
645 load->dest.reg = dest;
646 nir_instr_insert_before(after, &load->instr);
647 }
648
/* Emits a variable/input -> register block copy.  Inputs and uniforms (when
 * lower_io is set) become load_input/load_uniform intrinsics addressed by
 * driver_location plus deref offset; other variables go through
 * var_reg_block_copy_impl() as load_var intrinsics.
 */

static void
var_reg_block_copy(nir_deref_var *src, nir_reg_dest dest, nir_src *predicate,
                   bool lower_io, nir_instr *after, nir_function_impl *impl,
                   bool native_integers, void *mem_ctx)
{
   const struct glsl_type *src_type = get_deref_tail(&src->deref)->type;

   if (lower_io && (src->var->data.mode == nir_var_shader_in ||
                    src->var->data.mode == nir_var_uniform)) {
      unsigned size, num_components;
      if (glsl_type_is_scalar(src_type) || glsl_type_is_vector(src_type)) {
         /* a single vector-wide load */
         num_components = glsl_get_vector_elements(src_type);
         size = 1;
      } else {
         /* aggregates are loaded one packed scalar slot at a time */
         num_components = 1;
         size = type_size(src_type);
      }
      bool has_indirect = deref_has_indirect(src);
      nir_src indirect;
      nir_src *indirect_ptr = has_indirect ? &indirect : NULL;
      /* get_deref_offset() writes "indirect" exactly when indirect_ptr is
       * non-NULL, so it is never read uninitialized
       */
      unsigned offset = get_deref_offset(src, after, impl, native_integers,
                                         indirect_ptr);
      offset += src->var->data.driver_location;

      reg_input_block_copy(dest, offset, indirect_ptr, predicate, size,
                           num_components, src->var->data.mode, after,
                           mem_ctx);
   } else {
      /* plain variable: reinterpret the dest as a nir_reg_src descriptor and
       * recurse (var_dest = false means variable -> register)
       */
      nir_reg_src reg;
      reg.reg = dest.reg;
      reg.base_offset = dest.base_offset;
      reg.indirect = dest.indirect;

      var_reg_block_copy_impl(reg, src, predicate, src_type, after, false,
                              mem_ctx);
   }
}
688
689 /* emits a register -> variable copy */
690 static void
691 reg_var_block_copy(nir_reg_src src, nir_deref_var *dest, nir_src *predicate,
692 nir_instr *after, void *mem_ctx)
693 {
694 const struct glsl_type *dest_type = get_deref_tail(&dest->deref)->type;
695
696 var_reg_block_copy_impl(src, dest, predicate, dest_type, after, true,
697 mem_ctx);
698 }
699
700 /*
701 * emits an input -> variable block copy using an intermediate register
702 */
703 static void
704 var_var_block_copy(nir_deref_var *src, nir_deref_var *dest, nir_src *predicate,
705 nir_instr *after, nir_function_impl *impl,
706 bool native_integers, void *mem_ctx)
707 {
708 const struct glsl_type *type = get_deref_tail(&dest->deref)->type;
709 nir_register *reg = nir_local_reg_create(impl);
710 if (glsl_type_is_scalar(type) || glsl_type_is_vector(type)) {
711 reg->num_components = glsl_get_vector_elements(type);
712 } else {
713 reg->is_packed = true;
714 reg->num_components = 1;
715 reg->num_array_elems = type_size(type);
716 }
717
718 nir_reg_src reg_src;
719 reg_src.base_offset = 0;
720 reg_src.indirect = NULL;
721 reg_src.reg = reg;
722
723 nir_reg_dest reg_dest;
724 reg_dest.base_offset = 0;
725 reg_dest.indirect = NULL;
726 reg_dest.reg = reg;
727
728 var_reg_block_copy(src, reg_dest, predicate, true, after, impl,
729 native_integers, mem_ctx);
730 reg_var_block_copy(reg_src, dest, predicate, after, mem_ctx);
731 }
732
/* Emits a register -> register block copy as a sequence of imov's.  When
 * both registers are packed, one single-component move is emitted per
 * scalar slot; otherwise a single vector-wide move suffices.
 */
static void
reg_reg_block_copy(nir_reg_dest dest, nir_reg_src src, nir_src *predicate,
                   const struct glsl_type *type, nir_instr *after,
                   void *mem_ctx)
{
   if (!dest.reg->is_packed && !src.reg->is_packed)
      assert(dest.reg->num_components == src.reg->num_components);

   /* size = number of moves, num_components = width of each move */
   unsigned size, num_components;
   if (dest.reg->is_packed && src.reg->is_packed) {
      size = type_size(type);
      num_components = 1;
   } else {
      size = 1;
      /* take the width from whichever side is a plain vector register */
      if (dest.reg->is_packed)
         num_components = src.reg->num_components;
      else
         num_components = dest.reg->num_components;
   }

   for (unsigned i = 0; i < size; i++) {
      nir_alu_instr *move = nir_alu_instr_create(mem_ctx, nir_op_imov);
      move->dest.write_mask = (1 << num_components) - 1;

      move->dest.dest.reg.reg = dest.reg;
      move->dest.dest.reg.base_offset = dest.base_offset + i;
      if (dest.indirect != NULL) {
         move->dest.dest.reg.indirect = ralloc(mem_ctx, nir_src);
         *move->dest.dest.reg.indirect = *dest.indirect;
      }

      if (predicate) {
         move->has_predicate = true;
         move->predicate = nir_src_copy(*predicate, mem_ctx);
      }

      move->src[0].src.reg = src;
      move->src[0].src.reg.base_offset += i;

      nir_instr_insert_before(after, &move->instr);
   }
}
776
777 static nir_reg_dest
778 create_dest(nir_deref_var *deref, nir_instr *instr, nir_register *reg,
779 nir_function_impl *impl, bool native_integers, void *mem_ctx)
780 {
781 nir_reg_dest dest;
782 if (deref_has_indirect(deref)) {
783 dest.indirect = ralloc(mem_ctx, nir_src);
784 dest.indirect->is_ssa = false;
785 dest.base_offset = get_deref_offset(deref, instr,
786 impl, native_integers,
787 dest.indirect);
788 } else {
789 dest.base_offset = get_deref_offset(deref, instr,
790 impl, native_integers, NULL);
791 dest.indirect = NULL;
792 }
793 dest.reg = reg;
794
795 return dest;
796 }
797
798 static nir_reg_src
799 create_src(nir_deref_var *deref, nir_instr *instr, nir_register *reg,
800 nir_function_impl *impl, bool native_integers, void *mem_ctx)
801 {
802 nir_reg_src src;
803 if (deref_has_indirect(deref)) {
804 src.indirect = ralloc(mem_ctx, nir_src);
805 src.indirect->is_ssa = false;
806 src.base_offset = get_deref_offset(deref, instr,
807 impl, native_integers,
808 src.indirect);
809 } else {
810 src.base_offset = get_deref_offset(deref, instr,
811 impl, native_integers, NULL);
812 src.indirect = NULL;
813 }
814 src.reg = reg;
815
816 return src;
817 }
818
/* Lowers a copy_var intrinsic, dispatching on whether each side was mapped
 * to a register in "ht".  The original copy_var is removed except in the
 * one case where neither side can be lowered.
 */
static void
handle_var_copy(nir_intrinsic_instr *instr, nir_function_impl *impl,
                bool native_integers, bool lower_io, struct hash_table *ht)
{
   void *mem_ctx = ralloc_parent(instr);

   struct hash_entry *entry;

   nir_variable *dest_var = instr->variables[0]->var;
   nir_variable *src_var = instr->variables[1]->var;

   const struct glsl_type *type =
      get_deref_tail(&instr->variables[0]->deref)->type;

   nir_src *predicate = instr->has_predicate ? &instr->predicate : NULL;

   /*
    * The source can be either:
    * 1. a variable we're lowering to a register
    * 2. an input or uniform we're lowering to loads from an index
    * 3. a variable we can't lower yet
    *
    * and similarly, the destination can be either:
    * 1. a variable we're lowering to a register
    * 2. a variable we can't lower yet
    *
    * meaning that there are six cases, including the trivial one (where
    * source and destination are #3 and #2 respectively) where we can't do
    * anything.
    */

   entry = _mesa_hash_table_search(ht, dest_var);
   if (entry) {
      /* destination is a lowered register */
      nir_reg_dest dest = create_dest(instr->variables[0], &instr->instr,
                                      (nir_register *) entry->data, impl,
                                      native_integers, mem_ctx);

      entry = _mesa_hash_table_search(ht, src_var);
      if (entry) {
         /* reg -> reg */
         nir_reg_src src = create_src(instr->variables[1], &instr->instr,
                                      (nir_register *) entry->data, impl,
                                      native_integers, mem_ctx);

         reg_reg_block_copy(dest, src, predicate, type, &instr->instr, mem_ctx);
      } else {
         /* variable or input/uniform -> reg */
         var_reg_block_copy(instr->variables[1], dest, predicate, lower_io,
                            &instr->instr, impl, native_integers, mem_ctx);
      }
   } else {
      /* destination stays a variable */
      entry = _mesa_hash_table_search(ht, src_var);
      if (entry) {
         /* reg -> variable */
         nir_reg_src src = create_src(instr->variables[1], &instr->instr,
                                      (nir_register *) entry->data, impl,
                                      native_integers, mem_ctx);

         reg_var_block_copy(src, instr->variables[0], predicate, &instr->instr,
                            mem_ctx);
      } else {
         if (!lower_io || (src_var->data.mode != nir_var_shader_in &&
                           src_var->data.mode != nir_var_uniform)) {
            /* nothing to do here */
            return;
         }

         /* input/uniform -> variable, via an intermediate register */
         var_var_block_copy(instr->variables[1], instr->variables[0], predicate,
                            &instr->instr, impl, native_integers, mem_ctx);
      }
   }

   nir_instr_remove(&instr->instr);
}
890
/* Lowers a load_var_vec<n> intrinsic: variables mapped in "ht" become an
 * imov from their register; inputs/uniforms (when lower_io is set) become
 * load_input/load_uniform intrinsics; anything else is left untouched.
 */
static void
handle_var_load(nir_intrinsic_instr *instr, nir_function_impl *impl,
                bool native_integers, bool lower_io, struct hash_table *ht)
{
   void *mem_ctx = ralloc_parent(instr);

   struct hash_entry *entry =
      _mesa_hash_table_search(ht, instr->variables[0]->var);

   if (entry == NULL) {
      nir_variable *src_var = instr->variables[0]->var;

      if (lower_io && (src_var->data.mode == nir_var_shader_in ||
                       src_var->data.mode == nir_var_uniform)) {
         bool has_indirect = deref_has_indirect(instr->variables[0]);
         unsigned num_components =
            nir_intrinsic_infos[instr->intrinsic].dest_components;
         /* "indirect" is only written by get_deref_offset() when the deref
          * actually contains an indirect access, which matches has_indirect
          */
         nir_src indirect;
         unsigned offset = get_deref_offset(instr->variables[0], &instr->instr,
                                            impl, native_integers, &indirect);
         offset += src_var->data.driver_location;

         nir_intrinsic_op op = get_load_op(src_var->data.mode, has_indirect,
                                           num_components);
         nir_intrinsic_instr *load = nir_intrinsic_instr_create(mem_ctx, op);
         load->dest = instr->dest;
         load->const_index[0] = (int) offset;
         load->const_index[1] = 1;
         if (has_indirect)
            load->src[0] = indirect;

         if (instr->has_predicate) {
            load->has_predicate = true;
            load->predicate = nir_src_copy(instr->predicate, mem_ctx);
         }

         nir_instr_insert_before(&instr->instr, &load->instr);
      } else {
         /* not lowered and not an input/uniform: leave the load_var alone */
         return;
      }
   } else {
      nir_register *reg = (nir_register *) entry->data;

      /* replace the load with a move out of the register */
      nir_alu_instr *move = nir_alu_instr_create(mem_ctx, nir_op_imov);
      unsigned dest_components =
         nir_intrinsic_infos[instr->intrinsic].dest_components;
      move->dest.dest = instr->dest;
      move->dest.write_mask = (1 << dest_components) - 1;
      move->src[0].src.reg = create_src(instr->variables[0], &instr->instr,
                                        reg, impl, native_integers, mem_ctx);
      if (instr->has_predicate) {
         move->has_predicate = true;
         move->predicate = nir_src_copy(instr->predicate, mem_ctx);
      }
      nir_instr_insert_before(&instr->instr, &move->instr);
   }

   nir_instr_remove(&instr->instr);
}
950
951 static void
952 handle_var_store(nir_intrinsic_instr *instr, nir_function_impl *impl,
953 bool native_integers, bool lower_io, struct hash_table *ht)
954 {
955 void *mem_ctx = ralloc_parent(instr);
956
957 struct hash_entry *entry =
958 _mesa_hash_table_search(ht, instr->variables[0]->var);
959 if (entry == NULL)
960 return;
961
962 nir_register *reg = (nir_register *) entry->data;
963
964 nir_alu_instr *move = nir_alu_instr_create(mem_ctx, nir_op_imov);
965 unsigned src_components =
966 nir_intrinsic_infos[instr->intrinsic].src_components[0];
967 move->dest.dest.reg = create_dest(instr->variables[0], &instr->instr,
968 reg, impl, native_integers, mem_ctx);
969 move->dest.write_mask = (1 << src_components) - 1;
970 move->src[0].src = instr->src[0];
971 if (instr->has_predicate) {
972 move->has_predicate = true;
973 move->predicate = nir_src_copy(instr->predicate, mem_ctx);
974 }
975 nir_instr_insert_before(&instr->instr, &move->instr);
976 nir_instr_remove(&instr->instr);
977 }
978
/* State threaded through rewrite_block_cb() while walking one function
 * implementation.
 */
typedef struct {
   struct hash_table *ht; /* nir_variable* -> replacement nir_register* */
   bool native_integers, lower_io;
   nir_function_impl *impl;
} rewrite_state;
984
985 static bool
986 rewrite_block_cb(nir_block *block, void *_state)
987 {
988 rewrite_state *state = (rewrite_state *) _state;
989
990 nir_foreach_instr_safe(block, instr) {
991 if (instr->type == nir_instr_type_intrinsic) {
992 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
993 switch (intrin->intrinsic) {
994 case nir_intrinsic_load_var_vec1:
995 case nir_intrinsic_load_var_vec2:
996 case nir_intrinsic_load_var_vec3:
997 case nir_intrinsic_load_var_vec4:
998 handle_var_load(intrin, state->impl, state->native_integers,
999 state->lower_io, state->ht);
1000 break;
1001
1002 case nir_intrinsic_store_var_vec1:
1003 case nir_intrinsic_store_var_vec2:
1004 case nir_intrinsic_store_var_vec3:
1005 case nir_intrinsic_store_var_vec4:
1006 handle_var_store(intrin, state->impl, state->native_integers,
1007 state->lower_io, state->ht);
1008 break;
1009
1010 case nir_intrinsic_copy_var:
1011 handle_var_copy(intrin, state->impl, state->native_integers,
1012 state->lower_io, state->ht);
1013 break;
1014
1015 default:
1016 break;
1017 }
1018 }
1019 }
1020
1021 return true;
1022 }
1023
1024 static void
1025 rewrite_impl(nir_function_impl *impl, struct hash_table *ht,
1026 bool native_integers, bool lower_io)
1027 {
1028 rewrite_state state;
1029 state.ht = ht;
1030 state.native_integers = native_integers;
1031 state.lower_io = lower_io;
1032 state.impl = impl;
1033
1034 nir_foreach_block(impl, rewrite_block_cb, &state);
1035 }
1036
1037 static void
1038 insert_load_const_impl(nir_function_impl *impl, struct exec_list *vars,
1039 struct hash_table *ht)
1040 {
1041 void *mem_ctx = ralloc_parent(impl);
1042
1043 foreach_list_typed(nir_variable, var, node, vars) {
1044 if (var->constant_initializer == NULL)
1045 continue;
1046
1047 struct hash_entry *entry = _mesa_hash_table_search(ht, var);
1048 if (entry) {
1049 nir_register *reg = (nir_register *) entry->data;
1050 nir_reg_dest dest;
1051 dest.reg = reg;
1052 dest.base_offset = 0;
1053 dest.indirect = NULL;
1054 reg_const_load(dest, var->constant_initializer, var->type, impl,
1055 mem_ctx);
1056 }
1057 }
1058 }
1059
1060 static nir_intrinsic_op
1061 get_store_op(bool indirect, unsigned num_components)
1062 {
1063 if (indirect) {
1064 switch (num_components) {
1065 case 1: return nir_intrinsic_store_output_vec1_indirect;
1066 case 2: return nir_intrinsic_store_output_vec2_indirect;
1067 case 3: return nir_intrinsic_store_output_vec3_indirect;
1068 case 4: return nir_intrinsic_store_output_vec4_indirect;
1069 default: assert(0); break;
1070 }
1071 } else {
1072 switch (num_components) {
1073 case 1: return nir_intrinsic_store_output_vec1;
1074 case 2: return nir_intrinsic_store_output_vec2;
1075 case 3: return nir_intrinsic_store_output_vec3;
1076 case 4: return nir_intrinsic_store_output_vec4;
1077 default: assert(0); break;
1078 }
1079 }
1080
1081 return nir_intrinsic_store_output_vec1;
1082 }
1083
1084 /* emits a reg -> output block copy after a block */
1085 static void
1086 reg_output_block_copy_block(nir_reg_src src, unsigned dest_index,
1087 unsigned num_components, unsigned size,
1088 nir_block *block, void *mem_ctx)
1089 {
1090 nir_intrinsic_op op = get_store_op(false, num_components);
1091
1092 nir_intrinsic_instr *store = nir_intrinsic_instr_create(mem_ctx, op);
1093 store->const_index[0] = dest_index;
1094 store->const_index[1] = (size == 0) ? 1 : size;
1095 store->src[0].reg = src;
1096 nir_instr_insert_after_block(block, &store->instr);
1097 }
1098
1099 /* emits a reg -> output copy after an instruction */
1100 static void
1101 reg_output_block_copy_instr(nir_reg_src src, unsigned dest_index,
1102 unsigned num_components, unsigned size,
1103 nir_instr *after, void *mem_ctx)
1104 {
1105 nir_intrinsic_op op = get_store_op(false, num_components);
1106
1107 nir_intrinsic_instr *store = nir_intrinsic_instr_create(mem_ctx, op);
1108 store->const_index[0] = dest_index;
1109 store->const_index[1] = (size == 0) ? 1 : size;
1110 store->src[0].reg = src;
1111 nir_instr_insert_before(after, &store->instr);
1112 }
1113
1114 static nir_function_impl *
1115 find_main(nir_shader *shader)
1116 {
1117 foreach_list_typed(nir_function, func, node, &shader->functions) {
1118 if (strcmp(func->name, "main") == 0) {
1119 assert(exec_list_length(&func->overload_list) == 1);
1120 nir_function_overload *overload = nir_function_first_overload(func);
1121 return overload->impl;
1122 }
1123 }
1124
1125 assert(0);
1126 return NULL;
1127 }
1128
1129 static void
1130 insert_output_reg_copies(nir_shader *shader, nir_block *block,
1131 nir_instr *after, struct hash_table *ht)
1132 {
1133 struct hash_entry *entry;
1134 hash_table_foreach(shader->outputs, entry) {
1135 nir_variable *var = (nir_variable *) entry->data;
1136
1137 struct hash_entry *entry2;
1138 entry2 = _mesa_hash_table_search(ht, var);
1139 if (entry2) {
1140 nir_register *reg = (nir_register *) entry2->data;
1141 nir_reg_src src;
1142 src.reg = reg;
1143 src.base_offset = 0;
1144 src.indirect = NULL;
1145
1146 if (after) {
1147 reg_output_block_copy_instr(src, var->data.driver_location,
1148 reg->num_components,
1149 reg->num_array_elems,
1150 after, shader);
1151 } else {
1152 reg_output_block_copy_block(src, var->data.driver_location,
1153 reg->num_components,
1154 reg->num_array_elems,
1155 block, shader);
1156 }
1157 }
1158 }
1159 }
1160
/* Walk state for inserting register -> output copies (see
 * insert_output_reg_copies_shader).
 */
typedef struct {
   struct hash_table *ht;   /* maps nir_variable -> backing nir_register */
   nir_shader *shader;
   bool found_emit_vertex;  /* set once any emit_vertex intrinsic is seen */
} reg_output_state;
1166
1167 static bool
1168 insert_output_reg_copies_emit_vertex(nir_block *block, void *_state)
1169 {
1170 reg_output_state *state = (reg_output_state *) _state;
1171
1172 nir_foreach_instr(block, instr) {
1173 if (instr->type == nir_instr_type_intrinsic) {
1174 nir_intrinsic_instr *intrin_instr = nir_instr_as_intrinsic(instr);
1175 if (intrin_instr->intrinsic == nir_intrinsic_emit_vertex) {
1176 insert_output_reg_copies(state->shader, NULL, instr, state->ht);
1177 state->found_emit_vertex = true;
1178 }
1179 }
1180 }
1181
1182 return true;
1183 }
1184
1185 static void
1186 insert_output_reg_copies_shader(nir_shader *shader, struct hash_table *ht)
1187 {
1188 nir_function_impl *main_impl = find_main(shader);
1189
1190 reg_output_state state;
1191 state.shader = shader;
1192 state.ht = ht;
1193 state.found_emit_vertex = false;
1194 nir_foreach_block(main_impl, insert_output_reg_copies_emit_vertex, &state);
1195
1196 if (!state.found_emit_vertex) {
1197 struct set_entry *entry;
1198 set_foreach(main_impl->end_block->predecessors, entry) {
1199 nir_block *block = (nir_block *) entry->key;
1200 insert_output_reg_copies(shader, block, NULL, ht);
1201 }
1202 }
1203 }
1204
1205 static void
1206 rewrite_shader(nir_shader *shader, struct hash_table *ht, bool native_integers,
1207 bool lower_globals, bool lower_io)
1208 {
1209 nir_foreach_overload(shader, overload) {
1210 if (overload->impl) {
1211 insert_load_const_impl(overload->impl, &overload->impl->locals, ht);
1212 if (lower_globals && strcmp(overload->function->name, "main") == 0)
1213 insert_load_const_impl(overload->impl, &shader->globals, ht);
1214 rewrite_impl(overload->impl, ht, native_integers, lower_io);
1215 }
1216 }
1217 }
1218
/* Entry point of the pass: lowers variable accesses in @shader to
 * registers and, when @lower_io is set, to input/output intrinsics.
 * @lower_globals additionally lowers global variables (their constant
 * initializers get loaded in main -- see rewrite_shader), and
 * @native_integers / @add_names are forwarded to the helpers defined
 * earlier in this file.
 */
void
nir_lower_variables_scalar(nir_shader *shader, bool lower_globals,
                           bool lower_io, bool add_names, bool native_integers)
{
   /* inputs/outputs need locations assigned before their accesses are
    * rewritten to load/store_output intrinsics */
   if (lower_io)
      assign_var_locations_shader(shader);
   /* ht maps each lowered nir_variable to its backing nir_register */
   struct hash_table *ht = init_var_ht(shader, lower_globals, lower_io,
                                       add_names);
   remove_local_vars_shader(shader, ht);
   rewrite_shader(shader, ht, native_integers, lower_globals, lower_io);
   /* outputs live in registers during execution; copy them out at the
    * end of main (or before each emit_vertex) */
   if (lower_io)
      insert_output_reg_copies_shader(shader, ht);
   _mesa_hash_table_destroy(ht, NULL);
}