nir: Clean up nir_deref helper functions
[mesa.git] / src/glsl/nir/nir_lower_variables_scalar.c
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

/*
 * This lowering pass converts references to variables into loads and stores
 * of registers or inputs/outputs. We assume that structure splitting has
 * already been run, or else structures with indirect references can't be
 * split. We also assume that this pass will be consumed by a scalar backend,
 * so we pack things more tightly.
 */

#include "nir.h"

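/* returns the total number of scalar components a value of the given type
 * occupies once packed, counting array elements and struct fields; opaque
 * types (samplers, images, atomics, interfaces) take up no space
 */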
static unsigned
type_size(const struct glsl_type *type)
{
   unsigned int size, i;

   switch (glsl_get_base_type(type)) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      return glsl_get_components(type);
   case GLSL_TYPE_ARRAY:
      return type_size(glsl_get_array_element(type)) * glsl_get_length(type);
   case GLSL_TYPE_STRUCT:
      size = 0;
      for (i = 0; i < glsl_get_length(type); i++) {
         size += type_size(glsl_get_struct_elem_type(type, i));
      }
      return size;
   case GLSL_TYPE_SAMPLER:
      return 0;
   case GLSL_TYPE_ATOMIC_UINT:
      return 0;
   case GLSL_TYPE_INTERFACE:
      return 0;
   case GLSL_TYPE_IMAGE:
      return 0;
   case GLSL_TYPE_VOID:
   case GLSL_TYPE_ERROR:
      unreachable("not reached");
   }

   return 0;
}

/*
 * For inputs, outputs, and uniforms, assigns starting locations for variables.
 */

static void
assign_var_locations(struct hash_table *ht, unsigned *size)
{
   unsigned location = 0;

   struct hash_entry *entry;
   hash_table_foreach(ht, entry) {
      nir_variable *var = (nir_variable *) entry->data;

      /*
       * UBOs have their own address spaces, so don't count them towards the
       * number of global uniforms.
       */
      if (var->data.mode == nir_var_uniform && var->interface_type != NULL)
         continue;

      var->data.driver_location = location;
      location += type_size(var->type);
   }

   *size = location;
}

static void
assign_var_locations_shader(nir_shader *shader)
{
   assign_var_locations(shader->inputs, &shader->num_inputs);
   assign_var_locations(shader->outputs, &shader->num_outputs);
   assign_var_locations(shader->uniforms, &shader->num_uniforms);
}

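/* sets up the register standing in for the given variable and records the
 * mapping in the variable -> register hash table; anything that isn't a plain
 * scalar or vector becomes a packed array register
 */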
static void
init_reg(nir_variable *var, nir_register *reg, struct hash_table *ht,
         bool add_names)
{
   if (!glsl_type_is_scalar(var->type) &&
       !glsl_type_is_vector(var->type)) {
      reg->is_packed = true;
      reg->num_components = 1;
      reg->num_array_elems = type_size(var->type);
   } else {
      reg->num_components = glsl_get_components(var->type);
   }
   if (add_names)
      reg->name = ralloc_strdup(reg, var->name);
   _mesa_hash_table_insert(ht, var, reg);
}

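/* builds the variable -> register map: global registers for global variables
 * and (when lowering IO) for outputs, and local registers for each function's
 * locals
 */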
static struct hash_table *
init_var_ht(nir_shader *shader, bool lower_globals, bool lower_io,
            bool add_names)
{
   struct hash_table *ht = _mesa_hash_table_create(NULL,
                                                   _mesa_hash_pointer,
                                                   _mesa_key_pointer_equal);

   if (lower_globals) {
      foreach_list_typed(nir_variable, var, node, &shader->globals) {
         nir_register *reg = nir_global_reg_create(shader);
         init_reg(var, reg, ht, add_names);
      }
   }

   if (lower_io) {
      struct hash_entry *entry;
      hash_table_foreach(shader->outputs, entry) {
         nir_variable *var = (nir_variable *) entry->data;
         nir_register *reg = nir_global_reg_create(shader);
         init_reg(var, reg, ht, add_names);
      }
   }

   nir_foreach_overload(shader, overload) {
      if (overload->impl) {
         nir_function_impl *impl = overload->impl;

         foreach_list_typed(nir_variable, var, node, &impl->locals) {
            nir_register *reg = nir_local_reg_create(impl);
            init_reg(var, reg, ht, add_names);
         }
      }
   }

   return ht;
}

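/* returns true if any array dereference in the chain uses an indirect
 * (non-constant) index
 */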
static bool
deref_has_indirect(nir_deref_var *deref_var)
{
   nir_deref *deref = &deref_var->deref;

   while (deref->child != NULL) {
      deref = deref->child;
      if (deref->deref_type == nir_deref_type_array) {
         nir_deref_array *deref_array = nir_deref_as_array(deref);
         if (deref_array->has_indirect)
            return true;
      }
   }

   return false;
}

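/* Computes the constant part of a dereference chain's offset, in scalar
 * slots. Indirect array indices are scaled by the element size and summed by
 * emitting mul/add instructions before "instr"; the accumulated result is
 * returned through "indirect".
 */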
static unsigned
get_deref_offset(nir_deref_var *deref_var, nir_instr *instr,
                 nir_function_impl *impl, bool native_integers,
                 nir_src *indirect)
{
   void *mem_ctx = ralloc_parent(instr);

   bool first_indirect = true;

   unsigned base_offset = 0;
   nir_deref *deref = &deref_var->deref;
   while (deref->child != NULL) {
      const struct glsl_type *parent_type = deref->type;
      deref = deref->child;

      if (deref->deref_type == nir_deref_type_array) {
         nir_deref_array *deref_array = nir_deref_as_array(deref);
         unsigned size = type_size(deref->type);

         base_offset += size * deref_array->base_offset;

         if (deref_array->has_indirect) {
            nir_src src;
            if (size == 1) {
               src = deref_array->indirect;
            } else {
               /* temp1 = size * deref_array->indirect */

               nir_register *const_reg = nir_local_reg_create(impl);
               const_reg->num_components = 1;

               nir_load_const_instr *load_const =
                  nir_load_const_instr_create(mem_ctx);
               load_const->dest.reg.reg = const_reg;
               load_const->num_components = 1;
               load_const->value.u[0] = size;
               nir_instr_insert_before(instr, &load_const->instr);

               nir_register *reg = nir_local_reg_create(impl);
               reg->num_components = 1;

               nir_op op;
               if (native_integers)
                  op = nir_op_imul;
               else
                  op = nir_op_fmul;
               nir_alu_instr *mul_instr = nir_alu_instr_create(mem_ctx, op);
               mul_instr->dest.write_mask = 1;
               mul_instr->dest.dest.reg.reg = reg;
               mul_instr->src[0].src = deref_array->indirect;
               mul_instr->src[1].src.reg.reg = const_reg;
               nir_instr_insert_before(instr, &mul_instr->instr);

               src.is_ssa = false;
               src.reg.reg = reg;
               src.reg.base_offset = 0;
               src.reg.indirect = NULL;
            }

            if (!first_indirect) {
               /* temp2 = indirect + temp1 */

               nir_register *reg = nir_local_reg_create(impl);
               reg->num_components = 1;

               nir_op op;
               if (native_integers)
                  op = nir_op_iadd;
               else
                  op = nir_op_fadd;
               nir_alu_instr *add_instr = nir_alu_instr_create(mem_ctx, op);
               add_instr->dest.write_mask = 1;
               add_instr->dest.dest.reg.reg = reg;
               add_instr->src[0].src = *indirect;
               add_instr->src[1].src = src;
               nir_instr_insert_before(instr, &add_instr->instr);

               src.is_ssa = false;
               src.reg.reg = reg;
               src.reg.base_offset = 0;
               src.reg.indirect = NULL;
            }

            /* indirect = tempX */
            *indirect = src;
            first_indirect = false;
         }
      } else {
         nir_deref_struct *deref_struct = nir_deref_as_struct(deref);

         unsigned i = 0;
         while (strcmp(glsl_get_struct_elem_name(parent_type, i),
                       deref_struct->elem) != 0) {
            base_offset += type_size(glsl_get_struct_elem_type(parent_type, i));
            i++;
         }
      }
   }

   return base_offset;
}

/*
 * We cannot convert variables used in calls, so remove them from the hash
 * table.
 */

static bool
remove_call_vars_cb(nir_block *block, void *state)
{
   struct hash_table *ht = (struct hash_table *) state;

   nir_foreach_instr(block, instr) {
      if (instr->type == nir_instr_type_call) {
         nir_call_instr *call = nir_instr_as_call(instr);
         if (call->return_deref) {
            struct hash_entry *entry =
               _mesa_hash_table_search(ht, call->return_deref->var);
            if (entry)
               _mesa_hash_table_remove(ht, entry);
         }

         for (unsigned i = 0; i < call->num_params; i++) {
            struct hash_entry *entry =
               _mesa_hash_table_search(ht, call->params[i]->var);
            if (entry)
               _mesa_hash_table_remove(ht, entry);
         }
      }
   }

   return true;
}

static void
remove_local_vars(nir_function_impl *impl, struct hash_table *ht)
{
   if (impl->return_var) {
      struct hash_entry *entry =
         _mesa_hash_table_search(ht, impl->return_var);

      if (entry)
         _mesa_hash_table_remove(ht, entry);
   }

   for (unsigned i = 0; i < impl->num_params; i++) {
      struct hash_entry *entry =
         _mesa_hash_table_search(ht, impl->params[i]);
      if (entry)
         _mesa_hash_table_remove(ht, entry);
   }

   nir_foreach_block(impl, remove_call_vars_cb, ht);
}

static void
remove_local_vars_shader(nir_shader *shader, struct hash_table *ht)
{
   nir_foreach_overload(shader, overload) {
      if (overload->impl)
         remove_local_vars(overload->impl, ht);
   }
}

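/* returns the last dereference in a chain, whose type is the type of the
 * value actually being loaded or stored
 */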
static nir_deref *
get_deref_tail(nir_deref *deref)
{
   while (deref->child != NULL)
      deref = deref->child;
   return deref;
}

/* helper for reg_const_load which emits a single instruction */
static void
reg_const_load_single_instr(nir_reg_dest reg, nir_constant *constant,
                            enum glsl_base_type base_type,
                            unsigned num_components, unsigned offset,
                            nir_function_impl *impl, void *mem_ctx)
{
   nir_load_const_instr *instr = nir_load_const_instr_create(mem_ctx);
   instr->num_components = num_components;
   for (unsigned i = 0; i < num_components; i++) {
      switch (base_type) {
      case GLSL_TYPE_FLOAT:
      case GLSL_TYPE_INT:
      case GLSL_TYPE_UINT:
         instr->value.u[i] = constant->value.u[i + offset];
         break;
      case GLSL_TYPE_BOOL:
         instr->value.u[i] = constant->value.u[i + offset] ?
                             NIR_TRUE : NIR_FALSE;
         break;
      default:
         unreachable("Invalid immediate type");
      }
   }
   instr->dest.reg = reg;
   instr->dest.reg.base_offset += offset;

   nir_instr_insert_before_cf_list(&impl->body, &instr->instr);
}

/* loads a constant value into a register */
static void
reg_const_load(nir_reg_dest reg, nir_constant *constant,
               const struct glsl_type *type, nir_function_impl *impl,
               void *mem_ctx)
{
   unsigned offset = 0;
   const struct glsl_type *subtype;
   unsigned subtype_size;

   enum glsl_base_type base_type = glsl_get_base_type(type);
   switch (base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_BOOL:
      if (glsl_type_is_matrix(type)) {
         for (unsigned i = 0; i < glsl_get_matrix_columns(type); i++) {
            reg_const_load_single_instr(reg, constant, base_type,
                                        glsl_get_vector_elements(type),
                                        i * glsl_get_vector_elements(type),
                                        impl, mem_ctx);
         }
      } else {
         reg_const_load_single_instr(reg, constant, base_type,
                                     glsl_get_vector_elements(type), 0,
                                     impl, mem_ctx);
      }
      break;

   case GLSL_TYPE_STRUCT:
      for (unsigned i = 0; i < glsl_get_length(type); i++) {
         const struct glsl_type *field = glsl_get_struct_elem_type(type, i);
         nir_reg_dest new_reg = reg;
         new_reg.base_offset += offset;
         reg_const_load(new_reg, constant->elements[i], field, impl,
                        mem_ctx);
         offset += type_size(field);
      }
      break;

   case GLSL_TYPE_ARRAY:
      subtype = glsl_get_array_element(type);
      subtype_size = type_size(subtype);
      for (unsigned i = 0; i < glsl_get_length(type); i++) {
         nir_reg_dest new_reg = reg;
         new_reg.base_offset += subtype_size * i;
         reg_const_load(new_reg, constant->elements[i], subtype, impl,
                        mem_ctx);
      }
      break;

   default:
      assert(0);
      break;
   }
}

/* recursively emits a register <-> dereference block copy */
static void
var_reg_block_copy_impl(nir_reg_src reg, nir_deref_var *deref_head,
                        nir_src *predicate, const struct glsl_type *type,
                        nir_instr *after, bool var_dest, void *mem_ctx)
{
   unsigned offset;

   switch (glsl_get_base_type(type)) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_BOOL:
      if (glsl_type_is_matrix(type)) {
         for (unsigned i = 0; i < glsl_get_matrix_columns(type); i++) {
            nir_deref_array *deref_array = nir_deref_array_create(mem_ctx);
            deref_array->base_offset = i;
            deref_array->deref.type = glsl_get_column_type(type);

            nir_deref_var *new_deref_head =
               nir_deref_as_var(nir_copy_deref(mem_ctx, &deref_head->deref));
            get_deref_tail(&new_deref_head->deref)->child =
               &deref_array->deref;

            nir_reg_src new_reg = reg;
            new_reg.base_offset += i * glsl_get_vector_elements(type);

            var_reg_block_copy_impl(new_reg, new_deref_head, predicate,
                                    glsl_get_column_type(type), after,
                                    var_dest, mem_ctx);
         }
      } else {
         if (var_dest) {
            nir_intrinsic_op op;
            switch (glsl_get_vector_elements(type)) {
            case 1: op = nir_intrinsic_store_var_vec1; break;
            case 2: op = nir_intrinsic_store_var_vec2; break;
            case 3: op = nir_intrinsic_store_var_vec3; break;
            case 4: op = nir_intrinsic_store_var_vec4; break;
            default: assert(0); break;
            }

            nir_intrinsic_instr *store =
               nir_intrinsic_instr_create(mem_ctx, op);
            store->variables[0] = deref_head;
            store->src[0].reg.reg = reg.reg;
            store->src[0].reg.base_offset = reg.base_offset;
            if (reg.indirect) {
               store->src[0].reg.indirect = ralloc(mem_ctx, nir_src);
               *store->src[0].reg.indirect = *reg.indirect;
            }

            if (predicate) {
               store->has_predicate = true;
               store->predicate = nir_src_copy(*predicate, mem_ctx);
            }

            nir_instr_insert_before(after, &store->instr);
         } else {
            nir_intrinsic_op op;
            switch (glsl_get_vector_elements(type)) {
            case 1: op = nir_intrinsic_load_var_vec1; break;
            case 2: op = nir_intrinsic_load_var_vec2; break;
            case 3: op = nir_intrinsic_load_var_vec3; break;
            case 4: op = nir_intrinsic_load_var_vec4; break;
            default: assert(0); break;
            }

            nir_intrinsic_instr *load =
               nir_intrinsic_instr_create(mem_ctx, op);
            load->variables[0] = deref_head;
            load->dest.reg.reg = reg.reg;
            load->dest.reg.base_offset = reg.base_offset;
            if (reg.indirect) {
               load->dest.reg.indirect = ralloc(mem_ctx, nir_src);
               *load->dest.reg.indirect = *reg.indirect;
            }

            if (predicate) {
               load->has_predicate = true;
               load->predicate = nir_src_copy(*predicate, mem_ctx);
            }

            nir_instr_insert_before(after, &load->instr);
         }
      }
      break;

   case GLSL_TYPE_STRUCT:
      offset = 0;
      for (unsigned i = 0; i < glsl_get_length(type); i++) {
         const struct glsl_type *field_type =
            glsl_get_struct_elem_type(type, i);
         const char *field_name = glsl_get_struct_elem_name(type, i);

         nir_deref_struct *deref_struct =
            nir_deref_struct_create(mem_ctx, field_name);
         deref_struct->deref.type = field_type;
         deref_struct->elem = field_name;

         nir_deref_var *new_deref_head =
            nir_deref_as_var(nir_copy_deref(mem_ctx, &deref_head->deref));
         get_deref_tail(&new_deref_head->deref)->child =
            &deref_struct->deref;

         nir_reg_src new_reg = reg;
         new_reg.base_offset += offset;

         var_reg_block_copy_impl(new_reg, new_deref_head, predicate,
                                 field_type, after, var_dest, mem_ctx);

         offset += type_size(field_type);
      }
      break;

   case GLSL_TYPE_ARRAY:
      for (unsigned i = 0; i < glsl_get_length(type); i++) {
         const struct glsl_type *elem_type = glsl_get_array_element(type);

         nir_deref_array *deref_array = nir_deref_array_create(mem_ctx);
         deref_array->base_offset = i;
         deref_array->deref.type = elem_type;

         nir_deref_var *new_deref_head =
            nir_deref_as_var(nir_copy_deref(mem_ctx, &deref_head->deref));
         get_deref_tail(&new_deref_head->deref)->child =
            &deref_array->deref;

         nir_reg_src new_reg = reg;
         new_reg.base_offset += i * type_size(elem_type);

         var_reg_block_copy_impl(new_reg, new_deref_head, predicate,
                                 elem_type, after, var_dest, mem_ctx);
      }
      break;

   default:
      break;
   }
}

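/* returns the load_input/load_uniform intrinsic, indirect or direct, with
 * the given number of components
 */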
static nir_intrinsic_op
get_load_op(nir_variable_mode mode, bool indirect, unsigned num_components)
{
   if (indirect) {
      switch (mode) {
      case nir_var_shader_in:
         switch (num_components) {
         case 1: return nir_intrinsic_load_input_vec1_indirect;
         case 2: return nir_intrinsic_load_input_vec2_indirect;
         case 3: return nir_intrinsic_load_input_vec3_indirect;
         case 4: return nir_intrinsic_load_input_vec4_indirect;
         default: assert(0); break;
         }
         break;

      case nir_var_uniform:
         switch (num_components) {
         case 1: return nir_intrinsic_load_uniform_vec1_indirect;
         case 2: return nir_intrinsic_load_uniform_vec2_indirect;
         case 3: return nir_intrinsic_load_uniform_vec3_indirect;
         case 4: return nir_intrinsic_load_uniform_vec4_indirect;
         default: assert(0); break;
         }
         break;

      default:
         assert(0);
         break;
      }
   } else {
      switch (mode) {
      case nir_var_shader_in:
         switch (num_components) {
         case 1: return nir_intrinsic_load_input_vec1;
         case 2: return nir_intrinsic_load_input_vec2;
         case 3: return nir_intrinsic_load_input_vec3;
         case 4: return nir_intrinsic_load_input_vec4;
         default: assert(0); break;
         }
         break;

      case nir_var_uniform:
         switch (num_components) {
         case 1: return nir_intrinsic_load_uniform_vec1;
         case 2: return nir_intrinsic_load_uniform_vec2;
         case 3: return nir_intrinsic_load_uniform_vec3;
         case 4: return nir_intrinsic_load_uniform_vec4;
         default: assert(0); break;
         }
         break;

      default:
         assert(0);
         break;
      }
   }

   return nir_intrinsic_load_input_vec1;
}

/* emits an input -> reg block copy */

static void
reg_input_block_copy(nir_reg_dest dest, unsigned src_index, nir_src *indirect,
                     nir_src *predicate, unsigned size,
                     unsigned num_components, nir_variable_mode mode,
                     nir_instr *after, void *mem_ctx)
{
   nir_intrinsic_op op = get_load_op(mode, indirect != NULL, num_components);

   nir_intrinsic_instr *load = nir_intrinsic_instr_create(mem_ctx, op);
   load->const_index[0] = src_index;
   load->const_index[1] = size;
   if (indirect)
      load->src[0] = *indirect;
   if (predicate) {
      load->has_predicate = true;
      load->predicate = nir_src_copy(*predicate, mem_ctx);
   }
   load->dest.reg = dest;
   nir_instr_insert_before(after, &load->instr);
}

/* emits a variable/input -> register block copy */

static void
var_reg_block_copy(nir_deref_var *src, nir_reg_dest dest, nir_src *predicate,
                   bool lower_io, nir_instr *after, nir_function_impl *impl,
                   bool native_integers, void *mem_ctx)
{
   const struct glsl_type *src_type = get_deref_tail(&src->deref)->type;

   if (lower_io && (src->var->data.mode == nir_var_shader_in ||
                    src->var->data.mode == nir_var_uniform)) {
      unsigned size, num_components;
      if (glsl_type_is_scalar(src_type) || glsl_type_is_vector(src_type)) {
         num_components = glsl_get_vector_elements(src_type);
         size = 1;
      } else {
         num_components = 1;
         size = type_size(src_type);
      }
      bool has_indirect = deref_has_indirect(src);
      nir_src indirect;
      nir_src *indirect_ptr = has_indirect ? &indirect : NULL;
      unsigned offset = get_deref_offset(src, after, impl, native_integers,
                                         indirect_ptr);
      offset += src->var->data.driver_location;

      reg_input_block_copy(dest, offset, indirect_ptr, predicate, size,
                           num_components, src->var->data.mode, after,
                           mem_ctx);
   } else {
      nir_reg_src reg;
      reg.reg = dest.reg;
      reg.base_offset = dest.base_offset;
      reg.indirect = dest.indirect;

      var_reg_block_copy_impl(reg, src, predicate, src_type, after, false,
                              mem_ctx);
   }
}

/* emits a register -> variable copy */
static void
reg_var_block_copy(nir_reg_src src, nir_deref_var *dest, nir_src *predicate,
                   nir_instr *after, void *mem_ctx)
{
   const struct glsl_type *dest_type = get_deref_tail(&dest->deref)->type;

   var_reg_block_copy_impl(src, dest, predicate, dest_type, after, true,
                           mem_ctx);
}

/*
 * emits an input -> variable block copy using an intermediate register
 */
static void
var_var_block_copy(nir_deref_var *src, nir_deref_var *dest, nir_src *predicate,
                   nir_instr *after, nir_function_impl *impl,
                   bool native_integers, void *mem_ctx)
{
   const struct glsl_type *type = get_deref_tail(&dest->deref)->type;
   nir_register *reg = nir_local_reg_create(impl);
   if (glsl_type_is_scalar(type) || glsl_type_is_vector(type)) {
      reg->num_components = glsl_get_vector_elements(type);
   } else {
      reg->is_packed = true;
      reg->num_components = 1;
      reg->num_array_elems = type_size(type);
   }

   nir_reg_src reg_src;
   reg_src.base_offset = 0;
   reg_src.indirect = NULL;
   reg_src.reg = reg;

   nir_reg_dest reg_dest;
   reg_dest.base_offset = 0;
   reg_dest.indirect = NULL;
   reg_dest.reg = reg;

   var_reg_block_copy(src, reg_dest, predicate, true, after, impl,
                      native_integers, mem_ctx);
   reg_var_block_copy(reg_src, dest, predicate, after, mem_ctx);
}

/* emits a register -> register block copy */
static void
reg_reg_block_copy(nir_reg_dest dest, nir_reg_src src, nir_src *predicate,
                   const struct glsl_type *type, nir_instr *after,
                   void *mem_ctx)
{
   if (!dest.reg->is_packed && !src.reg->is_packed)
      assert(dest.reg->num_components == src.reg->num_components);

   unsigned size, num_components;
   if (dest.reg->is_packed && src.reg->is_packed) {
      size = type_size(type);
      num_components = 1;
   } else {
      size = 1;
      if (dest.reg->is_packed)
         num_components = src.reg->num_components;
      else
         num_components = dest.reg->num_components;
   }

   for (unsigned i = 0; i < size; i++) {
      nir_alu_instr *move = nir_alu_instr_create(mem_ctx, nir_op_imov);
      move->dest.write_mask = (1 << num_components) - 1;

      move->dest.dest.reg.reg = dest.reg;
      move->dest.dest.reg.base_offset = dest.base_offset + i;
      if (dest.indirect != NULL) {
         move->dest.dest.reg.indirect = ralloc(mem_ctx, nir_src);
         *move->dest.dest.reg.indirect = *dest.indirect;
      }

      if (predicate) {
         move->has_predicate = true;
         move->predicate = nir_src_copy(*predicate, mem_ctx);
      }

      move->src[0].src.reg = src;
      move->src[0].src.reg.base_offset += i;

      nir_instr_insert_before(after, &move->instr);
   }
}

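/* builds a nir_reg_dest for the register standing in for a dereferenced
 * variable, filling in the base offset and (for indirect derefs) the
 * indirect source computed by get_deref_offset()
 */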
static nir_reg_dest
create_dest(nir_deref_var *deref, nir_instr *instr, nir_register *reg,
            nir_function_impl *impl, bool native_integers, void *mem_ctx)
{
   nir_reg_dest dest;
   if (deref_has_indirect(deref)) {
      dest.indirect = ralloc(mem_ctx, nir_src);
      dest.indirect->is_ssa = false;
      dest.base_offset = get_deref_offset(deref, instr,
                                          impl, native_integers,
                                          dest.indirect);
   } else {
      dest.base_offset = get_deref_offset(deref, instr,
                                          impl, native_integers, NULL);
      dest.indirect = NULL;
   }
   dest.reg = reg;

   return dest;
}

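/* source-side counterpart of create_dest() */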
static nir_reg_src
create_src(nir_deref_var *deref, nir_instr *instr, nir_register *reg,
           nir_function_impl *impl, bool native_integers, void *mem_ctx)
{
   nir_reg_src src;
   if (deref_has_indirect(deref)) {
      src.indirect = ralloc(mem_ctx, nir_src);
      src.indirect->is_ssa = false;
      src.base_offset = get_deref_offset(deref, instr,
                                         impl, native_integers,
                                         src.indirect);
   } else {
      src.base_offset = get_deref_offset(deref, instr,
                                         impl, native_integers, NULL);
      src.indirect = NULL;
   }
   src.reg = reg;

   return src;
}

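/* lowers a copy_var intrinsic to the appropriate kind of block copy, based
 * on whether the source and destination were converted to registers
 */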
static void
handle_var_copy(nir_intrinsic_instr *instr, nir_function_impl *impl,
                bool native_integers, bool lower_io, struct hash_table *ht)
{
   void *mem_ctx = ralloc_parent(instr);

   struct hash_entry *entry;

   nir_variable *dest_var = instr->variables[0]->var;
   nir_variable *src_var = instr->variables[1]->var;

   const struct glsl_type *type =
      get_deref_tail(&instr->variables[0]->deref)->type;

   nir_src *predicate = instr->has_predicate ? &instr->predicate : NULL;

   /*
    * The source can be either:
    * 1. a variable we're lowering to a register
    * 2. an input or uniform we're lowering to loads from an index
    * 3. a variable we can't lower yet
    *
    * and similarly, the destination can be either:
    * 1. a variable we're lowering to a register
    * 2. a variable we can't lower yet
    *
    * meaning that there are six cases, including the trivial one (where
    * source and destination are #3 and #2 respectively) where we can't do
    * anything.
    */

   entry = _mesa_hash_table_search(ht, dest_var);
   if (entry) {
      nir_reg_dest dest = create_dest(instr->variables[0], &instr->instr,
                                      (nir_register *) entry->data, impl,
                                      native_integers, mem_ctx);

      entry = _mesa_hash_table_search(ht, src_var);
      if (entry) {
         nir_reg_src src = create_src(instr->variables[1], &instr->instr,
                                      (nir_register *) entry->data, impl,
                                      native_integers, mem_ctx);

         reg_reg_block_copy(dest, src, predicate, type, &instr->instr, mem_ctx);
      } else {
         var_reg_block_copy(instr->variables[1], dest, predicate, lower_io,
                            &instr->instr, impl, native_integers, mem_ctx);
      }
   } else {
      entry = _mesa_hash_table_search(ht, src_var);
      if (entry) {
         nir_reg_src src = create_src(instr->variables[1], &instr->instr,
                                      (nir_register *) entry->data, impl,
                                      native_integers, mem_ctx);

         reg_var_block_copy(src, instr->variables[0], predicate, &instr->instr,
                            mem_ctx);
      } else {
         if (!lower_io || (src_var->data.mode != nir_var_shader_in &&
                           src_var->data.mode != nir_var_uniform)) {
            /* nothing to do here */
            return;
         }

         var_var_block_copy(instr->variables[1], instr->variables[0], predicate,
                            &instr->instr, impl, native_integers, mem_ctx);
      }
   }

   nir_instr_remove(&instr->instr);
}

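/* rewrites a load_var intrinsic as either a load from an input/uniform index
 * or a move from the variable's register
 */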
static void
handle_var_load(nir_intrinsic_instr *instr, nir_function_impl *impl,
                bool native_integers, bool lower_io, struct hash_table *ht)
{
   void *mem_ctx = ralloc_parent(instr);

   struct hash_entry *entry =
      _mesa_hash_table_search(ht, instr->variables[0]->var);

   if (entry == NULL) {
      nir_variable *src_var = instr->variables[0]->var;

      if (lower_io && (src_var->data.mode == nir_var_shader_in ||
                       src_var->data.mode == nir_var_uniform)) {
         bool has_indirect = deref_has_indirect(instr->variables[0]);
         unsigned num_components =
            nir_intrinsic_infos[instr->intrinsic].dest_components;
         nir_src indirect;
         unsigned offset = get_deref_offset(instr->variables[0], &instr->instr,
                                            impl, native_integers, &indirect);
         offset += src_var->data.driver_location;

         nir_intrinsic_op op = get_load_op(src_var->data.mode, has_indirect,
                                           num_components);
         nir_intrinsic_instr *load = nir_intrinsic_instr_create(mem_ctx, op);
         load->dest = instr->dest;
         load->const_index[0] = (int) offset;
         load->const_index[1] = 1;
         if (has_indirect)
            load->src[0] = indirect;

         if (instr->has_predicate) {
            load->has_predicate = true;
            load->predicate = nir_src_copy(instr->predicate, mem_ctx);
         }

         nir_instr_insert_before(&instr->instr, &load->instr);
      } else {
         return;
      }
   } else {
      nir_register *reg = (nir_register *) entry->data;

      nir_alu_instr *move = nir_alu_instr_create(mem_ctx, nir_op_imov);
      unsigned dest_components =
         nir_intrinsic_infos[instr->intrinsic].dest_components;
      move->dest.dest = instr->dest;
      move->dest.write_mask = (1 << dest_components) - 1;
      move->src[0].src.reg = create_src(instr->variables[0], &instr->instr,
                                        reg, impl, native_integers, mem_ctx);
      if (instr->has_predicate) {
         move->has_predicate = true;
         move->predicate = nir_src_copy(instr->predicate, mem_ctx);
      }
      nir_instr_insert_before(&instr->instr, &move->instr);
   }

   nir_instr_remove(&instr->instr);
}

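/* rewrites a store_var intrinsic as a move into the variable's register */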
static void
handle_var_store(nir_intrinsic_instr *instr, nir_function_impl *impl,
                 bool native_integers, bool lower_io, struct hash_table *ht)
{
   void *mem_ctx = ralloc_parent(instr);

   struct hash_entry *entry =
      _mesa_hash_table_search(ht, instr->variables[0]->var);
   if (entry == NULL)
      return;

   nir_register *reg = (nir_register *) entry->data;

   nir_alu_instr *move = nir_alu_instr_create(mem_ctx, nir_op_imov);
   unsigned src_components =
      nir_intrinsic_infos[instr->intrinsic].src_components[0];
   move->dest.dest.reg = create_dest(instr->variables[0], &instr->instr,
                                     reg, impl, native_integers, mem_ctx);
   move->dest.write_mask = (1 << src_components) - 1;
   move->src[0].src = instr->src[0];
   if (instr->has_predicate) {
      move->has_predicate = true;
      move->predicate = nir_src_copy(instr->predicate, mem_ctx);
   }
   nir_instr_insert_before(&instr->instr, &move->instr);
   nir_instr_remove(&instr->instr);
}

typedef struct {
   struct hash_table *ht;
   bool native_integers, lower_io;
   nir_function_impl *impl;
} rewrite_state;

static bool
rewrite_block_cb(nir_block *block, void *_state)
{
   rewrite_state *state = (rewrite_state *) _state;

   nir_foreach_instr_safe(block, instr) {
      if (instr->type == nir_instr_type_intrinsic) {
         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_var_vec1:
         case nir_intrinsic_load_var_vec2:
         case nir_intrinsic_load_var_vec3:
         case nir_intrinsic_load_var_vec4:
            handle_var_load(intrin, state->impl, state->native_integers,
                            state->lower_io, state->ht);
            break;

         case nir_intrinsic_store_var_vec1:
         case nir_intrinsic_store_var_vec2:
         case nir_intrinsic_store_var_vec3:
         case nir_intrinsic_store_var_vec4:
            handle_var_store(intrin, state->impl, state->native_integers,
                             state->lower_io, state->ht);
            break;

         case nir_intrinsic_copy_var:
            handle_var_copy(intrin, state->impl, state->native_integers,
                            state->lower_io, state->ht);
            break;

         default:
            break;
         }
      }
   }

   return true;
}

static void
rewrite_impl(nir_function_impl *impl, struct hash_table *ht,
             bool native_integers, bool lower_io)
{
   rewrite_state state;
   state.ht = ht;
   state.native_integers = native_integers;
   state.lower_io = lower_io;
   state.impl = impl;

   nir_foreach_block(impl, rewrite_block_cb, &state);
}

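/* for each lowered variable with a constant initializer, loads the constant
 * into its register at the start of the function
 */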
static void
insert_load_const_impl(nir_function_impl *impl, struct exec_list *vars,
                       struct hash_table *ht)
{
   void *mem_ctx = ralloc_parent(impl);

   foreach_list_typed(nir_variable, var, node, vars) {
      if (var->constant_initializer == NULL)
         continue;

      struct hash_entry *entry = _mesa_hash_table_search(ht, var);
      if (entry) {
         nir_register *reg = (nir_register *) entry->data;
         nir_reg_dest dest;
         dest.reg = reg;
         dest.base_offset = 0;
         dest.indirect = NULL;
         reg_const_load(dest, var->constant_initializer, var->type, impl,
                        mem_ctx);
      }
   }
}

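/* returns the store_output intrinsic, indirect or direct, with the given
 * number of components
 */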
static nir_intrinsic_op
get_store_op(bool indirect, unsigned num_components)
{
   if (indirect) {
      switch (num_components) {
      case 1: return nir_intrinsic_store_output_vec1_indirect;
      case 2: return nir_intrinsic_store_output_vec2_indirect;
      case 3: return nir_intrinsic_store_output_vec3_indirect;
      case 4: return nir_intrinsic_store_output_vec4_indirect;
      default: assert(0); break;
      }
   } else {
      switch (num_components) {
      case 1: return nir_intrinsic_store_output_vec1;
      case 2: return nir_intrinsic_store_output_vec2;
      case 3: return nir_intrinsic_store_output_vec3;
      case 4: return nir_intrinsic_store_output_vec4;
      default: assert(0); break;
      }
   }

   return nir_intrinsic_store_output_vec1;
}

/* emits a reg -> output block copy after a block */
static void
reg_output_block_copy_block(nir_reg_src src, unsigned dest_index,
                            unsigned num_components, unsigned size,
                            nir_block *block, void *mem_ctx)
{
   nir_intrinsic_op op = get_store_op(false, num_components);

   nir_intrinsic_instr *store = nir_intrinsic_instr_create(mem_ctx, op);
   store->const_index[0] = dest_index;
   store->const_index[1] = (size == 0) ? 1 : size;
   store->src[0].reg = src;
   nir_instr_insert_after_block(block, &store->instr);
}

/* emits a reg -> output block copy before an instruction */
static void
reg_output_block_copy_instr(nir_reg_src src, unsigned dest_index,
                            unsigned num_components, unsigned size,
                            nir_instr *after, void *mem_ctx)
{
   nir_intrinsic_op op = get_store_op(false, num_components);

   nir_intrinsic_instr *store = nir_intrinsic_instr_create(mem_ctx, op);
   store->const_index[0] = dest_index;
   store->const_index[1] = (size == 0) ? 1 : size;
   store->src[0].reg = src;
   nir_instr_insert_before(after, &store->instr);
}

static nir_function_impl *
find_main(nir_shader *shader)
{
   foreach_list_typed(nir_function, func, node, &shader->functions) {
      if (strcmp(func->name, "main") == 0) {
         assert(exec_list_length(&func->overload_list) == 1);
         nir_function_overload *overload = nir_function_first_overload(func);
         return overload->impl;
      }
   }

   assert(0);
   return NULL;
}

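/* copies the register for each lowered output to the output itself, either
 * before the given instruction or at the end of the given block
 */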
static void
insert_output_reg_copies(nir_shader *shader, nir_block *block,
                         nir_instr *after, struct hash_table *ht)
{
   struct hash_entry *entry;
   hash_table_foreach(shader->outputs, entry) {
      nir_variable *var = (nir_variable *) entry->data;

      struct hash_entry *entry2;
      entry2 = _mesa_hash_table_search(ht, var);
      if (entry2) {
         nir_register *reg = (nir_register *) entry2->data;
         nir_reg_src src;
         src.reg = reg;
         src.base_offset = 0;
         src.indirect = NULL;

         if (after) {
            reg_output_block_copy_instr(src, var->data.driver_location,
                                        reg->num_components,
                                        reg->num_array_elems,
                                        after, shader);
         } else {
            reg_output_block_copy_block(src, var->data.driver_location,
                                        reg->num_components,
                                        reg->num_array_elems,
                                        block, shader);
         }
      }
   }
}

typedef struct {
   struct hash_table *ht;
   nir_shader *shader;
   bool found_emit_vertex;
} reg_output_state;

static bool
insert_output_reg_copies_emit_vertex(nir_block *block, void *_state)
{
   reg_output_state *state = (reg_output_state *) _state;

   nir_foreach_instr(block, instr) {
      if (instr->type == nir_instr_type_intrinsic) {
         nir_intrinsic_instr *intrin_instr = nir_instr_as_intrinsic(instr);
         if (intrin_instr->intrinsic == nir_intrinsic_emit_vertex) {
            insert_output_reg_copies(state->shader, NULL, instr, state->ht);
            state->found_emit_vertex = true;
         }
      }
   }

   return true;
}

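/* In geometry shaders, outputs must be written out before each emit_vertex;
 * otherwise they are written once, at the end of main.
 */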
static void
insert_output_reg_copies_shader(nir_shader *shader, struct hash_table *ht)
{
   nir_function_impl *main_impl = find_main(shader);

   reg_output_state state;
   state.shader = shader;
   state.ht = ht;
   state.found_emit_vertex = false;
   nir_foreach_block(main_impl, insert_output_reg_copies_emit_vertex, &state);

   if (!state.found_emit_vertex) {
      struct set_entry *entry;
      set_foreach(main_impl->end_block->predecessors, entry) {
         nir_block *block = (nir_block *) entry->key;
         insert_output_reg_copies(shader, block, NULL, ht);
      }
   }
}

static void
rewrite_shader(nir_shader *shader, struct hash_table *ht, bool native_integers,
               bool lower_globals, bool lower_io)
{
   nir_foreach_overload(shader, overload) {
      if (overload->impl) {
         insert_load_const_impl(overload->impl, &overload->impl->locals, ht);
         if (lower_globals && strcmp(overload->function->name, "main") == 0)
            insert_load_const_impl(overload->impl, &shader->globals, ht);
         rewrite_impl(overload->impl, ht, native_integers, lower_io);
      }
   }
}

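/* The entry point: assigns IO locations (if lower_io), builds the
 * variable -> register map, drops variables involved in calls, rewrites all
 * loads, stores, and copies, and finally emits the register -> output copies
 * (if lower_io).
 */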
void
nir_lower_variables_scalar(nir_shader *shader, bool lower_globals,
                           bool lower_io, bool add_names, bool native_integers)
{
   if (lower_io)
      assign_var_locations_shader(shader);
   struct hash_table *ht = init_var_ht(shader, lower_globals, lower_io,
                                       add_names);
   remove_local_vars_shader(shader, ht);
   rewrite_shader(shader, ht, native_integers, lower_globals, lower_io);
   if (lower_io)
      insert_output_reg_copies_shader(shader, ht);
   _mesa_hash_table_destroy(ht, NULL);
}
1248 }