glsl: lower mediump temporaries to 16 bits except structures (v2)
[mesa.git] / src / mesa / state_tracker / st_glsl_to_tgsi_array_merge.cpp
1 /*
2 * Copyright © 2017 Gert Wollny
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /* A short overview on how the array merging works:
25 *
26 * Inputs:
27 * - per array information: live range, access mask, size
28 * - the program
29 *
30 * Output:
31 * - the program with updated array addressing
32 *
33 * Pseudo algorithm:
34 *
35 * repeat
36 * for all pairs of arrays:
37 * if they have non-overlapping live ranges and equal access masks:
38 * - pick shorter array
39 * - merge its live range into the longer array
40 * - set its merge target array to the longer array
41 * - mark the shorter array as processed
42 *
43 * for all pairs of arrays:
44 * if they have overlapping live ranges use in sum at most four components:
45 * - pick shorter array
46 * - evaluate reswizzle map to move its components into the components
47 * that are not used by the longer array
48 * - set its merge target array to the longer array
49 * - mark the shorter array as processed
50 * - bail out loop
51 * until no more successfull merges were found
52 *
53 * for all pairs of arrays:
54 * if they have non-overlapping live ranges:
55 * - pick shorter array
56 * - merge its live range into the longer array
57 * - set its merge target array to the longer array
58 * - mark the shorter array as processed
59 *
60 * Finalize remapping map so that target arrays are always final, i.e. have
61 * themselfes no merge target set.
62 *
63 * Example:
64 * ID | Length | Live range | access mask | target id | reswizzle
65 * ================================================================
66 * 1 3 3-10 x___ 0 ____
67 * 2 4 13-20 x___ 0 ____
68 * 3 8 3-20 x___ 0 ____
69 * 4 6 21-40 xy__ 0 ____
70 * 5 7 12-30 xy__ 0 ____
71 *
72 * 1. merge live ranges 1 and 2
73 *
74 * ID | Length | Live range | access mask | target id | reswizzle
75 * ================================================================
76 * 1 - - x___ 2 ____
77 * 2 4 3-20 x___ 0 ____
78 * 3 8 3-20 x___ 0 ____
79 * 4 6 21-40 xy__ 0 ____
80 * 5 7 12-30 xy__ 0 ____
81 *
82 *
83 * 3. interleave 2 and 3
84 *
85 * ID | Length | Live range | access mask | target id | reswizzle
86 * ================================================================
87 * 1 - - x___ 2 ____
88 * 2 - - x___ 3 _x__
89 * 3 8 3-20 xy__ 0 ____
90 * 4 6 21-40 xy__ 0 ____
91 * 5 7 12-30 xy__ 0 ____
92 *
93 * 3. merge live ranges 3 and 4
94 *
95 * ID | Length | Live range | access mask | target id | reswizzle
96 * ================================================================
97 * 1 - - x___ 2 ____
98 * 2 - - x___ 3 _x__
99 * 3 8 3-40 xy__ 0 ____
100 * 4 - - xy__ 3 ____
101 * 5 7 3-21 xy__ 0 ____
102 *
103 * 4. interleave 3 and 5
104 *
105 * ID | Length | Live range | access mask | target id | reswizzle
106 * ================================================================
107 * 1 - - x___ 2 ____
108 * 2 - - x___ 3 _x__
109 * 3 8 3-40 xy__ 0 ____
110 * 4 - - xy__ 3 ____
111 * 5 - - xy__ 3 __xy
112 *
113 * 5. finalize remapping
114 * (Array 1 has been merged with 2 that was later interleaved, so
115 * the reswizzeling must be propagated.
116 *
117 * ID | Length | Live range | new access mask | target id | reswizzle
118 * ================================================================
119 * 1 - - _y__ 3 _x__
120 * 2 - - _y__ 3 _x__
121 * 3 8 3-40 xy__ 0 ____
122 * 4 - - xy__ 3 ____
123 * 5 - - __zw 3 __xy
124 *
125 */
126
127 #include "program/prog_instruction.h"
128 #include "util/u_math.h"
129 #include <ostream>
130 #include <cassert>
131 #include <algorithm>
132
133 #include <iostream>
134
135 #include "st_glsl_to_tgsi_array_merge.h"
136
137 #if __cplusplus >= 201402L
138 #include <memory>
139 using std::unique_ptr;
140 using std::make_unique;
141 #endif
142
143 #define ARRAY_MERGE_DEBUG 0
144
145 #if ARRAY_MERGE_DEBUG > 0
146 #define ARRAY_MERGE_DUMP(x) do std::cerr << x; while (0)
147 #define ARRAY_MERGE_DUMP_BLOCK(x) do { x } while (0)
148 #else
149 #define ARRAY_MERGE_DUMP(x)
150 #define ARRAY_MERGE_DUMP_BLOCK(x)
151 #endif
152
153 static const char xyzw[] = "xyzw";
154
155 array_live_range::array_live_range():
156 id(0),
157 length(0),
158 first_access(0),
159 last_access(0),
160 component_access_mask(0),
161 used_component_count(0),
162 target_array(nullptr)
163 {
164 init_swizzles();
165 }
166
167 array_live_range::array_live_range(unsigned aid, unsigned alength):
168 id(aid),
169 length(alength),
170 first_access(0),
171 last_access(0),
172 component_access_mask(0),
173 used_component_count(0),
174 target_array(nullptr)
175 {
176 init_swizzles();
177 }
178
179 array_live_range::array_live_range(unsigned aid, unsigned alength, int begin,
180 int end, int sw):
181 id(aid),
182 length(alength),
183 first_access(begin),
184 last_access(end),
185 component_access_mask(sw),
186 used_component_count(util_bitcount(sw)),
187 target_array(nullptr)
188 {
189 init_swizzles();
190 }
191
192 void array_live_range::init_swizzles()
193 {
194 for (int i = 0; i < 4; ++i)
195 swizzle_map[i] = i;
196 }
197
198 void array_live_range::set_live_range(int _begin, int _end)
199 {
200 set_begin(_begin);
201 set_end(_end);
202 }
203
204 void array_live_range::set_access_mask(int mask)
205 {
206 component_access_mask = mask;
207 used_component_count = util_bitcount(mask);
208 }
209
210 void array_live_range::merge(array_live_range *a, array_live_range *b)
211 {
212 if (a->array_length() < b->array_length())
213 b->merge_live_range_from(a);
214 else
215 a->merge_live_range_from(b);
216 }
217
218 void array_live_range::interleave(array_live_range *a, array_live_range *b)
219 {
220 if (a->array_length() < b->array_length())
221 a->interleave_into(b);
222 else
223 b->interleave_into(a);
224 }
225
226 void array_live_range::interleave_into(array_live_range *other)
227 {
228 for (int i = 0; i < 4; ++i) {
229 swizzle_map[i] = -1;
230 }
231
232 int trgt_access_mask = other->access_mask();
233 int summary_access_mask = trgt_access_mask;
234 int src_swizzle_bit = 1;
235 int next_free_swizzle_bit = 1;
236 int k = 0;
237 unsigned i;
238 unsigned last_src_bit = util_last_bit(component_access_mask);
239
240 for (i = 0; i <= last_src_bit ; ++i, src_swizzle_bit <<= 1) {
241
242 /* Jump over empty src component slots (e.g. x__w). This is just a
243 * safety measure and it is tested for, but it is very likely that the
244 * emitted code always uses slots staring from x without leaving holes
245 * (i.e. always xy__ not x_z_ or _yz_ etc).
246 */
247 if (!(src_swizzle_bit & component_access_mask))
248 continue;
249
250 /* Find the next free access slot in the target. */
251 while ((trgt_access_mask & next_free_swizzle_bit) &&
252 k < 4) {
253 next_free_swizzle_bit <<= 1;
254 ++k;
255 }
256 assert(k < 4 &&
257 "Interleaved array would have more then four components");
258
259 /* Set the mapping for this component. */
260 swizzle_map[i] = k;
261 trgt_access_mask |= next_free_swizzle_bit;
262
263 /* Update the joined access mask if we didn't just fill the mapping.*/
264 if (src_swizzle_bit & component_access_mask)
265 summary_access_mask |= next_free_swizzle_bit;
266 }
267
268 other->set_access_mask(summary_access_mask);
269 other->merge_live_range_from(this);
270
271 ARRAY_MERGE_DUMP_BLOCK(
272 std::cerr << "Interleave " << id << " into " << other->id << ", swz:";
273 for (unsigned i = 0; i < 4; ++i) {
274 std::cerr << ((swizzle_map[i] >= 0) ? xyzw[swizzle_map[i]] : '_');
275 }
276 std::cerr << '\n';
277 );
278 }
279
280 void array_live_range::merge_live_range_from(array_live_range *other)
281 {
282 other->set_target(this);
283 if (other->begin() < first_access)
284 first_access = other->begin();
285 if (other->end() > last_access)
286 last_access = other->end();
287 }
288
289 int8_t array_live_range::remap_one_swizzle(int8_t idx) const
290 {
291 // needs testing
292 if (target_array) {
293 idx = swizzle_map[idx];
294 if (idx >= 0)
295 idx = target_array->remap_one_swizzle(idx);
296 }
297 return idx;
298 }
299
300 void array_live_range::set_target(array_live_range *target)
301 {
302 target_array = target;
303 }
304
305 void array_live_range::print(std::ostream& os) const
306 {
307 os << "[id:" << id
308 << ", length:" << length
309 << ", (b:" << first_access
310 << ", e:" << last_access
311 << "), sw:" << (int)component_access_mask
312 << ", nc:" << (int)used_component_count
313 << "]";
314 }
315
316 bool array_live_range::time_doesnt_overlap(const array_live_range& other) const
317 {
318 return (other.last_access < first_access ||
319 last_access < other.first_access);
320 }
321
322 namespace tgsi_array_merge {
323
324 array_remapping::array_remapping():
325 target_id(0)
326 {
327 for (int i = 0; i < 4; ++i) {
328 read_swizzle_map[i] = i;
329 }
330 }
331
332 array_remapping::array_remapping(int trgt_array_id, const int8_t swizzle[]):
333 target_id(trgt_array_id)
334 {
335 for (int i = 0; i < 4; ++i) {
336 read_swizzle_map[i] = swizzle[i];
337 }
338 }
339
340 void array_remapping::init_from(const array_live_range& range)
341 {
342 target_id = range.is_mapped() ? range.final_target()->array_id(): 0;
343 for (int i = 0; i < 4; ++i)
344 read_swizzle_map[i] = range.remap_one_swizzle(i);
345 }
346
347
348 int array_remapping::map_writemask(int write_mask) const
349 {
350 assert(is_valid());
351 int result_write_mask = 0;
352 for (int i = 0; i < 4; ++i) {
353 if (1 << i & write_mask) {
354 assert(read_swizzle_map[i] >= 0);
355 result_write_mask |= 1 << read_swizzle_map[i];
356 }
357 }
358 return result_write_mask;
359 }
360
361 uint16_t array_remapping::move_read_swizzles(uint16_t original_swizzle) const
362 {
363 assert(is_valid());
364 /* Since
365 *
366 * dst.zw = src.xy in glsl actually is MOV dst.__zw src.__xy
367 *
368 * when interleaving the arrays the source swizzles must be moved
369 * according to the changed dst write mask.
370 */
371 uint16_t out_swizzle = 0;
372 for (int idx = 0; idx < 4; ++idx) {
373 uint16_t orig_swz = GET_SWZ(original_swizzle, idx);
374 int new_idx = read_swizzle_map[idx];
375 if (new_idx >= 0)
376 out_swizzle |= orig_swz << 3 * new_idx;
377 }
378 return out_swizzle;
379 }
380
381 uint16_t array_remapping::map_swizzles(uint16_t old_swizzle) const
382 {
383 uint16_t out_swizzle = 0;
384 for (int idx = 0; idx < 4; ++idx) {
385 uint16_t swz = read_swizzle_map[GET_SWZ(old_swizzle, idx)];
386 out_swizzle |= swz << 3 * idx;
387 }
388 return out_swizzle;
389 }
390
391 void array_remapping::print(std::ostream& os) const
392 {
393 if (is_valid()) {
394 os << "[aid: " << target_id << " swz: ";
395 for (int i = 0; i < 4; ++i)
396 os << (read_swizzle_map[i] >= 0 ? xyzw[read_swizzle_map[i]] : '_');
397 os << "]";
398 } else {
399 os << "[unused]";
400 }
401 }
402
403 /* Required by the unit tests */
404 bool operator == (const array_remapping& lhs, const array_remapping& rhs)
405 {
406 if (lhs.target_id != rhs.target_id)
407 return false;
408
409 if (lhs.target_id == 0)
410 return true;
411
412 for (int i = 0; i < 4; ++i) {
413 if (lhs.read_swizzle_map[i] != rhs.read_swizzle_map[i])
414 return false;
415 }
416 return true;
417 }
418
419 static
420 bool sort_by_begin(const array_live_range& lhs, const array_live_range& rhs) {
421 return lhs.begin() < rhs.begin();
422 }
423
424 /* Helper class to evaluate merging and interleaving of arrays */
425 class array_merge_evaluator {
426 public:
427 typedef int (*array_merger)(array_live_range& range_1,
428 array_live_range& range_2);
429
430 array_merge_evaluator(int _narrays, array_live_range *_ranges,
431 bool _restart);
432
433 /** Run the merge strategy on all arrays
434 * @returns number of successfull merges
435 */
436 int run();
437
438 private:
439 virtual int do_run(array_live_range& range_1, array_live_range& range_2) = 0;
440
441 int narrays;
442 array_live_range *ranges;
443 bool restart;
444 };
445
446 array_merge_evaluator::array_merge_evaluator(int _narrays,
447 array_live_range *_ranges,
448 bool _restart):
449 narrays(_narrays),
450 ranges(_ranges),
451 restart(_restart)
452 {
453 }
454
455 int array_merge_evaluator::run()
456 {
457 int remaps = 0;
458
459 for (int i = 0; i < narrays; ++i) {
460 if (ranges[i].is_mapped())
461 continue;
462
463 for (int j = i + 1; j < narrays; ++j) {
464 if (!ranges[j].is_mapped()) {
465 ARRAY_MERGE_DUMP("try merge " << i << " id:" << ranges[i].array_id()
466 << " and " << j << " id: "<< ranges[j].array_id()
467 << "\n");
468 int n = do_run(ranges[i], ranges[j]);
469 if (restart && n)
470 return n;
471 remaps += n;
472 }
473 }
474 }
475 return remaps;
476 }
477
478 /* Merge live ranges if possible at all */
479 class merge_live_range_always: public array_merge_evaluator {
480 public:
481 merge_live_range_always(int _narrays, array_live_range *_ranges):
482 array_merge_evaluator(_narrays, _ranges, false) {
483 }
484 protected:
485 int do_run(array_live_range& range_1, array_live_range& range_2){
486 if (range_2.time_doesnt_overlap(range_1)) {
487 ARRAY_MERGE_DUMP("merge " << range_2 << " into " << range_1 << "\n");
488 array_live_range::merge(&range_1,&range_2);
489 return 1;
490 }
491 return 0;
492 }
493 };
494
495 /* Merge live ranges only if they use the same swizzle */
496 class merge_live_range_equal_swizzle: public merge_live_range_always {
497 public:
498 merge_live_range_equal_swizzle(int _narrays, array_live_range *_ranges):
499 merge_live_range_always(_narrays, _ranges) {
500 }
501 private:
502 int do_run(array_live_range& range_1, array_live_range& range_2){
503 if (range_1.access_mask() == range_2.access_mask()) {
504 return merge_live_range_always::do_run(range_1, range_2);
505 }
506 return 0;
507 }
508 };
509
510 /* Interleave arrays if possible */
511 class interleave_live_range: public array_merge_evaluator {
512 public:
513 interleave_live_range(int _narrays, array_live_range *_ranges):
514 array_merge_evaluator(_narrays, _ranges, true) {
515 }
516 private:
517 int do_run(array_live_range& range_1, array_live_range& range_2){
518 if ((range_2.used_components() + range_1.used_components() <= 4) &&
519 !range_1.time_doesnt_overlap(range_2)) {
520 ARRAY_MERGE_DUMP("Interleave " << range_2 << " into " << range_1 << "\n");
521 array_live_range::interleave(&range_1, &range_2);
522 return 1;
523 }
524 return 0;
525 }
526 };
527
528 /* Estimate the array merging: First in a loop, arrays with equal access mask
529 * are merged, then interleave arrays that together use at most four components,
530 * and have overlapping live ranges. Finally arrays are merged regardless of
531 * access mask.
532 * @param[in] narrays number of arrays
533 * @param[in,out] alt array life times, the merge target life time will be
534 * updated with the new life time.
535 * @param[in,out] remapping track the arraay index remapping and reswizzeling.
536 * @returns number of merged arrays
537 */
538 bool get_array_remapping(int narrays, array_live_range *ranges,
539 array_remapping *remapping)
540 {
541 int total_remapped = 0;
542 int n_remapped;
543
544 /* Sort by "begin of live range" so that we don't have to restart searching
545 * after every merge.
546 */
547 std::sort(ranges, ranges + narrays, sort_by_begin);
548 merge_live_range_equal_swizzle merge_evaluator_es(narrays, ranges);
549 interleave_live_range interleave_lr(narrays, ranges);
550 do {
551
552 n_remapped = merge_evaluator_es.run();
553
554 /* try only one array interleave, if successfull, another
555 * live_range merge is tried. The test MergeAndInterleave5
556 * (mesa/st/tests/test_glsl_to_tgsi_array_merge.cpp)
557 * shows that this can result in more arrays being merged/interleaved.
558 */
559 n_remapped += interleave_lr.run();
560 total_remapped += n_remapped;
561
562 ARRAY_MERGE_DUMP("Remapped " << n_remapped << " arrays\n");
563 } while (n_remapped > 0);
564
565 total_remapped += merge_live_range_always(narrays, ranges).run();
566 ARRAY_MERGE_DUMP("Remapped a total of " << total_remapped << " arrays\n");
567
568 /* Resolve the remapping chain */
569 for (int i = 1; i <= narrays; ++i) {
570 ARRAY_MERGE_DUMP("Map " << i << ":");
571 remapping[ranges[i-1].array_id()].init_from(ranges[i-1]);
572 }
573 return total_remapped > 0;
574 }
575
576 /* Remap the arrays in a TGSI program according to the given mapping.
577 * @param narrays number of arrays
578 * @param array_sizes array of arrays sizes
579 * @param map the array remapping information
580 * @param instructions TGSI program
581 * @returns number of arrays after remapping
582 */
583 int remap_arrays(int narrays, unsigned *array_sizes,
584 exec_list *instructions,
585 array_remapping *map)
586 {
587 /* re-calculate arrays */
588 #if __cplusplus < 201402L
589 int *idx_map = new int[narrays + 1];
590 unsigned *old_sizes = new unsigned[narrays];
591 #else
592 unique_ptr<int[]> idx_map = make_unique<int[]>(narrays + 1);
593 unique_ptr<unsigned[]> old_sizes = make_unique<unsigned[]>(narrays);
594 #endif
595
596 memcpy(&old_sizes[0], &array_sizes[0], sizeof(unsigned) * narrays);
597
598 /* Evaluate mapping for the array indices and update array sizes */
599 int new_narrays = 0;
600 for (int i = 1; i <= narrays; ++i) {
601 if (!map[i].is_valid()) {
602 ++new_narrays;
603 array_sizes[new_narrays-1] = old_sizes[i-1];
604 idx_map[i] = new_narrays;
605 }
606 }
607
608 /* Map the array ids of merged arrays. */
609 for (int i = 1; i <= narrays; ++i) {
610 if (map[i].is_valid()) {
611 map[i].set_target_id(idx_map[map[i].target_array_id()]);
612 }
613 }
614
615 /* Map the array ids of merge targets that got only renumbered. */
616 for (int i = 1; i <= narrays; ++i) {
617 if (!map[i].is_valid()) {
618 map[i].set_target_id(idx_map[i]);
619 }
620 }
621
622 /* Update the array ids and swizzles in the registers */
623 foreach_in_list(glsl_to_tgsi_instruction, inst, instructions) {
624 for (unsigned j = 0; j < num_inst_src_regs(inst); j++) {
625 st_src_reg& src = inst->src[j];
626 if (src.file == PROGRAM_ARRAY && src.array_id > 0) {
627 array_remapping& m = map[src.array_id];
628 if (m.is_valid()) {
629 src.array_id = m.target_array_id();
630 src.swizzle = m.map_swizzles(src.swizzle);
631 }
632 }
633 }
634 for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) {
635 st_src_reg& src = inst->tex_offsets[j];
636 if (src.file == PROGRAM_ARRAY && src.array_id > 0) {
637 array_remapping& m = map[src.array_id];
638 if (m.is_valid()) {
639 src.array_id = m.target_array_id();
640 src.swizzle = m.map_swizzles(src.swizzle);
641 }
642 }
643 }
644 for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) {
645 st_dst_reg& dst = inst->dst[j];
646 if (dst.file == PROGRAM_ARRAY && dst.array_id > 0) {
647 array_remapping& m = map[dst.array_id];
648 if (m.is_valid()) {
649 assert(j == 0 &&
650 "remapping can only be done for single dest ops");
651 dst.array_id = m.target_array_id();
652 dst.writemask = m.map_writemask(dst.writemask);
653
654 /* If the target component is moved, then the source swizzles
655 * must be moved accordingly.
656 */
657 for (unsigned j = 0; j < num_inst_src_regs(inst); j++) {
658 st_src_reg& src = inst->src[j];
659 src.swizzle = m.move_read_swizzles(src.swizzle);
660 }
661 }
662 }
663 }
664 st_src_reg& res = inst->resource;
665 if (res.file == PROGRAM_ARRAY && res.array_id > 0) {
666 array_remapping& m = map[res.array_id];
667 if (m.is_valid()) {
668 res.array_id = m.target_array_id();
669 res.swizzle = m.map_swizzles(res.swizzle);
670 }
671 }
672 }
673
674 #if __cplusplus < 201402L
675 delete[] old_sizes;
676 delete[] idx_map;
677 #endif
678
679 return new_narrays;
680 }
681
682 }
683
684 using namespace tgsi_array_merge;
685
686 int merge_arrays(int narrays,
687 unsigned *array_sizes,
688 exec_list *instructions,
689 class array_live_range *arr_live_ranges)
690 {
691 array_remapping *map= new array_remapping[narrays + 1];
692
693 if (get_array_remapping(narrays, arr_live_ranges, map))
694 narrays = remap_arrays(narrays, array_sizes, instructions, map);
695
696 delete[] map;
697 return narrays;
698 }