2 * Copyright © 2017 Gert Wollny
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
24 /* A short overview on how the array merging works:
27 * - per array information: live range, access mask, size
31 * - the program with updated array addressing
36 * for all pairs of arrays:
37 * if they have non-overlapping live ranges and equal access masks:
38 * - pick shorter array
39 * - merge its live range into the longer array
40 * - set its merge target array to the longer array
41 * - mark the shorter array as processed
43 * for all pairs of arrays:
44 * if they have overlapping live ranges use in sum at most four components:
45 * - pick shorter array
46 * - evaluate reswizzle map to move its components into the components
47 * that are not used by the longer array
48 * - set its merge target array to the longer array
49 * - mark the shorter array as processed
51 * until no more successfull merges were found
53 * for all pairs of arrays:
54 * if they have non-overlapping live ranges:
55 * - pick shorter array
56 * - merge its live range into the longer array
57 * - set its merge target array to the longer array
58 * - mark the shorter array as processed
60 * Finalize remapping map so that target arrays are always final, i.e. have
61 * themselfes no merge target set.
64 * ID | Length | Live range | access mask | target id | reswizzle
65 * ================================================================
66 * 1 3 3-10 x___ 0 ____
67 * 2 4 13-20 x___ 0 ____
68 * 3 8 3-20 x___ 0 ____
69 * 4 6 21-40 xy__ 0 ____
70 * 5 7 12-30 xy__ 0 ____
72 * 1. merge live ranges 1 and 2
74 * ID | Length | Live range | access mask | target id | reswizzle
75 * ================================================================
77 * 2 4 3-20 x___ 0 ____
78 * 3 8 3-20 x___ 0 ____
79 * 4 6 21-40 xy__ 0 ____
80 * 5 7 12-30 xy__ 0 ____
83 * 3. interleave 2 and 3
85 * ID | Length | Live range | access mask | target id | reswizzle
86 * ================================================================
89 * 3 8 3-20 xy__ 0 ____
90 * 4 6 21-40 xy__ 0 ____
91 * 5 7 12-30 xy__ 0 ____
93 * 3. merge live ranges 3 and 4
95 * ID | Length | Live range | access mask | target id | reswizzle
96 * ================================================================
99 * 3 8 3-40 xy__ 0 ____
101 * 5 7 3-21 xy__ 0 ____
103 * 4. interleave 3 and 5
105 * ID | Length | Live range | access mask | target id | reswizzle
106 * ================================================================
109 * 3 8 3-40 xy__ 0 ____
113 * 5. finalize remapping
114 * (Array 1 has been merged with 2 that was later interleaved, so
115 * the reswizzeling must be propagated.
117 * ID | Length | Live range | new access mask | target id | reswizzle
118 * ================================================================
121 * 3 8 3-40 xy__ 0 ____
127 #include "program/prog_instruction.h"
128 #include "util/u_math.h"
135 #include "st_glsl_to_tgsi_array_merge.h"
137 #if __cplusplus >= 201402L
139 using std::unique_ptr
;
140 using std::make_unique
;
143 #define ARRAY_MERGE_DEBUG 0
145 #if ARRAY_MERGE_DEBUG > 0
146 #define ARRAY_MERGE_DUMP(x) do std::cerr << x; while (0)
147 #define ARRAY_MERGE_DUMP_BLOCK(x) do { x } while (0)
149 #define ARRAY_MERGE_DUMP(x)
150 #define ARRAY_MERGE_DUMP_BLOCK(x)
153 static const char xyzw
[] = "xyzw";
155 array_live_range::array_live_range():
160 component_access_mask(0),
161 used_component_count(0),
162 target_array(nullptr)
167 array_live_range::array_live_range(unsigned aid
, unsigned alength
):
172 component_access_mask(0),
173 used_component_count(0),
174 target_array(nullptr)
179 array_live_range::array_live_range(unsigned aid
, unsigned alength
, int begin
,
185 component_access_mask(sw
),
186 used_component_count(util_bitcount(sw
)),
187 target_array(nullptr)
192 void array_live_range::init_swizzles()
194 for (int i
= 0; i
< 4; ++i
)
198 void array_live_range::set_live_range(int _begin
, int _end
)
204 void array_live_range::set_access_mask(int mask
)
206 component_access_mask
= mask
;
207 used_component_count
= util_bitcount(mask
);
210 void array_live_range::merge(array_live_range
*a
, array_live_range
*b
)
212 if (a
->array_length() < b
->array_length())
213 b
->merge_live_range_from(a
);
215 a
->merge_live_range_from(b
);
218 void array_live_range::interleave(array_live_range
*a
, array_live_range
*b
)
220 if (a
->array_length() < b
->array_length())
221 a
->interleave_into(b
);
223 b
->interleave_into(a
);
226 void array_live_range::interleave_into(array_live_range
*other
)
228 for (int i
= 0; i
< 4; ++i
) {
232 int trgt_access_mask
= other
->access_mask();
233 int summary_access_mask
= trgt_access_mask
;
234 int src_swizzle_bit
= 1;
235 int next_free_swizzle_bit
= 1;
238 unsigned last_src_bit
= util_last_bit(component_access_mask
);
240 for (i
= 0; i
<= last_src_bit
; ++i
, src_swizzle_bit
<<= 1) {
242 /* Jump over empty src component slots (e.g. x__w). This is just a
243 * safety measure and it is tested for, but it is very likely that the
244 * emitted code always uses slots staring from x without leaving holes
245 * (i.e. always xy__ not x_z_ or _yz_ etc).
247 if (!(src_swizzle_bit
& component_access_mask
))
250 /* Find the next free access slot in the target. */
251 while ((trgt_access_mask
& next_free_swizzle_bit
) &&
253 next_free_swizzle_bit
<<= 1;
257 "Interleaved array would have more then four components");
259 /* Set the mapping for this component. */
261 trgt_access_mask
|= next_free_swizzle_bit
;
263 /* Update the joined access mask if we didn't just fill the mapping.*/
264 if (src_swizzle_bit
& component_access_mask
)
265 summary_access_mask
|= next_free_swizzle_bit
;
268 other
->set_access_mask(summary_access_mask
);
269 other
->merge_live_range_from(this);
271 ARRAY_MERGE_DUMP_BLOCK(
272 std::cerr
<< "Interleave " << id
<< " into " << other
->id
<< ", swz:";
273 for (unsigned i
= 0; i
< 4; ++i
) {
274 std::cerr
<< ((swizzle_map
[i
] >= 0) ? xyzw
[swizzle_map
[i
]] : '_');
280 void array_live_range::merge_live_range_from(array_live_range
*other
)
282 other
->set_target(this);
283 if (other
->begin() < first_access
)
284 first_access
= other
->begin();
285 if (other
->end() > last_access
)
286 last_access
= other
->end();
289 int8_t array_live_range::remap_one_swizzle(int8_t idx
) const
293 idx
= swizzle_map
[idx
];
295 idx
= target_array
->remap_one_swizzle(idx
);
300 void array_live_range::set_target(array_live_range
*target
)
302 target_array
= target
;
305 void array_live_range::print(std::ostream
& os
) const
308 << ", length:" << length
309 << ", (b:" << first_access
310 << ", e:" << last_access
311 << "), sw:" << (int)component_access_mask
312 << ", nc:" << (int)used_component_count
316 bool array_live_range::time_doesnt_overlap(const array_live_range
& other
) const
318 return (other
.last_access
< first_access
||
319 last_access
< other
.first_access
);
322 namespace tgsi_array_merge
{
324 array_remapping::array_remapping():
327 for (int i
= 0; i
< 4; ++i
) {
328 read_swizzle_map
[i
] = i
;
332 array_remapping::array_remapping(int trgt_array_id
, const int8_t swizzle
[]):
333 target_id(trgt_array_id
)
335 for (int i
= 0; i
< 4; ++i
) {
336 read_swizzle_map
[i
] = swizzle
[i
];
340 void array_remapping::init_from(const array_live_range
& range
)
342 target_id
= range
.is_mapped() ? range
.final_target()->array_id(): 0;
343 for (int i
= 0; i
< 4; ++i
)
344 read_swizzle_map
[i
] = range
.remap_one_swizzle(i
);
348 int array_remapping::map_writemask(int write_mask
) const
351 int result_write_mask
= 0;
352 for (int i
= 0; i
< 4; ++i
) {
353 if (1 << i
& write_mask
) {
354 assert(read_swizzle_map
[i
] >= 0);
355 result_write_mask
|= 1 << read_swizzle_map
[i
];
358 return result_write_mask
;
361 uint16_t array_remapping::move_read_swizzles(uint16_t original_swizzle
) const
366 * dst.zw = src.xy in glsl actually is MOV dst.__zw src.__xy
368 * when interleaving the arrays the source swizzles must be moved
369 * according to the changed dst write mask.
371 uint16_t out_swizzle
= 0;
372 for (int idx
= 0; idx
< 4; ++idx
) {
373 uint16_t orig_swz
= GET_SWZ(original_swizzle
, idx
);
374 int new_idx
= read_swizzle_map
[idx
];
376 out_swizzle
|= orig_swz
<< 3 * new_idx
;
381 uint16_t array_remapping::map_swizzles(uint16_t old_swizzle
) const
383 uint16_t out_swizzle
= 0;
384 for (int idx
= 0; idx
< 4; ++idx
) {
385 uint16_t swz
= read_swizzle_map
[GET_SWZ(old_swizzle
, idx
)];
386 out_swizzle
|= swz
<< 3 * idx
;
391 void array_remapping::print(std::ostream
& os
) const
394 os
<< "[aid: " << target_id
<< " swz: ";
395 for (int i
= 0; i
< 4; ++i
)
396 os
<< (read_swizzle_map
[i
] >= 0 ? xyzw
[read_swizzle_map
[i
]] : '_');
403 /* Required by the unit tests */
404 bool operator == (const array_remapping
& lhs
, const array_remapping
& rhs
)
406 if (lhs
.target_id
!= rhs
.target_id
)
409 if (lhs
.target_id
== 0)
412 for (int i
= 0; i
< 4; ++i
) {
413 if (lhs
.read_swizzle_map
[i
] != rhs
.read_swizzle_map
[i
])
420 bool sort_by_begin(const array_live_range
& lhs
, const array_live_range
& rhs
) {
421 return lhs
.begin() < rhs
.begin();
424 /* Helper class to evaluate merging and interleaving of arrays */
425 class array_merge_evaluator
{
427 typedef int (*array_merger
)(array_live_range
& range_1
,
428 array_live_range
& range_2
);
430 array_merge_evaluator(int _narrays
, array_live_range
*_ranges
,
433 /** Run the merge strategy on all arrays
434 * @returns number of successfull merges
439 virtual int do_run(array_live_range
& range_1
, array_live_range
& range_2
) = 0;
442 array_live_range
*ranges
;
446 array_merge_evaluator::array_merge_evaluator(int _narrays
,
447 array_live_range
*_ranges
,
455 int array_merge_evaluator::run()
459 for (int i
= 0; i
< narrays
; ++i
) {
460 if (ranges
[i
].is_mapped())
463 for (int j
= i
+ 1; j
< narrays
; ++j
) {
464 if (!ranges
[j
].is_mapped()) {
465 ARRAY_MERGE_DUMP("try merge " << i
<< " id:" << ranges
[i
].array_id()
466 << " and " << j
<< " id: "<< ranges
[j
].array_id()
468 int n
= do_run(ranges
[i
], ranges
[j
]);
478 /* Merge live ranges if possible at all */
479 class merge_live_range_always
: public array_merge_evaluator
{
481 merge_live_range_always(int _narrays
, array_live_range
*_ranges
):
482 array_merge_evaluator(_narrays
, _ranges
, false) {
485 int do_run(array_live_range
& range_1
, array_live_range
& range_2
){
486 if (range_2
.time_doesnt_overlap(range_1
)) {
487 ARRAY_MERGE_DUMP("merge " << range_2
<< " into " << range_1
<< "\n");
488 array_live_range::merge(&range_1
,&range_2
);
495 /* Merge live ranges only if they use the same swizzle */
496 class merge_live_range_equal_swizzle
: public merge_live_range_always
{
498 merge_live_range_equal_swizzle(int _narrays
, array_live_range
*_ranges
):
499 merge_live_range_always(_narrays
, _ranges
) {
502 int do_run(array_live_range
& range_1
, array_live_range
& range_2
){
503 if (range_1
.access_mask() == range_2
.access_mask()) {
504 return merge_live_range_always::do_run(range_1
, range_2
);
510 /* Interleave arrays if possible */
511 class interleave_live_range
: public array_merge_evaluator
{
513 interleave_live_range(int _narrays
, array_live_range
*_ranges
):
514 array_merge_evaluator(_narrays
, _ranges
, true) {
517 int do_run(array_live_range
& range_1
, array_live_range
& range_2
){
518 if ((range_2
.used_components() + range_1
.used_components() <= 4) &&
519 !range_1
.time_doesnt_overlap(range_2
)) {
520 ARRAY_MERGE_DUMP("Interleave " << range_2
<< " into " << range_1
<< "\n");
521 array_live_range::interleave(&range_1
, &range_2
);
528 /* Estimate the array merging: First in a loop, arrays with equal access mask
529 * are merged, then interleave arrays that together use at most four components,
530 * and have overlapping live ranges. Finally arrays are merged regardless of
532 * @param[in] narrays number of arrays
533 * @param[in,out] alt array life times, the merge target life time will be
534 * updated with the new life time.
535 * @param[in,out] remapping track the arraay index remapping and reswizzeling.
536 * @returns number of merged arrays
538 bool get_array_remapping(int narrays
, array_live_range
*ranges
,
539 array_remapping
*remapping
)
541 int total_remapped
= 0;
544 /* Sort by "begin of live range" so that we don't have to restart searching
547 std::sort(ranges
, ranges
+ narrays
, sort_by_begin
);
548 merge_live_range_equal_swizzle
merge_evaluator_es(narrays
, ranges
);
549 interleave_live_range
interleave_lr(narrays
, ranges
);
552 n_remapped
= merge_evaluator_es
.run();
554 /* try only one array interleave, if successfull, another
555 * live_range merge is tried. The test MergeAndInterleave5
556 * (mesa/st/tests/test_glsl_to_tgsi_array_merge.cpp)
557 * shows that this can result in more arrays being merged/interleaved.
559 n_remapped
+= interleave_lr
.run();
560 total_remapped
+= n_remapped
;
562 ARRAY_MERGE_DUMP("Remapped " << n_remapped
<< " arrays\n");
563 } while (n_remapped
> 0);
565 total_remapped
+= merge_live_range_always(narrays
, ranges
).run();
566 ARRAY_MERGE_DUMP("Remapped a total of " << total_remapped
<< " arrays\n");
568 /* Resolve the remapping chain */
569 for (int i
= 1; i
<= narrays
; ++i
) {
570 ARRAY_MERGE_DUMP("Map " << i
<< ":");
571 remapping
[ranges
[i
-1].array_id()].init_from(ranges
[i
-1]);
573 return total_remapped
> 0;
576 /* Remap the arrays in a TGSI program according to the given mapping.
577 * @param narrays number of arrays
578 * @param array_sizes array of arrays sizes
579 * @param map the array remapping information
580 * @param instructions TGSI program
581 * @returns number of arrays after remapping
583 int remap_arrays(int narrays
, unsigned *array_sizes
,
584 exec_list
*instructions
,
585 array_remapping
*map
)
587 /* re-calculate arrays */
588 #if __cplusplus < 201402L
589 int *idx_map
= new int[narrays
+ 1];
590 unsigned *old_sizes
= new unsigned[narrays
+ 1];
592 unique_ptr
<int[]> idx_map
= make_unique
<int[]>(narrays
+ 1);
593 unique_ptr
<unsigned[]> old_sizes
= make_unique
<unsigned[]>(narrays
+ 1);
596 memcpy(&old_sizes
[0], &array_sizes
[0], sizeof(unsigned) * narrays
);
598 /* Evaluate mapping for the array indices and update array sizes */
600 for (int i
= 1; i
<= narrays
; ++i
) {
601 if (!map
[i
].is_valid()) {
603 idx_map
[i
] = new_narrays
;
604 array_sizes
[new_narrays
] = old_sizes
[i
];
608 /* Map the array ids of merged arrays. */
609 for (int i
= 1; i
<= narrays
; ++i
) {
610 if (map
[i
].is_valid()) {
611 map
[i
].set_target_id(idx_map
[map
[i
].target_array_id()]);
615 /* Map the array ids of merge targets that got only renumbered. */
616 for (int i
= 1; i
<= narrays
; ++i
) {
617 if (!map
[i
].is_valid()) {
618 map
[i
].set_target_id(idx_map
[i
]);
622 /* Update the array ids and swizzles in the registers */
623 foreach_in_list(glsl_to_tgsi_instruction
, inst
, instructions
) {
624 for (unsigned j
= 0; j
< num_inst_src_regs(inst
); j
++) {
625 st_src_reg
& src
= inst
->src
[j
];
626 if (src
.file
== PROGRAM_ARRAY
&& src
.array_id
> 0) {
627 array_remapping
& m
= map
[src
.array_id
];
629 src
.array_id
= m
.target_array_id();
630 src
.swizzle
= m
.map_swizzles(src
.swizzle
);
634 for (unsigned j
= 0; j
< inst
->tex_offset_num_offset
; j
++) {
635 st_src_reg
& src
= inst
->tex_offsets
[j
];
636 if (src
.file
== PROGRAM_ARRAY
&& src
.array_id
> 0) {
637 array_remapping
& m
= map
[src
.array_id
];
639 src
.array_id
= m
.target_array_id();
640 src
.swizzle
= m
.map_swizzles(src
.swizzle
);
644 for (unsigned j
= 0; j
< num_inst_dst_regs(inst
); j
++) {
645 st_dst_reg
& dst
= inst
->dst
[j
];
646 if (dst
.file
== PROGRAM_ARRAY
&& dst
.array_id
> 0) {
647 array_remapping
& m
= map
[dst
.array_id
];
650 "remapping can only be done for single dest ops");
651 dst
.array_id
= m
.target_array_id();
652 dst
.writemask
= m
.map_writemask(dst
.writemask
);
654 /* If the target component is moved, then the source swizzles
655 * must be moved accordingly.
657 for (unsigned j
= 0; j
< num_inst_src_regs(inst
); j
++) {
658 st_src_reg
& src
= inst
->src
[j
];
659 src
.swizzle
= m
.move_read_swizzles(src
.swizzle
);
664 st_src_reg
& res
= inst
->resource
;
665 if (res
.file
== PROGRAM_ARRAY
&& res
.array_id
> 0) {
666 array_remapping
& m
= map
[res
.array_id
];
668 res
.array_id
= m
.target_array_id();
669 res
.swizzle
= m
.map_swizzles(res
.swizzle
);
674 #if __cplusplus < 201402L
684 using namespace tgsi_array_merge
;
686 int merge_arrays(int narrays
,
687 unsigned *array_sizes
,
688 exec_list
*instructions
,
689 struct array_live_range
*arr_live_ranges
)
691 array_remapping
*map
= new array_remapping
[narrays
+ 1];
693 if (get_array_remapping(narrays
, arr_live_ranges
, map
))
694 narrays
= remap_arrays(narrays
, array_sizes
, instructions
, map
);