1 /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26 * Rob Clark <robclark@freedesktop.org>
29 #include "freedreno_util.h"
/*
 * Find/group instruction neighbors:
 */
/* bleh.. the same group_n() pass has to run over two different layouts:
 * shader inputs/outputs (where we have a simple instr[] array), and
 * fanin nodes (where we have an extra indirection via reg->instr).
 * These callbacks hide the difference from group_n().
 */
struct group_ops {
	/* fetch the instruction occupying slot 'idx' of 'arr' */
	struct ir3_instruction *(*get)(void *arr, int idx);
	/* resolve a conflict on slot 'idx' of 'arr' by inserting a mov for 'instr' */
	void (*insert_mov)(void *arr, int idx, struct ir3_instruction *instr);
};
/* group_ops::get for a plain table of instruction pointers: 'arr' is
 * interpreted as a struct ir3_instruction *[] and the entry at 'idx'
 * is returned as-is (it may be NULL).
 */
static struct ir3_instruction *arr_get(void *arr, int idx)
{
	struct ir3_instruction **slots = arr;

	return slots[idx];
}
50 static void arr_insert_mov_out(void *arr
, int idx
, struct ir3_instruction
*instr
)
52 ((struct ir3_instruction
**)arr
)[idx
] =
53 ir3_MOV(instr
->block
, instr
, TYPE_F32
);
55 static void arr_insert_mov_in(void *arr
, int idx
, struct ir3_instruction
*instr
)
57 /* so, we can't insert a mov in front of a meta:in.. and the downstream
58 * instruction already has a pointer to 'instr'. So we cheat a bit and
59 * morph the meta:in instruction into a mov and insert a new meta:in
62 struct ir3_instruction
*in
;
64 debug_assert(instr
->regs_count
== 1);
66 in
= ir3_instr_create(instr
->block
, OPC_META_INPUT
);
67 in
->inout
.block
= instr
->block
;
68 ir3_reg_create(in
, instr
->regs
[0]->num
, 0);
70 /* create src reg for meta:in and fixup to now be a mov: */
71 ir3_reg_create(instr
, 0, IR3_REG_SSA
)->instr
= in
;
73 instr
->cat1
.src_type
= TYPE_F32
;
74 instr
->cat1
.dst_type
= TYPE_F32
;
76 ((struct ir3_instruction
**)arr
)[idx
] = in
;
78 static struct group_ops arr_ops_out
= { arr_get
, arr_insert_mov_out
};
79 static struct group_ops arr_ops_in
= { arr_get
, arr_insert_mov_in
};
81 static struct ir3_instruction
*instr_get(void *arr
, int idx
)
83 return ssa(((struct ir3_instruction
*)arr
)->regs
[idx
+1]);
86 instr_insert_mov(void *arr
, int idx
, struct ir3_instruction
*instr
)
88 ((struct ir3_instruction
*)arr
)->regs
[idx
+1]->instr
=
89 ir3_MOV(instr
->block
, instr
, TYPE_F32
);
91 static struct group_ops instr_ops
= { instr_get
, instr_insert_mov
};
93 /* verify that cur != instr, but cur is also not in instr's neighbor-list: */
95 in_neighbor_list(struct ir3_instruction
*instr
, struct ir3_instruction
*cur
, int pos
)
105 for (instr
= ir3_neighbor_first(instr
); instr
; instr
= instr
->cp
.right
)
106 if ((idx
++ != pos
) && (instr
== cur
))
113 group_n(struct group_ops
*ops
, void *arr
, unsigned n
)
117 /* first pass, figure out what has conflicts and needs a mov
118 * inserted. Do this up front, before starting to setup
119 * left/right neighbor pointers. Trying to do it in a single
120 * pass could result in a situation where we can't even setup
121 * the mov's right neighbor ptr if the next instr also needs
125 for (i
= 0; i
< n
; i
++) {
126 struct ir3_instruction
*instr
= ops
->get(arr
, i
);
128 struct ir3_instruction
*left
= (i
> 0) ? ops
->get(arr
, i
- 1) : NULL
;
129 struct ir3_instruction
*right
= (i
< (n
-1)) ? ops
->get(arr
, i
+ 1) : NULL
;
132 /* check for left/right neighbor conflicts: */
133 conflict
= conflicts(instr
->cp
.left
, left
) ||
134 conflicts(instr
->cp
.right
, right
);
136 /* RA can't yet deal very well w/ group'd phi's: */
137 if (instr
->opc
== OPC_META_PHI
)
140 /* we also can't have an instr twice in the group: */
141 for (j
= i
+ 1; (j
< n
) && !conflict
; j
++)
142 if (in_neighbor_list(ops
->get(arr
, j
), instr
, i
))
146 ops
->insert_mov(arr
, i
, instr
);
147 /* inserting the mov may have caused a conflict
148 * against the previous:
155 /* second pass, now that we've inserted mov's, fixup left/right
156 * neighbors. This is guaranteed to succeed, since by definition
157 * the newly inserted mov's cannot conflict with anything.
159 for (i
= 0; i
< n
; i
++) {
160 struct ir3_instruction
*instr
= ops
->get(arr
, i
);
162 struct ir3_instruction
*left
= (i
> 0) ? ops
->get(arr
, i
- 1) : NULL
;
163 struct ir3_instruction
*right
= (i
< (n
-1)) ? ops
->get(arr
, i
+ 1) : NULL
;
165 debug_assert(!conflicts(instr
->cp
.left
, left
));
167 instr
->cp
.left_cnt
++;
168 instr
->cp
.left
= left
;
171 debug_assert(!conflicts(instr
->cp
.right
, right
));
173 instr
->cp
.right_cnt
++;
174 instr
->cp
.right
= right
;
181 instr_find_neighbors(struct ir3_instruction
*instr
)
183 struct ir3_instruction
*src
;
185 if (ir3_instr_check_mark(instr
))
188 if (instr
->opc
== OPC_META_FI
)
189 group_n(&instr_ops
, instr
, instr
->regs_count
- 1);
191 foreach_ssa_src(src
, instr
)
192 instr_find_neighbors(src
);
195 /* a bit of sadness.. we can't have "holes" in inputs from PoV of
196 * register assignment, they still need to be grouped together. So
197 * we need to insert dummy/padding instruction for grouping, and
198 * then take it back out again before anyone notices.
201 pad_and_group_input(struct ir3_instruction
**input
, unsigned n
)
204 struct ir3_block
*block
= NULL
;
206 for (i
= n
- 1; i
>= 0; i
--) {
207 struct ir3_instruction
*instr
= input
[i
];
209 block
= instr
->block
;
211 instr
= ir3_NOP(block
);
212 ir3_reg_create(instr
, 0, IR3_REG_SSA
); /* dummy dst */
218 group_n(&arr_ops_in
, input
, n
);
220 for (i
= 0; i
< n
; i
++) {
227 find_neighbors(struct ir3
*ir
)
231 /* shader inputs/outputs themselves must be contiguous as well:
233 * NOTE: group inputs first, since we only insert mov's
234 * *before* the conflicted instr (and that would go badly
235 * for inputs). By doing inputs first, we should never
236 * have a conflict on inputs.. pushing any conflict to
237 * resolve to the outputs, for stuff like:
239 * MOV OUT[n], IN[m].wzyx
241 * NOTE: we assume here inputs/outputs are grouped in vec4.
242 * This logic won't quite cut it if we don't align smaller
245 for (i
= 0; i
< ir
->ninputs
; i
+= 4)
246 pad_and_group_input(&ir
->inputs
[i
], 4);
247 for (i
= 0; i
< ir
->noutputs
; i
+= 4)
248 group_n(&arr_ops_out
, &ir
->outputs
[i
], 4);
250 for (i
= 0; i
< ir
->noutputs
; i
++) {
251 if (ir
->outputs
[i
]) {
252 struct ir3_instruction
*instr
= ir
->outputs
[i
];
253 instr_find_neighbors(instr
);
257 for (i
= 0; i
< ir
->keeps_count
; i
++) {
258 struct ir3_instruction
*instr
= ir
->keeps
[i
];
259 instr_find_neighbors(instr
);
264 ir3_group(struct ir3
*ir
)