*/
int
ir3_delayslots(struct ir3_instruction *assigner,
- struct ir3_instruction *consumer, unsigned n)
+ struct ir3_instruction *consumer, unsigned n, bool soft)
{
if (ignore_dep(assigner, consumer, n))
return 0;
if (is_meta(assigner) || is_meta(consumer))
return 0;
- if (writes_addr(assigner))
+ if (writes_addr0(assigner) || writes_addr1(assigner))
return 6;
+ /* On a6xx, the number of delay slots needed to get an SFU result
+ * back (i.e. when using nops instead of (ss)) is:
+ *
+ * 8 - single warp
+ * 9 - two warps
+ * 10 - four warps
+ *
+ * and so on. Not quite sure where it tapers out (ie. how many
+ * warps share an SFU unit). But 10 seems like a reasonable #
+ * to choose:
+ */
+ if (soft && is_sfu(assigner))
+ return 10;
+
/* handled via sync flags: */
if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner))
return 0;
* find the worst case (shortest) distance (only possible after
* individual blocks are all scheduled)
*/
-unsigned
-ir3_distance(struct ir3_block *block, struct ir3_instruction *instr,
+static unsigned
+distance(struct ir3_block *block, struct ir3_instruction *instr,
unsigned maxd, bool pred)
{
unsigned d = 0;
struct ir3_block *pred = (struct ir3_block *)entry->key;
unsigned n;
- n = ir3_distance(pred, instr, min, pred);
+ n = distance(pred, instr, min, pred);
min = MIN2(min, n);
}
delay = MAX2(delay, d);
}
} else {
- if (soft) {
- if (is_sfu(assigner)) {
- delay = 4;
- } else {
- delay = ir3_delayslots(assigner, consumer, srcn);
- }
- } else {
- delay = ir3_delayslots(assigner, consumer, srcn);
- }
- delay -= ir3_distance(block, assigner, delay, pred);
+ delay = ir3_delayslots(assigner, consumer, srcn, soft);
+ delay -= distance(block, assigner, delay, pred);
}
return delay;