From 7b0cc9bd386f62f58c39e66ce29f1423cfccdfb7 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Christian=20K=C3=B6nig?= Date: Sat, 18 Dec 2010 21:32:16 +0100 Subject: [PATCH] r600g: implement replacing gpr with pv and ps --- src/gallium/drivers/r600/r600_asm.c | 64 +++++++++++++++++++++++++++-- src/gallium/drivers/r600/r600_asm.h | 2 +- src/gallium/drivers/r600/r600_sq.h | 2 + 3 files changed, 63 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 82911e92418..c0501f5018d 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -107,7 +107,6 @@ static struct r600_bc_alu *r600_bc_alu(void) if (alu == NULL) return NULL; LIST_INITHEAD(&alu->list); - LIST_INITHEAD(&alu->bs_list); return alu; } @@ -551,6 +550,63 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, struct r600_bc_alu *al return -1; } +static int replace_gpr_with_pv_ps(struct r600_bc_alu *alu_first, struct r600_bc_alu *alu_prev) /* Rewrites non-relative GPR sources of the ALU group at alu_first that match a destination written by the group at alu_prev to V_SQ_ALU_SRC_PS (slot 4) or V_SQ_ALU_SRC_PV (slots 0-3). Returns 0, or the non-zero result of assign_alu_units. */ +{ + struct r600_bc_alu *slots[5]; + int gpr[5], chan[5]; /* per-slot destination register and channel taken from the alu_prev group */ + int i, j, r, src, num_src; + + r = assign_alu_units(alu_prev, slots); /* presumably fills slots with one instruction per execution unit, confirm in assign_alu_units */ + if (r) + return r; + + for (i = 0; i < 5; ++i) { + if(slots[i] && slots[i]->dst.write && !slots[i]->dst.rel) { /* track only destinations that are written and not relatively addressed */ + gpr[i] = slots[i]->dst.sel; + if (is_alu_reduction_inst(slots[i])) + chan[i] = 0; /* reduction instructions are recorded with channel 0 */ + else + chan[i] = slots[i]->dst.chan; + } else + gpr[i] = -1; /* -1 marks a slot without a tracked destination, chan[i] stays unset */ + + } + + r = assign_alu_units(alu_first, slots); /* slots is reused for the group being rewritten */ + if (r) + return r; + + for (i = 0; i < 5; ++i) { + struct r600_bc_alu *alu = slots[i]; + if(!alu) + continue; + + num_src = r600_bc_get_num_operands(alu); + for (src = 0; src < num_src; ++src) { + if (!is_gpr(alu->src[src].sel) || alu->src[src].rel) /* leave non-GPR and relatively addressed sources alone */ + continue; + + if (alu->src[src].sel == gpr[4] && + alu->src[src].chan == chan[4]) { /* source reads what slot 4 wrote */ + alu->src[src].sel = V_SQ_ALU_SRC_PS; + alu->src[src].chan = 0; + continue; + } + + for (j = 0; j < 4; ++j) { /* the source channel is compared with the slot index j, the new channel is chan[j] */ + if (alu->src[src].sel == gpr[j] && + alu->src[src].chan == j) { + alu->src[src].sel = 
V_SQ_ALU_SRC_PV; + alu->src[src].chan = chan[j]; + break; + } + } + } + } + + return 0; +} + int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type) { struct r600_bc_alu *nalu = r600_bc_alu(); @@ -587,9 +643,6 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int bc->cf_last->inst = (type << 3); if (!bc->cf_last->curr_bs_head) { bc->cf_last->curr_bs_head = nalu; - LIST_INITHEAD(&nalu->bs_list); - } else { - LIST_ADDTAIL(&nalu->bs_list, &bc->cf_last->curr_bs_head->bs_list); } /* at most 128 slots, one add alu can add 4 slots + 4 constants(2 slots) * worst case */ @@ -628,9 +681,12 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int /* process cur ALU instructions for bank swizzle */ if (alu->last) { + if (bc->cf_last->prev_bs_head) + replace_gpr_with_pv_ps(bc->cf_last->curr_bs_head, bc->cf_last->prev_bs_head); /* NOTE(review): the return value is ignored here */ r = check_and_set_bank_swizzle(bc, bc->cf_last->curr_bs_head); if (r) return r; + bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head; /* kept so the next finished group can be matched against this one */ bc->cf_last->curr_bs_head = NULL; } return 0; diff --git a/src/gallium/drivers/r600/r600_asm.h b/src/gallium/drivers/r600/r600_asm.h index fc60079c3e0..94ba902fb5b 100644 --- a/src/gallium/drivers/r600/r600_asm.h +++ b/src/gallium/drivers/r600/r600_asm.h @@ -46,7 +46,6 @@ struct r600_bc_alu_dst { struct r600_bc_alu { struct list_head list; - struct list_head bs_list; /* bank swizzle list */ struct r600_bc_alu_src src[3]; struct r600_bc_alu_dst dst; unsigned inst; @@ -143,6 +142,7 @@ struct r600_bc_cf { struct list_head vtx; struct r600_bc_output output; struct r600_bc_alu *curr_bs_head; + struct r600_bc_alu *prev_bs_head; /* curr_bs_head of the previously finished ALU group, set in r600_bc_add_alu_type */ }; #define FC_NONE 0 diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h index 0573e63dc82..2401d47e2a2 100644 --- a/src/gallium/drivers/r600/r600_sq.h +++ b/src/gallium/drivers/r600/r600_sq.h @@ -187,6 +187,8 @@ #define V_SQ_ALU_SRC_M_1_INT 0x000000FB #define V_SQ_ALU_SRC_0_5 0x000000FC 
#define V_SQ_ALU_SRC_LITERAL 0x000000FD +#define V_SQ_ALU_SRC_PV 0x000000FE /* source select that replace_gpr_with_pv_ps writes for matches against slots 0-3 of the previous group */ +#define V_SQ_ALU_SRC_PS 0x000000FF /* source select that replace_gpr_with_pv_ps writes for a match against slot 4 of the previous group */ #define V_SQ_ALU_SRC_PARAM_BASE 0x000001C0 #define S_SQ_ALU_WORD0_SRC0_REL(x) (((x) & 0x1) << 9) #define G_SQ_ALU_WORD0_SRC0_REL(x) (((x) >> 9) & 0x1) -- 2.30.2