r600g: implement replacing gpr with pv and ps
author Christian König <deathsimple@vodafone.de>
Sat, 18 Dec 2010 20:32:16 +0000 (21:32 +0100)
committer Christian König <deathsimple@vodafone.de>
Sat, 18 Dec 2010 20:32:16 +0000 (21:32 +0100)
src/gallium/drivers/r600/r600_asm.c
src/gallium/drivers/r600/r600_asm.h
src/gallium/drivers/r600/r600_sq.h

index 82911e924188c94d7222b4de0a2f2f1503720cf8..c0501f5018d8a8965bc4f1a9ce1d136f497103fe 100644 (file)
@@ -107,7 +107,6 @@ static struct r600_bc_alu *r600_bc_alu(void)
        if (alu == NULL)
                return NULL;
        LIST_INITHEAD(&alu->list);
-       LIST_INITHEAD(&alu->bs_list);
        return alu;
 }
 
@@ -551,6 +550,63 @@ static int check_and_set_bank_swizzle(struct r600_bc *bc, struct r600_bc_alu *al
        return -1;
 }
 
+static int replace_gpr_with_pv_ps(struct r600_bc_alu *alu_first, struct r600_bc_alu *alu_prev)
+{ /* Rewrites GPR source operands of the alu_first group that match a destination recorded from the alu_prev group to V_SQ_ALU_SRC_PV / V_SQ_ALU_SRC_PS. */
+       struct r600_bc_alu *slots[5]; /* filled by assign_alu_units(); reused, first for alu_prev and then for alu_first */
+       int gpr[5], chan[5]; /* per slot: destination sel recorded from alu_prev (-1 = nothing usable) and the channel paired with it */
+       int i, j, r, src, num_src;
+       /* Returns 0 on success, otherwise the non-zero result of assign_alu_units(). */
+       r = assign_alu_units(alu_prev, slots); /* NOTE(review): presumably places each instruction of the group into one of the five slots - confirm against its definition */
+       if (r)
+               return r;
+
+       for (i = 0; i < 5; ++i) {
+               if(slots[i] && slots[i]->dst.write && !slots[i]->dst.rel) { /* slot is occupied, dst.write is set and dst.rel is clear */
+                       gpr[i] = slots[i]->dst.sel;
+                       if (is_alu_reduction_inst(slots[i]))
+                               chan[i] = 0; /* NOTE(review): presumably a reduction's result is read back from channel 0 - confirm */
+                       else
+                               chan[i] = slots[i]->dst.chan;
+               } else
+                       gpr[i] = -1; /* chan[i] stays unset here; it is only read after a source sel compared equal to gpr[i] */
+               
+       }
+
+       r = assign_alu_units(alu_first, slots); /* from here on slots[] describes the alu_first group */
+       if (r)
+               return r;
+
+       for (i = 0; i < 5; ++i) {
+               struct r600_bc_alu *alu = slots[i];
+               if(!alu)
+                       continue;
+
+               num_src = r600_bc_get_num_operands(alu);
+               for (src = 0; src < num_src; ++src) {
+                       if (!is_gpr(alu->src[src].sel) || alu->src[src].rel) /* leave sources alone when is_gpr() fails or src rel is set */
+                               continue;
+
+                       if (alu->src[src].sel == gpr[4] && /* slot 4 is checked first: match on its recorded sel and channel */
+                               alu->src[src].chan == chan[4]) {
+                               alu->src[src].sel = V_SQ_ALU_SRC_PS;
+                               alu->src[src].chan = 0;
+                               continue; /* already replaced; skip the PV search below */
+                       }
+
+                       for (j = 0; j < 4; ++j) { /* slots 0..3 */
+                               if (alu->src[src].sel == gpr[j] &&
+                                       alu->src[src].chan == j) { /* the source channel is compared with the slot index j, not with chan[j] */
+                                       alu->src[src].sel = V_SQ_ALU_SRC_PV;
+                                       alu->src[src].chan = chan[j]; /* the replacement reads the channel recorded for that slot */
+                                       break;
+                               }
+                       }
+               }
+       }
+
+       return 0;
+}
+
 int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int type)
 {
        struct r600_bc_alu *nalu = r600_bc_alu();
@@ -587,9 +643,6 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
        bc->cf_last->inst = (type << 3);
        if (!bc->cf_last->curr_bs_head) {
                bc->cf_last->curr_bs_head = nalu;
-               LIST_INITHEAD(&nalu->bs_list);
-       } else {
-               LIST_ADDTAIL(&nalu->bs_list, &bc->cf_last->curr_bs_head->bs_list);
        }
        /* at most 128 slots, one add alu can add 4 slots + 4 constants(2 slots)
         * worst case */
@@ -628,9 +681,12 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 
        /* process cur ALU instructions for bank swizzle */
        if (alu->last) {
+               if (bc->cf_last->prev_bs_head)
+                       replace_gpr_with_pv_ps(bc->cf_last->curr_bs_head, bc->cf_last->prev_bs_head);
                r = check_and_set_bank_swizzle(bc, bc->cf_last->curr_bs_head);
                if (r)
                        return r;
+               bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head;
                bc->cf_last->curr_bs_head = NULL;
        }
        return 0;
index fc60079c3e08098cd97ba3ff46a744b39bf7e473..94ba902fb5b2b4112a8fcefbe284e5fec7908b5a 100644 (file)
@@ -46,7 +46,6 @@ struct r600_bc_alu_dst {
 
 struct r600_bc_alu {
        struct list_head                list;
-       struct list_head                bs_list; /* bank swizzle list */
        struct r600_bc_alu_src          src[3];
        struct r600_bc_alu_dst          dst;
        unsigned                        inst;
@@ -143,6 +142,7 @@ struct r600_bc_cf {
        struct list_head                vtx;
        struct r600_bc_output           output;
        struct r600_bc_alu              *curr_bs_head;
+       struct r600_bc_alu              *prev_bs_head;
 };
 
 #define FC_NONE                                0
index 0573e63dc8236c613e5eeb904e2c1c20878ea89c..2401d47e2a2fed11f22fd1607e10f04715a6d55a 100644 (file)
 #define     V_SQ_ALU_SRC_M_1_INT                                     0x000000FB
 #define     V_SQ_ALU_SRC_0_5                                         0x000000FC
 #define     V_SQ_ALU_SRC_LITERAL                                     0x000000FD
+#define     V_SQ_ALU_SRC_PV                                          0x000000FE
+#define     V_SQ_ALU_SRC_PS                                          0x000000FF
 #define     V_SQ_ALU_SRC_PARAM_BASE                                  0x000001C0
 #define   S_SQ_ALU_WORD0_SRC0_REL(x)                                 (((x) & 0x1) << 9)
 #define   G_SQ_ALU_WORD0_SRC0_REL(x)                                 (((x) >> 9) & 0x1)