r600g: set correct pitch/offset for depth textures in flushed state.
[mesa.git] / src / gallium / drivers / r600 / r600_asm.c
index 53122e54b7dae67e61a0b6b506fcfde92919dd43..e910d1cc73fb2a08444b73d4886293972d7cc3a7 100644 (file)
 #include "r600_formats.h"
 #include "r600d.h"
 
-static inline unsigned int r600_bc_get_num_operands(struct r600_bc_alu *alu)
+#define NUM_OF_CYCLES 3
+#define NUM_OF_COMPONENTS 4
+
+static inline unsigned int r600_bc_get_num_operands(struct r600_bc *bc, struct r600_bc_alu *alu) /* source-operand count of alu: 3 for op3, else looked up by opcode for bc->chiprev; 3 when the opcode or chiprev is not listed */
 {
        if(alu->is_op3)
                return 3;
 
-       switch (alu->inst) {
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
-               return 0;
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL: 
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE: 
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE:
-               return 2;
-
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV: 
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
-       case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
-               return 1;
-       default: R600_ERR(
-               "Need instruction operand number for 0x%x.\n", alu->inst); 
-       };
+       switch (bc->chiprev) { /* R600/R700 and Evergreen are matched against separate opcode constants */
+       case CHIPREV_R600:
+       case CHIPREV_R700:
+               switch (alu->inst) {
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
+                       return 0;
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE:
+                       return 2;
+
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
+               case V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
+                       return 1;
+               default: R600_ERR(
+                       "Need instruction operand number for 0x%x.\n", alu->inst);
+               }
+               break; /* only reached for an unlisted opcode; falls out to the return 3 at the end */
+       case CHIPREV_EVERGREEN:
+               switch (alu->inst) {
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_NOP:
+                       return 0;
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ADD:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MUL:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MIN:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETE:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETNE:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGT:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SETGE:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_XY:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INTERP_ZW:
+                       return 2;
+
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FRACT:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLOOR:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_TRUNC:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT_FLOOR:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN:
+               case EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS:
+                       return 1;
+               default: R600_ERR(
+                       "Need instruction operand number for 0x%x.\n", alu->inst);
+               }
+               break; /* only reached for an unlisted opcode; falls out to the return 3 at the end */
+       }
 
        return 3;
 }
@@ -104,7 +164,6 @@ static struct r600_bc_alu *r600_bc_alu(void)
        if (alu == NULL)
                return NULL;
        LIST_INITHEAD(&alu->list);
-       LIST_INITHEAD(&alu->bs_list);
        return alu;
 }
 
@@ -187,6 +246,37 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
 {
        int r;
 
+       if (bc->cf_last && (bc->cf_last->inst == output->inst ||
+               (bc->cf_last->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT) &&
+               output->inst == BC_INST(bc, V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_EXPORT_DONE))) &&
+               output->type == bc->cf_last->output.type &&
+               output->elem_size == bc->cf_last->output.elem_size &&
+               output->swizzle_x == bc->cf_last->output.swizzle_x &&
+               output->swizzle_y == bc->cf_last->output.swizzle_y &&
+               output->swizzle_z == bc->cf_last->output.swizzle_z &&
+               output->swizzle_w == bc->cf_last->output.swizzle_w &&
+               (output->burst_count + bc->cf_last->output.burst_count) <= 16) {
+
+               if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr &&
+                       (output->array_base + output->burst_count) == bc->cf_last->output.array_base) {
+
+                       bc->cf_last->output.end_of_program |= output->end_of_program;
+                       bc->cf_last->output.inst = output->inst;
+                       bc->cf_last->output.gpr = output->gpr;
+                       bc->cf_last->output.array_base = output->array_base;
+                       bc->cf_last->output.burst_count += output->burst_count;
+                       return 0;
+
+               } else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) &&
+                       output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) {
+
+                       bc->cf_last->output.end_of_program |= output->end_of_program;
+                       bc->cf_last->output.inst = output->inst;
+                       bc->cf_last->output.burst_count += output->burst_count;
+                       return 0;
+               }
+       }
+
        r = r600_bc_add_cf(bc);
        if (r)
                return r;
@@ -195,222 +285,732 @@ int r600_bc_add_output(struct r600_bc *bc, const struct r600_bc_output *output)
        return 0;
 }
 
-const unsigned bank_swizzle_vec[8] = {SQ_ALU_VEC_210,  //000
-                                     SQ_ALU_VEC_120,  //001
-                                     SQ_ALU_VEC_102,  //010
-
-                                     SQ_ALU_VEC_201,  //011
-                                     SQ_ALU_VEC_012,  //100
-                                     SQ_ALU_VEC_021,  //101
-
-                                     SQ_ALU_VEC_012,  //110
-                                     SQ_ALU_VEC_012}; //111
-
-const unsigned bank_swizzle_scl[8] = {SQ_ALU_SCL_210,  //000
-                                     SQ_ALU_SCL_122,  //001
-                                     SQ_ALU_SCL_122,  //010
-
-                                     SQ_ALU_SCL_221,  //011
-                                     SQ_ALU_SCL_212,  //100
-                                     SQ_ALU_SCL_122,  //101
-
-                                     SQ_ALU_SCL_122,  //110
-                                     SQ_ALU_SCL_122}; //111
-
-static int init_gpr(struct r600_bc_alu *alu)
+/* alu instructions that can only exist once per group */
+static int is_alu_once_inst(struct r600_bc *bc, struct r600_bc_alu *alu) /* nonzero when alu is a non-op3 KILL* or PRED_SET* opcode from the list for bc->chiprev */
 {
-       int cycle, component;
-       /* set up gpr use */
-       for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++)
-               for (component = 0; component < NUM_OF_COMPONENTS; component++)
-                        alu->hw_gpr[cycle][component] = -1;
-       return 0;
+       switch (bc->chiprev) {
+       case CHIPREV_R600:
+       case CHIPREV_R700:
+               return !alu->is_op3 && (
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT);
+       case CHIPREV_EVERGREEN:
+       default: /* any chiprev not listed above is checked against the Evergreen opcodes */
+               return !alu->is_op3 && (
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_UINT ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_UINT ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLE_INT ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGT_INT ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLGE_INT ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_KILLNE_INT ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_UINT ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_UINT ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_INV ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_POP ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_CLR ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SET_RESTORE ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_INT ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_INT ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_INT ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_INT ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETE_PUSH_INT ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGT_PUSH_INT ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETGE_PUSH_INT ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETNE_PUSH_INT ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLT_PUSH_INT ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_PRED_SETLE_PUSH_INT);
+       }
 }
 
-#if 0
-static int reserve_gpr(struct r600_bc_alu *alu, unsigned sel, unsigned chan, unsigned cycle)
+static int is_alu_reduction_inst(struct r600_bc *bc, struct r600_bc_alu *alu) /* nonzero when alu is a non-op3 CUBE, DOT4, DOT4_IEEE or MAX4 in the opcode set for bc->chiprev */
 {
-       if (alu->hw_gpr[cycle][chan] < 0)
-               alu->hw_gpr[cycle][chan] = sel;
-       else if (alu->hw_gpr[cycle][chan] != (int)sel) {
-               R600_ERR("Another scalar operation has already used GPR read port for channel\n");
-               return -1;
+       switch (bc->chiprev) {
+       case CHIPREV_R600:
+       case CHIPREV_R700:
+               return !alu->is_op3 && (
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4);
+       case CHIPREV_EVERGREEN:
+       default: /* any chiprev not listed above is checked against the Evergreen opcodes */
+               return !alu->is_op3 && (
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4 ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_DOT4_IEEE ||
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MAX4);
        }
-       return 0;
 }
 
-static int cycle_for_scalar_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle)
+static int is_alu_cube_inst(struct r600_bc *bc, struct r600_bc_alu *alu) /* nonzero when alu is the non-op3 CUBE opcode for bc->chiprev */
 {
-       int table[3];
-       int ret = 0;
-       switch (swiz) {
-       case SQ_ALU_SCL_210:
-               table[0] = 2; table[1] = 1; table[2] = 0;
-                *p_cycle = table[sel];
-                break;
-       case SQ_ALU_SCL_122:
-               table[0] = 1; table[1] = 2; table[2] = 2;
-                *p_cycle = table[sel];
-                break;
-       case SQ_ALU_SCL_212:
-               table[0] = 2; table[1] = 1; table[2] = 2;
-                *p_cycle = table[sel];
-                break;
-       case SQ_ALU_SCL_221:
-               table[0] = 2; table[1] = 2; table[2] = 1;
-               *p_cycle = table[sel];
-                break;
-               break;
+       switch (bc->chiprev) {
+       case CHIPREV_R600:
+       case CHIPREV_R700:
+               return !alu->is_op3 &&
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE;
+       case CHIPREV_EVERGREEN: /* shares the default arm, so unlisted chiprev values also use the Evergreen opcode */
        default:
-               R600_ERR("bad scalar bank swizzle value\n");
-               ret = -1;
-               break;
+               return !alu->is_op3 &&
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_CUBE;
        }
-       return ret;
 }
 
-static int cycle_for_vector_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle)
+static int is_alu_mova_inst(struct r600_bc *bc, struct r600_bc_alu *alu) /* nonzero when alu is a non-op3 MOVA-family opcode for bc->chiprev */
 {
-       int table[3];
-       int ret;
-
-       switch (swiz) {
-       case SQ_ALU_VEC_012:
-               table[0] = 0; table[1] = 1; table[2] = 2;
-                *p_cycle = table[sel];
-                break;
-       case SQ_ALU_VEC_021:
-               table[0] = 0; table[1] = 2; table[2] = 1;
-                *p_cycle = table[sel];
-                break;
-       case SQ_ALU_VEC_120:
-               table[0] = 1; table[1] = 2; table[2] = 0;
-                *p_cycle = table[sel];
-                break;
-       case SQ_ALU_VEC_102:
-               table[0] = 1; table[1] = 0; table[2] = 2;
-                *p_cycle = table[sel];
-                break;
-       case SQ_ALU_VEC_201:
-               table[0] = 2; table[1] = 0; table[2] = 1;
-                *p_cycle = table[sel];
-                break;
-       case SQ_ALU_VEC_210:
-               table[0] = 2; table[1] = 1; table[2] = 0;
-                *p_cycle = table[sel];
-                break;
+       switch (bc->chiprev) {
+       case CHIPREV_R600:
+       case CHIPREV_R700:
+               return !alu->is_op3 && (
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_FLOOR ||
+                       alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
+       case CHIPREV_EVERGREEN: /* shares the default arm; only MOVA_INT is tested for Evergreen */
        default:
-               R600_ERR("bad vector bank swizzle value\n");
-               ret = -1;
-               break;
+               return !alu->is_op3 && (
+                       alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOVA_INT);
        }
-       return ret;
 }
 
-
-
-static void update_chan_counter(struct r600_bc_alu *alu, int *chan_counter)
+/* alu instructions that can only execute on the vector unit: the reduction and MOVA instructions */
+static int is_alu_vec_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
 {
-       int num_src;
-       int i;
-       int channel_swizzle;
-
-       num_src = r600_bc_get_num_operands(alu);
+       return is_alu_reduction_inst(bc, alu) ||
+               is_alu_mova_inst(bc, alu);
+}
 
-       for (i = 0; i < num_src; i++) {
-               channel_swizzle = alu->src[i].chan;
-               if ((alu->src[i].sel > 0 && alu->src[i].sel < 128) && channel_swizzle <= 3)
-                       chan_counter[channel_swizzle]++;
+/* alu instructions that can only execute on the trans unit */
+static int is_alu_trans_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu) /* nonzero on a match; op2 and op3 opcodes are tested against separate lists per bc->chiprev */
+{
+       switch (bc->chiprev) {
+       case CHIPREV_R600:
+       case CHIPREV_R700:
+               if (!alu->is_op3)
+                       return alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT ||
+                               alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_FLT_TO_INT ||
+                               alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT ||
+                               alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT ||
+                               alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT ||
+                               alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT ||
+                               alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT ||
+                               alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT ||
+                               alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT ||
+                               alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT ||
+                               alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT ||
+                               alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT ||
+                               alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS ||
+                               alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE ||
+                               alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED ||
+                               alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE ||
+                               alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED ||
+                               alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF ||
+                               alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE ||
+                               alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED ||
+                               alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF ||
+                               alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE ||
+                               alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN ||
+                               alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE;
+               else
+                       return alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT ||
+                               alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_D2 ||
+                               alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M2 ||
+                               alu->inst == V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT_M4;
+       case CHIPREV_EVERGREEN:
+       default: /* any chiprev not listed above is checked against the Evergreen opcodes */
+               if (!alu->is_op3)
+                       /* Note that FLT_TO_INT* instructions are vector instructions
+                        * on Evergreen, despite what the documentation says. */
+                       return alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_ASHR_INT ||
+                               alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_INT_TO_FLT ||
+                               alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHL_INT ||
+                               alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LSHR_INT ||
+                               alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_INT ||
+                               alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULHI_UINT ||
+                               alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_INT ||
+                               alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MULLO_UINT ||
+                               alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_INT ||
+                               alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_UINT ||
+                               alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_UINT_TO_FLT ||
+                               alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_COS ||
+                               alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_EXP_IEEE ||
+                               alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_CLAMPED ||
+                               alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_LOG_IEEE ||
+                               alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_CLAMPED ||
+                               alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_FF ||
+                               alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIP_IEEE ||
+                               alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_CLAMPED ||
+                               alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_FF ||
+                               alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_RECIPSQRT_IEEE ||
+                               alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SIN ||
+                               alu->inst == EG_V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_SQRT_IEEE;
+               else
+                       return alu->inst == EG_V_SQ_ALU_WORD1_OP3_SQ_OP3_INST_MUL_LIT;
        }
 }
 
-/* we need something like this I think - but this is bogus */
-int check_read_slots(struct r600_bc *bc, struct r600_bc_alu *alu_first)
+/* alu instructions that can execute on any unit: neither vector-only nor trans-only */
+static int is_alu_any_unit_inst(struct r600_bc *bc, struct r600_bc_alu *alu)
+{
+       return !is_alu_vec_unit_inst(bc, alu) &&
+               !is_alu_trans_unit_inst(bc, alu);
+}
+
+static int assign_alu_units(struct r600_bc *bc, struct r600_bc_alu *alu_first,
+                           struct r600_bc_alu *assignment[5]) /* fills assignment[0..3] (one slot per dst channel) and assignment[4] (trans); returns 0, or -1 when a slot is needed twice */
 {
        struct r600_bc_alu *alu;
-       int chan_counter[4]  = { 0 };
+       unsigned i, chan, trans;
+
+       for (i = 0; i < 5; i++)
+               assignment[i] = NULL;
+
+       for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) { /* walk the list starting at alu_first */
+               chan = alu->dst.chan;
+               if (is_alu_trans_unit_inst(bc, alu))
+                       trans = 1;
+               else if (is_alu_vec_unit_inst(bc, alu))
+                       trans = 0;
+               else if (assignment[chan]) /* channel slot already taken, so try the trans slot */
+                       trans = 1; // assume ALU_INST_PREFER_VECTOR
+               else
+                       trans = 0;
 
-       update_chan_counter(alu_first, chan_counter);
+               if (trans) {
+                       if (assignment[4]) {
+                               assert(0); //ALU.Trans has already been allocated
+                               return -1;
+                       }
+                       assignment[4] = alu;
+               } else {
+                       if (assignment[chan]) {
+                               assert(0); //ALU.chan has already been allocated
+                               return -1;
+                       }
+                       assignment[chan] = alu;
+               }
 
-       LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) {
-               update_chan_counter(alu, chan_counter);
+               if (alu->last) /* stop after the instruction flagged last; NOTE(review): termination appears to rely on this flag - confirm */
+                       break;
        }
+       return 0;
+}
+
+struct alu_bank_swizzle { /* read-port bookkeeping filled in by reserve_gpr() and reserve_cfile() */
+       int     hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS]; /* GPR sel reserved for each [cycle][channel]; -1 = unused */
+       int     hw_cfile_addr[4]; /* sel of each reserved constant read; -1 = unused */
+       int     hw_cfile_elem[4]; /* chan value stored alongside hw_cfile_addr[] (chan/2 when chiprev >= R700) */
+};
+
+const unsigned cycle_for_bank_swizzle_vec[][3] = { /* [vector bank swizzle][index] -> cycle; same values as the removed cycle_for_vector_bank_swizzle() tables (index presumably the source operand number - confirm at the use site) */
+       [SQ_ALU_VEC_012] = { 0, 1, 2 },
+       [SQ_ALU_VEC_021] = { 0, 2, 1 },
+       [SQ_ALU_VEC_120] = { 1, 2, 0 },
+       [SQ_ALU_VEC_102] = { 1, 0, 2 },
+       [SQ_ALU_VEC_201] = { 2, 0, 1 },
+       [SQ_ALU_VEC_210] = { 2, 1, 0 }
+};
+
+const unsigned cycle_for_bank_swizzle_scl[][3] = { /* [scalar bank swizzle][index] -> cycle; same values as the removed cycle_for_scalar_bank_swizzle() tables (index presumably the source operand number - confirm at the use site) */
+       [SQ_ALU_SCL_210] = { 2, 1, 0 },
+       [SQ_ALU_SCL_122] = { 1, 2, 2 },
+       [SQ_ALU_SCL_212] = { 2, 1, 2 },
+       [SQ_ALU_SCL_221] = { 2, 2, 1 }
+};
+
+static void init_bank_swizzle(struct alu_bank_swizzle *bs) /* reset every entry of *bs to -1, the value the reserve_* helpers treat as unused */
+{
+       int i, cycle, component;
+       /* set up gpr use */
+       for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++)
+               for (component = 0; component < NUM_OF_COMPONENTS; component++)
+                        bs->hw_gpr[cycle][component] = -1;
+       for (i = 0; i < 4; i++) /* constant read slots: selector part */
+               bs->hw_cfile_addr[i] = -1;
+       for (i = 0; i < 4; i++) /* constant read slots: element part */
+               bs->hw_cfile_elem[i] = -1;
+}
 
-       if (chan_counter[0] > 3 ||
-           chan_counter[1] > 3 ||
-           chan_counter[2] > 3 ||
-           chan_counter[3] > 3) {
-               R600_ERR("needed to split instruction for input ran out of banks %x %d %d %d %d\n",
-                        alu_first->inst, chan_counter[0], chan_counter[1], chan_counter[2], chan_counter[3]);
+static int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, unsigned cycle) /* record sel in bs->hw_gpr[cycle][chan]; returns 0, or -1 if that entry already holds a different sel */
+{
+       if (bs->hw_gpr[cycle][chan] == -1) /* entry unused: take it */
+               bs->hw_gpr[cycle][chan] = sel;
+       else if (bs->hw_gpr[cycle][chan] != (int)sel) {
+               // Another scalar operation has already used GPR read port for channel
                return -1;
        }
        return 0;
 }
-#endif
+
+/*
+ * Reserve a constant-file read port for (sel, chan).
+ *
+ * Chips before R700 get four ports keyed by the exact channel; R700 and
+ * later get two ports keyed by chan / 2.  An existing reservation for the
+ * same key is reused.
+ * Returns 0 on success, -1 when no port is available.
+ */
+static int reserve_cfile(struct r600_bc *bc, struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
+{
+       const int r700_plus = bc->chiprev >= CHIPREV_R700;
+       const int num_ports = r700_plus ? 2 : 4;
+       const unsigned elem = r700_plus ? chan / 2 : chan;
+       int port;
+
+       for (port = 0; port < num_ports; ++port) {
+               int *addr = &bs->hw_cfile_addr[port];
+               int *held = &bs->hw_cfile_elem[port];
+
+               if (*addr == -1) {
+                       /* first free port: claim it */
+                       *addr = sel;
+                       *held = elem;
+                       return 0;
+               }
+               if (*addr == sel && *held == elem) {
+                       /* this element is already being read, share the port */
+                       return 0;
+               }
+       }
+
+       /* every port is busy with some other element */
+       return -1;
+}
+
+/*
+ * Return 1 if `sel` is a general purpose register selector (0..127),
+ * 0 otherwise.  `sel` is unsigned, so only the upper bound needs checking.
+ */
+static int is_gpr(unsigned sel)
+{
+       return sel <= 127;
+}
 
 /* CB constants start at 512, and get translated to a kcache index when ALU
  * clauses are constructed. Note that we handle kcache constants the same way
  * as (the now gone) cfile constants, is that really required? */
+/*
+ * Return 1 if `sel` addresses the constant file / kcache, 0 otherwise.
+ * Three selector windows are accepted: 128..191, 256..511 and 512..4606.
+ */
+static int is_cfile(unsigned sel)
+{
+       if (sel >= 128 && sel <= 191)
+               return 1; /* Kcache after translate */
+       if (sel >= 256 && sel <= 511)
+               return 1;
+       if (sel >= 512 && sel <= 4606)
+               return 1; /* Kcache before translate */
+       return 0;
+}
+
 static int is_const(int sel)
 {
-       if (sel > 511 && sel < 4607)
-               return 1;
+       /* Any constant: a cfile/kcache selector (is_cfile()) or a selector in
+        * the range V_SQ_ALU_SRC_0 .. V_SQ_ALU_SRC_LITERAL (both inclusive). */
+       return is_cfile(sel) ||
+               (sel >= V_SQ_ALU_SRC_0 &&
+               sel <= V_SQ_ALU_SRC_LITERAL);
+}
+
+/*
+ * Try to reserve the read ports needed by one vector-slot instruction under
+ * the given vector bank swizzle.
+ * Returns 0 when every source fits, non-zero on a read-port conflict.
+ */
+static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu,
+                       struct alu_bank_swizzle *bs, int bank_swizzle)
+{
+       const int nsrc = r600_bc_get_num_operands(bc, alu);
+       int idx;
+
+       for (idx = 0; idx < nsrc; idx++) {
+               const int sel = alu->src[idx].sel;
+               const int elem = alu->src[idx].chan;
+               int status = 0;
+
+               if (is_gpr(sel)) {
+                       /* src1 reading exactly what src0 reads rides on
+                        * src0's reservation and needs no port of its own */
+                       const int same_as_src0 = idx == 1 &&
+                               sel == alu->src[0].sel &&
+                               elem == alu->src[0].chan;
+
+                       if (!same_as_src0)
+                               status = reserve_gpr(bs, sel, elem,
+                                       cycle_for_bank_swizzle_vec[bank_swizzle][idx]);
+               } else if (is_cfile(sel)) {
+                       status = reserve_cfile(bc, bs, sel, elem);
+               }
+               /* PV, PS, literals and special constants are unrestricted */
+
+               if (status)
+                       return status;
+       }
        return 0;
 }
 
-static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu)
+/*
+ * Check the read-port requirements of the instruction in slot 4 for one
+ * scalar bank swizzle.
+ * Returns 0 if the instruction fits, non-zero otherwise.
+ */
+static int check_scalar(struct r600_bc *bc, struct r600_bc_alu *alu,
+                       struct alu_bank_swizzle *bs, int bank_swizzle)
 {
-       unsigned swizzle_key;
+       const int nsrc = r600_bc_get_num_operands(bc, alu);
+       int nconst = 0;
+       int idx, status;
+
+       /* first pass: count constants and reserve cfile ports */
+       for (idx = 0; idx < nsrc; ++idx) {
+               const int sel = alu->src[idx].sel;
+               const int elem = alu->src[idx].chan;
+
+               /* literal and inline constants are counted as well */
+               if (is_const(sel)) {
+                       if (nconst >= 2)
+                               return -1; /* a third constant operand does not fit */
+                       nconst++;
+               }
+               if (!is_cfile(sel))
+                       continue;
+
+               status = reserve_cfile(bc, bs, sel, elem);
+               if (status)
+                       return status;
+       }
+
+       /* second pass: GPR operands (PV and PS are unrestricted) */
+       for (idx = 0; idx < nsrc; ++idx) {
+               const int sel = alu->src[idx].sel;
+               const int elem = alu->src[idx].chan;
+               int cycle;
+
+               if (!is_gpr(sel))
+                       continue;
+
+               cycle = cycle_for_bank_swizzle_scl[bank_swizzle][idx];
+               if (cycle < nconst)
+                       return -1; /* that cycle is taken by a constant load */
+
+               status = reserve_gpr(bs, sel, elem, cycle);
+               if (status)
+                       return status;
+       }
+       return 0;
+}
+
+/*
+ * Pick a bank swizzle for every instruction of one ALU group.
+ *
+ * slots[0..3] are checked with check_vector(), slots[4] with
+ * check_scalar(); unused entries are NULL.
+ *
+ * If any instruction carries bank_swizzle_force, the forced values are
+ * copied and no search is done.  Otherwise every combination of swizzles
+ * is tried until one passes all read-port checks.
+ *
+ * Returns 0 on success (bank_swizzle is set on each instruction of the
+ * group), -1 if no combination works.
+ */
+static int check_and_set_bank_swizzle(struct r600_bc *bc,
+                                     struct r600_bc_alu *slots[5])
+{
+       struct alu_bank_swizzle bs;
+       int bank_swizzle[5];
+       int i, r = 0, forced = 0;
+
+       for (i = 0; i < 5; i++)
+               if (slots[i] && slots[i]->bank_swizzle_force) {
+                       slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
+                       forced = 1;
+               }
 
-       if (alu->bank_swizzle_force) {
-               alu->bank_swizzle = alu->bank_swizzle_force;
+       if (forced)
                return 0;
+
+       // just check every possible combination of bank swizzle
+       // not very efficient, but works on the first try in most of the cases
+       for (i = 0; i < 4; i++)
+               bank_swizzle[i] = SQ_ALU_VEC_012;
+       bank_swizzle[4] = SQ_ALU_SCL_210;
+       while(bank_swizzle[4] <= SQ_ALU_SCL_221) {
+               init_bank_swizzle(&bs);
+               // forget the failure of the previous combination; otherwise a
+               // group without any vector instruction would never run
+               // check_scalar() again after its first failure
+               r = 0;
+               for (i = 0; i < 4; i++) {
+                       if (slots[i]) {
+                               r = check_vector(bc, slots[i], &bs, bank_swizzle[i]);
+                               if (r)
+                                       break;
+                       }
+               }
+               if (!r && slots[4]) {
+                       r = check_scalar(bc, slots[4], &bs, bank_swizzle[4]);
+               }
+               if (!r) {
+                       for (i = 0; i < 5; i++) {
+                               if (slots[i])
+                                       slots[i]->bank_swizzle = bank_swizzle[i];
+                       }
+                       return 0;
+               }
+
+               // advance to the next combination like an odometer, slot 0
+               // being the fastest digit; the slot 4 digit is bounded by
+               // the while condition above
+               for (i = 0; i < 5; i++) {
+                       bank_swizzle[i]++;
+                       if (bank_swizzle[i] <= SQ_ALU_VEC_210)
+                               break;
+                       else
+                               bank_swizzle[i] = SQ_ALU_VEC_012;
+               }
        }
-       swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) + 
-               (is_const(alu->src[1].sel) ? 2 : 0 ) + 
-               (is_const(alu->src[2].sel) ? 1 : 0 );
 
-       alu->bank_swizzle = bank_swizzle_scl[swizzle_key];
-       return 0;
+       // couldn't find a working swizzle
+       return -1;
 }
 
-static int check_vector(struct r600_bc *bc, struct r600_bc_alu *alu)
+/*
+ * Rewrite GPR sources of the current group (slots[]) that read a register
+ * written by the previous group (starting at alu_prev) so that they read
+ * V_SQ_ALU_SRC_PV / V_SQ_ALU_SRC_PS instead.
+ * Returns 0 on success or the error from assign_alu_units().
+ */
+static int replace_gpr_with_pv_ps(struct r600_bc *bc,
+                                 struct r600_bc_alu *slots[5], struct r600_bc_alu *alu_prev)
 {
-       unsigned swizzle_key;
+       struct r600_bc_alu *prev[5];
+       int gpr[5], chan[5];
+       int i, j, r, src, num_src;
 
-       if (alu->bank_swizzle_force) {
-               alu->bank_swizzle = alu->bank_swizzle_force;
-               return 0;
+       r = assign_alu_units(bc, alu_prev, prev);
+       if (r)
+               return r;
+
+       /* Record, per slot of the previous group, which register it wrote.
+        * gpr[i] is -1 when the slot is empty, does not write, or writes
+        * with relative addressing; chan[i] is only assigned when gpr[i] is. */
+       for (i = 0; i < 5; ++i) {
+               if(prev[i] && prev[i]->dst.write && !prev[i]->dst.rel) {
+                       gpr[i] = prev[i]->dst.sel;
+                       /* cube writes more than PV.X */
+                       if (!is_alu_cube_inst(bc, prev[i]) && is_alu_reduction_inst(bc, prev[i]))
+                               chan[i] = 0;
+                       else
+                               chan[i] = prev[i]->dst.chan;
+               } else
+                       gpr[i] = -1;
+       }
+
+       for (i = 0; i < 5; ++i) {
+               struct r600_bc_alu *alu = slots[i];
+               if(!alu)
+                       continue;
+
+               num_src = r600_bc_get_num_operands(bc, alu);
+               for (src = 0; src < num_src; ++src) {
+                       /* only plain (non-relative) GPR reads are candidates */
+                       if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
+                               continue;
+
+                       /* written by slot 4 of the previous group -> PS;
+                        * chan[4] is only read after sel matched gpr[4] */
+                       if (alu->src[src].sel == gpr[4] &&
+                               alu->src[src].chan == chan[4]) {
+                               alu->src[src].sel = V_SQ_ALU_SRC_PS;
+                               alu->src[src].chan = 0;
+                               continue;
+                       }
+
+                       /* written by slot j (0..3) of the previous group -> PV;
+                        * the source channel is compared against the slot index j */
+                       for (j = 0; j < 4; ++j) {
+                               if (alu->src[src].sel == gpr[j] &&
+                                       alu->src[src].chan == j) {
+                                       alu->src[src].sel = V_SQ_ALU_SRC_PV;
+                                       alu->src[src].chan = chan[j];
+                                       break;
+                               }
+                       }
+               }
        }
-       swizzle_key = (is_const(alu->src[0].sel) ? 4 : 0 ) + 
-               (is_const(alu->src[1].sel) ? 2 : 0 ) + 
-               (is_const(alu->src[2].sel) ? 1 : 0 );
 
-       alu->bank_swizzle = bank_swizzle_vec[swizzle_key];
        return 0;
 }
 
-static int check_and_set_bank_swizzle(struct r600_bc *bc, struct r600_bc_alu *alu_first)
+/*
+ * Map a 32-bit literal onto an inline constant selector where one exists.
+ *
+ * *sel receives the selector: an inline constant for 0, 1, -1 (integer) and
+ * +/-1.0f, +/-0.5f, or V_SQ_ALU_SRC_LITERAL for every other value.
+ * *neg is toggled only for -1.0f and -0.5f, which are expressed as the
+ * negated positive constant; it is left untouched otherwise.
+ */
+void r600_bc_special_constants(u32 value, unsigned *sel, unsigned *neg)
 {
-       struct r600_bc_alu *alu = NULL;
-       int num_instr = 1;
+       unsigned chosen = V_SQ_ALU_SRC_LITERAL;
+       int negate = 0;
+
+       if (value == 0) {
+               chosen = V_SQ_ALU_SRC_0;
+       } else if (value == 1) {
+               chosen = V_SQ_ALU_SRC_1_INT;
+       } else if (value == (u32)-1) {
+               chosen = V_SQ_ALU_SRC_M_1_INT;
+       } else if (value == 0x3F800000) { // 1.0f
+               chosen = V_SQ_ALU_SRC_1;
+       } else if (value == 0x3F000000) { // 0.5f
+               chosen = V_SQ_ALU_SRC_0_5;
+       } else if (value == 0xBF800000) { // -1.0f
+               chosen = V_SQ_ALU_SRC_1;
+               negate = 1;
+       } else if (value == 0xBF000000) { // -0.5f
+               chosen = V_SQ_ALU_SRC_0_5;
+               negate = 1;
+       }
+
+       *sel = chosen;
+       if (negate)
+               *neg ^= 1;
+}
 
-       init_gpr(alu_first);
+/*
+ * Collect the distinct literal values referenced by `alu` into literal[].
+ *
+ * *nliteral is the number of entries already present on entry and is
+ * increased for every value that was not in the table yet.
+ * Returns 0 on success, -EINVAL if a fifth distinct literal would be needed.
+ */
+static int r600_bc_alu_nliterals(struct r600_bc *bc, struct r600_bc_alu *alu,
+                                uint32_t literal[4], unsigned *nliteral)
+{
+       const unsigned nsrc = r600_bc_get_num_operands(bc, alu);
+       unsigned s, slot;
+
+       for (s = 0; s < nsrc; ++s) {
+               uint32_t wanted;
+
+               if (alu->src[s].sel != V_SQ_ALU_SRC_LITERAL)
+                       continue;
+
+               wanted = alu->src[s].value[alu->src[s].chan];
+
+               /* look for an identical literal that is already recorded */
+               for (slot = 0; slot < *nliteral; ++slot) {
+                       if (literal[slot] == wanted)
+                               break;
+               }
+               if (slot < *nliteral)
+                       continue;
+
+               /* new value: append it if there is still room */
+               if (*nliteral >= 4)
+                       return -EINVAL;
+               literal[*nliteral] = wanted;
+               (*nliteral)++;
+       }
+       return 0;
+}
 
-       LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) {
-               num_instr++;
+/*
+ * Point every literal source of `alu` at its entry in literal[]: the
+ * source's chan becomes the index of the first entry that equals the
+ * literal value.  Sources whose value is not in the table are left alone.
+ */
+static void r600_bc_alu_adjust_literals(struct r600_bc *bc,
+                                       struct r600_bc_alu *alu,
+                                       uint32_t literal[4], unsigned nliteral)
+{
+       const unsigned nsrc = r600_bc_get_num_operands(bc, alu);
+       unsigned s, slot;
+
+       for (s = 0; s < nsrc; ++s) {
+               uint32_t wanted;
+
+               if (alu->src[s].sel != V_SQ_ALU_SRC_LITERAL)
+                       continue;
+
+               /* read the value before chan is overwritten below */
+               wanted = alu->src[s].value[alu->src[s].chan];
+               for (slot = 0; slot < nliteral && literal[slot] != wanted; ++slot)
+                       ;
+               if (slot < nliteral)
+                       alu->src[s].chan = slot;
        }
+}
 
-       if (num_instr == 1) {
-               check_scalar(bc, alu_first);
-               
-       } else {
-/*             check_read_slots(bc, bc->cf_last->curr_bs_head);*/
-               check_vector(bc, alu_first);
-               LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) {
-                       check_vector(bc, alu);
+/*
+ * Try to merge the current instruction group (slots[]) with the previous
+ * group (starting at alu_prev) into a single group.
+ *
+ * Every "cannot merge" condition returns 0 before the apply section below
+ * is reached; a non-zero value is returned only when assign_alu_units()
+ * fails.  On a successful merge slots[] is overwritten with the merged
+ * group and the instruction list and group heads of bc->cf_last are
+ * updated.
+ */
+static int merge_inst_groups(struct r600_bc *bc, struct r600_bc_alu *slots[5],
+                            struct r600_bc_alu *alu_prev)
+{
+       struct r600_bc_alu *prev[5];
+       struct r600_bc_alu *result[5] = { NULL };
+
+       /* literal[] collects the literals of both groups, prev_literal[]
+        * only those of the previous group */
+       uint32_t literal[4], prev_literal[4];
+       unsigned nliteral = 0, prev_nliteral = 0;
+
+       int i, j, r, src, num_src;
+       int num_once_inst = 0;
+       int have_mova = 0, have_rel = 0;
+
+       r = assign_alu_units(bc, alu_prev, prev);
+       if (r)
+               return r;
+
+       for (i = 0; i < 5; ++i) {
+               struct r600_bc_alu *alu;
+
+               /* check number of literals */
+               if (prev[i]) {
+                       if (r600_bc_alu_nliterals(bc, prev[i], literal, &nliteral))
+                               return 0;
+                       if (r600_bc_alu_nliterals(bc, prev[i], prev_literal, &prev_nliteral))
+                               return 0;
+                       /* a MOVA in the previous group and a relative source
+                        * in the current group cannot share one group */
+                       if (is_alu_mova_inst(bc, prev[i])) {
+                               if (have_rel)
+                                       return 0;
+                               have_mova = 1;
+                       }
+                       num_once_inst += is_alu_once_inst(bc, prev[i]);
+               }
+               if (slots[i] && r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral))
+                       return 0;
+
+               // let's check used slots
+               if (prev[i] && !slots[i]) {
+                       result[i] = prev[i];
+                       continue;
+               } else if (prev[i] && slots[i]) {
+                       if (result[4] == NULL && prev[4] == NULL && slots[4] == NULL) {
+                               // trans unit is still free try to use it
+                               if (is_alu_any_unit_inst(bc, slots[i])) {
+                                       result[i] = prev[i];
+                                       result[4] = slots[i];
+                               } else if (is_alu_any_unit_inst(bc, prev[i])) {
+                                       result[i] = slots[i];
+                                       result[4] = prev[i];
+                               } else
+                                       return 0;
+                       } else
+                               return 0;
+               } else if(!slots[i]) {
+                       continue;
+               } else
+                       result[i] = slots[i];
+
+               // let's check source gprs
+               alu = slots[i];
+               num_once_inst += is_alu_once_inst(bc, alu);
+
+               num_src = r600_bc_get_num_operands(bc, alu);
+               for (src = 0; src < num_src; ++src) {
+                       if (alu->src[src].rel) {
+                               if (have_mova)
+                                       return 0;
+                               have_rel = 1;
+                       }
+
+                       // constants don't matter
+                       if (!is_gpr(alu->src[src].sel))
+                               continue;
+
+                       /* the current group must not read anything the
+                        * previous group writes */
+                       for (j = 0; j < 5; ++j) {
+                               if (!prev[j] || !prev[j]->dst.write)
+                                       continue;
+
+                               // if it's relative then we can't determine which gpr is really used
+                               if (prev[j]->dst.chan == alu->src[src].chan &&
+                                       (prev[j]->dst.sel == alu->src[src].sel ||
+                                       prev[j]->dst.rel || alu->src[src].rel))
+                                       return 0;
+                       }
+               }
+       }
+
+       /* more than one PRED_ or KILL_ ? */
+       if (num_once_inst > 1)
+               return 0;
+
+       /* check if the result can still be swizzled */
+       r = check_and_set_bank_swizzle(bc, result);
+       if (r)
+               return 0;
+
+       /* looks like everything worked out right, apply the changes */
+
+       /* undo adding previous literals */
+       bc->cf_last->ndw -= align(prev_nliteral, 2);
+
+       /* sort instructions */
+       for (i = 0; i < 5; ++i) {
+               slots[i] = result[i];
+               if (result[i]) {
+                       /* re-queue at the tail so the merged group ends up
+                        * in slot order */
+                       LIST_DEL(&result[i]->list);
+                       result[i]->last = 0;
+                       LIST_ADDTAIL(&result[i]->list, &bc->cf_last->alu);
+               }
+       }
+
+       /* determine new last instruction */
+       LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list)->last = 1;
+
+       /* determine new first instruction */
+       for (i = 0; i < 5; ++i) {
+               if (result[i]) {
+                       bc->cf_last->curr_bs_head = result[i];
+                       break;
                }
        }
+
+       /* the previous group was absorbed, so the group before it becomes
+        * the new previous group */
+       bc->cf_last->prev_bs_head = bc->cf_last->prev2_bs_head;
+       bc->cf_last->prev2_bs_head = NULL;
+
        return 0;
 }
 
@@ -537,7 +1137,6 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
        if (nalu == NULL)
                return -ENOMEM;
        memcpy(nalu, alu, sizeof(struct r600_bc_alu));
-       nalu->nliteral = 0;
 
        if (bc->cf_last != NULL && bc->cf_last->inst != (type << 3)) {
                /* check if we could add it anyway */
@@ -572,34 +1171,16 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 
        if (!bc->cf_last->curr_bs_head) {
                bc->cf_last->curr_bs_head = nalu;
-               LIST_INITHEAD(&nalu->bs_list);
-       } else {
-               LIST_ADDTAIL(&nalu->bs_list, &bc->cf_last->curr_bs_head->bs_list);
-       }
-       /* at most 128 slots, one add alu can add 4 slots + 4 constants(2 slots)
-        * worst case */
-       if (nalu->last && (bc->cf_last->ndw >> 1) >= 120) {
-               bc->force_add_cf = 1;
        }
        /* number of gpr == the last gpr used in any alu */
        for (i = 0; i < 3; i++) {
                if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) {
                        bc->ngpr = nalu->src[i].sel + 1;
                }
-               /* compute how many literal are needed
-                * either 2 or 4 literals
-                */
-               if (nalu->src[i].sel == 253) {
-                       if (((nalu->src[i].chan + 2) & 0x6) > nalu->nliteral) {
-                               nalu->nliteral = (nalu->src[i].chan + 2) & 0x6;
-                       }
-               }
-       }
-       if (!LIST_IS_EMPTY(&bc->cf_last->alu)) {
-               lalu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
-               if (!lalu->last && lalu->nliteral > nalu->nliteral) {
-                       nalu->nliteral = lalu->nliteral;
-               }
+               if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
+                       r600_bc_special_constants(
+                               nalu->src[i].value[nalu->src[i].chan],
+                               &nalu->src[i].sel, &nalu->src[i].neg);
        }
        if (nalu->dst.sel >= bc->ngpr) {
                bc->ngpr = nalu->dst.sel + 1;
@@ -611,7 +1192,46 @@ int r600_bc_add_alu_type(struct r600_bc *bc, const struct r600_bc_alu *alu, int
 
        /* process cur ALU instructions for bank swizzle */
        if (nalu->last) {
-               check_and_set_bank_swizzle(bc, bc->cf_last->curr_bs_head);
+               uint32_t literal[4];
+               unsigned nliteral;
+               struct r600_bc_alu *slots[5];
+               r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots);
+               if (r)
+                       return r;
+
+               if (bc->cf_last->prev_bs_head) {
+                       r = merge_inst_groups(bc, slots, bc->cf_last->prev_bs_head);
+                       if (r)
+                               return r;
+               }
+
+               if (bc->cf_last->prev_bs_head) {
+                       r = replace_gpr_with_pv_ps(bc, slots, bc->cf_last->prev_bs_head);
+                       if (r)
+                               return r;
+               }
+
+               r = check_and_set_bank_swizzle(bc, slots);
+               if (r)
+                       return r;
+
+               for (i = 0, nliteral = 0; i < 5; i++) {
+                       if (slots[i]) {
+                               r = r600_bc_alu_nliterals(bc, slots[i], literal, &nliteral);
+                               if (r)
+                                       return r;
+                       }
+               }
+               bc->cf_last->ndw += align(nliteral, 2);
+
+               /* at most 128 slots, one add alu can add 5 slots + 4 constants(2 slots)
+                * worst case */
+               if ((bc->cf_last->ndw >> 1) >= 120) {
+                       bc->force_add_cf = 1;
+               }
+
+               bc->cf_last->prev2_bs_head = bc->cf_last->prev_bs_head;
+               bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head;
                bc->cf_last->curr_bs_head = NULL;
        }
        return 0;
@@ -622,46 +1242,6 @@ int r600_bc_add_alu(struct r600_bc *bc, const struct r600_bc_alu *alu)
        return r600_bc_add_alu_type(bc, alu, BC_INST(bc, V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU));
 }
 
-int r600_bc_add_literal(struct r600_bc *bc, const u32 *value)
-{
-       struct r600_bc_alu *alu;
-
-       if (bc->cf_last == NULL) {
-               return 0;
-       }
-       if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_TEX) {
-               return 0;
-       }
-       /* all same on EG */
-       if (bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_JUMP ||
-           bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_ELSE ||
-           bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL ||
-           bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK ||
-           bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE ||
-           bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END ||
-           bc->cf_last->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) {
-               return 0;
-       }
-       /* same on EG */
-       if (((bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)) &&
-            (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3)) &&
-            (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3)) &&
-            (bc->cf_last->inst != (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3))) ||
-               LIST_IS_EMPTY(&bc->cf_last->alu)) {
-               R600_ERR("last CF is not ALU (%p)\n", bc->cf_last);
-               return -EINVAL;
-       }
-       alu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
-       if (!alu->last || !alu->nliteral || alu->literal_added) {
-               return 0;
-       }
-       memcpy(alu->value, value, 4 * 4);
-       bc->cf_last->ndw += alu->nliteral;
-       bc->ndw += alu->nliteral;
-       alu->literal_added = 1;
-       return 0;
-}
-
 int r600_bc_add_vtx(struct r600_bc *bc, const struct r600_bc_vtx *vtx)
 {
        struct r600_bc_vtx *nvtx = r600_bc_vtx();
@@ -818,8 +1398,6 @@ static int r600_bc_tex_build(struct r600_bc *bc, struct r600_bc_tex *tex, unsign
 /* r600 only, r700/eg bits in r700_asm.c */
 static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsigned id)
 {
-       unsigned i;
-
        /* don't replace gpr by pv or ps for destination register */
        bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
                                S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
@@ -856,14 +1434,6 @@ static int r600_bc_alu_build(struct r600_bc *bc, struct r600_bc_alu *alu, unsign
                                        S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->predicate) |
                                        S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->predicate);
        }
-       if (alu->last) {
-               if (alu->nliteral && !alu->literal_added) {
-                       R600_ERR("Bug in ALU processing for instruction 0x%08x, literal not added correctly\n", alu->inst);
-               }
-               for (i = 0; i < alu->nliteral; i++) {
-                       bc->bytecode[id++] = alu->value[i];
-               }
-       }
        return 0;
 }
 
@@ -904,7 +1474,8 @@ static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
                        S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
                        S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
                        S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
-               bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
+               bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
+                       S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
                        S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
                        S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
                        S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
@@ -941,8 +1512,10 @@ int r600_bc_build(struct r600_bc *bc)
        struct r600_bc_alu *alu;
        struct r600_bc_vtx *vtx;
        struct r600_bc_tex *tex;
+       uint32_t literal[4];
+       unsigned nliteral;
        unsigned addr;
-       int r;
+       int i, r;
 
        if (bc->callstack[0].max > 0)
                bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2;
@@ -1007,7 +1580,13 @@ int r600_bc_build(struct r600_bc *bc)
                case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3):
                case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3):
                case (V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+                       nliteral = 0;
+                       memset(literal, 0, sizeof(literal));
                        LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+                               r = r600_bc_alu_nliterals(bc, alu, literal, &nliteral);
+                               if (r)
+                                       return r;
+                               r600_bc_alu_adjust_literals(bc, alu, literal, nliteral);
                                switch(bc->chiprev) {
                                case CHIPREV_R600:
                                        r = r600_bc_alu_build(bc, alu, addr);
@@ -1024,7 +1603,11 @@ int r600_bc_build(struct r600_bc *bc)
                                        return r;
                                addr += 2;
                                if (alu->last) {
-                                       addr += alu->nliteral;
+                                       for (i = 0; i < align(nliteral, 2); ++i) {
+                                               bc->bytecode[addr++] = literal[i];
+                                       }
+                                       nliteral = 0;
+                                       memset(literal, 0, sizeof(literal));
                                }
                        }
                        break;
@@ -1111,6 +1694,8 @@ void r600_bc_dump(struct r600_bc *bc)
        struct r600_bc_tex *tex = NULL;
 
        unsigned i, id;
+       uint32_t literal[4];
+       unsigned nliteral;
        char chip = '6';
 
        switch (bc->chiprev) {
@@ -1172,9 +1757,9 @@ void r600_bc_dump(struct r600_bc *bc)
                        fprintf(stderr, "SWIZ_Y:%X ", cf->output.swizzle_y);
                        fprintf(stderr, "SWIZ_Z:%X ", cf->output.swizzle_z);
                        fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
-                       fprintf(stderr, "SWIZ_W:%X ", cf->output.swizzle_w);
                        fprintf(stderr, "BARRIER:%X ", cf->output.barrier);
                        fprintf(stderr, "INST:%d ", cf->output.inst);
+                       fprintf(stderr, "BURST_COUNT:%d ", cf->output.burst_count);
                        fprintf(stderr, "EOP:%X\n", cf->output.end_of_program);
                        break;
                case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
@@ -1197,7 +1782,10 @@ void r600_bc_dump(struct r600_bc *bc)
                }
 
                id = cf->addr;
+               nliteral = 0;
                LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+                       r600_bc_alu_nliterals(bc, alu, literal, &nliteral);
+
                        fprintf(stderr, "%04d %08X   ", id, bc->bytecode[id]);
                        fprintf(stderr, "SRC0(SEL:%d ", alu->src[0].sel);
                        fprintf(stderr, "REL:%d ", alu->src[0].rel);
@@ -1232,10 +1820,12 @@ void r600_bc_dump(struct r600_bc *bc)
 
                        id++;
                        if (alu->last) {
-                               for (i = 0; i < alu->nliteral; i++, id++) {
+                               for (i = 0; i < nliteral; i++, id++) {
                                        float *f = (float*)(bc->bytecode + id);
                                        fprintf(stderr, "%04d %08X\t%f\n", id, bc->bytecode[id], *f);
                                }
+                               id += nliteral & 1;
+                               nliteral = 0;
                        }
                }
 
@@ -1346,7 +1936,7 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
        }
 
        switch (desc->channel[i].type) {
-               /* Half-floats, floats, doubles */
+       /* Half-floats, floats, ints */
        case UTIL_FORMAT_TYPE_FLOAT:
                switch (desc->channel[i].size) {
                case 16:
@@ -1358,8 +1948,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
                                *format = FMT_16_16_FLOAT;
                                break;
                        case 3:
-                               *format = FMT_16_16_16_FLOAT;
-                               break;
                        case 4:
                                *format = FMT_16_16_16_16_FLOAT;
                                break;
@@ -1399,8 +1987,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
                                *format = FMT_8_8;
                                break;
                        case 3:
-                       //      *format = FMT_8_8_8; /* fails piglit draw-vertices test */
-                       //      break;
                        case 4:
                                *format = FMT_8_8_8_8;
                                break;
@@ -1415,8 +2001,6 @@ static void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
                                *format = FMT_16_16;
                                break;
                        case 3:
-                       //      *format = FMT_16_16_16; /* fails piglit draw-vertices test */
-                       //      break;
                        case 4:
                                *format = FMT_16_16_16_16;
                                break;