Merge branch 'llvm-cliptest-viewport'
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / r500_fragprog.c
index 7e2faed69088e7db15ede99470bb015aac014d04..289bb87ae593dd83d5ad8fc46c270782bfa4e0ed 100644 (file)
 
 #include "r500_fragprog.h"
 
-#include "../r300_reg.h"
-
-static struct prog_src_register shadow_ambient(struct radeon_compiler * c, int tmu)
-{
-       struct prog_src_register reg = { 0, };
+#include <stdio.h>
 
-       reg.File = PROGRAM_STATE_VAR;
-       reg.Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_SHADOW_AMBIENT, tmu);
-       reg.Swizzle = SWIZZLE_WWWW;
-       return reg;
-}
+#include "../r300_reg.h"
 
 /**
- * Transform TEX, TXP, TXB, and KIL instructions in the following way:
- *  - implement texture compare (shadow extensions)
- *  - extract non-native source / destination operands
+ * Rewrite IF instructions to use the ALU result special register.
+ *
+ * Instructions other than RC_OPCODE_IF are ignored and 0 is returned.
+ * For an IF, a MOV that writes no register but sets the ALU result
+ * (W channel, RC_COMPARE_FUNC_NOTEQUAL) from the IF's source operand is
+ * inserted, the IF's source is redirected to RC_SPECIAL_ALU_RESULT, and
+ * 1 is returned. The data argument is not used.
  */
-GLboolean r500_transform_TEX(
+int r500_transform_IF(
        struct radeon_compiler * c,
        struct rc_instruction * inst,
        void* data)
 {
-       struct r300_fragment_program_compiler *compiler =
-               (struct r300_fragment_program_compiler*)data;
-
-       if (inst->I.Opcode != OPCODE_TEX &&
-           inst->I.Opcode != OPCODE_TXB &&
-           inst->I.Opcode != OPCODE_TXP &&
-           inst->I.Opcode != OPCODE_KIL)
-               return GL_FALSE;
-
-       /* ARB_shadow & EXT_shadow_funcs */
-       if (inst->I.Opcode != OPCODE_KIL &&
-           c->Program.ShadowSamplers & (1 << inst->I.TexSrcUnit)) {
-               GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
-
-               if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
-                       inst->I.Opcode = OPCODE_MOV;
-
-                       if (comparefunc == GL_ALWAYS) {
-                               inst->I.SrcReg[0].File = PROGRAM_BUILTIN;
-                               inst->I.SrcReg[0].Swizzle = SWIZZLE_1111;
-                       } else {
-                               inst->I.SrcReg[0] = shadow_ambient(c, inst->I.TexSrcUnit);
-                       }
-
-                       return GL_TRUE;
-               } else {
-                       GLuint comparefunc = GL_NEVER + compiler->state.unit[inst->I.TexSrcUnit].texture_compare_func;
-                       GLuint depthmode = compiler->state.unit[inst->I.TexSrcUnit].depth_texture_mode;
-                       struct rc_instruction * inst_rcp = rc_insert_new_instruction(c, inst);
-                       struct rc_instruction * inst_mad = rc_insert_new_instruction(c, inst_rcp);
-                       struct rc_instruction * inst_cmp = rc_insert_new_instruction(c, inst_mad);
-                       int pass, fail;
-
-                       inst_rcp->I.Opcode = OPCODE_RCP;
-                       inst_rcp->I.DstReg.File = PROGRAM_TEMPORARY;
-                       inst_rcp->I.DstReg.Index = rc_find_free_temporary(c);
-                       inst_rcp->I.DstReg.WriteMask = WRITEMASK_W;
-                       inst_rcp->I.SrcReg[0] = inst->I.SrcReg[0];
-                       inst_rcp->I.SrcReg[0].Swizzle = SWIZZLE_WWWW;
-
-                       inst_cmp->I.DstReg = inst->I.DstReg;
-                       inst->I.DstReg.File = PROGRAM_TEMPORARY;
-                       inst->I.DstReg.Index = rc_find_free_temporary(c);
-                       inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
-
-                       inst_mad->I.Opcode = OPCODE_MAD;
-                       inst_mad->I.DstReg.File = PROGRAM_TEMPORARY;
-                       inst_mad->I.DstReg.Index = rc_find_free_temporary(c);
-                       inst_mad->I.SrcReg[0] = inst->I.SrcReg[0];
-                       inst_mad->I.SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
-                       inst_mad->I.SrcReg[1].File = PROGRAM_TEMPORARY;
-                       inst_mad->I.SrcReg[1].Index = inst_rcp->I.DstReg.Index;
-                       inst_mad->I.SrcReg[1].Swizzle = SWIZZLE_WWWW;
-                       inst_mad->I.SrcReg[2].File = PROGRAM_TEMPORARY;
-                       inst_mad->I.SrcReg[2].Index = inst->I.DstReg.Index;
-                       if (depthmode == 0) /* GL_LUMINANCE */
-                               inst_mad->I.SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
-                       else if (depthmode == 2) /* GL_ALPHA */
-                               inst_mad->I.SrcReg[2].Swizzle = SWIZZLE_WWWW;
-
-                       /* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
-                        *   r  < tex  <=>      -tex+r < 0
-                        *   r >= tex  <=> not (-tex+r < 0 */
-                       if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
-                               inst_mad->I.SrcReg[2].Negate = inst_mad->I.SrcReg[2].Negate ^ NEGATE_XYZW;
-                       else
-                               inst_mad->I.SrcReg[0].Negate = inst_mad->I.SrcReg[0].Negate ^ NEGATE_XYZW;
-
-                       inst_cmp->I.Opcode = OPCODE_CMP;
-                       /* DstReg has been filled out above */
-                       inst_cmp->I.SrcReg[0].File = PROGRAM_TEMPORARY;
-                       inst_cmp->I.SrcReg[0].Index = inst_mad->I.DstReg.Index;
-
-                       if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
-                               pass = 1;
-                               fail = 2;
-                       } else {
-                               pass = 2;
-                               fail = 1;
-                       }
-
-                       inst_cmp->I.SrcReg[pass].File = PROGRAM_BUILTIN;
-                       inst_cmp->I.SrcReg[pass].Swizzle = SWIZZLE_1111;
-                       inst_cmp->I.SrcReg[fail] = shadow_ambient(c, inst->I.TexSrcUnit);
-               }
-       }
-
-       /* Cannot write texture to output registers */
-       if (inst->I.Opcode != OPCODE_KIL && inst->I.DstReg.File != PROGRAM_TEMPORARY) {
-               struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
-
-               inst_mov->I.Opcode = OPCODE_MOV;
-               inst_mov->I.DstReg = inst->I.DstReg;
-               inst_mov->I.SrcReg[0].File = PROGRAM_TEMPORARY;
-               inst_mov->I.SrcReg[0].Index = rc_find_free_temporary(c);
-
-               inst->I.DstReg.File = PROGRAM_TEMPORARY;
-               inst->I.DstReg.Index = inst_mov->I.SrcReg[0].Index;
-               inst->I.DstReg.WriteMask = WRITEMASK_XYZW;
-       }
-
-       /* Cannot read texture coordinate from constants file */
-       if (inst->I.SrcReg[0].File != PROGRAM_TEMPORARY && inst->I.SrcReg[0].File != PROGRAM_INPUT) {
-               struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
-
-               inst_mov->I.Opcode = OPCODE_MOV;
-               inst_mov->I.DstReg.File = PROGRAM_TEMPORARY;
-               inst_mov->I.DstReg.Index = rc_find_free_temporary(c);
-               inst_mov->I.SrcReg[0] = inst->I.SrcReg[0];
-
-               reset_srcreg(&inst->I.SrcReg[0]);
-               inst->I.SrcReg[0].File = PROGRAM_TEMPORARY;
-               inst->I.SrcReg[0].Index = inst_mov->I.DstReg.Index;
-       }
-
-       return GL_TRUE;
+       struct rc_instruction * inst_mov;
+
+       /* Anything that is not an IF is left untouched. */
+       if (inst->U.I.Opcode != RC_OPCODE_IF)
+               return 0;
+
+       /* Emit a MOV through inst->Prev, i.e. presumably directly ahead of
+        * the IF (confirm against rc_insert_new_instruction). Its write mask
+        * is empty; it only feeds the ALU result, using a "not equal"
+        * compare on the W channel. */
+       inst_mov = rc_insert_new_instruction(c, inst->Prev);
+       inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+       inst_mov->U.I.DstReg.WriteMask = 0;
+       inst_mov->U.I.WriteALUResult = RC_ALURESULT_W;
+       inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL;
+       inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+       /* NOTE(review): assumes combine_swizzles4 applies the four given
+        * selectors on top of the existing swizzle, so that W ends up reading
+        * whatever the IF source's X selected and X/Y/Z become unused --
+        * confirm against its definition. */
+       inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(inst_mov->U.I.SrcReg[0].Swizzle,
+                       RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_X);
+
+       /* The IF itself now reads the ALU result special register with an
+        * identity swizzle and no negation. */
+       inst->U.I.SrcReg[0].File = RC_FILE_SPECIAL;
+       inst->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT;
+       inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+       inst->U.I.SrcReg[0].Negate = 0;
+
+       return 1;
 }
 
-GLboolean r500FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg)
+/**
+ * Report whether a source operand can be used as is by the given opcode.
+ *
+ * Three classes of opcode are distinguished: TEX/TXB/TXP/KIL, DDX/DDY,
+ * and everything else (treated as ALU instructions).
+ *
+ * reg is taken by value; its Negate field is modified locally as scratch
+ * space and the caller's register is not affected.
+ *
+ * Returns 1 if the swizzle/negate/abs combination is accepted, 0 if not.
+ */
+static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
 {
-       GLuint relevant;
+       unsigned int relevant;
        int i;
 
-       if (opcode == OPCODE_TEX ||
-           opcode == OPCODE_TXB ||
-           opcode == OPCODE_TXP ||
-           opcode == OPCODE_KIL) {
+       if (opcode == RC_OPCODE_TEX ||
+           opcode == RC_OPCODE_TXB ||
+           opcode == RC_OPCODE_TXP ||
+           opcode == RC_OPCODE_KIL) {
+               /* Absolute value is never accepted for these opcodes. */
                if (reg.Abs)
-                       return GL_FALSE;
+                       return 0;
 
-               if (opcode == OPCODE_KIL && (reg.Swizzle != SWIZZLE_NOOP || reg.Negate != NEGATE_NONE))
-                       return GL_FALSE;
+               /* KIL additionally requires the identity swizzle and no negation. */
+               if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE))
+                       return 0;
 
+               /* If any channel is negated, flip the mask so that the bits
+                * left set mark the channels that are NOT negated. Unused
+                * channels are cleared in the loop below; a bit that survives
+                * means the negation is only partial, which is rejected. */
                if (reg.Negate)
-                       reg.Negate ^= NEGATE_XYZW;
+                       reg.Negate ^= RC_MASK_XYZW;
 
                for(i = 0; i < 4; ++i) {
-                       GLuint swz = GET_SWZ(reg.Swizzle, i);
-                       if (swz == SWIZZLE_NIL) {
+                       unsigned int swz = GET_SWZ(reg.Swizzle, i);
+                       if (swz == RC_SWIZZLE_UNUSED) {
                                reg.Negate &= ~(1 << i);
                                continue;
                        }
+                       /* Selectors of 4 and above are rejected; presumably
+                        * these are the constant swizzles rather than plain
+                        * component selects -- confirm against the
+                        * RC_SWIZZLE_* definitions. */
                        if (swz >= 4)
-                               return GL_FALSE;
+                               return 0;
                }
 
                if (reg.Negate)
-                       return GL_FALSE;
+                       return 0;
 
-               return GL_TRUE;
-       } else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) {
+               return 1;
+       } else if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY) {
                /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles;
                 * if it doesn't fit perfectly into a .xyzw case... */
-               if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs && !reg.Negate)
-                       return GL_TRUE;
+               if (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate)
+                       return 1;
 
-               return GL_FALSE;
+               return 0;
        } else {
                /* ALU instructions support almost everything */
+               /* With Abs set the operand is accepted without looking at
+                * Negate or Swizzle. */
                if (reg.Abs)
-                       return GL_TRUE;
+                       return 1;
 
+               /* Collect the x/y/z channels (the loop stops before w) that
+                * are neither unused nor the constant zero. Negation must
+                * then cover all of those channels or none of them. */
                relevant = 0;
                for(i = 0; i < 3; ++i) {
-                       GLuint swz = GET_SWZ(reg.Swizzle, i);
-                       if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO)
+                       unsigned int swz = GET_SWZ(reg.Swizzle, i);
+                       if (swz != RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO)
                                relevant |= 1 << i;
                }
                if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
-                       return GL_FALSE;
+                       return 0;
 
-               return GL_TRUE;
+               return 1;
        }
 }
 
 /**
- * Implement a MOV with a potentially non-native swizzle.
+ * Split source register access.
  *
  * The only thing we *cannot* do in an ALU instruction is per-component
- * negation. Therefore, we split the MOV into two instructions when necessary.
+ * negation.
+ *
+ * The channels that are selected by usemask and not swizzled to
+ * RC_SWIZZLE_UNUSED are sorted into two channel masks according to their
+ * negate bit. Each non-empty mask becomes one entry of split->Phase, the
+ * non-negated group first, so split->NumPhases ends up between 0 and 2.
+ * No instructions are emitted here; only *split is written.
  */
-void r500FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src)
+static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask,
+               struct rc_swizzle_split * split)
 {
-       GLuint negatebase[2] = { 0, 0 };
+       /* negatebase[0]: channels read without negation,
+        * negatebase[1]: channels read with negation. */
+       unsigned int negatebase[2] = { 0, 0 };
        int i;
 
        for(i = 0; i < 4; ++i) {
-               GLuint swz = GET_SWZ(src.Swizzle, i);
-               if (swz == SWIZZLE_NIL)
+               unsigned int swz = GET_SWZ(src.Swizzle, i);
+               if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i))
                        continue;
                negatebase[GET_BIT(src.Negate, i)] |= 1 << i;
        }
 
+       split->NumPhases = 0;
+
        for(i = 0; i <= 1; ++i) {
                if (!negatebase[i])
                        continue;
 
-               struct rc_instruction *inst = rc_insert_new_instruction(s->Compiler, s->IP->Prev);
-               inst->I.Opcode = OPCODE_MOV;
-               inst->I.DstReg = dst;
-               inst->I.DstReg.WriteMask = negatebase[i];
-               inst->I.SrcReg[0] = src;
-               inst->I.SrcReg[0].Negate = (i == 0) ? NEGATE_NONE : NEGATE_XYZW;
+               split->Phase[split->NumPhases++] = negatebase[i];
        }
 }
 
+/**
+ * Swizzle capability table for R500: pairs the native-swizzle check with
+ * the source-splitting routine defined in this file.
+ */
+struct rc_swizzle_caps r500_swizzle_caps = {
+       .IsNative = r500_swizzle_is_native,
+       .Split = r500_swizzle_split
+};
 
 static char *toswiz(int swiz_val) {
   switch(swiz_val) {
@@ -263,7 +159,7 @@ static char *toswiz(int swiz_val) {
   case 2: return "B";
   case 3: return "A";
   case 4: return "0";
-  case 5: return "1/2";
+  case 5: return "H";
   case 6: return "1";
   case 7: return "U";
   }
@@ -353,15 +249,15 @@ static char *to_texop(int val)
   return NULL;
 }
 
-void r500FragmentProgramDump(struct rX00_fragment_program_code *c)
+void r500FragmentProgramDump(struct radeon_compiler *c, void *user)
 {
-  struct r500_fragment_program_code *code = &c->code.r500;
-  fprintf(stderr, "R500 Fragment Program:\n--------\n");
-
-  int n;
+  struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
+  struct r500_fragment_program_code *code = &compiler->code->code.r500;
+  int n, i;
   uint32_t inst;
   uint32_t inst0;
   char *str = NULL;
+  fprintf(stderr, "R500 Fragment Program:\n--------\n");
 
   for (n = 0; n < code->inst_end+1; n++) {
     inst0 = inst = code->inst[n].inst0;
@@ -381,8 +277,8 @@ void r500FragmentProgramDump(struct rX00_fragment_program_code *c)
            to_mask((inst >> 15) & 0xf));
 
     switch(inst0 & 0x3) {
-    case 0:
-    case 1:
+    case R500_INST_TYPE_ALU:
+    case R500_INST_TYPE_OUT:
       fprintf(stderr,"\t1:RGB_ADDR   0x%08x:", code->inst[n].inst1);
       inst = code->inst[n].inst1;
 
@@ -401,19 +297,20 @@ void r500FragmentProgramDump(struct rX00_fragment_program_code *c)
              (inst >> 30));
       fprintf(stderr,"\t3 RGB_INST:  0x%08x:", code->inst[n].inst3);
       inst = code->inst[n].inst3;
-      fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
+      fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n",
              (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
              (inst >> 11) & 0x3,
              (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
-             (inst >> 24) & 0x3);
+             (inst >> 24) & 0x3, (inst >> 29) & 0x3);
 
 
       fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4);
       inst = code->inst[n].inst4;
-      fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf),
+      fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf),
              (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
              (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
              (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
+             (inst >> 29) & 0x3,
              (inst >> 31) & 0x1);
 
       fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5);
@@ -424,9 +321,87 @@ void r500FragmentProgramDump(struct rX00_fragment_program_code *c)
              (inst >> 23) & 0x3,
              (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
       break;
-    case 2:
+    case R500_INST_TYPE_FC:
+      fprintf(stderr, "\t2:FC_INST    0x%08x:", code->inst[n].inst2);
+      inst = code->inst[n].inst2;
+      /* JUMP_FUNC JUMP_ANY*/
+      fprintf(stderr, "0x%02x %1x ", inst >> 8 & 0xff,
+          (inst & R500_FC_JUMP_ANY) >> 5);
+      
+      /* OP */
+      switch(inst & 0x7){
+      case R500_FC_OP_JUMP:
+       fprintf(stderr, "JUMP");
+        break;
+      case R500_FC_OP_LOOP:
+        fprintf(stderr, "LOOP");
+        break;
+      case R500_FC_OP_ENDLOOP:
+        fprintf(stderr, "ENDLOOP");
+        break;
+      case R500_FC_OP_REP:
+        fprintf(stderr, "REP");
+        break;
+      case R500_FC_OP_ENDREP:
+        fprintf(stderr, "ENDREP");
+        break;
+      case R500_FC_OP_BREAKLOOP:
+        fprintf(stderr, "BREAKLOOP");
+        break;
+      case R500_FC_OP_BREAKREP:
+        fprintf(stderr, "BREAKREP");
+       break;
+      case R500_FC_OP_CONTINUE:
+        fprintf(stderr, "CONTINUE");
+        break;
+      }
+      fprintf(stderr," "); 
+      /* A_OP */
+      switch(inst & (0x3 << 6)){
+      case R500_FC_A_OP_NONE:
+        fprintf(stderr, "NONE");
+        break;
+      case R500_FC_A_OP_POP:
+       fprintf(stderr, "POP");
+        break;
+      case R500_FC_A_OP_PUSH:
+        fprintf(stderr, "PUSH");
+        break;
+      }
+      /* B_OP0 B_OP1 */
+      for(i=0; i<2; i++){
+        fprintf(stderr, " ");
+        switch(inst & (0x3 << (24 + (i * 2)))){
+        /* R500_FC_B_OP0_NONE 
+        * R500_FC_B_OP1_NONE */
+       case 0:
+          fprintf(stderr, "NONE");
+          break;
+        case R500_FC_B_OP0_DECR:
+        case R500_FC_B_OP1_DECR:
+          fprintf(stderr, "DECR");
+          break;
+        case R500_FC_B_OP0_INCR:
+        case R500_FC_B_OP1_INCR:
+          fprintf(stderr, "INCR");
+          break;
+        }
+      }
+      /*POP_CNT B_ELSE */
+      fprintf(stderr, " %d %1x", (inst >> 16) & 0x1f, (inst & R500_FC_B_ELSE) >> 4);
+      inst = code->inst[n].inst3;
+      /* JUMP_ADDR */
+      fprintf(stderr, " %d", inst >> 16);
+      
+      if(code->inst[n].inst2 & R500_FC_IGNORE_UNCOVERED){
+        fprintf(stderr, " IGN_UNC");
+      }
+      inst = code->inst[n].inst3;
+      fprintf(stderr, "\n\t3:FC_ADDR    0x%08x:", inst);
+      fprintf(stderr, "BOOL: 0x%02x, INT: 0x%02x, JUMP_ADDR: %d, JMP_GLBL: %1x\n",
+      inst & 0x1f, (inst >> 8) & 0x1f, (inst >> 16) & 0x1ff, inst >> 31); 
       break;
-    case 3:
+    case R500_INST_TYPE_TEX:
       inst = code->inst[n].inst1;
       fprintf(stderr,"\t1:TEX_INST:  0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
              to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",