r600: add assembler support for literal(inline) constants
authorAndre Maasikas <amaasikas@gmail.com>
Mon, 7 Dec 2009 13:23:40 +0000 (15:23 +0200)
committerAndre Maasikas <amaasikas@gmail.com>
Tue, 8 Dec 2009 13:43:12 +0000 (15:43 +0200)
and use it in cubemap instruction sequence for testing

src/mesa/drivers/dri/r600/r700_assembler.c
src/mesa/drivers/dri/r600/r700_assembler.h

index 8155d53eebc0e021e47f1afb106c255bbfe6ea39..c1e3377af64667e35291dc558b91422b912f3595 100644 (file)
@@ -1733,7 +1733,7 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
     }
     else 
     {
-        pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++;
+        pAsm->cf_current_alu_clause_ptr->m_Word1.f.count += (GetInstructionSize(alu_instruction_ptr->m_ShaderInstType) / 2);
     }
 
     // If this clause constains any instruction that is forward dependent on a TEX instruction, 
@@ -2168,6 +2168,10 @@ GLboolean check_vector(r700_AssemblerBase* pAsm,
 
 GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
 {
+    R700ALUInstruction            * alu_instruction_ptr;
+    R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl;
+    R700ALUInstructionFullLiteral * alu_instruction_ptr_fl;
+
     GLuint    number_of_scalar_operations;
     GLboolean is_single_scalar_operation;
     GLuint    scalar_channel_index;
@@ -2238,18 +2242,39 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
         contiguous_slots_needed = 4;
     }
 
+    contiguous_slots_needed += pAsm->D2.dst2.literal_slots;
+
     initialize(pAsm);    
 
     for (scalar_channel_index=0;
             scalar_channel_index < number_of_scalar_operations; 
                 scalar_channel_index++) 
     {
-        R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
-        if (alu_instruction_ptr == NULL) 
-               {
-                       return GL_FALSE;
-               }
-        Init_R700ALUInstruction(alu_instruction_ptr);
+        if(scalar_channel_index == (number_of_scalar_operations-1))
+        {
+            switch(pAsm->D2.dst2.literal_slots)
+            {
+            case 0:
+                alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
+                Init_R700ALUInstruction(alu_instruction_ptr);
+                break;
+            case 1:
+                alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral);
+                Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pAsm->C[0].f, pAsm->C[1].f);
+                alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl;
+                break;
+            case 2:
+                alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral);
+                Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl,pAsm->C[0].f, pAsm->C[1].f, pAsm->C[2].f, pAsm->C[3].f);
+                alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl;
+            break;
+            };
+        }
+        else
+        {
+            alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
+            Init_R700ALUInstruction(alu_instruction_ptr);
+        }
         
         //src 0
         current_source_index = 0;
@@ -2447,12 +2472,12 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
             }
         }
 
-        contiguous_slots_needed = 0;
+        contiguous_slots_needed -= 1;
     }
 
     return GL_TRUE;
 }
-
+#if 0
 GLboolean assemble_alu_instruction_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral)
 {
     R700ALUInstruction            * alu_instruction_ptr;
@@ -2705,7 +2730,7 @@ GLboolean assemble_alu_instruction_literal(r700_AssemblerBase *pAsm, GLfloat * p
 
     return GL_TRUE;
 }
-
+#endif
 GLboolean next_ins(r700_AssemblerBase *pAsm)
 {
     struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
@@ -2758,11 +2783,11 @@ GLboolean next_ins(r700_AssemblerBase *pAsm)
     pAsm->is_tex = GL_FALSE;
     pAsm->need_tex_barrier = GL_FALSE;
     pAsm->D2.bits = 0;
-
+    pAsm->C[0].bits = pAsm->C[1].bits = pAsm->C[2].bits = pAsm->C[3].bits = 0;
     return GL_TRUE;
 }
 
-/* not work yet */
+#if 0/* not work yet */
 GLboolean next_ins_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral)
 {
     struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
@@ -2784,7 +2809,7 @@ GLboolean next_ins_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral)
     pAsm->need_tex_barrier = GL_FALSE;
     return GL_TRUE;
 }
-
+#endif
 GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode)
 {
     BITS tmp;
@@ -4472,13 +4497,14 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
         /* MULADD R0.x,  R0.x,  PS1,  (0x3FC00000, 1.5f).x
          * MULADD R0.y,  R0.y,  PS1,  (0x3FC00000, 1.5f).x
          * muladd has no writemask, have to use another temp 
-         * also no support for imm constants, so add 1 here
          */
         pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
         pAsm->D.dst.op3    = 1;
         setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
         pAsm->D.dst.rtype = DST_REG_TEMPORARY;
         pAsm->D.dst.reg   = tmp2;
+        pAsm->D2.dst2.literal_slots = 1;
+        pAsm->C[0].f = 1.5F;
 
         setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
         pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
@@ -4489,12 +4515,13 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
         pAsm->S[1].src.reg   = tmp1;
         setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z);
         setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
-        pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
+        /* immediate c 1.5 */
+        pAsm->S[2].src.rtype = SRC_REC_LITERAL;
         pAsm->S[2].src.reg   = tmp1;
-        setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_1);
+        setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X);
 
         next_ins(pAsm);
-
+#if 0
         /* ADD the remaining .5 */
         pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
         setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
@@ -4515,7 +4542,7 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
         noswizzle_PVSSRC(&(pAsm->S[1].src));
 
         next_ins(pAsm);
-
+#endif
         /* tmp1.xy = temp2.xy */
         pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
         setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
@@ -5410,7 +5437,7 @@ GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue)
     pAsm->D.dst.writey   = 0;
     pAsm->D.dst.writez   = 0;
     pAsm->D.dst.writew   = 0;
-    pAsm->D2.dst2.literal      = 1;
+    pAsm->D2.dst2.literal_slots      = 1;
     pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
     pAsm->D.dst.predicated     = 0;
     /* in reloc where dislink flag init inst, only one slot alu inst is handled. */
@@ -5465,7 +5492,7 @@ GLboolean testFlag(r700_AssemblerBase *pAsm)
     pAsm->D.dst.writey   = 0;
     pAsm->D.dst.writez   = 0;
     pAsm->D.dst.writew   = 0;
-    pAsm->D2.dst2.literal      = 1;
+    pAsm->D2.dst2.literal_slots      = 1;
     pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
     pAsm->D.dst.predicated     = 1;
     pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */
index cb7685464d6a8a2e968c38dda46db058f7d32f47..3fe65654ca0323a9b9f19ac60c0f5a33a7fc36c4 100644 (file)
@@ -114,7 +114,7 @@ typedef struct PVSDSTtag
 
 typedef struct PVSINSTtag
 {
-    BITS literal      :2; 
+    BITS literal_slots      :2; 
     BITS SaturateMode :2; 
     BITS index_mode   :3;
 } PVSINST;
@@ -345,6 +345,7 @@ typedef struct r700_AssemblerBase
        PVSDWORD D;
     PVSDWORD D2;
        PVSDWORD S[3];
+        PVSDWORD C[4];
 
        unsigned int uLastPosUpdate;
        unsigned int last_cond_register;