r600: add assembler support for literal(inline) constants
[mesa.git] / src / mesa / drivers / dri / r600 / r700_assembler.h
index f9c4d849c65a24e95329fcc82ddf27fa3aaf95bb..3fe65654ca0323a9b9f19ac60c0f5a33a7fc36c4 100644 (file)
@@ -72,7 +72,8 @@ typedef enum SrcRegisterType
     SRC_REG_INPUT          = 1,
     SRC_REG_CONSTANT       = 2,
     SRC_REG_ALT_TEMPORARY  = 3,
-    NUMBER_OF_SRC_REG_TYPE = 4
+    SRC_REC_LITERAL        = 4, 
+    NUMBER_OF_SRC_REG_TYPE = 5
 } SrcRegisterType;
 
 typedef enum DstRegisterType 
@@ -111,16 +112,24 @@ typedef struct PVSDSTtag
        BITS addrmode1:1; //32
 } PVSDST;
 
+typedef struct PVSINSTtag
+{
+    BITS literal_slots      :2; 
+    BITS SaturateMode :2; 
+    BITS index_mode   :3;
+} PVSINST;
+
 typedef struct PVSSRCtag 
 {
-       BITS rtype:4;            
+       BITS rtype:3;            
        BITS addrmode0:1;        
-       BITS reg:10;      //15     (8)
+       BITS reg:10;      //14     (8)
        BITS swizzlex:3;
        BITS swizzley:3;
        BITS swizzlez:3;
-       BITS swizzlew:3;  //27        
+       BITS swizzlew:3;  //26        
 
+       BITS abs:1;
        BITS negx:1;
        BITS negy:1;
        BITS negz:1;
@@ -148,6 +157,7 @@ typedef union PVSDWORDtag
 {
        BITS    bits;
        PVSDST  dst;
+    PVSINST dst2;
        PVSSRC  src;
        PVSMATH math;
        float   f;
@@ -251,6 +261,8 @@ enum
     FC_IF = 1,
     FC_LOOP = 2,
     FC_REP = 3,
+    FC_PUSH_VPM = 4,
+    FC_PUSH_WQM = 5,
 
     COND_NONE = 0,
     COND_BOOL = 1,
@@ -263,22 +275,52 @@ enum
 
 typedef struct FC_LEVEL 
 {
-       unsigned int           first; ///< first fc instruction on level (if, rep, loop)
-       unsigned int*          mid; ///< middle instructions - else or all breaks on this level
-       unsigned int           midLen;
-       unsigned int           type;
-       unsigned int           cond;
-       unsigned int           inv;
-       unsigned int           bpush; ///< 1 if first instruction does branch stack push
-                        int           id; ///< id of bool or int variable
+    R700ControlFlowGenericClause *  first;
+    R700ControlFlowGenericClause ** mid;
+    unsigned int unNumMid;
+    unsigned int midLen;
+    unsigned int type;
+    unsigned int cond;
+    unsigned int inv;
+    int id; ///< id of bool or int variable
 } FC_LEVEL;
 
 typedef struct VTX_FETCH_METHOD 
 {
-       GLboolean bEnableMini;
-       GLuint mega_fetch_remainder;
+    GLboolean bEnableMini;
+    GLuint mega_fetch_remainder;
 } VTX_FETCH_METHOD;
 
+typedef struct SUB_OFFSET
+{
+    GLint  subIL_Offset;
+    GLuint unCFoffset;
+    GLuint unStackDepthMax;
+    TypedShaderList lstCFInstructions_local;
+} SUB_OFFSET;
+
+typedef struct CALLER_POINTER
+{
+    GLint  subIL_Offset;
+    GLint  subDescIndex;
+    R700ControlFlowGenericClause* cf_ptr;
+} CALLER_POINTER;
+
+#define SQ_MAX_CALL_DEPTH 0x00000020
+
+typedef struct CALL_LEVEL
+{
+    unsigned int      FCSP_BeforeEntry;
+    GLint             subDescIndex;
+    GLushort          current;
+    GLushort          max;
+    TypedShaderList * plstCFInstructions_local;
+} CALL_LEVEL;
+
+#define HAS_CURRENT_LOOPRET 0x1L
+#define HAS_LOOPRET         0x2L
+#define LOOPRET_FLAGS       HAS_LOOPRET | HAS_CURRENT_LOOPRET
+
 typedef struct r700_AssemblerBase 
 {
        R700ControlFlowSXClause*      cf_last_export_ptr;
@@ -294,14 +336,19 @@ typedef struct r700_AssemblerBase
        // No clause has been created yet
        CF_CLAUSE_TYPE cf_current_clause_type;
 
+    BITS alu_x_opcode;
+
        GLuint number_of_exports;
        GLuint number_of_colorandz_exports;
        GLuint number_of_export_opcodes;
 
        PVSDWORD D;
+    PVSDWORD D2;
        PVSDWORD S[3];
+        PVSDWORD C[4];
 
        unsigned int uLastPosUpdate;
+       unsigned int last_cond_register;
 
        OUT_FRAGMENT_FMT_0     fp_stOutFmt0;
 
@@ -310,6 +357,8 @@ typedef struct r700_AssemblerBase
        unsigned int number_used_registers;
        unsigned int uUsedConsts; 
 
+    unsigned int flag_reg_index;
+
        // Fragment programs
        unsigned int uiFP_AttributeMap[FRAG_ATTRIB_MAX];
        unsigned int uiFP_OutputMap[FRAG_RESULT_MAX];
@@ -330,9 +379,6 @@ typedef struct r700_AssemblerBase
        unsigned int FCSP;
        FC_LEVEL fc_stack[32];
 
-       unsigned int branch_depth;
-       unsigned int max_branch_depth;
-
        //-----------------------------------------------------------------------------------
        // ArgSubst used in Assemble_Source() function
        //-----------------------------------------------------------------------------------
@@ -373,7 +419,24 @@ typedef struct r700_AssemblerBase
     SHADER_PIPE_TYPE currentShaderType;
     struct prog_instruction * pILInst;
     GLuint             uiCurInst;
+    GLubyte SamplerUnits[MAX_SAMPLERS];
     GLboolean   bR6xx;
+    /* helper to decide which type of instruction to assemble */
+    GLboolean is_tex;
+    /* we inserted helper intructions and need barrier on next TEX ins */ 
+    GLboolean need_tex_barrier; 
+
+    SUB_OFFSET     * subs;
+    GLuint           unSubArraySize;
+    GLuint           unSubArrayPointer;
+    CALLER_POINTER * callers;
+    GLuint           unCallerArraySize;
+    GLuint           unCallerArrayPointer;
+    unsigned int     CALLSP;
+    CALL_LEVEL       CALLSTACK[SQ_MAX_CALL_DEPTH];
+
+    GLuint unCFflags;
+
 } r700_AssemblerBase;
 
 //Internal use
@@ -411,6 +474,15 @@ GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
                                                                GLuint number_of_elements,
                                 GLenum dataElementType,
                                                                VTX_FETCH_METHOD* pFetchMethod);
+GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
+                                       GLuint              destination_register,                                                                      
+                                       GLenum              type,
+                                       GLint               size,
+                                       GLubyte             element,
+                                       GLuint              _signed,
+                                       GLboolean           normalize,
+                                       VTX_FETCH_METHOD  * pFetchMethod);
+GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm);
 GLuint gethelpr(r700_AssemblerBase* pAsm);
 void resethelpr(r700_AssemblerBase* pAsm);
 void checkop_init(r700_AssemblerBase* pAsm);
@@ -433,6 +505,10 @@ GLboolean assemble_alu_src(R700ALUInstruction*  alu_instruction_ptr,
 GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
                               R700ALUInstruction* alu_instruction_ptr,
                               GLuint              contiguous_slots_needed);
+
+GLboolean add_cf_instruction(r700_AssemblerBase* pAsm);
+void add_return_inst(r700_AssemblerBase *pAsm);
+
 void get_src_properties(R700ALUInstruction*  alu_instruction_ptr,
                         int                  source_index,
                         BITS*                psrc_sel,
@@ -454,21 +530,36 @@ GLboolean check_vector(r700_AssemblerBase* pAsm,
                        R700ALUInstruction* alu_instruction_ptr);
 GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm);
 GLboolean next_ins(r700_AssemblerBase *pAsm);
+
+/* TODO : merge next_ins/literal, assemble_alu_instruction/literal */
+GLboolean next_ins_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral);
+GLboolean assemble_alu_instruction_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral);
+
+GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops);
+GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset);
+GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue);
+GLboolean testFlag(r700_AssemblerBase *pAsm);
+GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP);
+GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF);
+
 GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode);
 GLboolean assemble_ABS(r700_AssemblerBase *pAsm);
 GLboolean assemble_ADD(r700_AssemblerBase *pAsm);
+GLboolean assemble_ARL(r700_AssemblerBase *pAsm);
 GLboolean assemble_BAD(char *opcode_str);
 GLboolean assemble_CMP(r700_AssemblerBase *pAsm);
 GLboolean assemble_COS(r700_AssemblerBase *pAsm);
 GLboolean assemble_DOT(r700_AssemblerBase *pAsm);
 GLboolean assemble_DST(r700_AssemblerBase *pAsm);
 GLboolean assemble_EX2(r700_AssemblerBase *pAsm);
+GLboolean assemble_EXP(r700_AssemblerBase *pAsm);
 GLboolean assemble_FLR(r700_AssemblerBase *pAsm);
 GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm);
 GLboolean assemble_FRC(r700_AssemblerBase *pAsm);
-GLboolean assemble_KIL(r700_AssemblerBase *pAsm);
+GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode);
 GLboolean assemble_LG2(r700_AssemblerBase *pAsm);
 GLboolean assemble_LRP(r700_AssemblerBase *pAsm);
+GLboolean assemble_LOG(r700_AssemblerBase *pAsm);
 GLboolean assemble_MAD(r700_AssemblerBase *pAsm);
 GLboolean assemble_LIT(r700_AssemblerBase *pAsm);
 GLboolean assemble_MAX(r700_AssemblerBase *pAsm);
@@ -481,14 +572,32 @@ GLboolean assemble_RSQ(r700_AssemblerBase *pAsm);
 GLboolean assemble_SIN(r700_AssemblerBase *pAsm);
 GLboolean assemble_SCS(r700_AssemblerBase *pAsm);
 GLboolean assemble_SGE(r700_AssemblerBase *pAsm);
+
+GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode);
+GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode); 
+
 GLboolean assemble_SLT(r700_AssemblerBase *pAsm);
 GLboolean assemble_STP(r700_AssemblerBase *pAsm);
 GLboolean assemble_TEX(r700_AssemblerBase *pAsm);
 GLboolean assemble_XPD(r700_AssemblerBase *pAsm);
 GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm);
-GLboolean assemble_IF(r700_AssemblerBase *pAsm);
+GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse);
+GLboolean assemble_ELSE(r700_AssemblerBase *pAsm);
 GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm);
 
+GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm);
+GLboolean assemble_BRK(r700_AssemblerBase *pAsm);
+GLboolean assemble_COND(r700_AssemblerBase *pAsm);
+GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm);
+
+GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex);
+GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm);
+GLboolean assemble_RET(r700_AssemblerBase *pAsm);
+GLboolean assemble_CAL(r700_AssemblerBase *pAsm, 
+                       GLint nILindex,
+                       GLuint uiNumberInsts,
+                       struct prog_instruction *pILInst);
+
 GLboolean Process_Export(r700_AssemblerBase* pAsm,
                          GLuint type, 
                          GLuint export_starting_index,
@@ -500,12 +609,16 @@ GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm,
 
 
 //Interface
-GLboolean AssembleInstr(GLuint uiNumberInsts,
+GLboolean AssembleInstr(GLuint uiFirstInst,
+                        GLuint uiNumberInsts,
                         struct prog_instruction *pILInst, 
                                                r700_AssemblerBase *pR700AsmCode);
 GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten);  
 GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode, GLbitfield OutputsWritten);
 
+GLboolean RelocProgram(r700_AssemblerBase * pAsm);
+GLboolean InitShaderProgram(r700_AssemblerBase * pAsm);
+
 int       Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader);
 GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode);