Merge remote branch 'origin/7.8'
[mesa.git] / src / mesa / drivers / dri / r300 / compiler / r300_fragprog_emit.c
index 674d1f8cd35a023b43583113643e8d268fe8d774..37dafa771060b1ba94e1b6cb5c058b5d8cdecac1 100644 (file)
 #include "r300_fragprog_swizzle.h"
 
 
+struct r300_emit_state { /* bookkeeping for the node currently being emitted */
+       struct r300_fragment_program_compiler * compiler; /* compiler whose code->code.r300 is being filled in */
+
+       unsigned current_node : 2; /* index of the node being emitted; begin_tex() refuses to advance past 3 */
+       unsigned node_first_tex : 8; /* value of code->tex.length when the current node began */
+       unsigned node_first_alu : 8; /* value of code->alu.length when the current node began */
+       uint32_t node_flags; /* R300_RGBA_OUT / R300_W_OUT bits collected by emit_alu() for the current node */
+};
+
 #define PROG_CODE \
-       struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \
+       struct r300_fragment_program_compiler *c = emit->compiler; /* expands only where a local named emit is in scope */ \
        struct r300_fragment_program_code *code = &c->code->code.r300
 
 #define error(fmt, args...) do {                       \
 /**
  * Mark a temporary register as used.
+ *
+ * Raises code->pixsize to index when index is larger, so pixsize ends up
+ * holding the highest temporary register index that has been marked.
  */
-static void use_temporary(struct r300_fragment_program_code *code, GLuint index)
+static void use_temporary(struct r300_fragment_program_code *code, unsigned int index)
 {
-       if (index > code->max_temp_idx)
-               code->max_temp_idx = index;
+       if (index > code->pixsize)
+               code->pixsize = index;
 }
 
+static unsigned int use_source(struct r300_fragment_program_code* code, struct radeon_pair_instruction_source src)
+{      /*
+        * Return the source-address value for src, as OR-ed into
+        * rgb_addr / alpha_addr by emit_alu():
+        *  - constants yield their index with bit 5 set,
+        *  - temporaries yield their index and are marked as used,
+        *  - any other register file yields 0.
+        */
+       if (src.File == RC_FILE_CONSTANT) {
+               return src.Index | (1 << 5);
+       } else if (src.File == RC_FILE_TEMPORARY) {
+               use_temporary(code, src.Index);
+               return src.Index;
+       }
 
-static GLuint translate_rgb_opcode(struct r300_fragment_program_compiler * c, GLuint opcode)
+       return 0;
+}
+
+
+static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode) /* maps an RC_OPCODE_* value to its R300_ALU_OUTC_* constant */
 {
        switch(opcode) {
-       case OPCODE_CMP: return R300_ALU_OUTC_CMP;
-       case OPCODE_DP3: return R300_ALU_OUTC_DP3;
-       case OPCODE_DP4: return R300_ALU_OUTC_DP4;
-       case OPCODE_FRC: return R300_ALU_OUTC_FRC;
+       case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP;
+       case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3;
+       case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4;
+       case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC;
        default:
                error("translate_rgb_opcode(%i): Unknown opcode", opcode);
                /* fall through */
-       case OPCODE_NOP:
+       case RC_OPCODE_NOP: /* NOP, and any unknown opcode after error(), is emitted as MAD */
                /* fall through */
-       case OPCODE_MAD: return R300_ALU_OUTC_MAD;
-       case OPCODE_MAX: return R300_ALU_OUTC_MAX;
-       case OPCODE_MIN: return R300_ALU_OUTC_MIN;
-       case OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
+       case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD;
+       case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX;
+       case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN;
+       case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
        }
 }
 
-static GLuint translate_alpha_opcode(struct r300_fragment_program_compiler * c, GLuint opcode)
+/**
+ * Map an RC_OPCODE_* value to the R300_ALU_OUTA_* constant that emit_alu()
+ * stores in the alpha_inst word of a paired ALU instruction.
+ *
+ * RC_OPCODE_DP3 and RC_OPCODE_DP4 both map to R300_ALU_OUTA_DP4.
+ * RC_OPCODE_NOP is emitted as R300_ALU_OUTA_MAD.  An unrecognized opcode
+ * is reported through error() and is then also emitted as MAD.
+ */
+static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
 {
        switch(opcode) {
-       case OPCODE_CMP: return R300_ALU_OUTA_CMP;
-       case OPCODE_DP3: return R300_ALU_OUTA_DP4;
-       case OPCODE_DP4: return R300_ALU_OUTA_DP4;
-       case OPCODE_EX2: return R300_ALU_OUTA_EX2;
-       case OPCODE_FRC: return R300_ALU_OUTA_FRC;
-       case OPCODE_LG2: return R300_ALU_OUTA_LG2;
+       case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
+       case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
+       case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
+       case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
+       case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
+       case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
        default:
-               error("translate_rgb_opcode(%i): Unknown opcode", opcode);
+               /* Report under this function's own name (the message used to say translate_rgb_opcode). */
+               error("translate_alpha_opcode(%i): Unknown opcode", opcode);
                /* fall through */
-       case OPCODE_NOP:
+       case RC_OPCODE_NOP:
                /* fall through */
-       case OPCODE_MAD: return R300_ALU_OUTA_MAD;
-       case OPCODE_MAX: return R300_ALU_OUTA_MAX;
-       case OPCODE_MIN: return R300_ALU_OUTA_MIN;
-       case OPCODE_RCP: return R300_ALU_OUTA_RCP;
-       case OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
+       case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
+       case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
+       case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
+       case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
+       case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
        }
 }
 
 /**
  * Emit one paired ALU instruction.
+ *
+ * Appends one entry to code->alu.inst[] and fills its rgb_inst, rgb_addr,
+ * alpha_inst and alpha_addr words from inst.  Colour-output and depth
+ * writes are recorded in emit->node_flags; a depth write also sets
+ * c->code->writes_depth.
+ *
+ * Returns 1 on success, or 0 (after reporting an error) when
+ * code->alu.length has already reached R300_PFS_MAX_ALU_INST.
  */
-static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
+static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)
 {
        PROG_CODE;
 
        if (code->alu.length >= R300_PFS_MAX_ALU_INST) {
                error("Too many ALU instructions");
-               return GL_FALSE;
+               return 0;
        }
 
        int ip = code->alu.length++;
        int j;
-       code->node[code->cur_node].alu_end++;
 
-       code->alu.inst[ip].inst0 = translate_rgb_opcode(c, inst->RGB.Opcode);
-       code->alu.inst[ip].inst2 = translate_alpha_opcode(c, inst->Alpha.Opcode);
+       code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode); /* opcode bits first; arguments and clamp are OR-ed in below */
+       code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
 
        for(j = 0; j < 3; ++j) {
-               GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5);
-               if (!inst->RGB.Src[j].Constant)
-                       use_temporary(code, inst->RGB.Src[j].Index);
-               code->alu.inst[ip].inst1 |= src << (6*j);
+               unsigned int src = use_source(code, inst->RGB.Src[j]);
+               code->alu.inst[ip].rgb_addr |= src << (6*j); /* only ever OR-ed into; NOTE(review): presumably zero beforehand thanks to the memset of *code in r300BuildFragmentProgramHwCode - confirm */
 
-               src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5);
-               if (!inst->Alpha.Src[j].Constant)
-                       use_temporary(code, inst->Alpha.Src[j].Index);
-               code->alu.inst[ip].inst3 |= src << (6*j);
+               src = use_source(code, inst->Alpha.Src[j]);
+               code->alu.inst[ip].alpha_addr |= src << (6*j);
 
-               GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
+               unsigned int arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); /* swizzle in the low bits, Negate at bit 5, Abs at bit 6 */
                arg |= inst->RGB.Arg[j].Abs << 6;
                arg |= inst->RGB.Arg[j].Negate << 5;
-               code->alu.inst[ip].inst0 |= arg << (7*j);
+               code->alu.inst[ip].rgb_inst |= arg << (7*j);
 
                arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
                arg |= inst->Alpha.Arg[j].Abs << 6;
                arg |= inst->Alpha.Arg[j].Negate << 5;
-               code->alu.inst[ip].inst2 |= arg << (7*j);
+               code->alu.inst[ip].alpha_inst |= arg << (7*j);
        }
 
        if (inst->RGB.Saturate)
-               code->alu.inst[ip].inst0 |= R300_ALU_OUTC_CLAMP;
+               code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
        if (inst->Alpha.Saturate)
-               code->alu.inst[ip].inst2 |= R300_ALU_OUTA_CLAMP;
+               code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
 
        if (inst->RGB.WriteMask) {
                use_temporary(code, inst->RGB.DestIndex);
-               code->alu.inst[ip].inst1 |=
+               code->alu.inst[ip].rgb_addr |= /* destination temporary and its write mask */
                        (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) |
                        (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
        }
        if (inst->RGB.OutputWriteMask) {
-               code->alu.inst[ip].inst1 |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT);
-               code->node[code->cur_node].flags |= R300_RGBA_OUT;
+               code->alu.inst[ip].rgb_addr |=
+            (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) |
+            R300_RGB_TARGET(inst->RGB.Target);
+               emit->node_flags |= R300_RGBA_OUT; /* written to code_addr[] by finish_node() */
        }
 
        if (inst->Alpha.WriteMask) {
                use_temporary(code, inst->Alpha.DestIndex);
-               code->alu.inst[ip].inst3 |=
+               code->alu.inst[ip].alpha_addr |=
                        (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) |
                        R300_ALU_DSTA_REG;
        }
        if (inst->Alpha.OutputWriteMask) {
-               code->alu.inst[ip].inst3 |= R300_ALU_DSTA_OUTPUT;
-               code->node[code->cur_node].flags |= R300_RGBA_OUT;
+               code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT |
+            R300_ALPHA_TARGET(inst->Alpha.Target);
+               emit->node_flags |= R300_RGBA_OUT;
        }
        if (inst->Alpha.DepthWriteMask) {
-               code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH;
-               code->node[code->cur_node].flags |= R300_W_OUT;
-               c->code->writes_depth = GL_TRUE;
+               code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
+               emit->node_flags |= R300_W_OUT;
+               c->code->writes_depth = 1;
        }
 
-       return GL_TRUE;
+       return 1;
 }
 
 
 /**
  * Finish the current node without advancing to the next one.
+ *
+ * If the node contains no ALU instruction, an all-zero pair instruction is
+ * emitted for it first.  The node's ALU/TEX start and size values and
+ * emit->node_flags are then written to code->code_addr[emit->current_node].
+ *
+ * Returns 1 on success; 0 if that instruction could not be emitted or if a
+ * node other than node 0 has no TEX instructions.
  */
-static GLboolean finish_node(struct r300_fragment_program_compiler *c)
+static int finish_node(struct r300_emit_state * emit)
 {
-       struct r300_fragment_program_code *code = &c->code->code.r300;
-       struct r300_fragment_program_node *node = &code->node[code->cur_node];
+       struct r300_fragment_program_compiler * c = emit->compiler;
+       struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
 
-       if (node->alu_end < 0) {
+       if (code->alu.length == emit->node_first_alu) { /* nothing was emitted since the node began */
                /* Generate a single NOP for this node */
-               struct radeon_pair_instruction inst;
-               _mesa_bzero(&inst, sizeof(inst));
-               if (!emit_alu(c, &inst))
-                       return GL_FALSE;
+               struct rc_pair_instruction inst;
+               memset(&inst, 0, sizeof(inst));
+               if (!emit_alu(emit, &inst))
+                       return 0;
        }
 
-       if (node->tex_end < 0) {
-               if (code->cur_node == 0) {
-                       node->tex_end = 0;
-               } else {
-                       error("Node %i has no TEX instructions", code->cur_node);
-                       return GL_FALSE;
+       unsigned alu_offset = emit->node_first_alu;
+       unsigned alu_end = code->alu.length - alu_offset - 1; /* at least one ALU instruction exists here, see the NOP above */
+       unsigned tex_offset = emit->node_first_tex;
+       unsigned tex_end = code->tex.length - tex_offset - 1; /* wraps around when the node has no TEX; replaced by 0 below in that case */
+
+       if (code->tex.length == emit->node_first_tex) {
+               if (emit->current_node > 0) {
+                       error("Node %i has no TEX instructions", emit->current_node);
+                       return 0;
                }
+
+               tex_end = 0;
        } else {
-               if (code->cur_node == 0)
-                       code->first_node_has_tex = 1;
+               if (emit->current_node == 0)
+                       code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
        }
 
-       return GL_TRUE;
+       /* Write the config register.
+        * Note: The order in which the words for each node are written
+        * is not correct here and needs to be fixed up once we're entirely
+        * done (r300BuildFragmentProgramHwCode moves the entries to the
+        * end of code_addr[] when fewer than four nodes were used).
+        *
+        * Also note that the register specification from AMD is slightly
+        * incorrect in its description of this register. */
+       code->code_addr[emit->current_node] =
+                       (alu_offset << R300_ALU_START_SHIFT) |
+                       (alu_end << R300_ALU_SIZE_SHIFT) |
+                       (tex_offset << R300_TEX_START_SHIFT) |
+                       (tex_end << R300_TEX_SIZE_SHIFT) |
+                       emit->node_flags;
+
+       return 1;
 }
 
 
@@ -220,98 +258,123 @@ static GLboolean finish_node(struct r300_fragment_program_compiler *c)
  * Begin a block of texture instructions.
  * Create the necessary indirection.
  */
-static GLboolean begin_tex(void* data)
+static int begin_tex(struct r300_emit_state * emit) /* returns 1 on success, 0 after reporting an error */
 {
        PROG_CODE;
 
-       if (code->cur_node == 0) {
-               if (code->node[0].alu_end < 0 &&
-                   code->node[0].tex_end < 0)
-                       return GL_TRUE;
+       if (code->alu.length == emit->node_first_alu &&
+           code->tex.length == emit->node_first_tex) {
+               return 1; /* the current node is still empty: keep using it instead of opening a new one */
        }
 
-       if (code->cur_node == 3) {
+       if (emit->current_node == 3) { /* node 3 is the last one that can be used */
                error("Too many texture indirections");
-               return GL_FALSE;
+               return 0;
        }
 
-       if (!finish_node(c))
-               return GL_FALSE;
+       if (!finish_node(emit))
+               return 0;
 
-       struct r300_fragment_program_node *node = &code->node[++code->cur_node];
-       node->alu_offset = code->alu.length;
-       node->alu_end = -1;
-       node->tex_offset = code->tex.length;
-       node->tex_end = -1;
-       return GL_TRUE;
+       emit->current_node++; /* everything emitted from here on belongs to the new node */
+       emit->node_first_tex = code->tex.length;
+       emit->node_first_alu = code->alu.length;
+       emit->node_flags = 0;
+       return 1;
 }
 
 
-static GLboolean emit_tex(void* data, struct radeon_pair_texture_instruction* inst)
+static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) /* appends one word to code->tex.inst[]; returns 1 on success, 0 after reporting an error */
 {
        PROG_CODE;
 
        if (code->tex.length >= R300_PFS_MAX_TEX_INST) {
                error("Too many TEX instructions");
-               return GL_FALSE;
+               return 0;
        }
 
-       GLuint unit = inst->TexSrcUnit;
-       GLuint dest = inst->DestIndex;
-       GLuint opcode;
+       unsigned int unit = inst->U.I.TexSrcUnit;
+       unsigned int dest = inst->U.I.DstReg.Index;
+       unsigned int opcode;
 
-       switch(inst->Opcode) {
-       case RADEON_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
-       case RADEON_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
-       case RADEON_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
-       case RADEON_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
+       switch(inst->U.I.Opcode) { /* only KIL, TEX, TXB and TXP are accepted */
+       case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
+       case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
+       case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
+       case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
        default:
-               error("Unknown texture opcode %i", inst->Opcode);
-               return GL_FALSE;
+               error("Unknown texture opcode %i", inst->U.I.Opcode);
+               return 0;
        }
 
-       if (inst->Opcode == RADEON_OPCODE_KIL) {
+       if (inst->U.I.Opcode == RC_OPCODE_KIL) { /* for KIL, unit and dest are forced to 0 and no destination temporary is marked */
                unit = 0;
                dest = 0;
        } else {
                use_temporary(code, dest);
        }
 
-       use_temporary(code, inst->SrcIndex);
+       use_temporary(code, inst->U.I.SrcReg[0].Index); /* the source register is marked as used for every opcode, KIL included */
 
-       code->node[code->cur_node].tex_end++;
        code->tex.inst[code->tex.length++] =
-               (inst->SrcIndex << R300_SRC_ADDR_SHIFT) |
+               (inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) |
                (dest << R300_DST_ADDR_SHIFT) |
                (unit << R300_TEX_ID_SHIFT) |
                (opcode << R300_TEX_INST_SHIFT);
-       return GL_TRUE;
+       return 1;
 }
 
 
-static const struct radeon_pair_handler pair_handler = {
-       .EmitPaired = &emit_alu,
-       .EmitTex = &emit_tex,
-       .BeginTexBlock = &begin_tex,
-       .MaxHwTemps = R300_PFS_NUM_TEMP_REGS
-};
-
 /**
  * Final compilation step: Turn the intermediate radeon_program into
  * machine-readable instructions.
+ *
+ * Clears *code, then walks compiler->Base.Program.Instructions:
+ * RC_INSTRUCTION_NORMAL entries are either node breaks
+ * (RC_OPCODE_BEGIN_TEX) or texture instructions, every other entry is
+ * emitted as a paired ALU instruction.  Afterwards the last node is
+ * finished and code->config, code->code_offset and the final layout of
+ * code->code_addr[] are filled in.
+ *
+ * Returns early, leaving those registers unwritten, when
+ * compiler->Base.Error is set after the walk.
  */
 void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
 {
+       struct r300_emit_state emit;
        struct r300_fragment_program_code *code = &compiler->code->code.r300;
 
-       _mesa_bzero(code, sizeof(struct r300_fragment_program_code));
-       code->node[0].alu_end = -1;
-       code->node[0].tex_end = -1;
+       memset(&emit, 0, sizeof(emit));
+       emit.compiler = compiler;
+
+       memset(code, 0, sizeof(struct r300_fragment_program_code));
+
+       for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
+           inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
+           inst = inst->Next) { /* the list head itself ends the walk; so does the first recorded error */
+               if (inst->Type == RC_INSTRUCTION_NORMAL) {
+                       if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
+                               begin_tex(&emit);
+                               continue;
+                       }
+
+                       emit_tex(&emit, inst);
+               } else {
+                       emit_alu(&emit, &inst->U.P);
+               }
+       }
+
+       if (code->pixsize >= compiler->max_temp_regs) /* pixsize is the highest temporary index marked by use_temporary() */
+               rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
 
-       radeonPairProgram(&compiler->Base, &pair_handler, compiler);
        if (compiler->Base.Error)
                return;
 
-       finish_node(compiler);
+       /* Finish the program */
+       finish_node(&emit); /* NOTE(review): the return value is ignored - confirm that a failure here is always visible through compiler->Base.Error */
+
+       code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
+       code->code_offset =
+               (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) |
+               ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) |
+               (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) |
+               ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT);
+
+       if (emit.current_node < 3) { /* fewer than four nodes: move the used entries to the end of code_addr[] and zero the rest */
+               int shift = 3 - emit.current_node;
+               int i;
+               for(i = emit.current_node; i >= 0; --i) /* top down, so no entry is overwritten before it has been moved */
+                       code->code_addr[shift + i] = code->code_addr[i];
+               for(i = 0; i < shift; ++i)
+                       code->code_addr[i] = 0;
+       }
 }
-