r300g: Increase fragment shader limits for r400 cards
author Tom Stellard <tstellar@gmail.com>
Mon, 13 Dec 2010 17:11:25 +0000 (09:11 -0800)
committer Tom Stellard <tstellar@gmail.com>
Mon, 24 Jan 2011 01:47:48 +0000 (17:47 -0800)
r400 fragment shaders now support up to 64 temporary registers,
512 ALU instructions, and 512 TEX instructions.

src/gallium/drivers/r300/r300_fs.c
src/gallium/drivers/r300/r300_reg.h
src/gallium/drivers/r300/r300_screen.c
src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
src/mesa/drivers/dri/r300/compiler/radeon_code.h
src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
src/mesa/drivers/dri/r300/r300_reg.h

index 84773ab386c88298d55cdf7850b6d107972c22fa..b2c02bec86c947aa096bad7a8457a1122c7dc28a 100644 (file)
@@ -298,44 +298,98 @@ static void r300_emit_fs_code_to_buffer(
         }
     } else { /* r300 */
         struct r300_fragment_program_code *code = &generic_code->code.r300;
-
-        shader->cb_code_size = 19 +
-                               (r300->screen->caps.is_r400 ? 2 : 0) +
-                               code->alu.length * 4 +
-                               (code->tex.length ? (1 + code->tex.length) : 0) +
-                               imm_count * 5;
+        unsigned int alu_length = code->alu.length;
+        unsigned int alu_iterations = ((alu_length - 1) / 64) + 1;
+        unsigned int tex_length = code->tex.length;
+        unsigned int tex_iterations =
+            tex_length > 0 ? ((tex_length - 1) / 32) + 1 : 0;
+        unsigned int iterations =
+            alu_iterations > tex_iterations ? alu_iterations : tex_iterations;
+        unsigned int bank = 0;
+
+        shader->cb_code_size = 15 +
+            /* R400_US_CODE_BANK */
+            (r300->screen->caps.is_r400 ? 2 * (iterations + 1): 0) +
+            /* R400_US_CODE_EXT */
+            (r300->screen->caps.is_r400 ? 2 : 0) +
+            /* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0, R400_US_ALU_EXT_ADDR_0 */
+            (code->r390_mode ? (5 * alu_iterations) : 4) +
+            /* R400_US_ALU_EXT_ADDR_[0-63] */
+            (code->r390_mode ? (code->alu.length) : 0) +
+            /* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0 */
+            code->alu.length * 4 +
+            /* R300_US_TEX_INST_0, R300_US_TEX_INST_[0-31] */
+            (code->tex.length > 0 ? code->tex.length + tex_iterations : 0) +
+            imm_count * 5;
 
         NEW_CB(shader->cb_code, shader->cb_code_size);
 
-        if (r300->screen->caps.is_r400)
-            OUT_CB_REG(R400_US_CODE_BANK, 0);
-
         OUT_CB_REG(R300_US_CONFIG, code->config);
         OUT_CB_REG(R300_US_PIXSIZE, code->pixsize);
         OUT_CB_REG(R300_US_CODE_OFFSET, code->code_offset);
 
+        if (code->r390_mode) {
+            OUT_CB_REG(R400_US_CODE_EXT, code->r400_code_offset_ext);
+        } else if (r300->screen->caps.is_r400) {
+            /* This register appears to affect shaders even if r390_mode is
+             * disabled, so it needs to be set to 0 for shaders that
+             * don't use r390_mode. */
+            OUT_CB_REG(R400_US_CODE_EXT, 0);
+        }
+
         OUT_CB_REG_SEQ(R300_US_CODE_ADDR_0, 4);
         OUT_CB_TABLE(code->code_addr, 4);
 
-        OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length);
-        for (i = 0; i < code->alu.length; i++)
-            OUT_CB(code->alu.inst[i].rgb_inst);
+        do {
+            unsigned int bank_alu_length = (alu_length < 64 ? alu_length : 64);
+            unsigned int bank_alu_offset = bank * 64;
+            unsigned int bank_tex_length = (tex_length < 32 ? tex_length : 32);
+            unsigned int bank_tex_offset = bank * 32;
+
+            if (r300->screen->caps.is_r400) {
+                OUT_CB_REG(R400_US_CODE_BANK, code->r390_mode ?
+                                (bank << R400_BANK_SHIFT) | R400_R390_MODE_ENABLE : 0);//2
+            }
+
+            if (bank_alu_length > 0) {
+                OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, bank_alu_length);
+                for (i = 0; i < bank_alu_length; i++)
+                    OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_inst);
+
+                OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, bank_alu_length);
+                for (i = 0; i < bank_alu_length; i++)
+                    OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_addr);
 
-        OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length);
-        for (i = 0; i < code->alu.length; i++)
-            OUT_CB(code->alu.inst[i].rgb_addr);
+                OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, bank_alu_length);
+                for (i = 0; i < bank_alu_length; i++)
+                    OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_inst);
 
-        OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length);
-        for (i = 0; i < code->alu.length; i++)
-            OUT_CB(code->alu.inst[i].alpha_inst);
+                OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, bank_alu_length);
+                for (i = 0; i < bank_alu_length; i++)
+                    OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_addr);
+
+                if (code->r390_mode) {
+                    OUT_CB_REG_SEQ(R400_US_ALU_EXT_ADDR_0, bank_alu_length);
+                    for (i = 0; i < bank_alu_length; i++)
+                        OUT_CB(code->alu.inst[i + bank_alu_offset].r400_ext_addr);
+                }
+            }
+
+            if (bank_tex_length > 0) {
+                OUT_CB_REG_SEQ(R300_US_TEX_INST_0, bank_tex_length);
+                OUT_CB_TABLE(code->tex.inst + bank_tex_offset, bank_tex_length);
+            }
 
-        OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
-        for (i = 0; i < code->alu.length; i++)
-            OUT_CB(code->alu.inst[i].alpha_addr);
+            alu_length -= bank_alu_length;
+            tex_length -= bank_tex_length;
+            bank++;
+        } while(code->r390_mode && (alu_length > 0 || tex_length > 0));
 
-        if (code->tex.length) {
-            OUT_CB_REG_SEQ(R300_US_TEX_INST_0, code->tex.length);
-            OUT_CB_TABLE(code->tex.inst, code->tex.length);
+        /* R400_US_CODE_BANK needs to be reset to 0, otherwise some shaders
+         * will be rendered incorrectly. */
+        if (r300->screen->caps.is_r400) {
+            OUT_CB_REG(R400_US_CODE_BANK,
+                code->r390_mode ? R400_R390_MODE_ENABLE : 0);
         }
 
         /* Emit immediates. */
@@ -384,12 +438,17 @@ static void r300_translate_fragment_shader(
     compiler.code = &shader->code;
     compiler.state = shader->compare_state;
     compiler.Base.is_r500 = r300->screen->caps.is_r500;
+    compiler.Base.is_r400 = r300->screen->caps.is_r400;
     compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT);
     compiler.Base.has_half_swizzles = TRUE;
     compiler.Base.has_presub = TRUE;
-    compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32;
+    compiler.Base.max_temp_regs =
+        compiler.Base.is_r500 ? 128 : (compiler.Base.is_r400 ? 64 : 32);
     compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32;
-    compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64;
+    compiler.Base.max_alu_insts =
+        (compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 64;
+    compiler.Base.max_tex_insts =
+        (compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 32;
     compiler.AllocateHwInputs = &allocate_hardware_inputs;
     compiler.UserData = &shader->inputs;
 
index d1154dee40a16297682965b520cf788a290e5ae7..1d93dab2ca256e262fa102f1fc43584537e314a9 100644 (file)
@@ -2162,14 +2162,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 /* R4xx extended fragment shader registers. */
 #define R400_US_ALU_EXT_ADDR_0              0x4ac0 /* up to 63 (0x4bbc) */
-#   define R400_ADDR0_EXT_RGB_MSB_BIT       0x01
-#   define R400_ADDR1_EXT_RGB_MSB_BIT       0x02
-#   define R400_ADDR2_EXT_RGB_MSB_BIT       0x04
+#   define R400_ADDR_EXT_RGB_MSB_BIT(x)     (1 << (x))
 #   define R400_ADDRD_EXT_RGB_MSB_BIT       0x08
-#   define R400_ADDR0_EXT_A_MSB_BIT         0x10
-#   define R400_ADDR1_EXT_A_MSB_BIT         0x20
-#   define R400_ADDR2_EXT_A_MSB_BIT         0x40
+#   define R400_ADDR_EXT_A_MSB_BIT(x)       (1 << ((x) + 4))
 #   define R400_ADDRD_EXT_A_MSB_BIT         0x80
+
 #define R400_US_CODE_BANK                   0x46b8
 #   define R400_BANK_SHIFT                  0
 #   define R400_BANK_MASK                   0xf
index c75aeaa10a7f2155ff15ed8c5cb1f4f87fdff4b7..178ad63954ee698ba019f0cf61e5b3e5f4eb44df 100644 (file)
@@ -87,12 +87,8 @@ static const char* r300_get_name(struct pipe_screen* pscreen)
 static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
 {
     struct r300_screen* r300screen = r300_screen(pscreen);
-    boolean is_r400 = r300screen->caps.is_r400;
     boolean is_r500 = r300screen->caps.is_r500;
 
-    /* XXX extended shader capabilities of r400 unimplemented */
-    is_r400 = FALSE;
-
     switch (param) {
         /* Supported features (boolean caps). */
         case PIPE_CAP_NPOT_TEXTURES:
@@ -175,9 +171,6 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
    boolean is_r400 = r300screen->caps.is_r400;
    boolean is_r500 = r300screen->caps.is_r500;
 
-   /* XXX extended shader capabilities of r400 unimplemented */
-   is_r400 = FALSE;
-
    switch (shader)
     {
     case PIPE_SHADER_FRAGMENT:
index 782671bac0156810f89448c73eeb997ec8beb824..deba9ca834d01c5a41be5628f9c1c7a948ed0308 100644 (file)
@@ -49,6 +49,11 @@ static void presub_string(char out[10], unsigned int inst)
        }
 }
 
+static int get_msb(unsigned int bit, unsigned int r400_ext_addr)
+{
+       return ((r400_ext_addr & bit) != 0) << 5; /* 32 if flag set, else 0 */
+}
+
 /* just some random things... */
 void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
 {
@@ -61,16 +66,21 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
 
        fprintf(stderr, "Hardware program\n");
        fprintf(stderr, "----------------\n");
+       if (c->is_r400) {
+               fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext);
+       }
 
        for (n = 0; n <= (code->config & 3); n++) {
                uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n];
-               int alu_offset = (code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT;
-               int alu_end = (code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT;
+               unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) +
+                               (((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6);
+               unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) +
+                               (((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6);
                int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT;
                int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT;
 
-               fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "
-                       "alu_end: %d, tex_end: %d  (code_addr: %08x)\n", n,
+               fprintf(stderr, "NODE %d: alu_offset: %u, tex_offset: %d, "
+                       "alu_end: %u, tex_end: %d  (code_addr: %08x)\n", n,
                        alu_offset, tex_offset, alu_end, tex_end, code_addr);
 
                if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) {
@@ -125,11 +135,15 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
                        for (j = 0; j < 3; ++j) {
                                int regc = code->alu.inst[i].rgb_addr >> (j * 6);
                                int rega = code->alu.inst[i].alpha_addr >> (j * 6);
+                               int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j),
+                                       code->alu.inst[i].r400_ext_addr);
+                               int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j),
+                                       code->alu.inst[i].r400_ext_addr);
 
                                sprintf(srcc[j], "%c%i",
-                                       (regc & 32) ? 'c' : 't', regc & 31);
+                                       (regc & 32) ? 'c' : 't', (regc & 31) | msbc);
                                sprintf(srca[j], "%c%i",
-                                       (rega & 32) ? 'c' : 't', rega & 31);
+                                       (rega & 32) ? 'c' : 't', (rega & 31) | msba);
                        }
 
                        dstc[0] = 0;
@@ -141,9 +155,14 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
                                (code->alu.inst[i].
                                 rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : "");
                        if (flags[0] != 0) {
+                               unsigned int msb = get_msb(
+                                       R400_ADDRD_EXT_RGB_MSB_BIT,
+                                       code->alu.inst[i].r400_ext_addr);
+
                                sprintf(dstc, "t%i.%s ",
-                                       (code->alu.inst[i].
-                                        rgb_addr >> R300_ALU_DSTC_SHIFT) & 31,
+                                       ((code->alu.inst[i].
+                                        rgb_addr >> R300_ALU_DSTC_SHIFT)
+                                        & 31) | msb,
                                        flags);
                        }
                        sprintf(flags, "%s%s%s",
@@ -166,9 +185,13 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user)
 
                        dsta[0] = 0;
                        if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) {
+                               unsigned int msb = get_msb(
+                                       R400_ADDRD_EXT_A_MSB_BIT,
+                                       code->alu.inst[i].r400_ext_addr);
                                sprintf(dsta, "t%i.w ",
-                                       (code->alu.inst[i].
-                                        alpha_addr >> R300_ALU_DSTA_SHIFT) & 31);
+                                       ((code->alu.inst[i].
+                                        alpha_addr >> R300_ALU_DSTA_SHIFT) & 31)
+                                        | msb);
                        }
                        if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) {
                                sprintf(tmp, "o%i.w ",
index 1db8678e890e23c718eaab589bdc8c3ba67e848e..28d132a5fe3ec42b1a2e73152d09c7fdd8ee6c3f 100644 (file)
@@ -64,6 +64,20 @@ struct r300_emit_state {
                        __FILE__, __FUNCTION__, ##args);        \
        } while(0)
 
+static unsigned int get_msbs_alu(unsigned int bits)
+{
+       return (bits & (0x7u << 6)) >> 6; /* 3 bits above the low 6 bits */
+}
+
+/**
+ * Return the 4 bits of \p bits that sit just above its lowest \p lsbs bits.
+ */
+static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs)
+{
+       return (bits >> lsbs) & 0xf; /* 4-bit field; 0x15 dropped bits 1 and 3 */
+}
+
+#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> (lsbs)) & (mask))
 
 /**
  * Mark a temporary register as used.
@@ -83,7 +97,7 @@ static unsigned int use_source(struct r300_fragment_program_code* code, struct r
                return src.Index | (1 << 5);
        } else if (src.File == RC_FILE_TEMPORARY) {
                use_temporary(code, src.Index);
-               return src.Index;
+               return src.Index & 0x1f;
        }
 
        return 0;
@@ -151,11 +165,19 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
        code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
 
        for(j = 0; j < 3; ++j) {
+               /* Set the RGB address */
                unsigned int src = use_source(code, inst->RGB.Src[j]);
                unsigned int arg;
+               if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
+                       code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j);
+
                code->alu.inst[ip].rgb_addr |= src << (6*j);
 
+               /* Set the Alpha address */
                src = use_source(code, inst->Alpha.Src[j]);
+               if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
+                       code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j);
+
                code->alu.inst[ip].alpha_addr |= src << (6*j);
 
                arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
@@ -223,8 +245,10 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
 
        if (inst->RGB.WriteMask) {
                use_temporary(code, inst->RGB.DestIndex);
+               if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS)
+                       code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT;
                code->alu.inst[ip].rgb_addr |=
-                       (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) |
+                       ((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) |
                        (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
        }
        if (inst->RGB.OutputWriteMask) {
@@ -236,8 +260,10 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i
 
        if (inst->Alpha.WriteMask) {
                use_temporary(code, inst->Alpha.DestIndex);
+               if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS)
+                       code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT;
                code->alu.inst[ip].alpha_addr |=
-                       (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) |
+                       ((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) |
                        R300_ALU_DSTA_REG;
        }
        if (inst->Alpha.OutputWriteMask) {
@@ -269,6 +295,8 @@ static int finish_node(struct r300_emit_state * emit)
        unsigned tex_offset;
        unsigned tex_end;
 
+       unsigned int alu_offset_msbs, alu_end_msbs;
+
        if (code->alu.length == emit->node_first_alu) {
                /* Generate a single NOP for this node */
                struct rc_pair_instruction inst;
@@ -301,13 +329,48 @@ static int finish_node(struct r300_emit_state * emit)
         *
         * Also note that the register specification from AMD is slightly
         * incorrect in its description of this register. */
-       code->code_addr[emit->current_node] =
-                       (alu_offset << R300_ALU_START_SHIFT) |
-                       (alu_end << R300_ALU_SIZE_SHIFT) |
-                       (tex_offset << R300_TEX_START_SHIFT) |
-                       (tex_end << R300_TEX_SIZE_SHIFT) |
-                       emit->node_flags;
-
+       code->code_addr[emit->current_node]  =
+                       ((alu_offset << R300_ALU_START_SHIFT)
+                               & R300_ALU_START_MASK)
+                       | ((alu_end << R300_ALU_SIZE_SHIFT)
+                               & R300_ALU_SIZE_MASK)
+                       | ((tex_offset << R300_TEX_START_SHIFT)
+                               & R300_TEX_START_MASK)
+                       | ((tex_end << R300_TEX_SIZE_SHIFT)
+                               & R300_TEX_SIZE_MASK)
+                       | emit->node_flags
+                       | (get_msbs_tex(tex_offset, 5)
+                               << R400_TEX_START_MSB_SHIFT)
+                       | (get_msbs_tex(tex_end, 5)
+                               << R400_TEX_SIZE_MSB_SHIFT)
+                       ;
+
+       /* Write r400 extended instruction fields.  These will be ignored on
+        * r300 cards.  */
+       alu_offset_msbs = get_msbs_alu(alu_offset);
+       alu_end_msbs = get_msbs_alu(alu_end);
+       switch(emit->current_node) {
+       case 0:
+               code->r400_code_offset_ext |=
+                       alu_offset_msbs << R400_ALU_START3_MSB_SHIFT
+                       | alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT;
+               break;
+       case 1:
+               code->r400_code_offset_ext |=
+                       alu_offset_msbs << R400_ALU_START2_MSB_SHIFT
+                       | alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT;
+               break;
+       case 2:
+               code->r400_code_offset_ext |=
+                       alu_offset_msbs << R400_ALU_START1_MSB_SHIFT
+                       | alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT;
+               break;
+       case 3:
+               code->r400_code_offset_ext |=
+                       alu_offset_msbs << R400_ALU_START0_MSB_SHIFT
+                       | alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT;
+               break;
+       }
        return 1;
 }
 
@@ -348,7 +411,7 @@ static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
        unsigned int opcode;
        PROG_CODE;
 
-       if (code->tex.length >= R300_PFS_MAX_TEX_INST) {
+       if (code->tex.length >= emit->compiler->Base.max_tex_insts) {
                error("Too many TEX instructions");
                return 0;
        }
@@ -376,10 +439,17 @@ static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
        use_temporary(code, inst->U.I.SrcReg[0].Index);
 
        code->tex.inst[code->tex.length++] =
-               (inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) |
-               (dest << R300_DST_ADDR_SHIFT) |
-               (unit << R300_TEX_ID_SHIFT) |
-               (opcode << R300_TEX_INST_SHIFT);
+               ((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT)
+                       & R300_SRC_ADDR_MASK)
+               | ((dest << R300_DST_ADDR_SHIFT)
+                       & R300_DST_ADDR_MASK)
+               | (unit << R300_TEX_ID_SHIFT)
+               | (opcode << R300_TEX_INST_SHIFT)
+               | (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ?
+                       R400_SRC_ADDR_EXT_BIT : 0)
+               | (dest >= R300_PFS_NUM_TEMP_REGS ?
+                       R400_DST_ADDR_EXT_BIT : 0)
+               ;
        return 1;
 }
 
@@ -393,6 +463,7 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
        struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
        struct r300_emit_state emit;
        struct r300_fragment_program_code *code = &compiler->code->code.r300;
+       unsigned int tex_end;
 
        memset(&emit, 0, sizeof(emit));
        emit.compiler = compiler;
@@ -424,11 +495,28 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
        finish_node(&emit);
 
        code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
+
+       /* Set r400 extended instruction fields.  These values will be ignored
+        * on r300 cards. */
+       code->r400_code_offset_ext |=
+               (get_msbs_alu(0)
+                               << R400_ALU_OFFSET_MSB_SHIFT)
+               | (get_msbs_alu(code->alu.length - 1)
+                               << R400_ALU_SIZE_MSB_SHIFT);
+
+       tex_end = code->tex.length ? code->tex.length - 1 : 0;
        code->code_offset =
-               (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) |
-               ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) |
-               (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) |
-               ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT);
+               ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
+                       & R300_PFS_CNTL_ALU_OFFSET_MASK)
+               | (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT)
+                       & R300_PFS_CNTL_ALU_END_MASK)
+               | ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
+                       & R300_PFS_CNTL_TEX_OFFSET_MASK)
+               | ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT)
+                       & R300_PFS_CNTL_TEX_END_MASK)
+               | (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT)
+               | (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT)
+               ;
 
        if (emit.current_node < 3) {
                int shift = 3 - emit.current_node;
@@ -438,4 +526,11 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
                for(i = 0; i < shift; ++i)
                        code->code_addr[i] = 0;
        }
+
+       if (code->pixsize >= R300_PFS_NUM_TEMP_REGS
+           || code->alu.length > R300_PFS_MAX_ALU_INST
+           || code->tex.length > R300_PFS_MAX_TEX_INST) {
+
+               code->r390_mode = 1;
+       }
 }
index b69e81698ae9cf3bdb29e54131bae11d23227b82..d14516689472308eb848e5b67fc436356fe630b6 100644 (file)
@@ -31,6 +31,9 @@
 #define R300_PFS_NUM_TEMP_REGS    32
 #define R300_PFS_NUM_CONST_REGS   32
 
+#define R400_PFS_MAX_ALU_INST     512
+#define R400_PFS_MAX_TEX_INST     512
+
 #define R500_PFS_MAX_INST         512
 #define R500_PFS_NUM_TEMP_REGS    128
 #define R500_PFS_NUM_CONST_REGS   256
@@ -187,24 +190,29 @@ struct r300_fragment_program_node {
  */
 struct r300_fragment_program_code {
        struct {
-               int length; /**< total # of texture instructions used */
-               uint32_t inst[R300_PFS_MAX_TEX_INST];
+               unsigned int length; /**< total # of texture instructions used */
+               uint32_t inst[R400_PFS_MAX_TEX_INST];
        } tex;
 
        struct {
-               int length; /**< total # of ALU instructions used */
+               unsigned int length; /**< total # of ALU instructions used */
                struct {
                        uint32_t rgb_inst;
                        uint32_t rgb_addr;
                        uint32_t alpha_inst;
                        uint32_t alpha_addr;
-               } inst[R300_PFS_MAX_ALU_INST];
+                       uint32_t r400_ext_addr; /* US_ALU_EXT_ADDR: MSB flags for src/dst register indices */
+               } inst[R400_PFS_MAX_ALU_INST];
        } alu;
 
        uint32_t config; /* US_CONFIG */
        uint32_t pixsize; /* US_PIXSIZE */
        uint32_t code_offset; /* US_CODE_OFFSET */
+       uint32_t r400_code_offset_ext; /* US_CODE_EXT */
        uint32_t code_addr[4]; /* US_CODE_ADDR */
+       /* US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries
+        * for r400 cards */
+       unsigned int r390_mode:1;
 };
 
 
index e6633395895b7dbb2a0fe7ef3878b04a9ec2b436..1e64af06bc35279ed65fb9bad91533dda851a03e 100644 (file)
@@ -50,6 +50,7 @@ struct radeon_compiler {
        char * ErrorMsg;
 
        /* Hardware specification. */
+       unsigned is_r400:1;
        unsigned is_r500:1;
        unsigned has_half_swizzles:1;
        unsigned has_presub:1;
@@ -57,6 +58,7 @@ struct radeon_compiler {
        unsigned max_temp_regs;
        unsigned max_constants;
        int max_alu_insts;
+       unsigned max_tex_insts;
 
        /* Whether to remove unused constants and empty holes in constant space. */
        unsigned remove_unused_constants:1;
index f7705b0f6fe2ffe94a9bc576f787ded207351570..2b9d85fae8b82312b496d0fb2eb1a44d4c527331 100644 (file)
@@ -1658,6 +1658,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #       define R300_PFS_CNTL_TEX_OFFSET_MASK     (31 << 13)
 #       define R300_PFS_CNTL_TEX_END_SHIFT       18
 #       define R300_PFS_CNTL_TEX_END_MASK        (31 << 18)
+#       define R400_PFS_CNTL_TEX_OFFSET_MSB_SHIFT 24
+#       define R400_PFS_CNTL_TEX_OFFSET_MSB_MASK (0xf << 24)
+#       define R400_PFS_CNTL_TEX_END_MSB_SHIFT   28
+#       define R400_PFS_CNTL_TEX_END_MSB_MASK    (0xf << 28)
 
 /* gap */
 
@@ -1682,6 +1686,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #       define R300_TEX_SIZE_MASK           (31 << 17)
 #      define R300_RGBA_OUT                (1 << 22)
 #      define R300_W_OUT                   (1 << 23)
+#       define R400_TEX_START_MSB_SHIFT     24
+#       define R400_TEX_START_MSG_MASK      (0xf << 24)
+#       define R400_TEX_SIZE_MSB_SHIFT      28
+#       define R400_TEX_SIZE_MSG_MASK       (0xf << 28)
 
 /* TEX
  * As far as I can tell, texture instructions cannot write into output
@@ -1702,6 +1710,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #              define R300_TEX_OP_TXP          3
 #              define R300_TEX_OP_TXB          4
 #      define R300_TEX_INST_MASK               (7 << 15)
+#      define R400_SRC_ADDR_EXT_BIT         (1 << 19)
+#      define R400_DST_ADDR_EXT_BIT         (1 << 20)
 
 /* Output format from the unfied shader */
 #define R300_US_OUT_FMT                     0x46A4
@@ -1979,6 +1989,40 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #       define R300_ALU_OUTA_CLAMP              (1 << 30)
 /* END: Fragment program instruction set */
 
+/* R4xx extended fragment shader registers. */
+#define R400_US_ALU_EXT_ADDR_0              0x4ac0 /* up to 63 (0x4bbc) */
+#   define R400_ADDR_EXT_RGB_MSB_BIT(x)     (1 << (x))
+#   define R400_ADDRD_EXT_RGB_MSB_BIT       0x08
+#   define R400_ADDR_EXT_A_MSB_BIT(x)       (1 << ((x) + 4))
+#   define R400_ADDRD_EXT_A_MSB_BIT         0x80
+
+#define R400_US_CODE_BANK                   0x46b8
+#   define R400_BANK_SHIFT                  0
+#   define R400_BANK_MASK                   0xf
+#   define R400_R390_MODE_ENABLE            (1 << 4)
+#define R400_US_CODE_EXT                    0x46bc
+#   define R400_ALU_OFFSET_MSB_SHIFT        0
+#   define R400_ALU_OFFSET_MSB_MASK         (0x7 << 0)
+#   define R400_ALU_SIZE_MSB_SHIFT          3
+#   define R400_ALU_SIZE_MSB_MASK           (0x7 << 3)
+#   define R400_ALU_START0_MSB_SHIFT        6
+#   define R400_ALU_START0_MSB_MASK         (0x7 << 6)
+#   define R400_ALU_SIZE0_MSB_SHIFT         9
+#   define R400_ALU_SIZE0_MSB_MASK          (0x7 << 9)
+#   define R400_ALU_START1_MSB_SHIFT        12
+#   define R400_ALU_START1_MSB_MASK         (0x7 << 12)
+#   define R400_ALU_SIZE1_MSB_SHIFT         15
+#   define R400_ALU_SIZE1_MSB_MASK          (0x7 << 15)
+#   define R400_ALU_START2_MSB_SHIFT        18
+#   define R400_ALU_START2_MSB_MASK         (0x7 << 18)
+#   define R400_ALU_SIZE2_MSB_SHIFT         21
+#   define R400_ALU_SIZE2_MSB_MASK          (0x7 << 21)
+#   define R400_ALU_START3_MSB_SHIFT        24
+#   define R400_ALU_START3_MSB_MASK         (0x7 << 24)
+#   define R400_ALU_SIZE3_MSB_SHIFT         27
+#   define R400_ALU_SIZE3_MSB_MASK          (0x7 << 27)
+/* END: R4xx extended fragment shader registers. */
+
 /* Fog: Fog Blending Enable */
 #define R300_FG_FOG_BLEND                             0x4bc0
 #       define R300_FG_FOG_BLEND_DISABLE              (0 << 0)