From: Tom Stellard Date: Mon, 13 Dec 2010 17:11:25 +0000 (-0800) Subject: r300g: Increase fragment shader limits for r400 cards X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c40ec20c273104198f7b3c52af2cd2328833b72b;p=mesa.git r300g: Increase fragment shader limits for r400 cards r400 fragment shaders now support up to 64 temporary registers, 512 ALU instructions, and 512 TEX instructions. --- diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 84773ab386c..b2c02bec86c 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -298,44 +298,98 @@ static void r300_emit_fs_code_to_buffer( } } else { /* r300 */ struct r300_fragment_program_code *code = &generic_code->code.r300; - - shader->cb_code_size = 19 + - (r300->screen->caps.is_r400 ? 2 : 0) + - code->alu.length * 4 + - (code->tex.length ? (1 + code->tex.length) : 0) + - imm_count * 5; + unsigned int alu_length = code->alu.length; + unsigned int alu_iterations = ((alu_length - 1) / 64) + 1; + unsigned int tex_length = code->tex.length; + unsigned int tex_iterations = + tex_length > 0 ? ((tex_length - 1) / 32) + 1 : 0; + unsigned int iterations = + alu_iterations > tex_iterations ? alu_iterations : tex_iterations; + unsigned int bank = 0; + + shader->cb_code_size = 15 + + /* R400_US_CODE_BANK */ + (r300->screen->caps.is_r400 ? 2 * (iterations + 1): 0) + + /* R400_US_CODE_EXT */ + (r300->screen->caps.is_r400 ? 2 : 0) + + /* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0, R400_US_ALU_EXT_ADDR_0 */ + (code->r390_mode ? (5 * alu_iterations) : 4) + + /* R400_US_ALU_EXT_ADDR_[0-63] */ + (code->r390_mode ? (code->alu.length) : 0) + + /* R300_US_ALU_{RGB,ALPHA}_{INST,ADDR}_0 */ + code->alu.length * 4 + + /* R300_US_TEX_INST_0, R300_US_TEX_INST_[0-31] */ + (code->tex.length > 0 ? 
code->tex.length + tex_iterations : 0) + + imm_count * 5; NEW_CB(shader->cb_code, shader->cb_code_size); - if (r300->screen->caps.is_r400) - OUT_CB_REG(R400_US_CODE_BANK, 0); - OUT_CB_REG(R300_US_CONFIG, code->config); OUT_CB_REG(R300_US_PIXSIZE, code->pixsize); OUT_CB_REG(R300_US_CODE_OFFSET, code->code_offset); + if (code->r390_mode) { + OUT_CB_REG(R400_US_CODE_EXT, code->r400_code_offset_ext); + } else if (r300->screen->caps.is_r400) { + /* This register appears to affect shaders even if r390_mode is + * disabled, so it needs to be set to 0 for shaders that + * don't use r390_mode. */ + OUT_CB_REG(R400_US_CODE_EXT, 0); + } + OUT_CB_REG_SEQ(R300_US_CODE_ADDR_0, 4); OUT_CB_TABLE(code->code_addr, 4); - OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CB(code->alu.inst[i].rgb_inst); + do { + unsigned int bank_alu_length = (alu_length < 64 ? alu_length : 64); + unsigned int bank_alu_offset = bank * 64; + unsigned int bank_tex_length = (tex_length < 32 ? tex_length : 32); + unsigned int bank_tex_offset = bank * 32; + + if (r300->screen->caps.is_r400) { + OUT_CB_REG(R400_US_CODE_BANK, code->r390_mode ? 
+ (bank << R400_BANK_SHIFT) | R400_R390_MODE_ENABLE : 0);//2 + } + + if (bank_alu_length > 0) { + OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_inst); + + OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].rgb_addr); - OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CB(code->alu.inst[i].rgb_addr); + OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_inst); - OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CB(code->alu.inst[i].alpha_inst); + OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].alpha_addr); + + if (code->r390_mode) { + OUT_CB_REG_SEQ(R400_US_ALU_EXT_ADDR_0, bank_alu_length); + for (i = 0; i < bank_alu_length; i++) + OUT_CB(code->alu.inst[i + bank_alu_offset].r400_ext_addr); + } + } + + if (bank_tex_length > 0) { + OUT_CB_REG_SEQ(R300_US_TEX_INST_0, bank_tex_length); + OUT_CB_TABLE(code->tex.inst + bank_tex_offset, bank_tex_length); + } - OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length); - for (i = 0; i < code->alu.length; i++) - OUT_CB(code->alu.inst[i].alpha_addr); + alu_length -= bank_alu_length; + tex_length -= bank_tex_length; + bank++; + } while(code->r390_mode && (alu_length > 0 || tex_length > 0)); - if (code->tex.length) { - OUT_CB_REG_SEQ(R300_US_TEX_INST_0, code->tex.length); - OUT_CB_TABLE(code->tex.inst, code->tex.length); + /* R400_US_CODE_BANK needs to be reset to 0, otherwise some shaders + * will be rendered incorrectly. */ + if (r300->screen->caps.is_r400) { + OUT_CB_REG(R400_US_CODE_BANK, + code->r390_mode ? 
R400_R390_MODE_ENABLE : 0); } /* Emit immediates. */ @@ -384,12 +438,17 @@ static void r300_translate_fragment_shader( compiler.code = &shader->code; compiler.state = shader->compare_state; compiler.Base.is_r500 = r300->screen->caps.is_r500; + compiler.Base.is_r400 = r300->screen->caps.is_r400; compiler.Base.disable_optimizations = DBG_ON(r300, DBG_NO_OPT); compiler.Base.has_half_swizzles = TRUE; compiler.Base.has_presub = TRUE; - compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32; + compiler.Base.max_temp_regs = + compiler.Base.is_r500 ? 128 : (compiler.Base.is_r400 ? 64 : 32); compiler.Base.max_constants = compiler.Base.is_r500 ? 256 : 32; - compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64; + compiler.Base.max_alu_insts = + (compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 64; + compiler.Base.max_tex_insts = + (compiler.Base.is_r500 || compiler.Base.is_r400) ? 512 : 32; compiler.AllocateHwInputs = &allocate_hardware_inputs; compiler.UserData = &shader->inputs; diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index d1154dee40a..1d93dab2ca2 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -2162,14 +2162,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. /* R4xx extended fragment shader registers. 
*/ #define R400_US_ALU_EXT_ADDR_0 0x4ac0 /* up to 63 (0x4bbc) */ -# define R400_ADDR0_EXT_RGB_MSB_BIT 0x01 -# define R400_ADDR1_EXT_RGB_MSB_BIT 0x02 -# define R400_ADDR2_EXT_RGB_MSB_BIT 0x04 +# define R400_ADDR_EXT_RGB_MSB_BIT(x) (1 << (x)) # define R400_ADDRD_EXT_RGB_MSB_BIT 0x08 -# define R400_ADDR0_EXT_A_MSB_BIT 0x10 -# define R400_ADDR1_EXT_A_MSB_BIT 0x20 -# define R400_ADDR2_EXT_A_MSB_BIT 0x40 +# define R400_ADDR_EXT_A_MSB_BIT(x) (1 << ((x) + 4)) # define R400_ADDRD_EXT_A_MSB_BIT 0x80 + #define R400_US_CODE_BANK 0x46b8 # define R400_BANK_SHIFT 0 # define R400_BANK_MASK 0xf diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c index c75aeaa10a7..178ad63954e 100644 --- a/src/gallium/drivers/r300/r300_screen.c +++ b/src/gallium/drivers/r300/r300_screen.c @@ -87,12 +87,8 @@ static const char* r300_get_name(struct pipe_screen* pscreen) static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param) { struct r300_screen* r300screen = r300_screen(pscreen); - boolean is_r400 = r300screen->caps.is_r400; boolean is_r500 = r300screen->caps.is_r500; - /* XXX extended shader capabilities of r400 unimplemented */ - is_r400 = FALSE; - switch (param) { /* Supported features (boolean caps). 
*/ case PIPE_CAP_NPOT_TEXTURES: @@ -175,9 +171,6 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e boolean is_r400 = r300screen->caps.is_r400; boolean is_r500 = r300screen->caps.is_r500; - /* XXX extended shader capabilities of r400 unimplemented */ - is_r400 = FALSE; - switch (shader) { case PIPE_SHADER_FRAGMENT: diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c index 782671bac01..deba9ca834d 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c @@ -49,6 +49,11 @@ static void presub_string(char out[10], unsigned int inst) } } +static int get_msb(unsigned int bit, unsigned int r400_ext_addr) +{ + return (r400_ext_addr & bit) ? 1 << 5 : 0; /* 32 (bit 5) if any bit of the mask `bit` is set in r400_ext_addr, else 0; callers OR this onto a 5-bit register index */ +} + /* just some random things... */ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) { @@ -61,16 +66,21 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) fprintf(stderr, "Hardware program\n"); fprintf(stderr, "----------------\n"); + if (c->is_r400) { + fprintf(stderr, "code_offset_ext: %08x\n", code->r400_code_offset_ext); + } for (n = 0; n <= (code->config & 3); n++) { uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n]; - int alu_offset = (code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT; - int alu_end = (code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT; + unsigned int alu_offset = ((code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT) + + (((code->r400_code_offset_ext >> (24 - (n * 6))) & 0x7) << 6); + unsigned int alu_end = ((code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT) + + (((code->r400_code_offset_ext >> (27 - (n * 6))) & 0x7) << 6); int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT; int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT; - fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, " - "alu_end: %d, tex_end: %d 
(code_addr: %08x)\n", n, + fprintf(stderr, "NODE %d: alu_offset: %u, tex_offset: %d, " + "alu_end: %u, tex_end: %d (code_addr: %08x)\n", n, alu_offset, tex_offset, alu_end, tex_end, code_addr); if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) { @@ -125,11 +135,15 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) for (j = 0; j < 3; ++j) { int regc = code->alu.inst[i].rgb_addr >> (j * 6); int rega = code->alu.inst[i].alpha_addr >> (j * 6); + int msbc = get_msb(R400_ADDR_EXT_RGB_MSB_BIT(j), + code->alu.inst[i].r400_ext_addr); + int msba = get_msb(R400_ADDR_EXT_A_MSB_BIT(j), + code->alu.inst[i].r400_ext_addr); sprintf(srcc[j], "%c%i", - (regc & 32) ? 'c' : 't', regc & 31); + (regc & 32) ? 'c' : 't', (regc & 31) | msbc); sprintf(srca[j], "%c%i", - (rega & 32) ? 'c' : 't', rega & 31); + (rega & 32) ? 'c' : 't', (rega & 31) | msba); } dstc[0] = 0; @@ -141,9 +155,14 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) (code->alu.inst[i]. rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : ""); if (flags[0] != 0) { + unsigned int msb = get_msb( + R400_ADDRD_EXT_RGB_MSB_BIT, + code->alu.inst[i].r400_ext_addr); + sprintf(dstc, "t%i.%s ", - (code->alu.inst[i]. - rgb_addr >> R300_ALU_DSTC_SHIFT) & 31, + ((code->alu.inst[i]. + rgb_addr >> R300_ALU_DSTC_SHIFT) + & 31) | msb, flags); } sprintf(flags, "%s%s%s", @@ -166,9 +185,13 @@ void r300FragmentProgramDump(struct radeon_compiler *c, void *user) dsta[0] = 0; if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) { + unsigned int msb = get_msb( + R400_ADDRD_EXT_A_MSB_BIT, + code->alu.inst[i].r400_ext_addr); sprintf(dsta, "t%i.w ", - (code->alu.inst[i]. - alpha_addr >> R300_ALU_DSTA_SHIFT) & 31); + ((code->alu.inst[i]. 
+ alpha_addr >> R300_ALU_DSTA_SHIFT) & 31) + | msb); } if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) { sprintf(tmp, "o%i.w ", diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index 1db8678e890..28d132a5fe3 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -64,6 +64,20 @@ struct r300_emit_state { __FILE__, __FUNCTION__, ##args); \ } while(0) +static unsigned int get_msbs_alu(unsigned int bits) /* returns bits 6..8 of `bits` (3-bit MSB field) */ +{ + return (bits >> 6) & 0x7; +} + +/** + * @param lsbs The number of least significant bits to discard; the next 4 bits of `bits` are returned (the MSB fields this feeds are 0xf wide). + */ +static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs) +{ + return (bits >> lsbs) & 0xf; /* 4-bit mask; a mask of 0x15 would drop bits 1 and 3 of the MSB value */ +} + +#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> (lsbs)) & (mask)) /** * Mark a temporary register as used. @@ -83,7 +97,7 @@ static unsigned int use_source(struct r300_fragment_program_code* code, struct r return src.Index | (1 << 5); } else if (src.File == RC_FILE_TEMPORARY) { use_temporary(code, src.Index); - return src.Index; + return src.Index & 0x1f; } return 0; @@ -151,11 +165,19 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode); for(j = 0; j < 3; ++j) { + /* Set the RGB address */ unsigned int src = use_source(code, inst->RGB.Src[j]); unsigned int arg; + if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j); + code->alu.inst[ip].rgb_addr |= src << (6*j); + /* Set the Alpha address */ src = use_source(code, inst->Alpha.Src[j]); + if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j); + code->alu.inst[ip].alpha_addr |= src << (6*j); arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle); @@ -223,8 +245,10 @@ static int 
emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i if (inst->RGB.WriteMask) { use_temporary(code, inst->RGB.DestIndex); + if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT; code->alu.inst[ip].rgb_addr |= - (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) | + ((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) | (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT); } if (inst->RGB.OutputWriteMask) { @@ -236,8 +260,10 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i if (inst->Alpha.WriteMask) { use_temporary(code, inst->Alpha.DestIndex); + if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS) + code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT; code->alu.inst[ip].alpha_addr |= - (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) | + ((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) | R300_ALU_DSTA_REG; } if (inst->Alpha.OutputWriteMask) { @@ -269,6 +295,8 @@ static int finish_node(struct r300_emit_state * emit) unsigned tex_offset; unsigned tex_end; + unsigned int alu_offset_msbs, alu_end_msbs; + if (code->alu.length == emit->node_first_alu) { /* Generate a single NOP for this node */ struct rc_pair_instruction inst; @@ -301,13 +329,48 @@ static int finish_node(struct r300_emit_state * emit) * * Also note that the register specification from AMD is slightly * incorrect in its description of this register. 
*/ - code->code_addr[emit->current_node] = - (alu_offset << R300_ALU_START_SHIFT) | - (alu_end << R300_ALU_SIZE_SHIFT) | - (tex_offset << R300_TEX_START_SHIFT) | - (tex_end << R300_TEX_SIZE_SHIFT) | - emit->node_flags; - + code->code_addr[emit->current_node] = + ((alu_offset << R300_ALU_START_SHIFT) + & R300_ALU_START_MASK) + | ((alu_end << R300_ALU_SIZE_SHIFT) + & R300_ALU_SIZE_MASK) + | ((tex_offset << R300_TEX_START_SHIFT) + & R300_TEX_START_MASK) + | ((tex_end << R300_TEX_SIZE_SHIFT) + & R300_TEX_SIZE_MASK) + | emit->node_flags + | (get_msbs_tex(tex_offset, 5) + << R400_TEX_START_MSB_SHIFT) + | (get_msbs_tex(tex_end, 5) + << R400_TEX_SIZE_MSB_SHIFT) + ; + + /* Write r400 extended instruction fields. These will be ignored on + * r300 cards. */ + alu_offset_msbs = get_msbs_alu(alu_offset); + alu_end_msbs = get_msbs_alu(alu_end); + switch(emit->current_node) { + case 0: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START3_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT; + break; + case 1: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START2_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT; + break; + case 2: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START1_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT; + break; + case 3: + code->r400_code_offset_ext |= + alu_offset_msbs << R400_ALU_START0_MSB_SHIFT + | alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT; + break; + } return 1; } @@ -348,7 +411,7 @@ static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) unsigned int opcode; PROG_CODE; - if (code->tex.length >= R300_PFS_MAX_TEX_INST) { + if (code->tex.length >= emit->compiler->Base.max_tex_insts) { error("Too many TEX instructions"); return 0; } @@ -376,10 +439,17 @@ static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst) use_temporary(code, inst->U.I.SrcReg[0].Index); code->tex.inst[code->tex.length++] = - (inst->U.I.SrcReg[0].Index << 
R300_SRC_ADDR_SHIFT) | - (dest << R300_DST_ADDR_SHIFT) | - (unit << R300_TEX_ID_SHIFT) | - (opcode << R300_TEX_INST_SHIFT); + ((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) + & R300_SRC_ADDR_MASK) + | ((dest << R300_DST_ADDR_SHIFT) + & R300_DST_ADDR_MASK) + | (unit << R300_TEX_ID_SHIFT) + | (opcode << R300_TEX_INST_SHIFT) + | (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ? + R400_SRC_ADDR_EXT_BIT : 0) + | (dest >= R300_PFS_NUM_TEMP_REGS ? + R400_DST_ADDR_EXT_BIT : 0) + ; return 1; } @@ -393,6 +463,7 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c; struct r300_emit_state emit; struct r300_fragment_program_code *code = &compiler->code->code.r300; + unsigned int tex_end; memset(&emit, 0, sizeof(emit)); emit.compiler = compiler; @@ -424,11 +495,28 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) finish_node(&emit); code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */ + + /* Set r400 extended instruction fields. These values will be ignored + * on r300 cards. */ + code->r400_code_offset_ext |= + (get_msbs_alu(0) + << R400_ALU_OFFSET_MSB_SHIFT) + | (get_msbs_alu(code->alu.length - 1) + << R400_ALU_SIZE_MSB_SHIFT); + + tex_end = code->tex.length ? code->tex.length - 1 : 0; code->code_offset = - (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) | - ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) | - (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) | - ((code->tex.length ? 
code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT); + ((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) + & R300_PFS_CNTL_ALU_OFFSET_MASK) + | (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT) + & R300_PFS_CNTL_ALU_END_MASK) + | ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) + & R300_PFS_CNTL_TEX_OFFSET_MASK) + | ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT) + & R300_PFS_CNTL_TEX_END_MASK) + | (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT) + | (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT) + ; if (emit.current_node < 3) { int shift = 3 - emit.current_node; @@ -438,4 +526,11 @@ void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user) for(i = 0; i < shift; ++i) code->code_addr[i] = 0; } + + if (code->pixsize >= R300_PFS_NUM_TEMP_REGS + || code->alu.length > R300_PFS_MAX_ALU_INST + || code->tex.length > R300_PFS_MAX_TEX_INST) { + + code->r390_mode = 1; + } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index b69e81698ae..d1451668947 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -31,6 +31,9 @@ #define R300_PFS_NUM_TEMP_REGS 32 #define R300_PFS_NUM_CONST_REGS 32 +#define R400_PFS_MAX_ALU_INST 512 +#define R400_PFS_MAX_TEX_INST 512 + #define R500_PFS_MAX_INST 512 #define R500_PFS_NUM_TEMP_REGS 128 #define R500_PFS_NUM_CONST_REGS 256 @@ -187,24 +190,29 @@ struct r300_fragment_program_node { */ struct r300_fragment_program_code { struct { - int length; /**< total # of texture instructions used */ - uint32_t inst[R300_PFS_MAX_TEX_INST]; + unsigned int length; /**< total # of texture instructions used */ + uint32_t inst[R400_PFS_MAX_TEX_INST]; } tex; struct { - int length; /**< total # of ALU instructions used */ + unsigned int length; /**< total # of ALU instructions used */ struct { uint32_t rgb_inst; uint32_t rgb_addr; uint32_t alpha_inst; uint32_t alpha_addr; - } inst[R300_PFS_MAX_ALU_INST]; + uint32_t 
r400_ext_addr; + } inst[R400_PFS_MAX_ALU_INST]; } alu; uint32_t config; /* US_CONFIG */ uint32_t pixsize; /* US_PIXSIZE */ uint32_t code_offset; /* US_CODE_OFFSET */ + uint32_t r400_code_offset_ext; /* US_CODE_EXT */ uint32_t code_addr[4]; /* US_CODE_ADDR */ + /*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries + * for r400 cards */ + unsigned int r390_mode:1; }; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index e6633395895..1e64af06bc3 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -50,6 +50,7 @@ struct radeon_compiler { char * ErrorMsg; /* Hardware specification. */ + unsigned is_r400:1; unsigned is_r500:1; unsigned has_half_swizzles:1; unsigned has_presub:1; @@ -57,6 +58,7 @@ struct radeon_compiler { unsigned max_temp_regs; unsigned max_constants; int max_alu_insts; + unsigned max_tex_insts; /* Whether to remove unused constants and empty holes in constant space. */ unsigned remove_unused_constants:1; diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h index f7705b0f6fe..2b9d85fae8b 100644 --- a/src/mesa/drivers/dri/r300/r300_reg.h +++ b/src/mesa/drivers/dri/r300/r300_reg.h @@ -1658,6 +1658,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 13) # define R300_PFS_CNTL_TEX_END_SHIFT 18 # define R300_PFS_CNTL_TEX_END_MASK (31 << 18) +# define R400_PFS_CNTL_TEX_OFFSET_MSB_SHIFT 24 +# define R400_PFS_CNTL_TEX_OFFSET_MSB_MASK (0xf << 24) +# define R400_PFS_CNTL_TEX_END_MSB_SHIFT 28 +# define R400_PFS_CNTL_TEX_END_MSB_MASK (0xf << 28) /* gap */ @@ -1682,6 +1686,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. 
# define R300_TEX_SIZE_MASK (31 << 17) # define R300_RGBA_OUT (1 << 22) # define R300_W_OUT (1 << 23) +# define R400_TEX_START_MSB_SHIFT 24 +# define R400_TEX_START_MSG_MASK (0xf << 24) +# define R400_TEX_SIZE_MSB_SHIFT 28 +# define R400_TEX_SIZE_MSG_MASK (0xf << 28) /* TEX * As far as I can tell, texture instructions cannot write into output @@ -1702,6 +1710,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_TEX_OP_TXP 3 # define R300_TEX_OP_TXB 4 # define R300_TEX_INST_MASK (7 << 15) +# define R400_SRC_ADDR_EXT_BIT (1 << 19) +# define R400_DST_ADDR_EXT_BIT (1 << 20) /* Output format from the unfied shader */ #define R300_US_OUT_FMT 0x46A4 @@ -1979,6 +1989,40 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_ALU_OUTA_CLAMP (1 << 30) /* END: Fragment program instruction set */ +/* R4xx extended fragment shader registers. */ +#define R400_US_ALU_EXT_ADDR_0 0x4ac0 /* up to 63 (0x4bbc) */ +# define R400_ADDR_EXT_RGB_MSB_BIT(x) (1 << (x)) +# define R400_ADDRD_EXT_RGB_MSB_BIT 0x08 +# define R400_ADDR_EXT_A_MSB_BIT(x) (1 << ((x) + 4)) +# define R400_ADDRD_EXT_A_MSB_BIT 0x80 + +#define R400_US_CODE_BANK 0x46b8 +# define R400_BANK_SHIFT 0 +# define R400_BANK_MASK 0xf +# define R400_R390_MODE_ENABLE (1 << 4) +#define R400_US_CODE_EXT 0x46bc +# define R400_ALU_OFFSET_MSB_SHIFT 0 +# define R400_ALU_OFFSET_MSB_MASK (0x7 << 0) +# define R400_ALU_SIZE_MSB_SHIFT 3 +# define R400_ALU_SIZE_MSB_MASK (0x7 << 3) +# define R400_ALU_START0_MSB_SHIFT 6 +# define R400_ALU_START0_MSB_MASK (0x7 << 6) +# define R400_ALU_SIZE0_MSB_SHIFT 9 +# define R400_ALU_SIZE0_MSB_MASK (0x7 << 9) +# define R400_ALU_START1_MSB_SHIFT 12 +# define R400_ALU_START1_MSB_MASK (0x7 << 12) +# define R400_ALU_SIZE1_MSB_SHIFT 15 +# define R400_ALU_SIZE1_MSB_MASK (0x7 << 15) +# define R400_ALU_START2_MSB_SHIFT 18 +# define R400_ALU_START2_MSB_MASK (0x7 << 18) +# define R400_ALU_SIZE2_MSB_SHIFT 21 +# define R400_ALU_SIZE2_MSB_MASK (0x7 << 21) +# define R400_ALU_START3_MSB_SHIFT 24 +# define 
R400_ALU_START3_MSB_MASK (0x7 << 24) +# define R400_ALU_SIZE3_MSB_SHIFT 27 +# define R400_ALU_SIZE3_MSB_MASK (0x7 << 27) +/* END: R4xx extended fragment shader registers. */ + /* Fog: Fog Blending Enable */ #define R300_FG_FOG_BLEND 0x4bc0 # define R300_FG_FOG_BLEND_DISABLE (0 << 0)