From 313e95f0c792580f6051fedb67e65838405c88f8 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 1 Sep 2010 00:56:57 +0200 Subject: [PATCH] r300/compiler: add new compiler parameter max_alu_insts --- src/gallium/drivers/r300/r300_fs.c | 1 + src/gallium/drivers/r300/r300_vs.c | 1 + .../dri/r300/compiler/r300_fragprog_emit.c | 2 +- .../drivers/dri/r300/compiler/r3xx_fragprog.c | 11 ++++---- .../drivers/dri/r300/compiler/r3xx_vertprog.c | 5 +--- .../dri/r300/compiler/r500_fragprog_emit.c | 10 +++---- .../dri/r300/compiler/radeon_compiler.h | 1 + .../dri/r300/compiler/radeon_emulate_loops.c | 27 +++++++++---------- .../dri/r300/compiler/radeon_emulate_loops.h | 7 +++-- src/mesa/drivers/dri/r300/r300_blit.c | 4 +++ .../drivers/dri/r300/r300_fragprog_common.c | 1 + src/mesa/drivers/dri/r300/r300_vertprog.c | 3 +++ 12 files changed, 38 insertions(+), 35 deletions(-) diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index 9845e546109..2b9861cae38 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -386,6 +386,7 @@ static void r300_translate_fragment_shader( compiler.state = shader->compare_state; compiler.Base.is_r500 = r300->screen->caps.is_r500; compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32; + compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64; compiler.Base.remove_unused_constants = TRUE; compiler.AllocateHwInputs = &allocate_hardware_inputs; compiler.UserData = &shader->inputs; diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c index 5f8dbb28d0c..29569d92f59 100644 --- a/src/gallium/drivers/r300/r300_vs.c +++ b/src/gallium/drivers/r300/r300_vs.c @@ -206,6 +206,7 @@ void r300_translate_vertex_shader(struct r300_context *r300, compiler.UserData = vs; compiler.Base.is_r500 = r300->screen->caps.is_r500; compiler.Base.max_temp_regs = 32; + compiler.Base.max_alu_insts = r300->screen->caps.is_r500 ? 
1024 : 256; compiler.Base.remove_unused_constants = TRUE; if (compiler.Base.Debug) { diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c index b27a683c397..ba5461bef48 100644 --- a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c @@ -135,7 +135,7 @@ static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* i { PROG_CODE; - if (code->alu.length >= R300_PFS_MAX_ALU_INST) { + if (code->alu.length >= c->Base.max_alu_insts) { error("Too many ALU instructions"); return 0; } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index f82d0f1e62d..96c5d6fda9f 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -107,12 +107,11 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) debug_program_log(c, "before compilation"); - if (c->Base.is_r500){ - rc_unroll_loops(&c->Base, R500_PFS_MAX_INST); + if (c->Base.is_r500) { + rc_unroll_loops(&c->Base); debug_program_log(c, "after unroll loops"); - } - else{ - rc_transform_loops(&c->Base, -1); + } else { + rc_transform_loops(&c->Base); debug_program_log(c, "after transform loops"); rc_emulate_branches(&c->Base); @@ -165,7 +164,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) debug_program_log(c, "after deadcode"); if (!c->Base.is_r500) { - rc_emulate_loops(&c->Base, R300_PFS_MAX_ALU_INST); + rc_emulate_loops(&c->Base); debug_program_log(c, "after emulate loops"); } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index b5186809bc3..29c6c869ff7 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -985,10 +985,7 @@ void 
r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c) debug_program_log(c, "before compilation"); - if (c->Base.is_r500) - rc_transform_loops(&c->Base, R500_VS_MAX_ALU); - else - rc_transform_loops(&c->Base, R300_VS_MAX_ALU); + rc_transform_loops(&c->Base); if (c->Base.Error) return; diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index 5612a4e5ccc..8a6aafe5af1 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -229,7 +229,7 @@ static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair { PROG_CODE; - if (code->inst_end >= 511) { + if (code->inst_end >= c->Base.max_alu_insts-1) { error("emit_alu: Too many instructions"); return; } @@ -322,7 +322,7 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst { PROG_CODE; - if (code->inst_end >= 511) { + if (code->inst_end >= c->Base.max_alu_insts-1) { error("emit_tex: Too many instructions"); return 0; } @@ -370,7 +370,7 @@ static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_inst static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst) { - if (s->Code->inst_end >= 511) { + if (s->Code->inst_end >= s->C->max_alu_insts-1) { rc_error(s->C, "emit_tex: Too many instructions"); return; } @@ -577,7 +577,7 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi } } - if (code->max_temp_idx >= 128) + if (code->max_temp_idx >= compiler->Base.max_temp_regs) rc_error(&compiler->Base, "Too many hardware temporaries used"); if (compiler->Base.Error) @@ -587,7 +587,7 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) { /* This may happen when dead-code elimination is disabled or * when most of the fragment program logic is 
leading to a KIL */ - if (code->inst_end >= 511) { + if (code->inst_end >= compiler->Base.max_alu_insts-1) { rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions"); return; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h index a61c005274a..1833fcebcac 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h @@ -42,6 +42,7 @@ struct radeon_compiler { /* Hardware specification. */ unsigned is_r500:1; unsigned max_temp_regs; + int max_alu_insts; /* Whether to remove unused constants and empty holes in constant space. */ unsigned remove_unused_constants:1; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c index dbb41767633..d13b2be42db 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c @@ -78,12 +78,12 @@ static int src_reg_is_immediate(struct rc_src_register * src, } static unsigned int loop_max_possible_iterations(struct radeon_compiler *c, - struct loop_info * loop, unsigned int prog_inst_limit) + struct loop_info * loop) { unsigned int total_i = rc_recompute_ips(c); unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1; /* +1 because the program already has one iteration of the loop. */ - return 1 + ((prog_inst_limit - total_i) / loop_i); + return 1 + ((c->max_alu_insts - total_i) / loop_i); } static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop, @@ -187,11 +187,10 @@ static void get_incr_amount(void * data, struct rc_instruction * inst, } /** - * If prog_inst_limit is -1, then all eligible loops will be unrolled regardless + * If c->max_alu_insts is -1, then all eligible loops will be unrolled regardless
*/ -static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop, - unsigned int prog_inst_limit) +static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop) { int end_loops; int iterations; @@ -300,9 +299,8 @@ static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop, return 0; } - if (prog_inst_limit > 0 - && iterations > loop_max_possible_iterations(c, loop, - prog_inst_limit)) { + if (c->max_alu_insts > 0 + && iterations > loop_max_possible_iterations(c, loop)) { return 0; } @@ -436,7 +434,7 @@ static int transform_loop(struct emulate_loop_state * s, if (!build_loop_info(s->C, loop, inst)) return 0; - if(try_unroll_loop(s->C, loop, s->prog_inst_limit)){ + if(try_unroll_loop(s->C, loop)){ return 1; } @@ -472,14 +470,13 @@ static int transform_loop(struct emulate_loop_state * s, return 1; } -void rc_transform_loops(struct radeon_compiler *c, int prog_inst_limit) +void rc_transform_loops(struct radeon_compiler *c) { struct emulate_loop_state * s = &c->loop_state; struct rc_instruction * ptr; memset(s, 0, sizeof(struct emulate_loop_state)); s->C = c; - s->prog_inst_limit = prog_inst_limit; for(ptr = s->C->Program.Instructions.Next; ptr != &s->C->Program.Instructions; ptr = ptr->Next) { if(ptr->Type == RC_INSTRUCTION_NORMAL && @@ -490,7 +487,7 @@ void rc_transform_loops(struct radeon_compiler *c, int prog_inst_limit) } } -void rc_unroll_loops(struct radeon_compiler *c, int prog_inst_limit) +void rc_unroll_loops(struct radeon_compiler *c) { struct rc_instruction * inst; struct loop_info loop; @@ -500,13 +497,13 @@ void rc_unroll_loops(struct radeon_compiler *c, int prog_inst_limit) if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { if (build_loop_info(c, &loop, inst)) { - try_unroll_loop(c, &loop, prog_inst_limit); + try_unroll_loop(c, &loop); } } } } -void rc_emulate_loops(struct radeon_compiler *c, int prog_inst_limit) +void rc_emulate_loops(struct radeon_compiler *c) { struct emulate_loop_state * s = 
&c->loop_state; int i; @@ -518,7 +515,7 @@ void rc_emulate_loops(struct radeon_compiler *c, int prog_inst_limit) continue; } unsigned int iterations = loop_max_possible_iterations( - s->C, &s->Loops[i], prog_inst_limit); + s->C, &s->Loops[i]); unroll_loop(s->C, &s->Loops[i], iterations); } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h index 43af8ee480e..caf1c576588 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h @@ -21,13 +21,12 @@ struct emulate_loop_state { struct loop_info * Loops; unsigned int LoopCount; unsigned int LoopReserved; - int prog_inst_limit; }; -void rc_transform_loops(struct radeon_compiler *c, int prog_inst_limit); +void rc_transform_loops(struct radeon_compiler *c); -void rc_unroll_loops(struct radeon_compiler * c, int prog_inst_limit); +void rc_unroll_loops(struct radeon_compiler * c); -void rc_emulate_loops(struct radeon_compiler * c, int prog_inst_limit); +void rc_emulate_loops(struct radeon_compiler * c); #endif /* RADEON_EMULATE_LOOPS_H */ diff --git a/src/mesa/drivers/dri/r300/r300_blit.c b/src/mesa/drivers/dri/r300/r300_blit.c index d2c25fb9cd4..91d715a7169 100644 --- a/src/mesa/drivers/dri/r300/r300_blit.c +++ b/src/mesa/drivers/dri/r300/r300_blit.c @@ -88,6 +88,9 @@ static void create_vertex_program(struct r300_context *r300) compiler.RequiredOutputs = compiler.Base.Program.OutputsWritten = (1 << VERT_RESULT_HPOS) | (1 << VERT_RESULT_TEX0); compiler.SetHwInputOutput = vp_ins_outs; compiler.code = &r300->blit.vp_code; + compiler.Base.is_r500 = r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515; + compiler.Base.max_temp_regs = 32; + compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 
1024 : 256; r3xx_compile_vertex_program(&compiler); } @@ -120,6 +123,7 @@ static void create_fragment_program(struct r300_context *r300) compiler.enable_shadow_ambient = GL_TRUE; compiler.Base.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515); compiler.Base.max_temp_regs = (compiler.Base.is_r500) ? 128 : 32; + compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64; compiler.code = &r300->blit.fp_code; compiler.AllocateHwInputs = fp_allocate_hw_inputs; diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c index 7b6521c7480..d6d41b4e42c 100644 --- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c +++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c @@ -221,6 +221,7 @@ static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_prog compiler.enable_shadow_ambient = GL_TRUE; compiler.Base.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE; compiler.Base.max_temp_regs = (compiler.Base.is_r500) ? 128 : 32; + compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 512 : 64; compiler.OutputDepth = FRAG_RESULT_DEPTH; memset(compiler.OutputColor, 0, 4 * sizeof(unsigned)); compiler.OutputColor[0] = FRAG_RESULT_COLOR; diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c index 67d8b2b3286..948517bd807 100644 --- a/src/mesa/drivers/dri/r300/r300_vertprog.c +++ b/src/mesa/drivers/dri/r300/r300_vertprog.c @@ -244,6 +244,9 @@ static struct r300_vertex_program *build_program(GLcontext *ctx, compiler.code = &vp->code; compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads); compiler.SetHwInputOutput = &t_inputs_outputs; + compiler.Base.is_r500 = R300_CONTEXT(ctx)->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515; + compiler.Base.max_temp_regs = 32; + compiler.Base.max_alu_insts = compiler.Base.is_r500 ? 
1024 : 256; if (compiler.Base.Debug) { fprintf(stderr, "Initial vertex program:\n"); -- 2.30.2