From 810345492eca34c2ad12728b5491a4691cc62ec2 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 17 Sep 2012 14:33:56 +0000 Subject: [PATCH] r600g: Use LOOP_START_DX10 for loops MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not limited to 4096 iterations like the other LOOP_* instructions. Compute shaders need to use this instruction, and since we aren't optimizing loops with the LOOP_CONFIG* registers for pixel and vertex shaders, it seems like we should just use it for everything. Reviewed-by: Marek Olšák --- src/gallium/drivers/r600/eg_asm.c | 1 + src/gallium/drivers/r600/r600_asm.c | 8 +++++++- src/gallium/drivers/r600/r600_shader.c | 4 +++- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/r600/eg_asm.c b/src/gallium/drivers/r600/eg_asm.c index d2c1679796a..00ac4a8c25b 100644 --- a/src/gallium/drivers/r600/eg_asm.c +++ b/src/gallium/drivers/r600/eg_asm.c @@ -121,6 +121,7 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf) case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE: case EG_V_SQ_CF_WORD1_SQ_CF_INST_POP: case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: + case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10: case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c index 648e8b6ed5d..58350c6f75d 100644 --- a/src/gallium/drivers/r600/r600_asm.c +++ b/src/gallium/drivers/r600/r600_asm.c @@ -1878,6 +1878,7 @@ static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode case V_SQ_CF_WORD1_SQ_CF_INST_ELSE: case V_SQ_CF_WORD1_SQ_CF_INST_POP: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: @@ -1952,6 +1953,7 @@ int r600_bytecode_build(struct r600_bytecode *bc) case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE: case EG_V_SQ_CF_WORD1_SQ_CF_INST_POP: case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: + case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10: case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: @@ -1986,7 +1988,7 @@ int r600_bytecode_build(struct r600_bytecode *bc) case V_SQ_CF_WORD1_SQ_CF_INST_JUMP: case V_SQ_CF_WORD1_SQ_CF_INST_ELSE: case V_SQ_CF_WORD1_SQ_CF_INST_POP: - case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: @@ -2089,6 +2091,7 @@ int r600_bytecode_build(struct r600_bytecode *bc) case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF1: case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF2: case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF3: + case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10: case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: @@ -2172,6 +2175,7 @@ int r600_bytecode_build(struct r600_bytecode *bc) case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2: case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: @@ -2360,6 +2364,7 @@ void r600_bytecode_dump(struct r600_bytecode *bc) case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE: case EG_V_SQ_CF_WORD1_SQ_CF_INST_POP: case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: + case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10: case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: @@ -2454,6 +2459,7 @@ void r600_bytecode_dump(struct r600_bytecode *bc) case V_SQ_CF_WORD1_SQ_CF_INST_ELSE: case V_SQ_CF_WORD1_SQ_CF_INST_POP: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL: + case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE: case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK: diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 3e746e5e2e2..8288c040a63 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -5101,7 +5101,9 @@ static int tgsi_endif(struct r600_shader_ctx *ctx) static int tgsi_bgnloop(struct r600_shader_ctx *ctx) { - r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL)); + /* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not + * limited to 4096 iterations, like the other LOOP_* instructions. */ + r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10)); fc_pushlevel(ctx, FC_LOOP); -- 2.30.2