r600g: Use LOOP_START_DX10 for loops
author Tom Stellard <thomas.stellard@amd.com>
Mon, 17 Sep 2012 14:33:56 +0000 (14:33 +0000)
committer Tom Stellard <thomas.stellard@amd.com>
Wed, 19 Sep 2012 18:58:53 +0000 (14:58 -0400)
LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not limited
to 4096 iterations like the other LOOP_* instructions.  Compute shaders
need to use this instruction, and since we aren't optimizing loops with
the LOOP_CONFIG* registers for pixel and vertex shaders, it seems like
we should just use it for everything.

Reviewed-by: Marek Olšák <maraeo@gmail.com>
src/gallium/drivers/r600/eg_asm.c
src/gallium/drivers/r600/r600_asm.c
src/gallium/drivers/r600/r600_shader.c

index d2c1679796a3bd0ee1a5bdd1b76e4307ee6de8f2..00ac4a8c25ba3465d94bd0fce6c7d6ec807a2afd 100644 (file)
@@ -121,6 +121,7 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
        case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
        case EG_V_SQ_CF_WORD1_SQ_CF_INST_POP:
        case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+       case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10:
        case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
        case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
        case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
index 648e8b6ed5d7d2f9336e3f2209c616b55343ac56..58350c6f75d7f3658dabfd3405408e8859af9704 100644 (file)
@@ -1878,6 +1878,7 @@ static int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode
        case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
        case V_SQ_CF_WORD1_SQ_CF_INST_POP:
        case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+       case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10:
        case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
        case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
        case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
@@ -1952,6 +1953,7 @@ int r600_bytecode_build(struct r600_bytecode *bc)
                        case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
                        case EG_V_SQ_CF_WORD1_SQ_CF_INST_POP:
                        case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+                       case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10:
                        case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
                        case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
                        case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
@@ -1986,7 +1988,7 @@ int r600_bytecode_build(struct r600_bytecode *bc)
                        case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
                        case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
                        case V_SQ_CF_WORD1_SQ_CF_INST_POP:
-                       case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+                       case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10:
                        case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
                        case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
                        case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
@@ -2089,6 +2091,7 @@ int r600_bytecode_build(struct r600_bytecode *bc)
                        case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF1:
                        case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF2:
                        case EG_V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3_BUF3:
+                       case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10:
                        case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
                        case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
                        case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
@@ -2172,6 +2175,7 @@ int r600_bytecode_build(struct r600_bytecode *bc)
                        case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM2:
                        case V_SQ_CF_ALLOC_EXPORT_WORD1_SQ_CF_INST_MEM_STREAM3:
                        case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+                       case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10:
                        case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
                        case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
                        case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
@@ -2360,6 +2364,7 @@ void r600_bytecode_dump(struct r600_bytecode *bc)
                        case EG_V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
                        case EG_V_SQ_CF_WORD1_SQ_CF_INST_POP:
                        case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+                       case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10:
                        case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
                        case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
                        case EG_V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
@@ -2454,6 +2459,7 @@ void r600_bytecode_dump(struct r600_bytecode *bc)
                        case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
                        case V_SQ_CF_WORD1_SQ_CF_INST_POP:
                        case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL:
+                       case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10:
                        case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_END:
                        case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_CONTINUE:
                        case V_SQ_CF_WORD1_SQ_CF_INST_LOOP_BREAK:
index 3e746e5e2e22337d1734a2c18e2464660465935c..8288c040a637df08fc0d4d1f25a6e54e82f680a1 100644 (file)
@@ -5101,7 +5101,9 @@ static int tgsi_endif(struct r600_shader_ctx *ctx)
 
 static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
 {
-       r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_NO_AL));
+       /* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not
+        * limited to 4096 iterations, like the other LOOP_* instructions. */
+       r600_bytecode_add_cfinst(ctx->bc, CTX_INST(V_SQ_CF_WORD1_SQ_CF_INST_LOOP_START_DX10));
 
        fc_pushlevel(ctx, FC_LOOP);