pan/midgard: Remove undefined behavior

diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c
index acfc5754480d5e67b09eaead69a98d6d9a716f31..d05bdf79d4782715f66d7cc3e4dbd289e41ff925 100644
--- a/src/panfrost/midgard/midgard_schedule.c
+++ b/src/panfrost/midgard/midgard_schedule.c
@@ -24,7 +24,6 @@
 #include "compiler.h"
 #include "midgard_ops.h"
 #include "util/u_memory.h"
-#include "util/register_allocate.h"
 
 /* Scheduling for Midgard is complicated, to say the least. ALU instructions
  * must be grouped into VLIW bundles according to following model:
@@ -166,6 +165,9 @@ mir_create_dependency_graph(midgard_instruction **instructions, unsigned count,
                 util_dynarray_fini(&last_read[i]);
                 util_dynarray_fini(&last_write[i]);
         }
+
+        free(last_read);
+        free(last_write);
 }
 
 /* Does the mask cover more than a scalar? */
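These two free() calls balance heap allocations made earlier in mir_create_dependency_graph(); only the fini/free lines appear in the hunk, so the following is a minimal sketch of the assumed pairing (node_count and the calloc sizing are assumptions):

    /* Assumed allocation shape that the new free() calls balance; the
     * util_dynarray init/fini API is mesa's src/util/u_dynarray.h. */
    struct util_dynarray *last_read =
            calloc(node_count, sizeof(struct util_dynarray));
    struct util_dynarray *last_write =
            calloc(node_count, sizeof(struct util_dynarray));

    for (unsigned i = 0; i < node_count; ++i) {
            util_dynarray_init(&last_read[i], NULL);
            util_dynarray_init(&last_write[i], NULL);
    }

    /* ... record read/write dependencies per node ... */

    for (unsigned i = 0; i < node_count; ++i) {
            util_dynarray_fini(&last_read[i]);
            util_dynarray_fini(&last_write[i]);
    }

    free(last_read);  /* previously leaked */
    free(last_write);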
@@ -370,7 +372,7 @@ mir_adjust_constants(midgard_instruction *ins,
         if (!ins->has_constants)
                 return true;
 
-        if (ins->alu.reg_mode == midgard_reg_mode_16) {
+        if (ins->alu.reg_mode != midgard_reg_mode_32) {
                 /* TODO: 16-bit constant combining */
                 if (pred->constant_count)
                         return false;
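Testing `!= midgard_reg_mode_32` instead of `== midgard_reg_mode_16` makes the bail-out cover the 8- and 64-bit register modes too; previously those fell through to constant-combining logic that only understands 32-bit lanes. A sketch of the mode enum as assumed here (the exact enumerators are an assumption):

    /* Assumed Midgard ALU register modes; combining is implemented only
     * for midgard_reg_mode_32, so every other mode must refuse to merge
     * a predecessor's embedded constants. */
    enum midgard_reg_mode {
            midgard_reg_mode_8,
            midgard_reg_mode_16,
            midgard_reg_mode_32,
            midgard_reg_mode_64,
    };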
@@ -1072,7 +1074,9 @@ schedule_block(compiler_context *ctx, midgard_block *block)
 
         /* Blend constant was backwards as well. blend_offset if set is
          * strictly positive, as an offset of zero would imply constants before
-         * any instructions which is invalid in Midgard */
+         * any instructions which is invalid in Midgard. TODO: blend constants
+         * are broken if you spill since then quadword_count becomes invalid
+         * XXX */
 
         if (blend_offset)
                 ctx->blend_constant_offset = ((ctx->quadword_count + block->quadword_count) - blend_offset - 1) * 0x10;
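As a worked example of that offset arithmetic, with hypothetical counts: 10 quadwords emitted before this block, 4 quadwords inside it, and the blend constant 2 bundles from the block's end:

    /* Hypothetical numbers, purely to illustrate the computation: */
    unsigned quadword_count  = 10;  /* quadwords before this block  */
    unsigned block_quadwords = 4;   /* quadwords inside this block  */
    unsigned blend_offset    = 2;   /* bundles from the block's end */

    /* ((10 + 4) - 2 - 1) * 0x10 = 11 * 16 = 176 bytes from the start
     * of the shader -- an absolute offset, which is why the TODO notes
     * it breaks once spilling changes quadword_count after the fact. */
    unsigned blend_constant_offset =
            ((quadword_count + block_quadwords) - blend_offset - 1) * 0x10;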
@@ -1090,6 +1094,9 @@ schedule_block(compiler_context *ctx, midgard_block *block)
         mir_foreach_instr_in_block_scheduled_rev(block, ins) {
                 list_add(&ins->link, &block->instructions);
         }
+
+        free(instructions); /* Allocated by flatten_mir() */
+        free(worklist);
 }
 
 /* When we're 'squeezing down' the values in the IR, we maintain a hash
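Both pointers are locals of schedule_block(); the hunk's comment attributes `instructions` to flatten_mir(). A minimal sketch of the assumed allocation sites (the worklist's exact type and sizing are assumptions):

    /* Assumed allocations now balanced by the free() calls above. */
    unsigned len = 0;
    midgard_instruction **instructions = flatten_mir(block, &len);
    BITSET_WORD *worklist = calloc(BITSET_WORDS(len), sizeof(BITSET_WORD));

    /* ... pick instructions off the worklist and emit bundles ... */

    free(instructions); /* allocated by flatten_mir() */
    free(worklist);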
@@ -1117,7 +1124,8 @@ find_or_allocate_temp(compiler_context *ctx, unsigned hash)
         return temp;
 }
 
-/* Reassigns numbering to get rid of gaps in the indices */
+/* Reassigns numbering to get rid of gaps in the indices and to prioritize
+ * smaller register classes */
 
 static void
 mir_squeeze_index(compiler_context *ctx)
@@ -1127,8 +1135,18 @@ mir_squeeze_index(compiler_context *ctx)
         /* TODO don't leak old hash_to_temp */
         ctx->hash_to_temp = _mesa_hash_table_u64_create(NULL);
 
+        /* We need to prioritize texture registers on older GPUs so we don't
+         * fail RA trying to assign to work registers r0/r1 when a work
+         * register is already there */
+
         mir_foreach_instr_global(ctx, ins) {
-                ins->dest = find_or_allocate_temp(ctx, ins->dest);
+                if (ins->type == TAG_TEXTURE_4)
+                        ins->dest = find_or_allocate_temp(ctx, ins->dest);
+        }
+
+        mir_foreach_instr_global(ctx, ins) {
+                if (ins->type != TAG_TEXTURE_4)
+                        ins->dest = find_or_allocate_temp(ctx, ins->dest);
 
                 for (unsigned i = 0; i < ARRAY_SIZE(ins->src); ++i)
                         ins->src[i] = find_or_allocate_temp(ctx, ins->src[i]);
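The net effect of the two passes is that texture destinations receive the lowest fresh indices, so they are handled before work registers during allocation. A standalone illustration of the renumbering (the tiny instruction struct and values are hypothetical):

    #include <stdio.h>

    /* Hypothetical three-instruction program: one texture write between
     * two ALU writes. Two-pass squeeze gives the texture dest index 0. */
    struct inst { int is_texture; unsigned dest; };

    int main(void)
    {
            struct inst prog[] = { {0, 3}, {1, 7}, {0, 9} };
            unsigned remap[16], next = 0;

            for (unsigned i = 0; i < 16; ++i)
                    remap[i] = ~0u;

            /* Pass 1: texture destinations first */
            for (unsigned i = 0; i < 3; ++i)
                    if (prog[i].is_texture && remap[prog[i].dest] == ~0u)
                            remap[prog[i].dest] = next++;

            /* Pass 2: everything else */
            for (unsigned i = 0; i < 3; ++i)
                    if (!prog[i].is_texture && remap[prog[i].dest] == ~0u)
                            remap[prog[i].dest] = next++;

            for (unsigned i = 0; i < 3; ++i)
                    printf("dest %u -> %u\n", prog[i].dest, remap[prog[i].dest]);

            /* Prints: dest 3 -> 1, dest 7 -> 0, dest 9 -> 2; a single
             * pass would instead have given 3 -> 0, 7 -> 1, 9 -> 2. */
            return 0;
    }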
@@ -1157,16 +1175,14 @@ v_load_store_scratch(
 
                         /* For register spilling - to thread local storage */
                         .arg_1 = 0xEA,
                         .arg_2 = 0x1E,
-
-                        /* Splattered across, TODO combine logically */
-                        .varying_parameters = (byte & 0x1FF) << 1,
-                        .address = (byte >> 9)
                 },
 
                 /* If we spill an unspill, RA goes into an infinite loop */
                 .no_spill = true
         };
 
+        ins.constants[0] = byte;
+
         if (is_store) {
                 /* r0 = r26, r1 = r27 */
                 assert(srcdest == SSA_FIXED_REGISTER(26) || srcdest == SSA_FIXED_REGISTER(27));
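Rather than hand-splitting the TLS byte offset across two bitfields, the offset now travels in the instruction's embedded constants and is packed at emission time. For contrast, the arithmetic the deleted lines performed, on a hypothetical offset:

    /* The removed "splatter": the low 9 bits of the offset landed in
     * varying_parameters (shifted up one), the remainder in address.
     * For byte = 0x2A5: */
    unsigned byte = 0x2A5;
    unsigned varying_parameters = (byte & 0x1FF) << 1;  /* 0x0A5 << 1 = 0x14A */
    unsigned address            = (byte >> 9);          /* 0x1 */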
@@ -1185,7 +1201,7 @@ v_load_store_scratch(
 
 static void mir_spill_register(
                 compiler_context *ctx,
-                struct ra_graph *g,
+                struct lcra_state *l,
                 unsigned *spill_count)
 {
         unsigned spill_index = ctx->temp_count;
@@ -1194,10 +1210,21 @@ static void mir_spill_register(
          * spill node. All nodes are equal in spill cost, but we can't spill
          * nodes written to from an unspill */
 
-        for (unsigned i = 0; i < ctx->temp_count; ++i) {
-                ra_set_node_spill_cost(g, i, 1.0);
+        unsigned *cost = calloc(ctx->temp_count, sizeof(cost[0]));
+
+        mir_foreach_instr_global(ctx, ins) {
+                if (ins->dest < ctx->temp_count)
+                        cost[ins->dest]++;
+
+                mir_foreach_src(ins, s) {
+                        if (ins->src[s] < ctx->temp_count)
+                                cost[ins->src[s]]++;
+                }
         }
 
+        for (unsigned i = 0; i < ctx->temp_count; ++i)
+                lcra_set_node_spill_cost(l, i, cost[i]);
+
         /* We can't spill any bundles that contain unspills. This could be
          * optimized to allow use of r27 to spill twice per bundle, but if
         * you're at the point of optimizing spilling, it's too late.
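Instead of the old flat cost of 1.0 per node, every static read or write of a node now bumps its cost, so the allocator prefers to spill rarely-touched values (spilling them inserts the fewest new load/store instructions). A self-contained sketch of the same counting heuristic on plain arrays (names and layout hypothetical):

    #include <stdlib.h>

    /* Count touches per node: each instruction contributes one write
     * (dest) and one read (src) here for simplicity. Out-of-range
     * indices (fixed registers, unused slots) are skipped, mirroring
     * the < temp_count guards in the hunk. */
    unsigned *compute_spill_costs(const unsigned *dests, const unsigned *srcs,
                                  unsigned n_instrs, unsigned temp_count)
    {
            unsigned *cost = calloc(temp_count, sizeof(cost[0]));

            for (unsigned i = 0; i < n_instrs; ++i) {
                    if (dests[i] < temp_count)
                            cost[dests[i]]++;
                    if (srcs[i] < temp_count)
                            cost[srcs[i]]++;
            }

            return cost; /* caller feeds these to the allocator, then frees */
    }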
@@ -1216,7 +1243,7 @@ static void mir_spill_register(
                                 unsigned src = bun->instructions[i]->src[s];
 
                                 if (src < ctx->temp_count)
-                                        ra_set_node_spill_cost(g, src, -1.0);
+                                        lcra_set_node_spill_cost(l, src, -1);
                         }
                 }
         }
@@ -1227,12 +1254,12 @@ static void mir_spill_register(
                 for (unsigned i = 0; i < bun->instruction_count; ++i) {
                         unsigned dest = bun->instructions[i]->dest;
                         if (dest < ctx->temp_count)
-                                ra_set_node_spill_cost(g, dest, -1.0);
+                                lcra_set_node_spill_cost(l, dest, -1);
                 }
         }
 
-        int spill_node = ra_get_best_spill_node(g);
+        int spill_node = lcra_get_best_spill_node(l);
 
         if (spill_node < 0) {
                 mir_print_shader(ctx);
@@ -1243,9 +1270,8 @@ static void mir_spill_register(
          * legitimately spill to TLS, but special registers just spill to work
          * registers */
 
-        unsigned class = ra_get_node_class(g, spill_node);
-        bool is_special = (class >> 2) != REG_CLASS_WORK;
-        bool is_special_w = (class >> 2) == REG_CLASS_TEXW;
+        bool is_special = l->class[spill_node] != REG_CLASS_WORK;
+        bool is_special_w = l->class[spill_node] == REG_CLASS_TEXW;
 
         /* Allocate TLS slot (maybe) */
         unsigned spill_slot = !is_special ? (*spill_count)++ : 0;
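LCRA stores the register class per node directly on the allocator state, so the `(class >> 2)` decode required by the graph-coloring API goes away. A sketch of the assumed state layout (only the `class` array is evidenced by the hunk; the other fields are assumptions):

    /* Simplified view of the assumed LCRA state: classes are stored
     * verbatim, so callers compare l->class[node] against REG_CLASS_*
     * without any bit-shifting. */
    struct lcra_state {
            unsigned node_count;
            unsigned *class;       /* REG_CLASS_* per node             */
            signed *spill_cost;    /* set via lcra_set_node_spill_cost */
            /* ... constraint matrices and solution vectors ... */
    };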
@@ -1366,12 +1392,14 @@ static void mir_spill_register(
         mir_foreach_instr_global(ctx, ins) {
                 ins->hint = false;
         }
+
+        free(cost);
 }
 
 void
 schedule_program(compiler_context *ctx)
 {
-        struct ra_graph *g = NULL;
+        struct lcra_state *l = NULL;
         bool spilled = false;
         int iter_count = 1000; /* max iterations */
@@ -1396,13 +1424,17 @@ schedule_program(compiler_context *ctx)
         do {
                 if (spilled)
-                        mir_spill_register(ctx, g, &spill_count);
+                        mir_spill_register(ctx, l, &spill_count);
 
                 mir_squeeze_index(ctx);
                 mir_invalidate_liveness(ctx);
-                g = NULL;
-                g = allocate_registers(ctx, &spilled);
+
+                if (l) {
+                        lcra_free(l);
+                        l = NULL;
+                }
+
+                l = allocate_registers(ctx, &spilled);
         } while(spilled && ((iter_count--) > 0));
 
         if (iter_count <= 0) {
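The replaced `g = NULL; g = allocate_registers(...)` pair leaked the previous allocator state on every spill retry: assigning NULL over a live pointer drops the only reference. A minimal contrast of the two patterns (the allocate/release names are hypothetical stand-ins):

    /* Leaky pattern the hunk removes: the old state is overwritten
     * while still owned, stranding one allocation per retry. */
    state = NULL;                      /* old state now unreachable */
    state = allocate(ctx, &spilled);

    /* Leak-free pattern the hunk adds: release before reassigning. */
    if (state) {
            release(state);
            state = NULL;
    }
    state = allocate(ctx, &spilled);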
@@ -1415,5 +1447,7 @@ schedule_program(compiler_context *ctx)
 
         ctx->tls_size = spill_count * 16;
 
-        install_registers(ctx, g);
+        install_registers(ctx, l);
+
+        lcra_free(l);
 }