/*
* Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
+ * Copyright (C) 2019 Collabora, Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
static unsigned reg_type_to_mask[WORK_STRIDE] = {
0xF, /* xyzw */
0x7, 0x7 << 1, /* xyz */
- 0x3, 0x3 << 1, 0x3 << 2, /* xy */
- 0x1, 0x1 << 1, 0x1 << 2, 0x1 << 3 /* x */
+ 0x3, 0x3 << 1, 0x3 << 2, /* xy */
+ 0x1, 0x1 << 1, 0x1 << 2, 0x1 << 3 /* x */
};
static unsigned reg_type_to_swizzle[WORK_STRIDE] = {
}
static unsigned
-compose_swizzle(unsigned swizzle, unsigned mask, struct phys_reg reg, struct phys_reg dst)
+compose_swizzle(unsigned swizzle, unsigned mask,
+ struct phys_reg reg, struct phys_reg dst)
{
- unsigned out = 0;
-
- for (unsigned c = 0; c < 4; ++c) {
- unsigned s = (swizzle >> (2*c)) & 0x3;
- unsigned q = (reg.swizzle >> (2*s)) & 0x3;
-
- out |= (q << (2*c));
- }
+ unsigned out = pan_compose_swizzle(swizzle, reg.swizzle);
/* Based on the register mask, we need to adjust over. E.g if we're
* writing to yz, a base swizzle of xy__ becomes _xy_. Save the
if ((hash < 0) || (hash >= SSA_FIXED_MINIMUM))
return hash;
- unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->hash_to_temp, hash + 1);
+ unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(
+ ctx->hash_to_temp, hash + 1);
if (temp)
return temp - 1;
temp = ctx->temp_count++;
ctx->max_hash = MAX2(ctx->max_hash, hash);
- _mesa_hash_table_u64_insert(ctx->hash_to_temp, hash + 1, (void *) ((uintptr_t) temp + 1));
+ _mesa_hash_table_u64_insert(ctx->hash_to_temp,
+ hash + 1, (void *) ((uintptr_t) temp + 1));
return temp;
}
static unsigned int
midgard_ra_select_callback(struct ra_graph *g, BITSET_WORD *regs, void *data)
{
- /* Choose the first available register to minimise reported register pressure */
+ /* Choose the first available register to minimise register pressure */
for (int i = 0; i < (16 * WORK_STRIDE); ++i) {
if (BITSET_TEST(regs, i)) {
};
/* Add the full set of work registers */
- for (int i = 0; i < work_count; ++i) {
+ for (unsigned i = 0; i < work_count; ++i) {
int base = WORK_STRIDE * i;
/* Build a full set of subdivisions */
ra_class_add_reg(regs, work_vec1, base + 8);
ra_class_add_reg(regs, work_vec1, base + 9);
- for (unsigned i = 0; i < 10; ++i) {
- for (unsigned j = 0; j < 10; ++j) {
- unsigned mask1 = reg_type_to_mask[i];
- unsigned mask2 = reg_type_to_mask[j];
+ for (unsigned a = 0; a < 10; ++a) {
+ unsigned mask1 = reg_type_to_mask[a];
+
+ for (unsigned b = 0; b < 10; ++b) {
+ unsigned mask2 = reg_type_to_mask[b];
if (mask1 & mask2)
- ra_add_reg_conflict(regs, base + i, base + j);
+ ra_add_reg_conflict(regs,
+ base + a, base + b);
}
}
}
if (ins->ssa_args.dest < 0) continue;
if (ins->ssa_args.dest >= SSA_FIXED_MINIMUM) continue;
- /* Default to vec4 if we're not sure */
-
- int mask = 0xF;
-
- if (ins->type == TAG_ALU_4)
- mask = squeeze_writemask(ins->alu.mask);
- else if (ins->type == TAG_LOAD_STORE_4)
- mask = ins->load_store.mask;
-
- int class = util_logbase2(mask) + 1;
+ int class = util_logbase2(ins->mask) + 1;
/* Use the largest class if there's ambiguity, this
* handles partial writes */
if (ins->ssa_args.dest < 0) continue;
if (ins->ssa_args.dest < SSA_FIXED_MINIMUM) {
- /* If this destination is not yet live, it is now since we just wrote it */
+ /* If this destination is not yet live, it is
+ * now since we just wrote it */
int dest = ins->ssa_args.dest;
* invocations, and if there are none, the source dies
* */
- int sources[2] = { ins->ssa_args.src0, ins->ssa_args.src1 };
+ int sources[2] = {
+ ins->ssa_args.src0, ins->ssa_args.src1
+ };
for (int src = 0; src < 2; ++src) {
int s = sources[src];
for (int i = 0; i < nodes; ++i) {
for (int j = i + 1; j < nodes; ++j) {
- if (!(live_start[i] >= live_end[j] || live_start[j] >= live_end[i]))
+ bool j_overlaps_i = live_start[j] < live_end[i];
+ bool i_overlaps_j = live_start[i] < live_end[j];
+
+ if (i_overlaps_j && j_overlaps_i)
ra_add_node_interference(g, i, j);
}
}
static void
install_registers_instr(
- compiler_context *ctx,
- struct ra_graph *g,
- midgard_instruction *ins)
+ compiler_context *ctx,
+ struct ra_graph *g,
+ midgard_instruction *ins)
{
ssa_args args = ins->ssa_args;
struct phys_reg src2 = index_to_reg(ctx, g, adjusted_src);
struct phys_reg dest = index_to_reg(ctx, g, args.dest);
- unsigned mask = squeeze_writemask(ins->alu.mask);
- ins->alu.mask = expand_writemask(compose_writemask(mask, dest));
+ unsigned uncomposed_mask = ins->mask;
+ ins->mask = compose_writemask(uncomposed_mask, dest);
/* Adjust the dest mask if necessary. Mostly this is a no-op
* but it matters for dot products */
- dest.mask = effective_writemask(&ins->alu);
+ dest.mask = effective_writemask(&ins->alu, ins->mask);
midgard_vector_alu_src mod1 =
vector_alu_from_unsigned(ins->alu.src1);
- mod1.swizzle = compose_swizzle(mod1.swizzle, mask, src1, dest);
+ mod1.swizzle = compose_swizzle(mod1.swizzle, uncomposed_mask, src1, dest);
ins->alu.src1 = vector_alu_srco_unsigned(mod1);
ins->registers.src1_reg = src1.reg;
ins->registers.src2_imm = args.inline_constant;
if (args.inline_constant) {
- /* Encode inline 16-bit constant as a vector by default */
+ /* Encode inline 16-bit constant. See disassembler for
+ * where the algorithm is from */
ins->registers.src2_reg = ins->inline_constant >> 11;
int lower_11 = ins->inline_constant & ((1 << 12) - 1);
- uint16_t imm = ((lower_11 >> 8) & 0x7) | ((lower_11 & 0xFF) << 3);
+ uint16_t imm = ((lower_11 >> 8) & 0x7) |
+ ((lower_11 & 0xFF) << 3);
ins->alu.src2 = imm << 2;
} else {
midgard_vector_alu_src mod2 =
vector_alu_from_unsigned(ins->alu.src2);
- mod2.swizzle = compose_swizzle(mod2.swizzle, mask, src2, dest);
+ mod2.swizzle = compose_swizzle(
+ mod2.swizzle, uncomposed_mask, src2, dest);
ins->alu.src2 = vector_alu_srco_unsigned(mod2);
ins->registers.src2_reg = src2.reg;
}
case TAG_LOAD_STORE_4: {
- if (OP_IS_STORE(ins->load_store.op)) {
+ if (OP_IS_STORE_VARY(ins->load_store.op)) {
/* TODO: use ssa_args for st_vary */
ins->load_store.reg = 0;
} else {
- struct phys_reg src = index_to_reg(ctx, g, args.dest);
+ /* Which physical register we read off depends on
+ * whether we are loading or storing -- think about the
+ * logical dataflow */
+
+ unsigned r = OP_IS_STORE(ins->load_store.op) ?
+ args.src0 : args.dest;
+ struct phys_reg src = index_to_reg(ctx, g, r);
ins->load_store.reg = src.reg;
ins->load_store.swizzle = compose_swizzle(
- ins->load_store.swizzle, 0xF,
- default_phys_reg(0), src);
+ ins->load_store.swizzle, 0xF,
+ default_phys_reg(0), src);
- ins->load_store.mask = compose_writemask(
- ins->load_store.mask, src);
+ ins->mask = compose_writemask(
+ ins->mask, src);
}
break;