v2: fix regression on r600 NOP instructions.
Signed-off-by: Vadim Girlin <vadimgirlin@gmail.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
- S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
+ S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode);
if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
- bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
+ bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
id++;
} else if (cfop->flags & CF_STRM) {
/* MEM_STREAM instructions */
S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
bc->bytecode[id] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
- S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
+ S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask) |
S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size);
if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
- bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
+ bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
id++;
} else {
/* branch, loop, call, return instructions */
return 0;
}
+#if 0
void eg_bytecode_export_read(struct r600_bytecode *bc,
struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
{
output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1);
output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1);
}
+#endif
if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr &&
(output->array_base + output->burst_count) == bc->cf_last->output.array_base) {
- bc->cf_last->output.end_of_program |= output->end_of_program;
bc->cf_last->op = bc->cf_last->output.op = output->op;
bc->cf_last->output.gpr = output->gpr;
bc->cf_last->output.array_base = output->array_base;
} else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) &&
output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) {
- bc->cf_last->output.end_of_program |= output->end_of_program;
bc->cf_last->op = bc->cf_last->output.op = output->op;
bc->cf_last->output.burst_count += output->burst_count;
return 0;
return r;
bc->cf_last->op = output->op;
memcpy(&bc->cf_last->output, output, sizeof(struct r600_bytecode_output));
+ bc->cf_last->barrier = 1;
return 0;
}
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
- S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
+ S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
- S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program);
+ S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
} else if (cfop->flags & CF_STRM) {
bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
- S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->output.barrier) |
+ S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
- S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->output.end_of_program) |
+ S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program) |
S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size) |
S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask);
} else {
bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode) |
S_SQ_CF_WORD1_BARRIER(1) |
S_SQ_CF_WORD1_COND(cf->cond) |
- S_SQ_CF_WORD1_POP_COUNT(cf->pop_count);
+ S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
+ S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
}
return 0;
}
print_indent(o, 67);
fprintf(stderr, " ES:%X ", cf->output.elem_size);
- if (!cf->output.barrier)
+ if (!cf->barrier)
fprintf(stderr, "NO_BARRIER ");
- if (cf->output.end_of_program)
+ if (cf->end_of_program)
fprintf(stderr, "EOP ");
fprintf(stderr, "\n");
} else if (r600_isa_cf(cf->op)->flags & CF_STRM) {
fprintf(stderr, " ES:%i ", cf->output.elem_size);
if (cf->output.array_size != 0xFFF)
fprintf(stderr, "AS:%i ", cf->output.array_size);
- if (!cf->output.barrier)
+ if (!cf->barrier)
fprintf(stderr, "NO_BARRIER ");
- if (cf->output.end_of_program)
+ if (cf->end_of_program)
fprintf(stderr, "EOP ");
fprintf(stderr, "\n");
} else {
}
}
+#if 0
void r600_bytecode_export_read(struct r600_bytecode *bc,
struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
{
output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1);
output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1);
}
+#endif
unsigned array_size;
unsigned comp_mask;
unsigned type;
- unsigned end_of_program;
unsigned op;
unsigned swizzle_z;
unsigned swizzle_w;
unsigned burst_count;
- unsigned barrier;
};
struct r600_bytecode_kcache {
struct r600_bytecode_kcache kcache[4];
unsigned r6xx_uses_waterfall;
unsigned eg_alu_extended;
+ unsigned barrier;
+ unsigned end_of_program;
struct list_head alu;
struct list_head tex;
struct list_head vtx;
output.array_base = so->output[i].dst_offset - so->output[i].start_component;
output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
output.burst_count = 1;
- output.barrier = 1;
/* array_size is an upper limit for the burst_count
* with MEM_STREAM instructions */
output.array_size = 0xFFF;
output[j].swizzle_z = 2;
output[j].swizzle_w = 3;
output[j].burst_count = 1;
- output[j].barrier = 1;
output[j].type = -1;
output[j].op = CF_OP_EXPORT;
switch (ctx.type) {
output[j].swizzle_z = 2;
output[j].swizzle_w = key.alpha_to_one ? 5 : 3;
output[j].burst_count = 1;
- output[j].barrier = 1;
output[j].array_base = k;
output[j].op = CF_OP_EXPORT;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
output[j].swizzle_z = 7;
output[j].swizzle_w = 7;
output[j].burst_count = 1;
- output[j].barrier = 1;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
output[j].array_base = next_pos_base;
output[j].op = CF_OP_EXPORT;
output[j].swizzle_z = 7;
output[j].swizzle_w = 7;
output[j].burst_count = 1;
- output[j].barrier = 1;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
output[j].array_base = 0;
output[j].op = CF_OP_EXPORT;
output[j].swizzle_z = 7;
output[j].swizzle_w = 7;
output[j].burst_count = 1;
- output[j].barrier = 1;
output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL;
output[j].array_base = 0;
output[j].op = CF_OP_EXPORT;
/* set export done on last export of each type */
for (i = noutput - 1, output_done = 0; i >= 0; i--) {
- if (ctx.bc->chip_class < CAYMAN) {
- if (i == (noutput - 1)) {
- output[i].end_of_program = 1;
- }
- }
if (!(output_done & (1 << output[i].type))) {
output_done |= (1 << output[i].type);
output[i].op = CF_OP_EXPORT_DONE;
goto out_err;
}
}
+
/* add program end */
- if (!use_llvm && ctx.bc->chip_class == CAYMAN)
- cm_bytecode_add_cf_end(ctx.bc);
+ if (!use_llvm) {
+ if (ctx.bc->chip_class == CAYMAN)
+ cm_bytecode_add_cf_end(ctx.bc);
+ else {
+ const struct cf_op_info *last = r600_isa_cf(ctx.bc->cf_last->op);
+
+ if (last->flags & CF_CLAUSE)
+ r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
+
+ ctx.bc->cf_last->end_of_program = 1;
+ }
+ }
/* check GPR limit - we have 124 = 128 - 4
* (4 are reserved as alu clause temporary registers) */