From 51f22689a419a8a13ca105e8ffc905b5fadea0db Mon Sep 17 00:00:00 2001
From: Christoph Bumiller
Date: Fri, 10 Dec 2010 20:13:40 +0100
Subject: [PATCH] nvc0: fix branching ops

- bra is PC relative
- jump to else condition was inverted
- handle integer comparisons
---
 src/gallium/drivers/nvc0/nvc0_pc.c         |  3 ++-
 src/gallium/drivers/nvc0/nvc0_pc.h         |  6 ++---
 src/gallium/drivers/nvc0/nvc0_pc_emit.c    | 28 +++++++++++++++-------
 src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c |  2 +-
 4 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/nvc0/nvc0_pc.c b/src/gallium/drivers/nvc0/nvc0_pc.c
index e38f6ced24b..cf7b8e347fb 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc.c
@@ -328,6 +328,7 @@ nvc0_emit_program(struct nv_pc *pc)
 
    NOUVEAU_DBG("emitting program: size = %u\n", pc->emit_size);
 
+   pc->emit_pos = 0;
    for (n = 0; n < pc->num_blocks; ++n) {
       struct nv_instruction *i;
       struct nv_basic_block *b = pc->bb_list[n];
@@ -335,7 +336,7 @@ nvc0_emit_program(struct nv_pc *pc)
       for (i = b->entry; i; i = i->next) {
          nvc0_emit_instruction(pc, i);
          pc->emit += 2;
-         pc->emit_pos += 2;
+         pc->emit_pos += 8;
       }
    }
    assert(pc->emit == &code[pc->emit_size / 4]);
diff --git a/src/gallium/drivers/nvc0/nvc0_pc.h b/src/gallium/drivers/nvc0/nvc0_pc.h
index b48b0b1fbad..df0314965a3 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc.h
+++ b/src/gallium/drivers/nvc0/nvc0_pc.h
@@ -425,7 +425,7 @@ struct nv_basic_block {
    uint priv; /* reset to 0 after you're done */
    uint pass_seq;
 
-   uint32_t emit_pos; /* position, size in emitted code */
+   uint32_t emit_pos; /* position, size in emitted code (in bytes) */
    uint32_t emit_size;
 
    uint32_t live_set[NV_PC_MAX_VALUES / 32];
@@ -457,8 +457,8 @@ struct nv_pc {
    unsigned immd_count;
 
    uint32_t *emit;
-   unsigned emit_size;
-   unsigned emit_pos;
+   uint32_t emit_size;
+   uint32_t emit_pos;
 
    void *reloc_entries;
    unsigned num_relocs;
diff --git a/src/gallium/drivers/nvc0/nvc0_pc_emit.c b/src/gallium/drivers/nvc0/nvc0_pc_emit.c
index 6735f93fd3c..cd1ad03b00b 100644
--- a/src/gallium/drivers/nvc0/nvc0_pc_emit.c
+++ b/src/gallium/drivers/nvc0/nvc0_pc_emit.c
@@ -73,7 +73,7 @@ create_fixup(struct nv_pc *pc, uint8_t ty,
 
    f = (struct nvc0_fixup *)pc->reloc_entries;
 
-   f[n].ofst = (pc->emit_pos + w) * 4;
+   f[n].ofst = pc->emit_pos + w * 4;
    f[n].type = ty;
    f[n].data = data;
    f[n].mask = m;
@@ -217,19 +217,26 @@ const_space_index(struct nv_instruction *i, int s)
 static void
 emit_flow(struct nv_pc *pc, struct nv_instruction *i, uint8_t op)
 {
-   pc->emit[0] = 0x000001e7;
+   pc->emit[0] = 0x00000007;
    pc->emit[1] = op << 24;
 
-   set_pred(pc, i);
+   if (op == 0x40 || (op >= 0x80 && op <= 0x98)) {
+      /* bra, exit, ret or kil */
+      pc->emit[0] |= 0x1e0;
+      set_pred(pc, i);
+   }
 
    if (i->target) {
-      uint32_t pos = i->target->emit_pos;
+      int32_t pcrel = i->target->emit_pos - (pc->emit_pos + 8);
 
+      /* we will need relocations only for global functions */
+      /*
       create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 0, pos, 26, 0xfc000000);
       create_fixup(pc, NVC0_FIXUP_CODE_RELOC, 1, pos, -6, 0x0001ffff);
+      */
 
-      pc->emit[0] |= (pos & 0x3f) << 26;
-      pc->emit[1] |= (pos >> 6) & 0x1ffff;
+      pc->emit[0] |= (pcrel & 0x3f) << 26;
+      pc->emit[1] |= (pcrel >> 6) & 0x1ffff;
    }
 }
 
@@ -893,6 +900,11 @@ nvc0_emit_instruction(struct nv_pc *pc, struct nv_instruction *i)
       emit_mul_f32(pc, i);
       break;
    case NV_OP_SET_F32:
+   case NV_OP_SET_F32_AND:
+   case NV_OP_SET_F32_OR:
+   case NV_OP_SET_F32_XOR:
+   case NV_OP_SET_S32:
+   case NV_OP_SET_U32:
    case NV_OP_FSET_F32:
       emit_set(pc, i);
       break;
@@ -926,8 +938,8 @@ nvc0_emit_instruction(struct nv_pc *pc, struct nv_instruction *i)
       break;
    case NV_OP_JOIN:
    case NV_OP_NOP:
-      pc->emit[0] = 0x00003c00;
-      pc->emit[1] = 0x00000000;
+      pc->emit[0] = 0x00003de4;
+      pc->emit[1] = 0x40000000;
       break;
    case NV_OP_SELP:
       emit_selp(pc, i);
diff --git a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
index a6797db9c54..26f9e735fb2 100644
--- a/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
+++ b/src/gallium/drivers/nvc0/nvc0_tgsi_to_nc.c
@@ -1494,7 +1494,7 @@ bld_instruction(struct bld_context *bld,
       bld->join_bb[bld->cond_lvl] = bld->pc->current_block;
       bld->cond_bb[bld->cond_lvl] = bld->pc->current_block;
 
-      src1 = bld_setp(bld, NV_OP_SET_U32, NV_CC_NE,
+      src1 = bld_setp(bld, NV_OP_SET_U32, NV_CC_EQ,
                       emit_fetch(bld, insn, 0, 0), bld->zero);
 
       bld_flow(bld, NV_OP_BRA, src1, NULL, (bld->cond_lvl == 0));
-- 
2.30.2