From: Christian König
Date: Sat, 8 Jan 2011 12:24:36 +0000 (+0100)
Subject: Merge remote branch 'origin/master' into pipe-video
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=72e30991559017c16d48569e612dbc0970e3b9ca;p=mesa.git

Merge remote branch 'origin/master' into pipe-video

Conflicts:
	configure.ac
	src/gallium/drivers/r600/eg_asm.c
	src/gallium/drivers/r600/r600_asm.c
	src/gallium/drivers/r600/r600_asm.h
	src/gallium/include/pipe/p_format.h
	src/gallium/targets/dri-nouveau/Makefile
---

72e30991559017c16d48569e612dbc0970e3b9ca
diff --cc configure.ac
index df51ce205b1,bcf7cd38a61..a1d754c5b4b
--- a/configure.ac
+++ b/configure.ac
@@@ -1700,27 -1683,8 +1693,27 @@@ AC_ARG_ENABLE([gallium-nouveau]
 	[enable_gallium_nouveau="$enableval"],
 	[enable_gallium_nouveau=no])
 if test "x$enable_gallium_nouveau" = xyes; then
-	GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nvfx nv50"
+	GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nvfx nv50 nvc0"
-	gallium_check_st "nouveau/drm" "dri-nouveau" "xorg-nouveau"
+	gallium_check_st "nouveau/drm" "dri-nouveau" "xorg-nouveau" "xvmc-nouveau"
+fi
+
+dnl
+dnl Gallium G3DVL configuration
+dnl
+AC_ARG_ENABLE([gallium-g3dvl],
+	[AS_HELP_STRING([--enable-gallium-g3dvl],
+		[build gallium g3dvl @<:@default=disabled@:>@])],
+	[enable_gallium_g3dvl="$enableval"],
+	[enable_gallium_g3dvl=no])
+if test "x$enable_gallium_g3dvl" = xyes; then
+	case "$mesa_driver" in
+	xlib)
+		GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS xvmc-softpipe"
+		;;
+	dri)
+		GALLIUM_WINSYS_DIRS="$GALLIUM_WINSYS_DIRS g3dvl/dri"
+		;;
+	esac
 fi
 
 dnl
diff --cc src/gallium/drivers/r600/eg_asm.c
index 1bb4c6b2afb,c44506c7eba..1881e633d54
--- a/src/gallium/drivers/r600/eg_asm.c
+++ b/src/gallium/drivers/r600/eg_asm.c
@@@ -37,17 -36,16 +37,17 @@@ int eg_bc_cf_build(struct r600_bc *bc,
 	switch (cf->inst) {
 	case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3):
 	case (EG_V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3):
+		assert(!end_of_program);
 		bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
-			S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) |
-			S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) |
-			S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank);
+			S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) |
+			S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) |
+			S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank);
 		bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) |
-			S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) |
-			S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) |
-			S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) |
-			S_SQ_CF_ALU_WORD1_BARRIER(cf->barrier) |
-			S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
+			S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) |
+			S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) |
+			S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) |
-			S_SQ_CF_ALU_WORD1_BARRIER(1) |
-			S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
++			S_SQ_CF_ALU_WORD1_BARRIER(cf->barrier) |
++			S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
 		break;
 	case EG_V_SQ_CF_WORD1_SQ_CF_INST_TEX:
 	case EG_V_SQ_CF_WORD1_SQ_CF_INST_VTX:
diff --cc src/gallium/drivers/r600/r600_asm.c
index bee1c941e5d,326724520b3..b15758adc33
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@@ -441,111 -239,110 +444,122 @@@ static int reserve_gpr(struct alu_bank_
 	return 0;
 }
 
-static int cycle_for_scalar_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle)
-{
-	int table[3];
-	int ret = 0;
-	switch (swiz) {
-	case SQ_ALU_SCL_210:
-		table[0] = 2; table[1] = 1; table[2] = 0;
-		*p_cycle = table[sel];
-		break;
-	case SQ_ALU_SCL_122:
-		table[0] = 1; table[1] = 2; table[2] = 2;
-		*p_cycle = table[sel];
-		break;
-	case SQ_ALU_SCL_212:
-		table[0] = 2; table[1] = 1; table[2] = 2;
-		*p_cycle = table[sel];
-		break;
-	case SQ_ALU_SCL_221:
-		table[0] = 2; table[1] = 2; table[2] = 1;
-		*p_cycle = table[sel];
-		break;
-		break;
-	default:
-		R600_ERR("bad scalar bank swizzle value\n");
-		ret = -1;
-		break;
+static int reserve_cfile(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
+{
+	int res, resmatch = -1, resempty = -1;
+	for (res = 3; res >= 0; --res) {
+		if (bs->hw_cfile_addr[res] == -1)
+			resempty = res;
+		else if (bs->hw_cfile_addr[res] == sel &&
+				bs->hw_cfile_elem[res] == chan)
+			resmatch = res;
+	}
-	return ret;
-}
-
-static int cycle_for_vector_bank_swizzle(const int swiz, const int sel, unsigned *p_cycle)
-{
-	int table[3];
-	int ret;
-
-	switch (swiz) {
-	case SQ_ALU_VEC_012:
-		table[0] = 0; table[1] = 1; table[2] = 2;
-		*p_cycle = table[sel];
-		break;
-	case SQ_ALU_VEC_021:
-		table[0] = 0; table[1] = 2; table[2] = 1;
-		*p_cycle = table[sel];
-		break;
-	case SQ_ALU_VEC_120:
-		table[0] = 1; table[1] = 2; table[2] = 0;
-		*p_cycle = table[sel];
-		break;
-	case SQ_ALU_VEC_102:
-		table[0] = 1; table[1] = 0; table[2] = 2;
-		*p_cycle = table[sel];
-		break;
-	case SQ_ALU_VEC_201:
-		table[0] = 2; table[1] = 0; table[2] = 1;
-		*p_cycle = table[sel];
-		break;
-	case SQ_ALU_VEC_210:
-		table[0] = 2; table[1] = 1; table[2] = 0;
-		*p_cycle = table[sel];
-		break;
-	default:
-		R600_ERR("bad vector bank swizzle value\n");
-		ret = -1;
-		break;
+	if (resmatch != -1)
+		return 0; // Read for this scalar element already reserved, nothing to do here.
+	else if (resempty != -1) {
+		bs->hw_cfile_addr[resempty] = sel;
+		bs->hw_cfile_elem[resempty] = chan;
+	} else {
+		// All cfile read ports are used, cannot reference vector element
+		return -1;
 	}
-	return ret;
+	return 0;
 }
 
+static int is_gpr(unsigned sel)
+{
+	return (sel >= 0 && sel <= 127);
+}
+
+static int is_cfile(unsigned sel)
+{
+	return (sel > 255 && sel < 512);
+}
+
-static void update_chan_counter(struct r600_bc_alu *alu, int *chan_counter)
++/* CB constants start at 512, and get translated to a kcache index when ALU
++ * clauses are constructed. Note that we handle kcache constants the same way
++ * as (the now gone) cfile constants, is that really required? */
++static int is_cb_const(int sel)
 {
-	int num_src;
-	int i;
-	int channel_swizzle;
++	if (sel > 511 && sel < 4607)
++		return 1;
++	return 0;
++}
+
-	num_src = r600_bc_get_num_operands(alu);
+static int is_const(int sel)
+{
+	return is_cfile(sel) ||
++		is_cb_const(sel) ||
+		(sel >= V_SQ_ALU_SRC_0 &&
+		sel <= V_SQ_ALU_SRC_LITERAL);
+}
+
+static int check_vector(struct r600_bc_alu *alu, struct alu_bank_swizzle *bs, int bank_swizzle)
+{
+	int r, src, num_src, sel, elem, cycle;
-	for (i = 0; i < num_src; i++) {
-		channel_swizzle = alu->src[i].chan;
-		if ((alu->src[i].sel > 0 && alu->src[i].sel < 128) && channel_swizzle <= 3)
-			chan_counter[channel_swizzle]++;
+
+	num_src = r600_bc_get_num_operands(alu);
+	for (src = 0; src < num_src; src++) {
+		sel = alu->src[src].sel;
+		elem = alu->src[src].chan;
+		if (is_gpr(sel)) {
+			cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src];
+			if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan)
+				// Nothing to do; special-case optimization,
+				// second source uses first source's reservation
+				continue;
+			else {
+				r = reserve_gpr(bs, sel, elem, cycle);
+				if (r)
+					return r;
+			}
+		} else if (is_cfile(sel)) {
+			r = reserve_cfile(bs, sel, elem);
+			if (r)
+				return r;
+		}
+		// No restrictions on PV, PS, literal or special constants
 	}
+	return 0;
 }
 
-/* we need something like this I think - but this is bogus */
-int check_read_slots(struct r600_bc *bc, struct r600_bc_alu *alu_first)
+static int check_scalar(struct r600_bc_alu *alu, struct alu_bank_swizzle *bs, int bank_swizzle)
 {
-	struct r600_bc_alu *alu;
-	int chan_counter[4] = { 0 };
-
-	update_chan_counter(alu_first, chan_counter);
+	int r, src, num_src, const_count, sel, elem, cycle;
 
-	LIST_FOR_EACH_ENTRY(alu, &alu_first->bs_list, bs_list) {
-		update_chan_counter(alu, chan_counter);
+	num_src = r600_bc_get_num_operands(alu);
+	for (const_count = 0, src = 0; src < num_src; ++src) {
+		sel = alu->src[src].sel;
+		elem = alu->src[src].chan;
+		if (is_const(sel)) { // Any constant, including literal and inline constants
+			if (const_count >= 2)
+				// More than two references to a constant in
+				// transcendental operation.
+				return -1;
+			else
+				const_count++;
+		}
+		if (is_cfile(sel)) {
+			r = reserve_cfile(bs, sel, elem);
+			if (r)
+				return r;
+		}
 	}
-
-	if (chan_counter[0] > 3 ||
-	    chan_counter[1] > 3 ||
-	    chan_counter[2] > 3 ||
-	    chan_counter[3] > 3) {
-		R600_ERR("needed to split instruction for input ran out of banks %x %d %d %d %d\n",
-			alu_first->inst, chan_counter[0], chan_counter[1], chan_counter[2], chan_counter[3]);
-		return -1;
+	for (src = 0; src < num_src; ++src) {
+		sel = alu->src[src].sel;
+		elem = alu->src[src].chan;
+		if (is_gpr(sel)) {
+			cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src];
+			if (cycle < const_count)
+				// Cycle for GPR load conflicts with
+				// constant load in transcendental operation.
+				return -1;
+			r = reserve_gpr(bs, sel, elem, cycle);
+			if (r)
+				return r;
+		}
+		// Constants already processed
+		// No restrictions on PV, PS
 	}
 	return 0;
 }
@@@ -868,55 -547,58 +996,61 @@@ int r600_bc_add_alu_type(struct r600_b
 			free(nalu);
 			return r;
 		}
-		bc->cf_last->inst = (type << 3);
 	}
+	bc->cf_last->inst = (type << 3);
+
+	/* Setup the kcache for this ALU instruction. This will start a new
+	 * ALU clause if needed. */
+	if ((r = r600_bc_alloc_kcache_lines(bc, nalu, type))) {
+		free(nalu);
+		return r;
+	}
+
 	if (!bc->cf_last->curr_bs_head) {
 		bc->cf_last->curr_bs_head = nalu;
-		LIST_INITHEAD(&nalu->bs_list);
-	} else {
-		LIST_ADDTAIL(&nalu->bs_list, &bc->cf_last->curr_bs_head->bs_list);
 	}
-	/* at most 128 slots, one add alu can add 4 slots + 4 constants(2 slots)
+	/* at most 128 slots, one add alu can add 5 slots + 4 constants(2 slots)
 	 * worst case */
-	if (alu->last && (bc->cf_last->ndw >> 1) >= 120) {
+	if (nalu->last && (bc->cf_last->ndw >> 1) >= 120) {
 		bc->force_add_cf = 1;
 	}
-	/* number of gpr == the last gpr used in any alu */
+	/* replace special constants */
 	for (i = 0; i < 3; i++) {
-		if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) {
-			bc->ngpr = nalu->src[i].sel + 1;
-		}
-		/* compute how many literal are needed
-		 * either 2 or 4 literals
-		 */
-		if (nalu->src[i].sel == 253) {
-			if (((nalu->src[i].chan + 2) & 0x6) > nalu->nliteral) {
-				nalu->nliteral = (nalu->src[i].chan + 2) & 0x6;
-			}
-		}
-	}
-	if (!LIST_IS_EMPTY(&bc->cf_last->alu)) {
-		lalu = LIST_ENTRY(struct r600_bc_alu, bc->cf_last->alu.prev, list);
-		if (!lalu->last && lalu->nliteral > nalu->nliteral) {
-			nalu->nliteral = lalu->nliteral;
-		}
-	}
-	if (nalu->dst.sel >= bc->ngpr) {
-		bc->ngpr = nalu->dst.sel + 1;
+		if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
+			r600_bc_special_constants(
+				nalu->src[i].value[nalu->src[i].chan],
+				&nalu->src[i].sel, &nalu->src[i].neg);
 	}
 	LIST_ADDTAIL(&nalu->list, &bc->cf_last->alu);
 	/* each alu use 2 dwords */
 	bc->cf_last->ndw += 2;
 	bc->ndw += 2;
-	bc->cf_last->kcache0_mode = 2;
-
 	/* process cur ALU instructions for bank swizzle */
-	if (alu->last) {
+	if (nalu->last) {
-		check_and_set_bank_swizzle(bc, bc->cf_last->curr_bs_head);
+		struct r600_bc_alu *slots[5];
+		r = assign_alu_units(bc->cf_last->curr_bs_head, slots);
+		if (r)
+			return r;
+
+		if (bc->cf_last->prev_bs_head) {
+			r = merge_inst_groups(bc, slots, bc->cf_last->prev_bs_head);
+			if (r)
+				return r;
+		}
+
+		if (bc->cf_last->prev_bs_head) {
+			r = replace_gpr_with_pv_ps(slots, bc->cf_last->prev_bs_head);
+			if (r)
+				return r;
+		}
+
+		r = check_and_set_bank_swizzle(slots);
+		if (r)
+			return r;
+
+		bc->cf_last->prev2_bs_head = bc->cf_last->prev_bs_head;
+		bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head;
 		bc->cf_last->curr_bs_head = NULL;
 	}
 	return 0;
 }
@@@ -1170,841 -908,20 +1304,841 @@@ static enum cf_class get_cf_class(struc
 		R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
 		return -EINVAL;
 	}
-	return 0;
 }
 
-int r600_bc_build(struct r600_bc *bc)
+/* common for r600/r700 - eg in eg_asm.c */
+static int r600_bc_cf_build(struct r600_bc *bc, struct r600_bc_cf *cf)
 {
-	struct r600_bc_cf *cf;
-	struct r600_bc_alu *alu;
-	struct r600_bc_vtx *vtx;
-	struct r600_bc_tex *tex;
-	unsigned addr;
-	int r;
+	unsigned id = cf->id;
+	unsigned end_of_program = bc->cf.prev == &cf->list;
 
-	if (bc->callstack[0].max > 0)
-		bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2;
+	switch (get_cf_class(cf)) {
+	case CF_CLASS_ALU:
+		assert(!end_of_program);
+		bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
-			S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache0_mode) |
-			S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache0_bank) |
-			S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache1_bank);
++			S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) |
++			S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) |
++			S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank);
+
+		bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(cf->inst >> 3) |
-			S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache1_mode) |
-			S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache0_addr) |
-			S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache1_addr) |
++			S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) |
++			S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) |
++			S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) |
+			S_SQ_CF_ALU_WORD1_BARRIER(cf->barrier) |
+			S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->chiprev == CHIPREV_R600 ? cf->r6xx_uses_waterfall : 0) |
+			S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
+		break;
+	case CF_CLASS_TEXTURE:
+	case CF_CLASS_VERTEX:
+		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
+		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
+			S_SQ_CF_WORD1_BARRIER(cf->barrier) |
+			S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1) |
+			S_SQ_CF_WORD1_END_OF_PROGRAM(end_of_program);
+		break;
+	case CF_CLASS_EXPORT:
+		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
+			S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
+			S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
+			S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type);
+		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(cf->inst) |
+			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(end_of_program);
+		break;
+	case CF_CLASS_OTHER:
+		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
+		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(cf->inst) |
+			S_SQ_CF_WORD1_BARRIER(cf->barrier) |
+			S_SQ_CF_WORD1_COND(cf->cond) |
+			S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
+			S_SQ_CF_WORD1_END_OF_PROGRAM(end_of_program);
+
+		break;
+	default:
+		R600_ERR("unsupported CF instruction (0x%X)\n", cf->inst);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+struct gpr_usage_range {
+	int replacement;
+	int32_t start;
+	int32_t end;
+};
+
+struct gpr_usage {
+	unsigned channels:4;
+	int32_t first_write;
+	int32_t last_write[4];
+	unsigned nranges;
+	struct gpr_usage_range *ranges;
+};
+
+static struct gpr_usage_range* add_gpr_usage_range(struct gpr_usage *usage)
+{
+	usage->nranges++;
+	usage->ranges = realloc(usage->ranges, usage->nranges * sizeof(struct gpr_usage_range));
+	if (!usage->ranges)
+		return NULL;
+	return &usage->ranges[usage->nranges-1];
+}
+
+static void notice_gpr_read(struct gpr_usage *usage, int32_t id, unsigned chan)
+{
+	usage->channels |= 1 << chan;
+	usage->first_write = -1;
+	if (!usage->nranges) {
+		struct gpr_usage_range* range = add_gpr_usage_range(usage);
+		range->replacement = -1;
+		range->start = -1;
+		range->end = -1;
+	}
+	if (usage->ranges[usage->nranges-1].end < id)
+		usage->ranges[usage->nranges-1].end = id;
+}
+
+static void notice_gpr_rel_read(struct gpr_usage usage[128], int32_t id, unsigned chan)
+{
+	unsigned i;
+	for (i = 0; i < 128; ++i)
+		notice_gpr_read(&usage[i], id, chan);
+}
+
+static void notice_gpr_last_write(struct gpr_usage *usage, int32_t id, unsigned chan)
+{
+	usage->last_write[chan] = id;
+}
+
+static void notice_gpr_write(struct gpr_usage *usage, int32_t id, unsigned chan,
+		int predicate, int prefered_replacement)
+{
+	int32_t start = usage->first_write != -1 ? usage->first_write : id;
+	usage->channels &= ~(1 << chan);
+	if (usage->channels) {
+		if (usage->first_write == -1)
+			usage->first_write = id;
+	} else if (!usage->nranges || (usage->ranges[usage->nranges-1].start != start && !predicate)) {
+		usage->first_write = start;
+		struct gpr_usage_range* range = add_gpr_usage_range(usage);
+		range->replacement = prefered_replacement;
+		range->start = start;
+		range->end = -1;
+	} else if (usage->ranges[usage->nranges-1].start == start && prefered_replacement != -1) {
+		usage->ranges[usage->nranges-1].replacement = prefered_replacement;
+	}
+	notice_gpr_last_write(usage, id, chan);
+}
+
+static void notice_gpr_rel_last_write(struct gpr_usage usage[128], int32_t id, unsigned chan)
+{
+	unsigned i;
+	for (i = 0; i < 128; ++i)
+		notice_gpr_last_write(&usage[i], id, chan);
+}
+
+static void notice_gpr_rel_write(struct gpr_usage usage[128], int32_t id, unsigned chan)
+{
+	unsigned i;
+	for (i = 0; i < 128; ++i)
+		notice_gpr_write(&usage[i], id, chan, 1, -1);
+}
+
+static void notice_alu_src_gprs(struct r600_bc_alu *alu, struct gpr_usage usage[128], int32_t id)
+{
+	unsigned src, num_src;
+
+	num_src = r600_bc_get_num_operands(alu);
+	for (src = 0; src < num_src; ++src) {
+		// constants don't matter
+		if (!is_gpr(alu->src[src].sel))
+			continue;
+
+		if (alu->src[src].rel)
+			notice_gpr_rel_read(usage, id, alu->src[src].chan);
+		else
+			notice_gpr_read(&usage[alu->src[src].sel], id, alu->src[src].chan);
+	}
+}
+
+static void notice_alu_dst_gprs(struct r600_bc_alu *alu_first, struct gpr_usage usage[128],
+		int32_t id, int predicate)
+{
+	struct r600_bc_alu *alu;
+	for (alu = alu_first; alu; alu = LIST_ENTRY(struct r600_bc_alu, alu->list.next, list)) {
+		if (alu->dst.write) {
+			if (alu->dst.rel)
+				notice_gpr_rel_write(usage, id, alu->dst.chan);
+			else if (alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV && is_gpr(alu->src[0].sel))
+				notice_gpr_write(&usage[alu->dst.sel], id, alu->dst.chan,
+						predicate, alu->src[0].sel);
+			else
+				notice_gpr_write(&usage[alu->dst.sel], id, alu->dst.chan, predicate, -1);
+		}
+
+		if (alu->last)
+			break;
+	}
+}
+
+static void notice_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128],
+		int32_t id, int predicate)
+{
+	if (tex->src_rel) {
+		if (tex->src_sel_x < 4)
+			notice_gpr_rel_read(usage, id, tex->src_sel_x);
+		if (tex->src_sel_y < 4)
+			notice_gpr_rel_read(usage, id, tex->src_sel_y);
+		if (tex->src_sel_z < 4)
+			notice_gpr_rel_read(usage, id, tex->src_sel_z);
+		if (tex->src_sel_w < 4)
+			notice_gpr_rel_read(usage, id, tex->src_sel_w);
+	} else {
+		if (tex->src_sel_x < 4)
+			notice_gpr_read(&usage[tex->src_gpr], id, tex->src_sel_x);
+		if (tex->src_sel_y < 4)
+			notice_gpr_read(&usage[tex->src_gpr], id, tex->src_sel_y);
+		if (tex->src_sel_z < 4)
+			notice_gpr_read(&usage[tex->src_gpr], id, tex->src_sel_z);
+		if (tex->src_sel_w < 4)
+			notice_gpr_read(&usage[tex->src_gpr], id, tex->src_sel_w);
+	}
+	if (tex->dst_rel) {
+		if (tex->dst_sel_x != 7)
+			notice_gpr_rel_write(usage, id, 0);
+		if (tex->dst_sel_y != 7)
+			notice_gpr_rel_write(usage, id, 1);
+		if (tex->dst_sel_z != 7)
+			notice_gpr_rel_write(usage, id, 2);
+		if (tex->dst_sel_w != 7)
+			notice_gpr_rel_write(usage, id, 3);
+	} else {
+		if (tex->dst_sel_x != 7)
+			notice_gpr_write(&usage[tex->dst_gpr], id, 0, predicate, -1);
+		if (tex->dst_sel_y != 7)
+			notice_gpr_write(&usage[tex->dst_gpr], id, 1, predicate, -1);
+		if (tex->dst_sel_z != 7)
+			notice_gpr_write(&usage[tex->dst_gpr], id, 2, predicate, -1);
+		if (tex->dst_sel_w != 7)
+			notice_gpr_write(&usage[tex->dst_gpr], id, 3, predicate, -1);
+	}
+}
+
+static void notice_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128],
+		int32_t id, int predicate)
+{
+	notice_gpr_read(&usage[vtx->src_gpr], id, vtx->src_sel_x);
+
+	if (vtx->dst_sel_x != 7)
+		notice_gpr_write(&usage[vtx->dst_gpr], id, 0, predicate, -1);
+	if (vtx->dst_sel_y != 7)
+		notice_gpr_write(&usage[vtx->dst_gpr], id, 1, predicate, -1);
+	if (vtx->dst_sel_z != 7)
+		notice_gpr_write(&usage[vtx->dst_gpr], id, 2, predicate, -1);
+	if (vtx->dst_sel_w != 7)
+		notice_gpr_write(&usage[vtx->dst_gpr], id, 3, predicate, -1);
+}
+
+static void notice_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128],
+		struct r600_bc_cf *export_cf[128], int32_t export_remap[128])
+{
+	//TODO handle other memory operations
+	struct gpr_usage *output = &usage[cf->output.gpr];
+	int32_t id = (output->last_write[0] + 0x100) & ~0xFF;
+
+	export_cf[cf->output.gpr] = cf;
+	export_remap[cf->output.gpr] = id;
+	if (cf->output.swizzle_x < 4)
+		notice_gpr_read(output, id, cf->output.swizzle_x);
+	if (cf->output.swizzle_y < 4)
+		notice_gpr_read(output, id, cf->output.swizzle_y);
+	if (cf->output.swizzle_z < 4)
+		notice_gpr_read(output, id, cf->output.swizzle_z);
+	if (cf->output.swizzle_w < 4)
+		notice_gpr_read(output, id, cf->output.swizzle_w);
+}
+
+static struct gpr_usage_range *find_src_range(struct gpr_usage *usage, int32_t id)
+{
+	unsigned i;
+	for (i = 0; i < usage->nranges; ++i) {
+		struct gpr_usage_range* range = &usage->ranges[i];
+
+		if (range->start < id && id <= range->end)
+			return range;
+	}
+	return NULL;
+}
+
+static struct gpr_usage_range *find_dst_range(struct gpr_usage *usage, int32_t id)
+{
+	unsigned i;
+	for (i = 0; i < usage->nranges; ++i) {
+		struct gpr_usage_range* range = &usage->ranges[i];
+		int32_t end = range->end;
+
+		if (range->start <= id && (id < end || end == -1))
+			return range;
+	}
+	assert(0); /* should not happen */
+	return NULL;
+}
+
+static int is_barrier_needed(struct gpr_usage *usage, int32_t id, unsigned chan, int32_t last_barrier)
+{
+	if (usage->last_write[chan] != (id & ~0xFF))
+		return usage->last_write[chan] >= last_barrier;
+	else
+		return 0;
+}
+
+static int is_intersection(struct gpr_usage_range* a, struct gpr_usage_range* b)
+{
+	return a->start <= b->end && b->start < a->end;
+}
+
+static int rate_replacement(struct gpr_usage *usage, struct gpr_usage_range* range)
+{
+	unsigned i;
+	int32_t best_start = 0x3FFFFFFF, best_end = 0x3FFFFFFF;
+
+	for (i = 0; i < usage->nranges; ++i) {
+		if (usage->ranges[i].replacement != -1)
+			continue; /* ignore already remapped ranges */
+
+		if (is_intersection(&usage->ranges[i], range))
+			return -1; /* forget it if usages overlap */
+
+		if (range->start >= usage->ranges[i].end)
+			best_start = MIN2(best_start, range->start - usage->ranges[i].end);
+
+		if (range->end != -1 && range->end <= usage->ranges[i].start)
+			best_end = MIN2(best_end, usage->ranges[i].start - range->end);
+	}
+	return best_start + best_end;
+}
+
+static void find_replacement(struct gpr_usage usage[128], unsigned current,
+		struct gpr_usage_range *range, int is_export)
+{
+	unsigned i;
+	int best_gpr = -1, best_rate = 0x7FFFFFFF;
+
+	if (range->replacement != -1 && range->replacement <= current) {
+		struct gpr_usage_range *other = find_src_range(&usage[range->replacement], range->start);
+		if (other && other->replacement != -1)
+			range->replacement = other->replacement;
+	}
+
+	if (range->replacement != -1 && range->replacement < current) {
+		int rate = rate_replacement(&usage[range->replacement], range);
+
+		/* check if preferred replacement can be used */
+		if (rate != -1) {
+			best_rate = rate;
+			best_gpr = range->replacement;
+		}
+	}
+
+	if (best_gpr == -1 && (range->start & ~0xFF) == (range->end & ~0xFF)) {
+		/* register is just used inside one ALU clause */
+		/* try to use clause temporaries for it */
+		for (i = 127; i > 123; --i) {
+			int rate = rate_replacement(&usage[i], range);
+
+			if (rate == -1) /* can't be used because ranges overlap */
+				continue;
+
+			if (rate < best_rate) {
+				best_rate = rate;
+				best_gpr = i;
+
+				/* can't get better than this */
+				if (rate == 0 || is_export)
+					break;
+			}
+		}
+	}
+
+	if (best_gpr == -1) {
+		for (i = 0; i < current; ++i) {
+			int rate = rate_replacement(&usage[i], range);
+
+			if (rate == -1) /* can't be used because ranges overlap */
+				continue;
+
+			if (rate < best_rate) {
+				best_rate = rate;
+				best_gpr = i;
+
+				/* can't get better than this */
+				if (rate == 0)
+					break;
+			}
+		}
+	}
+
+	range->replacement = best_gpr;
+	if (best_gpr != -1) {
+		struct gpr_usage_range *reservation = add_gpr_usage_range(&usage[best_gpr]);
+		reservation->replacement = -1;
+		reservation->start = range->start;
+		reservation->end = range->end;
+	}
+}
+
+static void find_export_replacement(struct gpr_usage usage[128],
+		struct gpr_usage_range *range, struct r600_bc_cf *current,
+		struct r600_bc_cf *next, int32_t next_id)
+{
+	if (!next || next_id <= range->start || next_id > range->end)
+		return;
+
+	if (current->output.type != next->output.type)
+		return;
+
+	if ((current->output.array_base + 1) != next->output.array_base)
+		return;
+
+	find_src_range(&usage[next->output.gpr], next_id)->replacement = range->replacement + 1;
+}
+
+static void replace_alu_gprs(struct r600_bc_alu *alu, struct gpr_usage usage[128],
+		int32_t id, int32_t last_barrier, unsigned *barrier)
+{
+	struct gpr_usage *cur_usage;
+	struct gpr_usage_range *range;
+	unsigned src, num_src;
+
+	num_src = r600_bc_get_num_operands(alu);
+	for (src = 0; src < num_src; ++src) {
+		// constants don't matter
+		if (!is_gpr(alu->src[src].sel))
+			continue;
+
+		cur_usage = &usage[alu->src[src].sel];
+		range = find_src_range(cur_usage, id);
+		if (range->replacement != -1)
+			alu->src[src].sel = range->replacement;
+
+		*barrier |= is_barrier_needed(cur_usage, id, alu->src[src].chan, last_barrier);
+	}
+
+	if (alu->dst.write) {
+		cur_usage = &usage[alu->dst.sel];
+		range = find_dst_range(cur_usage, id);
+		if (range->replacement == alu->dst.sel) {
+			if (!alu->is_op3)
+				alu->dst.write = 0;
+			else
+				/*TODO: really check that register 123 is usable */
+				alu->dst.sel = 123;
+		} else if (range->replacement != -1) {
+			alu->dst.sel = range->replacement;
+		}
+		if (alu->dst.rel)
+			notice_gpr_rel_last_write(usage, id, alu->dst.chan);
+		else
+			notice_gpr_last_write(cur_usage, id, alu->dst.chan);
+	}
+}
+
+static void replace_tex_gprs(struct r600_bc_tex *tex, struct gpr_usage usage[128],
+		int32_t id, int32_t last_barrier, unsigned *barrier)
+{
+	struct gpr_usage *cur_usage = &usage[tex->src_gpr];
+	struct gpr_usage_range *range = find_src_range(cur_usage, id);
+
+	if (tex->src_rel) {
+		*barrier = 1;
+	} else {
+		if (tex->src_sel_x < 4)
+			*barrier |= is_barrier_needed(cur_usage, id, tex->src_sel_x, last_barrier);
+		if (tex->src_sel_y < 4)
+			*barrier |= is_barrier_needed(cur_usage, id, tex->src_sel_y, last_barrier);
+		if (tex->src_sel_z < 4)
+			*barrier |= is_barrier_needed(cur_usage, id, tex->src_sel_z, last_barrier);
+		if (tex->src_sel_w < 4)
+			*barrier |= is_barrier_needed(cur_usage, id, tex->src_sel_w, last_barrier);
+	}
+
+	if (range->replacement != -1)
+		tex->src_gpr = range->replacement;
+
+	cur_usage = &usage[tex->dst_gpr];
+	range = find_dst_range(cur_usage, id);
+	if (range->replacement != -1)
+		tex->dst_gpr = range->replacement;
+
+	if (tex->dst_rel) {
+		if (tex->dst_sel_x != 7)
+			notice_gpr_rel_last_write(usage, id, tex->dst_sel_x);
+		if (tex->dst_sel_y != 7)
+			notice_gpr_rel_last_write(usage, id, tex->dst_sel_y);
+		if (tex->dst_sel_z != 7)
+			notice_gpr_rel_last_write(usage, id, tex->dst_sel_z);
+		if (tex->dst_sel_w != 7)
+			notice_gpr_rel_last_write(usage, id, tex->dst_sel_w);
+	} else {
+		if (tex->dst_sel_x != 7)
+			notice_gpr_last_write(cur_usage, id, tex->dst_sel_x);
+		if (tex->dst_sel_y != 7)
+			notice_gpr_last_write(cur_usage, id, tex->dst_sel_y);
+		if (tex->dst_sel_z != 7)
+			notice_gpr_last_write(cur_usage, id, tex->dst_sel_z);
+		if (tex->dst_sel_w != 7)
+			notice_gpr_last_write(cur_usage, id, tex->dst_sel_w);
+	}
+}
+
+static void replace_vtx_gprs(struct r600_bc_vtx *vtx, struct gpr_usage usage[128],
+		int32_t id, int32_t last_barrier, unsigned *barrier)
+{
+	struct gpr_usage *cur_usage = &usage[vtx->src_gpr];
+	struct gpr_usage_range *range = find_src_range(cur_usage, id);
+
+	*barrier |= is_barrier_needed(cur_usage, id, vtx->src_sel_x, last_barrier);
+
+	if (range->replacement != -1)
+		vtx->src_gpr = range->replacement;
+
+	cur_usage = &usage[vtx->dst_gpr];
+	range = find_dst_range(cur_usage, id);
+	if (range->replacement != -1)
+		vtx->dst_gpr = range->replacement;
+
+	if (vtx->dst_sel_x != 7)
+		notice_gpr_last_write(cur_usage, id, vtx->dst_sel_x);
+	if (vtx->dst_sel_y != 7)
+		notice_gpr_last_write(cur_usage, id, vtx->dst_sel_y);
+	if (vtx->dst_sel_z != 7)
+		notice_gpr_last_write(cur_usage, id, vtx->dst_sel_z);
+	if (vtx->dst_sel_w != 7)
+		notice_gpr_last_write(cur_usage, id, vtx->dst_sel_w);
+}
+
+static void replace_export_gprs(struct r600_bc_cf *cf, struct gpr_usage usage[128],
+		int32_t id, int32_t last_barrier)
+{
+	//TODO handle other memory operations
+	struct gpr_usage *cur_usage = &usage[cf->output.gpr];
+	struct gpr_usage_range *range = find_src_range(cur_usage, id);
+
+	cf->barrier = 0;
+	if (cf->output.swizzle_x < 4)
+		cf->barrier |= is_barrier_needed(cur_usage, -1, cf->output.swizzle_x, last_barrier);
+	if (cf->output.swizzle_y < 4)
+		cf->barrier |= is_barrier_needed(cur_usage, -1, cf->output.swizzle_y, last_barrier);
+	if (cf->output.swizzle_z < 4)
+		cf->barrier |= is_barrier_needed(cur_usage, -1, cf->output.swizzle_z, last_barrier);
+	if (cf->output.swizzle_w < 4)
+		cf->barrier |= is_barrier_needed(cur_usage, -1, cf->output.swizzle_w, last_barrier);
+
+	if (range->replacement != -1)
+		cf->output.gpr = range->replacement;
+}
+
+static void optimize_alu_inst(struct r600_bc_cf *cf, struct r600_bc_alu *alu)
+{
+	struct r600_bc_alu *alu_next;
+	unsigned chan;
+	unsigned src, num_src;
+
+	/* check if a MOV could be optimized away */
+	if (alu->inst == V_SQ_ALU_WORD1_OP2_SQ_OP2_INST_MOV) {
+
+		/* destination equals source? */
+		if (alu->dst.sel != alu->src[0].sel ||
+			alu->dst.chan != alu->src[0].chan)
+			return;
+
+		/* any special handling for the source? */
+		if (alu->src[0].rel || alu->src[0].neg || alu->src[0].abs)
+			return;
+
+		/* any special handling for destination? */
+		if (alu->dst.rel || alu->dst.clamp)
+			return;
+
+		/* ok find next instruction group and check if ps/pv is used */
+		for (alu_next = alu; !alu_next->last; alu_next = NEXT_ALU(alu_next));
+
+		if (alu_next->list.next != &cf->alu) {
+			chan = is_alu_reduction_inst(alu) ? 0 : alu->dst.chan;
+			for (alu_next = NEXT_ALU(alu_next); alu_next; alu_next = NEXT_ALU(alu_next)) {
+				num_src = r600_bc_get_num_operands(alu_next);
+				for (src = 0; src < num_src; ++src) {
+					if (alu_next->src[src].sel == V_SQ_ALU_SRC_PV &&
+						alu_next->src[src].chan == chan)
+						return;
+
+					if (alu_next->src[src].sel == V_SQ_ALU_SRC_PS)
+						return;
+				}
+
+				if (alu_next->last)
+					break;
+			}
+		}
+
+		r600_bc_remove_alu(cf, alu);
+	}
+}
+
+static void optimize_export_inst(struct r600_bc *bc, struct r600_bc_cf *cf)
+{
+	struct r600_bc_cf *prev = LIST_ENTRY(struct r600_bc_cf, cf->list.prev, list);
+	if (&prev->list == &bc->cf ||
+		prev->inst != cf->inst ||
+		prev->output.type != cf->output.type ||
+		prev->output.elem_size != cf->output.elem_size ||
+		prev->output.swizzle_x != cf->output.swizzle_x ||
+		prev->output.swizzle_y != cf->output.swizzle_y ||
+		prev->output.swizzle_z != cf->output.swizzle_z ||
+		prev->output.swizzle_w != cf->output.swizzle_w)
+		return;
+
+	if ((prev->output.burst_count + cf->output.burst_count) > 16)
+		return;
+
+	if ((prev->output.gpr + prev->output.burst_count) == cf->output.gpr &&
+		(prev->output.array_base + prev->output.burst_count) == cf->output.array_base) {
+
+		prev->output.burst_count += cf->output.burst_count;
+		r600_bc_remove_cf(bc, cf);
+
+	} else if (prev->output.gpr == (cf->output.gpr + cf->output.burst_count) &&
+		prev->output.array_base == (cf->output.array_base + cf->output.burst_count)) {
+
+		cf->output.burst_count += prev->output.burst_count;
+		r600_bc_remove_cf(bc, prev);
+	}
+}
+
+static void r600_bc_optimize(struct r600_bc *bc)
+{
+	struct r600_bc_cf *cf, *next_cf;
+	struct r600_bc_alu *first, *next_alu;
+	struct r600_bc_alu *alu;
+	struct r600_bc_vtx *vtx;
+	struct r600_bc_tex *tex;
+	struct gpr_usage usage[128];
+
+	/* assume that each gpr is exported only once */
+	struct r600_bc_cf *export_cf[128] = { NULL };
+	int32_t export_remap[128];
+
+	int32_t id, barrier[bc->nstack];
+	unsigned i, j, stack, predicate, old_stack;
+
+	memset(&usage, 0, sizeof(usage));
+	for (i = 0; i < 128; ++i) {
+		usage[i].first_write = -1;
+		usage[i].last_write[0] = -1;
+		usage[i].last_write[1] = -1;
+		usage[i].last_write[2] = -1;
+		usage[i].last_write[3] = -1;
+	}
+
+	/* first gather some information about the gpr usage */
+	id = 0; stack = 0;
+	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
+		switch (get_cf_class(cf)) {
+		case CF_CLASS_ALU:
+			predicate = 0;
+			first = NULL;
+			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
+				if (!first)
+					first = alu;
+				notice_alu_src_gprs(alu, usage, id);
+				if (alu->last) {
+					notice_alu_dst_gprs(first, usage, id, predicate || stack > 0);
+					first = NULL;
+					++id;
+				}
+				if (is_alu_pred_inst(alu))
+					predicate++;
+			}
+			if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3)
+				stack += predicate;
+			else if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3)
+				stack -= 1;
+			else if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3)
+				stack -= 2;
+			break;
+		case CF_CLASS_TEXTURE:
+			LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
+				notice_tex_gprs(tex, usage, id++, stack > 0);
+			}
+			break;
+		case CF_CLASS_VERTEX:
+			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
+				notice_vtx_gprs(vtx, usage, id++, stack > 0);
+			}
+			break;
+		case CF_CLASS_EXPORT:
+			notice_export_gprs(cf, usage, export_cf, export_remap);
+			continue; // don't increment id
+		case CF_CLASS_OTHER:
+			switch (cf->inst) {
+			case V_SQ_CF_WORD1_SQ_CF_INST_JUMP:
+			case V_SQ_CF_WORD1_SQ_CF_INST_ELSE:
+			case V_SQ_CF_WORD1_SQ_CF_INST_CALL_FS:
+				break;
+
+			case V_SQ_CF_WORD1_SQ_CF_INST_POP:
+				stack -= cf->pop_count;
+				break;
+
+			default:
+				// TODO implement loop handling
+				goto out;
+			}
+		}
+		id += 0x100;
+		id &= ~0xFF;
+	}
+	assert(stack == 0);
+
+	/* try to optimize gpr usage */
+	for (i = 0; i < 124; ++i) {
+		for (j = 0; j < usage[i].nranges; ++j) {
+			struct gpr_usage_range *range = &usage[i].ranges[j];
+			int is_export = export_cf[i] && export_cf[i + 1] &&
+				range->start < export_remap[i] &&
+				export_remap[i] <= range->end;
+
+			if (range->start == -1)
+				range->replacement = -1;
+			else if (range->end == -1)
+				range->replacement = i;
+			else
+				find_replacement(usage, i, range, is_export);
+
+			if (range->replacement == -1)
+				bc->ngpr = i;
+			else if (range->replacement < i && range->replacement > bc->ngpr)
+				bc->ngpr = range->replacement;
+
+			if (is_export && range->replacement != -1) {
+				find_export_replacement(usage, range, export_cf[i],
+					export_cf[i + 1], export_remap[i + 1]);
+			}
+		}
+	}
+	bc->ngpr++;
+
+	/* apply the changes */
+	for (i = 0; i < 128; ++i) {
+		usage[i].last_write[0] = -1;
+		usage[i].last_write[1] = -1;
+		usage[i].last_write[2] = -1;
+		usage[i].last_write[3] = -1;
+	}
+	barrier[0] = 0;
+	id = 0; stack = 0;
+	LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) {
+		old_stack = stack;
+		switch (get_cf_class(cf)) {
+		case CF_CLASS_ALU:
+			predicate = 0;
+			first = NULL;
+			cf->barrier = 0;
+			LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) {
+				replace_alu_gprs(alu, usage, id, barrier[stack], &cf->barrier);
+				if (alu->last)
+					++id;
+
+				if (is_alu_pred_inst(alu))
+					predicate++;
+
+				if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU << 3)
+					optimize_alu_inst(cf, alu);
+			}
+			if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_PUSH_BEFORE << 3)
+				stack += predicate;
+			else if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP_AFTER << 3)
+				stack -= 1;
+			else if (cf->inst == V_SQ_CF_ALU_WORD1_SQ_CF_INST_ALU_POP2_AFTER << 3)
+				stack -= 2;
+			if (LIST_IS_EMPTY(&cf->alu)) {
+				r600_bc_remove_cf(bc, cf);
+				cf = NULL;
+			}
+			break;
+		case CF_CLASS_TEXTURE:
+			cf->barrier = 0;
+			LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
+				replace_tex_gprs(tex, usage, id++, barrier[stack], &cf->barrier);
+			}
+			break;
+		case CF_CLASS_VERTEX:
+			cf->barrier = 0;
+			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
+				replace_vtx_gprs(vtx, usage, id++, barrier[stack], &cf->barrier);
+			}
+			break;
+		case CF_CLASS_EXPORT:
+			continue; // don't increment id
+		case CF_CLASS_OTHER:
+			if (cf->inst == V_SQ_CF_WORD1_SQ_CF_INST_POP) {
+				cf->barrier = 0;
+				stack -= cf->pop_count;
+			}
+			break;
+		}
+
+		id &= ~0xFF;
+		if (cf && cf->barrier)
+			barrier[old_stack] = id;
+
+		for (i = old_stack + 1; i <= stack; ++i)
+			barrier[i] = barrier[old_stack];
+
+		id += 0x100;
+		if (stack != 0) /* ensure exports are placed outside of conditional blocks */
+			continue;
+
+		for (i = 0; i < 128; ++i) {
+			if (!export_cf[i] || id < export_remap[i])
+				continue;
+
+			r600_bc_move_cf(bc, export_cf[i], next_cf);
+			replace_export_gprs(export_cf[i], usage, export_remap[i], barrier[stack]);
+			if (export_cf[i]->barrier)
+				barrier[stack] = id - 1;
+			next_cf = LIST_ENTRY(struct r600_bc_cf, export_cf[i]->list.next, list);
+			optimize_export_inst(bc, export_cf[i]);
+			export_cf[i] = NULL;
+		}
+	}
+	assert(stack == 0);
+
+out:
+	for (i = 0; i < 128; ++i) {
+		free(usage[i].ranges);
+	}
+}
+
+int r600_bc_build(struct r600_bc *bc)
+{
+	struct r600_bc_cf *cf;
+	struct r600_bc_alu *alu;
+	struct r600_bc_vtx *vtx;
+	struct r600_bc_tex *tex;
+	struct r600_bc_cf *exports[4] = { NULL };
+	uint32_t literal[4];
+	unsigned nliteral;
+	unsigned addr;
+	int i, r;
+
+	if (bc->callstack[0].max > 0)
+		bc->nstack = ((bc->callstack[0].max + 3) >> 2) + 2;
 	if (bc->type == TGSI_PROCESSOR_VERTEX && !bc->nstack) {
 		bc->nstack = 1;
 	}
diff --cc src/gallium/drivers/r600/r600_asm.h
index 6059e45737f,a5504ad39f4..519245f3af2
--- a/src/gallium/drivers/r600/r600_asm.h
+++ b/src/gallium/drivers/r600/r600_asm.h
@@@ -114,9 -122,15 +114,15 @@@ struct r600_bc_output
 	unsigned swizzle_y;
 	unsigned swizzle_z;
 	unsigned swizzle_w;
-	unsigned barrier;
+	unsigned burst_count;
 };
 
+struct r600_bc_kcache {
+	unsigned bank;
+	unsigned mode;
+	unsigned addr;
+};
+
 struct r600_bc_cf {
 	struct list_head list;
 	unsigned inst;
@@@ -126,13 -140,7 +132,8 @@@
 	unsigned cond;
 	unsigned pop_count;
 	unsigned cf_addr; /* control flow addr */
+	unsigned barrier;
-	unsigned kcache0_mode;
-	unsigned kcache1_mode;
-	unsigned kcache0_addr;
-	unsigned kcache1_addr;
-	unsigned kcache0_bank;
-	unsigned kcache1_bank;
+	struct r600_bc_kcache kcache[2];
 	unsigned r6xx_uses_waterfall;
 	struct list_head alu;
 	struct list_head tex;
diff --cc src/gallium/drivers/r600/r600_shader.c
index d78e249ae95,51ff7db3612..95367d7c536
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@@ -541,9 -542,11 +543,11 @@@ int r600_shader_from_tgsi(const struct
 	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
 			ctx.info.file_count[TGSI_FILE_OUTPUT];
-	ctx.file_offset[TGSI_FILE_CONSTANT] = 128;
+	/* Outside the GPR range. This will be translated to one of the
+	 * kcache banks later. */
+	ctx.file_offset[TGSI_FILE_CONSTANT] = 512;
 
-	ctx.file_offset[TGSI_FILE_IMMEDIATE] = 253;
+	ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL;
 	ctx.temp_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] +
 			ctx.info.file_count[TGSI_FILE_TEMPORARY];
diff --cc src/gallium/include/pipe/p_format.h
index 119d304d927,74a9939df73..3aa11be4b5b
--- a/src/gallium/include/pipe/p_format.h
+++ b/src/gallium/include/pipe/p_format.h
@@@ -186,22 -186,16 +186,28 @@@ enum pipe_format
    PIPE_FORMAT_R8G8B8X8_UNORM = 134,
    PIPE_FORMAT_B4G4R4X4_UNORM = 135,
 
+   PIPE_FORMAT_YV12 = 136,
+   PIPE_FORMAT_YV16 = 137,
+   PIPE_FORMAT_IYUV = 138,  /**< aka I420 */
+   PIPE_FORMAT_NV12 = 139,
+   PIPE_FORMAT_NV21 = 140,
+   PIPE_FORMAT_AYUV = PIPE_FORMAT_A8R8G8B8_UNORM,
+   PIPE_FORMAT_VUYA = PIPE_FORMAT_B8G8R8A8_UNORM,
+   PIPE_FORMAT_XYUV = PIPE_FORMAT_X8R8G8B8_UNORM,
+   PIPE_FORMAT_VUYX = PIPE_FORMAT_B8G8R8X8_UNORM,
+   PIPE_FORMAT_IA44 = 141,
+   PIPE_FORMAT_AI44 = 142,
+
    /* some stencil samplers formats */
-   PIPE_FORMAT_X24S8_USCALED = 136,
-   PIPE_FORMAT_S8X24_USCALED = 137,
-   PIPE_FORMAT_X32_S8X24_USCALED = 138,
+   PIPE_FORMAT_X24S8_USCALED = 143,
+   PIPE_FORMAT_S8X24_USCALED = 144,
+   PIPE_FORMAT_X32_S8X24_USCALED = 145,
+
-   PIPE_FORMAT_B2G3R3_UNORM = 139,
-   PIPE_FORMAT_L16A16_UNORM = 140,
-   PIPE_FORMAT_A16_UNORM = 141,
-   PIPE_FORMAT_I16_UNORM = 142,
++  PIPE_FORMAT_B2G3R3_UNORM = 146,
++  PIPE_FORMAT_L16A16_UNORM = 147,
++  PIPE_FORMAT_A16_UNORM = 148,
++  PIPE_FORMAT_I16_UNORM = 149,
 
    PIPE_FORMAT_COUNT
 };
diff --cc src/gallium/targets/dri-nouveau/Makefile
index 9dfe86c8949,eb1ee859a00..26c927e0a81
--- a/src/gallium/targets/dri-nouveau/Makefile
+++ b/src/gallium/targets/dri-nouveau/Makefile
@@@ -10,7 -10,7 +10,8 @@@ PIPE_DRIVERS = 
 	$(TOP)/src/gallium/drivers/rbug/librbug.a \
 	$(TOP)/src/gallium/drivers/nvfx/libnvfx.a \
 	$(TOP)/src/gallium/drivers/nv50/libnv50.a \
+	$(TOP)/src/gallium/drivers/nvc0/libnvc0.a \
+	$(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \
 	$(TOP)/src/gallium/drivers/nouveau/libnouveau.a
 
 C_SOURCES = \
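
For orientation when reading the r600 changes above: after this merge the ALU source selector (sel) space is partitioned into GPRs, the constant file, inline literals, and constant-buffer constants, and CB selectors are translated into kcache references when ALU clauses are built (via r600_bc_alloc_kcache_lines, referenced but not shown in this diff). The stand-alone C sketch below restates those ranges from is_gpr(), is_cfile(), is_cb_const() and V_SQ_ALU_SRC_LITERAL in the patch; it is illustrative only, not part of the commit, and the helper name classify_sel plus the sample selector values are invented for the example.

#include <stdio.h>

/* Selector ranges mirrored from the patch:
 * is_gpr():      0..127    GPRs
 * literal:       253       V_SQ_ALU_SRC_LITERAL (inline literal constant)
 * is_cfile():    256..511  constant-file registers
 * is_cb_const(): 512..4606 constant-buffer constants, remapped to a kcache
 *                          bank/addr when ALU clauses are constructed
 */
static const char *classify_sel(int sel)
{
	if (sel >= 0 && sel <= 127)
		return "GPR";
	if (sel == 253) /* V_SQ_ALU_SRC_LITERAL */
		return "inline literal";
	if (sel > 255 && sel < 512)
		return "constant file";
	if (sel > 511 && sel < 4607)
		return "CB constant -> kcache";
	return "other special constant";
}

int main(void)
{
	/* hypothetical sample selectors */
	static const int sels[] = { 5, 253, 300, 512, 4606 };
	unsigned i;

	for (i = 0; i < sizeof(sels) / sizeof(sels[0]); ++i)
		printf("sel %4d: %s\n", sels[i], classify_sel(sels[i]));
	return 0;
}

This also explains why r600_shader.c now sets ctx.file_offset[TGSI_FILE_CONSTANT] to 512: constant-file offsets start outside the GPR range so they can later be rewritten into one of the two kcache slots recorded in struct r600_bc_kcache.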