X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fr300%2Fr300_emit.c;h=232259e21d17b06070f8130f33fb0c67dd9301ad;hb=222d2f2ac2c7d93cbc0643082c78278ad2c8cfce;hp=dd6cc4d4f84f10547214f616093bf1875573bfee;hpb=93bce03b275f66b6b2db410bbef38954de6a617c;p=mesa.git diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index dd6cc4d4f84..232259e21d1 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -25,6 +25,7 @@ #include "util/u_format.h" #include "util/u_math.h" +#include "util/u_mm.h" #include "util/u_simple_list.h" #include "r300_context.h" @@ -170,15 +171,27 @@ void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat { struct r300_fragment_shader *fs = r300_fs(r300); struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state; - unsigned count = fs->shader->externals_count * 4; + unsigned count = fs->shader->externals_count; + unsigned i, j; CS_LOCALS(r300); if (count == 0) return; BEGIN_CS(size); - OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, count); - OUT_CS_TABLE(buf->constants, count); + OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, count * 4); + if (buf->remap_table){ + for (i = 0; i < count; i++) { + float *data = (float*)&buf->ptr[buf->remap_table[i]*4]; + for (j = 0; j < 4; j++) + OUT_CS(pack_float24(data[j])); + } + } else { + for (i = 0; i < count; i++) + for (j = 0; j < 4; j++) + OUT_CS(pack_float24(*(float*)&buf->ptr[i*4+j])); + } + END_CS; } @@ -190,7 +203,6 @@ void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo unsigned count = fs->shader->rc_state_count; unsigned first = fs->shader->externals_count; unsigned end = constants->Count; - uint32_t cdata[4]; unsigned j; CS_LOCALS(r300); @@ -203,11 +215,9 @@ void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, vo const float *data = get_rc_constant_state(r300, &constants->Constants[i]); - for (j = 0; j < 4; j++) - cdata[j] = pack_float24(data[j]); - OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X + i * 16, 4); - OUT_CS_TABLE(cdata, 4); + for (j = 0; j < 4; j++) + OUT_CS(pack_float24(data[j])); } } END_CS; @@ -225,7 +235,7 @@ void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat { struct r300_fragment_shader *fs = r300_fs(r300); struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state; - unsigned count = fs->shader->externals_count * 4; + unsigned count = fs->shader->externals_count; CS_LOCALS(r300); if (count == 0) @@ -233,8 +243,15 @@ void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat BEGIN_CS(size); OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST); - OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count); - OUT_CS_TABLE(buf->constants, count); + OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count * 4); + if (buf->remap_table){ + for (unsigned i = 0; i < count; i++) { + uint32_t *data = &buf->ptr[buf->remap_table[i]*4]; + OUT_CS_TABLE(data, 4); + } + } else { + OUT_CS_TABLE(buf->ptr, count * 4); + } END_CS; } @@ -272,8 +289,17 @@ void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state) struct r300_gpu_flush *gpuflush = (struct r300_gpu_flush*)state; struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; + uint32_t height = fb->height; + uint32_t width = fb->width; CS_LOCALS(r300); + if (r300->cbzb_clear) { + struct r300_surface *surf = r300_surface(fb->cbufs[0]); + + height = surf->cbzb_height; + width = surf->cbzb_width; + } + BEGIN_CS(size); /* Set up scissors. @@ -281,13 +307,13 @@ void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state) OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); if (r300->screen->caps.is_r500) { OUT_CS(0); - OUT_CS(((fb->width - 1) << R300_SCISSORS_X_SHIFT) | - ((fb->height - 1) << R300_SCISSORS_Y_SHIFT)); + OUT_CS(((width - 1) << R300_SCISSORS_X_SHIFT) | + ((height - 1) << R300_SCISSORS_Y_SHIFT)); } else { OUT_CS((1440 << R300_SCISSORS_X_SHIFT) | (1440 << R300_SCISSORS_Y_SHIFT)); - OUT_CS(((fb->width + 1440-1) << R300_SCISSORS_X_SHIFT) | - ((fb->height + 1440-1) << R300_SCISSORS_Y_SHIFT)); + OUT_CS(((width + 1440-1) << R300_SCISSORS_X_SHIFT) | + ((height + 1440-1) << R300_SCISSORS_Y_SHIFT)); } /* Flush CB & ZB caches and wait until the 3D engine is idle and clean. */ @@ -295,22 +321,36 @@ void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state) END_CS; } +void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state) +{ + struct r300_aa_state *aa = (struct r300_aa_state*)state; + CS_LOCALS(r300); + + BEGIN_CS(size); + OUT_CS_REG(R300_GB_AA_CONFIG, aa->aa_config); + + if (aa->dest) { + OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 1); + OUT_CS_RELOC(aa->dest->buffer, aa->dest->offset, 0, aa->dest->domain); + + OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_PITCH, 1); + OUT_CS_RELOC(aa->dest->buffer, aa->dest->pitch, 0, aa->dest->domain); + } + + OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, aa->aaresolve_ctl); + END_CS; +} + void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) { struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state; struct r300_surface* surf; unsigned i; + boolean has_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); CS_LOCALS(r300); BEGIN_CS(size); - /* XXX unpipelined regs - rb3d_aaresolve_ctl - rb3d_aaresolve_offset - rb3d_aaresolve_pitch - gb_aa_config - */ - /* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers, which is not * what we usually want. */ if (r300->screen->caps.is_r500) { @@ -325,37 +365,105 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) surf = r300_surface(fb->cbufs[i]); OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1); - OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain, 0); + OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain); OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1); - OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain, 0); + OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain); } + /* Set up the ZB part of the CBZB clear. */ + if (r300->cbzb_clear) { + surf = r300_surface(fb->cbufs[0]); + + OUT_CS_REG(R300_ZB_FORMAT, surf->cbzb_format); + + OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); + OUT_CS_RELOC(surf->buffer, surf->cbzb_midpoint_offset, 0, surf->domain); + + OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); + OUT_CS_RELOC(surf->buffer, surf->cbzb_pitch, 0, surf->domain); + + DBG(r300, DBG_CBZB, + "CBZB clearing cbuf %08x %08x\n", surf->cbzb_format, + surf->cbzb_pitch); + } /* Set up a zbuffer. */ - if (fb->zsbuf) { + else if (fb->zsbuf) { surf = r300_surface(fb->zsbuf); OUT_CS_REG(R300_ZB_FORMAT, surf->format); - OUT_CS_REG(R300_ZB_BW_CNTL, 0); OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1); - OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain, 0); + OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain); OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); - OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain, 0); + OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain); + + if (has_hyperz) { + uint32_t surf_pitch; + struct r300_texture *tex; + int level = surf->base.level; + tex = r300_texture(surf->base.texture); + + surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK; + /* HiZ RAM. */ + if (r300->screen->caps.hiz_ram) { + if (tex->hiz_mem[level]) { + OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs << 2); + OUT_CS_REG(R300_ZB_HIZ_PITCH, surf_pitch); + } else { + OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); + OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); + } + } + /* Z Mask RAM. (compressed zbuffer) */ + if (tex->zmask_mem[level]) { + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs << 2); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch); + } else { + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); + } + } + } + + END_CS; +} - OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0); +void r300_emit_hyperz_state(struct r300_context *r300, + unsigned size, void *state) +{ + struct r300_hyperz_state *z = state; + CS_LOCALS(r300); + if (z->flush) + WRITE_CS_TABLE(&z->cb_flush_begin, size); + else + WRITE_CS_TABLE(&z->cb_begin, size - 2); +} - /* HiZ RAM. */ - if (r300->screen->caps.has_hiz) { - OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); - OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); - } +void r300_emit_hyperz_end(struct r300_context *r300) +{ + struct r300_hyperz_state z = + *(struct r300_hyperz_state*)r300->hyperz_state.state; - /* Z Mask RAM. (compressed zbuffer) */ - OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); - OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); - } + z.flush = 1; + z.zb_bw_cntl = 0; + z.zb_depthclearvalue = 0; + z.sc_hyperz = R300_SC_HYPERZ_ADJ_2; + z.gb_z_peq_config = 0; + + r300_emit_hyperz_state(r300, r300->hyperz_state.size, &z); +} + +void r300_emit_fb_state_pipelined(struct r300_context *r300, + unsigned size, void *state) +{ + struct pipe_framebuffer_state* fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + unsigned i; + CS_LOCALS(r300); + + BEGIN_CS(size); /* Colorbuffer format in the US block. * (must be written after unpipelined regs) */ @@ -366,6 +474,42 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) for (; i < 4; i++) { OUT_CS(R300_US_OUT_FMT_UNUSED); } + + /* Multisampling. Depends on framebuffer sample count. + * These are pipelined regs and as such cannot be moved + * to the AA state. */ + if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { + unsigned mspos0 = 0x66666666; + unsigned mspos1 = 0x6666666; + + if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) { + /* Subsample placement. These may not be optimal. */ + switch (fb->cbufs[0]->texture->nr_samples) { + case 2: + mspos0 = 0x33996633; + mspos1 = 0x6666663; + break; + case 3: + mspos0 = 0x33936933; + mspos1 = 0x6666663; + break; + case 4: + mspos0 = 0x33939933; + mspos1 = 0x3966663; + break; + case 6: + mspos0 = 0x22a2aa22; + mspos1 = 0x2a65672; + break; + default: + debug_printf("r300: Bad number of multisamples!\n"); + } + } + + OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); + OUT_CS(mspos0); + OUT_CS(mspos1); + } END_CS; } @@ -413,13 +557,13 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, OUT_CS_REG(R300_SU_REG_DEST, 1 << 3); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); OUT_CS_RELOC(buf, (query->num_results + 3) * 4, - 0, query->domain, 0); + 0, query->domain); case 3: /* pipe 2 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 2); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); OUT_CS_RELOC(buf, (query->num_results + 2) * 4, - 0, query->domain, 0); + 0, query->domain); case 2: /* pipe 1 only */ /* As mentioned above, accomodate RV380 and older. */ @@ -427,13 +571,13 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, 1 << (caps->high_second_pipe ? 3 : 1)); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); OUT_CS_RELOC(buf, (query->num_results + 1) * 4, - 0, query->domain, 0); + 0, query->domain); case 1: /* pipe 0 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); OUT_CS_RELOC(buf, (query->num_results + 0) * 4, - 0, query->domain, 0); + 0, query->domain); break; default: fprintf(stderr, "r300: Implementation error: Chipset reports %d" @@ -455,7 +599,7 @@ static void rv530_emit_query_end_single_z(struct r300_context *r300, BEGIN_CS(8); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, query->num_results * 4, 0, query->domain, 0); + OUT_CS_RELOC(buf, query->num_results * 4, 0, query->domain); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -469,10 +613,10 @@ static void rv530_emit_query_end_double_z(struct r300_context *r300, BEGIN_CS(14); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 0) * 4, 0, query->domain, 0); + OUT_CS_RELOC(buf, (query->num_results + 0) * 4, 0, query->domain); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1); OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1); - OUT_CS_RELOC(buf, (query->num_results + 1) * 4, 0, query->domain, 0); + OUT_CS_RELOC(buf, (query->num_results + 1) * 4, 0, query->domain); OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); END_CS; } @@ -506,102 +650,27 @@ void r300_emit_query_end(struct r300_context* r300) } } +void r300_emit_invariant_state(struct r300_context *r300, + unsigned size, void *state) +{ + CS_LOCALS(r300); + WRITE_CS_TABLE(state, size); +} + void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state) { struct r300_rs_state* rs = state; - struct pipe_framebuffer_state* fb = r300->fb_state.state; - float scale, offset; - unsigned mspos0, mspos1, aa_config; CS_LOCALS(r300); BEGIN_CS(size); - OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status); - - /* Multisampling. Depends on framebuffer sample count. */ - if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) { - if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) { - aa_config = R300_GB_AA_CONFIG_AA_ENABLE; - /* Subsample placement. These may not be optimal. */ - switch (fb->cbufs[0]->texture->nr_samples) { - case 2: - aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2; - mspos0 = 0x33996633; - mspos1 = 0x6666663; - break; - case 3: - aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3; - mspos0 = 0x33936933; - mspos1 = 0x6666663; - break; - case 4: - aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4; - mspos0 = 0x33939933; - mspos1 = 0x3966663; - break; - case 6: - aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6; - mspos0 = 0x22a2aa22; - mspos1 = 0x2a65672; - break; - default: - debug_printf("r300: Bad number of multisamples!\n"); - mspos0 = rs->multisample_position_0; - mspos1 = rs->multisample_position_1; - break; - } - - OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); - OUT_CS(mspos0); - OUT_CS(mspos1); - - OUT_CS_REG(R300_GB_AA_CONFIG, aa_config); - } else { - OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); - OUT_CS(rs->multisample_position_0); - OUT_CS(rs->multisample_position_1); - - OUT_CS_REG(R300_GB_AA_CONFIG, rs->antialiasing_config); - } - } - - OUT_CS_REG(R300_GA_POINT_SIZE, rs->point_size); - OUT_CS_REG_SEQ(R300_GA_POINT_MINMAX, 2); - OUT_CS(rs->point_minmax); - OUT_CS(rs->line_control); - + OUT_CS_TABLE(rs->cb_main, 25); if (rs->polygon_offset_enable) { - scale = rs->depth_scale * 12; - offset = rs->depth_offset; - - switch (r300->zbuffer_bpp) { - case 16: - offset *= 4; - break; - case 24: - offset *= 2; - break; + if (r300->zbuffer_bpp == 16) { + OUT_CS_TABLE(rs->cb_poly_offset_zb16, 5); + } else { + OUT_CS_TABLE(rs->cb_poly_offset_zb24, 5); } - - OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4); - OUT_CS_32F(scale); - OUT_CS_32F(offset); - OUT_CS_32F(scale); - OUT_CS_32F(offset); } - - OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_ENABLE, 2); - OUT_CS(rs->polygon_offset_enable); - OUT_CS(rs->cull_mode); - OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, rs->line_stipple_config); - OUT_CS_REG(R300_GA_LINE_STIPPLE_VALUE, rs->line_stipple_value); - OUT_CS_REG(R300_GA_POLY_MODE, rs->polygon_mode); - OUT_CS_REG(R300_SC_CLIP_RULE, rs->clip_rule); - OUT_CS_REG(R300_GB_ENABLE, rs->stuffing_enable); - OUT_CS_REG_SEQ(R300_GA_POINT_S0, 4); - OUT_CS_32F(rs->point_texcoord_left); - OUT_CS_32F(rs->point_texcoord_bottom); - OUT_CS_32F(rs->point_texcoord_right); - OUT_CS_32F(rs->point_texcoord_top); END_CS; } @@ -614,11 +683,20 @@ void r300_emit_rs_block_state(struct r300_context* r300, unsigned count = (rs->inst_count & R300_RS_INST_COUNT_MASK) + 1; CS_LOCALS(r300); - if (SCREEN_DBG_ON(r300->screen, DBG_DRAW)) { + if (DBG_ON(r300, DBG_RS_BLOCK)) { r500_dump_rs_block(rs); - } - DBG(r300, DBG_DRAW, "r300: RS emit:\n"); + fprintf(stderr, "r300: RS emit:\n"); + + for (i = 0; i < count; i++) + fprintf(stderr, " : ip %d: 0x%08x\n", i, rs->ip[i]); + + for (i = 0; i < count; i++) + fprintf(stderr, " : inst %d: 0x%08x\n", i, rs->inst[i]); + + fprintf(stderr, " : count: 0x%08x inst_count: 0x%08x\n", + rs->count, rs->inst_count); + } BEGIN_CS(size); OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2); @@ -634,9 +712,6 @@ void r300_emit_rs_block_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_RS_IP_0, count); } OUT_CS_TABLE(rs->ip, count); - for (i = 0; i < count; i++) { - DBG(r300, DBG_DRAW, " : ip %d: 0x%08x\n", i, rs->ip[i]); - } OUT_CS_REG_SEQ(R300_RS_COUNT, 2); OUT_CS(rs->count); @@ -648,13 +723,6 @@ void r300_emit_rs_block_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_RS_INST_0, count); } OUT_CS_TABLE(rs->inst, count); - for (i = 0; i < count; i++) { - DBG(r300, DBG_DRAW, " : inst %d: 0x%08x\n", i, rs->inst[i]); - } - - DBG(r300, DBG_DRAW, " : count: 0x%08x inst_count: 0x%08x\n", - rs->count, rs->inst_count); - END_CS; } @@ -708,7 +776,7 @@ void r300_emit_textures_state(struct r300_context *r300, OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (i * 4), 1); OUT_CS_TEX_RELOC(tex, texstate->format.tile_config, tex->domain, - 0, 0); + 0); } } END_CS; @@ -751,7 +819,7 @@ void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed) for (i = 0; i < aos_count; i++) { buf = r300_buffer(vbuf[velem[i].vertex_buffer_index].buffer); - OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b, buf->domain, 0, 0); + OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b, buf->domain, 0); } END_CS; } @@ -760,7 +828,7 @@ void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed) { CS_LOCALS(r300); - DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, " + DBG(r300, DBG_SWTCL, "r300: Preparing vertex buffer %p for render, " "vertex size %d\n", r300->vbo, r300->vertex_info.size); /* Set the pointer to our vertex buffer. The emitted values are this: @@ -776,7 +844,7 @@ void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed) OUT_CS(r300->vertex_info.size | (r300->vertex_info.size << 8)); OUT_CS(r300->vbo_offset); - OUT_CS_BUF_RELOC(r300->vbo, 0, r300_buffer(r300->vbo)->domain, 0, 0); + OUT_CS_BUF_RELOC(r300->vbo, 0, r300_buffer(r300->vbo)->domain, 0); END_CS; } @@ -788,21 +856,25 @@ void r300_emit_vertex_stream_state(struct r300_context* r300, unsigned i; CS_LOCALS(r300); - DBG(r300, DBG_DRAW, "r300: PSC emit:\n"); + if (DBG_ON(r300, DBG_PSC)) { + fprintf(stderr, "r300: PSC emit:\n"); + + for (i = 0; i < streams->count; i++) { + fprintf(stderr, " : prog_stream_cntl%d: 0x%08x\n", i, + streams->vap_prog_stream_cntl[i]); + } + + for (i = 0; i < streams->count; i++) { + fprintf(stderr, " : prog_stream_cntl_ext%d: 0x%08x\n", i, + streams->vap_prog_stream_cntl_ext[i]); + } + } BEGIN_CS(size); OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, streams->count); OUT_CS_TABLE(streams->vap_prog_stream_cntl, streams->count); - for (i = 0; i < streams->count; i++) { - DBG(r300, DBG_DRAW, " : prog_stream_cntl%d: 0x%08x\n", i, - streams->vap_prog_stream_cntl[i]); - } OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, streams->count); OUT_CS_TABLE(streams->vap_prog_stream_cntl_ext, streams->count); - for (i = 0; i < streams->count; i++) { - DBG(r300, DBG_DRAW, " : prog_stream_cntl_ext%d: 0x%08x\n", i, - streams->vap_prog_stream_cntl_ext[i]); - } END_CS; } @@ -815,6 +887,13 @@ void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state) END_CS; } +void r300_emit_vap_invariant_state(struct r300_context *r300, + unsigned size, void *state) +{ + CS_LOCALS(r300); + WRITE_CS_TABLE(state, size); +} + void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) { struct r300_vertex_shader* vs = (struct r300_vertex_shader*)state; @@ -830,7 +909,7 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) unsigned pvs_num_slots = MIN3(vtx_mem_size / input_count, vtx_mem_size / output_count, 10); - unsigned pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 6); + unsigned pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 5); unsigned imm_first = vs->externals_count; unsigned imm_end = vs->code.constants.Count; @@ -839,16 +918,6 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) CS_LOCALS(r300); BEGIN_CS(size); - /* Amount of time to wait for vertex fetches in PVS */ - OUT_CS_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff); - - OUT_CS_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4); - OUT_CS_32F(1.0); - OUT_CS_32F(1.0); - OUT_CS_32F(1.0); - OUT_CS_32F(1.0); - - OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO); /* R300_VAP_PVS_CODE_CNTL_0 * R300_VAP_PVS_CONST_CNTL @@ -883,6 +952,22 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_TABLE(data, 4); } } + + /* Emit flow control instructions. */ + if (code->num_fc_ops) { + + OUT_CS_REG(R300_VAP_PVS_FLOW_CNTL_OPC, code->fc_ops); + if (r300screen->caps.is_r500) { + OUT_CS_REG_SEQ(R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0, code->num_fc_ops * 2); + OUT_CS_TABLE(code->fc_op_addrs.r500, code->num_fc_ops * 2); + } else { + OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_ADDRS_0, code->num_fc_ops); + OUT_CS_TABLE(code->fc_op_addrs.r300, code->num_fc_ops); + } + OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, code->num_fc_ops); + OUT_CS_TABLE(code->fc_loop_index, code->num_fc_ops); + } + END_CS; } @@ -892,6 +977,7 @@ void r300_emit_vs_constants(struct r300_context* r300, unsigned count = ((struct r300_vertex_shader*)r300->vs_state.state)->externals_count; struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state; + unsigned i; CS_LOCALS(r300); if (!count) @@ -902,7 +988,14 @@ void r300_emit_vs_constants(struct r300_context* r300, (r300->screen->caps.is_r500 ? R500_PVS_CONST_START : R300_PVS_CONST_START)); OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, count * 4); - OUT_CS_TABLE(buf->constants, count * 4); + if (buf->remap_table){ + for (i = 0; i < count; i++) { + uint32_t *data = &buf->ptr[buf->remap_table[i]*4]; + OUT_CS_TABLE(data, 4); + } + } else { + OUT_CS_TABLE(buf->ptr, count * 4); + } END_CS; } @@ -919,6 +1012,111 @@ void r300_emit_viewport_state(struct r300_context* r300, END_CS; } +static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) +{ + CS_LOCALS(r300); + BEGIN_CS(4); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2); + OUT_CS(start); + OUT_CS(count); + OUT_CS(val); + END_CS; +} + +static void r300_emit_zmask_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) +{ + CS_LOCALS(r300); + BEGIN_CS(4); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2); + OUT_CS(start); + OUT_CS(count); + OUT_CS(val); + END_CS; +} + +#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) + +void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_hyperz_state *z = + (struct r300_hyperz_state*)r300->hyperz_state.state; + struct r300_screen* r300screen = r300->screen; + uint32_t stride, offset = 0, height, offset_shift; + struct r300_texture* tex; + int i; + + tex = r300_texture(fb->zsbuf->texture); + + offset = tex->hiz_mem[fb->zsbuf->level]->ofs; + stride = tex->desc.stride_in_pixels[fb->zsbuf->level]; + + /* convert from pixels to 4x4 blocks */ + stride = ALIGN_DIVUP(stride, 4); + + stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); + /* there are 4 blocks per dwords */ + stride = ALIGN_DIVUP(stride, 4); + + height = ALIGN_DIVUP(fb->zsbuf->height, 4); + + offset_shift = 2; + offset_shift += (r300screen->caps.num_frag_pipes / 2); + + for (i = 0; i < height; i++) { + offset = i * stride; + offset <<= offset_shift; + r300_emit_hiz_line_clear(r300, offset, stride, 0xffffffff); + } + z->current_func = -1; + + /* Mark the current zbuffer's hiz ram as in use. */ + tex->hiz_in_use[fb->zsbuf->level] = TRUE; +} + +void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_screen* r300screen = r300->screen; + uint32_t stride, offset = 0; + struct r300_texture* tex; + uint32_t i, height; + int mult, offset_shift; + + tex = r300_texture(fb->zsbuf->texture); + stride = tex->desc.stride_in_pixels[fb->zsbuf->level]; + + offset = tex->zmask_mem[fb->zsbuf->level]->ofs; + + if (r300->z_compression == RV350_Z_COMPRESS_88) + mult = 8; + else + mult = 4; + + height = ALIGN_DIVUP(fb->zsbuf->height, mult); + + offset_shift = 4; + offset_shift += (r300screen->caps.num_frag_pipes / 2); + stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); + + /* okay have width in pixels - divide by block width */ + stride = ALIGN_DIVUP(stride, mult); + /* have width in blocks - divide by number of fragment pipes screen width */ + /* 16 blocks per dword */ + stride = ALIGN_DIVUP(stride, 16); + + for (i = 0; i < height; i++) { + offset = i * stride; + offset <<= offset_shift; + r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff); + } + + /* Mark the current zbuffer's zmask as in use. */ + tex->zmask_in_use[fb->zsbuf->level] = TRUE; +} + void r300_emit_ztop_state(struct r300_context* r300, unsigned size, void* state) { @@ -961,27 +1159,22 @@ void r300_emit_buffer_validate(struct r300_context *r300, } /* Clean out BOs. */ - r300->rws->reset_bos(r300->rws); + r300->rws->cs_reset_buffers(r300->cs); validate: /* Color buffers... */ for (i = 0; i < fb->nr_cbufs; i++) { tex = r300_texture(fb->cbufs[i]->texture); assert(tex && tex->buffer && "cbuf is marked, but NULL!"); - if (!r300_add_texture(r300->rws, tex, 0, tex->domain)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } + r300->rws->cs_add_buffer(r300->cs, tex->buffer, 0, + r300_surface(fb->cbufs[i])->domain); } /* ...depth buffer... */ if (fb->zsbuf) { tex = r300_texture(fb->zsbuf->texture); assert(tex && tex->buffer && "zsbuf is marked, but NULL!"); - if (!r300_add_texture(r300->rws, tex, - 0, tex->domain)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } + r300->rws->cs_add_buffer(r300->cs, tex->buffer, 0, + r300_surface(fb->zsbuf)->domain); } /* ...textures... */ for (i = 0; i < texstate->count; i++) { @@ -990,48 +1183,31 @@ validate: } tex = r300_texture(texstate->sampler_views[i]->base.texture); - if (!r300_add_texture(r300->rws, tex, tex->domain, 0)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } + r300->rws->cs_add_buffer(r300->cs, tex->buffer, tex->domain, 0); } /* ...occlusion query buffer... */ - if (r300->query_current) { - if (!r300->rws->add_buffer(r300->rws, r300->query_current->buffer, - 0, r300->query_current->domain)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } - } + if (r300->query_current) + r300->rws->cs_add_buffer(r300->cs, r300->query_current->buffer, + 0, r300->query_current->domain); /* ...vertex buffer for SWTCL path... */ - if (r300->vbo) { - if (!r300_add_buffer(r300->rws, r300->vbo, - r300_buffer(r300->vbo)->domain, 0)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } - } + if (r300->vbo) + r300->rws->cs_add_buffer(r300->cs, r300_buffer(r300->vbo)->buf, + r300_buffer(r300->vbo)->domain, 0); /* ...vertex buffers for HWTCL path... */ if (do_validate_vertex_buffers) { for (i = 0; i < r300->velems->count; i++) { pbuf = vbuf[velem[i].vertex_buffer_index].buffer; - if (!r300_add_buffer(r300->rws, pbuf, - r300_buffer(pbuf)->domain, 0)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } + r300->rws->cs_add_buffer(r300->cs, r300_buffer(pbuf)->buf, + r300_buffer(pbuf)->domain, 0); } } /* ...and index buffer for HWTCL path. */ - if (index_buffer) { - if (!r300_add_buffer(r300->rws, index_buffer, - r300_buffer(index_buffer)->domain, 0)) { - r300->context.flush(&r300->context, 0, NULL); - goto validate; - } - } - if (!r300->rws->validate(r300->rws)) { + if (index_buffer) + r300->rws->cs_add_buffer(r300->cs, r300_buffer(index_buffer)->buf, + r300_buffer(index_buffer)->domain, 0); + + if (!r300->rws->cs_validate(r300->cs)) { r300->context.flush(&r300->context, 0, NULL); if (invalid) { /* Well, hell. */ @@ -1060,6 +1236,19 @@ unsigned r300_get_num_dirty_dwords(struct r300_context *r300) return dwords; } +unsigned r300_get_num_cs_end_dwords(struct r300_context *r300) +{ + unsigned dwords = 0; + + /* Emitted in flush. */ + dwords += 26; /* emit_query_end */ + dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ + if (r500_index_bias_supported(r300)) + dwords += 2; + + return dwords; +} + /* Emit all dirty state. */ void r300_emit_dirty_state(struct r300_context* r300) {