X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fr300%2Fr300_emit.c;h=c187f115da43a7c756480366a076e2cdef783f9c;hb=ab130400cf91ab471e265e58193c95f04c7aeeda;hp=53e7f25df3a15f75d67c980e78935718f43e33ef;hpb=f92d1a54e998c1fbe65eefd7af539f97a60fc82b;p=mesa.git diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 53e7f25df3a..c187f115da4 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -25,6 +25,7 @@ #include "util/u_format.h" #include "util/u_math.h" +#include "util/u_mm.h" #include "util/u_simple_list.h" #include "r300_context.h" @@ -88,7 +89,7 @@ static const float * get_rc_constant_state( { struct r300_textures_state* texstate = r300->textures_state.state; static float vec[4] = { 0.0, 0.0, 0.0, 1.0 }; - struct pipe_resource *tex; + struct r300_texture *tex; assert(constant->Type == RC_CONSTANT_STATE); @@ -96,9 +97,17 @@ static const float * get_rc_constant_state( /* Factor for converting rectangle coords to * normalized coords. Should only show up on non-r500. */ case RC_STATE_R300_TEXRECT_FACTOR: - tex = texstate->sampler_views[constant->u.State[1]]->base.texture; - vec[0] = 1.0 / tex->width0; - vec[1] = 1.0 / tex->height0; + tex = r300_texture(texstate->sampler_views[constant->u.State[1]]->base.texture); + vec[0] = 1.0 / tex->desc.width0; + vec[1] = 1.0 / tex->desc.height0; + break; + + case RC_STATE_R300_TEXSCALE_FACTOR: + tex = r300_texture(texstate->sampler_views[constant->u.State[1]]->base.texture); + /* Add a small number to the texture size to work around rounding errors in hw. */ + vec[0] = tex->desc.b.b.width0 / (tex->desc.width0 + 0.001f); + vec[1] = tex->desc.b.b.height0 / (tex->desc.height0 + 0.001f); + vec[2] = tex->desc.b.b.depth0 / (tex->desc.depth0 + 0.001f); break; case RC_STATE_R300_VIEWPORT_SCALE: @@ -178,10 +187,19 @@ void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat return; BEGIN_CS(size); - OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, count); - for (i = 0; i < count; i++) - for (j = 0; j < 4; j++) - OUT_CS(pack_float24(buf->ptr[i*4+j])); + OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, count * 4); + if (buf->remap_table){ + for (i = 0; i < count; i++) { + float *data = (float*)&buf->ptr[buf->remap_table[i]*4]; + for (j = 0; j < 4; j++) + OUT_CS(pack_float24(data[j])); + } + } else { + for (i = 0; i < count; i++) + for (j = 0; j < 4; j++) + OUT_CS(pack_float24(*(float*)&buf->ptr[i*4+j])); + } + END_CS; } @@ -225,7 +243,7 @@ void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat { struct r300_fragment_shader *fs = r300_fs(r300); struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state; - unsigned count = fs->shader->externals_count * 4; + unsigned count = fs->shader->externals_count; CS_LOCALS(r300); if (count == 0) @@ -233,8 +251,15 @@ void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *stat BEGIN_CS(size); OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST); - OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count); - OUT_CS_TABLE(buf->ptr, count); + OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count * 4); + if (buf->remap_table){ + for (unsigned i = 0; i < count; i++) { + uint32_t *data = &buf->ptr[buf->remap_table[i]*4]; + OUT_CS_TABLE(data, 4); + } + } else { + OUT_CS_TABLE(buf->ptr, count * 4); + } END_CS; } @@ -283,6 +308,10 @@ void r300_emit_gpu_flush(struct r300_context *r300, unsigned size, void *state) width = surf->cbzb_width; } + DBG(r300, DBG_SCISSOR, + "r300: Scissor width: %i, height: %i, CBZB clear: %s\n", + width, height, r300->cbzb_clear ? "YES" : "NO"); + BEGIN_CS(size); /* Set up scissors. @@ -329,6 +358,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state; struct r300_surface* surf; unsigned i; + boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); CS_LOCALS(r300); BEGIN_CS(size); @@ -364,6 +394,10 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); OUT_CS_RELOC(surf->buffer, surf->cbzb_pitch, 0, surf->domain); + + DBG(r300, DBG_CBZB, + "CBZB clearing cbuf %08x %08x\n", surf->cbzb_format, + surf->cbzb_pitch); } /* Set up a zbuffer. */ else if (fb->zsbuf) { @@ -377,15 +411,32 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1); OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain); - /* HiZ RAM. */ - if (r300->screen->caps.has_hiz) { - OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); - OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); + if (can_hyperz) { + uint32_t surf_pitch; + struct r300_texture *tex; + int level = surf->base.level; + tex = r300_texture(surf->base.texture); + + surf_pitch = surf->pitch & R300_DEPTHPITCH_MASK; + /* HiZ RAM. */ + if (r300->screen->caps.hiz_ram) { + if (tex->hiz_mem[level]) { + OUT_CS_REG(R300_ZB_HIZ_OFFSET, tex->hiz_mem[level]->ofs << 2); + OUT_CS_REG(R300_ZB_HIZ_PITCH, surf_pitch); + } else { + OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); + OUT_CS_REG(R300_ZB_HIZ_PITCH, 0); + } + } + /* Z Mask RAM. (compressed zbuffer) */ + if (tex->zmask_mem[level]) { + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, tex->zmask_mem[level]->ofs << 2); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, surf_pitch); + } else { + OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); + OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); + } } - - /* Z Mask RAM. (compressed zbuffer) */ - OUT_CS_REG(R300_ZB_ZMASK_OFFSET, 0); - OUT_CS_REG(R300_ZB_ZMASK_PITCH, 0); } END_CS; @@ -394,8 +445,12 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) void r300_emit_hyperz_state(struct r300_context *r300, unsigned size, void *state) { + struct r300_hyperz_state *z = state; CS_LOCALS(r300); - WRITE_CS_TABLE(state, size); + if (z->flush) + WRITE_CS_TABLE(&z->cb_flush_begin, size); + else + WRITE_CS_TABLE(&z->cb_begin, size - 2); } void r300_emit_hyperz_end(struct r300_context *r300) @@ -403,9 +458,11 @@ void r300_emit_hyperz_end(struct r300_context *r300) struct r300_hyperz_state z = *(struct r300_hyperz_state*)r300->hyperz_state.state; + z.flush = 1; z.zb_bw_cntl = 0; z.zb_depthclearvalue = 0; z.sc_hyperz = R300_SC_HYPERZ_ADJ_2; + z.gb_z_peq_config = 0; r300_emit_hyperz_state(r300, r300->hyperz_state.size, &z); } @@ -618,7 +675,7 @@ void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state) CS_LOCALS(r300); BEGIN_CS(size); - OUT_CS_TABLE(rs->cb_main, 25); + OUT_CS_TABLE(rs->cb_main, RS_STATE_MAIN_SIZE); if (rs->polygon_offset_enable) { if (r300->zbuffer_bpp == 16) { OUT_CS_TABLE(rs->cb_poly_offset_zb16, 5); @@ -660,6 +717,8 @@ void r300_emit_rs_block_state(struct r300_context* r300, OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2); OUT_CS(rs->vap_out_vtx_fmt[0]); OUT_CS(rs->vap_out_vtx_fmt[1]); + OUT_CS_REG_SEQ(R300_GB_ENABLE, 1); + OUT_CS(rs->gb_enable); if (r300->screen->caps.is_r500) { OUT_CS_REG_SEQ(R500_RS_IP_0, count); @@ -798,7 +857,7 @@ void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed) OUT_CS(1 | (!indexed ? R300_VC_FORCE_PREFETCH : 0)); OUT_CS(r300->vertex_info.size | (r300->vertex_info.size << 8)); - OUT_CS(r300->vbo_offset); + OUT_CS(r300->draw_vbo_offset); OUT_CS_BUF_RELOC(r300->vbo, 0, r300_buffer(r300->vbo)->domain, 0); END_CS; } @@ -864,7 +923,7 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) unsigned pvs_num_slots = MIN3(vtx_mem_size / input_count, vtx_mem_size / output_count, 10); - unsigned pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 6); + unsigned pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 5); unsigned imm_first = vs->externals_count; unsigned imm_end = vs->code.constants.Count; @@ -907,6 +966,22 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_TABLE(data, 4); } } + + /* Emit flow control instructions. */ + if (code->num_fc_ops) { + + OUT_CS_REG(R300_VAP_PVS_FLOW_CNTL_OPC, code->fc_ops); + if (r300screen->caps.is_r500) { + OUT_CS_REG_SEQ(R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0, code->num_fc_ops * 2); + OUT_CS_TABLE(code->fc_op_addrs.r500, code->num_fc_ops * 2); + } else { + OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_ADDRS_0, code->num_fc_ops); + OUT_CS_TABLE(code->fc_op_addrs.r300, code->num_fc_ops); + } + OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, code->num_fc_ops); + OUT_CS_TABLE(code->fc_loop_index, code->num_fc_ops); + } + END_CS; } @@ -916,6 +991,7 @@ void r300_emit_vs_constants(struct r300_context* r300, unsigned count = ((struct r300_vertex_shader*)r300->vs_state.state)->externals_count; struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state; + unsigned i; CS_LOCALS(r300); if (!count) @@ -926,7 +1002,14 @@ void r300_emit_vs_constants(struct r300_context* r300, (r300->screen->caps.is_r500 ? R500_PVS_CONST_START : R300_PVS_CONST_START)); OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, count * 4); - OUT_CS_TABLE(buf->ptr, count * 4); + if (buf->remap_table){ + for (i = 0; i < count; i++) { + uint32_t *data = &buf->ptr[buf->remap_table[i]*4]; + OUT_CS_TABLE(data, 4); + } + } else { + OUT_CS_TABLE(buf->ptr, count * 4); + } END_CS; } @@ -943,6 +1026,111 @@ void r300_emit_viewport_state(struct r300_context* r300, END_CS; } +static void r300_emit_hiz_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) +{ + CS_LOCALS(r300); + BEGIN_CS(4); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_HIZ, 2); + OUT_CS(start); + OUT_CS(count); + OUT_CS(val); + END_CS; +} + +static void r300_emit_zmask_line_clear(struct r300_context *r300, int start, uint16_t count, uint32_t val) +{ + CS_LOCALS(r300); + BEGIN_CS(4); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_ZMASK, 2); + OUT_CS(start); + OUT_CS(count); + OUT_CS(val); + END_CS; +} + +#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y)) + +void r300_emit_hiz_clear(struct r300_context *r300, unsigned size, void *state) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_hyperz_state *z = + (struct r300_hyperz_state*)r300->hyperz_state.state; + struct r300_screen* r300screen = r300->screen; + uint32_t stride, offset = 0, height, offset_shift; + struct r300_texture* tex; + int i; + + tex = r300_texture(fb->zsbuf->texture); + + offset = tex->hiz_mem[fb->zsbuf->level]->ofs; + stride = tex->desc.stride_in_pixels[fb->zsbuf->level]; + + /* convert from pixels to 4x4 blocks */ + stride = ALIGN_DIVUP(stride, 4); + + stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); + /* there are 4 blocks per dwords */ + stride = ALIGN_DIVUP(stride, 4); + + height = ALIGN_DIVUP(fb->zsbuf->height, 4); + + offset_shift = 2; + offset_shift += (r300screen->caps.num_frag_pipes / 2); + + for (i = 0; i < height; i++) { + offset = i * stride; + offset <<= offset_shift; + r300_emit_hiz_line_clear(r300, offset, stride, 0xffffffff); + } + z->current_func = -1; + + /* Mark the current zbuffer's hiz ram as in use. */ + tex->hiz_in_use[fb->zsbuf->level] = TRUE; +} + +void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_screen* r300screen = r300->screen; + uint32_t stride, offset = 0; + struct r300_texture* tex; + uint32_t i, height; + int mult, offset_shift; + + tex = r300_texture(fb->zsbuf->texture); + stride = tex->desc.stride_in_pixels[fb->zsbuf->level]; + + offset = tex->zmask_mem[fb->zsbuf->level]->ofs; + + if (r300->z_compression == RV350_Z_COMPRESS_88) + mult = 8; + else + mult = 4; + + height = ALIGN_DIVUP(fb->zsbuf->height, mult); + + offset_shift = 4; + offset_shift += (r300screen->caps.num_frag_pipes / 2); + stride = ALIGN_DIVUP(stride, r300screen->caps.num_frag_pipes); + + /* okay have width in pixels - divide by block width */ + stride = ALIGN_DIVUP(stride, mult); + /* have width in blocks - divide by number of fragment pipes screen width */ + /* 16 blocks per dword */ + stride = ALIGN_DIVUP(stride, 16); + + for (i = 0; i < height; i++) { + offset = i * stride; + offset <<= offset_shift; + r300_emit_zmask_line_clear(r300, offset, stride, 0x0);//0xffffffff); + } + + /* Mark the current zbuffer's zmask as in use. */ + tex->zmask_in_use[fb->zsbuf->level] = TRUE; +} + void r300_emit_ztop_state(struct r300_context* r300, unsigned size, void* state) { @@ -963,9 +1151,9 @@ void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, voi END_CS; } -void r300_emit_buffer_validate(struct r300_context *r300, - boolean do_validate_vertex_buffers, - struct pipe_resource *index_buffer) +boolean r300_emit_buffer_validate(struct r300_context *r300, + boolean do_validate_vertex_buffers, + struct pipe_resource *index_buffer) { struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; @@ -976,7 +1164,6 @@ void r300_emit_buffer_validate(struct r300_context *r300, struct pipe_vertex_element *velem = r300->velems->velem; struct pipe_resource *pbuf; unsigned i; - boolean invalid = FALSE; /* upload buffers first */ if (r300->screen->caps.has_tcl && r300->any_user_vbs) { @@ -987,7 +1174,6 @@ void r300_emit_buffer_validate(struct r300_context *r300, /* Clean out BOs. */ r300->rws->cs_reset_buffers(r300->cs); -validate: /* Color buffers... */ for (i = 0; i < fb->nr_cbufs; i++) { tex = r300_texture(fb->cbufs[i]->texture); @@ -1034,15 +1220,10 @@ validate: r300_buffer(index_buffer)->domain, 0); if (!r300->rws->cs_validate(r300->cs)) { - r300->context.flush(&r300->context, 0, NULL); - if (invalid) { - /* Well, hell. */ - fprintf(stderr, "r300: Stuck in validation loop, gonna quit now.\n"); - abort(); - } - invalid = TRUE; - goto validate; + return FALSE; } + + return TRUE; } unsigned r300_get_num_dirty_dwords(struct r300_context *r300) @@ -1062,6 +1243,19 @@ unsigned r300_get_num_dirty_dwords(struct r300_context *r300) return dwords; } +unsigned r300_get_num_cs_end_dwords(struct r300_context *r300) +{ + unsigned dwords = 0; + + /* Emitted in flush. */ + dwords += 26; /* emit_query_end */ + dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ + if (r500_index_bias_supported(r300)) + dwords += 2; + + return dwords; +} + /* Emit all dirty state. */ void r300_emit_dirty_state(struct r300_context* r300) {