X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fr300%2Fr300_emit.c;h=80c959b95d0d3bb2c6c21136f65546fa0173eb1d;hb=d2d85b950d78c553b2694cda9ef0cc7bf9a0f737;hp=6b91f93e6d69701fd2f996ee709b0d664f46a34f;hpb=4af3fe857dc07923af8786b434dee32d493b577e;p=mesa.git diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index 6b91f93e6d6..80c959b95d0 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -25,7 +25,6 @@ #include "util/u_format.h" #include "util/u_math.h" -#include "util/u_mm.h" #include "r300_context.h" #include "r300_cb.h" @@ -42,13 +41,20 @@ void r300_emit_blend_state(struct r300_context* r300, struct r300_blend_state* blend = (struct r300_blend_state*)state; struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; + struct pipe_surface *cb; CS_LOCALS(r300); - if (fb->nr_cbufs) { - if (fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT) + cb = fb->nr_cbufs ? r300_get_nonnull_cb(fb, 0) : NULL; + + if (cb) { + if (cb->format == PIPE_FORMAT_R16G16B16A16_FLOAT) { WRITE_CS_TABLE(blend->cb_noclamp, size); - else - WRITE_CS_TABLE(blend->cb_clamp, size); + } else if (cb->format == PIPE_FORMAT_R16G16B16X16_FLOAT) { + WRITE_CS_TABLE(blend->cb_noclamp_noalpha, size); + } else { + unsigned swz = r300_surface(cb)->colormask_swizzle; + WRITE_CS_TABLE(blend->cb_clamp[swz], size); + } } else { WRITE_CS_TABLE(blend->cb_no_readwrite, size); } @@ -77,19 +83,35 @@ void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state) struct r300_dsa_state* dsa = (struct r300_dsa_state*)state; struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; + boolean is_r500 = r300->screen->caps.is_r500; CS_LOCALS(r300); + uint32_t alpha_func = dsa->alpha_function; - if (fb->zsbuf) { - if (fb->nr_cbufs && fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT) - WRITE_CS_TABLE(&dsa->cb_begin_fp16, size); - else - WRITE_CS_TABLE(&dsa->cb_begin, size); - } else { - if (fb->nr_cbufs && fb->cbufs[0]->format == PIPE_FORMAT_R16G16B16A16_FLOAT) - WRITE_CS_TABLE(dsa->cb_fp16_zb_no_readwrite, size); - else - WRITE_CS_TABLE(dsa->cb_zb_no_readwrite, size); + /* Choose the alpha ref value between 8-bit (FG_ALPHA_FUNC.AM_VAL) and + * 16-bit (FG_ALPHA_VALUE). */ + if (is_r500 && (alpha_func & R300_FG_ALPHA_FUNC_ENABLE)) { + struct pipe_surface *cb = fb->nr_cbufs ? r300_get_nonnull_cb(fb, 0) : NULL; + + if (cb && + (cb->format == PIPE_FORMAT_R16G16B16A16_FLOAT || + cb->format == PIPE_FORMAT_R16G16B16X16_FLOAT)) { + alpha_func |= R500_FG_ALPHA_FUNC_FP16_ENABLE; + } else { + alpha_func |= R500_FG_ALPHA_FUNC_8BIT; + } } + + /* Setup alpha-to-coverage. */ + if (r300->alpha_to_coverage && r300->msaa_enable) { + /* Always set 3/6, it improves precision even for 2x and 4x MSAA. */ + alpha_func |= R300_FG_ALPHA_FUNC_MASK_ENABLE | + R300_FG_ALPHA_FUNC_CFG_3_OF_6; + } + + BEGIN_CS(size); + OUT_CS_REG(R300_FG_ALPHA_FUNC, alpha_func); + OUT_CS_TABLE(fb->zsbuf ? &dsa->cb_begin : dsa->cb_zb_no_readwrite, size-2); + END_CS; } static void get_rc_constant_state( @@ -120,9 +142,9 @@ static void get_rc_constant_state( case RC_STATE_R300_TEXSCALE_FACTOR: tex = r300_resource(texstate->sampler_views[constant->u.State[1]]->base.texture); /* Add a small number to the texture size to work around rounding errors in hw. */ - vec[0] = tex->b.b.b.width0 / (tex->tex.width0 + 0.001f); - vec[1] = tex->b.b.b.height0 / (tex->tex.height0 + 0.001f); - vec[2] = tex->b.b.b.depth0 / (tex->tex.depth0 + 0.001f); + vec[0] = tex->b.b.width0 / (tex->tex.width0 + 0.001f); + vec[1] = tex->b.b.height0 / (tex->tex.height0 + 0.001f); + vec[2] = tex->b.b.depth0 / (tex->tex.depth0 + 0.001f); vec[3] = 1; break; @@ -361,12 +383,16 @@ void r300_emit_aa_state(struct r300_context *r300, unsigned size, void *state) OUT_CS_REG(R300_GB_AA_CONFIG, aa->aa_config); if (aa->dest) { - OUT_CS_REG(R300_RB3D_AARESOLVE_OFFSET, aa->dest->offset); + OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 3); + OUT_CS(aa->dest->offset); + OUT_CS(aa->dest->pitch & R300_RB3D_AARESOLVE_PITCH_MASK); + OUT_CS(R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE | + R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE); OUT_CS_RELOC(aa->dest); - OUT_CS_REG(R300_RB3D_AARESOLVE_PITCH, aa->dest->pitch); + } else { + OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0); } - OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, aa->aaresolve_ctl); END_CS; } @@ -375,33 +401,46 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state; struct r300_surface* surf; unsigned i; - boolean can_hyperz = r300->rws->get_value(r300->rws, R300_CAN_HYPERZ); uint32_t rb3d_cctl = 0; CS_LOCALS(r300); BEGIN_CS(size); - /* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers, which is not - * what we usually want. */ if (r300->screen->caps.is_r500) { rb3d_cctl = R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE; } + /* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers. */ if (fb->nr_cbufs && r300->fb_multiwrite) { rb3d_cctl |= R300_RB3D_CCTL_NUM_MULTIWRITES(fb->nr_cbufs); } + if (r300->cmask_in_use) { + rb3d_cctl |= R300_RB3D_CCTL_AA_COMPRESSION_ENABLE | + R300_RB3D_CCTL_CMASK_ENABLE; + } OUT_CS_REG(R300_RB3D_CCTL, rb3d_cctl); /* Set up colorbuffers. */ for (i = 0; i < fb->nr_cbufs; i++) { - surf = r300_surface(fb->cbufs[i]); + surf = r300_surface(r300_get_nonnull_cb(fb, i)); OUT_CS_REG(R300_RB3D_COLOROFFSET0 + (4 * i), surf->offset); OUT_CS_RELOC(surf); OUT_CS_REG(R300_RB3D_COLORPITCH0 + (4 * i), surf->pitch); OUT_CS_RELOC(surf); + + if (r300->cmask_in_use && i == 0) { + OUT_CS_REG(R300_RB3D_CMASK_OFFSET0, 0); + OUT_CS_REG(R300_RB3D_CMASK_PITCH0, surf->pitch_cmask); + OUT_CS_REG(R300_RB3D_COLOR_CLEAR_VALUE, r300->color_clear_value); + if (r300->screen->caps.is_r500 && r300->screen->info.drm_minor >= 29) { + OUT_CS_REG_SEQ(R500_RB3D_COLOR_CLEAR_VALUE_AR, 2); + OUT_CS(r300->color_clear_value_ar); + OUT_CS(r300->color_clear_value_gb); + } + } } /* Set up the ZB part of the CBZB clear. */ @@ -432,7 +471,7 @@ void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state) OUT_CS_REG(R300_ZB_DEPTHPITCH, surf->pitch); OUT_CS_RELOC(surf); - if (can_hyperz) { + if (r300->hyperz_enabled) { /* HiZ RAM. */ OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0); OUT_CS_REG(R300_ZB_HIZ_PITCH, surf->pitch_hiz); @@ -471,9 +510,82 @@ void r300_emit_hyperz_end(struct r300_context *r300) r300_emit_hyperz_state(r300, r300->hyperz_state.size, &z); } +#define R300_NIBBLES(x0, y0, x1, y1, x2, y2, d0y, d0x) \ + (((x0) & 0xf) | (((y0) & 0xf) << 4) | \ + (((x1) & 0xf) << 8) | (((y1) & 0xf) << 12) | \ + (((x2) & 0xf) << 16) | (((y2) & 0xf) << 20) | \ + (((d0y) & 0xf) << 24) | (((d0x) & 0xf) << 28)) + +static unsigned r300_get_mspos(int index, unsigned *p) +{ + unsigned reg, i, distx, disty, dist; + + if (index == 0) { + /* MSPOS0 contains positions for samples 0,1,2 as (X,Y) pairs of nibbles, + * followed by a (Y,X) pair containing the minimum distance from the pixel + * edge: + * X0, Y0, X1, Y1, X2, Y2, D0_Y, D0_X + * + * There is a quirk when setting D0_X. The value represents the distance + * from the left edge of the pixel quad to the first sample in subpixels. + * All values less than eight should use the actual value, but „7‟ should + * be used for the distance „8‟. The hardware will convert 7 into 8 internally. + */ + distx = 11; + for (i = 0; i < 12; i += 2) { + if (p[i] < distx) + distx = p[i]; + } + + disty = 11; + for (i = 1; i < 12; i += 2) { + if (p[i] < disty) + disty = p[i]; + } + + if (distx == 8) + distx = 7; + + reg = R300_NIBBLES(p[0], p[1], p[2], p[3], p[4], p[5], disty, distx); + } else { + /* MSPOS1 contains positions for samples 3,4,5 as (X,Y) pairs of nibbles, + * followed by the minimum distance from the pixel edge (not sure if X or Y): + * X3, Y3, X4, Y4, X5, Y5, D1 + */ + dist = 11; + for (i = 0; i < 12; i++) { + if (p[i] < dist) + dist = p[i]; + } + + reg = R300_NIBBLES(p[6], p[7], p[8], p[9], p[10], p[11], dist, 0); + } + return reg; +} + void r300_emit_fb_state_pipelined(struct r300_context *r300, unsigned size, void *state) { + /* The sample coordinates are in the range [0,11], because + * GB_TILE_CONFIG.SUBPIXEL is set to the 1/12 subpixel precision. + * + * Some sample coordinates reach to neighboring pixels and should not be used. + * (e.g. Y=11) + * + * The unused samples must be set to the positions of other valid samples. */ + static unsigned sample_locs_1x[12] = { + 6,6, 6,6, 6,6, 6,6, 6,6, 6,6 + }; + static unsigned sample_locs_2x[12] = { + 3,9, 9,3, 9,3, 9,3, 9,3, 9,3 + }; + static unsigned sample_locs_4x[12] = { + 4,4, 8,8, 2,10, 10,2, 10,2, 10,2 + }; + static unsigned sample_locs_6x[12] = { + 3,1, 7,3, 11,5, 1,7, 5,9, 9,10 + }; + struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)r300->fb_state.state; unsigned i, num_cbufs = fb->nr_cbufs; @@ -492,7 +604,7 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300, * (must be written after unpipelined regs) */ OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4); for (i = 0; i < num_cbufs; i++) { - OUT_CS(r300_surface(fb->cbufs[i])->format); + OUT_CS(r300_surface(r300_get_nonnull_cb(fb, i))->format); } for (; i < 1; i++) { OUT_CS(R300_US_OUT_FMT_C4_8 | @@ -503,34 +615,26 @@ void r300_emit_fb_state_pipelined(struct r300_context *r300, OUT_CS(R300_US_OUT_FMT_UNUSED); } - /* Multisampling. Depends on framebuffer sample count. - * These are pipelined regs and as such cannot be moved - * to the AA state. */ - mspos0 = 0x66666666; - mspos1 = 0x6666666; - - if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) { - /* Subsample placement. These may not be optimal. */ - switch (fb->cbufs[0]->texture->nr_samples) { - case 2: - mspos0 = 0x33996633; - mspos1 = 0x6666663; - break; - case 3: - mspos0 = 0x33936933; - mspos1 = 0x6666663; - break; - case 4: - mspos0 = 0x33939933; - mspos1 = 0x3966663; - break; - case 6: - mspos0 = 0x22a2aa22; - mspos1 = 0x2a65672; - break; - default: - debug_printf("r300: Bad number of multisamples!\n"); - } + /* Set sample positions. It depends on the framebuffer sample count. + * These are pipelined regs and as such cannot be moved to the AA state. + */ + switch (r300->num_samples) { + default: + mspos0 = r300_get_mspos(0, sample_locs_1x); + mspos1 = r300_get_mspos(1, sample_locs_1x); + break; + case 2: + mspos0 = r300_get_mspos(0, sample_locs_2x); + mspos1 = r300_get_mspos(1, sample_locs_2x); + break; + case 4: + mspos0 = r300_get_mspos(0, sample_locs_4x); + mspos1 = r300_get_mspos(1, sample_locs_4x); + break; + case 6: + mspos0 = r300_get_mspos(0, sample_locs_6x); + mspos1 = r300_get_mspos(1, sample_locs_6x); + break; } OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2); @@ -548,7 +652,7 @@ void r300_emit_query_start(struct r300_context *r300, unsigned size, void*state) return; BEGIN_CS(size); - if (r300->screen->caps.family == CHIP_FAMILY_RV530) { + if (r300->screen->caps.family == CHIP_RV530) { OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL); } else { OUT_CS_REG(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL); @@ -562,11 +666,12 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, struct r300_query *query) { struct r300_capabilities* caps = &r300->screen->caps; + uint32_t gb_pipes = r300->screen->info.r300_num_gb_pipes; CS_LOCALS(r300); - assert(caps->num_frag_pipes); + assert(gb_pipes); - BEGIN_CS(6 * caps->num_frag_pipes + 2); + BEGIN_CS(6 * gb_pipes + 2); /* I'm not so sure I like this switch, but it's hard to be elegant * when there's so many special cases... * @@ -575,7 +680,7 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, * 4-byte offset for each pipe. RV380 and older are special; they have * only two pipes, and the second pipe's enable is on bit 3, not bit 1, * so there's a chipset cap for that. */ - switch (caps->num_frag_pipes) { + switch (gb_pipes) { case 4: /* pipe 3 only */ OUT_CS_REG(R300_SU_REG_DEST, 1 << 3); @@ -588,7 +693,7 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, OUT_CS_RELOC(r300->query_current); case 2: /* pipe 1 only */ - /* As mentioned above, accomodate RV380 and older. */ + /* As mentioned above, accommodate RV380 and older. */ OUT_CS_REG(R300_SU_REG_DEST, 1 << (caps->high_second_pipe ? 3 : 1)); OUT_CS_REG(R300_ZB_ZPASS_ADDR, (query->num_results + 1) * 4); @@ -601,7 +706,7 @@ static void r300_emit_query_end_frag_pipes(struct r300_context *r300, break; default: fprintf(stderr, "r300: Implementation error: Chipset reports %d" - " pixel pipes!\n", caps->num_frag_pipes); + " pixel pipes!\n", gb_pipes); abort(); } @@ -650,8 +755,8 @@ void r300_emit_query_end(struct r300_context* r300) if (query->begin_emitted == FALSE) return; - if (caps->family == CHIP_FAMILY_RV530) { - if (caps->num_z_pipes == 2) + if (caps->family == CHIP_RV530) { + if (r300->screen->info.r300_num_z_pipes == 2) rv530_emit_query_end_double_z(r300, query); else rv530_emit_query_end_single_z(r300, query); @@ -662,8 +767,8 @@ void r300_emit_query_end(struct r300_context* r300) query->num_results += query->num_pipes; /* XXX grab all the results and reset the counter. */ - if (query->num_results >= query->buffer_size / 4 - 4) { - query->num_results = (query->buffer_size / 4) / 2; + if (query->num_results >= query->buf->size / 4 - 4) { + query->num_results = (query->buf->size / 4) / 2; fprintf(stderr, "r300: Rewinding OQBO...\n"); } } @@ -746,6 +851,18 @@ void r300_emit_rs_block_state(struct r300_context* r300, END_CS; } +void r300_emit_sample_mask(struct r300_context *r300, + unsigned size, void *state) +{ + unsigned mask = (*(unsigned*)state) & ((1 << 6)-1); + CS_LOCALS(r300); + + BEGIN_CS(size); + OUT_CS_REG(R300_SC_SCREENDOOR, + mask | (mask << 6) | (mask << 12) | (mask << 18)); + END_CS; +} + void r300_emit_scissor_state(struct r300_context* r300, unsigned size, void* state) { @@ -775,6 +892,7 @@ void r300_emit_textures_state(struct r300_context *r300, struct r300_texture_sampler_state *texstate; struct r300_resource *tex; unsigned i; + boolean has_us_format = r300->screen->caps.has_us_format; CS_LOCALS(r300); BEGIN_CS(size); @@ -796,6 +914,11 @@ void r300_emit_textures_state(struct r300_context *r300, OUT_CS_REG(R300_TX_OFFSET_0 + (i * 4), texstate->format.tile_config); OUT_CS_RELOC(tex); + + if (has_us_format) { + OUT_CS_REG(R500_US_FORMAT0_0 + (i * 4), + texstate->format.us_format0); + } } } END_CS; @@ -804,8 +927,7 @@ void r300_emit_textures_state(struct r300_context *r300, void r300_emit_vertex_arrays(struct r300_context* r300, int offset, boolean indexed, int instance_id) { - struct pipe_vertex_buffer *vbuf = r300->vbuf_mgr->vertex_buffer; - struct pipe_resource **valid_vbuf = r300->vbuf_mgr->real_vertex_buffer; + struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; struct pipe_vertex_element *velem = r300->velems->velem; struct r300_resource *buf; int i; @@ -843,7 +965,7 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset, } for (i = 0; i < vertex_array_count; i++) { - buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]); + buf = r300_resource(vbuf[velem[i].vertex_buffer_index].buffer.resource); OUT_CS_RELOC(buf); } } else { @@ -895,7 +1017,7 @@ void r300_emit_vertex_arrays(struct r300_context* r300, int offset, } for (i = 0; i < vertex_array_count; i++) { - buf = r300_resource(valid_vbuf[velem[i].vertex_buffer_index]); + buf = r300_resource(vbuf[velem[i].vertex_buffer_index].buffer.resource); OUT_CS_RELOC(buf); } } @@ -923,7 +1045,10 @@ void r300_emit_vertex_arrays_swtcl(struct r300_context *r300, boolean indexed) (r300->vertex_info.size << 8)); OUT_CS(r300->draw_vbo_offset); OUT_CS(0); - OUT_CS_RELOC(r300_resource(r300->vbo)); + + assert(r300->vbo); + OUT_CS(0xc0001000); /* PKT3_NOP */ + OUT_CS(r300->rws->cs_lookup_buffer(r300->cs, r300->vbo) * 4); END_CS; } @@ -1010,22 +1135,21 @@ void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state) R300_PVS_NUM_CNTLRS(pvs_num_controllers) | R300_PVS_NUM_FPUS(r300screen->caps.num_vert_fpus) | R300_PVS_VF_MAX_VTX_NUM(12) | + (r300->clip_halfz ? R300_DX_CLIP_SPACE_DEF : 0) | (r300screen->caps.is_r500 ? R500_TCL_STATE_OPTIMIZATION : 0)); - /* Emit flow control instructions. */ - if (code->num_fc_ops) { - - OUT_CS_REG(R300_VAP_PVS_FLOW_CNTL_OPC, code->fc_ops); - if (r300screen->caps.is_r500) { - OUT_CS_REG_SEQ(R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0, code->num_fc_ops * 2); - OUT_CS_TABLE(code->fc_op_addrs.r500, code->num_fc_ops * 2); - } else { - OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_ADDRS_0, code->num_fc_ops); - OUT_CS_TABLE(code->fc_op_addrs.r300, code->num_fc_ops); - } - OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, code->num_fc_ops); - OUT_CS_TABLE(code->fc_loop_index, code->num_fc_ops); + /* Emit flow control instructions. Even if there are no fc instructions, + * we still need to write the registers to make sure they are cleared. */ + OUT_CS_REG(R300_VAP_PVS_FLOW_CNTL_OPC, code->fc_ops); + if (r300screen->caps.is_r500) { + OUT_CS_REG_SEQ(R500_VAP_PVS_FLOW_CNTL_ADDRS_LW_0, R300_VS_MAX_FC_OPS * 2); + OUT_CS_TABLE(code->fc_op_addrs.r500, R300_VS_MAX_FC_OPS * 2); + } else { + OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_ADDRS_0, R300_VS_MAX_FC_OPS); + OUT_CS_TABLE(code->fc_op_addrs.r300, R300_VS_MAX_FC_OPS); } + OUT_CS_REG_SEQ(R300_VAP_PVS_FLOW_CNTL_LOOP_INDEX_0, R300_VS_MAX_FC_OPS); + OUT_CS_TABLE(code->fc_loop_index, R300_VS_MAX_FC_OPS); END_CS; } @@ -1133,6 +1257,27 @@ void r300_emit_zmask_clear(struct r300_context *r300, unsigned size, void *state r300_mark_atom_dirty(r300, &r300->hyperz_state); } +void r300_emit_cmask_clear(struct r300_context *r300, unsigned size, void *state) +{ + struct pipe_framebuffer_state *fb = + (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_resource *tex; + CS_LOCALS(r300); + + tex = r300_resource(fb->cbufs[0]->texture); + + BEGIN_CS(size); + OUT_CS_PKT3(R300_PACKET3_3D_CLEAR_CMASK, 2); + OUT_CS(0); + OUT_CS(tex->tex.cmask_dwords); + OUT_CS(0); + END_CS; + + /* Mark the current zbuffer's zmask as in use. */ + r300->cmask_in_use = TRUE; + r300_mark_fb_state_dirty(r300, R300_CHANGED_CMASK_ENABLE); +} + void r300_emit_ztop_state(struct r300_context* r300, unsigned size, void* state) { @@ -1159,6 +1304,7 @@ boolean r300_emit_buffer_validate(struct r300_context *r300, { struct pipe_framebuffer_state *fb = (struct pipe_framebuffer_state*)r300->fb_state.state; + struct r300_aa_state *aa = (struct r300_aa_state*)r300->aa_state.state; struct r300_textures_state *texstate = (struct r300_textures_state*)r300->textures_state.state; struct r300_resource *tex; @@ -1169,17 +1315,36 @@ validate: if (r300->fb_state.dirty) { /* Color buffers... */ for (i = 0; i < fb->nr_cbufs; i++) { + if (!fb->cbufs[i]) + continue; tex = r300_resource(fb->cbufs[i]->texture); assert(tex && tex->buf && "cbuf is marked, but NULL!"); - r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, 0, - r300_surface(fb->cbufs[i])->domain); + r300->rws->cs_add_buffer(r300->cs, tex->buf, + RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED, + r300_surface(fb->cbufs[i])->domain, + tex->b.b.nr_samples > 1 ? + RADEON_PRIO_COLOR_BUFFER_MSAA : + RADEON_PRIO_COLOR_BUFFER); } /* ...depth buffer... */ if (fb->zsbuf) { tex = r300_resource(fb->zsbuf->texture); assert(tex && tex->buf && "zsbuf is marked, but NULL!"); - r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, 0, - r300_surface(fb->zsbuf)->domain); + r300->rws->cs_add_buffer(r300->cs, tex->buf, + RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED, + r300_surface(fb->zsbuf)->domain, + tex->b.b.nr_samples > 1 ? + RADEON_PRIO_DEPTH_BUFFER_MSAA : + RADEON_PRIO_DEPTH_BUFFER); + } + } + /* The AA resolve buffer. */ + if (r300->aa_state.dirty) { + if (aa->dest) { + r300->rws->cs_add_buffer(r300->cs, aa->dest->buf, + RADEON_USAGE_WRITE | RADEON_USAGE_SYNCHRONIZED, + aa->dest->domain, + RADEON_PRIO_COLOR_BUFFER); } } if (r300->textures_state.dirty) { @@ -1190,42 +1355,54 @@ validate: } tex = r300_resource(texstate->sampler_views[i]->base.texture); - r300->rws->cs_add_reloc(r300->cs, tex->cs_buf, tex->domain, 0); + r300->rws->cs_add_buffer(r300->cs, tex->buf, + RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED, + tex->domain, RADEON_PRIO_SAMPLER_TEXTURE); } } /* ...occlusion query buffer... */ if (r300->query_current) - r300->rws->cs_add_reloc(r300->cs, r300->query_current->cs_buf, - 0, r300->query_current->domain); + r300->rws->cs_add_buffer(r300->cs, r300->query_current->buf, + RADEON_USAGE_WRITE | RADEON_USAGE_SYNCHRONIZED, + RADEON_DOMAIN_GTT, + RADEON_PRIO_QUERY); /* ...vertex buffer for SWTCL path... */ if (r300->vbo) - r300->rws->cs_add_reloc(r300->cs, r300_resource(r300->vbo)->cs_buf, - r300_resource(r300->vbo)->domain, 0); + r300->rws->cs_add_buffer(r300->cs, r300->vbo, + RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED, + RADEON_DOMAIN_GTT, + RADEON_PRIO_VERTEX_BUFFER); /* ...vertex buffers for HWTCL path... */ if (do_validate_vertex_buffers && r300->vertex_arrays_dirty) { - struct pipe_resource **buf = r300->vbuf_mgr->real_vertex_buffer; - struct pipe_resource **last = r300->vbuf_mgr->real_vertex_buffer + - r300->vbuf_mgr->nr_real_vertex_buffers; - for (; buf != last; buf++) { - if (!*buf) + struct pipe_vertex_buffer *vbuf = r300->vertex_buffer; + struct pipe_vertex_buffer *last = r300->vertex_buffer + + r300->nr_vertex_buffers; + struct pipe_resource *buf; + + for (; vbuf != last; vbuf++) { + buf = vbuf->buffer.resource; + if (!buf) continue; - r300->rws->cs_add_reloc(r300->cs, r300_resource(*buf)->cs_buf, - r300_resource(*buf)->domain, 0); + r300->rws->cs_add_buffer(r300->cs, r300_resource(buf)->buf, + RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED, + r300_resource(buf)->domain, + RADEON_PRIO_SAMPLER_BUFFER); } } /* ...and index buffer for HWTCL path. */ if (index_buffer) - r300->rws->cs_add_reloc(r300->cs, r300_resource(index_buffer)->cs_buf, - r300_resource(index_buffer)->domain, 0); + r300->rws->cs_add_buffer(r300->cs, r300_resource(index_buffer)->buf, + RADEON_USAGE_READ | RADEON_USAGE_SYNCHRONIZED, + r300_resource(index_buffer)->domain, + RADEON_PRIO_INDEX_BUFFER); - /* Now do the validation. */ + /* Now do the validation (flush is called inside cs_validate on failure). */ if (!r300->rws->cs_validate(r300->cs)) { /* Ooops, an infinite loop, give up. */ if (flushed) return FALSE; - r300_flush(&r300->context, R300_FLUSH_ASYNC, NULL); flushed = TRUE; goto validate; } @@ -1258,7 +1435,8 @@ unsigned r300_get_num_cs_end_dwords(struct r300_context *r300) dwords += 26; /* emit_query_end */ dwords += r300->hyperz_state.size + 2; /* emit_hyperz_end + zcache flush */ if (r300->screen->caps.is_r500) - dwords += 2; + dwords += 2; /* emit_index_bias */ + dwords += 3; /* MSPOS */ return dwords; }