X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fradeonsi_shader.c;h=88fc0400a8315ac3a4ea5b2325f17a7fa6d94433;hb=d7d539a1cb8dcf50cb7cd534e6ae7df3f42914c8;hp=a1dec1757cb8bf2557f6cef182965e3f9517debf;hpb=237cb074cb0efa50633f35e737122471957747b2;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c index a1dec1757cb..88fc0400a83 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c @@ -34,6 +34,7 @@ #include "gallivm/lp_bld_logic.h" #include "gallivm/lp_bld_tgsi.h" #include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_flow.h" #include "radeon_llvm.h" #include "radeon_llvm_emit.h" #include "util/u_memory.h" @@ -59,6 +60,11 @@ struct si_shader_context struct tgsi_token * tokens; struct si_pipe_shader *shader; unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */ + int param_streamout_config; + int param_streamout_write_index; + int param_streamout_offset[4]; + int param_vertex_id; + int param_instance_id; LLVMValueRef const_md; LLVMValueRef const_resource; #if HAVE_LLVM >= 0x0304 @@ -67,6 +73,7 @@ struct si_shader_context LLVMValueRef *constants; LLVMValueRef *resources; LLVMValueRef *samplers; + LLVMValueRef so_buffers[4]; }; static struct si_shader_context * si_shader_context( @@ -115,13 +122,16 @@ static LLVMValueRef build_indexed_load( return result; } -static LLVMValueRef get_instance_index( +static LLVMValueRef get_instance_index_for_fetch( struct radeon_llvm_context * radeon_bld, unsigned divisor) { + struct si_shader_context *si_shader_ctx = + si_shader_context(&radeon_bld->soa.bld_base); struct gallivm_state * gallivm = radeon_bld->soa.bld_base.base.gallivm; - LLVMValueRef result = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_INSTANCE_ID); + LLVMValueRef result = LLVMGetParam(radeon_bld->main_fn, + si_shader_ctx->param_instance_id); result = LLVMBuildAdd(gallivm->builder, result, LLVMGetParam( radeon_bld->main_fn, SI_PARAM_START_INSTANCE), ""); @@ -164,11 +174,12 @@ static void declare_input_vs( if (divisor) { /* Build index from instance ID, start instance and divisor */ si_shader_ctx->shader->shader.uses_instanceid = true; - buffer_index = get_instance_index(&si_shader_ctx->radeon_bld, divisor); + buffer_index = get_instance_index_for_fetch(&si_shader_ctx->radeon_bld, divisor); } else { /* Load the buffer index, which is always stored in VGPR0 * for Vertex Shaders */ - buffer_index = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_VERTEX_ID); + buffer_index = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, + si_shader_ctx->param_vertex_id); } vec4_type = LLVMVectorType(base->elem_type, 4); @@ -397,16 +408,19 @@ static void declare_system_value( unsigned index, const struct tgsi_full_declaration *decl) { - + struct si_shader_context *si_shader_ctx = + si_shader_context(&radeon_bld->soa.bld_base); LLVMValueRef value = 0; switch (decl->Semantic.Name) { case TGSI_SEMANTIC_INSTANCEID: - value = get_instance_index(radeon_bld, 1); + value = LLVMGetParam(radeon_bld->main_fn, + si_shader_ctx->param_instance_id); break; case TGSI_SEMANTIC_VERTEXID: - value = LLVMGetParam(radeon_bld->main_fn, SI_PARAM_VERTEX_ID); + value = LLVMGetParam(radeon_bld->main_fn, + si_shader_ctx->param_vertex_id); break; default: @@ -651,6 +665,206 @@ static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base, } } +static void si_dump_streamout(struct pipe_stream_output_info *so) +{ + unsigned i; + + if (so->num_outputs) + fprintf(stderr, "STREAMOUT\n"); + + for (i = 0; i < so->num_outputs; i++) { + unsigned mask = ((1 << so->output[i].num_components) - 1) << + so->output[i].start_component; + fprintf(stderr, " %i: BUF%i[%i..%i] <- OUT[%i].%s%s%s%s\n", + i, so->output[i].output_buffer, + so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1, + so->output[i].register_index, + mask & 1 ? "x" : "", + mask & 2 ? "y" : "", + mask & 4 ? "z" : "", + mask & 8 ? "w" : ""); + } +} + +/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4. + * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2), + * or v4i32 (num_channels=3,4). */ +static void build_tbuffer_store(struct si_shader_context *shader, + LLVMValueRef rsrc, + LLVMValueRef vdata, + unsigned num_channels, + LLVMValueRef vaddr, + LLVMValueRef soffset, + unsigned inst_offset, + unsigned dfmt, + unsigned nfmt, + unsigned offen, + unsigned idxen, + unsigned glc, + unsigned slc, + unsigned tfe) +{ + struct gallivm_state *gallivm = &shader->radeon_bld.gallivm; + LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context); + LLVMValueRef args[] = { + rsrc, + vdata, + LLVMConstInt(i32, num_channels, 0), + vaddr, + soffset, + LLVMConstInt(i32, inst_offset, 0), + LLVMConstInt(i32, dfmt, 0), + LLVMConstInt(i32, nfmt, 0), + LLVMConstInt(i32, offen, 0), + LLVMConstInt(i32, idxen, 0), + LLVMConstInt(i32, glc, 0), + LLVMConstInt(i32, slc, 0), + LLVMConstInt(i32, tfe, 0) + }; + + /* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */ + unsigned func = CLAMP(num_channels, 1, 3) - 1; + const char *types[] = {"i32", "v2i32", "v4i32"}; + char name[256]; + snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]); + + lp_build_intrinsic(gallivm->builder, name, + LLVMVoidTypeInContext(gallivm->context), + args, Elements(args)); +} + +static void build_streamout_store(struct si_shader_context *shader, + LLVMValueRef rsrc, + LLVMValueRef vdata, + unsigned num_channels, + LLVMValueRef vaddr, + LLVMValueRef soffset, + unsigned inst_offset) +{ + static unsigned dfmt[] = { + V_008F0C_BUF_DATA_FORMAT_32, + V_008F0C_BUF_DATA_FORMAT_32_32, + V_008F0C_BUF_DATA_FORMAT_32_32_32, + V_008F0C_BUF_DATA_FORMAT_32_32_32_32 + }; + assert(num_channels >= 1 && num_channels <= 4); + + build_tbuffer_store(shader, rsrc, vdata, num_channels, vaddr, soffset, + inst_offset, dfmt[num_channels-1], + V_008F0C_BUF_NUM_FORMAT_UINT, 1, 0, 1, 1, 0); +} + +/* On SI, the vertex shader is responsible for writing streamout data + * to buffers. */ +static void si_llvm_emit_streamout(struct si_shader_context *shader) +{ + struct pipe_stream_output_info *so = &shader->shader->selector->so; + struct gallivm_state *gallivm = &shader->radeon_bld.gallivm; + LLVMBuilderRef builder = gallivm->builder; + int i, j; + struct lp_build_if_state if_ctx; + + LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context); + + LLVMValueRef so_param = + LLVMGetParam(shader->radeon_bld.main_fn, + shader->param_streamout_config); + + /* Get bits [22:16], i.e. (so_param >> 16) & 127; */ + LLVMValueRef so_vtx_count = + LLVMBuildAnd(builder, + LLVMBuildLShr(builder, so_param, + LLVMConstInt(i32, 16, 0), ""), + LLVMConstInt(i32, 127, 0), ""); + + LLVMValueRef tid = build_intrinsic(builder, "llvm.SI.tid", i32, + NULL, 0, LLVMReadNoneAttribute); + + /* can_emit = tid < so_vtx_count; */ + LLVMValueRef can_emit = + LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, ""); + + /* Emit the streamout code conditionally. This actually avoids + * out-of-bounds buffer access. The hw tells us via the SGPR + * (so_vtx_count) which threads are allowed to emit streamout data. */ + lp_build_if(&if_ctx, gallivm, can_emit); + { + /* The buffer offset is computed as follows: + * ByteOffset = streamout_offset[buffer_id]*4 + + * (streamout_write_index + thread_id)*stride[buffer_id] + + * attrib_offset + */ + + LLVMValueRef so_write_index = + LLVMGetParam(shader->radeon_bld.main_fn, + shader->param_streamout_write_index); + + /* Compute (streamout_write_index + thread_id). */ + so_write_index = LLVMBuildAdd(builder, so_write_index, tid, ""); + + /* Compute the write offset for each enabled buffer. */ + LLVMValueRef so_write_offset[4] = {}; + for (i = 0; i < 4; i++) { + if (!so->stride[i]) + continue; + + LLVMValueRef so_offset = LLVMGetParam(shader->radeon_bld.main_fn, + shader->param_streamout_offset[i]); + so_offset = LLVMBuildMul(builder, so_offset, LLVMConstInt(i32, 4, 0), ""); + + so_write_offset[i] = LLVMBuildMul(builder, so_write_index, + LLVMConstInt(i32, so->stride[i]*4, 0), ""); + so_write_offset[i] = LLVMBuildAdd(builder, so_write_offset[i], so_offset, ""); + } + + LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS] = shader->radeon_bld.soa.outputs; + + /* Write streamout data. */ + for (i = 0; i < so->num_outputs; i++) { + unsigned buf_idx = so->output[i].output_buffer; + unsigned reg = so->output[i].register_index; + unsigned start = so->output[i].start_component; + unsigned num_comps = so->output[i].num_components; + LLVMValueRef out[4]; + + assert(num_comps && num_comps <= 4); + if (!num_comps || num_comps > 4) + continue; + + /* Load the output as int. */ + for (j = 0; j < num_comps; j++) { + out[j] = LLVMBuildLoad(builder, outputs[reg][start+j], ""); + out[j] = LLVMBuildBitCast(builder, out[j], i32, ""); + } + + /* Pack the output. */ + LLVMValueRef vdata = NULL; + + switch (num_comps) { + case 1: /* as i32 */ + vdata = out[0]; + break; + case 2: /* as v2i32 */ + case 3: /* as v4i32 (aligned to 4) */ + case 4: /* as v4i32 */ + vdata = LLVMGetUndef(LLVMVectorType(i32, util_next_power_of_two(num_comps))); + for (j = 0; j < num_comps; j++) { + vdata = LLVMBuildInsertElement(builder, vdata, out[j], + LLVMConstInt(i32, j, 0), ""); + } + break; + } + + build_streamout_store(shader, shader->so_buffers[buf_idx], + vdata, num_comps, + so_write_offset[buf_idx], + LLVMConstInt(i32, 0, 0), + so->output[i].dst_offset*4); + } + } + lp_build_endif(&if_ctx); +} + /* XXX: This is partially implemented for VS only at this point. It is not complete */ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) { @@ -669,6 +883,10 @@ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) int depth_index = -1, stencil_index = -1; int i; + if (si_shader_ctx->shader->selector->so.num_outputs) { + si_llvm_emit_streamout(si_shader_ctx); + } + while (!tgsi_parse_end_of_tokens(parse)) { struct tgsi_full_declaration *d = &parse->FullToken.FullDeclaration; @@ -826,7 +1044,10 @@ handle_semantic: args[7] = args[8] = args[6] = LLVMBuildLoad(base->gallivm->builder, out_ptr, ""); - mask |= 0x2; + /* Only setting the stencil component bit (0x2) here + * breaks some stencil piglit tests + */ + mask |= 0x3; if (depth_index < 0) args[5] = args[6]; @@ -951,8 +1172,6 @@ static void tex_fetch_args( unsigned sampler_src, sampler_index; LLVMValueRef coords[4]; LLVMValueRef address[16]; - LLVMValueRef sample_index_rewrite = NULL; - LLVMValueRef sample_chan = NULL; int ref_pos; unsigned num_coords = tgsi_util_get_texture_coord_dim(target, &ref_pos); unsigned count = 0; @@ -1012,7 +1231,7 @@ static void tex_fetch_args( if (num_coords > 2) address[count++] = coords[2]; - /* Pack LOD */ + /* Pack LOD or sample index */ if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXF) address[count++] = coords[3]; @@ -1049,10 +1268,15 @@ static void tex_fetch_args( target == TGSI_TEXTURE_2D_ARRAY_MSAA) { struct lp_build_context *uint_bld = &bld_base->uint_bld; struct lp_build_emit_data txf_emit_data = *emit_data; - LLVMValueRef txf_address[16]; + LLVMValueRef txf_address[4]; unsigned txf_count = count; - memcpy(txf_address, address, sizeof(address)); + memcpy(txf_address, address, sizeof(txf_address)); + + if (target == TGSI_TEXTURE_2D_MSAA) { + txf_address[2] = bld_base->uint_bld.zero; + } + txf_address[3] = bld_base->uint_bld.zero; /* Pad to a power-of-two size. */ while (txf_count < util_next_power_of_two(txf_count)) @@ -1064,18 +1288,13 @@ static void tex_fetch_args( LLVMInt32TypeInContext(bld_base->base.gallivm->context), 4); txf_emit_data.args[0] = lp_build_gather_values(gallivm, txf_address, txf_count); txf_emit_data.args[1] = si_shader_ctx->resources[FMASK_TEX_OFFSET + sampler_index]; - txf_emit_data.args[2] = lp_build_const_int32(bld_base->base.gallivm, target); + txf_emit_data.args[2] = lp_build_const_int32(bld_base->base.gallivm, + target == TGSI_TEXTURE_2D_MSAA ? TGSI_TEXTURE_2D : TGSI_TEXTURE_2D_ARRAY); txf_emit_data.arg_count = 3; build_tex_intrinsic(&txf_action, bld_base, &txf_emit_data); /* Initialize some constants. */ - if (target == TGSI_TEXTURE_2D_MSAA) { - sample_chan = LLVMConstInt(uint_bld->elem_type, 2, 0); - } else { - sample_chan = LLVMConstInt(uint_bld->elem_type, 3, 0); - } - LLVMValueRef four = LLVMConstInt(uint_bld->elem_type, 4, 0); LLVMValueRef F = LLVMConstInt(uint_bld->elem_type, 0xF, 0); @@ -1085,13 +1304,10 @@ static void tex_fetch_args( txf_emit_data.output[0], uint_bld->zero, ""); - LLVMValueRef sample_index = - LLVMBuildExtractElement(gallivm->builder, - txf_emit_data.args[0], - sample_chan, ""); + unsigned sample_chan = target == TGSI_TEXTURE_2D_MSAA ? 2 : 3; LLVMValueRef sample_index4 = - LLVMBuildMul(gallivm->builder, sample_index, four, ""); + LLVMBuildMul(gallivm->builder, address[sample_chan], four, ""); LLVMValueRef shifted_fmask = LLVMBuildLShr(gallivm->builder, fmask, sample_index4, ""); @@ -1115,9 +1331,10 @@ static void tex_fetch_args( LLVMBuildICmp(gallivm->builder, LLVMIntNE, fmask_word1, uint_bld->zero, ""); - sample_index_rewrite = + /* Replace the MSAA sample index. */ + address[sample_chan] = LLVMBuildSelect(gallivm->builder, word1_is_nonzero, - final_sample, sample_index, ""); + final_sample, address[sample_chan], ""); } /* Resource */ @@ -1132,17 +1349,31 @@ static void tex_fetch_args( assert(inst->Texture.NumOffsets == 1); - address[0] = - lp_build_add(uint_bld, address[0], - bld->immediates[off->Index][off->SwizzleX]); - if (num_coords > 1) + switch (target) { + case TGSI_TEXTURE_3D: + address[2] = lp_build_add(uint_bld, address[2], + bld->immediates[off->Index][off->SwizzleZ]); + /* fall through */ + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOWRECT: + case TGSI_TEXTURE_2D_ARRAY: + case TGSI_TEXTURE_SHADOW2D_ARRAY: address[1] = lp_build_add(uint_bld, address[1], - bld->immediates[off->Index][off->SwizzleY]); - if (num_coords > 2) - address[2] = - lp_build_add(uint_bld, address[2], - bld->immediates[off->Index][off->SwizzleZ]); + bld->immediates[off->Index][off->SwizzleY]); + /* fall through */ + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: + case TGSI_TEXTURE_1D_ARRAY: + case TGSI_TEXTURE_SHADOW1D_ARRAY: + address[0] = + lp_build_add(uint_bld, address[0], + bld->immediates[off->Index][off->SwizzleX]); + break; + /* texture offsets do not apply to other texture targets */ + } } emit_data->dst_type = LLVMVectorType( @@ -1170,13 +1401,6 @@ static void tex_fetch_args( address[count++] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); emit_data->args[0] = lp_build_gather_values(gallivm, address, count); - - /* Replace the MSAA sample index if needed. */ - if (sample_index_rewrite) { - emit_data->args[0] = - LLVMBuildInsertElement(gallivm->builder, emit_data->args[0], - sample_index_rewrite, sample_chan, ""); - } } static void build_tex_intrinsic(const struct lp_build_tgsi_action * action, @@ -1346,7 +1570,7 @@ static void create_function(struct si_shader_context *si_shader_ctx) struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base; struct gallivm_state *gallivm = bld_base->base.gallivm; LLVMTypeRef params[20], f32, i8, i32, v2i32, v3i32; - unsigned i; + unsigned i, last_sgpr, num_params; i8 = LLVMInt8TypeInContext(gallivm->context); i32 = LLVMInt32TypeInContext(gallivm->context); @@ -1358,17 +1582,40 @@ static void create_function(struct si_shader_context *si_shader_ctx) params[SI_PARAM_SAMPLER] = params[SI_PARAM_CONST]; params[SI_PARAM_RESOURCE] = LLVMPointerType(LLVMVectorType(i8, 32), CONST_ADDR_SPACE); - if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) { - params[SI_PARAM_VERTEX_BUFFER] = params[SI_PARAM_SAMPLER]; + switch (si_shader_ctx->type) { + case TGSI_PROCESSOR_VERTEX: + params[SI_PARAM_VERTEX_BUFFER] = params[SI_PARAM_CONST]; + params[SI_PARAM_SO_BUFFER] = params[SI_PARAM_CONST]; params[SI_PARAM_START_INSTANCE] = i32; - params[SI_PARAM_VERTEX_ID] = i32; - params[SI_PARAM_DUMMY_0] = i32; - params[SI_PARAM_DUMMY_1] = i32; - params[SI_PARAM_INSTANCE_ID] = i32; - radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, 9); + num_params = SI_PARAM_START_INSTANCE+1; - } else { + /* The locations of the other parameters are assigned dynamically. */ + + /* Streamout SGPRs. */ + if (si_shader_ctx->shader->selector->so.num_outputs) { + params[si_shader_ctx->param_streamout_config = num_params++] = i32; + params[si_shader_ctx->param_streamout_write_index = num_params++] = i32; + } + /* A streamout buffer offset is loaded if the stride is non-zero. */ + for (i = 0; i < 4; i++) { + if (!si_shader_ctx->shader->selector->so.stride[i]) + continue; + + params[si_shader_ctx->param_streamout_offset[i] = num_params++] = i32; + } + + last_sgpr = num_params-1; + + /* VGPRs */ + params[si_shader_ctx->param_vertex_id = num_params++] = i32; + params[num_params++] = i32; /* unused*/ + params[num_params++] = i32; /* unused */ + params[si_shader_ctx->param_instance_id = num_params++] = i32; + break; + + case TGSI_PROCESSOR_FRAGMENT: params[SI_PARAM_PRIM_MASK] = i32; + last_sgpr = SI_PARAM_PRIM_MASK; params[SI_PARAM_PERSP_SAMPLE] = v2i32; params[SI_PARAM_PERSP_CENTER] = v2i32; params[SI_PARAM_PERSP_CENTROID] = v2i32; @@ -1385,18 +1632,20 @@ static void create_function(struct si_shader_context *si_shader_ctx) params[SI_PARAM_ANCILLARY] = f32; params[SI_PARAM_SAMPLE_COVERAGE] = f32; params[SI_PARAM_POS_FIXED_PT] = f32; - radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, 20); + num_params = SI_PARAM_POS_FIXED_PT+1; + break; + + default: + assert(0 && "unimplemented shader"); + return; } + assert(num_params <= Elements(params)); + radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, num_params); radeon_llvm_shader_type(si_shader_ctx->radeon_bld.main_fn, si_shader_ctx->type); - for (i = SI_PARAM_CONST; i <= SI_PARAM_VERTEX_BUFFER; ++i) { - LLVMValueRef P = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, i); - LLVMAddAttribute(P, LLVMInRegAttribute); - } - if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) { - LLVMValueRef P = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, - SI_PARAM_START_INSTANCE); + for (i = 0; i <= last_sgpr; ++i) { + LLVMValueRef P = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, i); LLVMAddAttribute(P, LLVMInRegAttribute); } @@ -1482,20 +1731,40 @@ static void preload_samplers(struct si_shader_context *si_shader_ctx) } } +static void preload_streamout_buffers(struct si_shader_context *si_shader_ctx) +{ + struct lp_build_tgsi_context * bld_base = &si_shader_ctx->radeon_bld.soa.bld_base; + struct gallivm_state * gallivm = bld_base->base.gallivm; + unsigned i; + + if (!si_shader_ctx->shader->selector->so.num_outputs) + return; + + LLVMValueRef buf_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, + SI_PARAM_SO_BUFFER); + + /* Load the resources, we rely on the code sinking to do the rest */ + for (i = 0; i < 4; ++i) { + if (si_shader_ctx->shader->selector->so.stride[i]) { + LLVMValueRef offset = lp_build_const_int32(gallivm, i); + + si_shader_ctx->so_buffers[i] = build_indexed_load(si_shader_ctx, buf_ptr, offset); + } + } +} + int si_compile_llvm(struct r600_context *rctx, struct si_pipe_shader *shader, LLVMModuleRef mod) { unsigned i; uint32_t *ptr; - bool dump; struct radeon_llvm_binary binary; - - dump = debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE); - + bool dump = r600_can_dump_shader(&rctx->screen->b, + shader->selector ? shader->selector->tokens : NULL); memset(&binary, 0, sizeof(binary)); radeon_llvm_compile(mod, &binary, - r600_get_llvm_processor_name(rctx->screen->family), dump); - if (dump) { + r600_get_llvm_processor_name(rctx->screen->b.family), dump); + if (dump && ! binary.disassembled) { fprintf(stderr, "SI CODE:\n"); for (i = 0; i < binary.code_size; i+=4 ) { fprintf(stderr, "%02x%02x%02x%02x\n", binary.code[i + 3], @@ -1535,14 +1804,14 @@ int si_compile_llvm(struct r600_context *rctx, struct si_pipe_shader *shader, } /* copy new shader */ - si_resource_reference(&shader->bo, NULL); - shader->bo = si_resource_create_custom(rctx->context.screen, PIPE_USAGE_IMMUTABLE, + r600_resource_reference(&shader->bo, NULL); + shader->bo = r600_resource_create_custom(rctx->b.b.screen, PIPE_USAGE_IMMUTABLE, binary.code_size); if (shader->bo == NULL) { return -ENOMEM; } - ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->cs_buf, rctx->cs, PIPE_TRANSFER_WRITE); + ptr = (uint32_t*)rctx->b.ws->buffer_map(shader->bo->cs_buf, rctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE); if (0 /*R600_BIG_ENDIAN*/) { for (i = 0; i < binary.code_size / 4; ++i) { ptr[i] = util_bswap32(*(uint32_t*)(binary.code + i*4)); @@ -1550,7 +1819,7 @@ int si_compile_llvm(struct r600_context *rctx, struct si_pipe_shader *shader, } else { memcpy(ptr, binary.code, binary.code_size); } - rctx->ws->buffer_unmap(shader->bo->cs_buf); + rctx->b.ws->buffer_unmap(shader->bo->cs_buf); free(binary.code); free(binary.config); @@ -1568,10 +1837,8 @@ int si_pipe_shader_create( struct tgsi_shader_info shader_info; struct lp_build_tgsi_context * bld_base; LLVMModuleRef mod; - bool dump; int r = 0; - - dump = debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE); + bool dump = r600_can_dump_shader(&rctx->screen->b, shader->selector->tokens); assert(shader->shader.noutput == 0); assert(shader->shader.ninterp == 0); @@ -1615,6 +1882,7 @@ int si_pipe_shader_create( create_function(&si_shader_ctx); preload_constants(&si_shader_ctx); preload_samplers(&si_shader_ctx); + preload_streamout_buffers(&si_shader_ctx); shader->shader.nr_cbufs = rctx->framebuffer.nr_cbufs; @@ -1622,6 +1890,7 @@ int si_pipe_shader_create( * conversion fails. */ if (dump) { tgsi_dump(sel->tokens, 0); + si_dump_streamout(&sel->so); } if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) { @@ -1649,5 +1918,5 @@ int si_pipe_shader_create( void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader) { - si_resource_reference(&shader->bo, NULL); + r600_resource_reference(&shader->bo, NULL); }