/* Create the compute shader function. */
unsigned old_type = ctx->type;
+ gl_shader_stage old_stage = ctx->stage;
ctx->type = PIPE_SHADER_COMPUTE;
+ ctx->stage = MESA_SHADER_COMPUTE;
si_llvm_create_func(ctx, "prim_discard_cs", NULL, 0, THREADGROUP_SIZE);
ctx->type = old_type;
+ ctx->stage = old_stage;
if (VERTEX_COUNTER_GDS_MODE == 2) {
ac_llvm_add_target_dep_function_attr(ctx->main_fn, "amdgpu-gds-size", 256);
if (key->opt.cs_indexed) {
for (unsigned i = 0; i < 3; i++) {
index[i] = ac_build_buffer_load_format(&ctx->ac, input_indexbuf, index[i], ctx->ac.i32_0,
- 1, 0, true);
+ 1, 0, true, false);
index[i] = ac_to_integer(&ctx->ac, index[i]);
}
}
if (!ac_has_vec3_support(ctx->ac.chip_class, true))
vdata = ac_build_expand_to_vec4(&ctx->ac, vdata, 3);
- ac_build_buffer_store_format(&ctx->ac, output_indexbuf, vdata, vindex, ctx->ac.i32_0, 3,
+ ac_build_buffer_store_format(&ctx->ac, output_indexbuf, vdata, vindex, ctx->ac.i32_0,
ac_glc | (INDEX_STORES_USE_SLC ? ac_slc : 0));
}
ac_build_endif(&ctx->ac, 16607);
unsigned num_subdraws = DIV_ROUND_UP(num_prims, SPLIT_PRIMS_PACKET_LEVEL);
unsigned need_compute_dw = 11 /* shader */ + 34 /* first draw */ +
24 * (num_subdraws - 1) + /* subdraws */
- 20; /* leave some space at the end */
+ 30; /* leave some space at the end */
unsigned need_gfx_dw = si_get_minimum_num_gfx_cs_dwords(sctx);
if (sctx->chip_class <= GFX7 || FORCE_REWIND_EMULATION)