- struct si_shader *shader = ctx->shader;
- unsigned tess_inner_index, tess_outer_index;
- LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
- LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4];
- unsigned stride, outer_comps, inner_comps, i, offset;
-
- /* Add a barrier before loading tess factors from LDS. */
- if (!shader->key.part.tcs.epilog.invoc0_tess_factors_are_def)
- si_llvm_emit_barrier(ctx);
-
- /* Do this only for invocation 0, because the tess levels are per-patch,
- * not per-vertex.
- *
- * This can't jump, because invocation 0 executes this. It should
- * at least mask out the loads and stores for other invocations.
- */
- ac_build_ifcc(&ctx->ac,
- LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
- invocation_id, ctx->i32_0, ""), 6503);
-
- /* Determine the layout of one tess factor element in the buffer. */
- switch (shader->key.part.tcs.epilog.prim_mode) {
- case PIPE_PRIM_LINES:
- stride = 2; /* 2 dwords, 1 vec2 store */
- outer_comps = 2;
- inner_comps = 0;
- break;
- case PIPE_PRIM_TRIANGLES:
- stride = 4; /* 4 dwords, 1 vec4 store */
- outer_comps = 3;
- inner_comps = 1;
- break;
- case PIPE_PRIM_QUADS:
- stride = 6; /* 6 dwords, 2 stores (vec4 + vec2) */
- outer_comps = 4;
- inner_comps = 2;
- break;
- default:
- assert(0);
- return;
- }
-
- for (i = 0; i < 4; i++) {
- inner[i] = LLVMGetUndef(ctx->i32);
- outer[i] = LLVMGetUndef(ctx->i32);
- }
-
- if (shader->key.part.tcs.epilog.invoc0_tess_factors_are_def) {
- /* Tess factors are in VGPRs. */
- for (i = 0; i < outer_comps; i++)
- outer[i] = out[i] = invoc0_tf_outer[i];
- for (i = 0; i < inner_comps; i++)
- inner[i] = out[outer_comps+i] = invoc0_tf_inner[i];
- } else {
- /* Load tess_inner and tess_outer from LDS.
- * Any invocation can write them, so we can't get them from a temporary.
- */
- tess_inner_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER, 0);
- tess_outer_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER, 0);
-
- lds_base = tcs_out_current_patch_data_offset;
- lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_base,
- LLVMConstInt(ctx->i32,
- tess_inner_index * 4, 0), "");
- lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_base,
- LLVMConstInt(ctx->i32,
- tess_outer_index * 4, 0), "");
-
- for (i = 0; i < outer_comps; i++) {
- outer[i] = out[i] =
- lshs_lds_load(ctx, ctx->ac.i32, i, lds_outer);
- }
- for (i = 0; i < inner_comps; i++) {
- inner[i] = out[outer_comps+i] =
- lshs_lds_load(ctx, ctx->ac.i32, i, lds_inner);
- }
- }
-
- if (shader->key.part.tcs.epilog.prim_mode == PIPE_PRIM_LINES) {
- /* For isolines, the hardware expects tess factors in the
- * reverse order from what NIR specifies.
- */
- LLVMValueRef tmp = out[0];
- out[0] = out[1];
- out[1] = tmp;
- }
-
- /* Convert the outputs to vectors for stores. */
- vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4));
- vec1 = NULL;
-
- if (stride > 4)
- vec1 = ac_build_gather_values(&ctx->ac, out+4, stride - 4);
-
- /* Get the buffer. */
- buffer = get_tess_ring_descriptor(ctx, TCS_FACTOR_RING);
-
- /* Get the offset. */
- tf_base = ac_get_arg(&ctx->ac,
- ctx->tcs_factor_offset);
- byteoffset = LLVMBuildMul(ctx->ac.builder, rel_patch_id,
- LLVMConstInt(ctx->i32, 4 * stride, 0), "");
-
- ac_build_ifcc(&ctx->ac,
- LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
- rel_patch_id, ctx->i32_0, ""), 6504);
-
- /* Store the dynamic HS control word. */
- offset = 0;
- if (ctx->screen->info.chip_class <= GFX8) {
- ac_build_buffer_store_dword(&ctx->ac, buffer,
- LLVMConstInt(ctx->i32, 0x80000000, 0),
- 1, ctx->i32_0, tf_base,
- offset, ac_glc);
- offset += 4;
- }
-
- ac_build_endif(&ctx->ac, 6504);
-
- /* Store the tessellation factors. */
- ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
- MIN2(stride, 4), byteoffset, tf_base,
- offset, ac_glc);
- offset += 16;
- if (vec1)
- ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
- stride - 4, byteoffset, tf_base,
- offset, ac_glc);
-
- /* Store the tess factors into the offchip buffer if TES reads them. */
- if (shader->key.part.tcs.epilog.tes_reads_tess_factors) {
- LLVMValueRef buf, base, inner_vec, outer_vec, tf_outer_offset;
- LLVMValueRef tf_inner_offset;
- unsigned param_outer, param_inner;
-
- buf = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
- base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
-
- param_outer = si_shader_io_get_unique_index_patch(
- TGSI_SEMANTIC_TESSOUTER, 0);
- tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
- LLVMConstInt(ctx->i32, param_outer, 0));
-
- unsigned outer_vec_size =
- ac_has_vec3_support(ctx->screen->info.chip_class, false) ?
- outer_comps : util_next_power_of_two(outer_comps);
- outer_vec = ac_build_gather_values(&ctx->ac, outer, outer_vec_size);
-
- ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec,
- outer_comps, tf_outer_offset,
- base, 0, ac_glc);
- if (inner_comps) {
- param_inner = si_shader_io_get_unique_index_patch(
- TGSI_SEMANTIC_TESSINNER, 0);
- tf_inner_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
- LLVMConstInt(ctx->i32, param_inner, 0));
-
- inner_vec = inner_comps == 1 ? inner[0] :
- ac_build_gather_values(&ctx->ac, inner, inner_comps);
- ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec,
- inner_comps, tf_inner_offset,
- base, 0, ac_glc);
- }
- }
-
- ac_build_endif(&ctx->ac, 6503);
+ struct si_shader *shader = ctx->shader;
+ unsigned tess_inner_index, tess_outer_index;
+ LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
+ LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4];
+ unsigned stride, outer_comps, inner_comps, i, offset;
+
+ /* Add a barrier before loading tess factors from LDS. */
+ if (!shader->key.part.tcs.epilog.invoc0_tess_factors_are_def)
+ si_llvm_emit_barrier(ctx);
+
+ /* Do this only for invocation 0, because the tess levels are per-patch,
+ * not per-vertex.
+ *
+ * This can't jump, because invocation 0 executes this. It should
+ * at least mask out the loads and stores for other invocations.
+ */
+ ac_build_ifcc(&ctx->ac,
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, invocation_id, ctx->ac.i32_0, ""), 6503);
+
+ /* Determine the layout of one tess factor element in the buffer. */
+ switch (shader->key.part.tcs.epilog.prim_mode) {
+ case PIPE_PRIM_LINES:
+ stride = 2; /* 2 dwords, 1 vec2 store */
+ outer_comps = 2;
+ inner_comps = 0;
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ stride = 4; /* 4 dwords, 1 vec4 store */
+ outer_comps = 3;
+ inner_comps = 1;
+ break;
+ case PIPE_PRIM_QUADS:
+ stride = 6; /* 6 dwords, 2 stores (vec4 + vec2) */
+ outer_comps = 4;
+ inner_comps = 2;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ for (i = 0; i < 4; i++) {
+ inner[i] = LLVMGetUndef(ctx->ac.i32);
+ outer[i] = LLVMGetUndef(ctx->ac.i32);
+ }
+
+ if (shader->key.part.tcs.epilog.invoc0_tess_factors_are_def) {
+ /* Tess factors are in VGPRs. */
+ for (i = 0; i < outer_comps; i++)
+ outer[i] = out[i] = invoc0_tf_outer[i];
+ for (i = 0; i < inner_comps; i++)
+ inner[i] = out[outer_comps + i] = invoc0_tf_inner[i];
+ } else {
+ /* Load tess_inner and tess_outer from LDS.
+ * Any invocation can write them, so we can't get them from a temporary.
+ */
+ tess_inner_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER, 0);
+ tess_outer_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER, 0);
+
+ lds_base = tcs_out_current_patch_data_offset;
+ lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_base,
+ LLVMConstInt(ctx->ac.i32, tess_inner_index * 4, 0), "");
+ lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_base,
+ LLVMConstInt(ctx->ac.i32, tess_outer_index * 4, 0), "");
+
+ for (i = 0; i < outer_comps; i++) {
+ outer[i] = out[i] = lshs_lds_load(ctx, ctx->ac.i32, i, lds_outer);
+ }
+ for (i = 0; i < inner_comps; i++) {
+ inner[i] = out[outer_comps + i] = lshs_lds_load(ctx, ctx->ac.i32, i, lds_inner);
+ }
+ }
+
+ if (shader->key.part.tcs.epilog.prim_mode == PIPE_PRIM_LINES) {
+ /* For isolines, the hardware expects tess factors in the
+ * reverse order from what NIR specifies.
+ */
+ LLVMValueRef tmp = out[0];
+ out[0] = out[1];
+ out[1] = tmp;
+ }
+
+ /* Convert the outputs to vectors for stores. */
+ vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4));
+ vec1 = NULL;
+
+ if (stride > 4)
+ vec1 = ac_build_gather_values(&ctx->ac, out + 4, stride - 4);
+
+ /* Get the buffer. */
+ buffer = get_tess_ring_descriptor(ctx, TCS_FACTOR_RING);
+
+ /* Get the offset. */
+ tf_base = ac_get_arg(&ctx->ac, ctx->tcs_factor_offset);
+ byteoffset =
+ LLVMBuildMul(ctx->ac.builder, rel_patch_id, LLVMConstInt(ctx->ac.i32, 4 * stride, 0), "");
+
+ ac_build_ifcc(&ctx->ac,
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, rel_patch_id, ctx->ac.i32_0, ""), 6504);
+
+ /* Store the dynamic HS control word. */
+ offset = 0;
+ if (ctx->screen->info.chip_class <= GFX8) {
+ ac_build_buffer_store_dword(&ctx->ac, buffer, LLVMConstInt(ctx->ac.i32, 0x80000000, 0), 1,
+ ctx->ac.i32_0, tf_base, offset, ac_glc);
+ offset += 4;
+ }
+
+ ac_build_endif(&ctx->ac, 6504);
+
+ /* Store the tessellation factors. */
+ ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, MIN2(stride, 4), byteoffset, tf_base, offset,
+ ac_glc);
+ offset += 16;
+ if (vec1)
+ ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, stride - 4, byteoffset, tf_base, offset,
+ ac_glc);
+
+ /* Store the tess factors into the offchip buffer if TES reads them. */
+ if (shader->key.part.tcs.epilog.tes_reads_tess_factors) {
+ LLVMValueRef buf, base, inner_vec, outer_vec, tf_outer_offset;
+ LLVMValueRef tf_inner_offset;
+ unsigned param_outer, param_inner;
+
+ buf = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
+ base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset);
+
+ param_outer = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER, 0);
+ tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
+ LLVMConstInt(ctx->ac.i32, param_outer, 0));
+
+ unsigned outer_vec_size = ac_has_vec3_support(ctx->screen->info.chip_class, false)
+ ? outer_comps
+ : util_next_power_of_two(outer_comps);
+ outer_vec = ac_build_gather_values(&ctx->ac, outer, outer_vec_size);
+
+ ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec, outer_comps, tf_outer_offset, base, 0,
+ ac_glc);
+ if (inner_comps) {
+ param_inner = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER, 0);
+ tf_inner_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
+ LLVMConstInt(ctx->ac.i32, param_inner, 0));
+
+ inner_vec =
+ inner_comps == 1 ? inner[0] : ac_build_gather_values(&ctx->ac, inner, inner_comps);
+ ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec, inner_comps, tf_inner_offset, base,
+ 0, ac_glc);
+ }
+ }
+
+ ac_build_endif(&ctx->ac, 6503);