+ if (key->vs_prolog.gs_fast_launch_tri_list ||
+     key->vs_prolog.gs_fast_launch_tri_strip) {
+     /* GS fast launch fills every VGPR with the value belonging to the
+      * first thread, so the per-thread inputs are rebuilt here from the
+      * thread ID.
+      *
+      * The hw only initializes:
+      *   VGPR2: Base Primitive ID
+      *   VGPR5: Base Vertex ID
+      *   VGPR6: Instance ID
+      */
+     LLVMBuilderRef bld = ctx->ac.builder;
+     LLVMValueRef wave_size = LLVMConstInt(ctx->ac.i32, ctx->ac.wave_size, false);
+
+     /* Thread ID within the threadgroup, built as wave * wave_size + lane. */
+     LLVMValueRef wave = si_unpack_param(ctx, input_sgpr_param[3], 24, 4);
+     LLVMValueRef lane = ac_get_thread_id(&ctx->ac);
+     LLVMValueRef tid = ac_build_imad(&ctx->ac, wave, wave_size, lane);
+
+     /* The vertex thread IDs are stored unpacked, one per VGPR, because
+      * the NGG cull shader reads them from there:
+      *   VGPR0 (gs_vtx01_offset) = vertex 0
+      *   VGPR1 (gs_vtx23_offset) = vertex 1
+      *   VGPR4 (gs_vtx45_offset) = vertex 2
+      */
+     if (key->vs_prolog.gs_fast_launch_tri_list) {
+         /* Triangle list: thread N gets vertices 3N, 3N+1 and 3N+2. */
+         static const unsigned vtx_vgpr[3] = {0, 1, 4};
+         LLVMValueRef three = LLVMConstInt(ctx->i32, 3, 0);
+
+         for (unsigned v = 0; v < 3; v++) {
+             input_vgprs[vtx_vgpr[v]] =
+                 ac_build_imad(&ctx->ac, tid, three,
+                               LLVMConstInt(ctx->i32, v, 0));
+         }
+     } else {
+         assert(key->vs_prolog.gs_fast_launch_tri_strip);
+
+         /* Triangle strip: start from N, N+1, N+2. */
+         LLVMValueRef index[3];
+         index[0] = tid;
+         index[1] = LLVMBuildAdd(bld, tid, LLVMConstInt(ctx->i32, 1, 0), "");
+         index[2] = LLVMBuildAdd(bld, tid, LLVMConstInt(ctx->i32, 2, 0), "");
+
+         /* Lowest bit of the thread ID: set for odd threads. */
+         LLVMValueRef is_odd = LLVMBuildTrunc(bld, tid, ctx->i1, "");
+
+         /* True when the field unpacked from vs_state_bits is zero. */
+         LLVMValueRef state_field =
+             si_unpack_param(ctx, ctx->vs_state_bits, 4, 2);
+         LLVMValueRef flatshade_first =
+             LLVMBuildICmp(bld, LLVMIntEQ, state_field, ctx->i32_0, "");
+
+         /* index[] is read back after this call as the final triangle. */
+         ac_build_triangle_strip_indices_to_triangle(&ctx->ac, is_odd,
+                                                     flatshade_first, index);
+         input_vgprs[0] = index[0];
+         input_vgprs[1] = index[1];
+         input_vgprs[4] = index[2];
+     }
+
+     /* Triangles always have all edge flags set initially. */
+     input_vgprs[3] = LLVMConstInt(ctx->i32, 0x7 << 8, 0);
+
+     /* PrimID: base value plus this thread's ID. */
+     input_vgprs[2] = LLVMBuildAdd(bld, input_vgprs[2], tid, "");
+     /* VertexID: base value plus this thread's ID. */
+     input_vgprs[5] = LLVMBuildAdd(bld, input_vgprs[5], tid, "");
+     /* InstanceID: copied unchanged from VGPR6. */
+     input_vgprs[8] = input_vgprs[6];
+ }
+