}
}
+/**
+ * Return the number of patches to accumulate before an 8_PATCH mode thread is
+ * launched. In cases with a large number of input control points and a large
+ * number of VS outputs, the VS URB space needed to store an entire 8 patches'
+ * worth of data can be prohibitive, so it can be beneficial to launch threads
+ * early.
+ *
+ * See the 3DSTATE_HS::Patch Count Threshold documentation for the recommended
+ * values. Note that 0 means to "disable" early dispatch, meaning to wait for
+ * a full 8 patches as normal.
+ *
+ * Mapping implemented below (input control points -> threshold):
+ *    4 or fewer -> 0 (early dispatch disabled)
+ *    5 to 6     -> 5
+ *    7 to 8     -> 4
+ *    9 to 10    -> 3
+ *    11 to 14   -> 2
+ *    15 or more -> 1
+ *
+ * \param input_control_points  number of input control points in a patch
+ * \return the patch count threshold, always in the range [0, 5]
+ */
+static int
+get_patch_count_threshold(int input_control_points)
+{
+ if (input_control_points <= 4)
+ return 0;
+ else if (input_control_points <= 6)
+ return 5;
+ else if (input_control_points <= 8)
+ return 4;
+ else if (input_control_points <= 10)
+ return 3;
+ else if (input_control_points <= 14)
+ return 2;
+
+ /* Everything above 14 control points (PATCHLIST_15 - PATCHLIST_32) gets a
+  * patch count of 1. No upper bound is checked here, so larger values also
+  * yield 1.
+  */
+ return 1;
+}
+
+} /* namespace brw */
extern "C" const unsigned *
brw_compile_tcs(const struct brw_compiler *compiler,
struct brw_vue_map input_vue_map;
brw_compute_vue_map(devinfo, &input_vue_map, nir->info.inputs_read,
- nir->info.separate_shader);
+ nir->info.separate_shader, 1);
brw_compute_tess_vue_map(&vue_prog_data->vue_map,
nir->info.outputs_written,
nir->info.patch_outputs_written);
bool has_primitive_id =
nir->info.system_values_read & (1 << SYSTEM_VALUE_PRIMITIVE_ID);
+ prog_data->patch_count_threshold = brw::get_patch_count_threshold(key->input_vertices);
+
if (compiler->use_tcs_8_patch &&
nir->info.tess.tcs_vertices_out <= (devinfo->gen >= 12 ? 32 : 16) &&
- 2 + has_primitive_id + key->input_vertices <= 31) {
+ 2 + has_primitive_id + key->input_vertices <= (devinfo->gen >= 12 ? 63 : 31)) {
/* 3DSTATE_HS imposes two constraints on using 8_PATCH mode. First, the
* "Instance" field limits the number of output vertices to [1, 16] on
* gen11 and below, or [1, 32] on gen12 and above. Secondly, the
nir->info.name));
}
- g.generate_code(v.cfg, 8, v.shader_stats, stats);
+ g.generate_code(v.cfg, 8, v.shader_stats,
+ v.performance_analysis.require(), stats);
+
+ g.add_const_data(nir->constant_data, nir->constant_data_size);
assembly = g.get_assembly();
} else {
- vec4_tcs_visitor v(compiler, log_data, key, prog_data,
+ brw::vec4_tcs_visitor v(compiler, log_data, key, prog_data,
nir, mem_ctx, shader_time_index, &input_vue_map);
if (!v.run()) {
if (error_str)
assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
- &prog_data->base, v.cfg, stats);
+ &prog_data->base, v.cfg,
+ v.performance_analysis.require(),
+ stats);
}
return assembly;
}
-
-
-} /* namespace brw */