#ifndef SI_SHADER_H
#define SI_SHADER_H
-#include <llvm-c/Core.h> /* LLVMModuleRef */
-#include <llvm-c/TargetMachine.h>
#include "util/u_inlines.h"
#include "util/u_queue.h"
#include "util/simple_mtx.h"
*/
#define SI_MAX_IO_GENERIC 32
+#define SI_NGG_PRIM_EDGE_FLAG_BITS ((1 << 9) | (1 << 19) | (1 << 29)) /* bits 9, 19, 29 (spaced 10 bits apart); NOTE(review): presumably one edge flag per vertex of a packed NGG primitive - confirm */
+
/* SGPR user data indices */
enum {
SI_SGPR_RW_BUFFERS, /* rings (& stream-out, VS only) */
#define C_VS_STATE_PROVOKING_VTX_INDEX 0xFFFFFFCF
#define S_VS_STATE_STREAMOUT_QUERY_ENABLED(x) (((unsigned)(x) & 0x1) << 6)
#define C_VS_STATE_STREAMOUT_QUERY_ENABLED 0xFFFFFFBF
-#define S_VS_STATE_LS_OUT_PATCH_SIZE(x) (((unsigned)(x) & 0x1FFF) << 8)
-#define C_VS_STATE_LS_OUT_PATCH_SIZE 0xFFE000FF
+#define S_VS_STATE_SMALL_PRIM_PRECISION(x) (((unsigned)(x) & 0xF) << 7) /* 4-bit field, bits 7..10 */
+#define C_VS_STATE_SMALL_PRIM_PRECISION 0xFFFFF87F /* clear mask: every bit except 7..10 */
+#define S_VS_STATE_LS_OUT_PATCH_SIZE(x) (((unsigned)(x) & 0x1FFF) << 11) /* 13-bit field, bits 11..23 */
+#define C_VS_STATE_LS_OUT_PATCH_SIZE 0xFF0007FF /* clear mask: every bit except 11..23 */
#define S_VS_STATE_LS_OUT_VERTEX_SIZE(x) (((unsigned)(x) & 0xFF) << 24)
#define C_VS_STATE_LS_OUT_VERTEX_SIZE 0x00FFFFFF
SI_VS_BLIT_SGPRS_POS_TEXCOORD = 9,
};
+#define SI_NGG_CULL_VIEW_SMALLPRIMS (1 << 0) /* view.xy + small prims */
+#define SI_NGG_CULL_BACK_FACE (1 << 1) /* back faces */
+#define SI_NGG_CULL_FRONT_FACE (1 << 2) /* front faces */
+#define SI_NGG_CULL_GS_FAST_LAUNCH_TRI_LIST (1 << 3) /* GS fast launch: triangles */
+#define SI_NGG_CULL_GS_FAST_LAUNCH_TRI_STRIP (1 << 4) /* GS fast launch: triangle strip */
+#define SI_NGG_CULL_GS_FAST_LAUNCH_ALL (0x3 << 3) /* GS fast launch (both prim types): mask equal to TRI_LIST | TRI_STRIP */
+
/**
* For VS shader keys, describe any fixups required for vertex fetch.
*
bool vs_needs_prolog;
bool force_correct_derivs_after_kill;
bool prim_discard_cs_allowed;
+ bool ngg_culling_allowed;
unsigned num_vs_inputs;
unsigned num_vbos_in_user_sgprs;
unsigned pa_cl_vs_out_cntl;
unsigned as_ls:1;
unsigned as_es:1;
unsigned as_ngg:1;
+ unsigned has_ngg_cull_inputs:1; /* from the NGG cull shader */
+ unsigned gs_fast_launch_tri_list:1; /* for NGG culling */
+ unsigned gs_fast_launch_tri_strip:1; /* for NGG culling */
/* Prologs for monolithic shaders shouldn't set EXEC. */
unsigned is_monolithic:1;
} vs_prolog;
uint64_t kill_outputs; /* "get_unique_index" bits */
unsigned clip_disable:1;
+ /* For NGG VS and TES. */
+ unsigned ngg_culling:5; /* SI_NGG_CULL_* */
+
/* For shaders where monolithic variants have better code.
*
* This is a flag that has no effect on code generation,
unsigned pa_cl_vte_cntl;
unsigned pa_cl_ngg_cntl;
unsigned vgt_gs_max_vert_out; /* for API GS */
+ unsigned ge_pc_alloc; /* uconfig register */
} ngg;
struct {
unsigned spi_vs_out_config;
unsigned spi_shader_pos_format;
unsigned pa_cl_vte_cntl;
+ unsigned ge_pc_alloc; /* uconfig register */
} vs;
struct {
};
/* si_shader.c */
-struct si_shader *
-si_generate_gs_copy_shader(struct si_screen *sscreen,
- struct ac_llvm_compiler *compiler,
- struct si_shader_selector *gs_selector,
- struct pipe_debug_callback *debug);
-int si_compile_shader(struct si_screen *sscreen,
- struct ac_llvm_compiler *compiler,
- struct si_shader *shader,
- struct pipe_debug_callback *debug);
+bool si_compile_shader(struct si_screen *sscreen,
+ struct ac_llvm_compiler *compiler,
+ struct si_shader *shader,
+ struct pipe_debug_callback *debug); /* NOTE(review): presumably returns true on success - confirm against the definition in si_shader.c */
bool si_create_shader_variant(struct si_screen *sscreen,
struct ac_llvm_compiler *compiler,
struct si_shader *shader,
const char *si_get_shader_name(const struct si_shader *shader);
void si_shader_binary_clean(struct si_shader_binary *binary);
+/* si_shader_llvm_gs.c */
+struct si_shader *
+si_generate_gs_copy_shader(struct si_screen *sscreen,
+ struct ac_llvm_compiler *compiler,
+ struct si_shader_selector *gs_selector,
+ struct pipe_debug_callback *debug);
+
/* si_shader_nir.c */
void si_nir_scan_shader(const struct nir_shader *nir,
struct si_shader_info *info);
return sel->type != PIPE_SHADER_GEOMETRY &&
!sel->so.num_outputs &&
!sel->info.writes_edgeflag &&
+ !shader->key.opt.ngg_culling &&
(sel->type != PIPE_SHADER_VERTEX ||
!shader->key.mono.u.vs_export_prim_id);
}