winsys/radeon: fold cs_set_flush_callback into cs_create
[mesa.git] / src / gallium / drivers / radeonsi / si_shader.c
index 5b95c11580fa1a9e15b79c47964e7c75b21c245e..e4390eeac1f353c150ce452eb51bce237220f6b7 100644 (file)
@@ -319,7 +319,8 @@ static LLVMValueRef fetch_input_gs(
                                      4);
 
        /* Load the ESGS ring resource descriptor */
-       t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+       t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+                                 SI_PARAM_RW_BUFFERS);
        t_list = build_indexed_load(si_shader_ctx, t_list_ptr,
                                    lp_build_const_int32(gallivm, SI_RING_ESGS));
 
@@ -1202,7 +1203,8 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context * bld_base)
        }
 
        /* Load the ESGS ring resource descriptor */
-       t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+       t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+                                 SI_PARAM_RW_BUFFERS);
        t_list = build_indexed_load(si_shader_ctx, t_list_ptr,
                                    lp_build_const_int32(gallivm, SI_RING_ESGS));
 
@@ -1903,6 +1905,7 @@ static void si_llvm_emit_vertex(
        LLVMValueRef soffset = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
                                            SI_PARAM_GS2VS_OFFSET);
        LLVMValueRef gs_next_vertex;
+       LLVMValueRef can_emit, kill;
        LLVMValueRef t_list_ptr;
        LLVMValueRef t_list;
        LLVMValueRef args[2];
@@ -1910,7 +1913,8 @@ static void si_llvm_emit_vertex(
        int i;
 
        /* Load the GSVS ring resource descriptor */
-       t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+       t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+                                 SI_PARAM_RW_BUFFERS);
        t_list = build_indexed_load(si_shader_ctx, t_list_ptr,
                                    lp_build_const_int32(gallivm, SI_RING_GSVS));
 
@@ -1931,6 +1935,21 @@ static void si_llvm_emit_vertex(
 
        /* Write vertex attribute values to GSVS ring */
        gs_next_vertex = LLVMBuildLoad(gallivm->builder, si_shader_ctx->gs_next_vertex, "");
+
+       /* If this thread has already emitted the declared maximum number of
+        * vertices, kill it: excessive vertex emissions are not supposed to
+        * have any effect, and GS threads have no externally observable
+        * effects other than emitting vertices.
+        */
+       can_emit = LLVMBuildICmp(gallivm->builder, LLVMIntULE, gs_next_vertex,
+                                lp_build_const_int32(gallivm,
+                                                     shader->gs_max_out_vertices), "");
+       kill = lp_build_select(&bld_base->base, can_emit,
+                              lp_build_const_float(gallivm, 1.0f),
+                              lp_build_const_float(gallivm, -1.0f));
+       build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
+                       LLVMVoidTypeInContext(gallivm->context), &kill, 1, 0);
+
        for (i = 0; i < shader->noutput; i++) {
                LLVMValueRef *out_ptr =
                        si_shader_ctx->radeon_bld.soa.outputs[shader->output[i].index];
@@ -2038,7 +2057,7 @@ static void create_function(struct si_shader_context *si_shader_ctx)
        struct lp_build_tgsi_context *bld_base = &si_shader_ctx->radeon_bld.soa.bld_base;
        struct gallivm_state *gallivm = bld_base->base.gallivm;
        struct si_pipe_shader *shader = si_shader_ctx->shader;
-       LLVMTypeRef params[21], f32, i8, i32, v2i32, v3i32;
+       LLVMTypeRef params[SI_NUM_PARAMS], f32, i8, i32, v2i32, v3i32;
        unsigned i, last_sgpr, num_params;
 
        i8 = LLVMInt8TypeInContext(gallivm->context);
@@ -2049,6 +2068,8 @@ static void create_function(struct si_shader_context *si_shader_ctx)
 
        params[SI_PARAM_CONST] = LLVMPointerType(
                LLVMArrayType(LLVMVectorType(i8, 16), NUM_CONST_BUFFERS), CONST_ADDR_SPACE);
+       params[SI_PARAM_RW_BUFFERS] = params[SI_PARAM_CONST];
+
        /* We assume at most 16 textures per program at the moment.
         * This will probably need to be changed to support bindless textures */
        params[SI_PARAM_SAMPLER] = LLVMPointerType(
@@ -2059,7 +2080,6 @@ static void create_function(struct si_shader_context *si_shader_ctx)
        switch (si_shader_ctx->type) {
        case TGSI_PROCESSOR_VERTEX:
                params[SI_PARAM_VERTEX_BUFFER] = params[SI_PARAM_CONST];
-               params[SI_PARAM_SO_BUFFER] = params[SI_PARAM_CONST];
                params[SI_PARAM_START_INSTANCE] = i32;
                num_params = SI_PARAM_START_INSTANCE+1;
                if (shader->key.vs.as_es) {
@@ -2257,12 +2277,13 @@ static void preload_streamout_buffers(struct si_shader_context *si_shader_ctx)
                return;
 
        LLVMValueRef buf_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
-                                           SI_PARAM_SO_BUFFER);
+                                           SI_PARAM_RW_BUFFERS);
 
        /* Load the resources, we rely on the code sinking to do the rest */
        for (i = 0; i < 4; ++i) {
                if (si_shader_ctx->shader->selector->so.stride[i]) {
-                       LLVMValueRef offset = lp_build_const_int32(gallivm, i);
+                       LLVMValueRef offset = lp_build_const_int32(gallivm,
+                                                                  SI_RW_SO + i);
 
                        si_shader_ctx->so_buffers[i] = build_indexed_load(si_shader_ctx, buf_ptr, offset);
                }
@@ -2274,7 +2295,7 @@ int si_compile_llvm(struct si_context *sctx, struct si_pipe_shader *shader,
 {
        unsigned i;
        uint32_t *ptr;
-       struct radeon_llvm_binary binary;
+       struct radeon_shader_binary binary;
        bool dump = r600_can_dump_shader(&sctx->screen->b,
                        shader->selector ? shader->selector->tokens : NULL);
        memset(&binary, 0, sizeof(binary));
@@ -2328,9 +2349,9 @@ int si_compile_llvm(struct si_context *sctx, struct si_pipe_shader *shader,
        }
 
        ptr = (uint32_t*)sctx->b.ws->buffer_map(shader->bo->cs_buf, sctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
-       if (0 /*SI_BIG_ENDIAN*/) {
+       if (SI_BIG_ENDIAN) {
                for (i = 0; i < binary.code_size / 4; ++i) {
-                       ptr[i] = util_bswap32(*(uint32_t*)(binary.code + i*4));
+                       ptr[i] = util_cpu_to_le32((*(uint32_t*)(binary.code + i*4)));
                }
        } else {
                memcpy(ptr, binary.code, binary.code_size);
@@ -2371,7 +2392,8 @@ static int si_generate_gs_copy_shader(struct si_context *sctx,
        preload_streamout_buffers(si_shader_ctx);
 
        /* Load the GSVS ring resource descriptor */
-       t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST);
+       t_list_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
+                                 SI_PARAM_RW_BUFFERS);
        t_list = build_indexed_load(si_shader_ctx, t_list_ptr,
                                    lp_build_const_int32(gallivm, SI_RING_GSVS));