/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "anv_private.h"
26 #include "genxml/gen_macros.h"
27 #include "genxml/genX_pack.h"
30 genX(compute_pipeline_create
)(
32 struct anv_pipeline_cache
* cache
,
33 const VkComputePipelineCreateInfo
* pCreateInfo
,
34 const VkAllocationCallbacks
* pAllocator
,
35 VkPipeline
* pPipeline
)
37 ANV_FROM_HANDLE(anv_device
, device
, _device
);
38 struct anv_pipeline
*pipeline
;
41 assert(pCreateInfo
->sType
== VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO
);
43 pipeline
= anv_alloc2(&device
->alloc
, pAllocator
, sizeof(*pipeline
), 8,
44 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT
);
46 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY
);
48 pipeline
->device
= device
;
49 pipeline
->layout
= anv_pipeline_layout_from_handle(pCreateInfo
->layout
);
51 pipeline
->blend_state
.map
= NULL
;
53 result
= anv_reloc_list_init(&pipeline
->batch_relocs
,
54 pAllocator
? pAllocator
: &device
->alloc
);
55 if (result
!= VK_SUCCESS
) {
56 anv_free2(&device
->alloc
, pAllocator
, pipeline
);
59 pipeline
->batch
.next
= pipeline
->batch
.start
= pipeline
->batch_data
;
60 pipeline
->batch
.end
= pipeline
->batch
.start
+ sizeof(pipeline
->batch_data
);
61 pipeline
->batch
.relocs
= &pipeline
->batch_relocs
;
/* When we free the pipeline, we detect stages based on the NULL status
 * of various prog_data pointers. Make them NULL by default.
 */
66 memset(pipeline
->shaders
, 0, sizeof(pipeline
->shaders
));
68 pipeline
->vs_simd8
= NO_KERNEL
;
69 pipeline
->vs_vec4
= NO_KERNEL
;
70 pipeline
->gs_kernel
= NO_KERNEL
;
72 pipeline
->active_stages
= 0;
74 pipeline
->needs_data_cache
= false;
76 assert(pCreateInfo
->stage
.stage
== VK_SHADER_STAGE_COMPUTE_BIT
);
77 ANV_FROM_HANDLE(anv_shader_module
, module
, pCreateInfo
->stage
.module
);
78 result
= anv_pipeline_compile_cs(pipeline
, cache
, pCreateInfo
, module
,
79 pCreateInfo
->stage
.pName
,
80 pCreateInfo
->stage
.pSpecializationInfo
);
81 if (result
!= VK_SUCCESS
) {
82 anv_free2(&device
->alloc
, pAllocator
, pipeline
);
86 pipeline
->use_repclear
= false;
88 const struct brw_cs_prog_data
*cs_prog_data
= get_cs_prog_data(pipeline
);
90 anv_pipeline_setup_l3_config(pipeline
, cs_prog_data
->base
.total_shared
> 0);
92 uint32_t group_size
= cs_prog_data
->local_size
[0] *
93 cs_prog_data
->local_size
[1] * cs_prog_data
->local_size
[2];
94 uint32_t remainder
= group_size
& (cs_prog_data
->simd_size
- 1);
97 pipeline
->cs_right_mask
= ~0u >> (32 - remainder
);
99 pipeline
->cs_right_mask
= ~0u >> (32 - cs_prog_data
->simd_size
);
101 const uint32_t vfe_curbe_allocation
=
102 ALIGN(cs_prog_data
->push
.per_thread
.regs
* cs_prog_data
->threads
+
103 cs_prog_data
->push
.cross_thread
.regs
, 2);
105 anv_batch_emit(&pipeline
->batch
, GENX(MEDIA_VFE_STATE
), vfe
) {
106 vfe
.ScratchSpaceBasePointer
= (struct anv_address
) {
107 .bo
= anv_scratch_pool_alloc(device
, &device
->scratch_pool
,
109 cs_prog_data
->base
.total_scratch
),
112 vfe
.PerThreadScratchSpace
= ffs(cs_prog_data
->base
.total_scratch
/ 2048);
116 vfe
.GPGPUMode
= true;
118 vfe
.MaximumNumberofThreads
= device
->info
.max_cs_threads
- 1;
119 vfe
.NumberofURBEntries
= GEN_GEN
<= 7 ? 0 : 2;
120 vfe
.ResetGatewayTimer
= true;
122 vfe
.BypassGatewayControl
= true;
124 vfe
.URBEntryAllocationSize
= GEN_GEN
<= 7 ? 0 : 2;
125 vfe
.CURBEAllocationSize
= vfe_curbe_allocation
;
128 *pPipeline
= anv_pipeline_to_handle(pipeline
);