radv: add create_buffer_from_image() helper
[mesa.git] / src/amd/vulkan/radv_meta_bufimage.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24 #include "radv_meta.h"
25 #include "nir/nir_builder.h"
26
27 /*
28 * GFX queue: Compute shader implementation of image->buffer copy
 29  * Compute queue: also implements buffer->image and image->image copies, and image clears.
30 */
31
 32 /* GFX9 needs to use a 3D sampler to access 3D resources, so each of these shaders
 33  * has a 3D variant for that purpose.
34 */
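/* Image->buffer copy shader: fetch a texel from the source image at
 * global_id + offset and store it into a storage texel buffer at
 * y * stride + x. The 16-byte push constant block holds the source offset
 * at byte 0 and the buffer stride (in texels) at byte 12.
 */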
35 static nir_shader *
36 build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
37 {
38 nir_builder b;
39 enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
40 const struct glsl_type *sampler_type = glsl_sampler_type(dim,
41 false,
42 false,
43 GLSL_TYPE_FLOAT);
44 const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
45 false,
46 false,
47 GLSL_TYPE_FLOAT);
48 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
49 b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
50 b.shader->info.cs.local_size[0] = 16;
51 b.shader->info.cs.local_size[1] = 16;
52 b.shader->info.cs.local_size[2] = 1;
53 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
54 sampler_type, "s_tex");
55 input_img->data.descriptor_set = 0;
56 input_img->data.binding = 0;
57
58 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
59 img_type, "out_img");
60 output_img->data.descriptor_set = 0;
61 output_img->data.binding = 1;
62
63 nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
64 nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
65 nir_ssa_def *block_size = nir_imm_ivec4(&b,
66 b.shader->info.cs.local_size[0],
67 b.shader->info.cs.local_size[1],
68 b.shader->info.cs.local_size[2], 0);
69
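	/* Global invocation id = workgroup id * workgroup size + local invocation id. */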
70 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
71
72
73
74 nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
75 nir_intrinsic_set_base(offset, 0);
76 nir_intrinsic_set_range(offset, 16);
77 offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
78 offset->num_components = is_3d ? 3 : 2;
79 nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
80 nir_builder_instr_insert(&b, &offset->instr);
81
82 nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
83 nir_intrinsic_set_base(stride, 0);
84 nir_intrinsic_set_range(stride, 16);
85 stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
86 stride->num_components = 1;
87 nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
88 nir_builder_instr_insert(&b, &stride->instr);
89
90 nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
91 nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
92
93 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
94 tex->sampler_dim = dim;
95 tex->op = nir_texop_txf;
96 tex->src[0].src_type = nir_tex_src_coord;
97 tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, is_3d ? 0x7 : 0x3));
98 tex->src[1].src_type = nir_tex_src_lod;
99 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
100 tex->src[2].src_type = nir_tex_src_texture_deref;
101 tex->src[2].src = nir_src_for_ssa(input_img_deref);
102 tex->dest_type = nir_type_float;
103 tex->is_array = false;
104 tex->coord_components = is_3d ? 3 : 2;
105
106 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
107 nir_builder_instr_insert(&b, &tex->instr);
108
109 nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
110 nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
111
112 nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
113 tmp = nir_iadd(&b, tmp, pos_x);
114
115 nir_ssa_def *coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
116
117 nir_ssa_def *outval = &tex->dest.ssa;
118 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
119 store->num_components = 4;
120 store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
121 store->src[1] = nir_src_for_ssa(coord);
122 store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
123 store->src[3] = nir_src_for_ssa(outval);
124
125 nir_builder_instr_insert(&b, &store->instr);
126 return b.shader;
127 }
128
 129 /* Image to buffer - don't write using image accessors */
130 static VkResult
131 radv_device_init_meta_itob_state(struct radv_device *device)
132 {
133 VkResult result;
134 struct radv_shader_module cs = { .nir = NULL };
135 struct radv_shader_module cs_3d = { .nir = NULL };
136
137 cs.nir = build_nir_itob_compute_shader(device, false);
138 if (device->physical_device->rad_info.chip_class >= GFX9)
139 cs_3d.nir = build_nir_itob_compute_shader(device, true);
140
141 /*
 142 	 * Two descriptors: one for the image being sampled,
 143 	 * one for the buffer being written.
144 */
145 VkDescriptorSetLayoutCreateInfo ds_create_info = {
146 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
147 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
148 .bindingCount = 2,
149 .pBindings = (VkDescriptorSetLayoutBinding[]) {
150 {
151 .binding = 0,
152 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
153 .descriptorCount = 1,
154 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
155 .pImmutableSamplers = NULL
156 },
157 {
158 .binding = 1,
159 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
160 .descriptorCount = 1,
161 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
162 .pImmutableSamplers = NULL
163 },
164 }
165 };
166
167 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
168 &ds_create_info,
169 &device->meta_state.alloc,
170 &device->meta_state.itob.img_ds_layout);
171 if (result != VK_SUCCESS)
172 goto fail;
173
174
175 VkPipelineLayoutCreateInfo pl_create_info = {
176 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
177 .setLayoutCount = 1,
178 .pSetLayouts = &device->meta_state.itob.img_ds_layout,
179 .pushConstantRangeCount = 1,
180 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
181 };
182
183 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
184 &pl_create_info,
185 &device->meta_state.alloc,
186 &device->meta_state.itob.img_p_layout);
187 if (result != VK_SUCCESS)
188 goto fail;
189
190 /* compute shader */
191
192 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
193 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
194 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
195 .module = radv_shader_module_to_handle(&cs),
196 .pName = "main",
197 .pSpecializationInfo = NULL,
198 };
199
200 VkComputePipelineCreateInfo vk_pipeline_info = {
201 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
202 .stage = pipeline_shader_stage,
203 .flags = 0,
204 .layout = device->meta_state.itob.img_p_layout,
205 };
206
207 result = radv_CreateComputePipelines(radv_device_to_handle(device),
208 radv_pipeline_cache_to_handle(&device->meta_state.cache),
209 1, &vk_pipeline_info, NULL,
210 &device->meta_state.itob.pipeline);
211 if (result != VK_SUCCESS)
212 goto fail;
213
214 if (device->physical_device->rad_info.chip_class >= GFX9) {
215 VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
216 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
217 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
218 .module = radv_shader_module_to_handle(&cs_3d),
219 .pName = "main",
220 .pSpecializationInfo = NULL,
221 };
222
223 VkComputePipelineCreateInfo vk_pipeline_info_3d = {
224 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
225 .stage = pipeline_shader_stage_3d,
226 .flags = 0,
227 .layout = device->meta_state.itob.img_p_layout,
228 };
229
230 result = radv_CreateComputePipelines(radv_device_to_handle(device),
231 radv_pipeline_cache_to_handle(&device->meta_state.cache),
232 1, &vk_pipeline_info_3d, NULL,
233 &device->meta_state.itob.pipeline_3d);
234 if (result != VK_SUCCESS)
235 goto fail;
236 ralloc_free(cs_3d.nir);
237 }
238 ralloc_free(cs.nir);
239
240 return VK_SUCCESS;
241 fail:
242 ralloc_free(cs.nir);
243 ralloc_free(cs_3d.nir);
244 return result;
245 }
246
247 static void
248 radv_device_finish_meta_itob_state(struct radv_device *device)
249 {
250 struct radv_meta_state *state = &device->meta_state;
251
252 radv_DestroyPipelineLayout(radv_device_to_handle(device),
253 state->itob.img_p_layout, &state->alloc);
254 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
255 state->itob.img_ds_layout,
256 &state->alloc);
257 radv_DestroyPipeline(radv_device_to_handle(device),
258 state->itob.pipeline, &state->alloc);
259 if (device->physical_device->rad_info.chip_class >= GFX9)
260 radv_DestroyPipeline(radv_device_to_handle(device),
261 state->itob.pipeline_3d, &state->alloc);
262 }
263
264 static nir_shader *
265 build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
266 {
267 nir_builder b;
268 enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
269 const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
270 false,
271 false,
272 GLSL_TYPE_FLOAT);
273 const struct glsl_type *img_type = glsl_sampler_type(dim,
274 false,
275 false,
276 GLSL_TYPE_FLOAT);
277 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
278 b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
279 b.shader->info.cs.local_size[0] = 16;
280 b.shader->info.cs.local_size[1] = 16;
281 b.shader->info.cs.local_size[2] = 1;
282 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
283 buf_type, "s_tex");
284 input_img->data.descriptor_set = 0;
285 input_img->data.binding = 0;
286
287 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
288 img_type, "out_img");
289 output_img->data.descriptor_set = 0;
290 output_img->data.binding = 1;
291
292 nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
293 nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
294 nir_ssa_def *block_size = nir_imm_ivec4(&b,
295 b.shader->info.cs.local_size[0],
296 b.shader->info.cs.local_size[1],
297 b.shader->info.cs.local_size[2], 0);
298
299 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
300
301 nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
302 nir_intrinsic_set_base(offset, 0);
303 nir_intrinsic_set_range(offset, 16);
304 offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
305 offset->num_components = is_3d ? 3 : 2;
306 nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
307 nir_builder_instr_insert(&b, &offset->instr);
308
309 nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
310 nir_intrinsic_set_base(stride, 0);
311 nir_intrinsic_set_range(stride, 16);
312 stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
313 stride->num_components = 1;
314 nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
315 nir_builder_instr_insert(&b, &stride->instr);
316
317 nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
318 nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
319
320 nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
321 tmp = nir_iadd(&b, tmp, pos_x);
322
323 nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
324
325 nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
326 nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
327
328 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
329 tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
330 tex->op = nir_texop_txf;
331 tex->src[0].src_type = nir_tex_src_coord;
332 tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
333 tex->src[1].src_type = nir_tex_src_lod;
334 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
335 tex->src[2].src_type = nir_tex_src_texture_deref;
336 tex->src[2].src = nir_src_for_ssa(input_img_deref);
337 tex->dest_type = nir_type_float;
338 tex->is_array = false;
339 tex->coord_components = 1;
340
341 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
342 nir_builder_instr_insert(&b, &tex->instr);
343
344 nir_ssa_def *outval = &tex->dest.ssa;
345 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
346 store->num_components = 4;
347 store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
348 store->src[1] = nir_src_for_ssa(img_coord);
349 store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
350 store->src[3] = nir_src_for_ssa(outval);
351
352 nir_builder_instr_insert(&b, &store->instr);
353 return b.shader;
354 }
355
 356 /* Buffer to image - don't write using image accessors */
357 static VkResult
358 radv_device_init_meta_btoi_state(struct radv_device *device)
359 {
360 VkResult result;
361 struct radv_shader_module cs = { .nir = NULL };
362 struct radv_shader_module cs_3d = { .nir = NULL };
363 cs.nir = build_nir_btoi_compute_shader(device, false);
364 if (device->physical_device->rad_info.chip_class >= GFX9)
365 cs_3d.nir = build_nir_btoi_compute_shader(device, true);
366 /*
 367 	 * Two descriptors: one for the buffer being read,
 368 	 * one for the image being written.
369 */
370 VkDescriptorSetLayoutCreateInfo ds_create_info = {
371 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
372 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
373 .bindingCount = 2,
374 .pBindings = (VkDescriptorSetLayoutBinding[]) {
375 {
376 .binding = 0,
377 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
378 .descriptorCount = 1,
379 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
380 .pImmutableSamplers = NULL
381 },
382 {
383 .binding = 1,
384 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
385 .descriptorCount = 1,
386 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
387 .pImmutableSamplers = NULL
388 },
389 }
390 };
391
392 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
393 &ds_create_info,
394 &device->meta_state.alloc,
395 &device->meta_state.btoi.img_ds_layout);
396 if (result != VK_SUCCESS)
397 goto fail;
398
399
400 VkPipelineLayoutCreateInfo pl_create_info = {
401 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
402 .setLayoutCount = 1,
403 .pSetLayouts = &device->meta_state.btoi.img_ds_layout,
404 .pushConstantRangeCount = 1,
405 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
406 };
407
408 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
409 &pl_create_info,
410 &device->meta_state.alloc,
411 &device->meta_state.btoi.img_p_layout);
412 if (result != VK_SUCCESS)
413 goto fail;
414
415 /* compute shader */
416
417 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
418 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
419 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
420 .module = radv_shader_module_to_handle(&cs),
421 .pName = "main",
422 .pSpecializationInfo = NULL,
423 };
424
425 VkComputePipelineCreateInfo vk_pipeline_info = {
426 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
427 .stage = pipeline_shader_stage,
428 .flags = 0,
429 .layout = device->meta_state.btoi.img_p_layout,
430 };
431
432 result = radv_CreateComputePipelines(radv_device_to_handle(device),
433 radv_pipeline_cache_to_handle(&device->meta_state.cache),
434 1, &vk_pipeline_info, NULL,
435 &device->meta_state.btoi.pipeline);
436 if (result != VK_SUCCESS)
437 goto fail;
438
439 if (device->physical_device->rad_info.chip_class >= GFX9) {
440 VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
441 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
442 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
443 .module = radv_shader_module_to_handle(&cs_3d),
444 .pName = "main",
445 .pSpecializationInfo = NULL,
446 };
447
448 VkComputePipelineCreateInfo vk_pipeline_info_3d = {
449 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
450 .stage = pipeline_shader_stage_3d,
451 .flags = 0,
452 .layout = device->meta_state.btoi.img_p_layout,
453 };
454
455 result = radv_CreateComputePipelines(radv_device_to_handle(device),
456 radv_pipeline_cache_to_handle(&device->meta_state.cache),
457 1, &vk_pipeline_info_3d, NULL,
458 &device->meta_state.btoi.pipeline_3d);
		if (result != VK_SUCCESS)
			goto fail;
 459 		ralloc_free(cs_3d.nir);
460 }
461 ralloc_free(cs.nir);
462
463 return VK_SUCCESS;
464 fail:
465 ralloc_free(cs_3d.nir);
466 ralloc_free(cs.nir);
467 return result;
468 }
469
470 static void
471 radv_device_finish_meta_btoi_state(struct radv_device *device)
472 {
473 struct radv_meta_state *state = &device->meta_state;
474
475 radv_DestroyPipelineLayout(radv_device_to_handle(device),
476 state->btoi.img_p_layout, &state->alloc);
477 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
478 state->btoi.img_ds_layout,
479 &state->alloc);
480 radv_DestroyPipeline(radv_device_to_handle(device),
481 state->btoi.pipeline, &state->alloc);
482 radv_DestroyPipeline(radv_device_to_handle(device),
483 state->btoi.pipeline_3d, &state->alloc);
484 }
485
486 /* Buffer to image - special path for R32G32B32 */
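/* The destination image is written through an R32 texel buffer view that
 * aliases its memory: the source texel is fetched at y * stride + x and each
 * of its three components is stored separately at y * pitch + x * 3 + comp.
 */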
487 static nir_shader *
488 build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
489 {
490 nir_builder b;
491 const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
492 false,
493 false,
494 GLSL_TYPE_FLOAT);
495 const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
496 false,
497 false,
498 GLSL_TYPE_FLOAT);
499 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
500 b.shader->info.name = ralloc_strdup(b.shader, "meta_btoi_r32g32b32_cs");
501 b.shader->info.cs.local_size[0] = 16;
502 b.shader->info.cs.local_size[1] = 16;
503 b.shader->info.cs.local_size[2] = 1;
504 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
505 buf_type, "s_tex");
506 input_img->data.descriptor_set = 0;
507 input_img->data.binding = 0;
508
509 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
510 img_type, "out_img");
511 output_img->data.descriptor_set = 0;
512 output_img->data.binding = 1;
513
514 nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
515 nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
516 nir_ssa_def *block_size = nir_imm_ivec4(&b,
517 b.shader->info.cs.local_size[0],
518 b.shader->info.cs.local_size[1],
519 b.shader->info.cs.local_size[2], 0);
520
521 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
522
523 nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
524 nir_intrinsic_set_base(offset, 0);
525 nir_intrinsic_set_range(offset, 16);
526 offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
527 offset->num_components = 2;
528 nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
529 nir_builder_instr_insert(&b, &offset->instr);
530
531 nir_intrinsic_instr *pitch = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
532 nir_intrinsic_set_base(pitch, 0);
533 nir_intrinsic_set_range(pitch, 16);
534 pitch->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
535 pitch->num_components = 1;
536 nir_ssa_dest_init(&pitch->instr, &pitch->dest, 1, 32, "pitch");
537 nir_builder_instr_insert(&b, &pitch->instr);
538
539 nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
540 nir_intrinsic_set_base(stride, 0);
541 nir_intrinsic_set_range(stride, 16);
542 stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
543 stride->num_components = 1;
544 nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
545 nir_builder_instr_insert(&b, &stride->instr);
546
547 nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
548 nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
549
550 nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
551 tmp = nir_iadd(&b, tmp, pos_x);
552
553 nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
554
555 nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
556
557 nir_ssa_def *global_pos =
558 nir_iadd(&b,
559 nir_imul(&b, nir_channel(&b, img_coord, 1), &pitch->dest.ssa),
560 nir_imul(&b, nir_channel(&b, img_coord, 0), nir_imm_int(&b, 3)));
561
562 nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
563
564 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
565 tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
566 tex->op = nir_texop_txf;
567 tex->src[0].src_type = nir_tex_src_coord;
568 tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
569 tex->src[1].src_type = nir_tex_src_lod;
570 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
571 tex->src[2].src_type = nir_tex_src_texture_deref;
572 tex->src[2].src = nir_src_for_ssa(input_img_deref);
573 tex->dest_type = nir_type_float;
574 tex->is_array = false;
575 tex->coord_components = 1;
576 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
577 nir_builder_instr_insert(&b, &tex->instr);
578
579 nir_ssa_def *outval = &tex->dest.ssa;
580
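	/* Emit one R32 store per component of the fetched texel. */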
581 for (int chan = 0; chan < 3; chan++) {
582 nir_ssa_def *local_pos =
583 nir_iadd(&b, global_pos, nir_imm_int(&b, chan));
584
585 nir_ssa_def *coord =
586 nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);
587
588 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
589 store->num_components = 1;
590 store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
591 store->src[1] = nir_src_for_ssa(coord);
592 store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
593 store->src[3] = nir_src_for_ssa(nir_channel(&b, outval, chan));
594 nir_builder_instr_insert(&b, &store->instr);
595 }
596
597 return b.shader;
598 }
599
600 static VkResult
601 radv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device)
602 {
603 VkResult result;
604 struct radv_shader_module cs = { .nir = NULL };
605
606 cs.nir = build_nir_btoi_r32g32b32_compute_shader(device);
607
608 VkDescriptorSetLayoutCreateInfo ds_create_info = {
609 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
610 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
611 .bindingCount = 2,
612 .pBindings = (VkDescriptorSetLayoutBinding[]) {
613 {
614 .binding = 0,
615 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
616 .descriptorCount = 1,
617 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
618 .pImmutableSamplers = NULL
619 },
620 {
621 .binding = 1,
622 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
623 .descriptorCount = 1,
624 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
625 .pImmutableSamplers = NULL
626 },
627 }
628 };
629
630 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
631 &ds_create_info,
632 &device->meta_state.alloc,
633 &device->meta_state.btoi_r32g32b32.img_ds_layout);
634 if (result != VK_SUCCESS)
635 goto fail;
636
637
638 VkPipelineLayoutCreateInfo pl_create_info = {
639 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
640 .setLayoutCount = 1,
641 .pSetLayouts = &device->meta_state.btoi_r32g32b32.img_ds_layout,
642 .pushConstantRangeCount = 1,
643 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
644 };
645
646 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
647 &pl_create_info,
648 &device->meta_state.alloc,
649 &device->meta_state.btoi_r32g32b32.img_p_layout);
650 if (result != VK_SUCCESS)
651 goto fail;
652
653 /* compute shader */
654
655 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
656 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
657 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
658 .module = radv_shader_module_to_handle(&cs),
659 .pName = "main",
660 .pSpecializationInfo = NULL,
661 };
662
663 VkComputePipelineCreateInfo vk_pipeline_info = {
664 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
665 .stage = pipeline_shader_stage,
666 .flags = 0,
667 .layout = device->meta_state.btoi_r32g32b32.img_p_layout,
668 };
669
670 result = radv_CreateComputePipelines(radv_device_to_handle(device),
671 radv_pipeline_cache_to_handle(&device->meta_state.cache),
672 1, &vk_pipeline_info, NULL,
673 &device->meta_state.btoi_r32g32b32.pipeline);
674
675 fail:
676 ralloc_free(cs.nir);
677 return result;
678 }
679
680 static void
681 radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device *device)
682 {
683 struct radv_meta_state *state = &device->meta_state;
684
685 radv_DestroyPipelineLayout(radv_device_to_handle(device),
686 state->btoi_r32g32b32.img_p_layout, &state->alloc);
687 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
688 state->btoi_r32g32b32.img_ds_layout,
689 &state->alloc);
690 radv_DestroyPipeline(radv_device_to_handle(device),
691 state->btoi_r32g32b32.pipeline, &state->alloc);
692 }
693
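/* Image->image copy shader: fetch from the source image at
 * global_id + src_offset and store to the destination image at
 * global_id + dst_offset. The 24-byte push constant block holds the source
 * offset at byte 0 and the destination offset at byte 12.
 */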
694 static nir_shader *
695 build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d)
696 {
697 nir_builder b;
698 enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
699 const struct glsl_type *buf_type = glsl_sampler_type(dim,
700 false,
701 false,
702 GLSL_TYPE_FLOAT);
703 const struct glsl_type *img_type = glsl_sampler_type(dim,
704 false,
705 false,
706 GLSL_TYPE_FLOAT);
707 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
708 b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itoi_cs_3d" : "meta_itoi_cs");
709 b.shader->info.cs.local_size[0] = 16;
710 b.shader->info.cs.local_size[1] = 16;
711 b.shader->info.cs.local_size[2] = 1;
712 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
713 buf_type, "s_tex");
714 input_img->data.descriptor_set = 0;
715 input_img->data.binding = 0;
716
717 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
718 img_type, "out_img");
719 output_img->data.descriptor_set = 0;
720 output_img->data.binding = 1;
721
722 nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
723 nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
724 nir_ssa_def *block_size = nir_imm_ivec4(&b,
725 b.shader->info.cs.local_size[0],
726 b.shader->info.cs.local_size[1],
727 b.shader->info.cs.local_size[2], 0);
728
729 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
730
731 nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
732 nir_intrinsic_set_base(src_offset, 0);
733 nir_intrinsic_set_range(src_offset, 24);
734 src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
735 src_offset->num_components = is_3d ? 3 : 2;
736 nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, is_3d ? 3 : 2, 32, "src_offset");
737 nir_builder_instr_insert(&b, &src_offset->instr);
738
739 nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
740 nir_intrinsic_set_base(dst_offset, 0);
741 nir_intrinsic_set_range(dst_offset, 24);
742 dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
743 dst_offset->num_components = is_3d ? 3 : 2;
744 nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, is_3d ? 3 : 2, 32, "dst_offset");
745 nir_builder_instr_insert(&b, &dst_offset->instr);
746
747 nir_ssa_def *src_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
748 nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
749
750 nir_ssa_def *dst_coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
751
752 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
753 tex->sampler_dim = dim;
754 tex->op = nir_texop_txf;
755 tex->src[0].src_type = nir_tex_src_coord;
756 tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, is_3d ? 0x7 : 0x3));
757 tex->src[1].src_type = nir_tex_src_lod;
758 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
759 tex->src[2].src_type = nir_tex_src_texture_deref;
760 tex->src[2].src = nir_src_for_ssa(input_img_deref);
761 tex->dest_type = nir_type_float;
762 tex->is_array = false;
763 tex->coord_components = is_3d ? 3 : 2;
764
765 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
766 nir_builder_instr_insert(&b, &tex->instr);
767
768 nir_ssa_def *outval = &tex->dest.ssa;
769 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
770 store->num_components = 4;
771 store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
772 store->src[1] = nir_src_for_ssa(dst_coord);
773 store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
774 store->src[3] = nir_src_for_ssa(outval);
775
776 nir_builder_instr_insert(&b, &store->instr);
777 return b.shader;
778 }
779
 780 /* Image to image - don't write using image accessors */
781 static VkResult
782 radv_device_init_meta_itoi_state(struct radv_device *device)
783 {
784 VkResult result;
785 struct radv_shader_module cs = { .nir = NULL };
786 struct radv_shader_module cs_3d = { .nir = NULL };
787 cs.nir = build_nir_itoi_compute_shader(device, false);
788 if (device->physical_device->rad_info.chip_class >= GFX9)
789 cs_3d.nir = build_nir_itoi_compute_shader(device, true);
790 /*
 791 	 * Two descriptors: one for the source image being sampled,
 792 	 * one for the destination image being written.
793 */
794 VkDescriptorSetLayoutCreateInfo ds_create_info = {
795 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
796 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
797 .bindingCount = 2,
798 .pBindings = (VkDescriptorSetLayoutBinding[]) {
799 {
800 .binding = 0,
801 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
802 .descriptorCount = 1,
803 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
804 .pImmutableSamplers = NULL
805 },
806 {
807 .binding = 1,
808 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
809 .descriptorCount = 1,
810 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
811 .pImmutableSamplers = NULL
812 },
813 }
814 };
815
816 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
817 &ds_create_info,
818 &device->meta_state.alloc,
819 &device->meta_state.itoi.img_ds_layout);
820 if (result != VK_SUCCESS)
821 goto fail;
822
823
824 VkPipelineLayoutCreateInfo pl_create_info = {
825 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
826 .setLayoutCount = 1,
827 .pSetLayouts = &device->meta_state.itoi.img_ds_layout,
828 .pushConstantRangeCount = 1,
829 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
830 };
831
832 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
833 &pl_create_info,
834 &device->meta_state.alloc,
835 &device->meta_state.itoi.img_p_layout);
836 if (result != VK_SUCCESS)
837 goto fail;
838
839 /* compute shader */
840
841 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
842 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
843 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
844 .module = radv_shader_module_to_handle(&cs),
845 .pName = "main",
846 .pSpecializationInfo = NULL,
847 };
848
849 VkComputePipelineCreateInfo vk_pipeline_info = {
850 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
851 .stage = pipeline_shader_stage,
852 .flags = 0,
853 .layout = device->meta_state.itoi.img_p_layout,
854 };
855
856 result = radv_CreateComputePipelines(radv_device_to_handle(device),
857 radv_pipeline_cache_to_handle(&device->meta_state.cache),
858 1, &vk_pipeline_info, NULL,
859 &device->meta_state.itoi.pipeline);
860 if (result != VK_SUCCESS)
861 goto fail;
862
863 if (device->physical_device->rad_info.chip_class >= GFX9) {
864 VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
865 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
866 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
867 .module = radv_shader_module_to_handle(&cs_3d),
868 .pName = "main",
869 .pSpecializationInfo = NULL,
870 };
871
872 VkComputePipelineCreateInfo vk_pipeline_info_3d = {
873 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
874 .stage = pipeline_shader_stage_3d,
875 .flags = 0,
876 .layout = device->meta_state.itoi.img_p_layout,
877 };
878
879 result = radv_CreateComputePipelines(radv_device_to_handle(device),
880 radv_pipeline_cache_to_handle(&device->meta_state.cache),
881 1, &vk_pipeline_info_3d, NULL,
882 &device->meta_state.itoi.pipeline_3d);
 883 		if (result != VK_SUCCESS)
			goto fail;
884 ralloc_free(cs_3d.nir);
885 }
886 ralloc_free(cs.nir);
887
888 return VK_SUCCESS;
889 fail:
890 ralloc_free(cs.nir);
891 ralloc_free(cs_3d.nir);
892 return result;
893 }
894
895 static void
896 radv_device_finish_meta_itoi_state(struct radv_device *device)
897 {
898 struct radv_meta_state *state = &device->meta_state;
899
900 radv_DestroyPipelineLayout(radv_device_to_handle(device),
901 state->itoi.img_p_layout, &state->alloc);
902 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
903 state->itoi.img_ds_layout,
904 &state->alloc);
905 radv_DestroyPipeline(radv_device_to_handle(device),
906 state->itoi.pipeline, &state->alloc);
907 if (device->physical_device->rad_info.chip_class >= GFX9)
908 radv_DestroyPipeline(radv_device_to_handle(device),
909 state->itoi.pipeline_3d, &state->alloc);
910 }
911
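/* Image clear shader: store the clear color from the push constants to the
 * texel at the global invocation id. The 20-byte push constant block holds
 * the clear value at byte 0 and the base array layer at byte 16.
 */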
912 static nir_shader *
913 build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d)
914 {
915 nir_builder b;
916 enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
917 const struct glsl_type *img_type = glsl_sampler_type(dim,
918 false,
919 false,
920 GLSL_TYPE_FLOAT);
921 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
922 b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_cleari_cs_3d" : "meta_cleari_cs");
923 b.shader->info.cs.local_size[0] = 16;
924 b.shader->info.cs.local_size[1] = 16;
925 b.shader->info.cs.local_size[2] = 1;
926
927 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
928 img_type, "out_img");
929 output_img->data.descriptor_set = 0;
930 output_img->data.binding = 0;
931
932 nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
933 nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
934 nir_ssa_def *block_size = nir_imm_ivec4(&b,
935 b.shader->info.cs.local_size[0],
936 b.shader->info.cs.local_size[1],
937 b.shader->info.cs.local_size[2], 0);
938
939 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
940
941 nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
942 nir_intrinsic_set_base(clear_val, 0);
943 nir_intrinsic_set_range(clear_val, 20);
944 clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
945 clear_val->num_components = 4;
946 nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 4, 32, "clear_value");
947 nir_builder_instr_insert(&b, &clear_val->instr);
948
949 nir_intrinsic_instr *layer = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
950 nir_intrinsic_set_base(layer, 0);
951 nir_intrinsic_set_range(layer, 20);
952 layer->src[0] = nir_src_for_ssa(nir_imm_int(&b, 16));
953 layer->num_components = 1;
954 nir_ssa_dest_init(&layer->instr, &layer->dest, 1, 32, "layer");
955 nir_builder_instr_insert(&b, &layer->instr);
956
957 nir_ssa_def *global_z = nir_iadd(&b, nir_channel(&b, global_id, 2), &layer->dest.ssa);
958
959 nir_ssa_def *comps[4];
960 comps[0] = nir_channel(&b, global_id, 0);
961 comps[1] = nir_channel(&b, global_id, 1);
962 comps[2] = global_z;
963 comps[3] = nir_imm_int(&b, 0);
964 global_id = nir_vec(&b, comps, 4);
965
966 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
967 store->num_components = 4;
968 store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
969 store->src[1] = nir_src_for_ssa(global_id);
970 store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
971 store->src[3] = nir_src_for_ssa(&clear_val->dest.ssa);
972
973 nir_builder_instr_insert(&b, &store->instr);
974 return b.shader;
975 }
976
977 static VkResult
978 radv_device_init_meta_cleari_state(struct radv_device *device)
979 {
980 VkResult result;
981 struct radv_shader_module cs = { .nir = NULL };
982 struct radv_shader_module cs_3d = { .nir = NULL };
983 cs.nir = build_nir_cleari_compute_shader(device, false);
984 if (device->physical_device->rad_info.chip_class >= GFX9)
985 cs_3d.nir = build_nir_cleari_compute_shader(device, true);
986
987 /*
 988 	 * One descriptor for the storage image being cleared.
990 */
991 VkDescriptorSetLayoutCreateInfo ds_create_info = {
992 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
993 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
994 .bindingCount = 1,
995 .pBindings = (VkDescriptorSetLayoutBinding[]) {
996 {
997 .binding = 0,
998 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
999 .descriptorCount = 1,
1000 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1001 .pImmutableSamplers = NULL
1002 },
1003 }
1004 };
1005
1006 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
1007 &ds_create_info,
1008 &device->meta_state.alloc,
1009 &device->meta_state.cleari.img_ds_layout);
1010 if (result != VK_SUCCESS)
1011 goto fail;
1012
1013
1014 VkPipelineLayoutCreateInfo pl_create_info = {
1015 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1016 .setLayoutCount = 1,
1017 .pSetLayouts = &device->meta_state.cleari.img_ds_layout,
1018 .pushConstantRangeCount = 1,
1019 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
1020 };
1021
1022 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
1023 &pl_create_info,
1024 &device->meta_state.alloc,
1025 &device->meta_state.cleari.img_p_layout);
1026 if (result != VK_SUCCESS)
1027 goto fail;
1028
1029 /* compute shader */
1030
1031 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
1032 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1033 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
1034 .module = radv_shader_module_to_handle(&cs),
1035 .pName = "main",
1036 .pSpecializationInfo = NULL,
1037 };
1038
1039 VkComputePipelineCreateInfo vk_pipeline_info = {
1040 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1041 .stage = pipeline_shader_stage,
1042 .flags = 0,
1043 .layout = device->meta_state.cleari.img_p_layout,
1044 };
1045
1046 result = radv_CreateComputePipelines(radv_device_to_handle(device),
1047 radv_pipeline_cache_to_handle(&device->meta_state.cache),
1048 1, &vk_pipeline_info, NULL,
1049 &device->meta_state.cleari.pipeline);
1050 if (result != VK_SUCCESS)
1051 goto fail;
1052
1053
1054 if (device->physical_device->rad_info.chip_class >= GFX9) {
1055 /* compute shader */
1056 VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
1057 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1058 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
1059 .module = radv_shader_module_to_handle(&cs_3d),
1060 .pName = "main",
1061 .pSpecializationInfo = NULL,
1062 };
1063
1064 VkComputePipelineCreateInfo vk_pipeline_info_3d = {
1065 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1066 .stage = pipeline_shader_stage_3d,
1067 .flags = 0,
1068 .layout = device->meta_state.cleari.img_p_layout,
1069 };
1070
1071 result = radv_CreateComputePipelines(radv_device_to_handle(device),
1072 radv_pipeline_cache_to_handle(&device->meta_state.cache),
1073 1, &vk_pipeline_info_3d, NULL,
1074 &device->meta_state.cleari.pipeline_3d);
1075 if (result != VK_SUCCESS)
1076 goto fail;
1077
1078 ralloc_free(cs_3d.nir);
1079 }
1080 ralloc_free(cs.nir);
1081 return VK_SUCCESS;
1082 fail:
1083 ralloc_free(cs.nir);
1084 ralloc_free(cs_3d.nir);
1085 return result;
1086 }
1087
1088 static void
1089 radv_device_finish_meta_cleari_state(struct radv_device *device)
1090 {
1091 struct radv_meta_state *state = &device->meta_state;
1092
1093 radv_DestroyPipelineLayout(radv_device_to_handle(device),
1094 state->cleari.img_p_layout, &state->alloc);
1095 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
1096 state->cleari.img_ds_layout,
1097 &state->alloc);
1098 radv_DestroyPipeline(radv_device_to_handle(device),
1099 state->cleari.pipeline, &state->alloc);
1100 radv_DestroyPipeline(radv_device_to_handle(device),
1101 state->cleari.pipeline_3d, &state->alloc);
1102 }
1103
1104 /* Special path for clearing R32G32B32 images using a compute shader. */
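/* As in the R32G32B32 buffer->image path, the image is written through an
 * R32 texel buffer view; each cleared texel takes three separate 32-bit
 * stores at y * stride + x * 3 + component.
 */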
1105 static nir_shader *
1106 build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
1107 {
1108 nir_builder b;
1109 const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
1110 false,
1111 false,
1112 GLSL_TYPE_FLOAT);
1113 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
1114 b.shader->info.name = ralloc_strdup(b.shader, "meta_cleari_r32g32b32_cs");
1115 b.shader->info.cs.local_size[0] = 16;
1116 b.shader->info.cs.local_size[1] = 16;
1117 b.shader->info.cs.local_size[2] = 1;
1118
1119 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
1120 img_type, "out_img");
1121 output_img->data.descriptor_set = 0;
1122 output_img->data.binding = 0;
1123
1124 nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
1125 nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
1126 nir_ssa_def *block_size = nir_imm_ivec4(&b,
1127 b.shader->info.cs.local_size[0],
1128 b.shader->info.cs.local_size[1],
1129 b.shader->info.cs.local_size[2], 0);
1130
1131 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
1132
1133 nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
1134 nir_intrinsic_set_base(clear_val, 0);
1135 nir_intrinsic_set_range(clear_val, 16);
1136 clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
1137 clear_val->num_components = 3;
1138 nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 3, 32, "clear_value");
1139 nir_builder_instr_insert(&b, &clear_val->instr);
1140
1141 nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
1142 nir_intrinsic_set_base(stride, 0);
1143 nir_intrinsic_set_range(stride, 16);
1144 stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
1145 stride->num_components = 1;
1146 nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
1147 nir_builder_instr_insert(&b, &stride->instr);
1148
1149 nir_ssa_def *global_x = nir_channel(&b, global_id, 0);
1150 nir_ssa_def *global_y = nir_channel(&b, global_id, 1);
1151
1152 nir_ssa_def *global_pos =
1153 nir_iadd(&b,
1154 nir_imul(&b, global_y, &stride->dest.ssa),
1155 nir_imul(&b, global_x, nir_imm_int(&b, 3)));
1156
1157 for (unsigned chan = 0; chan < 3; chan++) {
1158 nir_ssa_def *local_pos =
1159 nir_iadd(&b, global_pos, nir_imm_int(&b, chan));
1160
1161 nir_ssa_def *coord =
1162 nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);
1163
1164 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
1165 store->num_components = 1;
1166 store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
1167 store->src[1] = nir_src_for_ssa(coord);
1168 store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
1169 store->src[3] = nir_src_for_ssa(nir_channel(&b, &clear_val->dest.ssa, chan));
1170 nir_builder_instr_insert(&b, &store->instr);
1171 }
1172
1173 return b.shader;
1174 }
1175
1176 static VkResult
1177 radv_device_init_meta_cleari_r32g32b32_state(struct radv_device *device)
1178 {
1179 VkResult result;
1180 struct radv_shader_module cs = { .nir = NULL };
1181
1182 cs.nir = build_nir_cleari_r32g32b32_compute_shader(device);
1183
1184 VkDescriptorSetLayoutCreateInfo ds_create_info = {
1185 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1186 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1187 .bindingCount = 1,
1188 .pBindings = (VkDescriptorSetLayoutBinding[]) {
1189 {
1190 .binding = 0,
1191 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1192 .descriptorCount = 1,
1193 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1194 .pImmutableSamplers = NULL
1195 },
1196 }
1197 };
1198
1199 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
1200 &ds_create_info,
1201 &device->meta_state.alloc,
1202 &device->meta_state.cleari_r32g32b32.img_ds_layout);
1203 if (result != VK_SUCCESS)
1204 goto fail;
1205
1206 VkPipelineLayoutCreateInfo pl_create_info = {
1207 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1208 .setLayoutCount = 1,
1209 .pSetLayouts = &device->meta_state.cleari_r32g32b32.img_ds_layout,
1210 .pushConstantRangeCount = 1,
1211 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
1212 };
1213
1214 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
1215 &pl_create_info,
1216 &device->meta_state.alloc,
1217 &device->meta_state.cleari_r32g32b32.img_p_layout);
1218 if (result != VK_SUCCESS)
1219 goto fail;
1220
1221 /* compute shader */
1222 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
1223 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1224 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
1225 .module = radv_shader_module_to_handle(&cs),
1226 .pName = "main",
1227 .pSpecializationInfo = NULL,
1228 };
1229
1230 VkComputePipelineCreateInfo vk_pipeline_info = {
1231 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1232 .stage = pipeline_shader_stage,
1233 .flags = 0,
1234 .layout = device->meta_state.cleari_r32g32b32.img_p_layout,
1235 };
1236
1237 result = radv_CreateComputePipelines(radv_device_to_handle(device),
1238 radv_pipeline_cache_to_handle(&device->meta_state.cache),
1239 1, &vk_pipeline_info, NULL,
1240 &device->meta_state.cleari_r32g32b32.pipeline);
1241
1242 fail:
1243 ralloc_free(cs.nir);
1244 return result;
1245 }
1246
1247 static void
1248 radv_device_finish_meta_cleari_r32g32b32_state(struct radv_device *device)
1249 {
1250 struct radv_meta_state *state = &device->meta_state;
1251
1252 radv_DestroyPipelineLayout(radv_device_to_handle(device),
1253 state->cleari_r32g32b32.img_p_layout,
1254 &state->alloc);
1255 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
1256 state->cleari_r32g32b32.img_ds_layout,
1257 &state->alloc);
1258 radv_DestroyPipeline(radv_device_to_handle(device),
1259 state->cleari_r32g32b32.pipeline, &state->alloc);
1260 }
1261
1262 void
1263 radv_device_finish_meta_bufimage_state(struct radv_device *device)
1264 {
1265 radv_device_finish_meta_itob_state(device);
1266 radv_device_finish_meta_btoi_state(device);
1267 radv_device_finish_meta_btoi_r32g32b32_state(device);
1268 radv_device_finish_meta_itoi_state(device);
1269 radv_device_finish_meta_cleari_state(device);
1270 radv_device_finish_meta_cleari_r32g32b32_state(device);
1271 }
1272
1273 VkResult
1274 radv_device_init_meta_bufimage_state(struct radv_device *device)
1275 {
1276 VkResult result;
1277
1278 result = radv_device_init_meta_itob_state(device);
1279 if (result != VK_SUCCESS)
1280 goto fail_itob;
1281
1282 result = radv_device_init_meta_btoi_state(device);
1283 if (result != VK_SUCCESS)
1284 goto fail_btoi;
1285
1286 result = radv_device_init_meta_btoi_r32g32b32_state(device);
1287 if (result != VK_SUCCESS)
1288 goto fail_btoi_r32g32b32;
1289
1290 result = radv_device_init_meta_itoi_state(device);
1291 if (result != VK_SUCCESS)
1292 goto fail_itoi;
1293
1294 result = radv_device_init_meta_cleari_state(device);
1295 if (result != VK_SUCCESS)
1296 goto fail_cleari;
1297
1298 result = radv_device_init_meta_cleari_r32g32b32_state(device);
1299 if (result != VK_SUCCESS)
1300 goto fail_cleari_r32g32b32;
1301
1302 return VK_SUCCESS;
1303 fail_cleari_r32g32b32:
1304 radv_device_finish_meta_cleari_r32g32b32_state(device);
1305 fail_cleari:
1306 radv_device_finish_meta_cleari_state(device);
1307 fail_itoi:
1308 radv_device_finish_meta_itoi_state(device);
1309 fail_btoi_r32g32b32:
1310 radv_device_finish_meta_btoi_r32g32b32_state(device);
1311 fail_btoi:
1312 radv_device_finish_meta_btoi_state(device);
1313 fail_itob:
1314 radv_device_finish_meta_itob_state(device);
1315 return result;
1316 }
1317
1318 static void
1319 create_iview(struct radv_cmd_buffer *cmd_buffer,
1320 struct radv_meta_blit2d_surf *surf,
1321 struct radv_image_view *iview)
1322 {
1323 VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 ? VK_IMAGE_VIEW_TYPE_2D :
1324 radv_meta_get_view_type(surf->image);
1325 radv_image_view_init(iview, cmd_buffer->device,
1326 &(VkImageViewCreateInfo) {
1327 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1328 .image = radv_image_to_handle(surf->image),
1329 .viewType = view_type,
1330 .format = surf->format,
1331 .subresourceRange = {
1332 .aspectMask = surf->aspect_mask,
1333 .baseMipLevel = surf->level,
1334 .levelCount = 1,
1335 .baseArrayLayer = surf->layer,
1336 .layerCount = 1
1337 },
1338 });
1339 }
1340
1341 static void
1342 create_bview(struct radv_cmd_buffer *cmd_buffer,
1343 struct radv_buffer *buffer,
1344 unsigned offset,
1345 VkFormat format,
1346 struct radv_buffer_view *bview)
1347 {
1348 radv_buffer_view_init(bview, cmd_buffer->device,
1349 &(VkBufferViewCreateInfo) {
1350 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
1351 .flags = 0,
1352 .buffer = radv_buffer_to_handle(buffer),
1353 .format = format,
1354 .offset = offset,
1355 .range = VK_WHOLE_SIZE,
1356 });
1357
1358 }
1359
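/* Create a VkBuffer that aliases the image's backing memory so its contents
 * can be accessed through buffer views (used by the R32G32B32 paths). The
 * caller is responsible for destroying the returned buffer.
 */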
1360 static void
1361 create_buffer_from_image(struct radv_cmd_buffer *cmd_buffer,
1362 struct radv_meta_blit2d_surf *surf,
1363 VkBufferUsageFlagBits usage,
1364 VkBuffer *buffer)
1365 {
1366 struct radv_device *device = cmd_buffer->device;
1367 struct radv_device_memory mem = { .bo = surf->image->bo };
1368
1369 radv_CreateBuffer(radv_device_to_handle(device),
1370 &(VkBufferCreateInfo) {
1371 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
1372 .flags = 0,
1373 .size = surf->image->size,
1374 .usage = usage,
1375 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
1376 }, NULL, buffer);
1377
1378 radv_BindBufferMemory2(radv_device_to_handle(device), 1,
1379 (VkBindBufferMemoryInfoKHR[]) {
1380 {
1381 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
1382 .buffer = *buffer,
1383 .memory = radv_device_memory_to_handle(&mem),
1384 .memoryOffset = surf->image->offset,
1385 }
1386 });
1387 }
1388
1389 static void
1390 itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1391 struct radv_image_view *src,
1392 struct radv_buffer_view *dst)
1393 {
1394 struct radv_device *device = cmd_buffer->device;
1395
1396 radv_meta_push_descriptor_set(cmd_buffer,
1397 VK_PIPELINE_BIND_POINT_COMPUTE,
1398 device->meta_state.itob.img_p_layout,
1399 0, /* set */
1400 2, /* descriptorWriteCount */
1401 (VkWriteDescriptorSet[]) {
1402 {
1403 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1404 .dstBinding = 0,
1405 .dstArrayElement = 0,
1406 .descriptorCount = 1,
1407 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
1408 .pImageInfo = (VkDescriptorImageInfo[]) {
1409 {
1410 .sampler = VK_NULL_HANDLE,
1411 .imageView = radv_image_view_to_handle(src),
1412 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1413 },
1414 }
1415 },
1416 {
1417 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1418 .dstBinding = 1,
1419 .dstArrayElement = 0,
1420 .descriptorCount = 1,
1421 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1422 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
1423 }
1424 });
1425 }
1426
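/* Copy image regions to a buffer using the itob compute pipeline. For each
 * rect the shader receives {src_x, src_y, layer, dst pitch} as push
 * constants and is dispatched over width x height invocations.
 */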
1427 void
1428 radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
1429 struct radv_meta_blit2d_surf *src,
1430 struct radv_meta_blit2d_buffer *dst,
1431 unsigned num_rects,
1432 struct radv_meta_blit2d_rect *rects)
1433 {
1434 VkPipeline pipeline = cmd_buffer->device->meta_state.itob.pipeline;
1435 struct radv_device *device = cmd_buffer->device;
1436 struct radv_image_view src_view;
1437 struct radv_buffer_view dst_view;
1438
1439 create_iview(cmd_buffer, src, &src_view);
1440 create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &dst_view);
1441 itob_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1442
1443 if (device->physical_device->rad_info.chip_class >= GFX9 &&
1444 src->image->type == VK_IMAGE_TYPE_3D)
1445 pipeline = cmd_buffer->device->meta_state.itob.pipeline_3d;
1446
1447 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1448 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1449
1450 for (unsigned r = 0; r < num_rects; ++r) {
1451 unsigned push_constants[4] = {
1452 rects[r].src_x,
1453 rects[r].src_y,
1454 src->layer,
1455 dst->pitch
1456 };
1457 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1458 device->meta_state.itob.img_p_layout,
1459 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
1460 push_constants);
1461
1462 radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1463 }
1464 }
1465
1466 static void
1467 btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1468 struct radv_buffer_view *src,
1469 struct radv_buffer_view *dst)
1470 {
1471 struct radv_device *device = cmd_buffer->device;
1472
1473 radv_meta_push_descriptor_set(cmd_buffer,
1474 VK_PIPELINE_BIND_POINT_COMPUTE,
1475 device->meta_state.btoi_r32g32b32.img_p_layout,
1476 0, /* set */
1477 2, /* descriptorWriteCount */
1478 (VkWriteDescriptorSet[]) {
1479 {
1480 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1481 .dstBinding = 0,
1482 .dstArrayElement = 0,
1483 .descriptorCount = 1,
1484 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1485 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) },
1486 },
1487 {
1488 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1489 .dstBinding = 1,
1490 .dstArrayElement = 0,
1491 .descriptorCount = 1,
1492 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1493 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
1494 }
1495 });
1496 }
1497
1498 static void
1499 radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1500 struct radv_meta_blit2d_buffer *src,
1501 struct radv_meta_blit2d_surf *dst,
1502 unsigned num_rects,
1503 struct radv_meta_blit2d_rect *rects)
1504 {
1505 VkPipeline pipeline = cmd_buffer->device->meta_state.btoi_r32g32b32.pipeline;
1506 struct radv_device *device = cmd_buffer->device;
1507 struct radv_buffer_view src_view, dst_view;
1508 unsigned dst_offset = 0;
1509 unsigned stride;
1510 VkFormat dst_format;
1511 VkBuffer buffer;
1512
1513 switch (dst->format) {
1514 case VK_FORMAT_R32G32B32_UINT:
1515 dst_format = VK_FORMAT_R32_UINT;
1516 break;
1517 case VK_FORMAT_R32G32B32_SINT:
1518 dst_format = VK_FORMAT_R32_SINT;
1519 break;
1520 case VK_FORMAT_R32G32B32_SFLOAT:
1521 dst_format = VK_FORMAT_R32_SFLOAT;
1522 break;
1523 default:
1524 unreachable("invalid R32G32B32 format");
1525 }
1526
1527 /* This special btoi path for R32G32B32 formats will write the linear
1528 * image as a buffer with the same underlying memory. The compute
 1529 	 * shader will copy all components separately using an R32 format.
1530 */
1531 create_buffer_from_image(cmd_buffer, dst,
1532 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
1533 &buffer);
1534
1535 create_bview(cmd_buffer, src->buffer, src->offset,
1536 src->format, &src_view);
1537 create_bview(cmd_buffer, radv_buffer_from_handle(buffer), dst_offset,
1538 dst_format, &dst_view);
1539 btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1540
1541 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1542 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1543
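/* Row stride pushed to the shader, in 32-bit texels: pre-GFX9 it is derived
 * from nblk_x times the three R32 words per texel, while on GFX9 the surface
 * pitch is used directly (assumed to already be expressed in that unit).
 */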
1544 if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
1545 stride = dst->image->surface.u.gfx9.surf_pitch;
1546 } else {
1547 stride = dst->image->surface.u.legacy.level[0].nblk_x * 3;
1548 }
1549
1550 for (unsigned r = 0; r < num_rects; ++r) {
1551 unsigned push_constants[4] = {
1552 rects[r].dst_x,
1553 rects[r].dst_y,
1554 stride,
1555 src->pitch,
1556 };
1557
1558 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1559 device->meta_state.btoi_r32g32b32.img_p_layout,
1560 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
1561 push_constants);
1562
1563 radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1564 }
1565
1566 radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
1567 }
1568
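/* Push the descriptors for the generic buffer-to-image path: the source
 * buffer view as a storage texel buffer (binding 0) and the destination
 * image view as a storage image (binding 1).
 */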
1569 static void
1570 btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1571 struct radv_buffer_view *src,
1572 struct radv_image_view *dst)
1573 {
1574 struct radv_device *device = cmd_buffer->device;
1575
1576 radv_meta_push_descriptor_set(cmd_buffer,
1577 VK_PIPELINE_BIND_POINT_COMPUTE,
1578 device->meta_state.btoi.img_p_layout,
1579 0, /* set */
1580 2, /* descriptorWriteCount */
1581 (VkWriteDescriptorSet[]) {
1582 {
1583 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1584 .dstBinding = 0,
1585 .dstArrayElement = 0,
1586 .descriptorCount = 1,
1587 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1588 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) },
1589 },
1590 {
1591 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1592 .dstBinding = 1,
1593 .dstArrayElement = 0,
1594 .descriptorCount = 1,
1595 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1596 .pImageInfo = (VkDescriptorImageInfo[]) {
1597 {
1598 .sampler = VK_NULL_HANDLE,
1599 .imageView = radv_image_view_to_handle(dst),
1600 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1601 },
1602 }
1603 }
1604 });
1605 }
1606
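/* Compute-shader buffer-to-image copy. R32G32B32 destinations are routed to
 * the dedicated path above, and GFX9 3D destinations use the 3D pipeline
 * variant. Per rectangle, 16 bytes of push constants are written in this
 * order (illustrative layout only; the consuming NIR shader is built at
 * runtime):
 *
 *   struct {
 *       uint32_t dst_x;
 *       uint32_t dst_y;
 *       uint32_t layer;
 *       uint32_t pitch;   // source buffer pitch
 *   };
 */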
1607 void
1608 radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
1609 struct radv_meta_blit2d_buffer *src,
1610 struct radv_meta_blit2d_surf *dst,
1611 unsigned num_rects,
1612 struct radv_meta_blit2d_rect *rects)
1613 {
1614 VkPipeline pipeline = cmd_buffer->device->meta_state.btoi.pipeline;
1615 struct radv_device *device = cmd_buffer->device;
1616 struct radv_buffer_view src_view;
1617 struct radv_image_view dst_view;
1618
1619 if (dst->image->vk_format == VK_FORMAT_R32G32B32_UINT ||
1620 dst->image->vk_format == VK_FORMAT_R32G32B32_SINT ||
1621 dst->image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
1622 radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst,
1623 num_rects, rects);
1624 return;
1625 }
1626
1627 create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
1628 create_iview(cmd_buffer, dst, &dst_view);
1629 btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1630
1631 if (device->physical_device->rad_info.chip_class >= GFX9 &&
1632 dst->image->type == VK_IMAGE_TYPE_3D)
1633 pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d;
1634 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1635 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1636
1637 for (unsigned r = 0; r < num_rects; ++r) {
1638 unsigned push_constants[4] = {
1639 rects[r].dst_x,
1640 rects[r].dst_y,
1641 dst->layer,
1642 src->pitch,
1643 };
1644 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1645 device->meta_state.btoi.img_p_layout,
1646 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
1647 push_constants);
1648
1649 radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1650 }
1651 }
1652
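/* Push the descriptors for the image-to-image copy: the source is bound as a
 * sampled image (binding 0) and the destination as a storage image
 * (binding 1), both in VK_IMAGE_LAYOUT_GENERAL.
 */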
1653 static void
1654 itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1655 struct radv_image_view *src,
1656 struct radv_image_view *dst)
1657 {
1658 struct radv_device *device = cmd_buffer->device;
1659
1660 radv_meta_push_descriptor_set(cmd_buffer,
1661 VK_PIPELINE_BIND_POINT_COMPUTE,
1662 device->meta_state.itoi.img_p_layout,
1663 0, /* set */
1664 2, /* descriptorWriteCount */
1665 (VkWriteDescriptorSet[]) {
1666 {
1667 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1668 .dstBinding = 0,
1669 .dstArrayElement = 0,
1670 .descriptorCount = 1,
1671 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
1672 .pImageInfo = (VkDescriptorImageInfo[]) {
1673 {
1674 .sampler = VK_NULL_HANDLE,
1675 .imageView = radv_image_view_to_handle(src),
1676 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1677 },
1678 }
1679 },
1680 {
1681 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1682 .dstBinding = 1,
1683 .dstArrayElement = 0,
1684 .descriptorCount = 1,
1685 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1686 .pImageInfo = (VkDescriptorImageInfo[]) {
1687 {
1688 .sampler = VK_NULL_HANDLE,
1689 .imageView = radv_image_view_to_handle(dst),
1690 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1691 },
1692 }
1693 }
1694 });
1695 }
1696
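/* Compute-shader image-to-image copy. The 24 bytes of push constants carry
 * the source x/y/layer followed by the destination x/y/layer; on GFX9 a 3D
 * source image selects the 3D pipeline variant.
 */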
1697 void
1698 radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
1699 struct radv_meta_blit2d_surf *src,
1700 struct radv_meta_blit2d_surf *dst,
1701 unsigned num_rects,
1702 struct radv_meta_blit2d_rect *rects)
1703 {
1704 VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline;
1705 struct radv_device *device = cmd_buffer->device;
1706 struct radv_image_view src_view, dst_view;
1707
1708 create_iview(cmd_buffer, src, &src_view);
1709 create_iview(cmd_buffer, dst, &dst_view);
1710
1711 itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1712
1713 if (device->physical_device->rad_info.chip_class >= GFX9 &&
1714 src->image->type == VK_IMAGE_TYPE_3D)
1715 pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d;
1716 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1717 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1718
1719 for (unsigned r = 0; r < num_rects; ++r) {
1720 unsigned push_constants[6] = {
1721 rects[r].src_x,
1722 rects[r].src_y,
1723 src->layer,
1724 rects[r].dst_x,
1725 rects[r].dst_y,
1726 dst->layer,
1727 };
1728 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1729 device->meta_state.itoi.img_p_layout,
1730 VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
1731 push_constants);
1732
1733 radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1734 }
1735 }
1736
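/* Push the single storage texel buffer descriptor that aliases the
 * destination image for the R32G32B32 clear path.
 */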
1737 static void
1738 cleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1739 struct radv_buffer_view *view)
1740 {
1741 struct radv_device *device = cmd_buffer->device;
1742
1743 radv_meta_push_descriptor_set(cmd_buffer,
1744 VK_PIPELINE_BIND_POINT_COMPUTE,
1745 device->meta_state.cleari_r32g32b32.img_p_layout,
1746 0, /* set */
1747 1, /* descriptorWriteCount */
1748 (VkWriteDescriptorSet[]) {
1749 {
1750 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1751 .dstBinding = 0,
1752 .dstArrayElement = 0,
1753 .descriptorCount = 1,
1754 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1755 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(view) },
1756 }
1757 });
1758 }
1759
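/* Dedicated clear path for the 96-bit R32G32B32 formats: as in the
 * buffer-to-image case, the linear destination image is aliased as an R32
 * storage texel buffer and each component is cleared separately.
 */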
1760 static void
1761 radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1762 struct radv_meta_blit2d_surf *dst,
1763 const VkClearColorValue *clear_color)
1764 {
1765 VkPipeline pipeline = cmd_buffer->device->meta_state.cleari_r32g32b32.pipeline;
1766 struct radv_device *device = cmd_buffer->device;
1767 struct radv_buffer_view dst_view;
1768 unsigned stride;
1769 VkFormat format;
1770 VkBuffer buffer;
1771
1772 switch (dst->format) {
1773 case VK_FORMAT_R32G32B32_UINT:
1774 format = VK_FORMAT_R32_UINT;
1775 break;
1776 case VK_FORMAT_R32G32B32_SINT:
1777 format = VK_FORMAT_R32_SINT;
1778 break;
1779 case VK_FORMAT_R32G32B32_SFLOAT:
1780 format = VK_FORMAT_R32_SFLOAT;
1781 break;
1782 default:
1783 unreachable("invalid R32G32B32 format");
1784 }
1785
1786 /* This special clear path for R32G32B32 formats will write the linear
1787 * image as a buffer with the same underlying memory. The compute
1788 * shader will clear all components separately using an R32 format.
1789 */
1790 create_buffer_from_image(cmd_buffer, dst,
1791 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
1792 &buffer);
1793
1794 create_bview(cmd_buffer, radv_buffer_from_handle(buffer), 0, format, &dst_view);
1795 cleari_r32g32b32_bind_descriptors(cmd_buffer, &dst_view);
1796
1797 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1798 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1799
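/* Same row-stride computation as in the R32G32B32 buffer-to-image path
 * above.
 */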
1800 if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
1801 stride = dst->image->surface.u.gfx9.surf_pitch;
1802 } else {
1803 stride = dst->image->surface.u.legacy.level[0].nblk_x * 3;
1804 }
1805
1806 unsigned push_constants[4] = {
1807 clear_color->uint32[0],
1808 clear_color->uint32[1],
1809 clear_color->uint32[2],
1810 stride,
1811 };
1812
1813 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1814 device->meta_state.cleari_r32g32b32.img_p_layout,
1815 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
1816 push_constants);
1817
1818 radv_unaligned_dispatch(cmd_buffer, dst->image->info.width,
1819 dst->image->info.height, 1);
1820
1821 radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
1822 }
1823
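/* Push the destination storage image descriptor for the generic compute
 * clear path.
 */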
1824 static void
1825 cleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1826 struct radv_image_view *dst_iview)
1827 {
1828 struct radv_device *device = cmd_buffer->device;
1829
1830 radv_meta_push_descriptor_set(cmd_buffer,
1831 VK_PIPELINE_BIND_POINT_COMPUTE,
1832 device->meta_state.cleari.img_p_layout,
1833 0, /* set */
1834 1, /* descriptorWriteCount */
1835 (VkWriteDescriptorSet[]) {
1836 {
1837 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1838 .dstBinding = 0,
1839 .dstArrayElement = 0,
1840 .descriptorCount = 1,
1841 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1842 .pImageInfo = (VkDescriptorImageInfo[]) {
1843 {
1844 .sampler = VK_NULL_HANDLE,
1845 .imageView = radv_image_view_to_handle(dst_iview),
1846 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1847 },
1848 }
1849 },
1850 });
1851 }
1852
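/* Compute-based color clear. R32G32B32 formats take the buffer-aliasing
 * path above; otherwise the image is bound as a storage image, the four
 * clear color words plus the destination layer are pushed (20 bytes), and
 * one thread per pixel is dispatched over the whole image.
 */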
1853 void
1854 radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
1855 struct radv_meta_blit2d_surf *dst,
1856 const VkClearColorValue *clear_color)
1857 {
1858 VkPipeline pipeline = cmd_buffer->device->meta_state.cleari.pipeline;
1859 struct radv_device *device = cmd_buffer->device;
1860 struct radv_image_view dst_iview;
1861
1862 if (dst->format == VK_FORMAT_R32G32B32_UINT ||
1863 dst->format == VK_FORMAT_R32G32B32_SINT ||
1864 dst->format == VK_FORMAT_R32G32B32_SFLOAT) {
1865 radv_meta_clear_image_cs_r32g32b32(cmd_buffer, dst, clear_color);
1866 return;
1867 }
1868
1869 create_iview(cmd_buffer, dst, &dst_iview);
1870 cleari_bind_descriptors(cmd_buffer, &dst_iview);
1871
1872 if (device->physical_device->rad_info.chip_class >= GFX9 &&
1873 dst->image->type == VK_IMAGE_TYPE_3D)
1874 pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d;
1875
1876 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1877 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1878
1879 unsigned push_constants[5] = {
1880 clear_color->uint32[0],
1881 clear_color->uint32[1],
1882 clear_color->uint32[2],
1883 clear_color->uint32[3],
1884 dst->layer,
1885 };
1886
1887 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1888 device->meta_state.cleari.img_p_layout,
1889 VK_SHADER_STAGE_COMPUTE_BIT, 0, 20,
1890 push_constants);
1891
1892 radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
1893 }