radv: introduce the concept of meta save flags
[mesa.git] / src / amd / vulkan / radv_meta.c
/*
 * Copyright © 2016 Red Hat
 * based on intel anv code:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "radv_meta.h"

#include <fcntl.h>
#include <limits.h>
#include <pwd.h>
#include <sys/stat.h>

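/*
 * Meta operations (blits, clears, resolves, etc.) record extra commands
 * into the application's command buffer, so any state they clobber must
 * be saved beforehand and restored afterwards. The RADV_META_SAVE_*
 * flags select which pieces of state a given meta operation actually
 * needs preserved (the graphics pipeline plus dynamic viewport/scissor
 * state, descriptor set 0, push constants), so cheap operations can skip
 * saving state they never touch.
 */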
static void
radv_meta_save_novertex(struct radv_meta_saved_state *state,
			const struct radv_cmd_buffer *cmd_buffer,
			uint32_t flags)
{
	state->flags = flags;

	if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE) {
		state->old_pipeline = cmd_buffer->state.pipeline;

		/* Save all viewports. */
		state->viewport.count = cmd_buffer->state.dynamic.viewport.count;
		typed_memcpy(state->viewport.viewports,
			     cmd_buffer->state.dynamic.viewport.viewports,
			     MAX_VIEWPORTS);

		/* Save all scissors. */
		state->scissor.count = cmd_buffer->state.dynamic.scissor.count;
		typed_memcpy(state->scissor.scissors,
			     cmd_buffer->state.dynamic.scissor.scissors,
			     MAX_SCISSORS);
	}

	if (state->flags & RADV_META_SAVE_DESCRIPTORS) {
		state->old_descriptor_set0 = cmd_buffer->state.descriptors[0];
	}

	if (state->flags & RADV_META_SAVE_CONSTANTS) {
		memcpy(state->push_constants, cmd_buffer->push_constants,
		       MAX_PUSH_CONSTANTS_SIZE);
	}
}

void
radv_meta_restore(const struct radv_meta_saved_state *state,
		  struct radv_cmd_buffer *cmd_buffer)
{
	if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE) {
		radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
				     VK_PIPELINE_BIND_POINT_GRAPHICS,
				     radv_pipeline_to_handle(state->old_pipeline));

		cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE;

		/* Restore all viewports. */
		cmd_buffer->state.dynamic.viewport.count = state->viewport.count;
		typed_memcpy(cmd_buffer->state.dynamic.viewport.viewports,
			     state->viewport.viewports,
			     MAX_VIEWPORTS);

		/* Restore all scissors. */
		cmd_buffer->state.dynamic.scissor.count = state->scissor.count;
		typed_memcpy(cmd_buffer->state.dynamic.scissor.scissors,
			     state->scissor.scissors,
			     MAX_SCISSORS);

		cmd_buffer->state.dirty |= (1 << VK_DYNAMIC_STATE_VIEWPORT) |
					   (1 << VK_DYNAMIC_STATE_SCISSOR);
	}

	if (state->flags & RADV_META_SAVE_DESCRIPTORS) {
		cmd_buffer->state.descriptors[0] = state->old_descriptor_set0;
		cmd_buffer->state.descriptors_dirty |= (1 << 0);
	}

	if (state->flags & RADV_META_SAVE_CONSTANTS) {
		memcpy(cmd_buffer->push_constants, state->push_constants,
		       MAX_PUSH_CONSTANTS_SIZE);
		cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_ALL_GRAPHICS |
						    VK_SHADER_STAGE_COMPUTE_BIT;
	}
}

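/*
 * A typical graphics meta operation brackets its commands with a
 * save/restore pair, passing only the flags for the state it clobbers.
 * A minimal sketch (the pipeline bind and draw in the middle are
 * hypothetical):
 *
 *	struct radv_meta_saved_state saved_state;
 *
 *	radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer,
 *							     RADV_META_SAVE_GRAPHICS_PIPELINE |
 *							     RADV_META_SAVE_DESCRIPTORS |
 *							     RADV_META_SAVE_CONSTANTS);
 *	... bind the meta pipeline, set push constants, draw ...
 *	radv_meta_restore(&saved_state, cmd_buffer);
 */
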
void
radv_meta_save_pass(struct radv_meta_saved_pass_state *state,
		    const struct radv_cmd_buffer *cmd_buffer)
{
	state->pass = cmd_buffer->state.pass;
	state->subpass = cmd_buffer->state.subpass;
	state->framebuffer = cmd_buffer->state.framebuffer;
	state->attachments = cmd_buffer->state.attachments;
	state->render_area = cmd_buffer->state.render_area;
}

void
radv_meta_restore_pass(const struct radv_meta_saved_pass_state *state,
		       struct radv_cmd_buffer *cmd_buffer)
{
	cmd_buffer->state.pass = state->pass;
	cmd_buffer->state.subpass = state->subpass;
	cmd_buffer->state.framebuffer = state->framebuffer;
	cmd_buffer->state.attachments = state->attachments;
	cmd_buffer->state.render_area = state->render_area;
	if (state->subpass)
		radv_emit_framebuffer_state(cmd_buffer);
}

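/*
 * Compute meta operations save a fixed set of state: the bound compute
 * pipeline, descriptor set 0, and (optionally) the first
 * push_constant_size bytes of the push constant block. Pass 0 for
 * push_constant_size when the meta shader takes no push constants.
 */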
void
radv_meta_save_compute(struct radv_meta_saved_compute_state *state,
		       const struct radv_cmd_buffer *cmd_buffer,
		       unsigned push_constant_size)
{
	state->old_pipeline = cmd_buffer->state.compute_pipeline;
	state->old_descriptor_set0 = cmd_buffer->state.descriptors[0];
	state->push_constant_size = push_constant_size;

	if (state->push_constant_size) {
		memcpy(state->push_constants, cmd_buffer->push_constants,
		       state->push_constant_size);
	}
}

void
radv_meta_restore_compute(const struct radv_meta_saved_compute_state *state,
			  struct radv_cmd_buffer *cmd_buffer)
{
	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
			     radv_pipeline_to_handle(state->old_pipeline));

	cmd_buffer->state.descriptors[0] = state->old_descriptor_set0;

	if (state->push_constant_size) {
		memcpy(cmd_buffer->push_constants, state->push_constants,
		       state->push_constant_size);
		cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_COMPUTE_BIT;
	}
}

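/*
 * Usage mirrors the graphics path; a sketch for a hypothetical
 * buffer-fill dispatch that uses 16 bytes of push constants:
 *
 *	struct radv_meta_saved_compute_state saved;
 *
 *	radv_meta_save_compute(&saved, cmd_buffer, 16);
 *	... bind the fill pipeline, push 16 bytes, vkCmdDispatch ...
 *	radv_meta_restore_compute(&saved, cmd_buffer);
 */
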
VkImageViewType
radv_meta_get_view_type(const struct radv_image *image)
{
	switch (image->type) {
	case VK_IMAGE_TYPE_1D: return VK_IMAGE_VIEW_TYPE_1D;
	case VK_IMAGE_TYPE_2D: return VK_IMAGE_VIEW_TYPE_2D;
	case VK_IMAGE_TYPE_3D: return VK_IMAGE_VIEW_TYPE_3D;
	default:
		unreachable("bad VkImageType");
	}
}

/**
 * When creating a destination VkImageView, this function provides the needed
 * VkImageViewCreateInfo::subresourceRange::baseArrayLayer.
 */
uint32_t
radv_meta_get_iview_layer(const struct radv_image *dest_image,
			  const VkImageSubresourceLayers *dest_subresource,
			  const VkOffset3D *dest_offset)
{
	switch (dest_image->type) {
	case VK_IMAGE_TYPE_1D:
	case VK_IMAGE_TYPE_2D:
		return dest_subresource->baseArrayLayer;
	case VK_IMAGE_TYPE_3D:
		/* HACK: Vulkan does not allow attaching a 3D image to a framebuffer,
		 * but meta does it anyway. When doing so, we translate the
		 * destination's z offset into an array offset.
		 */
		return dest_offset->z;
	default:
		assert(!"bad VkImageType");
		return 0;
	}
}

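/*
 * Thin VkAllocationCallbacks wrappers that route all meta allocations
 * through the device's allocator with device scope, regardless of the
 * scope the caller requested.
 */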
static void *
meta_alloc(void *_device, size_t size, size_t alignment,
	   VkSystemAllocationScope allocationScope)
{
	struct radv_device *device = _device;
	return device->alloc.pfnAllocation(device->alloc.pUserData, size, alignment,
					   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
}

static void *
meta_realloc(void *_device, void *original, size_t size, size_t alignment,
	     VkSystemAllocationScope allocationScope)
{
	struct radv_device *device = _device;
	return device->alloc.pfnReallocation(device->alloc.pUserData, original,
					     size, alignment,
					     VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
}

static void
meta_free(void *_device, void *data)
{
	struct radv_device *device = _device;
	device->alloc.pfnFree(device->alloc.pUserData, data);
}

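/*
 * Computes the on-disk location of the builtin shader cache:
 * $XDG_CACHE_HOME/radv_builtin_shaders if XDG_CACHE_HOME is set,
 * otherwise $HOME/.cache/radv_builtin_shaders. Returns false if no
 * usable path fits in PATH_MAX.
 */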
static bool
radv_builtin_cache_path(char *path)
{
	char *xdg_cache_home = getenv("XDG_CACHE_HOME");
	const char *suffix = "/radv_builtin_shaders";
	const char *suffix2 = "/.cache/radv_builtin_shaders";
	struct passwd pwd, *result;
	char path2[PATH_MAX + 1]; /* PATH_MAX is not a real max, but suffices here. */

	if (xdg_cache_home) {
		if (strlen(xdg_cache_home) + strlen(suffix) > PATH_MAX)
			return false;

		strcpy(path, xdg_cache_home);
		strcat(path, suffix);
		return true;
	}

	getpwuid_r(getuid(), &pwd, path2, PATH_MAX - strlen(suffix2), &result);
	if (!result)
		return false;

	strcpy(path, pwd.pw_dir);
	strcat(path, "/.cache");
	mkdir(path, 0755);

	strcat(path, suffix);
	return true;
}

static void
radv_load_meta_pipeline(struct radv_device *device)
{
	char path[PATH_MAX + 1];
	struct stat st;
	void *data = NULL;

	if (!radv_builtin_cache_path(path))
		return;

	int fd = open(path, O_RDONLY);
	if (fd < 0)
		return;
	if (fstat(fd, &st))
		goto fail;
	data = malloc(st.st_size);
	if (!data)
		goto fail;
	if (read(fd, data, st.st_size) == -1)
		goto fail;

	radv_pipeline_cache_load(&device->meta_state.cache, data, st.st_size);
fail:
	free(data);
	close(fd);
}

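/*
 * Writes the pipeline cache back to disk if it was modified. The data is
 * written to a mkstemp()-generated temporary and then rename()d over the
 * final path, so a concurrent reader never sees a partially written cache.
 */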
static void
radv_store_meta_pipeline(struct radv_device *device)
{
	char path[PATH_MAX + 1], path2[PATH_MAX + 7];
	size_t size;
	void *data = NULL;

	if (!device->meta_state.cache.modified)
		return;

	if (radv_GetPipelineCacheData(radv_device_to_handle(device),
				      radv_pipeline_cache_to_handle(&device->meta_state.cache),
				      &size, NULL))
		return;

	if (!radv_builtin_cache_path(path))
		return;

	strcpy(path2, path);
	strcat(path2, "XXXXXX");
	int fd = mkstemp(path2);
	if (fd < 0)
		return;
	data = malloc(size);
	if (!data)
		goto fail;

	if (radv_GetPipelineCacheData(radv_device_to_handle(device),
				      radv_pipeline_cache_to_handle(&device->meta_state.cache),
				      &size, data))
		goto fail;
	if (write(fd, data, size) == -1)
		goto fail;

	rename(path2, path);
fail:
	free(data);
	close(fd);
	unlink(path2);
}

VkResult
radv_device_init_meta(struct radv_device *device)
{
	VkResult result;

	device->meta_state.alloc = (VkAllocationCallbacks) {
		.pUserData = device,
		.pfnAllocation = meta_alloc,
		.pfnReallocation = meta_realloc,
		.pfnFree = meta_free,
	};

	device->meta_state.cache.alloc = device->meta_state.alloc;
	radv_pipeline_cache_init(&device->meta_state.cache, device);
	radv_load_meta_pipeline(device);

	result = radv_device_init_meta_clear_state(device);
	if (result != VK_SUCCESS)
		goto fail_clear;

	result = radv_device_init_meta_resolve_state(device);
	if (result != VK_SUCCESS)
		goto fail_resolve;

	result = radv_device_init_meta_blit_state(device);
	if (result != VK_SUCCESS)
		goto fail_blit;

	result = radv_device_init_meta_blit2d_state(device);
	if (result != VK_SUCCESS)
		goto fail_blit2d;

	result = radv_device_init_meta_bufimage_state(device);
	if (result != VK_SUCCESS)
		goto fail_bufimage;

	result = radv_device_init_meta_depth_decomp_state(device);
	if (result != VK_SUCCESS)
		goto fail_depth_decomp;

	result = radv_device_init_meta_buffer_state(device);
	if (result != VK_SUCCESS)
		goto fail_buffer;

	result = radv_device_init_meta_query_state(device);
	if (result != VK_SUCCESS)
		goto fail_query;

	result = radv_device_init_meta_fast_clear_flush_state(device);
	if (result != VK_SUCCESS)
		goto fail_fast_clear;

	result = radv_device_init_meta_resolve_compute_state(device);
	if (result != VK_SUCCESS)
		goto fail_resolve_compute;

	result = radv_device_init_meta_resolve_fragment_state(device);
	if (result != VK_SUCCESS)
		goto fail_resolve_fragment;
	return VK_SUCCESS;

	/* Unwind in the reverse order of initialization: each label must
	 * finish the last state that was successfully initialized before
	 * the jump.
	 */
fail_resolve_fragment:
	radv_device_finish_meta_resolve_compute_state(device);
fail_resolve_compute:
	radv_device_finish_meta_fast_clear_flush_state(device);
fail_fast_clear:
	radv_device_finish_meta_query_state(device);
fail_query:
	radv_device_finish_meta_buffer_state(device);
fail_buffer:
	radv_device_finish_meta_depth_decomp_state(device);
fail_depth_decomp:
	radv_device_finish_meta_bufimage_state(device);
fail_bufimage:
	radv_device_finish_meta_blit2d_state(device);
fail_blit2d:
	radv_device_finish_meta_blit_state(device);
fail_blit:
	radv_device_finish_meta_resolve_state(device);
fail_resolve:
	radv_device_finish_meta_clear_state(device);
fail_clear:
	radv_pipeline_cache_finish(&device->meta_state.cache);
	return result;
}

408
409 void
410 radv_device_finish_meta(struct radv_device *device)
411 {
412 radv_device_finish_meta_clear_state(device);
413 radv_device_finish_meta_resolve_state(device);
414 radv_device_finish_meta_blit_state(device);
415 radv_device_finish_meta_blit2d_state(device);
416 radv_device_finish_meta_bufimage_state(device);
417 radv_device_finish_meta_depth_decomp_state(device);
418 radv_device_finish_meta_query_state(device);
419 radv_device_finish_meta_buffer_state(device);
420 radv_device_finish_meta_fast_clear_flush_state(device);
421 radv_device_finish_meta_resolve_compute_state(device);
422 radv_device_finish_meta_resolve_fragment_state(device);
423
424 radv_store_meta_pipeline(device);
425 radv_pipeline_cache_finish(&device->meta_state.cache);
426 }
427
/*
 * The most common meta operations all want to have the viewport
 * reset and any scissors disabled. The rest of the dynamic state
 * should have no effect.
 */
void
radv_meta_save_graphics_reset_vport_scissor_novertex(struct radv_meta_saved_state *saved_state,
						     struct radv_cmd_buffer *cmd_buffer,
						     uint32_t flags)
{
	radv_meta_save_novertex(saved_state, cmd_buffer, flags);
	cmd_buffer->state.dynamic.viewport.count = 0;
	cmd_buffer->state.dynamic.scissor.count = 0;
	cmd_buffer->state.dirty |= (1 << VK_DYNAMIC_STATE_VIEWPORT) |
				   (1 << VK_DYNAMIC_STATE_SCISSOR);
}

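/*
 * Builds NIR that derives a meta rectangle's vertex positions from the
 * vertex id alone, so meta draws need no vertex buffer or vertex input
 * state. comp2 supplies the z component of every vertex (e.g. the clear
 * depth); radv_meta_gen_rect_vertices() below fixes it to 0.0.
 */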
nir_ssa_def *radv_meta_gen_rect_vertices_comp2(nir_builder *vs_b, nir_ssa_def *comp2)
{
	nir_intrinsic_instr *vertex_id = nir_intrinsic_instr_create(vs_b->shader, nir_intrinsic_load_vertex_id_zero_base);
	nir_ssa_dest_init(&vertex_id->instr, &vertex_id->dest, 1, 32, "vertexid");
	nir_builder_instr_insert(vs_b, &vertex_id->instr);

	/* vertex 0 - -1.0, -1.0 */
	/* vertex 1 - -1.0,  1.0 */
	/* vertex 2 -  1.0, -1.0 */
	/* so channel 0 is vertex_id != 2 ? -1.0 : 1.0
	 * and channel 1 is vertex_id != 1 ? -1.0 : 1.0 */

	nir_ssa_def *c0cmp = nir_ine(vs_b, &vertex_id->dest.ssa,
				     nir_imm_int(vs_b, 2));
	nir_ssa_def *c1cmp = nir_ine(vs_b, &vertex_id->dest.ssa,
				     nir_imm_int(vs_b, 1));

	nir_ssa_def *comp[4];
	comp[0] = nir_bcsel(vs_b, c0cmp,
			    nir_imm_float(vs_b, -1.0),
			    nir_imm_float(vs_b, 1.0));

	comp[1] = nir_bcsel(vs_b, c1cmp,
			    nir_imm_float(vs_b, -1.0),
			    nir_imm_float(vs_b, 1.0));
	comp[2] = comp2;
	comp[3] = nir_imm_float(vs_b, 1.0);
	nir_ssa_def *outvec = nir_vec(vs_b, comp, 4);

	return outvec;
}

nir_ssa_def *radv_meta_gen_rect_vertices(nir_builder *vs_b)
{
	return radv_meta_gen_rect_vertices_comp2(vs_b, nir_imm_float(vs_b, 0.0));
}

/* Vertex shader that generates its own vertices via
 * radv_meta_gen_rect_vertices(); no vertex inputs are required. */
nir_shader *
radv_meta_build_nir_vs_generate_vertices(void)
{
	const struct glsl_type *vec4 = glsl_vec4_type();

	nir_builder b;
	nir_variable *v_position;

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_vs_gen_verts");

	nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);

	v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
					 "gl_Position");
	v_position->data.location = VARYING_SLOT_POS;

	nir_store_var(&b, v_position, outvec, 0xf);

	return b.shader;
}

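/*
 * Fragment shader with an empty body and no outputs, for meta passes
 * that only need rasterization side effects (e.g. depth/stencil-only
 * draws) rather than color writes.
 */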
nir_shader *
radv_meta_build_nir_fs_noop(void)
{
	nir_builder b;

	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_noop_fs");

	return b.shader;
}

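/*
 * Emits the core of a color resolve shader: sample 0 is fetched with
 * txf_ms, and when the format is non-integer and multiple samples exist,
 * nir_texop_samples_identical is used to branch between a fast path that
 * stores one sample and a loop that fetches and averages every sample.
 */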
void radv_meta_build_resolve_shader_core(nir_builder *b,
					 bool is_integer,
					 int samples,
					 nir_variable *input_img,
					 nir_variable *color,
					 nir_ssa_def *img_coord)
{
	/* do a txf_ms on each sample */
	nir_ssa_def *tmp;
	nir_if *outer_if = NULL;

	nir_tex_instr *tex = nir_tex_instr_create(b->shader, 2);
	tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
	tex->op = nir_texop_txf_ms;
	tex->src[0].src_type = nir_tex_src_coord;
	tex->src[0].src = nir_src_for_ssa(img_coord);
	tex->src[1].src_type = nir_tex_src_ms_index;
	tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, 0));
	tex->dest_type = nir_type_float;
	tex->is_array = false;
	tex->coord_components = 2;
	tex->texture = nir_deref_var_create(tex, input_img);
	tex->sampler = NULL;

	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
	nir_builder_instr_insert(b, &tex->instr);

	tmp = &tex->dest.ssa;

	if (!is_integer && samples > 1) {
		nir_tex_instr *tex_all_same = nir_tex_instr_create(b->shader, 1);
		tex_all_same->sampler_dim = GLSL_SAMPLER_DIM_MS;
		tex_all_same->op = nir_texop_samples_identical;
		tex_all_same->src[0].src_type = nir_tex_src_coord;
		tex_all_same->src[0].src = nir_src_for_ssa(img_coord);
		tex_all_same->dest_type = nir_type_float;
		tex_all_same->is_array = false;
		tex_all_same->coord_components = 2;
		tex_all_same->texture = nir_deref_var_create(tex_all_same, input_img);
		tex_all_same->sampler = NULL;

		nir_ssa_dest_init(&tex_all_same->instr, &tex_all_same->dest, 1, 32, "tex");
		nir_builder_instr_insert(b, &tex_all_same->instr);

		nir_ssa_def *all_same = nir_ine(b, &tex_all_same->dest.ssa, nir_imm_int(b, 0));
		nir_if *if_stmt = nir_if_create(b->shader);
		if_stmt->condition = nir_src_for_ssa(all_same);
		nir_cf_node_insert(b->cursor, &if_stmt->cf_node);

		b->cursor = nir_after_cf_list(&if_stmt->then_list);
		for (int i = 1; i < samples; i++) {
			nir_tex_instr *tex_add = nir_tex_instr_create(b->shader, 2);
			tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
			tex_add->op = nir_texop_txf_ms;
			tex_add->src[0].src_type = nir_tex_src_coord;
			tex_add->src[0].src = nir_src_for_ssa(img_coord);
			tex_add->src[1].src_type = nir_tex_src_ms_index;
			tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(b, i));
			tex_add->dest_type = nir_type_float;
			tex_add->is_array = false;
			tex_add->coord_components = 2;
			tex_add->texture = nir_deref_var_create(tex_add, input_img);
			tex_add->sampler = NULL;

			nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
			nir_builder_instr_insert(b, &tex_add->instr);

			tmp = nir_fadd(b, tmp, &tex_add->dest.ssa);
		}

		tmp = nir_fdiv(b, tmp, nir_imm_float(b, samples));
		nir_store_var(b, color, tmp, 0xf);
		b->cursor = nir_after_cf_list(&if_stmt->else_list);
		outer_if = if_stmt;
	}
	nir_store_var(b, color, &tex->dest.ssa, 0xf);

	if (outer_if)
		b->cursor = nir_after_cf_node(&outer_if->cf_node);
}