vk: Implement basic compute shader support
[mesa.git] / src / vulkan / device.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
#include <stdlib.h>
#include <errno.h>
29
30 #include "private.h"
31
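/* Read an integer out of an environment variable, returning 0 when it is
 * unset. strtol() with base 0 accepts decimal, hex ("0x...") and octal, so
 * something like INTEL_DEVID_OVERRIDE=0x1616 works as expected.
 */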
32 static int
33 anv_env_get_int(const char *name)
34 {
35 const char *val = getenv(name);
36
37 if (!val)
38 return 0;
39
40 return strtol(val, NULL, 0);
41 }
42
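/* Probe one DRM render node: open it, read the chipset id (or take it from
 * INTEL_DEVID_OVERRIDE, which also implies no-hw mode), look up the device
 * info table, and require the kernel features we depend on (wait-timeout,
 * execbuf2, LLC and exec-constants). The fd is only used for probing and is
 * closed again before returning.
 */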
43 static VkResult
44 fill_physical_device(struct anv_physical_device *device,
45 struct anv_instance *instance,
46 const char *path)
47 {
48 int fd;
49
50 fd = open(path, O_RDWR | O_CLOEXEC);
51 if (fd < 0)
52 return vk_error(VK_ERROR_UNAVAILABLE);
53
54 device->instance = instance;
55 device->path = path;
56
57 device->chipset_id = anv_env_get_int("INTEL_DEVID_OVERRIDE");
58 device->no_hw = false;
59 if (device->chipset_id) {
60 /* INTEL_DEVID_OVERRIDE implies INTEL_NO_HW. */
61 device->no_hw = true;
62 } else {
63 device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID);
64 }
65 if (!device->chipset_id)
66 goto fail;
67
68 device->name = brw_get_device_name(device->chipset_id);
69 device->info = brw_get_device_info(device->chipset_id, -1);
70 if (!device->info)
71 goto fail;
72
73 if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT))
74 goto fail;
75
76 if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2))
77 goto fail;
78
79 if (!anv_gem_get_param(fd, I915_PARAM_HAS_LLC))
80 goto fail;
81
82 if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_CONSTANTS))
83 goto fail;
84
85 close(fd);
86
87 return VK_SUCCESS;
88
89 fail:
90 close(fd);
91
92 return vk_error(VK_ERROR_UNAVAILABLE);
93 }
94
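/* Default allocator used when the application does not supply callbacks.
 * Note that it ignores the requested alignment and relies on malloc()'s
 * natural alignment, which is sufficient for the 8-byte alignments this
 * driver requests for API objects.
 */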
95 static void *default_alloc(
96 void* pUserData,
97 size_t size,
98 size_t alignment,
99 VkSystemAllocType allocType)
100 {
101 return malloc(size);
102 }
103
104 static void default_free(
105 void* pUserData,
106 void* pMem)
107 {
108 free(pMem);
109 }
110
111 static const VkAllocCallbacks default_alloc_callbacks = {
112 .pUserData = NULL,
113 .pfnAlloc = default_alloc,
114 .pfnFree = default_free
115 };
116
117 VkResult anv_CreateInstance(
118 const VkInstanceCreateInfo* pCreateInfo,
119 VkInstance* pInstance)
120 {
121 struct anv_instance *instance;
122 const VkAllocCallbacks *alloc_callbacks = &default_alloc_callbacks;
123 void *user_data = NULL;
124 VkResult result;
125
126 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
127
128 if (pCreateInfo->pAllocCb) {
129 alloc_callbacks = pCreateInfo->pAllocCb;
130 user_data = pCreateInfo->pAllocCb->pUserData;
131 }
132 instance = alloc_callbacks->pfnAlloc(user_data, sizeof(*instance), 8,
133 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
134 if (!instance)
135 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
136
137 instance->pAllocUserData = alloc_callbacks->pUserData;
138 instance->pfnAlloc = alloc_callbacks->pfnAlloc;
139 instance->pfnFree = alloc_callbacks->pfnFree;
140 instance->apiVersion = pCreateInfo->pAppInfo->apiVersion;
141
142 instance->physicalDeviceCount = 0;
143 result = fill_physical_device(&instance->physicalDevice,
144 instance, "/dev/dri/renderD128");
145
146 if (result != VK_SUCCESS) {
alloc_callbacks->pfnFree(user_data, instance);
147 return result;
}
148
149 instance->physicalDeviceCount++;
150 *pInstance = (VkInstance) instance;
151
152 return VK_SUCCESS;
153 }
154
155 VkResult anv_DestroyInstance(
156 VkInstance _instance)
157 {
158 struct anv_instance *instance = (struct anv_instance *) _instance;
159
160 instance->pfnFree(instance->pAllocUserData, instance);
161
162 return VK_SUCCESS;
163 }
164
165 VkResult anv_EnumeratePhysicalDevices(
166 VkInstance _instance,
167 uint32_t* pPhysicalDeviceCount,
168 VkPhysicalDevice* pPhysicalDevices)
169 {
170 struct anv_instance *instance = (struct anv_instance *) _instance;
171
172 if (*pPhysicalDeviceCount >= 1)
173 pPhysicalDevices[0] = (VkPhysicalDevice) &instance->physicalDevice;
174 *pPhysicalDeviceCount = instance->physicalDeviceCount;
175
176 return VK_SUCCESS;
177 }
178
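/* Info queries follow the sizing convention of this API revision: the call
 * always writes *pDataSize for the requested infoType and only fills in
 * *pData when the pointer is non-NULL, so callers can query the size first.
 */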
179 VkResult anv_GetPhysicalDeviceInfo(
180 VkPhysicalDevice physicalDevice,
181 VkPhysicalDeviceInfoType infoType,
182 size_t* pDataSize,
183 void* pData)
184 {
185 struct anv_physical_device *device = (struct anv_physical_device *) physicalDevice;
186 VkPhysicalDeviceProperties *properties;
187 VkPhysicalDevicePerformance *performance;
188 VkPhysicalDeviceQueueProperties *queue_properties;
189 VkPhysicalDeviceMemoryProperties *memory_properties;
190 VkDisplayPropertiesWSI *display_properties;
191 uint64_t ns_per_tick = 80;
192
193 switch ((uint32_t) infoType) {
194 case VK_PHYSICAL_DEVICE_INFO_TYPE_PROPERTIES:
195 properties = pData;
196
197 *pDataSize = sizeof(*properties);
198 if (pData == NULL)
199 return VK_SUCCESS;
200
201 properties->apiVersion = 1;
202 properties->driverVersion = 1;
203 properties->vendorId = 0x8086;
204 properties->deviceId = device->chipset_id;
205 properties->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
206 strcpy(properties->deviceName, device->name);
207 properties->maxInlineMemoryUpdateSize = 0;
208 properties->maxBoundDescriptorSets = MAX_SETS;
209 properties->maxThreadGroupSize = 512;
210 properties->timestampFrequency = 1000 * 1000 * 1000 / ns_per_tick;
211 properties->multiColorAttachmentClears = true;
212 properties->maxDescriptorSets = 8;
213 properties->maxViewports = 16;
214 properties->maxColorAttachments = 8;
215 return VK_SUCCESS;
216
217 case VK_PHYSICAL_DEVICE_INFO_TYPE_PERFORMANCE:
218 performance = pData;
219
220 *pDataSize = sizeof(*performance);
221 if (pData == NULL)
222 return VK_SUCCESS;
223
224 performance->maxDeviceClock = 1.0;
225 performance->aluPerClock = 1.0;
226 performance->texPerClock = 1.0;
227 performance->primsPerClock = 1.0;
228 performance->pixelsPerClock = 1.0;
229 return VK_SUCCESS;
230
231 case VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PROPERTIES:
232 queue_properties = pData;
233
234 *pDataSize = sizeof(*queue_properties);
235 if (pData == NULL)
236 return VK_SUCCESS;
237
238 queue_properties->queueFlags = 0;
239 queue_properties->queueCount = 1;
240 queue_properties->maxAtomicCounters = 0;
241 queue_properties->supportsTimestamps = true;
242 queue_properties->maxMemReferences = 256;
243 return VK_SUCCESS;
244
245 case VK_PHYSICAL_DEVICE_INFO_TYPE_MEMORY_PROPERTIES:
246 memory_properties = pData;
247
248 *pDataSize = sizeof(*memory_properties);
249 if (pData == NULL)
250 return VK_SUCCESS;
251
252 memory_properties->supportsMigration = false;
253 memory_properties->supportsPinning = false;
254 return VK_SUCCESS;
255
256 case VK_PHYSICAL_DEVICE_INFO_TYPE_DISPLAY_PROPERTIES_WSI:
257 anv_finishme("VK_PHYSICAL_DEVICE_INFO_TYPE_DISPLAY_PROPERTIES_WSI");
258
259 *pDataSize = sizeof(*display_properties);
260 if (pData == NULL)
261 return VK_SUCCESS;
262
263 display_properties = pData;
264 display_properties->display = 0;
265 display_properties->physicalResolution = (VkExtent2D) { 0, 0 };
266 return VK_SUCCESS;
267
268 case VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PRESENT_PROPERTIES_WSI:
269 anv_finishme("VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PRESENT_PROPERTIES_WSI");
270 return VK_SUCCESS;
271
272
273 default:
274 return VK_UNSUPPORTED;
275 }
276
277 }
278
279 void * vkGetProcAddr(
280 VkPhysicalDevice physicalDevice,
281 const char* pName)
282 {
283 return anv_lookup_entrypoint(pName);
284 }
285
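/* Parse the comma-separated INTEL_DEBUG variable: "aub" enables AUB dumping
 * of submitted command buffers and "no_hw" skips actual execbuffer calls.
 */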
286 static void
287 parse_debug_flags(struct anv_device *device)
288 {
289 const char *debug, *p, *end;
290
291 debug = getenv("INTEL_DEBUG");
292 device->dump_aub = false;
293 if (debug) {
294 for (p = debug; *p; p = end + 1) {
295 end = strchrnul(p, ',');
296 if (end - p == 3 && memcmp(p, "aub", 3) == 0)
297 device->dump_aub = true;
298 if (end - p == 5 && memcmp(p, "no_hw", 5) == 0)
299 device->no_hw = true;
300 if (*end == '\0')
301 break;
302 }
303 }
304 }
305
306 static VkResult
307 anv_queue_init(struct anv_device *device, struct anv_queue *queue)
308 {
309 queue->device = device;
310 queue->pool = &device->surface_state_pool;
311
312 queue->completed_serial = anv_state_pool_alloc(queue->pool, 4, 4);
313 if (queue->completed_serial.map == NULL)
314 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
315
316 *(uint32_t *)queue->completed_serial.map = 0;
317 queue->next_serial = 1;
318
319 return VK_SUCCESS;
320 }
321
322 static void
323 anv_queue_finish(struct anv_queue *queue)
324 {
325 #ifdef HAVE_VALGRIND
326 /* This gets torn down with the device so we only need to do this if
327 * valgrind is present.
328 */
329 anv_state_pool_free(queue->pool, queue->completed_serial);
330 #endif
331 }
332
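/* Upload the three border colors defined by the API into dynamic state, in
 * both float and uint32 form, so that SAMPLER_STATE can reference them via
 * its indirect state pointer. (Only the float set is used by
 * anv_CreateSampler below.)
 */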
333 static void
334 anv_device_init_border_colors(struct anv_device *device)
335 {
336 float float_border_colors[][4] = {
337 [VK_BORDER_COLOR_OPAQUE_WHITE] = { 1.0, 1.0, 1.0, 1.0 },
338 [VK_BORDER_COLOR_TRANSPARENT_BLACK] = { 0.0, 0.0, 0.0, 0.0 },
339 [VK_BORDER_COLOR_OPAQUE_BLACK] = { 0.0, 0.0, 0.0, 1.0 }
340 };
341
342 uint32_t uint32_border_colors[][4] = {
343 [VK_BORDER_COLOR_OPAQUE_WHITE] = { 1, 1, 1, 1 },
344 [VK_BORDER_COLOR_TRANSPARENT_BLACK] = { 0, 0, 0, 0 },
345 [VK_BORDER_COLOR_OPAQUE_BLACK] = { 0, 0, 0, 1 }
346 };
347
348 device->float_border_colors =
349 anv_state_pool_alloc(&device->dynamic_state_pool,
350 sizeof(float_border_colors), 32);
351 memcpy(device->float_border_colors.map,
352 float_border_colors, sizeof(float_border_colors));
353
354 device->uint32_border_colors =
355 anv_state_pool_alloc(&device->dynamic_state_pool,
356 sizeof(uint32_border_colors), 32);
357 memcpy(device->uint32_border_colors.map,
358 uint32_border_colors, sizeof(uint32_border_colors));
359
360 }
361
362 static const uint32_t BATCH_SIZE = 8192;
363
364 VkResult anv_CreateDevice(
365 VkPhysicalDevice _physicalDevice,
366 const VkDeviceCreateInfo* pCreateInfo,
367 VkDevice* pDevice)
368 {
369 struct anv_physical_device *physicalDevice =
370 (struct anv_physical_device *) _physicalDevice;
371 struct anv_instance *instance = physicalDevice->instance;
372 struct anv_device *device;
373
374 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
375
376 device = instance->pfnAlloc(instance->pAllocUserData,
377 sizeof(*device), 8,
378 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
379 if (!device)
380 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
381
382 device->no_hw = physicalDevice->no_hw;
383 parse_debug_flags(device);
384
385 device->instance = physicalDevice->instance;
386 device->fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC);
387 if (device->fd == -1)
388 goto fail_device;
389
390 device->context_id = anv_gem_create_context(device);
391 if (device->context_id == -1)
392 goto fail_fd;
393
394 anv_bo_pool_init(&device->batch_bo_pool, device, BATCH_SIZE);
395
396 anv_block_pool_init(&device->dynamic_state_block_pool, device, 2048);
397
398 anv_state_pool_init(&device->dynamic_state_pool,
399 &device->dynamic_state_block_pool);
400
401 anv_block_pool_init(&device->instruction_block_pool, device, 2048);
402 anv_block_pool_init(&device->surface_state_block_pool, device, 2048);
403
404 anv_state_pool_init(&device->surface_state_pool,
405 &device->surface_state_block_pool);
406
407 device->info = *physicalDevice->info;
408
409 device->compiler = anv_compiler_create(device);
410 device->aub_writer = NULL;
411
412 pthread_mutex_init(&device->mutex, NULL);
413
414 anv_queue_init(device, &device->queue);
415
416 anv_device_init_meta(device);
417
418 anv_device_init_border_colors(device);
419
420 *pDevice = (VkDevice) device;
421
422 return VK_SUCCESS;
423
424 fail_fd:
425 close(device->fd);
426 fail_device:
427 anv_device_free(device, device);
428
429 return vk_error(VK_ERROR_UNAVAILABLE);
430 }
431
432 VkResult anv_DestroyDevice(
433 VkDevice _device)
434 {
435 struct anv_device *device = (struct anv_device *) _device;
436
437 anv_compiler_destroy(device->compiler);
438
439 anv_queue_finish(&device->queue);
440
441 anv_device_finish_meta(device);
442
443 #ifdef HAVE_VALGRIND
444 /* We only need to free these to prevent valgrind errors. The backing
445 * BO will go away in a couple of lines so we don't actually leak.
446 */
447 anv_state_pool_free(&device->dynamic_state_pool,
448 device->float_border_colors);
449 anv_state_pool_free(&device->dynamic_state_pool,
450 device->uint32_border_colors);
451 #endif
452
453 anv_bo_pool_finish(&device->batch_bo_pool);
454 anv_block_pool_finish(&device->dynamic_state_block_pool);
455 anv_block_pool_finish(&device->instruction_block_pool);
456 anv_block_pool_finish(&device->surface_state_block_pool);
457
458 close(device->fd);
459
460 if (device->aub_writer)
461 anv_aub_writer_destroy(device->aub_writer);
462
463 anv_device_free(device, device);
464
465 return VK_SUCCESS;
466 }
467
468 VkResult anv_GetGlobalExtensionInfo(
469 VkExtensionInfoType infoType,
470 uint32_t extensionIndex,
471 size_t* pDataSize,
472 void* pData)
473 {
474 static const VkExtensionProperties extensions[] = {
475 {
476 .extName = "VK_WSI_LunarG",
477 .version = 3
478 }
479 };
480 uint32_t count = ARRAY_SIZE(extensions);
481
482 switch (infoType) {
483 case VK_EXTENSION_INFO_TYPE_COUNT:
484 memcpy(pData, &count, sizeof(count));
485 *pDataSize = sizeof(count);
486 return VK_SUCCESS;
487
488 case VK_EXTENSION_INFO_TYPE_PROPERTIES:
489 if (extensionIndex >= count)
490 return vk_error(VK_ERROR_INVALID_EXTENSION);
491
492 memcpy(pData, &extensions[extensionIndex], sizeof(extensions[0]));
493 *pDataSize = sizeof(extensions[0]);
494 return VK_SUCCESS;
495
496 default:
497 return VK_UNSUPPORTED;
498 }
499 }
500
501 VkResult anv_GetPhysicalDeviceExtensionInfo(
502 VkPhysicalDevice physicalDevice,
503 VkExtensionInfoType infoType,
504 uint32_t extensionIndex,
505 size_t* pDataSize,
506 void* pData)
507 {
508 uint32_t *count;
509
510 switch (infoType) {
511 case VK_EXTENSION_INFO_TYPE_COUNT:
512 *pDataSize = 4;
513 if (pData == NULL)
514 return VK_SUCCESS;
515
516 count = pData;
517 *count = 0;
518 return VK_SUCCESS;
519
520 case VK_EXTENSION_INFO_TYPE_PROPERTIES:
521 return vk_error(VK_ERROR_INVALID_EXTENSION);
522
523 default:
524 return VK_UNSUPPORTED;
525 }
526 }
527
528 VkResult anv_EnumerateLayers(
529 VkPhysicalDevice physicalDevice,
530 size_t maxStringSize,
531 size_t* pLayerCount,
532 char* const* pOutLayers,
533 void* pReserved)
534 {
535 *pLayerCount = 0;
536
537 return VK_SUCCESS;
538 }
539
540 VkResult anv_GetDeviceQueue(
541 VkDevice _device,
542 uint32_t queueNodeIndex,
543 uint32_t queueIndex,
544 VkQueue* pQueue)
545 {
546 struct anv_device *device = (struct anv_device *) _device;
547
548 assert(queueIndex == 0);
549
550 *pQueue = (VkQueue) &device->queue;
551
552 return VK_SUCCESS;
553 }
554
555 VkResult
556 anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device)
557 {
558 list->num_relocs = 0;
559 list->array_length = 256;
560 list->relocs =
561 anv_device_alloc(device, list->array_length * sizeof(*list->relocs), 8,
562 VK_SYSTEM_ALLOC_TYPE_INTERNAL);
563
564 if (list->relocs == NULL)
565 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
566
567 list->reloc_bos =
568 anv_device_alloc(device, list->array_length * sizeof(*list->reloc_bos), 8,
569 VK_SYSTEM_ALLOC_TYPE_INTERNAL);
570
571 if (list->reloc_bos == NULL) {
572 anv_device_free(device, list->relocs);
573 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
574 }
575
576 return VK_SUCCESS;
577 }
578
579 void
580 anv_reloc_list_finish(struct anv_reloc_list *list, struct anv_device *device)
581 {
582 anv_device_free(device, list->relocs);
583 anv_device_free(device, list->reloc_bos);
584 }
585
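/* Make sure the relocation list has room for num_additional_relocs more
 * entries, doubling the array length until it fits and copying the existing
 * entries over. Returns without reallocating when there is already room.
 */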
586 static VkResult
587 anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device,
588 size_t num_additional_relocs)
589 {
590 if (list->num_relocs + num_additional_relocs <= list->array_length)
591 return VK_SUCCESS;
592
593 size_t new_length = list->array_length * 2;
594 while (new_length < list->num_relocs + num_additional_relocs)
595 new_length *= 2;
596
597 struct drm_i915_gem_relocation_entry *new_relocs =
598 anv_device_alloc(device, new_length * sizeof(*list->relocs), 8,
599 VK_SYSTEM_ALLOC_TYPE_INTERNAL);
600 if (new_relocs == NULL)
601 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
602
603 struct anv_bo **new_reloc_bos =
604 anv_device_alloc(device, new_length * sizeof(*list->reloc_bos), 8,
605 VK_SYSTEM_ALLOC_TYPE_INTERNAL);
606 if (new_reloc_bos == NULL) {
607 anv_device_free(device, new_relocs);
608 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
609 }
610
611 memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs));
612 memcpy(new_reloc_bos, list->reloc_bos,
613 list->num_relocs * sizeof(*list->reloc_bos));
614
615 anv_device_free(device, list->relocs);
616 anv_device_free(device, list->reloc_bos);
617
618 list->relocs = new_relocs;
619 list->reloc_bos = new_reloc_bos;
620
621 return VK_SUCCESS;
622 }
623
624 static VkResult
625 anv_batch_bo_create(struct anv_device *device, struct anv_batch_bo **bbo_out)
626 {
627 VkResult result;
628
629 struct anv_batch_bo *bbo =
630 anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
631 if (bbo == NULL)
632 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
633
634 bbo->num_relocs = 0;
635 bbo->prev_batch_bo = NULL;
636
637 result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo);
638 if (result != VK_SUCCESS) {
639 anv_device_free(device, bbo);
640 return result;
641 }
642
643 *bbo_out = bbo;
644
645 return VK_SUCCESS;
646 }
647
648 static void
649 anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch,
650 size_t batch_padding)
651 {
652 batch->next = batch->start = bbo->bo.map;
653 batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
654 bbo->first_reloc = batch->relocs.num_relocs;
655 }
656
657 static void
658 anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch)
659 {
660 assert(batch->start == bbo->bo.map);
661 bbo->length = batch->next - batch->start;
662 VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length));
663 bbo->num_relocs = batch->relocs.num_relocs - bbo->first_reloc;
664 }
665
666 static void
667 anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_device *device)
668 {
669 anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo);
670 anv_device_free(device, bbo);
671 }
672
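/* Reserve space for num_dwords in the batch, asking the extend callback for
 * a new batch BO first if the write would run past the current end, and
 * return a pointer to the reserved space.
 */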
673 void *
674 anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords)
675 {
676 if (batch->next + num_dwords * 4 > batch->end)
677 batch->extend_cb(batch, batch->user_data);
678
679 void *p = batch->next;
680
681 batch->next += num_dwords * 4;
682 assert(batch->next <= batch->end);
683
684 return p;
685 }
686
687 static void
688 anv_reloc_list_append(struct anv_reloc_list *list, struct anv_device *device,
689 struct anv_reloc_list *other, uint32_t offset)
690 {
691 anv_reloc_list_grow(list, device, other->num_relocs);
692 /* TODO: Handle failure */
693
694 memcpy(&list->relocs[list->num_relocs], &other->relocs[0],
695 other->num_relocs * sizeof(other->relocs[0]));
696 memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0],
697 other->num_relocs * sizeof(other->reloc_bos[0]));
698
699 for (uint32_t i = 0; i < other->num_relocs; i++)
700 list->relocs[i + list->num_relocs].offset += offset;
701
702 list->num_relocs += other->num_relocs;
703 }
704
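/* Record a relocation at the given batch offset pointing at target_bo plus
 * delta, and return the presumed address (target_bo->offset + delta) that
 * the caller writes into the batch. If the kernel keeps the BO at its
 * presumed offset, no fixup is needed at execbuf time.
 */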
705 static uint64_t
706 anv_reloc_list_add(struct anv_reloc_list *list, struct anv_device *device,
707 uint32_t offset, struct anv_bo *target_bo, uint32_t delta)
708 {
709 struct drm_i915_gem_relocation_entry *entry;
710 int index;
711
712 anv_reloc_list_grow(list, device, 1);
713 /* TODO: Handle failure */
714
715 /* XXX: Can we use I915_EXEC_HANDLE_LUT? */
716 index = list->num_relocs++;
717 list->reloc_bos[index] = target_bo;
718 entry = &list->relocs[index];
719 entry->target_handle = target_bo->gem_handle;
720 entry->delta = delta;
721 entry->offset = offset;
722 entry->presumed_offset = target_bo->offset;
723 entry->read_domains = 0;
724 entry->write_domain = 0;
725
726 return target_bo->offset + delta;
727 }
728
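/* Append the commands recorded in `other` to `batch`: copy the raw dwords,
 * then append other's relocation list with each relocation offset rebased by
 * the position the copy landed at in the destination batch.
 */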
729 void
730 anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other)
731 {
732 uint32_t size, offset;
733
734 size = other->next - other->start;
735 assert(size % 4 == 0);
736
737 if (batch->next + size > batch->end)
738 batch->extend_cb(batch, batch->user_data);
739
740 assert(batch->next + size <= batch->end);
741
742 memcpy(batch->next, other->start, size);
743
744 offset = batch->next - batch->start;
745 anv_reloc_list_append(&batch->relocs, batch->device,
746 &other->relocs, offset);
747
748 batch->next += size;
749 }
750
751 uint64_t
752 anv_batch_emit_reloc(struct anv_batch *batch,
753 void *location, struct anv_bo *bo, uint32_t delta)
754 {
755 return anv_reloc_list_add(&batch->relocs, batch->device,
756 location - batch->start, bo, delta);
757 }
758
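/* Submit each command buffer's pre-built execbuf, followed by the fence's
 * single-BO batch if one was passed, then update each BO's presumed offset
 * from what the kernel returned. In no-hw mode nothing is submitted and the
 * queue's completed serial is simply advanced.
 */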
759 VkResult anv_QueueSubmit(
760 VkQueue _queue,
761 uint32_t cmdBufferCount,
762 const VkCmdBuffer* pCmdBuffers,
763 VkFence _fence)
764 {
765 struct anv_queue *queue = (struct anv_queue *) _queue;
766 struct anv_device *device = queue->device;
767 struct anv_fence *fence = (struct anv_fence *) _fence;
768 int ret;
769
770 for (uint32_t i = 0; i < cmdBufferCount; i++) {
771 struct anv_cmd_buffer *cmd_buffer =
772 (struct anv_cmd_buffer *) pCmdBuffers[i];
773
774 if (device->dump_aub)
775 anv_cmd_buffer_dump(cmd_buffer);
776
777 if (!device->no_hw) {
778 ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf);
779 if (ret != 0)
780 return vk_error(VK_ERROR_UNKNOWN);
781
782 if (fence) {
783 ret = anv_gem_execbuffer(device, &fence->execbuf);
784 if (ret != 0)
785 return vk_error(VK_ERROR_UNKNOWN);
786 }
787
788 for (uint32_t i = 0; i < cmd_buffer->bo_count; i++)
789 cmd_buffer->exec2_bos[i]->offset = cmd_buffer->exec2_objects[i].offset;
790 } else {
791 *(uint32_t *)queue->completed_serial.map = cmd_buffer->serial;
792 }
793 }
794
795 return VK_SUCCESS;
796 }
797
798 VkResult anv_QueueAddMemReferences(
799 VkQueue queue,
800 uint32_t count,
801 const VkDeviceMemory* pMems)
802 {
803 return VK_SUCCESS;
804 }
805
806 VkResult anv_QueueRemoveMemReferences(
807 VkQueue queue,
808 uint32_t count,
809 const VkDeviceMemory* pMems)
810 {
811 return VK_SUCCESS;
812 }
813
814 VkResult anv_QueueWaitIdle(
815 VkQueue _queue)
816 {
817 struct anv_queue *queue = (struct anv_queue *) _queue;
818
819 return vkDeviceWaitIdle((VkDevice) queue->device);
820 }
821
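/* Wait for the GPU to go idle by submitting a tiny batch containing just
 * MI_BATCH_BUFFER_END (allocated out of the dynamic state pool, so the wait
 * is on that pool's BO) and then blocking on it with an effectively infinite
 * timeout. This batch only completes once previously submitted work on this
 * context has retired.
 */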
822 VkResult anv_DeviceWaitIdle(
823 VkDevice _device)
824 {
825 struct anv_device *device = (struct anv_device *) _device;
826 struct anv_state state;
827 struct anv_batch batch;
828 struct drm_i915_gem_execbuffer2 execbuf;
829 struct drm_i915_gem_exec_object2 exec2_objects[1];
830 struct anv_bo *bo = NULL;
831 VkResult result;
832 int64_t timeout;
833 int ret;
834
835 state = anv_state_pool_alloc(&device->dynamic_state_pool, 32, 32);
836 bo = &device->dynamic_state_pool.block_pool->bo;
837 batch.start = batch.next = state.map;
838 batch.end = state.map + 32;
839 anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END);
840 anv_batch_emit(&batch, GEN8_MI_NOOP);
841
842 exec2_objects[0].handle = bo->gem_handle;
843 exec2_objects[0].relocation_count = 0;
844 exec2_objects[0].relocs_ptr = 0;
845 exec2_objects[0].alignment = 0;
846 exec2_objects[0].offset = bo->offset;
847 exec2_objects[0].flags = 0;
848 exec2_objects[0].rsvd1 = 0;
849 exec2_objects[0].rsvd2 = 0;
850
851 execbuf.buffers_ptr = (uintptr_t) exec2_objects;
852 execbuf.buffer_count = 1;
853 execbuf.batch_start_offset = state.offset;
854 execbuf.batch_len = batch.next - state.map;
855 execbuf.cliprects_ptr = 0;
856 execbuf.num_cliprects = 0;
857 execbuf.DR1 = 0;
858 execbuf.DR4 = 0;
859
860 execbuf.flags =
861 I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
862 execbuf.rsvd1 = device->context_id;
863 execbuf.rsvd2 = 0;
864
865 if (!device->no_hw) {
866 ret = anv_gem_execbuffer(device, &execbuf);
867 if (ret != 0) {
868 result = vk_error(VK_ERROR_UNKNOWN);
869 goto fail;
870 }
871
872 timeout = INT64_MAX;
873 ret = anv_gem_wait(device, bo->gem_handle, &timeout);
874 if (ret != 0) {
875 result = vk_error(VK_ERROR_UNKNOWN);
876 goto fail;
877 }
878 }
879
880 anv_state_pool_free(&device->dynamic_state_pool, state);
881
882 return VK_SUCCESS;
883
884 fail:
885 anv_state_pool_free(&device->dynamic_state_pool, state);
886
887 return result;
888 }
889
890 void *
891 anv_device_alloc(struct anv_device * device,
892 size_t size,
893 size_t alignment,
894 VkSystemAllocType allocType)
895 {
896 return device->instance->pfnAlloc(device->instance->pAllocUserData,
897 size,
898 alignment,
899 allocType);
900 }
901
902 void
903 anv_device_free(struct anv_device * device,
904 void * mem)
905 {
906 return device->instance->pfnFree(device->instance->pAllocUserData,
907 mem);
908 }
909
910 VkResult
911 anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
912 {
913 bo->gem_handle = anv_gem_create(device, size);
914 if (!bo->gem_handle)
915 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
916
917 bo->map = NULL;
918 bo->index = 0;
919 bo->offset = 0;
920 bo->size = size;
921
922 return VK_SUCCESS;
923 }
924
925 VkResult anv_AllocMemory(
926 VkDevice _device,
927 const VkMemoryAllocInfo* pAllocInfo,
928 VkDeviceMemory* pMem)
929 {
930 struct anv_device *device = (struct anv_device *) _device;
931 struct anv_device_memory *mem;
932 VkResult result;
933
934 assert(pAllocInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO);
935
936 mem = anv_device_alloc(device, sizeof(*mem), 8,
937 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
938 if (mem == NULL)
939 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
940
941 result = anv_bo_init_new(&mem->bo, device, pAllocInfo->allocationSize);
942 if (result != VK_SUCCESS)
943 goto fail;
944
945 *pMem = (VkDeviceMemory) mem;
946
947 return VK_SUCCESS;
948
949 fail:
950 anv_device_free(device, mem);
951
952 return result;
953 }
954
955 VkResult anv_FreeMemory(
956 VkDevice _device,
957 VkDeviceMemory _mem)
958 {
959 struct anv_device *device = (struct anv_device *) _device;
960 struct anv_device_memory *mem = (struct anv_device_memory *) _mem;
961
962 if (mem->bo.map)
963 anv_gem_munmap(mem->bo.map, mem->bo.size);
964
965 if (mem->bo.gem_handle != 0)
966 anv_gem_close(device, mem->bo.gem_handle);
967
968 anv_device_free(device, mem);
969
970 return VK_SUCCESS;
971 }
972
973 VkResult anv_SetMemoryPriority(
974 VkDevice device,
975 VkDeviceMemory mem,
976 VkMemoryPriority priority)
977 {
978 return VK_SUCCESS;
979 }
980
981 VkResult anv_MapMemory(
982 VkDevice _device,
983 VkDeviceMemory _mem,
984 VkDeviceSize offset,
985 VkDeviceSize size,
986 VkMemoryMapFlags flags,
987 void** ppData)
988 {
989 struct anv_device *device = (struct anv_device *) _device;
990 struct anv_device_memory *mem = (struct anv_device_memory *) _mem;
991
992 /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only
993 * takes a VkDeviceMemory pointer, it seems like only one map of the memory
994 * at a time is valid. We could just mmap up front and return an offset
995 * pointer here, but that may exhaust virtual memory on 32 bit
996 * userspace. */
997
998 mem->map = anv_gem_mmap(device, mem->bo.gem_handle, offset, size);
999 mem->map_size = size;
1000
1001 *ppData = mem->map;
1002
1003 return VK_SUCCESS;
1004 }
1005
1006 VkResult anv_UnmapMemory(
1007 VkDevice _device,
1008 VkDeviceMemory _mem)
1009 {
1010 struct anv_device_memory *mem = (struct anv_device_memory *) _mem;
1011
1012 anv_gem_munmap(mem->map, mem->map_size);
1013
1014 return VK_SUCCESS;
1015 }
1016
1017 VkResult anv_FlushMappedMemory(
1018 VkDevice device,
1019 VkDeviceMemory mem,
1020 VkDeviceSize offset,
1021 VkDeviceSize size)
1022 {
1023 /* clflush here for !llc platforms */
1024
1025 return VK_SUCCESS;
1026 }
1027
1028 VkResult anv_PinSystemMemory(
1029 VkDevice device,
1030 const void* pSysMem,
1031 size_t memSize,
1032 VkDeviceMemory* pMem)
1033 {
1034 return VK_SUCCESS;
1035 }
1036
1037 VkResult anv_GetMultiDeviceCompatibility(
1038 VkPhysicalDevice physicalDevice0,
1039 VkPhysicalDevice physicalDevice1,
1040 VkPhysicalDeviceCompatibilityInfo* pInfo)
1041 {
1042 return VK_UNSUPPORTED;
1043 }
1044
1045 VkResult anv_OpenSharedMemory(
1046 VkDevice device,
1047 const VkMemoryOpenInfo* pOpenInfo,
1048 VkDeviceMemory* pMem)
1049 {
1050 return VK_UNSUPPORTED;
1051 }
1052
1053 VkResult anv_OpenSharedSemaphore(
1054 VkDevice device,
1055 const VkSemaphoreOpenInfo* pOpenInfo,
1056 VkSemaphore* pSemaphore)
1057 {
1058 return VK_UNSUPPORTED;
1059 }
1060
1061 VkResult anv_OpenPeerMemory(
1062 VkDevice device,
1063 const VkPeerMemoryOpenInfo* pOpenInfo,
1064 VkDeviceMemory* pMem)
1065 {
1066 return VK_UNSUPPORTED;
1067 }
1068
1069 VkResult anv_OpenPeerImage(
1070 VkDevice device,
1071 const VkPeerImageOpenInfo* pOpenInfo,
1072 VkImage* pImage,
1073 VkDeviceMemory* pMem)
1074 {
1075 return VK_UNSUPPORTED;
1076 }
1077
1078 VkResult anv_DestroyObject(
1079 VkDevice _device,
1080 VkObjectType objType,
1081 VkObject _object)
1082 {
1083 struct anv_device *device = (struct anv_device *) _device;
1084 struct anv_object *object = (struct anv_object *) _object;
1085
1086 switch (objType) {
1087 case VK_OBJECT_TYPE_INSTANCE:
1088 return anv_DestroyInstance((VkInstance) _object);
1089
1090 case VK_OBJECT_TYPE_PHYSICAL_DEVICE:
1091 /* We don't want to actually destroy physical devices */
1092 return VK_SUCCESS;
1093
1094 case VK_OBJECT_TYPE_DEVICE:
1095 assert(_device == (VkDevice) _object);
1096 return anv_DestroyDevice((VkDevice) _object);
1097
1098 case VK_OBJECT_TYPE_QUEUE:
1099 /* TODO */
1100 return VK_SUCCESS;
1101
1102 case VK_OBJECT_TYPE_DEVICE_MEMORY:
1103 return anv_FreeMemory(_device, (VkDeviceMemory) _object);
1104
1105 case VK_OBJECT_TYPE_DESCRIPTOR_POOL:
1106 /* These are just dummies anyway, so we don't need to destroy them */
1107 return VK_SUCCESS;
1108
1109 case VK_OBJECT_TYPE_BUFFER:
1110 case VK_OBJECT_TYPE_IMAGE:
1111 case VK_OBJECT_TYPE_DEPTH_STENCIL_VIEW:
1112 case VK_OBJECT_TYPE_SHADER:
1113 case VK_OBJECT_TYPE_PIPELINE_LAYOUT:
1114 case VK_OBJECT_TYPE_SAMPLER:
1115 case VK_OBJECT_TYPE_DESCRIPTOR_SET:
1116 case VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT:
1117 case VK_OBJECT_TYPE_DYNAMIC_RS_STATE:
1118 case VK_OBJECT_TYPE_DYNAMIC_CB_STATE:
1119 case VK_OBJECT_TYPE_DYNAMIC_DS_STATE:
1120 case VK_OBJECT_TYPE_RENDER_PASS:
1121 /* These are trivially destroyable */
1122 anv_device_free(device, (void *) _object);
1123 return VK_SUCCESS;
1124
1125 case VK_OBJECT_TYPE_COMMAND_BUFFER:
1126 case VK_OBJECT_TYPE_PIPELINE:
1127 case VK_OBJECT_TYPE_DYNAMIC_VP_STATE:
1128 case VK_OBJECT_TYPE_FENCE:
1129 case VK_OBJECT_TYPE_QUERY_POOL:
1130 case VK_OBJECT_TYPE_FRAMEBUFFER:
1131 case VK_OBJECT_TYPE_BUFFER_VIEW:
1132 case VK_OBJECT_TYPE_IMAGE_VIEW:
1133 case VK_OBJECT_TYPE_COLOR_ATTACHMENT_VIEW:
1134 (object->destructor)(device, object, objType);
1135 return VK_SUCCESS;
1136
1137 case VK_OBJECT_TYPE_SEMAPHORE:
1138 case VK_OBJECT_TYPE_EVENT:
1139 stub_return(VK_UNSUPPORTED);
1140
1141 default:
1142 unreachable("Invalid object type");
1143 }
1144 }
1145
1146 static void
1147 fill_memory_requirements(
1148 VkObjectType objType,
1149 VkObject object,
1150 VkMemoryRequirements * memory_requirements)
1151 {
1152 struct anv_buffer *buffer;
1153 struct anv_image *image;
1154
1155 memory_requirements->memPropsAllowed =
1156 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1157 VK_MEMORY_PROPERTY_HOST_DEVICE_COHERENT_BIT |
1158 /* VK_MEMORY_PROPERTY_HOST_UNCACHED_BIT | */
1159 VK_MEMORY_PROPERTY_HOST_WRITE_COMBINED_BIT |
1160 VK_MEMORY_PROPERTY_PREFER_HOST_LOCAL |
1161 VK_MEMORY_PROPERTY_SHAREABLE_BIT;
1162
1163 memory_requirements->memPropsRequired = 0;
1164
1165 switch (objType) {
1166 case VK_OBJECT_TYPE_BUFFER:
1167 buffer = (struct anv_buffer *) object;
1168 memory_requirements->size = buffer->size;
1169 memory_requirements->alignment = 16;
1170 break;
1171 case VK_OBJECT_TYPE_IMAGE:
1172 image = (struct anv_image *) object;
1173 memory_requirements->size = image->size;
1174 memory_requirements->alignment = image->alignment;
1175 break;
1176 default:
1177 memory_requirements->size = 0;
1178 break;
1179 }
1180 }
1181
1182 static uint32_t
1183 get_allocation_count(VkObjectType objType)
1184 {
1185 switch (objType) {
1186 case VK_OBJECT_TYPE_BUFFER:
1187 case VK_OBJECT_TYPE_IMAGE:
1188 return 1;
1189 default:
1190 return 0;
1191 }
1192 }
1193
1194 VkResult anv_GetObjectInfo(
1195 VkDevice _device,
1196 VkObjectType objType,
1197 VkObject object,
1198 VkObjectInfoType infoType,
1199 size_t* pDataSize,
1200 void* pData)
1201 {
1202 VkMemoryRequirements memory_requirements;
1203 uint32_t *count;
1204
1205 switch (infoType) {
1206 case VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS:
1207 *pDataSize = sizeof(memory_requirements);
1208 if (pData == NULL)
1209 return VK_SUCCESS;
1210
1211 fill_memory_requirements(objType, object, pData);
1212 return VK_SUCCESS;
1213
1214 case VK_OBJECT_INFO_TYPE_MEMORY_ALLOCATION_COUNT:
1215 *pDataSize = sizeof(*count);
1216 if (pData == NULL)
1217 return VK_SUCCESS;
1218
1219 count = pData;
1220 *count = get_allocation_count(objType);
1221 return VK_SUCCESS;
1222
1223 default:
1224 return vk_error(VK_UNSUPPORTED);
1225 }
1226
1227 }
1228
1229 VkResult anv_QueueBindObjectMemory(
1230 VkQueue queue,
1231 VkObjectType objType,
1232 VkObject object,
1233 uint32_t allocationIdx,
1234 VkDeviceMemory _mem,
1235 VkDeviceSize memOffset)
1236 {
1237 struct anv_buffer *buffer;
1238 struct anv_image *image;
1239 struct anv_device_memory *mem = (struct anv_device_memory *) _mem;
1240
1241 switch (objType) {
1242 case VK_OBJECT_TYPE_BUFFER:
1243 buffer = (struct anv_buffer *) object;
1244 buffer->bo = &mem->bo;
1245 buffer->offset = memOffset;
1246 break;
1247 case VK_OBJECT_TYPE_IMAGE:
1248 image = (struct anv_image *) object;
1249 image->bo = &mem->bo;
1250 image->offset = memOffset;
1251 break;
1252 default:
1253 break;
1254 }
1255
1256 return VK_SUCCESS;
1257 }
1258
1259 VkResult anv_QueueBindObjectMemoryRange(
1260 VkQueue queue,
1261 VkObjectType objType,
1262 VkObject object,
1263 uint32_t allocationIdx,
1264 VkDeviceSize rangeOffset,
1265 VkDeviceSize rangeSize,
1266 VkDeviceMemory mem,
1267 VkDeviceSize memOffset)
1268 {
1269 stub_return(VK_UNSUPPORTED);
1270 }
1271
1272 VkResult anv_QueueBindImageMemoryRange(
1273 VkQueue queue,
1274 VkImage image,
1275 uint32_t allocationIdx,
1276 const VkImageMemoryBindInfo* pBindInfo,
1277 VkDeviceMemory mem,
1278 VkDeviceSize memOffset)
1279 {
1280 stub_return(VK_UNSUPPORTED);
1281 }
1282
1283 static void
1284 anv_fence_destroy(struct anv_device *device,
1285 struct anv_object *object,
1286 VkObjectType obj_type)
1287 {
1288 struct anv_fence *fence = (struct anv_fence *) object;
1289
1290 assert(obj_type == VK_OBJECT_TYPE_FENCE);
1291
1292 anv_gem_munmap(fence->bo.map, fence->bo.size);
1293 anv_gem_close(device, fence->bo.gem_handle);
1294 anv_device_free(device, fence);
1295 }
1296
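/* A fence is implemented as a dedicated BO holding nothing but
 * MI_BATCH_BUFFER_END plus a ready-to-go execbuf. anv_QueueSubmit() executes
 * it right after the command buffers it guards, so waiting on the fence BO
 * with gem_wait tells us when that submission has retired.
 */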
1297 VkResult anv_CreateFence(
1298 VkDevice _device,
1299 const VkFenceCreateInfo* pCreateInfo,
1300 VkFence* pFence)
1301 {
1302 struct anv_device *device = (struct anv_device *) _device;
1303 struct anv_fence *fence;
1304 struct anv_batch batch;
1305 VkResult result;
1306
1307 const uint32_t fence_size = 128;
1308
1309 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
1310
1311 fence = anv_device_alloc(device, sizeof(*fence), 8,
1312 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1313 if (fence == NULL)
1314 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1315
1316 result = anv_bo_init_new(&fence->bo, device, fence_size);
1317 if (result != VK_SUCCESS)
1318 goto fail;
1319
1320 fence->base.destructor = anv_fence_destroy;
1321
1322 fence->bo.map =
1323 anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size);
1324 batch.next = batch.start = fence->bo.map;
1325 batch.end = fence->bo.map + fence->bo.size;
1326 anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END);
1327 anv_batch_emit(&batch, GEN8_MI_NOOP);
1328
1329 fence->exec2_objects[0].handle = fence->bo.gem_handle;
1330 fence->exec2_objects[0].relocation_count = 0;
1331 fence->exec2_objects[0].relocs_ptr = 0;
1332 fence->exec2_objects[0].alignment = 0;
1333 fence->exec2_objects[0].offset = fence->bo.offset;
1334 fence->exec2_objects[0].flags = 0;
1335 fence->exec2_objects[0].rsvd1 = 0;
1336 fence->exec2_objects[0].rsvd2 = 0;
1337
1338 fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects;
1339 fence->execbuf.buffer_count = 1;
1340 fence->execbuf.batch_start_offset = 0;
1341 fence->execbuf.batch_len = batch.next - fence->bo.map;
1342 fence->execbuf.cliprects_ptr = 0;
1343 fence->execbuf.num_cliprects = 0;
1344 fence->execbuf.DR1 = 0;
1345 fence->execbuf.DR4 = 0;
1346
1347 fence->execbuf.flags =
1348 I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
1349 fence->execbuf.rsvd1 = device->context_id;
1350 fence->execbuf.rsvd2 = 0;
1351
1352 *pFence = (VkFence) fence;
1353
1354 return VK_SUCCESS;
1355
1356 fail:
1357 anv_device_free(device, fence);
1358
1359 return result;
1360 }
1361
1362 VkResult anv_ResetFences(
1363 VkDevice _device,
1364 uint32_t fenceCount,
1365 VkFence* pFences)
1366 {
1367 struct anv_fence **fences = (struct anv_fence **) pFences;
1368
1369 for (uint32_t i = 0; i < fenceCount; i++)
1370 fences[i]->ready = false;
1371
1372 return VK_SUCCESS;
1373 }
1374
1375 VkResult anv_GetFenceStatus(
1376 VkDevice _device,
1377 VkFence _fence)
1378 {
1379 struct anv_device *device = (struct anv_device *) _device;
1380 struct anv_fence *fence = (struct anv_fence *) _fence;
1381 int64_t t = 0;
1382 int ret;
1383
1384 if (fence->ready)
1385 return VK_SUCCESS;
1386
1387 ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
1388 if (ret == 0) {
1389 fence->ready = true;
1390 return VK_SUCCESS;
1391 }
1392
1393 return VK_NOT_READY;
1394 }
1395
1396 VkResult anv_WaitForFences(
1397 VkDevice _device,
1398 uint32_t fenceCount,
1399 const VkFence* pFences,
1400 bool32_t waitAll,
1401 uint64_t timeout)
1402 {
1403 struct anv_device *device = (struct anv_device *) _device;
1404 struct anv_fence **fences = (struct anv_fence **) pFences;
1405 int64_t t = timeout;
1406 int ret;
1407
1408 /* FIXME: handle !waitAll */
1409
1410 for (uint32_t i = 0; i < fenceCount; i++) {
1411 ret = anv_gem_wait(device, fences[i]->bo.gem_handle, &t);
1412 if (ret == -1 && errno == ETIME)
1413 return VK_TIMEOUT;
1414 else if (ret == -1)
1415 return vk_error(VK_ERROR_UNKNOWN);
1416 }
1417
1418 return VK_SUCCESS;
1419 }
1420
1421 // Queue semaphore functions
1422
1423 VkResult anv_CreateSemaphore(
1424 VkDevice device,
1425 const VkSemaphoreCreateInfo* pCreateInfo,
1426 VkSemaphore* pSemaphore)
1427 {
1428 stub_return(VK_UNSUPPORTED);
1429 }
1430
1431 VkResult anv_QueueSignalSemaphore(
1432 VkQueue queue,
1433 VkSemaphore semaphore)
1434 {
1435 stub_return(VK_UNSUPPORTED);
1436 }
1437
1438 VkResult anv_QueueWaitSemaphore(
1439 VkQueue queue,
1440 VkSemaphore semaphore)
1441 {
1442 stub_return(VK_UNSUPPORTED);
1443 }
1444
1445 // Event functions
1446
1447 VkResult anv_CreateEvent(
1448 VkDevice device,
1449 const VkEventCreateInfo* pCreateInfo,
1450 VkEvent* pEvent)
1451 {
1452 stub_return(VK_UNSUPPORTED);
1453 }
1454
1455 VkResult anv_GetEventStatus(
1456 VkDevice device,
1457 VkEvent event)
1458 {
1459 stub_return(VK_UNSUPPORTED);
1460 }
1461
1462 VkResult anv_SetEvent(
1463 VkDevice device,
1464 VkEvent event)
1465 {
1466 stub_return(VK_UNSUPPORTED);
1467 }
1468
1469 VkResult anv_ResetEvent(
1470 VkDevice device,
1471 VkEvent event)
1472 {
1473 stub_return(VK_UNSUPPORTED);
1474 }
1475
1476 // Buffer functions
1477
1478 VkResult anv_CreateBuffer(
1479 VkDevice _device,
1480 const VkBufferCreateInfo* pCreateInfo,
1481 VkBuffer* pBuffer)
1482 {
1483 struct anv_device *device = (struct anv_device *) _device;
1484 struct anv_buffer *buffer;
1485
1486 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
1487
1488 buffer = anv_device_alloc(device, sizeof(*buffer), 8,
1489 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1490 if (buffer == NULL)
1491 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1492
1493 buffer->size = pCreateInfo->size;
1494 buffer->bo = NULL;
1495 buffer->offset = 0;
1496
1497 *pBuffer = (VkBuffer) buffer;
1498
1499 return VK_SUCCESS;
1500 }
1501
1502 // Buffer view functions
1503
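/* Pack a SURFTYPE_BUFFER RENDER_SURFACE_STATE. The element count of the
 * buffer is split across the Width/Height/Depth fields; the packing below
 * corresponds to:
 *
 *    num_elements = range / stride
 *    Width  =  num_elements        & 0x7f      (low 7 bits)
 *    Height = (num_elements >> 7)  & 0x3fff    (next 14 bits)
 *    Depth  = (num_elements >> 21) & 0x3f      (next 6 bits)
 *    SurfacePitch = stride - 1
 *
 * The stride is currently hard-coded to 4 bytes per element (see the comment
 * about the assumed format below).
 */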
1504 static void
1505 fill_buffer_surface_state(void *state, VkFormat format,
1506 uint32_t offset, uint32_t range)
1507 {
1508 const struct anv_format *info;
1509
1510 info = anv_format_for_vk_format(format);
1511 /* This assumes RGBA float format. */
1512 uint32_t stride = 4;
1513 uint32_t num_elements = range / stride;
1514
1515 struct GEN8_RENDER_SURFACE_STATE surface_state = {
1516 .SurfaceType = SURFTYPE_BUFFER,
1517 .SurfaceArray = false,
1518 .SurfaceFormat = info->format,
1519 .SurfaceVerticalAlignment = VALIGN4,
1520 .SurfaceHorizontalAlignment = HALIGN4,
1521 .TileMode = LINEAR,
1522 .VerticalLineStride = 0,
1523 .VerticalLineStrideOffset = 0,
1524 .SamplerL2BypassModeDisable = true,
1525 .RenderCacheReadWriteMode = WriteOnlyCache,
1526 .MemoryObjectControlState = GEN8_MOCS,
1527 .BaseMipLevel = 0.0,
1528 .SurfaceQPitch = 0,
1529 .Height = (num_elements >> 7) & 0x3fff,
1530 .Width = num_elements & 0x7f,
1531 .Depth = (num_elements >> 21) & 0x3f,
1532 .SurfacePitch = stride - 1,
1533 .MinimumArrayElement = 0,
1534 .NumberofMultisamples = MULTISAMPLECOUNT_1,
1535 .XOffset = 0,
1536 .YOffset = 0,
1537 .SurfaceMinLOD = 0,
1538 .MIPCountLOD = 0,
1539 .AuxiliarySurfaceMode = AUX_NONE,
1540 .RedClearColor = 0,
1541 .GreenClearColor = 0,
1542 .BlueClearColor = 0,
1543 .AlphaClearColor = 0,
1544 .ShaderChannelSelectRed = SCS_RED,
1545 .ShaderChannelSelectGreen = SCS_GREEN,
1546 .ShaderChannelSelectBlue = SCS_BLUE,
1547 .ShaderChannelSelectAlpha = SCS_ALPHA,
1548 .ResourceMinLOD = 0.0,
1549 /* FIXME: We assume that the buffer is already bound at this time. */
1550 .SurfaceBaseAddress = { NULL, offset },
1551 };
1552
1553 GEN8_RENDER_SURFACE_STATE_pack(NULL, state, &surface_state);
1554 }
1555
1556 VkResult anv_CreateBufferView(
1557 VkDevice _device,
1558 const VkBufferViewCreateInfo* pCreateInfo,
1559 VkBufferView* pView)
1560 {
1561 struct anv_device *device = (struct anv_device *) _device;
1562 struct anv_buffer *buffer = (struct anv_buffer *) pCreateInfo->buffer;
1563 struct anv_surface_view *view;
1564
1565 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO);
1566
1567 view = anv_device_alloc(device, sizeof(*view), 8,
1568 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1569 if (view == NULL)
1570 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1571
1572 view->base.destructor = anv_surface_view_destroy;
1573
1574 view->bo = buffer->bo;
1575 view->offset = buffer->offset + pCreateInfo->offset;
1576 view->surface_state =
1577 anv_state_pool_alloc(&device->surface_state_pool, 64, 64);
1578 view->format = pCreateInfo->format;
1579 view->range = pCreateInfo->range;
1580
1581 fill_buffer_surface_state(view->surface_state.map,
1582 pCreateInfo->format, view->offset, pCreateInfo->range);
1583
1584 *pView = (VkBufferView) view;
1585
1586 return VK_SUCCESS;
1587 }
1588
1589 // Sampler functions
1590
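/* Translate the API sampler description into a packed GEN8_SAMPLER_STATE.
 * A maxAnisotropy greater than 1 forces anisotropic min/mag filtering, with
 * the hardware field encoding the ratio in steps of two ((N - 2) / 2, so 0
 * means 2:1). The LOD bias is converted to fixed point with 8 fractional
 * bits, and the border color is referenced indirectly by offsetting into the
 * float border color table set up at device creation.
 */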
1591 VkResult anv_CreateSampler(
1592 VkDevice _device,
1593 const VkSamplerCreateInfo* pCreateInfo,
1594 VkSampler* pSampler)
1595 {
1596 struct anv_device *device = (struct anv_device *) _device;
1597 struct anv_sampler *sampler;
1598 uint32_t mag_filter, min_filter, max_anisotropy;
1599
1600 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
1601
1602 sampler = anv_device_alloc(device, sizeof(*sampler), 8,
1603 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1604 if (!sampler)
1605 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1606
1607 static const uint32_t vk_to_gen_tex_filter[] = {
1608 [VK_TEX_FILTER_NEAREST] = MAPFILTER_NEAREST,
1609 [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR
1610 };
1611
1612 static const uint32_t vk_to_gen_mipmap_mode[] = {
1613 [VK_TEX_MIPMAP_MODE_BASE] = MIPFILTER_NONE,
1614 [VK_TEX_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST,
1615 [VK_TEX_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR
1616 };
1617
1618 static const uint32_t vk_to_gen_tex_address[] = {
1619 [VK_TEX_ADDRESS_WRAP] = TCM_WRAP,
1620 [VK_TEX_ADDRESS_MIRROR] = TCM_MIRROR,
1621 [VK_TEX_ADDRESS_CLAMP] = TCM_CLAMP,
1622 [VK_TEX_ADDRESS_MIRROR_ONCE] = TCM_MIRROR_ONCE,
1623 [VK_TEX_ADDRESS_CLAMP_BORDER] = TCM_CLAMP_BORDER,
1624 };
1625
1626 static const uint32_t vk_to_gen_compare_op[] = {
1627 [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER,
1628 [VK_COMPARE_OP_LESS] = PREFILTEROPLESS,
1629 [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL,
1630 [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL,
1631 [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER,
1632 [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL,
1633 [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL,
1634 [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS,
1635 };
1636
1637 if (pCreateInfo->maxAnisotropy > 1) {
1638 mag_filter = MAPFILTER_ANISOTROPIC;
1639 min_filter = MAPFILTER_ANISOTROPIC;
1640 max_anisotropy = (pCreateInfo->maxAnisotropy - 2) / 2;
1641 } else {
1642 mag_filter = vk_to_gen_tex_filter[pCreateInfo->magFilter];
1643 min_filter = vk_to_gen_tex_filter[pCreateInfo->minFilter];
1644 max_anisotropy = RATIO21;
1645 }
1646
1647 struct GEN8_SAMPLER_STATE sampler_state = {
1648 .SamplerDisable = false,
1649 .TextureBorderColorMode = DX10OGL,
1650 .LODPreClampMode = 0,
1651 .BaseMipLevel = 0.0,
1652 .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipMode],
1653 .MagModeFilter = mag_filter,
1654 .MinModeFilter = min_filter,
1655 .TextureLODBias = pCreateInfo->mipLodBias * 256,
1656 .AnisotropicAlgorithm = EWAApproximation,
1657 .MinLOD = pCreateInfo->minLod,
1658 .MaxLOD = pCreateInfo->maxLod,
1659 .ChromaKeyEnable = 0,
1660 .ChromaKeyIndex = 0,
1661 .ChromaKeyMode = 0,
1662 .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp],
1663 .CubeSurfaceControlMode = 0,
1664
1665 .IndirectStatePointer =
1666 device->float_border_colors.offset +
1667 pCreateInfo->borderColor * sizeof(float) * 4,
1668
1669 .LODClampMagnificationMode = MIPNONE,
1670 .MaximumAnisotropy = max_anisotropy,
1671 .RAddressMinFilterRoundingEnable = 0,
1672 .RAddressMagFilterRoundingEnable = 0,
1673 .VAddressMinFilterRoundingEnable = 0,
1674 .VAddressMagFilterRoundingEnable = 0,
1675 .UAddressMinFilterRoundingEnable = 0,
1676 .UAddressMagFilterRoundingEnable = 0,
1677 .TrilinearFilterQuality = 0,
1678 .NonnormalizedCoordinateEnable = 0,
1679 .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressU],
1680 .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressV],
1681 .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressW],
1682 };
1683
1684 GEN8_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state);
1685
1686 *pSampler = (VkSampler) sampler;
1687
1688 return VK_SUCCESS;
1689 }
1690
1691 // Descriptor set functions
1692
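/* Build a descriptor set layout in two passes: the first pass counts the
 * sampler and surface slots needed per shader stage (plus the number of
 * dynamic buffers), so that one allocation can hold the layout and all
 * per-stage slot arrays back to back; the second pass walks the bindings
 * again and fills in each slot's descriptor index and dynamic-buffer slot
 * (-1 when the binding is not dynamic).
 */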
1693 VkResult anv_CreateDescriptorSetLayout(
1694 VkDevice _device,
1695 const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
1696 VkDescriptorSetLayout* pSetLayout)
1697 {
1698 struct anv_device *device = (struct anv_device *) _device;
1699 struct anv_descriptor_set_layout *set_layout;
1700
1701 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO);
1702
1703 uint32_t sampler_count[VK_NUM_SHADER_STAGE] = { 0, };
1704 uint32_t surface_count[VK_NUM_SHADER_STAGE] = { 0, };
1705 uint32_t num_dynamic_buffers = 0;
1706 uint32_t count = 0;
1707 uint32_t stages = 0;
1708 uint32_t s;
1709
1710 for (uint32_t i = 0; i < pCreateInfo->count; i++) {
1711 switch (pCreateInfo->pBinding[i].descriptorType) {
1712 case VK_DESCRIPTOR_TYPE_SAMPLER:
1713 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1714 for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
1715 sampler_count[s] += pCreateInfo->pBinding[i].count;
1716 break;
1717 default:
1718 break;
1719 }
1720
1721 switch (pCreateInfo->pBinding[i].descriptorType) {
1722 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1723 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
1724 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
1725 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
1726 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
1727 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
1728 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
1729 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1730 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1731 for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
1732 surface_count[s] += pCreateInfo->pBinding[i].count;
1733 break;
1734 default:
1735 break;
1736 }
1737
1738 switch (pCreateInfo->pBinding[i].descriptorType) {
1739 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1740 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1741 num_dynamic_buffers += pCreateInfo->pBinding[i].count;
1742 break;
1743 default:
1744 break;
1745 }
1746
1747 stages |= pCreateInfo->pBinding[i].stageFlags;
1748 count += pCreateInfo->pBinding[i].count;
1749 }
1750
1751 uint32_t sampler_total = 0;
1752 uint32_t surface_total = 0;
1753 for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) {
1754 sampler_total += sampler_count[s];
1755 surface_total += surface_count[s];
1756 }
1757
1758 size_t size = sizeof(*set_layout) +
1759 (sampler_total + surface_total) * sizeof(set_layout->entries[0]);
1760 set_layout = anv_device_alloc(device, size, 8,
1761 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1762 if (!set_layout)
1763 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1764
1765 set_layout->num_dynamic_buffers = num_dynamic_buffers;
1766 set_layout->count = count;
1767 set_layout->shader_stages = stages;
1768
1769 struct anv_descriptor_slot *p = set_layout->entries;
1770 struct anv_descriptor_slot *sampler[VK_NUM_SHADER_STAGE];
1771 struct anv_descriptor_slot *surface[VK_NUM_SHADER_STAGE];
1772 for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) {
1773 set_layout->stage[s].surface_count = surface_count[s];
1774 set_layout->stage[s].surface_start = surface[s] = p;
1775 p += surface_count[s];
1776 set_layout->stage[s].sampler_count = sampler_count[s];
1777 set_layout->stage[s].sampler_start = sampler[s] = p;
1778 p += sampler_count[s];
1779 }
1780
1781 uint32_t descriptor = 0;
1782 int8_t dynamic_slot = 0;
1783 bool is_dynamic;
1784 for (uint32_t i = 0; i < pCreateInfo->count; i++) {
1785 switch (pCreateInfo->pBinding[i].descriptorType) {
1786 case VK_DESCRIPTOR_TYPE_SAMPLER:
1787 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1788 for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
1789 for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) {
1790 sampler[s]->index = descriptor + j;
1791 sampler[s]->dynamic_slot = -1;
1792 sampler[s]++;
1793 }
1794 break;
1795 default:
1796 break;
1797 }
1798
1799 switch (pCreateInfo->pBinding[i].descriptorType) {
1800 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1801 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1802 is_dynamic = true;
1803 break;
1804 default:
1805 is_dynamic = false;
1806 break;
1807 }
1808
1809 switch (pCreateInfo->pBinding[i].descriptorType) {
1810 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1811 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
1812 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
1813 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
1814 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
1815 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
1816 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
1817 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1818 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1819 for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
1820 for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) {
1821 surface[s]->index = descriptor + j;
1822 if (is_dynamic)
1823 surface[s]->dynamic_slot = dynamic_slot + j;
1824 else
1825 surface[s]->dynamic_slot = -1;
1826 surface[s]++;
1827 }
1828 break;
1829 default:
1830 break;
1831 }
1832
1833 if (is_dynamic)
1834 dynamic_slot += pCreateInfo->pBinding[i].count;
1835
1836 descriptor += pCreateInfo->pBinding[i].count;
1837 }
1838
1839 *pSetLayout = (VkDescriptorSetLayout) set_layout;
1840
1841 return VK_SUCCESS;
1842 }
1843
1844 VkResult anv_BeginDescriptorPoolUpdate(
1845 VkDevice device,
1846 VkDescriptorUpdateMode updateMode)
1847 {
1848 return VK_SUCCESS;
1849 }
1850
1851 VkResult anv_EndDescriptorPoolUpdate(
1852 VkDevice device,
1853 VkCmdBuffer cmd)
1854 {
1855 return VK_SUCCESS;
1856 }
1857
1858 VkResult anv_CreateDescriptorPool(
1859 VkDevice device,
1860 VkDescriptorPoolUsage poolUsage,
1861 uint32_t maxSets,
1862 const VkDescriptorPoolCreateInfo* pCreateInfo,
1863 VkDescriptorPool* pDescriptorPool)
1864 {
1865 *pDescriptorPool = 1;
1866
1867 return VK_SUCCESS;
1868 }
1869
1870 VkResult anv_ResetDescriptorPool(
1871 VkDevice device,
1872 VkDescriptorPool descriptorPool)
1873 {
1874 return VK_SUCCESS;
1875 }
1876
1877 VkResult anv_AllocDescriptorSets(
1878 VkDevice _device,
1879 VkDescriptorPool descriptorPool,
1880 VkDescriptorSetUsage setUsage,
1881 uint32_t count,
1882 const VkDescriptorSetLayout* pSetLayouts,
1883 VkDescriptorSet* pDescriptorSets,
1884 uint32_t* pCount)
1885 {
1886 struct anv_device *device = (struct anv_device *) _device;
1887 const struct anv_descriptor_set_layout *layout;
1888 struct anv_descriptor_set *set;
1889 size_t size;
1890
1891 for (uint32_t i = 0; i < count; i++) {
1892 layout = (struct anv_descriptor_set_layout *) pSetLayouts[i];
1893 size = sizeof(*set) + layout->count * sizeof(set->descriptors[0]);
1894 set = anv_device_alloc(device, size, 8,
1895 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1896 if (!set) {
1897 *pCount = i;
1898 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1899 }
1900
1901 /* Descriptor sets may not be 100% filled out so we need to memset to
1902 * ensure that we can properly detect and handle holes.
1903 */
1904 memset(set, 0, size);
1905
1906 pDescriptorSets[i] = (VkDescriptorSet) set;
1907 }
1908
1909 *pCount = count;
1910
1911 return VK_SUCCESS;
1912 }
1913
1914 void anv_ClearDescriptorSets(
1915 VkDevice device,
1916 VkDescriptorPool descriptorPool,
1917 uint32_t count,
1918 const VkDescriptorSet* pDescriptorSets)
1919 {
1920 }
1921
1922 void anv_UpdateDescriptors(
1923 VkDevice _device,
1924 VkDescriptorSet descriptorSet,
1925 uint32_t updateCount,
1926 const void** ppUpdateArray)
1927 {
1928 struct anv_descriptor_set *set = (struct anv_descriptor_set *) descriptorSet;
1929 VkUpdateSamplers *update_samplers;
1930 VkUpdateSamplerTextures *update_sampler_textures;
1931 VkUpdateImages *update_images;
1932 VkUpdateBuffers *update_buffers;
1933 VkUpdateAsCopy *update_as_copy;
1934
1935 for (uint32_t i = 0; i < updateCount; i++) {
1936 const struct anv_common *common = ppUpdateArray[i];
1937
1938 switch (common->sType) {
1939 case VK_STRUCTURE_TYPE_UPDATE_SAMPLERS:
1940 update_samplers = (VkUpdateSamplers *) common;
1941
1942 for (uint32_t j = 0; j < update_samplers->count; j++) {
1943 set->descriptors[update_samplers->binding + j].sampler =
1944 (struct anv_sampler *) update_samplers->pSamplers[j];
1945 }
1946 break;
1947
1948 case VK_STRUCTURE_TYPE_UPDATE_SAMPLER_TEXTURES:
1949 /* FIXME: Shouldn't this be *_UPDATE_SAMPLER_IMAGES? */
1950 update_sampler_textures = (VkUpdateSamplerTextures *) common;
1951
1952 for (uint32_t j = 0; j < update_sampler_textures->count; j++) {
1953 set->descriptors[update_sampler_textures->binding + j].view =
1954 (struct anv_surface_view *)
1955 update_sampler_textures->pSamplerImageViews[j].pImageView->view;
1956 set->descriptors[update_sampler_textures->binding + j].sampler =
1957 (struct anv_sampler *)
1958 update_sampler_textures->pSamplerImageViews[j].sampler;
1959 }
1960 break;
1961
1962 case VK_STRUCTURE_TYPE_UPDATE_IMAGES:
1963 update_images = (VkUpdateImages *) common;
1964
1965 for (uint32_t j = 0; j < update_images->count; j++) {
1966 set->descriptors[update_images->binding + j].view =
1967 (struct anv_surface_view *) update_images->pImageViews[j].view;
1968 }
1969 break;
1970
1971 case VK_STRUCTURE_TYPE_UPDATE_BUFFERS:
1972 update_buffers = (VkUpdateBuffers *) common;
1973
1974 for (uint32_t j = 0; j < update_buffers->count; j++) {
1975 set->descriptors[update_buffers->binding + j].view =
1976 (struct anv_surface_view *) update_buffers->pBufferViews[j].view;
1977 }
1978 /* FIXME: descriptor arrays? */
1979 break;
1980
1981 case VK_STRUCTURE_TYPE_UPDATE_AS_COPY:
1982 update_as_copy = (VkUpdateAsCopy *) common;
1983 (void) update_as_copy;
1984 break;
1985
1986 default:
1987 break;
1988 }
1989 }
1990 }
1991
1992 // State object functions
1993
1994 static inline int64_t
1995 clamp_int64(int64_t x, int64_t min, int64_t max)
1996 {
1997 if (x < min)
1998 return min;
1999 else if (x < max)
2000 return x;
2001 else
2002 return max;
2003 }
2004
2005 static void
2006 anv_dynamic_vp_state_destroy(struct anv_device *device,
2007 struct anv_object *object,
2008 VkObjectType obj_type)
2009 {
2010 struct anv_dynamic_vp_state *state = (void *)object;
2011
2012 assert(obj_type == VK_OBJECT_TYPE_DYNAMIC_VP_STATE);
2013
2014 anv_state_pool_free(&device->dynamic_state_pool, state->sf_clip_vp);
2015 anv_state_pool_free(&device->dynamic_state_pool, state->cc_vp);
2016 anv_state_pool_free(&device->dynamic_state_pool, state->scissor);
2017
2018 anv_device_free(device, state);
2019 }
2020
2021 VkResult anv_CreateDynamicViewportState(
2022 VkDevice _device,
2023 const VkDynamicVpStateCreateInfo* pCreateInfo,
2024 VkDynamicVpState* pState)
2025 {
2026 struct anv_device *device = (struct anv_device *) _device;
2027 struct anv_dynamic_vp_state *state;
2028
2029 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO);
2030
2031 state = anv_device_alloc(device, sizeof(*state), 8,
2032 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2033 if (state == NULL)
2034 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2035
2036 state->base.destructor = anv_dynamic_vp_state_destroy;
2037
2038 unsigned count = pCreateInfo->viewportAndScissorCount;
2039 state->sf_clip_vp = anv_state_pool_alloc(&device->dynamic_state_pool,
2040 count * 64, 64);
2041 state->cc_vp = anv_state_pool_alloc(&device->dynamic_state_pool,
2042 count * 8, 32);
2043 state->scissor = anv_state_pool_alloc(&device->dynamic_state_pool,
2044 count * 32, 32);
2045
2046 for (uint32_t i = 0; i < pCreateInfo->viewportAndScissorCount; i++) {
2047 const VkViewport *vp = &pCreateInfo->pViewports[i];
2048 const VkRect *s = &pCreateInfo->pScissors[i];
2049
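      /* The matrix elements below implement the usual viewport transform:
       * x_win = m00 * x_ndc + m30, y_win = m11 * y_ndc + m31 and
       * z_win = m22 * z_ndc + m32, so x_ndc in [-1, 1] maps to
       * [originX, originX + width] (likewise for y, and z maps into
       * [minDepth, maxDepth]).
       */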
2050 struct GEN8_SF_CLIP_VIEWPORT sf_clip_viewport = {
2051 .ViewportMatrixElementm00 = vp->width / 2,
2052 .ViewportMatrixElementm11 = vp->height / 2,
2053 .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) / 2,
2054 .ViewportMatrixElementm30 = vp->originX + vp->width / 2,
2055 .ViewportMatrixElementm31 = vp->originY + vp->height / 2,
2056 .ViewportMatrixElementm32 = (vp->maxDepth + vp->minDepth) / 2,
2057 .XMinClipGuardband = -1.0f,
2058 .XMaxClipGuardband = 1.0f,
2059 .YMinClipGuardband = -1.0f,
2060 .YMaxClipGuardband = 1.0f,
2061 .XMinViewPort = vp->originX,
2062 .XMaxViewPort = vp->originX + vp->width - 1,
2063 .YMinViewPort = vp->originY,
2064 .YMaxViewPort = vp->originY + vp->height - 1,
2065 };
2066
2067 struct GEN8_CC_VIEWPORT cc_viewport = {
2068 .MinimumDepth = vp->minDepth,
2069 .MaximumDepth = vp->maxDepth
2070 };
2071
2072 /* Since xmax and ymax are inclusive, we have to have xmax < xmin or
2073           * ymax < ymin for empty clips.  In case the clip x, y, width, and height
2074           * are all 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't
2075 * what we want. Just special case empty clips and produce a canonical
2076 * empty clip. */
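         /* Worked example: with offset = (0, 0) and extent = (0, 0) the clamps
          * below give xmin = ymin = 0 and xmax = ymax = clamp(-1, 0, max) = 0,
          * i.e. a 1x1 scissor instead of an empty one.  The canonical rect with
          * min = 1 and max = 0 keeps min > max so every pixel is rejected.
          */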
2077 static const struct GEN8_SCISSOR_RECT empty_scissor = {
2078 .ScissorRectangleYMin = 1,
2079 .ScissorRectangleXMin = 1,
2080 .ScissorRectangleYMax = 0,
2081 .ScissorRectangleXMax = 0
2082 };
2083
2084 const int max = 0xffff;
2085 struct GEN8_SCISSOR_RECT scissor = {
2086 /* Do this math using int64_t so overflow gets clamped correctly. */
2087 .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max),
2088 .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max),
2089 .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max),
2090 .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max)
2091 };
2092
2093 GEN8_SF_CLIP_VIEWPORT_pack(NULL, state->sf_clip_vp.map + i * 64, &sf_clip_viewport);
2094 GEN8_CC_VIEWPORT_pack(NULL, state->cc_vp.map + i * 32, &cc_viewport);
2095
2096 if (s->extent.width <= 0 || s->extent.height <= 0) {
2097 GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &empty_scissor);
2098 } else {
2099 GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &scissor);
2100 }
2101 }
2102
2103 *pState = (VkDynamicVpState) state;
2104
2105 return VK_SUCCESS;
2106 }
2107
2108 VkResult anv_CreateDynamicRasterState(
2109 VkDevice _device,
2110 const VkDynamicRsStateCreateInfo* pCreateInfo,
2111 VkDynamicRsState* pState)
2112 {
2113 struct anv_device *device = (struct anv_device *) _device;
2114 struct anv_dynamic_rs_state *state;
2115
2116 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO);
2117
2118 state = anv_device_alloc(device, sizeof(*state), 8,
2119 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2120 if (state == NULL)
2121 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2122
2123 /* Missing these:
2124 * float pointFadeThreshold;
2125 * // optional (GL45) - Size of point fade threshold
2126 */
2127
2128 struct GEN8_3DSTATE_SF sf = {
2129 GEN8_3DSTATE_SF_header,
2130 .LineWidth = pCreateInfo->lineWidth,
2131 .PointWidth = pCreateInfo->pointSize,
2132 };
2133
2134 GEN8_3DSTATE_SF_pack(NULL, state->state_sf, &sf);
2135
2136 bool enable_bias = pCreateInfo->depthBias != 0.0f ||
2137 pCreateInfo->slopeScaledDepthBias != 0.0f;
2138 struct GEN8_3DSTATE_RASTER raster = {
2139 .GlobalDepthOffsetEnableSolid = enable_bias,
2140 .GlobalDepthOffsetEnableWireframe = enable_bias,
2141 .GlobalDepthOffsetEnablePoint = enable_bias,
2142 .GlobalDepthOffsetConstant = pCreateInfo->depthBias,
2143 .GlobalDepthOffsetScale = pCreateInfo->slopeScaledDepthBias,
2144 .GlobalDepthOffsetClamp = pCreateInfo->depthBiasClamp
2145 };
2146
2147 GEN8_3DSTATE_RASTER_pack(NULL, state->state_raster, &raster);
2148
2149 *pState = (VkDynamicRsState) state;
2150
2151 return VK_SUCCESS;
2152 }
2153
2154 VkResult anv_CreateDynamicColorBlendState(
2155 VkDevice _device,
2156 const VkDynamicCbStateCreateInfo* pCreateInfo,
2157 VkDynamicCbState* pState)
2158 {
2159 struct anv_device *device = (struct anv_device *) _device;
2160 struct anv_dynamic_cb_state *state;
2161
2162 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO);
2163
2164 state = anv_device_alloc(device, sizeof(*state), 8,
2165 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2166 if (state == NULL)
2167 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2168
2169 struct GEN8_COLOR_CALC_STATE color_calc_state = {
2170 .BlendConstantColorRed = pCreateInfo->blendConst[0],
2171 .BlendConstantColorGreen = pCreateInfo->blendConst[1],
2172 .BlendConstantColorBlue = pCreateInfo->blendConst[2],
2173 .BlendConstantColorAlpha = pCreateInfo->blendConst[3]
2174 };
2175
2176 GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state);
2177
2178 *pState = (VkDynamicCbState) state;
2179
2180 return VK_SUCCESS;
2181 }
2182
2183 VkResult anv_CreateDynamicDepthStencilState(
2184 VkDevice _device,
2185 const VkDynamicDsStateCreateInfo* pCreateInfo,
2186 VkDynamicDsState* pState)
2187 {
2188 struct anv_device *device = (struct anv_device *) _device;
2189 struct anv_dynamic_ds_state *state;
2190
2191 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_DS_STATE_CREATE_INFO);
2192
2193 state = anv_device_alloc(device, sizeof(*state), 8,
2194 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2195 if (state == NULL)
2196 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2197
2198 struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = {
2199 GEN8_3DSTATE_WM_DEPTH_STENCIL_header,
2200
2201 /* Is this what we need to do? */
2202 .StencilBufferWriteEnable = pCreateInfo->stencilWriteMask != 0,
2203
2204 .StencilTestMask = pCreateInfo->stencilReadMask & 0xff,
2205 .StencilWriteMask = pCreateInfo->stencilWriteMask & 0xff,
2206
2207 .BackfaceStencilTestMask = pCreateInfo->stencilReadMask & 0xff,
2208 .BackfaceStencilWriteMask = pCreateInfo->stencilWriteMask & 0xff,
2209 };
2210
2211 GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, state->state_wm_depth_stencil,
2212 &wm_depth_stencil);
2213
2214 struct GEN8_COLOR_CALC_STATE color_calc_state = {
2215 .StencilReferenceValue = pCreateInfo->stencilFrontRef,
2216 .BackFaceStencilReferenceValue = pCreateInfo->stencilBackRef
2217 };
2218
2219 GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state);
2220
2221 *pState = (VkDynamicDsState) state;
2222
2223 return VK_SUCCESS;
2224 }
2225
2226 // Command buffer functions
2227
2228 static void
2229 anv_cmd_buffer_destroy(struct anv_device *device,
2230 struct anv_object *object,
2231 VkObjectType obj_type)
2232 {
2233 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) object;
2234
2235 assert(obj_type == VK_OBJECT_TYPE_COMMAND_BUFFER);
2236
2237 /* Destroy all of the batch buffers */
2238 struct anv_batch_bo *bbo = cmd_buffer->last_batch_bo;
2239 while (bbo) {
2240 struct anv_batch_bo *prev = bbo->prev_batch_bo;
2241 anv_batch_bo_destroy(bbo, device);
2242 bbo = prev;
2243 }
2244 anv_reloc_list_finish(&cmd_buffer->batch.relocs, device);
2245
2246 /* Destroy all of the surface state buffers */
2247 bbo = cmd_buffer->surface_batch_bo;
2248 while (bbo) {
2249 struct anv_batch_bo *prev = bbo->prev_batch_bo;
2250 anv_batch_bo_destroy(bbo, device);
2251 bbo = prev;
2252 }
2253 anv_reloc_list_finish(&cmd_buffer->surface_relocs, device);
2254
2255 anv_state_stream_finish(&cmd_buffer->surface_state_stream);
2256 anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
2257 anv_device_free(device, cmd_buffer->exec2_objects);
2258 anv_device_free(device, cmd_buffer->exec2_bos);
2259 anv_device_free(device, cmd_buffer);
2260 }
2261
2262 static VkResult
2263 anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data)
2264 {
2265 struct anv_cmd_buffer *cmd_buffer = _data;
2266
2267 struct anv_batch_bo *new_bbo, *old_bbo = cmd_buffer->last_batch_bo;
2268
2269 VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo);
2270 if (result != VK_SUCCESS)
2271 return result;
2272
2273 /* We set the end of the batch a little short so we would be sure we
2274 * have room for the chaining command. Since we're about to emit the
2275 * chaining command, let's set it back where it should go.
2276 */
2277 batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4;
2278 assert(batch->end == old_bbo->bo.map + old_bbo->bo.size);
2279
2280 anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START,
2281 GEN8_MI_BATCH_BUFFER_START_header,
2282 ._2ndLevelBatchBuffer = _1stlevelbatch,
2283 .AddressSpaceIndicator = ASI_PPGTT,
2284 .BatchBufferStartAddress = { &new_bbo->bo, 0 },
2285 );
2286
2287 /* Pad out to a 2-dword aligned boundary with zeros */
2288 if ((uintptr_t)batch->next % 8 != 0) {
2289 *(uint32_t *)batch->next = 0;
2290 batch->next += 4;
2291 }
2292
2293 anv_batch_bo_finish(cmd_buffer->last_batch_bo, batch);
2294
2295 new_bbo->prev_batch_bo = old_bbo;
2296 cmd_buffer->last_batch_bo = new_bbo;
2297
2298 anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4);
2299
2300 return VK_SUCCESS;
2301 }
2302
2303 VkResult anv_CreateCommandBuffer(
2304 VkDevice _device,
2305 const VkCmdBufferCreateInfo* pCreateInfo,
2306 VkCmdBuffer* pCmdBuffer)
2307 {
2308 struct anv_device *device = (struct anv_device *) _device;
2309 struct anv_cmd_buffer *cmd_buffer;
2310 VkResult result;
2311
2312 cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8,
2313 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2314 if (cmd_buffer == NULL)
2315 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2316
2317 cmd_buffer->base.destructor = anv_cmd_buffer_destroy;
2318
2319 cmd_buffer->device = device;
2320 cmd_buffer->rs_state = NULL;
2321 cmd_buffer->vp_state = NULL;
2322 cmd_buffer->cb_state = NULL;
2323 cmd_buffer->ds_state = NULL;
2324 memset(&cmd_buffer->descriptors, 0, sizeof(cmd_buffer->descriptors));
2325
2326 result = anv_batch_bo_create(device, &cmd_buffer->last_batch_bo);
2327 if (result != VK_SUCCESS)
2328 goto fail;
2329
2330 result = anv_reloc_list_init(&cmd_buffer->batch.relocs, device);
2331 if (result != VK_SUCCESS)
2332 goto fail_batch_bo;
2333
2334 cmd_buffer->batch.device = device;
2335 cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch;
2336 cmd_buffer->batch.user_data = cmd_buffer;
2337
2338 anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch,
2339 GEN8_MI_BATCH_BUFFER_START_length * 4);
2340
2341 result = anv_batch_bo_create(device, &cmd_buffer->surface_batch_bo);
2342 if (result != VK_SUCCESS)
2343 goto fail_batch_relocs;
2344 cmd_buffer->surface_batch_bo->first_reloc = 0;
2345
2346 result = anv_reloc_list_init(&cmd_buffer->surface_relocs, device);
2347 if (result != VK_SUCCESS)
2348 goto fail_ss_batch_bo;
2349
2350 /* Start surface_next at 1 so surface offset 0 is invalid. */
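   /* (A zero offset can then also serve as the "no surface state" value: the
    * zeroed anv_state returned by anv_cmd_buffer_alloc_surface_state on
    * failure can never alias a real allocation.  This is an inference from
    * the code, not a documented invariant.)
    */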
2351 cmd_buffer->surface_next = 1;
2352
2353 cmd_buffer->exec2_objects = NULL;
2354 cmd_buffer->exec2_bos = NULL;
2355 cmd_buffer->exec2_array_length = 0;
2356
2357 anv_state_stream_init(&cmd_buffer->surface_state_stream,
2358 &device->surface_state_block_pool);
2359 anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
2360 &device->dynamic_state_block_pool);
2361
2362 cmd_buffer->dirty = 0;
2363 cmd_buffer->vb_dirty = 0;
2364 cmd_buffer->descriptors_dirty = 0;
2365 cmd_buffer->pipeline = NULL;
2366 cmd_buffer->vp_state = NULL;
2367 cmd_buffer->rs_state = NULL;
2368 cmd_buffer->ds_state = NULL;
2369
2370 *pCmdBuffer = (VkCmdBuffer) cmd_buffer;
2371
2372 return VK_SUCCESS;
2373
2374 fail_ss_batch_bo:
2375 anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, device);
2376 fail_batch_relocs:
2377 anv_reloc_list_finish(&cmd_buffer->batch.relocs, device);
2378 fail_batch_bo:
2379 anv_batch_bo_destroy(cmd_buffer->last_batch_bo, device);
2380 fail:
2381 anv_device_free(device, cmd_buffer);
2382
2383 return result;
2384 }
2385
2386 static void
2387 anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
2388 {
2389 struct anv_device *device = cmd_buffer->device;
2390
2391 anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS,
2392 .GeneralStateBaseAddress = { NULL, 0 },
2393 .GeneralStateMemoryObjectControlState = GEN8_MOCS,
2394 .GeneralStateBaseAddressModifyEnable = true,
2395 .GeneralStateBufferSize = 0xfffff,
2396 .GeneralStateBufferSizeModifyEnable = true,
2397
2398 .SurfaceStateBaseAddress = { &cmd_buffer->surface_batch_bo->bo, 0 },
2399 .SurfaceStateMemoryObjectControlState = GEN8_MOCS,
2400 .SurfaceStateBaseAddressModifyEnable = true,
2401
2402 .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 },
2403 .DynamicStateMemoryObjectControlState = GEN8_MOCS,
2404 .DynamicStateBaseAddressModifyEnable = true,
2405 .DynamicStateBufferSize = 0xfffff,
2406 .DynamicStateBufferSizeModifyEnable = true,
2407
2408 .IndirectObjectBaseAddress = { NULL, 0 },
2409 .IndirectObjectMemoryObjectControlState = GEN8_MOCS,
2410 .IndirectObjectBaseAddressModifyEnable = true,
2411 .IndirectObjectBufferSize = 0xfffff,
2412 .IndirectObjectBufferSizeModifyEnable = true,
2413
2414 .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 },
2415 .InstructionMemoryObjectControlState = GEN8_MOCS,
2416 .InstructionBaseAddressModifyEnable = true,
2417 .InstructionBufferSize = 0xfffff,
2418 .InstructionBuffersizeModifyEnable = true);
2419 }
2420
2421 VkResult anv_BeginCommandBuffer(
2422 VkCmdBuffer cmdBuffer,
2423 const VkCmdBufferBeginInfo* pBeginInfo)
2424 {
2425 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2426
2427 anv_cmd_buffer_emit_state_base_address(cmd_buffer);
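   /* UINT32_MAX matches neither _3D nor GPGPU, so the first draw or dispatch
    * recorded into this command buffer emits an explicit PIPELINE_SELECT (see
    * anv_cmd_buffer_flush_state and anv_cmd_buffer_flush_compute_state).
    */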
2428 cmd_buffer->current_pipeline = UINT32_MAX;
2429
2430 return VK_SUCCESS;
2431 }
2432
2433 static VkResult
2434 anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer,
2435 struct anv_bo *bo,
2436 struct drm_i915_gem_relocation_entry *relocs,
2437 size_t num_relocs)
2438 {
2439 struct drm_i915_gem_exec_object2 *obj;
2440
2441 if (bo->index < cmd_buffer->bo_count &&
2442 cmd_buffer->exec2_bos[bo->index] == bo)
2443 return VK_SUCCESS;
2444
2445 if (cmd_buffer->bo_count >= cmd_buffer->exec2_array_length) {
2446 uint32_t new_len = cmd_buffer->exec2_objects ?
2447 cmd_buffer->exec2_array_length * 2 : 64;
2448
2449 struct drm_i915_gem_exec_object2 *new_objects =
2450 anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_objects),
2451 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
2452 if (new_objects == NULL)
2453 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2454
2455 struct anv_bo **new_bos =
2456 anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_bos),
2457 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
2458          if (new_bos == NULL) {
2459 anv_device_free(cmd_buffer->device, new_objects);
2460 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2461 }
2462
2463 if (cmd_buffer->exec2_objects) {
2464 memcpy(new_objects, cmd_buffer->exec2_objects,
2465 cmd_buffer->bo_count * sizeof(*new_objects));
2466 memcpy(new_bos, cmd_buffer->exec2_bos,
2467 cmd_buffer->bo_count * sizeof(*new_bos));
2468 }
2469
2470 cmd_buffer->exec2_objects = new_objects;
2471 cmd_buffer->exec2_bos = new_bos;
2472 cmd_buffer->exec2_array_length = new_len;
2473 }
2474
2475 assert(cmd_buffer->bo_count < cmd_buffer->exec2_array_length);
2476
2477 bo->index = cmd_buffer->bo_count++;
2478 obj = &cmd_buffer->exec2_objects[bo->index];
2479 cmd_buffer->exec2_bos[bo->index] = bo;
2480
2481 obj->handle = bo->gem_handle;
2482 obj->relocation_count = 0;
2483 obj->relocs_ptr = 0;
2484 obj->alignment = 0;
2485 obj->offset = bo->offset;
2486 obj->flags = 0;
2487 obj->rsvd1 = 0;
2488 obj->rsvd2 = 0;
2489
2490 if (relocs) {
2491 obj->relocation_count = num_relocs;
2492 obj->relocs_ptr = (uintptr_t) relocs;
2493 }
2494
2495 return VK_SUCCESS;
2496 }
2497
2498 static void
2499 anv_cmd_buffer_add_validate_bos(struct anv_cmd_buffer *cmd_buffer,
2500 struct anv_reloc_list *list)
2501 {
2502 for (size_t i = 0; i < list->num_relocs; i++)
2503 anv_cmd_buffer_add_bo(cmd_buffer, list->reloc_bos[i], NULL, 0);
2504 }
2505
2506 static void
2507 anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer,
2508 struct anv_reloc_list *list)
2509 {
2510 struct anv_bo *bo;
2511
2512 /* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in
2513     * struct drm_i915_gem_exec_object2 against the bo's current offset and, if
2514     * none of the bos have moved, it will skip relocation processing altogether.
2515 * If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming
2516 * value of offset so we can set it either way. For that to work we need
2517 * to make sure all relocs use the same presumed offset.
2518 */
2519
2520 for (size_t i = 0; i < list->num_relocs; i++) {
2521 bo = list->reloc_bos[i];
2522 if (bo->offset != list->relocs[i].presumed_offset)
2523 cmd_buffer->need_reloc = true;
2524
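      /* With I915_EXEC_HANDLE_LUT (set on the execbuf in anv_EndCommandBuffer),
       * target_handle is an index into the execbuf buffer list rather than a
       * GEM handle, which is why the bo's list index is stored here.
       */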
2525 list->relocs[i].target_handle = bo->index;
2526 }
2527 }
2528
2529 VkResult anv_EndCommandBuffer(
2530 VkCmdBuffer cmdBuffer)
2531 {
2532 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2533 struct anv_device *device = cmd_buffer->device;
2534 struct anv_batch *batch = &cmd_buffer->batch;
2535
2536 anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_END);
2537
2538 /* Round batch up to an even number of dwords. */
2539 if ((batch->next - batch->start) & 4)
2540 anv_batch_emit(batch, GEN8_MI_NOOP);
2541
2542 anv_batch_bo_finish(cmd_buffer->last_batch_bo, &cmd_buffer->batch);
2543 cmd_buffer->surface_batch_bo->num_relocs =
2544 cmd_buffer->surface_relocs.num_relocs - cmd_buffer->surface_batch_bo->first_reloc;
2545 cmd_buffer->surface_batch_bo->length = cmd_buffer->surface_next;
2546
2547 cmd_buffer->bo_count = 0;
2548 cmd_buffer->need_reloc = false;
2549
2550 /* Lock for access to bo->index. */
2551 pthread_mutex_lock(&device->mutex);
2552
2553 /* Add surface state bos first so we can add them with their relocs. */
2554 for (struct anv_batch_bo *bbo = cmd_buffer->surface_batch_bo;
2555 bbo != NULL; bbo = bbo->prev_batch_bo) {
2556 anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo,
2557 &cmd_buffer->surface_relocs.relocs[bbo->first_reloc],
2558 bbo->num_relocs);
2559 }
2560
2561 /* Add all of the BOs referenced by surface state */
2562 anv_cmd_buffer_add_validate_bos(cmd_buffer, &cmd_buffer->surface_relocs);
2563
2564 /* Add all but the first batch BO */
2565 struct anv_batch_bo *batch_bo = cmd_buffer->last_batch_bo;
2566 while (batch_bo->prev_batch_bo) {
2567 anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo,
2568 &batch->relocs.relocs[batch_bo->first_reloc],
2569 batch_bo->num_relocs);
2570 batch_bo = batch_bo->prev_batch_bo;
2571 }
2572
2573 /* Add everything referenced by the batches */
2574 anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->relocs);
2575
2576 /* Add the first batch bo last */
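   /* (The i915 execbuf2 interface treats the last object in the buffer list
    * as the batch buffer to execute, so the root batch BO has to come last.)
    */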
2577 assert(batch_bo->prev_batch_bo == NULL && batch_bo->first_reloc == 0);
2578 anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo,
2579 &batch->relocs.relocs[batch_bo->first_reloc],
2580 batch_bo->num_relocs);
2581 assert(batch_bo->bo.index == cmd_buffer->bo_count - 1);
2582
2583 anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);
2584 anv_cmd_buffer_process_relocs(cmd_buffer, &batch->relocs);
2585
2586 cmd_buffer->execbuf.buffers_ptr = (uintptr_t) cmd_buffer->exec2_objects;
2587 cmd_buffer->execbuf.buffer_count = cmd_buffer->bo_count;
2588 cmd_buffer->execbuf.batch_start_offset = 0;
2589 cmd_buffer->execbuf.batch_len = batch->next - batch->start;
2590 cmd_buffer->execbuf.cliprects_ptr = 0;
2591 cmd_buffer->execbuf.num_cliprects = 0;
2592 cmd_buffer->execbuf.DR1 = 0;
2593 cmd_buffer->execbuf.DR4 = 0;
2594
2595 cmd_buffer->execbuf.flags = I915_EXEC_HANDLE_LUT;
2596 if (!cmd_buffer->need_reloc)
2597 cmd_buffer->execbuf.flags |= I915_EXEC_NO_RELOC;
2598 cmd_buffer->execbuf.flags |= I915_EXEC_RENDER;
2599 cmd_buffer->execbuf.rsvd1 = device->context_id;
2600 cmd_buffer->execbuf.rsvd2 = 0;
2601
2602 pthread_mutex_unlock(&device->mutex);
2603
2604 return VK_SUCCESS;
2605 }
2606
2607 VkResult anv_ResetCommandBuffer(
2608 VkCmdBuffer cmdBuffer)
2609 {
2610 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2611
2612 /* Delete all but the first batch bo */
2613 while (cmd_buffer->last_batch_bo->prev_batch_bo) {
2614 struct anv_batch_bo *prev = cmd_buffer->last_batch_bo->prev_batch_bo;
2615 anv_batch_bo_destroy(cmd_buffer->last_batch_bo, cmd_buffer->device);
2616 cmd_buffer->last_batch_bo = prev;
2617 }
2618 assert(cmd_buffer->last_batch_bo->prev_batch_bo == NULL);
2619
2620 cmd_buffer->batch.relocs.num_relocs = 0;
2621 anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch,
2622 GEN8_MI_BATCH_BUFFER_START_length * 4);
2623
2624 /* Delete all but the first batch bo */
2625 while (cmd_buffer->surface_batch_bo->prev_batch_bo) {
2626 struct anv_batch_bo *prev = cmd_buffer->surface_batch_bo->prev_batch_bo;
2627 anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, cmd_buffer->device);
2628 cmd_buffer->surface_batch_bo = prev;
2629 }
2630 assert(cmd_buffer->surface_batch_bo->prev_batch_bo == NULL);
2631
2632 cmd_buffer->surface_next = 1;
2633 cmd_buffer->surface_relocs.num_relocs = 0;
2634
2635 cmd_buffer->rs_state = NULL;
2636 cmd_buffer->vp_state = NULL;
2637 cmd_buffer->cb_state = NULL;
2638 cmd_buffer->ds_state = NULL;
2639
2640 return VK_SUCCESS;
2641 }
2642
2643 // Command buffer building functions
2644
2645 void anv_CmdBindPipeline(
2646 VkCmdBuffer cmdBuffer,
2647 VkPipelineBindPoint pipelineBindPoint,
2648 VkPipeline _pipeline)
2649 {
2650 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2651 struct anv_pipeline *pipeline = (struct anv_pipeline *) _pipeline;
2652
2653 switch (pipelineBindPoint) {
2654 case VK_PIPELINE_BIND_POINT_COMPUTE:
2655 cmd_buffer->compute_pipeline = pipeline;
2656 cmd_buffer->compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
2657 break;
2658
2659 case VK_PIPELINE_BIND_POINT_GRAPHICS:
2660 cmd_buffer->pipeline = pipeline;
2661 cmd_buffer->vb_dirty |= pipeline->vb_used;
2662 cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
2663 break;
2664
2665 default:
2666 assert(!"invalid bind point");
2667 break;
2668 }
2669 }
2670
2671 void anv_CmdBindDynamicStateObject(
2672 VkCmdBuffer cmdBuffer,
2673 VkStateBindPoint stateBindPoint,
2674 VkDynamicStateObject dynamicState)
2675 {
2676 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2677
2678 switch (stateBindPoint) {
2679 case VK_STATE_BIND_POINT_VIEWPORT:
2680 cmd_buffer->vp_state = (struct anv_dynamic_vp_state *) dynamicState;
2681 cmd_buffer->dirty |= ANV_CMD_BUFFER_VP_DIRTY;
2682 break;
2683 case VK_STATE_BIND_POINT_RASTER:
2684 cmd_buffer->rs_state = (struct anv_dynamic_rs_state *) dynamicState;
2685 cmd_buffer->dirty |= ANV_CMD_BUFFER_RS_DIRTY;
2686 break;
2687 case VK_STATE_BIND_POINT_COLOR_BLEND:
2688 cmd_buffer->cb_state = (struct anv_dynamic_cb_state *) dynamicState;
2689 cmd_buffer->dirty |= ANV_CMD_BUFFER_CB_DIRTY;
2690 break;
2691 case VK_STATE_BIND_POINT_DEPTH_STENCIL:
2692 cmd_buffer->ds_state = (struct anv_dynamic_ds_state *) dynamicState;
2693 cmd_buffer->dirty |= ANV_CMD_BUFFER_DS_DIRTY;
2694 break;
2695 default:
2696 break;
2697 };
2698 }
2699
2700 static struct anv_state
2701 anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer,
2702 uint32_t size, uint32_t alignment)
2703 {
2704 struct anv_state state;
2705
2706 state.offset = ALIGN_U32(cmd_buffer->surface_next, alignment);
2707 if (state.offset + size > cmd_buffer->surface_batch_bo->bo.size)
2708 return (struct anv_state) { 0 };
2709
2710 state.map = cmd_buffer->surface_batch_bo->bo.map + state.offset;
2711 state.alloc_size = size;
2712 cmd_buffer->surface_next = state.offset + size;
2713
2714 assert(state.offset + size <= cmd_buffer->surface_batch_bo->bo.size);
2715
2716 return state;
2717 }
2718
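/* Called when anv_cmd_buffer_alloc_surface_state() above runs out of space:
 * the old surface state BO is finalized, a fresh one becomes the allocation
 * target, and the state base addresses are re-emitted so later binding tables
 * land in the new BO.  See flush_descriptor_sets() below for the retry path.
 */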
2719 static VkResult
2720 anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer)
2721 {
2722 struct anv_batch_bo *new_bbo, *old_bbo = cmd_buffer->surface_batch_bo;
2723
2724 /* Finish off the old buffer */
2725 old_bbo->num_relocs =
2726 cmd_buffer->surface_relocs.num_relocs - old_bbo->first_reloc;
2727 old_bbo->length = cmd_buffer->surface_next;
2728
2729 VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo);
2730 if (result != VK_SUCCESS)
2731 return result;
2732
2733 new_bbo->first_reloc = cmd_buffer->surface_relocs.num_relocs;
2734 cmd_buffer->surface_next = 1;
2735
2736 new_bbo->prev_batch_bo = old_bbo;
2737 cmd_buffer->surface_batch_bo = new_bbo;
2738
2739 /* Re-emit state base addresses so we get the new surface state base
2740 * address before we start emitting binding tables etc.
2741 */
2742 anv_cmd_buffer_emit_state_base_address(cmd_buffer);
2743
2744 /* It seems like just changing the state base addresses isn't enough.
2745 * Invalidating the cache seems to be enough to cause things to
2746 * propagate. However, I'm not 100% sure what we're supposed to do.
2747 */
2748 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
2749 .TextureCacheInvalidationEnable = true);
2750
2751 return VK_SUCCESS;
2752 }
2753
2754 void anv_CmdBindDescriptorSets(
2755 VkCmdBuffer cmdBuffer,
2756 VkPipelineBindPoint pipelineBindPoint,
2757 uint32_t firstSet,
2758 uint32_t setCount,
2759 const VkDescriptorSet* pDescriptorSets,
2760 uint32_t dynamicOffsetCount,
2761 const uint32_t* pDynamicOffsets)
2762 {
2763 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2764 struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout;
2765 struct anv_descriptor_set *set;
2766 struct anv_descriptor_set_layout *set_layout;
2767
2768    assert(firstSet + setCount <= MAX_SETS);
2769
2770 uint32_t dynamic_slot = 0;
2771 for (uint32_t i = 0; i < setCount; i++) {
2772 set = (struct anv_descriptor_set *) pDescriptorSets[i];
2773 set_layout = layout->set[firstSet + i].layout;
2774
2775 cmd_buffer->descriptors[firstSet + i].set = set;
2776
2777 assert(set_layout->num_dynamic_buffers <
2778 ARRAY_SIZE(cmd_buffer->descriptors[0].dynamic_offsets));
2779 memcpy(cmd_buffer->descriptors[firstSet + i].dynamic_offsets,
2780 pDynamicOffsets + dynamic_slot,
2781 set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets));
2782
2783 cmd_buffer->descriptors_dirty |= set_layout->shader_stages;
2784
2785 dynamic_slot += set_layout->num_dynamic_buffers;
2786 }
2787 }
2788
2789 void anv_CmdBindIndexBuffer(
2790 VkCmdBuffer cmdBuffer,
2791 VkBuffer _buffer,
2792 VkDeviceSize offset,
2793 VkIndexType indexType)
2794 {
2795 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2796 struct anv_buffer *buffer = (struct anv_buffer *) _buffer;
2797
2798 static const uint32_t vk_to_gen_index_type[] = {
2799 [VK_INDEX_TYPE_UINT8] = INDEX_BYTE,
2800 [VK_INDEX_TYPE_UINT16] = INDEX_WORD,
2801 [VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
2802 };
2803
2804 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER,
2805 .IndexFormat = vk_to_gen_index_type[indexType],
2806 .MemoryObjectControlState = GEN8_MOCS,
2807 .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
2808 .BufferSize = buffer->size - offset);
2809 }
2810
2811 void anv_CmdBindVertexBuffers(
2812 VkCmdBuffer cmdBuffer,
2813 uint32_t startBinding,
2814 uint32_t bindingCount,
2815 const VkBuffer* pBuffers,
2816 const VkDeviceSize* pOffsets)
2817 {
2818 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2819 struct anv_vertex_binding *vb = cmd_buffer->vertex_bindings;
2820
2821    /* We have to defer setting up the vertex buffers since we need the buffer
2822 * stride from the pipeline. */
2823
2824    assert(startBinding + bindingCount <= MAX_VBS);
2825 for (uint32_t i = 0; i < bindingCount; i++) {
2826 vb[startBinding + i].buffer = (struct anv_buffer *) pBuffers[i];
2827 vb[startBinding + i].offset = pOffsets[i];
2828 cmd_buffer->vb_dirty |= 1 << (startBinding + i);
2829 }
2830 }
2831
2832 static VkResult
2833 cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
2834 unsigned stage, struct anv_state *bt_state)
2835 {
2836 struct anv_pipeline_layout *layout;
2837 uint32_t color_attachments, bias, size;
2838
2839 if (stage == VK_SHADER_STAGE_COMPUTE)
2840 layout = cmd_buffer->compute_pipeline->layout;
2841 else
2842 layout = cmd_buffer->pipeline->layout;
2843
2844 if (stage == VK_SHADER_STAGE_FRAGMENT) {
2845 bias = MAX_RTS;
2846 color_attachments = cmd_buffer->framebuffer->color_attachment_count;
2847 } else {
2848 bias = 0;
2849 color_attachments = 0;
2850 }
2851
2852 /* This is a little awkward: layout can be NULL but we still have to
2853 * allocate and set a binding table for the PS stage for render
2854 * targets. */
2855 uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0;
2856
2857 if (color_attachments + surface_count == 0)
2858 return VK_SUCCESS;
2859
2860 size = (bias + surface_count) * sizeof(uint32_t);
2861 *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32);
2862 uint32_t *bt_map = bt_state->map;
2863
2864 if (bt_state->map == NULL)
2865 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2866
2867 for (uint32_t ca = 0; ca < color_attachments; ca++) {
2868 const struct anv_surface_view *view =
2869 cmd_buffer->framebuffer->color_attachments[ca];
2870
2871 struct anv_state state =
2872 anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);
2873
2874 if (state.map == NULL)
2875 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2876
2877 memcpy(state.map, view->surface_state.map, 64);
2878
2879 /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
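         /* anv_reloc_list_add() records a relocation for this offset and,
          * judging from its use here, returns the presumed 64-bit address of
          * view->bo plus the delta, which is written into the surface state
          * so the kernel can skip patching when the BO hasn't moved.
          */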
2880 *(uint64_t *)(state.map + 8 * 4) =
2881 anv_reloc_list_add(&cmd_buffer->surface_relocs,
2882 cmd_buffer->device,
2883 state.offset + 8 * 4,
2884 view->bo, view->offset);
2885
2886 bt_map[ca] = state.offset;
2887 }
2888
2889 if (layout == NULL)
2890 return VK_SUCCESS;
2891
2892 for (uint32_t set = 0; set < layout->num_sets; set++) {
2893 struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set];
2894 struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
2895 struct anv_descriptor_slot *surface_slots =
2896 set_layout->stage[stage].surface_start;
2897
2898 uint32_t start = bias + layout->set[set].surface_start[stage];
2899
2900 for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) {
2901 struct anv_surface_view *view =
2902 d->set->descriptors[surface_slots[b].index].view;
2903
2904 if (!view)
2905 continue;
2906
2907 struct anv_state state =
2908 anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);
2909
2910 if (state.map == NULL)
2911 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2912
2913 uint32_t offset;
2914 if (surface_slots[b].dynamic_slot >= 0) {
2915 uint32_t dynamic_offset =
2916 d->dynamic_offsets[surface_slots[b].dynamic_slot];
2917
2918 offset = view->offset + dynamic_offset;
2919 fill_buffer_surface_state(state.map, view->format, offset,
2920 view->range - dynamic_offset);
2921 } else {
2922 offset = view->offset;
2923 memcpy(state.map, view->surface_state.map, 64);
2924 }
2925
2926 /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
2927 *(uint64_t *)(state.map + 8 * 4) =
2928 anv_reloc_list_add(&cmd_buffer->surface_relocs,
2929 cmd_buffer->device,
2930 state.offset + 8 * 4,
2931 view->bo, offset);
2932
2933 bt_map[start + b] = state.offset;
2934 }
2935 }
2936
2937 return VK_SUCCESS;
2938 }
2939
2940 static VkResult
2941 cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
2942 unsigned stage, struct anv_state *state)
2943 {
2944 struct anv_pipeline_layout *layout;
2945 uint32_t sampler_count;
2946
2947 if (stage == VK_SHADER_STAGE_COMPUTE)
2948 layout = cmd_buffer->compute_pipeline->layout;
2949 else
2950 layout = cmd_buffer->pipeline->layout;
2951
2952 sampler_count = layout ? layout->stage[stage].sampler_count : 0;
2953 if (sampler_count == 0)
2954 return VK_SUCCESS;
2955
2956 uint32_t size = sampler_count * 16;
2957 *state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32);
2958
2959 if (state->map == NULL)
2960 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2961
2962 for (uint32_t set = 0; set < layout->num_sets; set++) {
2963 struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set];
2964 struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
2965 struct anv_descriptor_slot *sampler_slots =
2966 set_layout->stage[stage].sampler_start;
2967
2968 uint32_t start = layout->set[set].sampler_start[stage];
2969
2970 for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) {
2971 struct anv_sampler *sampler =
2972 d->set->descriptors[sampler_slots[b].index].sampler;
2973
2974 if (!sampler)
2975 continue;
2976
2977 memcpy(state->map + (start + b) * 16,
2978 sampler->state, sizeof(sampler->state));
2979 }
2980 }
2981
2982 return VK_SUCCESS;
2983 }
2984
2985 static VkResult
2986 flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage)
2987 {
2988 struct anv_state surfaces = { 0, }, samplers = { 0, };
2989 VkResult result;
2990
2991 result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers);
2992 if (result != VK_SUCCESS)
2993 return result;
2994 result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces);
2995 if (result != VK_SUCCESS)
2996 return result;
2997
2998 static const uint32_t sampler_state_opcodes[] = {
2999 [VK_SHADER_STAGE_VERTEX] = 43,
3000 [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */
3001 [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */
3002 [VK_SHADER_STAGE_GEOMETRY] = 46,
3003 [VK_SHADER_STAGE_FRAGMENT] = 47,
3004 [VK_SHADER_STAGE_COMPUTE] = 0,
3005 };
3006
3007 static const uint32_t binding_table_opcodes[] = {
3008 [VK_SHADER_STAGE_VERTEX] = 38,
3009 [VK_SHADER_STAGE_TESS_CONTROL] = 39,
3010 [VK_SHADER_STAGE_TESS_EVALUATION] = 40,
3011 [VK_SHADER_STAGE_GEOMETRY] = 41,
3012 [VK_SHADER_STAGE_FRAGMENT] = 42,
3013 [VK_SHADER_STAGE_COMPUTE] = 0,
3014 };
3015
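   /* The per-stage sampler/binding-table pointer packets share a layout and
    * differ only in _3DCommandSubOpcode, so the *_VS packs are used as
    * templates below and the sub-opcode is overridden per stage.
    */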
3016 if (samplers.alloc_size > 0) {
3017 anv_batch_emit(&cmd_buffer->batch,
3018 GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS,
3019 ._3DCommandSubOpcode = sampler_state_opcodes[stage],
3020 .PointertoVSSamplerState = samplers.offset);
3021 }
3022
3023 if (surfaces.alloc_size > 0) {
3024 anv_batch_emit(&cmd_buffer->batch,
3025 GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS,
3026 ._3DCommandSubOpcode = binding_table_opcodes[stage],
3027 .PointertoVSBindingTable = surfaces.offset);
3028 }
3029
3030 return VK_SUCCESS;
3031 }
3032
3033 static void
3034 flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
3035 {
3036 uint32_t s, dirty = cmd_buffer->descriptors_dirty &
3037 cmd_buffer->pipeline->active_stages;
3038
3039    VkResult result = VK_SUCCESS; /* stays VK_SUCCESS if nothing is dirty */
3040 for_each_bit(s, dirty) {
3041 result = flush_descriptor_set(cmd_buffer, s);
3042 if (result != VK_SUCCESS)
3043 break;
3044 }
3045
3046 if (result != VK_SUCCESS) {
3047 assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY);
3048
3049 result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer);
3050 assert(result == VK_SUCCESS);
3051
3052 /* Re-emit all active binding tables */
3053 for_each_bit(s, cmd_buffer->pipeline->active_stages) {
3054 result = flush_descriptor_set(cmd_buffer, s);
3055
3056 /* It had better succeed this time */
3057 assert(result == VK_SUCCESS);
3058 }
3059 }
3060
3061 cmd_buffer->descriptors_dirty &= ~cmd_buffer->pipeline->active_stages;
3062 }
3063
3064 static struct anv_state
3065 anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
3066 uint32_t *a, uint32_t dwords, uint32_t alignment)
3067 {
3068 struct anv_state state;
3069
3070 state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
3071 dwords * 4, alignment);
3072 memcpy(state.map, a, dwords * 4);
3073
3074 VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4));
3075
3076 return state;
3077 }
3078
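/* ORs together two partially packed state templates DWord by DWord, so e.g.
 * the depth/stencil and color-blend halves of COLOR_CALC_STATE can be combined
 * in anv_cmd_buffer_flush_state() below (anv_batch_emit_merge appears to use
 * the same trick for 3DSTATE packets).
 */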
3079 static struct anv_state
3080 anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
3081 uint32_t *a, uint32_t *b,
3082 uint32_t dwords, uint32_t alignment)
3083 {
3084 struct anv_state state;
3085 uint32_t *p;
3086
3087 state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
3088 dwords * 4, alignment);
3089 p = state.map;
3090 for (uint32_t i = 0; i < dwords; i++)
3091 p[i] = a[i] | b[i];
3092
3093 VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));
3094
3095 return state;
3096 }
3097
3098 static VkResult
3099 flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
3100 {
3101 struct anv_device *device = cmd_buffer->device;
3102 struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
3103 struct anv_state surfaces = { 0, }, samplers = { 0, };
3104 VkResult result;
3105
3106 result = cmd_buffer_emit_samplers(cmd_buffer,
3107 VK_SHADER_STAGE_COMPUTE, &samplers);
3108 if (result != VK_SUCCESS)
3109 return result;
3110 result = cmd_buffer_emit_binding_table(cmd_buffer,
3111 VK_SHADER_STAGE_COMPUTE, &surfaces);
3112 if (result != VK_SUCCESS)
3113 return result;
3114
3115 struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = {
3116 .KernelStartPointer = pipeline->cs_simd,
3117 .KernelStartPointerHigh = 0,
3118 .BindingTablePointer = surfaces.offset,
3119 .BindingTableEntryCount = 0,
3120 .SamplerStatePointer = samplers.offset,
3121 .SamplerCount = 0,
3122 .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
3123 };
3124
3125 uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
3126 struct anv_state state =
3127 anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);
3128
3129 GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc);
3130
3131 anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
3132 .InterfaceDescriptorTotalLength = size,
3133 .InterfaceDescriptorDataStartAddress = state.offset);
3134
3135 return VK_SUCCESS;
3136 }
3137
3138 static void
3139 anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
3140 {
3141 struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
3142 VkResult result;
3143
3144 assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
3145
3146 if (cmd_buffer->current_pipeline != GPGPU) {
3147 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
3148 .PipelineSelection = GPGPU);
3149 cmd_buffer->current_pipeline = GPGPU;
3150 }
3151
3152 if (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)
3153 anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
3154
3155 if ((cmd_buffer->descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
3156 (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) {
3157 result = flush_compute_descriptor_set(cmd_buffer);
3158 if (result != VK_SUCCESS) {
3159 result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer);
3160 assert(result == VK_SUCCESS);
3161 result = flush_compute_descriptor_set(cmd_buffer);
3162 assert(result == VK_SUCCESS);
3163 }
3164       cmd_buffer->descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
3165 }
3166
3167 cmd_buffer->compute_dirty = 0;
3168 }
3169
3170 static void
3171 anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
3172 {
3173 struct anv_pipeline *pipeline = cmd_buffer->pipeline;
3174 uint32_t *p;
3175
3176 uint32_t vb_emit = cmd_buffer->vb_dirty & pipeline->vb_used;
3177
3178 assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);
3179
3180 if (cmd_buffer->current_pipeline != _3D) {
3181 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
3182 .PipelineSelection = _3D);
3183 cmd_buffer->current_pipeline = _3D;
3184 }
3185
3186 if (vb_emit) {
3187 const uint32_t num_buffers = __builtin_popcount(vb_emit);
3188 const uint32_t num_dwords = 1 + num_buffers * 4;
3189
3190 p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
3191 GEN8_3DSTATE_VERTEX_BUFFERS);
3192 uint32_t vb, i = 0;
3193 for_each_bit(vb, vb_emit) {
3194 struct anv_buffer *buffer = cmd_buffer->vertex_bindings[vb].buffer;
3195 uint32_t offset = cmd_buffer->vertex_bindings[vb].offset;
3196
3197 struct GEN8_VERTEX_BUFFER_STATE state = {
3198 .VertexBufferIndex = vb,
3199 .MemoryObjectControlState = GEN8_MOCS,
3200 .AddressModifyEnable = true,
3201 .BufferPitch = pipeline->binding_stride[vb],
3202 .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
3203 .BufferSize = buffer->size - offset
3204 };
3205
3206 GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state);
3207 i++;
3208 }
3209 }
3210
3211 if (cmd_buffer->dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)
3212 anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
3213
3214 if (cmd_buffer->descriptors_dirty)
3215 flush_descriptor_sets(cmd_buffer);
3216
3217 if (cmd_buffer->dirty & ANV_CMD_BUFFER_VP_DIRTY) {
3218 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS,
3219 .ScissorRectPointer = cmd_buffer->vp_state->scissor.offset);
3220 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC,
3221 .CCViewportPointer = cmd_buffer->vp_state->cc_vp.offset);
3222 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP,
3223 .SFClipViewportPointer = cmd_buffer->vp_state->sf_clip_vp.offset);
3224 }
3225
3226 if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY)) {
3227 anv_batch_emit_merge(&cmd_buffer->batch,
3228 cmd_buffer->rs_state->state_sf, pipeline->state_sf);
3229 anv_batch_emit_merge(&cmd_buffer->batch,
3230 cmd_buffer->rs_state->state_raster, pipeline->state_raster);
3231 }
3232
3233 if (cmd_buffer->ds_state &&
3234 (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)))
3235 anv_batch_emit_merge(&cmd_buffer->batch,
3236 cmd_buffer->ds_state->state_wm_depth_stencil,
3237 pipeline->state_wm_depth_stencil);
3238
3239 if (cmd_buffer->dirty & (ANV_CMD_BUFFER_CB_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)) {
3240 struct anv_state state;
3241 if (cmd_buffer->ds_state == NULL)
3242 state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
3243 cmd_buffer->cb_state->state_color_calc,
3244 GEN8_COLOR_CALC_STATE_length, 64);
3245 else if (cmd_buffer->cb_state == NULL)
3246 state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
3247 cmd_buffer->ds_state->state_color_calc,
3248 GEN8_COLOR_CALC_STATE_length, 64);
3249 else
3250 state = anv_cmd_buffer_merge_dynamic(cmd_buffer,
3251 cmd_buffer->ds_state->state_color_calc,
3252 cmd_buffer->cb_state->state_color_calc,
3253 GEN8_COLOR_CALC_STATE_length, 64);
3254
3255 anv_batch_emit(&cmd_buffer->batch,
3256 GEN8_3DSTATE_CC_STATE_POINTERS,
3257 .ColorCalcStatePointer = state.offset,
3258 .ColorCalcStatePointerValid = true);
3259 }
3260
3261 cmd_buffer->vb_dirty &= ~vb_emit;
3262 cmd_buffer->dirty = 0;
3263 }
3264
3265 void anv_CmdDraw(
3266 VkCmdBuffer cmdBuffer,
3267 uint32_t firstVertex,
3268 uint32_t vertexCount,
3269 uint32_t firstInstance,
3270 uint32_t instanceCount)
3271 {
3272 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
3273
3274 anv_cmd_buffer_flush_state(cmd_buffer);
3275
3276 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
3277 .VertexAccessType = SEQUENTIAL,
3278 .VertexCountPerInstance = vertexCount,
3279 .StartVertexLocation = firstVertex,
3280 .InstanceCount = instanceCount,
3281 .StartInstanceLocation = firstInstance,
3282 .BaseVertexLocation = 0);
3283 }
3284
3285 void anv_CmdDrawIndexed(
3286 VkCmdBuffer cmdBuffer,
3287 uint32_t firstIndex,
3288 uint32_t indexCount,
3289 int32_t vertexOffset,
3290 uint32_t firstInstance,
3291 uint32_t instanceCount)
3292 {
3293 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
3294
3295 anv_cmd_buffer_flush_state(cmd_buffer);
3296
3297 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
3298 .VertexAccessType = RANDOM,
3299 .VertexCountPerInstance = indexCount,
3300 .StartVertexLocation = firstIndex,
3301 .InstanceCount = instanceCount,
3302 .StartInstanceLocation = firstInstance,
3303 .BaseVertexLocation = vertexOffset);
3304 }
3305
3306 static void
3307 anv_batch_lrm(struct anv_batch *batch,
3308 uint32_t reg, struct anv_bo *bo, uint32_t offset)
3309 {
3310 anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
3311 .RegisterAddress = reg,
3312 .MemoryAddress = { bo, offset });
3313 }
3314
3315 static void
3316 anv_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
3317 {
3318 anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM,
3319 .RegisterOffset = reg,
3320 .DataDWord = imm);
3321 }
3322
3323 /* Auto-Draw / Indirect Registers */
3324 #define GEN7_3DPRIM_END_OFFSET 0x2420
3325 #define GEN7_3DPRIM_START_VERTEX 0x2430
3326 #define GEN7_3DPRIM_VERTEX_COUNT 0x2434
3327 #define GEN7_3DPRIM_INSTANCE_COUNT 0x2438
3328 #define GEN7_3DPRIM_START_INSTANCE 0x243C
3329 #define GEN7_3DPRIM_BASE_VERTEX 0x2440
3330
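/* The two indirect draw entry points below load these registers straight from
 * the indirect buffer at the given offset, matching the API's indirect draw
 * parameter order: vertex count, instance count, first vertex, first instance
 * (and for indexed draws: index count, instance count, first index, vertex
 * offset, first instance).
 */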
3331 void anv_CmdDrawIndirect(
3332 VkCmdBuffer cmdBuffer,
3333 VkBuffer _buffer,
3334 VkDeviceSize offset,
3335 uint32_t count,
3336 uint32_t stride)
3337 {
3338 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
3339 struct anv_buffer *buffer = (struct anv_buffer *) _buffer;
3340 struct anv_bo *bo = buffer->bo;
3341 uint32_t bo_offset = buffer->offset + offset;
3342
3343 anv_cmd_buffer_flush_state(cmd_buffer);
3344
3345 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
3346 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
3347 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
3348 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12);
3349 anv_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0);
3350
3351 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
3352 .IndirectParameterEnable = true,
3353 .VertexAccessType = SEQUENTIAL);
3354 }
3355
3356 void anv_CmdDrawIndexedIndirect(
3357 VkCmdBuffer cmdBuffer,
3358 VkBuffer _buffer,
3359 VkDeviceSize offset,
3360 uint32_t count,
3361 uint32_t stride)
3362 {
3363 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
3364 struct anv_buffer *buffer = (struct anv_buffer *) _buffer;
3365 struct anv_bo *bo = buffer->bo;
3366 uint32_t bo_offset = buffer->offset + offset;
3367
3368 anv_cmd_buffer_flush_state(cmd_buffer);
3369
3370 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
3371 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
3372 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
3373 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12);
3374 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16);
3375
3376 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
3377 .IndirectParameterEnable = true,
3378 .VertexAccessType = RANDOM);
3379 }
3380
3381 void anv_CmdDispatch(
3382 VkCmdBuffer cmdBuffer,
3383 uint32_t x,
3384 uint32_t y,
3385 uint32_t z)
3386 {
3387 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
3388 uint32_t size = SIMD8; /* FIXME */
3389 uint32_t right_mask = 0; /* FIXME */
3390 uint32_t thread_width_max = 0; /* FIXME */
3391
3392 anv_cmd_buffer_flush_compute_state(cmd_buffer);
3393
3394 anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
3395
3396 .InterfaceDescriptorOffset = 0,
3397 .IndirectDataLength = 0,
3398 .IndirectDataStartAddress = 0,
3399
3400 .SIMDSize = size,
3401
3402 .ThreadDepthCounterMaximum = 0,
3403 .ThreadHeightCounterMaximum = 0,
3404 .ThreadWidthCounterMaximum = thread_width_max,
3405
3406 .ThreadGroupIDStartingX = 0,
3407 .ThreadGroupIDXDimension = x,
3408 .ThreadGroupIDStartingY = 0,
3409 .ThreadGroupIDYDimension = y,
3410 .ThreadGroupIDStartingResumeZ = 0,
3411 .ThreadGroupIDZDimension = z,
3412 .RightExecutionMask = right_mask,
3413 .BottomExecutionMask = 0xffffffff);
3414
3415 anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
3416 }
3417
3418 #define GPGPU_DISPATCHDIMX 0x2500
3419 #define GPGPU_DISPATCHDIMY 0x2504
3420 #define GPGPU_DISPATCHDIMZ 0x2508
3421
3422 void anv_CmdDispatchIndirect(
3423 VkCmdBuffer cmdBuffer,
3424 VkBuffer _buffer,
3425 VkDeviceSize offset)
3426 {
3427 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
3428 struct anv_buffer *buffer = (struct anv_buffer *) _buffer;
3429 struct anv_bo *bo = buffer->bo;
3430 uint32_t bo_offset = buffer->offset + offset;
3431
3432 anv_cmd_buffer_flush_compute_state(cmd_buffer);
3433
3434 anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);
3435 anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
3436 anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);
3437
3438 uint32_t size = SIMD8; /* FIXME */
3439 uint32_t right_mask = 0; /* FIXME */
3440 uint32_t thread_width_max = 0; /* FIXME */
3441
3442 /* FIXME: We can't compute thread_width_max for indirect, looks like it
3443 * depends on DIMX. */
3444
3445 anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
3446 .IndirectParameterEnable = true,
3447 .InterfaceDescriptorOffset = 0,
3448 .IndirectDataLength = 0,
3449 .IndirectDataStartAddress = 0,
3450
3451 .SIMDSize = size,
3452
3453 .ThreadDepthCounterMaximum = 0,
3454 .ThreadHeightCounterMaximum = 0,
3455 .ThreadWidthCounterMaximum = thread_width_max,
3456
3457 .RightExecutionMask = right_mask,
3458 .BottomExecutionMask = 0xffffffff);
3459
3460 anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
3461 }
3462
3463 void anv_CmdSetEvent(
3464 VkCmdBuffer cmdBuffer,
3465 VkEvent event,
3466 VkPipeEvent pipeEvent)
3467 {
3468 stub();
3469 }
3470
3471 void anv_CmdResetEvent(
3472 VkCmdBuffer cmdBuffer,
3473 VkEvent event,
3474 VkPipeEvent pipeEvent)
3475 {
3476 stub();
3477 }
3478
3479 void anv_CmdWaitEvents(
3480 VkCmdBuffer cmdBuffer,
3481 VkWaitEvent waitEvent,
3482 uint32_t eventCount,
3483 const VkEvent* pEvents,
3484 uint32_t memBarrierCount,
3485 const void** ppMemBarriers)
3486 {
3487 stub();
3488 }
3489
3490 void anv_CmdPipelineBarrier(
3491 VkCmdBuffer cmdBuffer,
3492 VkWaitEvent waitEvent,
3493 uint32_t pipeEventCount,
3494 const VkPipeEvent* pPipeEvents,
3495 uint32_t memBarrierCount,
3496 const void** ppMemBarriers)
3497 {
3498 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer;
3499 uint32_t b, *dw;
3500
3501 struct GEN8_PIPE_CONTROL cmd = {
3502 GEN8_PIPE_CONTROL_header,
3503 .PostSyncOperation = NoWrite,
3504 };
3505
3506 /* XXX: I think waitEvent is a no-op on our HW. We should verify that. */
3507
3508 for (uint32_t i = 0; i < pipeEventCount; i++) {
3509 switch (pPipeEvents[i]) {
3510 case VK_PIPE_EVENT_TOP_OF_PIPE:
3511 /* This is just what PIPE_CONTROL does */
3512 break;
3513 case VK_PIPE_EVENT_VERTEX_PROCESSING_COMPLETE:
3514 case VK_PIPE_EVENT_LOCAL_FRAGMENT_PROCESSING_COMPLETE:
3515 case VK_PIPE_EVENT_FRAGMENT_PROCESSING_COMPLETE:
3516 cmd.StallAtPixelScoreboard = true;
3517 break;
3518 case VK_PIPE_EVENT_GRAPHICS_PIPELINE_COMPLETE:
3519 case VK_PIPE_EVENT_COMPUTE_PIPELINE_COMPLETE:
3520 case VK_PIPE_EVENT_TRANSFER_COMPLETE:
3521 case VK_PIPE_EVENT_COMMANDS_COMPLETE:
3522 cmd.CommandStreamerStallEnable = true;
3523 break;
3524 default:
3525 unreachable("Invalid VkPipeEvent");
3526 }
3527 }
3528
3529 /* XXX: Right now, we're really dumb and just flush whatever categories
3530 * the app asks for. One of these days we may make this a bit better
3531 * but right now that's all the hardware allows for in most areas.
3532 */
3533 VkMemoryOutputFlags out_flags = 0;
3534 VkMemoryInputFlags in_flags = 0;
3535
3536 for (uint32_t i = 0; i < memBarrierCount; i++) {
3537 const struct anv_common *common = ppMemBarriers[i];
3538 switch (common->sType) {
3539 case VK_STRUCTURE_TYPE_MEMORY_BARRIER: {
3540 const VkMemoryBarrier *barrier = (VkMemoryBarrier *)common;
3541 out_flags |= barrier->outputMask;
3542 in_flags |= barrier->inputMask;
3543 break;
3544 }
3545 case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: {
3546 const VkBufferMemoryBarrier *barrier = (VkBufferMemoryBarrier *)common;
3547 out_flags |= barrier->outputMask;
3548 in_flags |= barrier->inputMask;
3549 break;
3550 }
3551 case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: {
3552 const VkImageMemoryBarrier *barrier = (VkImageMemoryBarrier *)common;
3553 out_flags |= barrier->outputMask;
3554 in_flags |= barrier->inputMask;
3555 break;
3556 }
3557 default:
3558 unreachable("Invalid memory barrier type");
3559 }
3560 }
3561
3562 for_each_bit(b, out_flags) {
3563 switch ((VkMemoryOutputFlags)(1 << b)) {
3564 case VK_MEMORY_OUTPUT_CPU_WRITE_BIT:
3565 break; /* FIXME: Little-core systems */
3566 case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT:
3567 cmd.DCFlushEnable = true;
3568 break;
3569 case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT:
3570 cmd.RenderTargetCacheFlushEnable = true;
3571 break;
3572 case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
3573 cmd.DepthCacheFlushEnable = true;
3574 break;
3575 case VK_MEMORY_OUTPUT_TRANSFER_BIT:
3576 cmd.RenderTargetCacheFlushEnable = true;
3577 cmd.DepthCacheFlushEnable = true;
3578 break;
3579 default:
3580 unreachable("Invalid memory output flag");
3581 }
3582 }
3583
3584    for_each_bit(b, in_flags) {
3585 switch ((VkMemoryInputFlags)(1 << b)) {
3586 case VK_MEMORY_INPUT_CPU_READ_BIT:
3587 break; /* FIXME: Little-core systems */
3588 case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT:
3589 case VK_MEMORY_INPUT_INDEX_FETCH_BIT:
3590 case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT:
3591 cmd.VFCacheInvalidationEnable = true;
3592 break;
3593 case VK_MEMORY_INPUT_UNIFORM_READ_BIT:
3594 cmd.ConstantCacheInvalidationEnable = true;
3595 /* fallthrough */
3596 case VK_MEMORY_INPUT_SHADER_READ_BIT:
3597 cmd.DCFlushEnable = true;
3598 cmd.TextureCacheInvalidationEnable = true;
3599 break;
3600 case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT:
3601 case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
3602 break; /* XXX: Hunh? */
3603 case VK_MEMORY_INPUT_TRANSFER_BIT:
3604 cmd.TextureCacheInvalidationEnable = true;
3605 break;
3606 }
3607 }
3608
3609 dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length);
3610 GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd);
3611 }
3612
3613 void anv_CmdInitAtomicCounters(
3614 VkCmdBuffer cmdBuffer,
3615 VkPipelineBindPoint pipelineBindPoint,
3616 uint32_t startCounter,
3617 uint32_t counterCount,
3618 const uint32_t* pData)
3619 {
3620 stub();
3621 }
3622
3623 void anv_CmdLoadAtomicCounters(
3624 VkCmdBuffer cmdBuffer,
3625 VkPipelineBindPoint pipelineBindPoint,
3626 uint32_t startCounter,
3627 uint32_t counterCount,
3628 VkBuffer srcBuffer,
3629 VkDeviceSize srcOffset)
3630 {
3631 stub();
3632 }
3633
3634 void anv_CmdSaveAtomicCounters(
3635 VkCmdBuffer cmdBuffer,
3636 VkPipelineBindPoint pipelineBindPoint,
3637 uint32_t startCounter,
3638 uint32_t counterCount,
3639 VkBuffer destBuffer,
3640 VkDeviceSize destOffset)
3641 {
3642 stub();
3643 }
3644
3645 static void
3646 anv_framebuffer_destroy(struct anv_device *device,
3647 struct anv_object *object,
3648 VkObjectType obj_type)
3649 {
3650 struct anv_framebuffer *fb = (struct anv_framebuffer *)object;
3651
3652 assert(obj_type == VK_OBJECT_TYPE_FRAMEBUFFER);
3653
3654 anv_DestroyObject((VkDevice) device,
3655 VK_OBJECT_TYPE_DYNAMIC_VP_STATE,
3656 fb->vp_state);
3657
3658 anv_device_free(device, fb);
3659 }
3660
3661 VkResult anv_CreateFramebuffer(
3662 VkDevice _device,
3663 const VkFramebufferCreateInfo* pCreateInfo,
3664 VkFramebuffer* pFramebuffer)
3665 {
3666 struct anv_device *device = (struct anv_device *) _device;
3667 struct anv_framebuffer *framebuffer;
3668
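        /* Stand-in view used when no depth/stencil attachment is provided; its
         * zero strides disable depth and stencil writes when it is emitted.
         */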
3669 static const struct anv_depth_stencil_view null_view =
3670 { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 };
3671
3672 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3673
3674 framebuffer = anv_device_alloc(device, sizeof(*framebuffer), 8,
3675 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
3676 if (framebuffer == NULL)
3677 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3678
3679 framebuffer->base.destructor = anv_framebuffer_destroy;
3680
3681 framebuffer->color_attachment_count = pCreateInfo->colorAttachmentCount;
3682 for (uint32_t i = 0; i < pCreateInfo->colorAttachmentCount; i++) {
3683 framebuffer->color_attachments[i] =
3684 (struct anv_surface_view *) pCreateInfo->pColorAttachments[i].view;
3685 }
3686
3687 if (pCreateInfo->pDepthStencilAttachment) {
3688 framebuffer->depth_stencil =
3689 (struct anv_depth_stencil_view *) pCreateInfo->pDepthStencilAttachment->view;
3690 } else {
3691 framebuffer->depth_stencil = &null_view;
3692 }
3693
3694 framebuffer->sample_count = pCreateInfo->sampleCount;
3695 framebuffer->width = pCreateInfo->width;
3696 framebuffer->height = pCreateInfo->height;
3697 framebuffer->layers = pCreateInfo->layers;
3698
3699 anv_CreateDynamicViewportState((VkDevice) device,
3700 &(VkDynamicVpStateCreateInfo) {
3701 .sType = VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO,
3702 .viewportAndScissorCount = 1,
3703 .pViewports = (VkViewport[]) {
3704 {
3705 .originX = 0,
3706 .originY = 0,
3707 .width = pCreateInfo->width,
3708 .height = pCreateInfo->height,
3709 .minDepth = 0,
3710 .maxDepth = 1
3711 },
3712 },
3713 .pScissors = (VkRect[]) {
3714 { { 0, 0 },
3715 { pCreateInfo->width, pCreateInfo->height } },
3716 }
3717 },
3718 &framebuffer->vp_state);
3719
3720 *pFramebuffer = (VkFramebuffer) framebuffer;
3721
3722 return VK_SUCCESS;
3723 }
3724
3725 VkResult anv_CreateRenderPass(
3726 VkDevice _device,
3727 const VkRenderPassCreateInfo* pCreateInfo,
3728 VkRenderPass* pRenderPass)
3729 {
3730 struct anv_device *device = (struct anv_device *) _device;
3731 struct anv_render_pass *pass;
3732 size_t size;
3733
3734 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);
3735
3736 size = sizeof(*pass) +
3737 pCreateInfo->layers * sizeof(struct anv_render_pass_layer);
3738 pass = anv_device_alloc(device, size, 8,
3739 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
3740 if (pass == NULL)
3741 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3742
3743 pass->render_area = pCreateInfo->renderArea;
3744
3745 pass->num_layers = pCreateInfo->layers;
3746
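        /* Record each layer's color load op and count how many layers request
         * a clear on load.
         */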
3747 pass->num_clear_layers = 0;
3748 for (uint32_t i = 0; i < pCreateInfo->layers; i++) {
3749 pass->layers[i].color_load_op = pCreateInfo->pColorLoadOps[i];
3750 pass->layers[i].clear_color = pCreateInfo->pColorLoadClearValues[i];
3751 if (pass->layers[i].color_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
3752 pass->num_clear_layers++;
3753 }
3754
3755 *pRenderPass = (VkRenderPass) pass;
3756
3757 return VK_SUCCESS;
3758 }
3759
3760 static void
3761 anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
3762 struct anv_render_pass *pass)
3763 {
3764 const struct anv_depth_stencil_view *view =
3765 cmd_buffer->framebuffer->depth_stencil;
3766
3767 /* FIXME: Implement the PMA stall W/A */
3768
3769 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER,
3770 .SurfaceType = SURFTYPE_2D,
3771 .DepthWriteEnable = view->depth_stride > 0,
3772 .StencilWriteEnable = view->stencil_stride > 0,
3773 .HierarchicalDepthBufferEnable = false,
3774 .SurfaceFormat = view->depth_format,
3775 .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0,
3776 .SurfaceBaseAddress = { view->bo, view->depth_offset },
3777 .Height = pass->render_area.extent.height - 1,
3778 .Width = pass->render_area.extent.width - 1,
3779 .LOD = 0,
3780 .Depth = 1 - 1,
3781 .MinimumArrayElement = 0,
3782 .DepthBufferObjectControlState = GEN8_MOCS,
3783 .RenderTargetViewExtent = 1 - 1,
3784 .SurfaceQPitch = 0);
3785
3786    /* Disable hierarchical depth buffers. */
3787 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER);
3788
3789 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER,
3790 .StencilBufferEnable = view->stencil_stride > 0,
3791 .StencilBufferObjectControlState = GEN8_MOCS,
3792 .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0,
3793 .SurfaceBaseAddress = { view->bo, view->stencil_offset },
3794 .SurfaceQPitch = 0);
3795
3796 /* Clear the clear params. */
3797 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS);
3798 }
3799
3800 void anv_CmdBeginRenderPass(
3801 VkCmdBuffer cmdBuffer,
3802 const VkRenderPassBegin* pRenderPassBegin)
3803 {
3804 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
3805 struct anv_render_pass *pass = (struct anv_render_pass *) pRenderPassBegin->renderPass;
3806 struct anv_framebuffer *framebuffer =
3807 (struct anv_framebuffer *) pRenderPassBegin->framebuffer;
3808
3809 cmd_buffer->framebuffer = framebuffer;
3810
3811 cmd_buffer->descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
3812
3813 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE,
3814 .ClippedDrawingRectangleYMin = pass->render_area.offset.y,
3815 .ClippedDrawingRectangleXMin = pass->render_area.offset.x,
3816 .ClippedDrawingRectangleYMax =
3817 pass->render_area.offset.y + pass->render_area.extent.height - 1,
3818 .ClippedDrawingRectangleXMax =
3819 pass->render_area.offset.x + pass->render_area.extent.width - 1,
3820 .DrawingRectangleOriginY = 0,
3821 .DrawingRectangleOriginX = 0);
3822
3823 anv_cmd_buffer_emit_depth_stencil(cmd_buffer, pass);
3824
3825 anv_cmd_buffer_clear(cmd_buffer, pass);
3826 }
3827
3828 void anv_CmdEndRenderPass(
3829 VkCmdBuffer cmdBuffer,
3830 VkRenderPass renderPass)
3831 {
3832 /* Emit a flushing pipe control at the end of a pass. This is kind of a
3833 * hack but it ensures that render targets always actually get written.
3834 * Eventually, we should do flushing based on image format transitions
3835 * or something of that nature.
3836 */
3837 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer;
3838 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
3839 .PostSyncOperation = NoWrite,
3840 .RenderTargetCacheFlushEnable = true,
3841 .InstructionCacheInvalidateEnable = true,
3842 .DepthCacheFlushEnable = true,
3843 .VFCacheInvalidationEnable = true,
3844 .TextureCacheInvalidationEnable = true,
3845 .CommandStreamerStallEnable = true);
3846 }
3847
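     /* Debug marker and object tag entry points are exported directly with
      * default visibility (presumably so the loader can resolve them) and are
      * implemented as no-op stubs below.
      */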
3848 void vkCmdDbgMarkerBegin(
3849 VkCmdBuffer cmdBuffer,
3850 const char* pMarker)
3851 __attribute__ ((visibility ("default")));
3852
3853 void vkCmdDbgMarkerEnd(
3854 VkCmdBuffer cmdBuffer)
3855 __attribute__ ((visibility ("default")));
3856
3857 VkResult vkDbgSetObjectTag(
3858 VkDevice device,
3859 VkObject object,
3860 size_t tagSize,
3861 const void* pTag)
3862 __attribute__ ((visibility ("default")));
3863
3864
3865 void vkCmdDbgMarkerBegin(
3866 VkCmdBuffer cmdBuffer,
3867 const char* pMarker)
3868 {
3869 }
3870
3871 void vkCmdDbgMarkerEnd(
3872 VkCmdBuffer cmdBuffer)
3873 {
3874 }
3875
3876 VkResult vkDbgSetObjectTag(
3877 VkDevice device,
3878 VkObject object,
3879 size_t tagSize,
3880 const void* pTag)
3881 {
3882 return VK_SUCCESS;
3883 }