/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/mman.h>

#include <drm.h>
#include <i915_drm.h>

#include "private.h"
#include "aub.h"

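/* Support for writing AUB trace files.  An AUB file is a flat stream of
 * packets describing memory writes (GTT entries, buffer contents, batch
 * buffers) and ring buffer executions, which can be replayed by Intel's
 * simulator and AUB inspection tools.  One writer is created lazily per
 * anv_device and shared by all of its command buffer dumps.
 */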
struct anv_aub_writer {
   FILE *file;      /* the open AUB trace file */
   uint32_t offset; /* next free offset in the traced GTT address space */
   int gen;         /* hardware generation; gen8+ packets carry an extra dword */
};

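/* Emit a single dword, in host byte order, into the AUB stream. */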
static void
aub_out(struct anv_aub_writer *writer, uint32_t data)
{
   fwrite(&data, 1, 4, writer->file);
}

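/* Emit a raw byte payload, e.g. buffer object contents, into the stream. */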
static void
aub_out_data(struct anv_aub_writer *writer, const void *data, size_t size)
{
   fwrite(data, 1, size, writer->file);
}

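/* Get (or lazily create) the device's AUB writer.  First use opens
 * "intel.aub" in the current directory, emits the mandatory AUB version
 * header, and writes out a GTT so that later trace blocks can be
 * addressed by GTT offset.
 */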
static struct anv_aub_writer *
get_anv_aub_writer(struct anv_device *device)
{
   struct anv_aub_writer *writer = device->aub_writer;
   int entry = 0x200003;   /* PTE template: physical page 0x200000; the low
                            * bits carry the valid and cacheability flags */
   int i;
   int gtt_size = 0x10000; /* bytes of GTT entries written below */
   const char *filename;

   /* Don't write trace files when running setuid. */
   if (geteuid() != getuid())
      return NULL;

   if (writer)
      return writer;

   writer = malloc(sizeof(*writer));
   if (writer == NULL)
      return NULL;

   filename = "intel.aub";
   writer->gen = device->info.gen;
   writer->file = fopen(filename, "w+");
   if (!writer->file) {
      free(writer);
      return NULL;
   }

   /* Start allocating objects from just after the GTT. */
   writer->offset = gtt_size;

   /* Start with a (required) version packet. */
   aub_out(writer, CMD_AUB_HEADER | (13 - 2));
   aub_out(writer,
           (4 << AUB_HEADER_MAJOR_SHIFT) |
           (0 << AUB_HEADER_MINOR_SHIFT));
   for (i = 0; i < 8; i++) {
      aub_out(writer, 0); /* app name */
   }
   aub_out(writer, 0); /* timestamp */
   aub_out(writer, 0); /* timestamp */
   aub_out(writer, 0); /* comment len */

   /* Set up the GTT: 0x10000 bytes of 4-byte PTEs, each mapping a 4KB
    * page, covering a 64MB aperture.
    */
   aub_out(writer, CMD_AUB_TRACE_HEADER_BLOCK | ((writer->gen >= 8 ? 6 : 5) - 2));
   aub_out(writer,
           AUB_TRACE_MEMTYPE_GTT_ENTRY |
           AUB_TRACE_TYPE_NOTYPE | AUB_TRACE_OP_DATA_WRITE);
   aub_out(writer, 0); /* subtype */
   aub_out(writer, 0); /* offset */
   aub_out(writer, gtt_size); /* size */
   if (writer->gen >= 8)
      aub_out(writer, 0);
   for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) {
      aub_out(writer, entry);
   }

   return device->aub_writer = writer;
}

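/* Close the trace file and free the writer.  Note that this does not
 * clear device->aub_writer; that is left to the caller.
 */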
void
anv_aub_writer_destroy(struct anv_aub_writer *writer)
{
   fclose(writer->file);
   free(writer);
}

/**
 * Break up large objects into multiple writes.  Otherwise a 128KB VBO
 * would overflow the 16-bit size field in the packet header and
 * everything would go badly after that.
 */
static void
aub_write_trace_block(struct anv_aub_writer *writer, uint32_t type,
                      void *virtual, uint32_t size, uint32_t gtt_offset)
{
   uint32_t block_size;
   uint32_t offset;
   uint32_t subtype = 0;
   static const char null_block[8 * 4096];

   for (offset = 0; offset < size; offset += block_size) {
      block_size = size - offset;

      if (block_size > 8 * 4096)
         block_size = 8 * 4096;

      aub_out(writer,
              CMD_AUB_TRACE_HEADER_BLOCK |
              ((writer->gen >= 8 ? 6 : 5) - 2));
      aub_out(writer,
              AUB_TRACE_MEMTYPE_GTT |
              type | AUB_TRACE_OP_DATA_WRITE);
      aub_out(writer, subtype);
      aub_out(writer, gtt_offset + offset);
      aub_out(writer, align_u32(block_size, 4));
      if (writer->gen >= 8)
         aub_out(writer, 0);

      /* null_block is zero-filled; it stands in for BOs with no CPU
       * mapping and provides the padding bytes below.
       */
      if (virtual)
         aub_out_data(writer, (char *) virtual + offset, block_size);
      else
         aub_out_data(writer, null_block, block_size);

      /* Pad to a multiple of 4 bytes. */
      aub_out_data(writer, null_block, -block_size & 3);
   }
}

/*
 * Build a ring buffer on the fly and dump it.
 */
static void
aub_build_dump_ringbuffer(struct anv_aub_writer *writer,
                          uint32_t batch_offset, uint32_t offset,
                          int ring_flag)
{
   uint32_t ringbuffer[4096];
   int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */
   int ring_count = 0;

   if (ring_flag == I915_EXEC_BSD)
      ring = AUB_TRACE_TYPE_RING_PRB1;
   else if (ring_flag == I915_EXEC_BLT)
      ring = AUB_TRACE_TYPE_RING_PRB2;

   /* Make a ring buffer to execute our batchbuffer. */
   memset(ringbuffer, 0, sizeof(ringbuffer));
   if (writer->gen >= 8) {
      ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START | (3 - 2);
      ringbuffer[ring_count++] = batch_offset;
      ringbuffer[ring_count++] = 0;
   } else {
      ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START;
      ringbuffer[ring_count++] = batch_offset;
   }

   /* Write out the ring.  This appears to trigger execution of
    * the ring in the simulator.
    */
   aub_out(writer,
           CMD_AUB_TRACE_HEADER_BLOCK |
           ((writer->gen >= 8 ? 6 : 5) - 2));
   aub_out(writer,
           AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE);
   aub_out(writer, 0); /* general/surface subtype */
   aub_out(writer, offset);
   aub_out(writer, ring_count * 4);
   if (writer->gen >= 8)
      aub_out(writer, 0);

   /* FIXME: Need some flush operations here? */
   aub_out_data(writer, ringbuffer, ring_count * 4);
}

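/* Per-BO bookkeeping for a single dump: the GTT offset assigned to the BO
 * in the trace, its CPU mapping, and a patched copy with relocations
 * applied ("relocated" simply aliases "map" when there is nothing to
 * patch).
 */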
struct aub_bo {
   uint32_t offset;
   void *map;
   void *relocated;
};

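/* Apply a BO's relocations to a malloc'ed copy of its contents, leaving
 * the original mapping untouched.  Each relocation patches one dword with
 * the trace GTT address of the target BO plus the recorded delta.
 */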
static void
relocate_bo(struct anv_bo *bo, struct drm_i915_gem_relocation_entry *relocs,
            size_t num_relocs, struct aub_bo *bos)
{
   struct aub_bo *aub_bo = &bos[bo->index];
   struct drm_i915_gem_relocation_entry *reloc;
   uint32_t *dw;

   aub_bo->relocated = malloc(bo->size);
   memcpy(aub_bo->relocated, aub_bo->map, bo->size);
   for (size_t i = 0; i < num_relocs; i++) {
      reloc = &relocs[i];
      assert(reloc->offset < bo->size);
      dw = aub_bo->relocated + reloc->offset;
      *dw = bos[reloc->target_handle].offset + reloc->delta;
   }
}

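/* Dump a command buffer into the device's AUB file: assign every exec2 BO
 * a GTT offset, apply batch and surface-state relocations against those
 * offsets, write each BO out as a trace block, and finish with a small
 * ring buffer pointing the simulator at the first batch BO.
 */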
void
anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_batch *batch = &cmd_buffer->batch;
   struct anv_aub_writer *writer;
   struct anv_bo *bo;
   uint32_t ring_flag = 0; /* 0 selects the default (render) ring */
   uint32_t offset;
   struct aub_bo *aub_bos;

   writer = get_anv_aub_writer(device);
   if (writer == NULL)
      return;

   /* Assign each exec2 BO a GTT offset and make sure we have a CPU map. */
   aub_bos = malloc(cmd_buffer->exec2_bo_count * sizeof(aub_bos[0]));
   offset = writer->offset;
   for (uint32_t i = 0; i < cmd_buffer->exec2_bo_count; i++) {
      bo = cmd_buffer->exec2_bos[i];
      if (bo->map)
         aub_bos[i].map = bo->map;
      else
         aub_bos[i].map = anv_gem_mmap(device, bo->gem_handle, 0, bo->size);
      aub_bos[i].relocated = aub_bos[i].map;
      aub_bos[i].offset = offset;
      offset = align_u32(offset + bo->size + 4095, 4096);
   }

   struct anv_batch_bo *first_bbo;
   for (struct anv_batch_bo *bbo = cmd_buffer->last_batch_bo;
        bbo != NULL; bbo = bbo->prev_batch_bo) {
      /* Keep stashing the current BO until we get to the beginning */
      first_bbo = bbo;

      /* Handle relocations for this batch BO */
      relocate_bo(&bbo->bo, &batch->relocs.relocs[bbo->first_reloc],
                  bbo->num_relocs, aub_bos);
   }
   assert(first_bbo->prev_batch_bo == NULL);

   for (struct anv_batch_bo *bbo = cmd_buffer->surface_batch_bo;
        bbo != NULL; bbo = bbo->prev_batch_bo) {

      /* Handle relocations for this surface state BO */
      relocate_bo(&bbo->bo,
                  &cmd_buffer->surface_relocs.relocs[bbo->first_reloc],
                  bbo->num_relocs, aub_bos);
   }

   /* Write every BO out as a trace block.  The batch must be the last BO
    * in the exec list, so the final entry is dumped as the batch itself.
    */
   for (uint32_t i = 0; i < cmd_buffer->exec2_bo_count; i++) {
      bo = cmd_buffer->exec2_bos[i];
      if (i == cmd_buffer->exec2_bo_count - 1) {
         assert(bo == &first_bbo->bo);
         aub_write_trace_block(writer, AUB_TRACE_TYPE_BATCH,
                               aub_bos[i].relocated,
                               first_bbo->length, aub_bos[i].offset);
      } else {
         aub_write_trace_block(writer, AUB_TRACE_TYPE_NOTYPE,
                               aub_bos[i].relocated,
                               bo->size, aub_bos[i].offset);
      }
      if (aub_bos[i].relocated != aub_bos[i].map)
         free(aub_bos[i].relocated);
      if (aub_bos[i].map != bo->map)
         anv_gem_munmap(aub_bos[i].map, bo->size);
   }

   /* Dump ring buffer */
   aub_build_dump_ringbuffer(writer, aub_bos[first_bbo->bo.index].offset,
                             offset, ring_flag);

   free(aub_bos);

   fflush(writer->file);
}
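
/* Example (hypothetical) call site.  How this hooks into the driver is an
 * assumption, but something along these lines at submit time would dump
 * each command buffer whenever AUB dumping is enabled:
 *
 *    if (device->dump_aub)
 *       anv_cmd_buffer_dump(cmd_buffer);
 */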