/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "main/glthread_marshal.h"
25 #include "main/dispatch.h"
26 #include "main/bufferobj.h"
29 * Create an upload buffer. This is called from the app thread, so everything
30 * has to be thread-safe in the driver.
32 static struct gl_buffer_object
*
33 new_upload_buffer(struct gl_context
*ctx
, GLsizeiptr size
, uint8_t **ptr
)
35 assert(ctx
->GLThread
.SupportsBufferUploads
);
37 struct gl_buffer_object
*obj
= ctx
->Driver
.NewBufferObject(ctx
, -1);
41 obj
->Immutable
= true;
43 if (!ctx
->Driver
.BufferData(ctx
, GL_ARRAY_BUFFER
, size
, NULL
,
45 GL_CLIENT_STORAGE_BIT
| GL_MAP_WRITE_BIT
,
47 ctx
->Driver
.DeleteBuffer(ctx
, obj
);
51 *ptr
= ctx
->Driver
.MapBufferRange(ctx
, 0, size
,
53 GL_MAP_UNSYNCHRONIZED_BIT
|
54 MESA_MAP_THREAD_SAFE_BIT
,
57 ctx
->Driver
.DeleteBuffer(ctx
, obj
);
65 _mesa_glthread_upload(struct gl_context
*ctx
, const void *data
,
66 GLsizeiptr size
, unsigned *out_offset
,
67 struct gl_buffer_object
**out_buffer
,
70 struct glthread_state
*glthread
= &ctx
->GLThread
;
71 const unsigned default_size
= 1024 * 1024;
73 if (unlikely(size
> INT_MAX
))
76 /* The alignment was chosen arbitrarily. */
77 unsigned offset
= align(glthread
->upload_offset
, 8);
79 /* Allocate a new buffer if needed. */
80 if (unlikely(!glthread
->upload_buffer
|| offset
+ size
> default_size
)) {
81 /* If the size is greater than the buffer size, allocate a separate buffer
82 * just for this upload.
84 if (unlikely(size
> default_size
)) {
87 assert(*out_buffer
== NULL
);
88 *out_buffer
= new_upload_buffer(ctx
, size
, &ptr
);
94 memcpy(ptr
, data
, size
);
100 if (glthread
->upload_buffer_private_refcount
> 0) {
101 p_atomic_add(&glthread
->upload_buffer
->RefCount
,
102 -glthread
->upload_buffer_private_refcount
);
103 glthread
->upload_buffer_private_refcount
= 0;
105 _mesa_reference_buffer_object(ctx
, &glthread
->upload_buffer
, NULL
);
106 glthread
->upload_buffer
=
107 new_upload_buffer(ctx
, default_size
, &glthread
->upload_ptr
);
108 glthread
->upload_offset
= 0;
111 /* Since atomic operations are very very slow when 2 threads are not
112 * sharing one L3 cache (which can happen on AMD Zen), prevent using
113 * atomics as follows:
115 * This function has to return a buffer reference to the caller.
116 * Instead of atomic_inc for every call, it does all possible future
117 * increments in advance when the upload buffer is allocated.
118 * The maximum number of times the function can be called per upload
119 * buffer is default_size, because the minimum allocation size is 1.
120 * Therefore the function can only return default_size number of
121 * references at most, so we will never need more. This is the number
122 * that is added to RefCount at allocation.
124 * upload_buffer_private_refcount tracks how many buffer references
125 * are left to return to callers. If the buffer is full and there are
126 * still references left, they are atomically subtracted from RefCount
127 * before the buffer is unreferenced.
129 * This can increase performance by 20%.
131 glthread
->upload_buffer
->RefCount
+= default_size
;
132 glthread
->upload_buffer_private_refcount
= default_size
;
137 memcpy(glthread
->upload_ptr
+ offset
, data
, size
);
139 *out_ptr
= glthread
->upload_ptr
+ offset
;
141 glthread
->upload_offset
= offset
+ size
;
142 *out_offset
= offset
;
144 assert(*out_buffer
== NULL
);
145 assert(glthread
->upload_buffer_private_refcount
> 0);
146 *out_buffer
= glthread
->upload_buffer
;
147 glthread
->upload_buffer_private_refcount
--;
150 /** Tracks the current bindings for the vertex array and index array buffers.
152 * This is part of what we need to enable glthread on compat-GL contexts that
153 * happen to use VBOs, without also supporting the full tracking of VBO vs
154 * user vertex array bindings per attribute on each vertex array for
155 * determining what to upload at draw call time.
157 * Note that GL core makes it so that a buffer binding with an invalid handle
158 * in the "buffer" parameter will throw an error, and then a
159 * glVertexAttribPointer() that followsmight not end up pointing at a VBO.
160 * However, in GL core the draw call would throw an error as well, so we don't
161 * really care if our tracking is wrong for this case -- we never need to
162 * marshal user data for draw calls, and the unmarshal will just generate an
163 * error or not as appropriate.
165 * For compatibility GL, we do need to accurately know whether the draw call
166 * on the unmarshal side will dereference a user pointer or load data from a
167 * VBO per vertex. That would make it seem like we need to track whether a
168 * "buffer" is valid, so that we can know when an error will be generated
169 * instead of updating the binding. However, compat GL has the ridiculous
170 * feature that if you pass a bad name, it just gens a buffer object for you,
171 * so we escape without having to know if things are valid or not.
174 _mesa_glthread_BindBuffer(struct gl_context
*ctx
, GLenum target
, GLuint buffer
)
176 struct glthread_state
*glthread
= &ctx
->GLThread
;
179 case GL_ARRAY_BUFFER
:
180 glthread
->CurrentArrayBufferName
= buffer
;
182 case GL_ELEMENT_ARRAY_BUFFER
:
183 /* The current element array buffer binding is actually tracked in the
184 * vertex array object instead of the context, so this would need to
185 * change on vertex array object updates.
187 glthread
->CurrentVAO
->CurrentElementBufferName
= buffer
;
189 case GL_DRAW_INDIRECT_BUFFER
:
190 glthread
->CurrentDrawIndirectBufferName
= buffer
;
196 _mesa_glthread_DeleteBuffers(struct gl_context
*ctx
, GLsizei n
,
197 const GLuint
*buffers
)
199 struct glthread_state
*glthread
= &ctx
->GLThread
;
204 for (unsigned i
= 0; i
< n
; i
++) {
205 GLuint id
= buffers
[i
];
207 if (id
== glthread
->CurrentArrayBufferName
)
208 _mesa_glthread_BindBuffer(ctx
, GL_ARRAY_BUFFER
, 0);
209 if (id
== glthread
->CurrentVAO
->CurrentElementBufferName
)
210 _mesa_glthread_BindBuffer(ctx
, GL_ELEMENT_ARRAY_BUFFER
, 0);
211 if (id
== glthread
->CurrentDrawIndirectBufferName
)
212 _mesa_glthread_BindBuffer(ctx
, GL_DRAW_INDIRECT_BUFFER
, 0);
216 /* BufferData: marshalled asynchronously */
217 struct marshal_cmd_BufferData
219 struct marshal_cmd_base cmd_base
;
220 GLuint target_or_name
;
223 const GLvoid
*data_external_mem
;
224 bool data_null
; /* If set, no data follows for "data" */
227 /* Next size bytes are GLubyte data[size] */
231 _mesa_unmarshal_BufferData(struct gl_context
*ctx
,
232 const struct marshal_cmd_BufferData
*cmd
)
234 const GLuint target_or_name
= cmd
->target_or_name
;
235 const GLsizei size
= cmd
->size
;
236 const GLenum usage
= cmd
->usage
;
241 else if (!cmd
->named
&& target_or_name
== GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD
)
242 data
= cmd
->data_external_mem
;
244 data
= (const void *) (cmd
+ 1);
247 CALL_NamedBufferDataEXT(ctx
->CurrentServerDispatch
,
248 (target_or_name
, size
, data
, usage
));
249 } else if (cmd
->named
) {
250 CALL_NamedBufferData(ctx
->CurrentServerDispatch
,
251 (target_or_name
, size
, data
, usage
));
253 CALL_BufferData(ctx
->CurrentServerDispatch
,
254 (target_or_name
, size
, data
, usage
));
/** Never reached: all BufferData variants marshal as DISPATCH_CMD_BufferData. */
void
_mesa_unmarshal_NamedBufferData(struct gl_context *ctx,
                                const struct marshal_cmd_NamedBufferData *cmd)
{
   unreachable("never used - all BufferData variants use DISPATCH_CMD_BufferData");
}
/** Never reached: all BufferData variants marshal as DISPATCH_CMD_BufferData. */
void
_mesa_unmarshal_NamedBufferDataEXT(struct gl_context *ctx,
                                   const struct marshal_cmd_NamedBufferDataEXT *cmd)
{
   unreachable("never used - all BufferData variants use DISPATCH_CMD_BufferData");
}
273 _mesa_marshal_BufferData_merged(GLuint target_or_name
, GLsizeiptr size
,
274 const GLvoid
*data
, GLenum usage
, bool named
,
275 bool ext_dsa
, const char *func
)
277 GET_CURRENT_CONTEXT(ctx
);
278 bool external_mem
= !named
&&
279 target_or_name
== GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD
;
280 bool copy_data
= data
&& !external_mem
;
281 int cmd_size
= sizeof(struct marshal_cmd_BufferData
) + (copy_data
? size
: 0);
283 if (unlikely(size
< 0 || size
> INT_MAX
|| cmd_size
< 0 ||
284 cmd_size
> MARSHAL_MAX_CMD_SIZE
||
285 (named
&& target_or_name
== 0))) {
286 _mesa_glthread_finish_before(ctx
, func
);
288 CALL_NamedBufferData(ctx
->CurrentServerDispatch
,
289 (target_or_name
, size
, data
, usage
));
291 CALL_BufferData(ctx
->CurrentServerDispatch
,
292 (target_or_name
, size
, data
, usage
));
297 struct marshal_cmd_BufferData
*cmd
=
298 _mesa_glthread_allocate_command(ctx
, DISPATCH_CMD_BufferData
,
301 cmd
->target_or_name
= target_or_name
;
304 cmd
->data_null
= !data
;
306 cmd
->ext_dsa
= ext_dsa
;
307 cmd
->data_external_mem
= data
;
310 char *variable_data
= (char *) (cmd
+ 1);
311 memcpy(variable_data
, data
, size
);
316 _mesa_marshal_BufferData(GLenum target
, GLsizeiptr size
, const GLvoid
* data
,
319 _mesa_marshal_BufferData_merged(target
, size
, data
, usage
, false, false,
324 _mesa_marshal_NamedBufferData(GLuint buffer
, GLsizeiptr size
,
325 const GLvoid
* data
, GLenum usage
)
327 _mesa_marshal_BufferData_merged(buffer
, size
, data
, usage
, true, false,
332 _mesa_marshal_NamedBufferDataEXT(GLuint buffer
, GLsizeiptr size
,
333 const GLvoid
*data
, GLenum usage
)
335 _mesa_marshal_BufferData_merged(buffer
, size
, data
, usage
, true, true,
336 "NamedBufferDataEXT");
340 /* BufferSubData: marshalled asynchronously */
341 struct marshal_cmd_BufferSubData
343 struct marshal_cmd_base cmd_base
;
344 GLenum target_or_name
;
349 /* Next size bytes are GLubyte data[size] */
353 _mesa_unmarshal_BufferSubData(struct gl_context
*ctx
,
354 const struct marshal_cmd_BufferSubData
*cmd
)
356 const GLenum target_or_name
= cmd
->target_or_name
;
357 const GLintptr offset
= cmd
->offset
;
358 const GLsizeiptr size
= cmd
->size
;
359 const void *data
= (const void *) (cmd
+ 1);
362 CALL_NamedBufferSubDataEXT(ctx
->CurrentServerDispatch
,
363 (target_or_name
, offset
, size
, data
));
364 } else if (cmd
->named
) {
365 CALL_NamedBufferSubData(ctx
->CurrentServerDispatch
,
366 (target_or_name
, offset
, size
, data
));
368 CALL_BufferSubData(ctx
->CurrentServerDispatch
,
369 (target_or_name
, offset
, size
, data
));
/** Never reached: all variants marshal as DISPATCH_CMD_BufferSubData. */
void
_mesa_unmarshal_NamedBufferSubData(struct gl_context *ctx,
                                   const struct marshal_cmd_NamedBufferSubData *cmd)
{
   unreachable("never used - all BufferSubData variants use DISPATCH_CMD_BufferSubData");
}
/** Never reached: all variants marshal as DISPATCH_CMD_BufferSubData. */
void
_mesa_unmarshal_NamedBufferSubDataEXT(struct gl_context *ctx,
                                      const struct marshal_cmd_NamedBufferSubDataEXT *cmd)
{
   unreachable("never used - all BufferSubData variants use DISPATCH_CMD_BufferSubData");
}
388 _mesa_marshal_BufferSubData_merged(GLuint target_or_name
, GLintptr offset
,
389 GLsizeiptr size
, const GLvoid
*data
,
390 bool named
, bool ext_dsa
, const char *func
)
392 GET_CURRENT_CONTEXT(ctx
);
393 size_t cmd_size
= sizeof(struct marshal_cmd_BufferSubData
) + size
;
395 /* Fast path: Copy the data to an upload buffer, and use the GPU
396 * to copy the uploaded data to the destination buffer.
398 /* TODO: Handle offset == 0 && size < buffer_size.
399 * If offset == 0 and size == buffer_size, it's better to discard
400 * the buffer storage, but we don't know the buffer size in glthread.
402 if (ctx
->GLThread
.SupportsBufferUploads
&&
403 data
&& offset
> 0 && size
> 0) {
404 struct gl_buffer_object
*upload_buffer
= NULL
;
405 unsigned upload_offset
= 0;
407 _mesa_glthread_upload(ctx
, data
, size
, &upload_offset
, &upload_buffer
,
411 _mesa_marshal_InternalBufferSubDataCopyMESA((GLintptr
)upload_buffer
,
420 if (unlikely(size
< 0 || size
> INT_MAX
|| cmd_size
< 0 ||
421 cmd_size
> MARSHAL_MAX_CMD_SIZE
|| !data
||
422 (named
&& target_or_name
== 0))) {
423 _mesa_glthread_finish_before(ctx
, func
);
425 CALL_NamedBufferSubData(ctx
->CurrentServerDispatch
,
426 (target_or_name
, offset
, size
, data
));
428 CALL_BufferSubData(ctx
->CurrentServerDispatch
,
429 (target_or_name
, offset
, size
, data
));
434 struct marshal_cmd_BufferSubData
*cmd
=
435 _mesa_glthread_allocate_command(ctx
, DISPATCH_CMD_BufferSubData
,
437 cmd
->target_or_name
= target_or_name
;
438 cmd
->offset
= offset
;
441 cmd
->ext_dsa
= ext_dsa
;
443 char *variable_data
= (char *) (cmd
+ 1);
444 memcpy(variable_data
, data
, size
);
448 _mesa_marshal_BufferSubData(GLenum target
, GLintptr offset
, GLsizeiptr size
,
451 _mesa_marshal_BufferSubData_merged(target
, offset
, size
, data
, false,
452 false, "BufferSubData");
456 _mesa_marshal_NamedBufferSubData(GLuint buffer
, GLintptr offset
,
457 GLsizeiptr size
, const GLvoid
* data
)
459 _mesa_marshal_BufferSubData_merged(buffer
, offset
, size
, data
, true,
460 false, "NamedBufferSubData");
464 _mesa_marshal_NamedBufferSubDataEXT(GLuint buffer
, GLintptr offset
,
465 GLsizeiptr size
, const GLvoid
* data
)
467 _mesa_marshal_BufferSubData_merged(buffer
, offset
, size
, data
, true,
468 true, "NamedBufferSubDataEXT");