broadcom/vc5: Fix up a comment from vc4 about the predraw texture setup.
[mesa.git] / src / gallium / drivers / vc5 / vc5_draw.c
1 /*
2 * Copyright © 2014-2017 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "util/u_blitter.h"
25 #include "util/u_prim.h"
26 #include "util/u_format.h"
27 #include "util/u_pack_color.h"
28 #include "util/u_prim_restart.h"
29 #include "util/u_upload_mgr.h"
30 #include "indices/u_primconvert.h"
31
32 #include "vc5_context.h"
33 #include "vc5_resource.h"
34 #include "vc5_cl.h"
35 #include "broadcom/cle/v3d_packet_v33_pack.h"
36 #include "broadcom/compiler/v3d_compiler.h"
37
38 /**
39 * Does the initial bining command list setup for drawing to a given FBO.
40 */
41 static void
42 vc5_start_draw(struct vc5_context *vc5)
43 {
44 struct vc5_job *job = vc5->job;
45
46 if (job->needs_flush)
47 return;
48
49 /* Get space to emit our BCL state, using a branch to jump to a new BO
50 * if necessary.
51 */
52 vc5_cl_ensure_space_with_branch(&job->bcl, 256 /* XXX */);
53
54 job->submit.bcl_start = job->bcl.bo->offset;
55 vc5_job_add_bo(job, job->bcl.bo);
56
57 job->tile_alloc = vc5_bo_alloc(vc5->screen, 1024 * 1024, "tile alloc");
58 struct vc5_bo *tsda = vc5_bo_alloc(vc5->screen,
59 job->draw_tiles_y *
60 job->draw_tiles_x *
61 64,
62 "TSDA");
63
64 /* "Binning mode lists start with a Tile Binning Mode Configuration
65 * item (120)"
66 *
67 * Part1 signals the end of binning config setup.
68 */
69 cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION_PART2, config) {
70 config.tile_allocation_memory_address =
71 cl_address(job->tile_alloc, 0);
72 config.tile_allocation_memory_size = job->tile_alloc->size;
73 }
74
75 cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION_PART1, config) {
76 config.tile_state_data_array_base_address =
77 cl_address(tsda, 0);
78
79 config.width_in_tiles = job->draw_tiles_x;
80 config.height_in_tiles = job->draw_tiles_y;
81
82 /* Must be >= 1 */
83 config.number_of_render_targets = 1;
84
85 config.multisample_mode_4x = job->msaa;
86
87 config.maximum_bpp_of_all_render_targets = job->internal_bpp;
88 }
89
90 vc5_bo_unreference(&tsda);
91
92 /* There's definitely nothing in the VCD cache we want. */
93 cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin);
94
95 /* "Binning mode lists must have a Start Tile Binning item (6) after
96 * any prefix state data before the binning list proper starts."
97 */
98 cl_emit(&job->bcl, START_TILE_BINNING, bin);
99
100 cl_emit(&job->bcl, PRIMITIVE_LIST_FORMAT, fmt) {
101 fmt.data_type = LIST_INDEXED;
102 fmt.primitive_type = LIST_TRIANGLES;
103 }
104
105 job->needs_flush = true;
106 job->draw_width = vc5->framebuffer.width;
107 job->draw_height = vc5->framebuffer.height;
108 }
109
110 static void
111 vc5_predraw_check_textures(struct pipe_context *pctx,
112 struct vc5_texture_stateobj *stage_tex)
113 {
114 struct vc5_context *vc5 = vc5_context(pctx);
115
116 for (int i = 0; i < stage_tex->num_textures; i++) {
117 struct pipe_sampler_view *view = stage_tex->textures[i];
118 if (!view)
119 continue;
120
121 vc5_flush_jobs_writing_resource(vc5, view->texture);
122 }
123 }
124
125 static struct vc5_cl_reloc
126 vc5_get_default_values(struct vc5_context *vc5)
127 {
128 struct vc5_job *job = vc5->job;
129
130 /* VC5_DIRTY_VTXSTATE */
131 struct vc5_vertex_stateobj *vtx = vc5->vtx;
132
133 /* Set up the default values for attributes. */
134 vc5_cl_ensure_space(&job->indirect, 4 * 4 * vtx->num_elements, 4);
135 struct vc5_cl_reloc default_values =
136 cl_address(job->indirect.bo, cl_offset(&job->indirect));
137 vc5_bo_reference(default_values.bo);
138
139 struct vc5_cl_out *defaults = cl_start(&job->indirect);
140 for (int i = 0; i < vtx->num_elements; i++) {
141 cl_aligned_f(&defaults, 0.0);
142 cl_aligned_f(&defaults, 0.0);
143 cl_aligned_f(&defaults, 0.0);
144 cl_aligned_f(&defaults, 1.0);
145 }
146 cl_end(&job->indirect, defaults);
147
148 return default_values;
149 }
150
151 static void
152 vc5_emit_gl_shader_state(struct vc5_context *vc5,
153 const struct pipe_draw_info *info)
154 {
155 struct vc5_job *job = vc5->job;
156 /* VC5_DIRTY_VTXSTATE */
157 struct vc5_vertex_stateobj *vtx = vc5->vtx;
158 /* VC5_DIRTY_VTXBUF */
159 struct vc5_vertexbuf_stateobj *vertexbuf = &vc5->vertexbuf;
160
161 /* Upload the uniforms to the indirect CL first */
162 struct vc5_cl_reloc fs_uniforms =
163 vc5_write_uniforms(vc5, vc5->prog.fs,
164 &vc5->constbuf[PIPE_SHADER_FRAGMENT],
165 &vc5->fragtex);
166 struct vc5_cl_reloc vs_uniforms =
167 vc5_write_uniforms(vc5, vc5->prog.vs,
168 &vc5->constbuf[PIPE_SHADER_VERTEX],
169 &vc5->verttex);
170 struct vc5_cl_reloc cs_uniforms =
171 vc5_write_uniforms(vc5, vc5->prog.cs,
172 &vc5->constbuf[PIPE_SHADER_VERTEX],
173 &vc5->verttex);
174 struct vc5_cl_reloc default_values = vc5_get_default_values(vc5);
175
176 uint32_t shader_rec_offset =
177 vc5_cl_ensure_space(&job->indirect,
178 cl_packet_length(GL_SHADER_STATE_RECORD) +
179 vtx->num_elements *
180 cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD),
181 32);
182
183 cl_emit(&job->indirect, GL_SHADER_STATE_RECORD, shader) {
184 shader.enable_clipping = true;
185 /* VC5_DIRTY_PRIM_MODE | VC5_DIRTY_RASTERIZER */
186 shader.point_size_in_shaded_vertex_data =
187 (info->mode == PIPE_PRIM_POINTS &&
188 vc5->rasterizer->base.point_size_per_vertex);
189
190 /* Must be set if the shader modifies Z, discards, or modifies
191 * the sample mask. For any of these cases, the fragment
192 * shader needs to write the Z value (even just discards).
193 */
194 shader.fragment_shader_does_z_writes =
195 (vc5->prog.fs->prog_data.fs->writes_z ||
196 vc5->prog.fs->prog_data.fs->discard);
197
198 shader.number_of_varyings_in_fragment_shader =
199 vc5->prog.fs->prog_data.base->num_inputs;
200
201 shader.propagate_nans = true;
202
203 shader.coordinate_shader_code_address =
204 cl_address(vc5->prog.cs->bo, 0);
205 shader.vertex_shader_code_address =
206 cl_address(vc5->prog.vs->bo, 0);
207 shader.fragment_shader_code_address =
208 cl_address(vc5->prog.fs->bo, 0);
209
210 /* XXX: Use combined input/output size flag in the common
211 * case.
212 */
213 shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = true;
214 shader.vertex_shader_has_separate_input_and_output_vpm_blocks = true;
215 shader.coordinate_shader_input_vpm_segment_size =
216 vc5->prog.cs->prog_data.vs->vpm_input_size;
217 shader.vertex_shader_input_vpm_segment_size =
218 vc5->prog.vs->prog_data.vs->vpm_input_size;
219
220 shader.coordinate_shader_output_vpm_segment_size =
221 vc5->prog.cs->prog_data.vs->vpm_output_size;
222 shader.vertex_shader_output_vpm_segment_size =
223 vc5->prog.vs->prog_data.vs->vpm_output_size;
224
225 shader.coordinate_shader_uniforms_address = cs_uniforms;
226 shader.vertex_shader_uniforms_address = vs_uniforms;
227 shader.fragment_shader_uniforms_address = fs_uniforms;
228
229 shader.vertex_id_read_by_coordinate_shader =
230 vc5->prog.cs->prog_data.vs->uses_vid;
231 shader.instance_id_read_by_coordinate_shader =
232 vc5->prog.cs->prog_data.vs->uses_iid;
233 shader.vertex_id_read_by_vertex_shader =
234 vc5->prog.vs->prog_data.vs->uses_vid;
235 shader.instance_id_read_by_vertex_shader =
236 vc5->prog.vs->prog_data.vs->uses_iid;
237
238 shader.address_of_default_attribute_values = default_values;
239 }
240
241 for (int i = 0; i < vtx->num_elements; i++) {
242 struct pipe_vertex_element *elem = &vtx->pipe[i];
243 struct pipe_vertex_buffer *vb =
244 &vertexbuf->vb[elem->vertex_buffer_index];
245 struct vc5_resource *rsc = vc5_resource(vb->buffer.resource);
246 const struct util_format_description *desc =
247 util_format_description(elem->src_format);
248
249 uint32_t offset = vb->buffer_offset + elem->src_offset;
250
251 cl_emit(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
252 uint32_t r_size = desc->channel[0].size;
253
254 /* vec_size == 0 means 4 */
255 attr.vec_size = desc->nr_channels & 3;
256
257 switch (desc->channel[0].type) {
258 case UTIL_FORMAT_TYPE_FLOAT:
259 if (r_size == 32) {
260 attr.type = ATTRIBUTE_FLOAT;
261 } else {
262 assert(r_size == 16);
263 attr.type = ATTRIBUTE_HALF_FLOAT;
264 }
265 break;
266
267 case UTIL_FORMAT_TYPE_SIGNED:
268 case UTIL_FORMAT_TYPE_UNSIGNED:
269 switch (r_size) {
270 case 32:
271 attr.type = ATTRIBUTE_INT;
272 break;
273 case 16:
274 attr.type = ATTRIBUTE_SHORT;
275 break;
276 case 10:
277 attr.type = ATTRIBUTE_INT2_10_10_10;
278 break;
279 case 8:
280 attr.type = ATTRIBUTE_BYTE;
281 break;
282 default:
283 fprintf(stderr,
284 "format %s unsupported\n",
285 desc->name);
286 attr.type = ATTRIBUTE_BYTE;
287 abort();
288 }
289 break;
290
291 default:
292 fprintf(stderr,
293 "format %s unsupported\n",
294 desc->name);
295 abort();
296 }
297
298 attr.signed_int_type =
299 desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED;
300
301 attr.normalized_int_type = desc->channel[0].normalized;
302 attr.read_as_int_uint = desc->channel[0].pure_integer;
303 attr.address = cl_address(rsc->bo, offset);
304 attr.stride = vb->stride;
305 attr.instance_divisor = elem->instance_divisor;
306 attr.number_of_values_read_by_coordinate_shader =
307 vc5->prog.cs->prog_data.vs->vattr_sizes[i];
308 attr.number_of_values_read_by_vertex_shader =
309 vc5->prog.vs->prog_data.vs->vattr_sizes[i];
310 }
311 }
312
313 cl_emit(&job->bcl, GL_SHADER_STATE, state) {
314 state.address = cl_address(job->indirect.bo, shader_rec_offset);
315 state.number_of_attribute_arrays = vtx->num_elements;
316 }
317
318 vc5_bo_unreference(&cs_uniforms.bo);
319 vc5_bo_unreference(&vs_uniforms.bo);
320 vc5_bo_unreference(&fs_uniforms.bo);
321 vc5_bo_unreference(&default_values.bo);
322
323 job->shader_rec_count++;
324 }
325
326 static void
327 vc5_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
328 {
329 struct vc5_context *vc5 = vc5_context(pctx);
330
331 if (!info->count_from_stream_output && !info->indirect &&
332 !info->primitive_restart &&
333 !u_trim_pipe_prim(info->mode, (unsigned*)&info->count))
334 return;
335
336 /* Fall back for weird desktop GL primitive restart values. */
337 if (info->primitive_restart &&
338 info->index_size) {
339 uint32_t mask = ~0;
340
341 switch (info->index_size) {
342 case 2:
343 mask = 0xffff;
344 break;
345 case 1:
346 mask = 0xff;
347 break;
348 }
349
350 if (info->restart_index != mask) {
351 util_draw_vbo_without_prim_restart(pctx, info);
352 return;
353 }
354 }
355
356 if (info->mode >= PIPE_PRIM_QUADS) {
357 util_primconvert_save_rasterizer_state(vc5->primconvert, &vc5->rasterizer->base);
358 util_primconvert_draw_vbo(vc5->primconvert, info);
359 perf_debug("Fallback conversion for %d %s vertices\n",
360 info->count, u_prim_name(info->mode));
361 return;
362 }
363
364 /* Before setting up the draw, flush anything writing to the textures
365 * that we read from.
366 */
367 vc5_predraw_check_textures(pctx, &vc5->verttex);
368 vc5_predraw_check_textures(pctx, &vc5->fragtex);
369
370 struct vc5_job *job = vc5_get_job_for_fbo(vc5);
371
372 /* Get space to emit our draw call into the BCL, using a branch to
373 * jump to a new BO if necessary.
374 */
375 vc5_cl_ensure_space_with_branch(&job->bcl, 256 /* XXX */);
376
377 if (vc5->prim_mode != info->mode) {
378 vc5->prim_mode = info->mode;
379 vc5->dirty |= VC5_DIRTY_PRIM_MODE;
380 }
381
382 vc5_start_draw(vc5);
383 vc5_update_compiled_shaders(vc5, info->mode);
384
385 vc5_emit_state(pctx);
386
387 if (vc5->dirty & (VC5_DIRTY_VTXBUF |
388 VC5_DIRTY_VTXSTATE |
389 VC5_DIRTY_PRIM_MODE |
390 VC5_DIRTY_RASTERIZER |
391 VC5_DIRTY_COMPILED_CS |
392 VC5_DIRTY_COMPILED_VS |
393 VC5_DIRTY_COMPILED_FS |
394 vc5->prog.cs->uniform_dirty_bits |
395 vc5->prog.vs->uniform_dirty_bits |
396 vc5->prog.fs->uniform_dirty_bits)) {
397 vc5_emit_gl_shader_state(vc5, info);
398 }
399
400 vc5->dirty = 0;
401
402 /* The Base Vertex/Base Instance packet sets those values to nonzero
403 * for the next draw call only.
404 */
405 if (info->index_bias || info->start_instance) {
406 cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) {
407 base.base_instance = info->start_instance;
408 base.base_vertex = info->index_bias;
409 }
410 }
411
412 /* Note that the primitive type fields match with OpenGL/gallium
413 * definitions, up to but not including QUADS.
414 */
415 if (info->index_size) {
416 uint32_t index_size = info->index_size;
417 uint32_t offset = info->start * index_size;
418 struct pipe_resource *prsc;
419 if (info->has_user_indices) {
420 prsc = NULL;
421 u_upload_data(vc5->uploader, 0,
422 info->count * info->index_size, 4,
423 info->index.user,
424 &offset, &prsc);
425 } else {
426 prsc = info->index.resource;
427 }
428 struct vc5_resource *rsc = vc5_resource(prsc);
429
430 if (info->instance_count > 1) {
431 cl_emit(&job->bcl, INDEXED_INSTANCED_PRIMITIVE_LIST, prim) {
432 prim.index_type = ffs(info->index_size) - 1;
433 prim.maximum_index = (1u << 31) - 1; /* XXX */
434 prim.address_of_indices_list =
435 cl_address(rsc->bo, offset);
436 prim.mode = info->mode;
437 prim.enable_primitive_restarts = info->primitive_restart;
438
439 prim.number_of_instances = info->instance_count;
440 prim.instance_length = info->count;
441 }
442 } else {
443 cl_emit(&job->bcl, INDEXED_PRIMITIVE_LIST, prim) {
444 prim.index_type = ffs(info->index_size) - 1;
445 prim.length = info->count;
446 prim.maximum_index = (1u << 31) - 1; /* XXX */
447 prim.address_of_indices_list =
448 cl_address(rsc->bo, offset);
449 prim.mode = info->mode;
450 prim.enable_primitive_restarts = info->primitive_restart;
451 }
452 }
453
454 job->draw_calls_queued++;
455
456 if (info->has_user_indices)
457 pipe_resource_reference(&prsc, NULL);
458 } else {
459 if (info->instance_count > 1) {
460 cl_emit(&job->bcl, VERTEX_ARRAY_INSTANCED_PRIMITIVES, prim) {
461 prim.mode = info->mode;
462 prim.index_of_first_vertex = info->start;
463 prim.number_of_instances = info->instance_count;
464 prim.instance_length = info->count;
465 }
466 } else {
467 cl_emit(&job->bcl, VERTEX_ARRAY_PRIMITIVES, prim) {
468 prim.mode = info->mode;
469 prim.length = info->count;
470 prim.index_of_first_vertex = info->start;
471 }
472 }
473 }
474 job->draw_calls_queued++;
475
476 if (vc5->zsa && job->zsbuf &&
477 (vc5->zsa->base.depth.enabled ||
478 vc5->zsa->base.stencil[0].enabled)) {
479 struct vc5_resource *rsc = vc5_resource(job->zsbuf->texture);
480 vc5_job_add_bo(job, rsc->bo);
481
482 if (vc5->zsa->base.depth.enabled) {
483 job->resolve |= PIPE_CLEAR_DEPTH;
484 rsc->initialized_buffers = PIPE_CLEAR_DEPTH;
485
486 if (vc5->zsa->early_z_enable)
487 job->uses_early_z = true;
488 }
489
490 if (vc5->zsa->base.stencil[0].enabled) {
491 job->resolve |= PIPE_CLEAR_STENCIL;
492 rsc->initialized_buffers |= PIPE_CLEAR_STENCIL;
493 }
494 }
495
496 for (int i = 0; i < VC5_MAX_DRAW_BUFFERS; i++) {
497 uint32_t bit = PIPE_CLEAR_COLOR0 << i;
498
499 if (job->resolve & bit || !job->cbufs[i])
500 continue;
501 struct vc5_resource *rsc = vc5_resource(job->cbufs[i]->texture);
502
503 job->resolve |= bit;
504 vc5_job_add_bo(job, rsc->bo);
505 }
506
507 if (V3D_DEBUG & V3D_DEBUG_ALWAYS_FLUSH)
508 vc5_flush(pctx);
509 }
510
511 static uint32_t
512 pack_rgba(enum pipe_format format, const float *rgba)
513 {
514 union util_color uc;
515 util_pack_color(rgba, format, &uc);
516 if (util_format_get_blocksize(format) == 2)
517 return uc.us;
518 else
519 return uc.ui[0];
520 }
521
522 static void
523 vc5_clear(struct pipe_context *pctx, unsigned buffers,
524 const union pipe_color_union *color, double depth, unsigned stencil)
525 {
526 struct vc5_context *vc5 = vc5_context(pctx);
527 struct vc5_job *job = vc5_get_job_for_fbo(vc5);
528
529 /* We can't flag new buffers for clearing once we've queued draws. We
530 * could avoid this by using the 3d engine to clear.
531 */
532 if (job->draw_calls_queued) {
533 perf_debug("Flushing rendering to process new clear.\n");
534 vc5_job_submit(vc5, job);
535 job = vc5_get_job_for_fbo(vc5);
536 }
537
538 if (buffers & PIPE_CLEAR_COLOR0) {
539 struct vc5_resource *rsc =
540 vc5_resource(vc5->framebuffer.cbufs[0]->texture);
541 uint32_t clear_color;
542
543 #if 0
544 if (vc5_rt_format_is_565(vc5->framebuffer.cbufs[0]->format)) {
545 /* In 565 mode, the hardware will be packing our color
546 * for us.
547 */
548 clear_color = pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM,
549 color->f);
550 } else {
551 /* Otherwise, we need to do this packing because we
552 * support multiple swizzlings of RGBA8888.
553 */
554 clear_color =
555 pack_rgba(vc5->framebuffer.cbufs[0]->format,
556 color->f);
557 }
558 #endif
559 clear_color = pack_rgba(vc5->framebuffer.cbufs[0]->format,
560 color->f);
561
562 job->clear_color[0] = job->clear_color[1] = clear_color;
563 rsc->initialized_buffers |= (buffers & PIPE_CLEAR_COLOR0);
564 }
565
566 unsigned zsclear = buffers & PIPE_CLEAR_DEPTHSTENCIL;
567 if (zsclear) {
568 struct vc5_resource *rsc =
569 vc5_resource(vc5->framebuffer.zsbuf->texture);
570
571 if (zsclear & PIPE_CLEAR_DEPTH)
572 job->clear_z = depth;
573 if (zsclear & PIPE_CLEAR_STENCIL)
574 job->clear_s = stencil;
575
576 rsc->initialized_buffers |= zsclear;
577 }
578
579 job->draw_min_x = 0;
580 job->draw_min_y = 0;
581 job->draw_max_x = vc5->framebuffer.width;
582 job->draw_max_y = vc5->framebuffer.height;
583 job->cleared |= buffers;
584 job->resolve |= buffers;
585
586 vc5_start_draw(vc5);
587 }
588
/**
 * Placeholder for pctx->clear_render_target.
 *
 * Not implemented: ignores all of its arguments and only reports the missing
 * implementation on stderr.
 */
static void
vc5_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
                        const union pipe_color_union *color,
                        unsigned x, unsigned y, unsigned w, unsigned h,
                        bool render_condition_enabled)
{
        fputs("unimpl: clear RT\n", stderr);
}
597
/**
 * Placeholder for pctx->clear_depth_stencil.
 *
 * Not implemented: ignores all of its arguments and only reports the missing
 * implementation on stderr.
 */
static void
vc5_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
                        unsigned buffers, double depth, unsigned stencil,
                        unsigned x, unsigned y, unsigned w, unsigned h,
                        bool render_condition_enabled)
{
        fputs("unimpl: clear DS\n", stderr);
}
606
607 void
608 vc5_draw_init(struct pipe_context *pctx)
609 {
610 pctx->draw_vbo = vc5_draw_vbo;
611 pctx->clear = vc5_clear;
612 pctx->clear_render_target = vc5_clear_render_target;
613 pctx->clear_depth_stencil = vc5_clear_depth_stencil;
614 }