swr: [rasterizer jitter] include cstdarg in builder_misc.cpp
[mesa.git] / src / gallium / drivers / swr / swr_state.cpp
1 /****************************************************************************
2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ***************************************************************************/
23
24 // llvm redefines DEBUG
25 #pragma push_macro("DEBUG")
26 #undef DEBUG
27 #include "JitManager.h"
28 #pragma pop_macro("DEBUG")
29
30 #include "common/os.h"
31 #include "jit_api.h"
32 #include "state_llvm.h"
33
34 #include "gallivm/lp_bld_tgsi.h"
35 #include "util/u_format.h"
36
37 #include "util/u_memory.h"
38 #include "util/u_inlines.h"
39 #include "util/u_helpers.h"
40 #include "util/u_framebuffer.h"
41 #include "util/u_viewport.h"
42
43 #include "swr_state.h"
44 #include "swr_context.h"
45 #include "swr_context_llvm.h"
46 #include "swr_screen.h"
47 #include "swr_resource.h"
48 #include "swr_tex_sample.h"
49 #include "swr_scratch.h"
50 #include "swr_shader.h"
51 #include "swr_fence.h"
52
53 /* These should be pulled out into separate files as necessary
54 * Just initializing everything here to get going. */
55
56 static void *
57 swr_create_blend_state(struct pipe_context *pipe,
58 const struct pipe_blend_state *blend)
59 {
60 struct swr_blend_state *state = CALLOC_STRUCT(swr_blend_state);
61
62 memcpy(&state->pipe, blend, sizeof(*blend));
63
64 struct pipe_blend_state *pipe_blend = &state->pipe;
65
66 for (int target = 0;
67 target < std::min(SWR_NUM_RENDERTARGETS, PIPE_MAX_COLOR_BUFS);
68 target++) {
69
70 struct pipe_rt_blend_state *rt_blend = &pipe_blend->rt[target];
71 SWR_RENDER_TARGET_BLEND_STATE &blendState =
72 state->blendState.renderTarget[target];
73 RENDER_TARGET_BLEND_COMPILE_STATE &compileState =
74 state->compileState[target];
75
76 if (target != 0 && !pipe_blend->independent_blend_enable) {
77 memcpy(&compileState,
78 &state->compileState[0],
79 sizeof(RENDER_TARGET_BLEND_COMPILE_STATE));
80 continue;
81 }
82
83 compileState.blendEnable = rt_blend->blend_enable;
84 if (compileState.blendEnable) {
85 compileState.sourceAlphaBlendFactor =
86 swr_convert_blend_factor(rt_blend->alpha_src_factor);
87 compileState.destAlphaBlendFactor =
88 swr_convert_blend_factor(rt_blend->alpha_dst_factor);
89 compileState.sourceBlendFactor =
90 swr_convert_blend_factor(rt_blend->rgb_src_factor);
91 compileState.destBlendFactor =
92 swr_convert_blend_factor(rt_blend->rgb_dst_factor);
93
94 compileState.colorBlendFunc =
95 swr_convert_blend_func(rt_blend->rgb_func);
96 compileState.alphaBlendFunc =
97 swr_convert_blend_func(rt_blend->alpha_func);
98 }
99 compileState.logicOpEnable = state->pipe.logicop_enable;
100 if (compileState.logicOpEnable) {
101 compileState.logicOpFunc =
102 swr_convert_logic_op(state->pipe.logicop_func);
103 }
104
105 blendState.writeDisableRed =
106 (rt_blend->colormask & PIPE_MASK_R) ? 0 : 1;
107 blendState.writeDisableGreen =
108 (rt_blend->colormask & PIPE_MASK_G) ? 0 : 1;
109 blendState.writeDisableBlue =
110 (rt_blend->colormask & PIPE_MASK_B) ? 0 : 1;
111 blendState.writeDisableAlpha =
112 (rt_blend->colormask & PIPE_MASK_A) ? 0 : 1;
113
114 if (rt_blend->colormask == 0)
115 compileState.blendEnable = false;
116 }
117
118 return state;
119 }
120
121 static void
122 swr_bind_blend_state(struct pipe_context *pipe, void *blend)
123 {
124 struct swr_context *ctx = swr_context(pipe);
125
126 if (ctx->blend == blend)
127 return;
128
129 ctx->blend = (swr_blend_state *)blend;
130
131 ctx->dirty |= SWR_NEW_BLEND;
132 }
133
134 static void
135 swr_delete_blend_state(struct pipe_context *pipe, void *blend)
136 {
137 FREE(blend);
138 }
139
140 static void
141 swr_set_blend_color(struct pipe_context *pipe,
142 const struct pipe_blend_color *color)
143 {
144 struct swr_context *ctx = swr_context(pipe);
145
146 ctx->blend_color = *color;
147
148 ctx->dirty |= SWR_NEW_BLEND;
149 }
150
151 static void
152 swr_set_stencil_ref(struct pipe_context *pipe,
153 const struct pipe_stencil_ref *ref)
154 {
155 struct swr_context *ctx = swr_context(pipe);
156
157 ctx->stencil_ref = *ref;
158
159 ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA;
160 }
161
162 static void *
163 swr_create_depth_stencil_state(
164 struct pipe_context *pipe,
165 const struct pipe_depth_stencil_alpha_state *depth_stencil)
166 {
167 struct pipe_depth_stencil_alpha_state *state;
168
169 state = (pipe_depth_stencil_alpha_state *)mem_dup(depth_stencil,
170 sizeof *depth_stencil);
171
172 return state;
173 }
174
175 static void
176 swr_bind_depth_stencil_state(struct pipe_context *pipe, void *depth_stencil)
177 {
178 struct swr_context *ctx = swr_context(pipe);
179
180 if (ctx->depth_stencil == (pipe_depth_stencil_alpha_state *)depth_stencil)
181 return;
182
183 ctx->depth_stencil = (pipe_depth_stencil_alpha_state *)depth_stencil;
184
185 ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA;
186 }
187
188 static void
189 swr_delete_depth_stencil_state(struct pipe_context *pipe, void *depth)
190 {
191 FREE(depth);
192 }
193
194
195 static void *
196 swr_create_rasterizer_state(struct pipe_context *pipe,
197 const struct pipe_rasterizer_state *rast)
198 {
199 struct pipe_rasterizer_state *state;
200 state = (pipe_rasterizer_state *)mem_dup(rast, sizeof *rast);
201
202 return state;
203 }
204
205 static void
206 swr_bind_rasterizer_state(struct pipe_context *pipe, void *handle)
207 {
208 struct swr_context *ctx = swr_context(pipe);
209 const struct pipe_rasterizer_state *rasterizer =
210 (const struct pipe_rasterizer_state *)handle;
211
212 if (ctx->rasterizer == (pipe_rasterizer_state *)rasterizer)
213 return;
214
215 ctx->rasterizer = (pipe_rasterizer_state *)rasterizer;
216
217 ctx->dirty |= SWR_NEW_RASTERIZER;
218 }
219
220 static void
221 swr_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer)
222 {
223 FREE(rasterizer);
224 }
225
226
227 static void *
228 swr_create_sampler_state(struct pipe_context *pipe,
229 const struct pipe_sampler_state *sampler)
230 {
231 struct pipe_sampler_state *state =
232 (pipe_sampler_state *)mem_dup(sampler, sizeof *sampler);
233
234 return state;
235 }
236
237 static void
238 swr_bind_sampler_states(struct pipe_context *pipe,
239 enum pipe_shader_type shader,
240 unsigned start,
241 unsigned num,
242 void **samplers)
243 {
244 struct swr_context *ctx = swr_context(pipe);
245 unsigned i;
246
247 assert(shader < PIPE_SHADER_TYPES);
248 assert(start + num <= ARRAY_SIZE(ctx->samplers[shader]));
249
250 /* set the new samplers */
251 ctx->num_samplers[shader] = num;
252 for (i = 0; i < num; i++) {
253 ctx->samplers[shader][start + i] = (pipe_sampler_state *)samplers[i];
254 }
255
256 ctx->dirty |= SWR_NEW_SAMPLER;
257 }
258
259 static void
260 swr_delete_sampler_state(struct pipe_context *pipe, void *sampler)
261 {
262 FREE(sampler);
263 }
264
265
266 static struct pipe_sampler_view *
267 swr_create_sampler_view(struct pipe_context *pipe,
268 struct pipe_resource *texture,
269 const struct pipe_sampler_view *templ)
270 {
271 struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);
272
273 if (view) {
274 *view = *templ;
275 view->reference.count = 1;
276 view->texture = NULL;
277 pipe_resource_reference(&view->texture, texture);
278 view->context = pipe;
279 }
280
281 return view;
282 }
283
284 static void
285 swr_set_sampler_views(struct pipe_context *pipe,
286 enum pipe_shader_type shader,
287 unsigned start,
288 unsigned num,
289 struct pipe_sampler_view **views)
290 {
291 struct swr_context *ctx = swr_context(pipe);
292 uint i;
293
294 assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
295
296 assert(shader < PIPE_SHADER_TYPES);
297 assert(start + num <= ARRAY_SIZE(ctx->sampler_views[shader]));
298
299 /* set the new sampler views */
300 ctx->num_sampler_views[shader] = num;
301 for (i = 0; i < num; i++) {
302 /* Note: we're using pipe_sampler_view_release() here to work around
303 * a possible crash when the old view belongs to another context that
304 * was already destroyed.
305 */
306 pipe_sampler_view_release(pipe, &ctx->sampler_views[shader][start + i]);
307 pipe_sampler_view_reference(&ctx->sampler_views[shader][start + i],
308 views[i]);
309 }
310
311 ctx->dirty |= SWR_NEW_SAMPLER_VIEW;
312 }
313
314 static void
315 swr_sampler_view_destroy(struct pipe_context *pipe,
316 struct pipe_sampler_view *view)
317 {
318 pipe_resource_reference(&view->texture, NULL);
319 FREE(view);
320 }
321
322 static void *
323 swr_create_vs_state(struct pipe_context *pipe,
324 const struct pipe_shader_state *vs)
325 {
326 struct swr_vertex_shader *swr_vs = new swr_vertex_shader;
327 if (!swr_vs)
328 return NULL;
329
330 swr_vs->pipe.tokens = tgsi_dup_tokens(vs->tokens);
331 swr_vs->pipe.stream_output = vs->stream_output;
332
333 lp_build_tgsi_info(vs->tokens, &swr_vs->info);
334
335 swr_vs->soState = {0};
336
337 if (swr_vs->pipe.stream_output.num_outputs) {
338 pipe_stream_output_info *stream_output = &swr_vs->pipe.stream_output;
339
340 swr_vs->soState.soEnable = true;
341 // soState.rasterizerDisable set on state dirty
342 // soState.streamToRasterizer not used
343
344 for (uint32_t i = 0; i < stream_output->num_outputs; i++) {
345 swr_vs->soState.streamMasks[stream_output->output[i].stream] |=
346 1 << (stream_output->output[i].register_index - 1);
347 }
348 for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) {
349 swr_vs->soState.streamNumEntries[i] =
350 _mm_popcnt_u32(swr_vs->soState.streamMasks[i]);
351 }
352 }
353
354 return swr_vs;
355 }
356
357 static void
358 swr_bind_vs_state(struct pipe_context *pipe, void *vs)
359 {
360 struct swr_context *ctx = swr_context(pipe);
361
362 if (ctx->vs == vs)
363 return;
364
365 ctx->vs = (swr_vertex_shader *)vs;
366 ctx->dirty |= SWR_NEW_VS;
367 }
368
369 static void
370 swr_delete_vs_state(struct pipe_context *pipe, void *vs)
371 {
372 struct swr_vertex_shader *swr_vs = (swr_vertex_shader *)vs;
373 FREE((void *)swr_vs->pipe.tokens);
374 struct swr_screen *screen = swr_screen(pipe->screen);
375 if (!swr_is_fence_pending(screen->flush_fence))
376 swr_fence_submit(swr_context(pipe), screen->flush_fence);
377 swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
378 delete swr_vs;
379 }
380
381 static void *
382 swr_create_fs_state(struct pipe_context *pipe,
383 const struct pipe_shader_state *fs)
384 {
385 struct swr_fragment_shader *swr_fs = new swr_fragment_shader;
386 if (!swr_fs)
387 return NULL;
388
389 swr_fs->pipe.tokens = tgsi_dup_tokens(fs->tokens);
390
391 lp_build_tgsi_info(fs->tokens, &swr_fs->info);
392
393 return swr_fs;
394 }
395
396
397 static void
398 swr_bind_fs_state(struct pipe_context *pipe, void *fs)
399 {
400 struct swr_context *ctx = swr_context(pipe);
401
402 if (ctx->fs == fs)
403 return;
404
405 ctx->fs = (swr_fragment_shader *)fs;
406 ctx->dirty |= SWR_NEW_FS;
407 }
408
409 static void
410 swr_delete_fs_state(struct pipe_context *pipe, void *fs)
411 {
412 struct swr_fragment_shader *swr_fs = (swr_fragment_shader *)fs;
413 FREE((void *)swr_fs->pipe.tokens);
414 struct swr_screen *screen = swr_screen(pipe->screen);
415 if (!swr_is_fence_pending(screen->flush_fence))
416 swr_fence_submit(swr_context(pipe), screen->flush_fence);
417 swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
418 delete swr_fs;
419 }
420
421
422 static void
423 swr_set_constant_buffer(struct pipe_context *pipe,
424 uint shader,
425 uint index,
426 const struct pipe_constant_buffer *cb)
427 {
428 struct swr_context *ctx = swr_context(pipe);
429 struct pipe_resource *constants = cb ? cb->buffer : NULL;
430
431 assert(shader < PIPE_SHADER_TYPES);
432 assert(index < ARRAY_SIZE(ctx->constants[shader]));
433
434 /* note: reference counting */
435 util_copy_constant_buffer(&ctx->constants[shader][index], cb);
436
437 if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY) {
438 ctx->dirty |= SWR_NEW_VSCONSTANTS;
439 } else if (shader == PIPE_SHADER_FRAGMENT) {
440 ctx->dirty |= SWR_NEW_FSCONSTANTS;
441 }
442
443 if (cb && cb->user_buffer) {
444 pipe_resource_reference(&constants, NULL);
445 }
446 }
447
448
449 static void *
450 swr_create_vertex_elements_state(struct pipe_context *pipe,
451 unsigned num_elements,
452 const struct pipe_vertex_element *attribs)
453 {
454 struct swr_vertex_element_state *velems;
455 assert(num_elements <= PIPE_MAX_ATTRIBS);
456 velems = CALLOC_STRUCT(swr_vertex_element_state);
457 if (velems) {
458 velems->fsState.bVertexIDOffsetEnable = true;
459 velems->fsState.numAttribs = num_elements;
460 for (unsigned i = 0; i < num_elements; i++) {
461 // XXX: we should do this keyed on the VS usage info
462
463 const struct util_format_description *desc =
464 util_format_description(attribs[i].src_format);
465
466 velems->fsState.layout[i].AlignedByteOffset = attribs[i].src_offset;
467 velems->fsState.layout[i].Format =
468 mesa_to_swr_format(attribs[i].src_format);
469 velems->fsState.layout[i].StreamIndex =
470 attribs[i].vertex_buffer_index;
471 velems->fsState.layout[i].InstanceEnable =
472 attribs[i].instance_divisor != 0;
473 velems->fsState.layout[i].ComponentControl0 =
474 desc->channel[0].type != UTIL_FORMAT_TYPE_VOID
475 ? ComponentControl::StoreSrc
476 : ComponentControl::Store0;
477 velems->fsState.layout[i].ComponentControl1 =
478 desc->channel[1].type != UTIL_FORMAT_TYPE_VOID
479 ? ComponentControl::StoreSrc
480 : ComponentControl::Store0;
481 velems->fsState.layout[i].ComponentControl2 =
482 desc->channel[2].type != UTIL_FORMAT_TYPE_VOID
483 ? ComponentControl::StoreSrc
484 : ComponentControl::Store0;
485 velems->fsState.layout[i].ComponentControl3 =
486 desc->channel[3].type != UTIL_FORMAT_TYPE_VOID
487 ? ComponentControl::StoreSrc
488 : ComponentControl::Store1Fp;
489 velems->fsState.layout[i].ComponentPacking = ComponentEnable::XYZW;
490 velems->fsState.layout[i].InstanceDataStepRate =
491 attribs[i].instance_divisor;
492
493 /* Calculate the pitch of each stream */
494 const SWR_FORMAT_INFO &swr_desc = GetFormatInfo(
495 mesa_to_swr_format(attribs[i].src_format));
496 velems->stream_pitch[attribs[i].vertex_buffer_index] += swr_desc.Bpp;
497
498 if (attribs[i].instance_divisor != 0) {
499 velems->instanced_bufs |= 1U << attribs[i].vertex_buffer_index;
500 uint32_t *min_instance_div =
501 &velems->min_instance_div[attribs[i].vertex_buffer_index];
502 if (!*min_instance_div ||
503 attribs[i].instance_divisor < *min_instance_div)
504 *min_instance_div = attribs[i].instance_divisor;
505 }
506 }
507 }
508
509 return velems;
510 }
511
512 static void
513 swr_bind_vertex_elements_state(struct pipe_context *pipe, void *velems)
514 {
515 struct swr_context *ctx = swr_context(pipe);
516 struct swr_vertex_element_state *swr_velems =
517 (struct swr_vertex_element_state *)velems;
518
519 ctx->velems = swr_velems;
520 ctx->dirty |= SWR_NEW_VERTEX;
521 }
522
523 static void
524 swr_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
525 {
526 /* XXX Need to destroy fetch shader? */
527 FREE(velems);
528 }
529
530
531 static void
532 swr_set_vertex_buffers(struct pipe_context *pipe,
533 unsigned start_slot,
534 unsigned num_elements,
535 const struct pipe_vertex_buffer *buffers)
536 {
537 struct swr_context *ctx = swr_context(pipe);
538
539 assert(num_elements <= PIPE_MAX_ATTRIBS);
540
541 util_set_vertex_buffers_count(ctx->vertex_buffer,
542 &ctx->num_vertex_buffers,
543 buffers,
544 start_slot,
545 num_elements);
546
547 ctx->dirty |= SWR_NEW_VERTEX;
548 }
549
550
551 static void
552 swr_set_index_buffer(struct pipe_context *pipe,
553 const struct pipe_index_buffer *ib)
554 {
555 struct swr_context *ctx = swr_context(pipe);
556
557 if (ib)
558 memcpy(&ctx->index_buffer, ib, sizeof(ctx->index_buffer));
559 else
560 memset(&ctx->index_buffer, 0, sizeof(ctx->index_buffer));
561
562 ctx->dirty |= SWR_NEW_VERTEX;
563 }
564
565 static void
566 swr_set_polygon_stipple(struct pipe_context *pipe,
567 const struct pipe_poly_stipple *stipple)
568 {
569 struct swr_context *ctx = swr_context(pipe);
570
571 ctx->poly_stipple = *stipple; /* struct copy */
572 ctx->dirty |= SWR_NEW_STIPPLE;
573 }
574
575 static void
576 swr_set_clip_state(struct pipe_context *pipe,
577 const struct pipe_clip_state *clip)
578 {
579 struct swr_context *ctx = swr_context(pipe);
580
581 ctx->clip = *clip;
582 /* XXX Unimplemented, but prevents crash */
583
584 ctx->dirty |= SWR_NEW_CLIP;
585 }
586
587
588 static void
589 swr_set_scissor_states(struct pipe_context *pipe,
590 unsigned start_slot,
591 unsigned num_viewports,
592 const struct pipe_scissor_state *scissor)
593 {
594 struct swr_context *ctx = swr_context(pipe);
595
596 ctx->scissor = *scissor;
597 ctx->swr_scissor.xmin = scissor->minx;
598 ctx->swr_scissor.xmax = scissor->maxx;
599 ctx->swr_scissor.ymin = scissor->miny;
600 ctx->swr_scissor.ymax = scissor->maxy;
601 ctx->dirty |= SWR_NEW_SCISSOR;
602 }
603
604 static void
605 swr_set_viewport_states(struct pipe_context *pipe,
606 unsigned start_slot,
607 unsigned num_viewports,
608 const struct pipe_viewport_state *vpt)
609 {
610 struct swr_context *ctx = swr_context(pipe);
611
612 ctx->viewport = *vpt;
613 ctx->dirty |= SWR_NEW_VIEWPORT;
614 }
615
616
617 static void
618 swr_set_framebuffer_state(struct pipe_context *pipe,
619 const struct pipe_framebuffer_state *fb)
620 {
621 struct swr_context *ctx = swr_context(pipe);
622
623 boolean changed = !util_framebuffer_state_equal(&ctx->framebuffer, fb);
624
625 assert(fb->width <= KNOB_GUARDBAND_WIDTH);
626 assert(fb->height <= KNOB_GUARDBAND_HEIGHT);
627
628 if (changed) {
629 util_copy_framebuffer_state(&ctx->framebuffer, fb);
630
631 ctx->dirty |= SWR_NEW_FRAMEBUFFER;
632 }
633 }
634
635
636 static void
637 swr_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
638 {
639 struct swr_context *ctx = swr_context(pipe);
640
641 if (sample_mask != ctx->sample_mask) {
642 ctx->sample_mask = sample_mask;
643 ctx->dirty |= SWR_NEW_RASTERIZER;
644 }
645 }
646
647 /*
648 * Update resource in-use status
649 * All resources bound to color or depth targets marked as WRITE resources.
650 * VBO Vertex/index buffers and texture views marked as READ resources.
651 */
652 void
653 swr_update_resource_status(struct pipe_context *pipe,
654 const struct pipe_draw_info *p_draw_info)
655 {
656 struct swr_context *ctx = swr_context(pipe);
657 struct pipe_framebuffer_state *fb = &ctx->framebuffer;
658
659 /* colorbuffer targets */
660 if (fb->nr_cbufs)
661 for (uint32_t i = 0; i < fb->nr_cbufs; ++i)
662 if (fb->cbufs[i])
663 swr_resource_write(fb->cbufs[i]->texture);
664
665 /* depth/stencil target */
666 if (fb->zsbuf)
667 swr_resource_write(fb->zsbuf->texture);
668
669 /* VBO vertex buffers */
670 for (uint32_t i = 0; i < ctx->num_vertex_buffers; i++) {
671 struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
672 if (!vb->user_buffer)
673 swr_resource_read(vb->buffer);
674 }
675
676 /* VBO index buffer */
677 if (p_draw_info && p_draw_info->indexed) {
678 struct pipe_index_buffer *ib = &ctx->index_buffer;
679 if (!ib->user_buffer)
680 swr_resource_read(ib->buffer);
681 }
682
683 /* transform feedback buffers */
684 for (uint32_t i = 0; i < ctx->num_so_targets; i++) {
685 struct pipe_stream_output_target *target = ctx->so_targets[i];
686 if (target && target->buffer)
687 swr_resource_write(target->buffer);
688 }
689
690 /* texture sampler views */
691 for (uint32_t j : {PIPE_SHADER_VERTEX, PIPE_SHADER_FRAGMENT}) {
692 for (uint32_t i = 0; i < ctx->num_sampler_views[j]; i++) {
693 struct pipe_sampler_view *view = ctx->sampler_views[j][i];
694 if (view)
695 swr_resource_read(view->texture);
696 }
697 }
698
699 /* constant buffers */
700 for (uint32_t j : {PIPE_SHADER_VERTEX, PIPE_SHADER_FRAGMENT}) {
701 for (uint32_t i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
702 struct pipe_constant_buffer *cb = &ctx->constants[j][i];
703 if (cb->buffer)
704 swr_resource_read(cb->buffer);
705 }
706 }
707 }
708
709 static void
710 swr_update_texture_state(struct swr_context *ctx,
711 unsigned shader_type,
712 unsigned num_sampler_views,
713 swr_jit_texture *textures)
714 {
715 for (unsigned i = 0; i < num_sampler_views; i++) {
716 struct pipe_sampler_view *view =
717 ctx->sampler_views[shader_type][i];
718 struct swr_jit_texture *jit_tex = &textures[i];
719
720 memset(jit_tex, 0, sizeof(*jit_tex));
721 if (view) {
722 struct pipe_resource *res = view->texture;
723 struct swr_resource *swr_res = swr_resource(res);
724 SWR_SURFACE_STATE *swr = &swr_res->swr;
725 size_t *mip_offsets = swr_res->mip_offsets;
726 if (swr_res->has_depth && swr_res->has_stencil &&
727 !util_format_has_depth(util_format_description(view->format))) {
728 swr = &swr_res->secondary;
729 mip_offsets = swr_res->secondary_mip_offsets;
730 }
731
732 jit_tex->width = res->width0;
733 jit_tex->height = res->height0;
734 jit_tex->base_ptr = swr->pBaseAddress;
735 if (view->target != PIPE_BUFFER) {
736 jit_tex->first_level = view->u.tex.first_level;
737 jit_tex->last_level = view->u.tex.last_level;
738 if (view->target == PIPE_TEXTURE_3D)
739 jit_tex->depth = res->depth0;
740 else
741 jit_tex->depth =
742 view->u.tex.last_layer - view->u.tex.first_layer + 1;
743 jit_tex->base_ptr += view->u.tex.first_layer *
744 swr->qpitch * swr->pitch;
745 } else {
746 unsigned view_blocksize = util_format_get_blocksize(view->format);
747 jit_tex->base_ptr += view->u.buf.offset;
748 jit_tex->width = view->u.buf.size / view_blocksize;
749 jit_tex->depth = 1;
750 }
751
752 for (unsigned level = jit_tex->first_level;
753 level <= jit_tex->last_level;
754 level++) {
755 jit_tex->row_stride[level] = swr->pitch;
756 jit_tex->img_stride[level] = swr->qpitch * swr->pitch;
757 jit_tex->mip_offsets[level] = mip_offsets[level];
758 }
759 }
760 }
761 }
762
763 static void
764 swr_update_sampler_state(struct swr_context *ctx,
765 unsigned shader_type,
766 unsigned num_samplers,
767 swr_jit_sampler *samplers)
768 {
769 for (unsigned i = 0; i < num_samplers; i++) {
770 const struct pipe_sampler_state *sampler =
771 ctx->samplers[shader_type][i];
772
773 if (sampler) {
774 samplers[i].min_lod = sampler->min_lod;
775 samplers[i].max_lod = sampler->max_lod;
776 samplers[i].lod_bias = sampler->lod_bias;
777 COPY_4V(samplers[i].border_color, sampler->border_color.f);
778 }
779 }
780 }
781
782 static void
783 swr_update_constants(struct swr_context *ctx, enum pipe_shader_type shaderType)
784 {
785 swr_draw_context *pDC = &ctx->swrDC;
786
787 const float **constant;
788 uint32_t *num_constants;
789 struct swr_scratch_space *scratch;
790
791 switch (shaderType) {
792 case PIPE_SHADER_VERTEX:
793 constant = pDC->constantVS;
794 num_constants = pDC->num_constantsVS;
795 scratch = &ctx->scratch->vs_constants;
796 break;
797 case PIPE_SHADER_FRAGMENT:
798 constant = pDC->constantFS;
799 num_constants = pDC->num_constantsFS;
800 scratch = &ctx->scratch->fs_constants;
801 break;
802 default:
803 debug_printf("Unsupported shader type constants\n");
804 return;
805 }
806
807 for (UINT i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
808 const pipe_constant_buffer *cb = &ctx->constants[shaderType][i];
809 num_constants[i] = cb->buffer_size;
810 if (cb->buffer) {
811 constant[i] =
812 (const float *)(swr_resource_data(cb->buffer) +
813 cb->buffer_offset);
814 } else {
815 /* Need to copy these constants to scratch space */
816 if (cb->user_buffer && cb->buffer_size) {
817 const void *ptr =
818 ((const uint8_t *)cb->user_buffer + cb->buffer_offset);
819 uint32_t size = AlignUp(cb->buffer_size, 4);
820 ptr = swr_copy_to_scratch_space(ctx, scratch, ptr, size);
821 constant[i] = (const float *)ptr;
822 }
823 }
824 }
825 }
826
827 static bool
828 swr_change_rt(struct swr_context *ctx,
829 unsigned attachment,
830 const struct pipe_surface *sf)
831 {
832 swr_draw_context *pDC = &ctx->swrDC;
833 struct SWR_SURFACE_STATE *rt = &pDC->renderTargets[attachment];
834
835 /* Do nothing if the render target hasn't changed */
836 if ((!sf || !sf->texture) && rt->pBaseAddress == nullptr)
837 return false;
838
839 /* Deal with disabling RT up front */
840 if (!sf || !sf->texture) {
841 /* If detaching attachment, mark tiles as RESOLVED so core
842 * won't try to load from non-existent target. */
843 swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_RESOLVED);
844 *rt = {0};
845 return true;
846 }
847
848 const struct swr_resource *swr = swr_resource(sf->texture);
849 const SWR_SURFACE_STATE *swr_surface = &swr->swr;
850 SWR_FORMAT fmt = mesa_to_swr_format(sf->format);
851
852 if (attachment == SWR_ATTACHMENT_STENCIL && swr->secondary.pBaseAddress) {
853 swr_surface = &swr->secondary;
854 fmt = swr_surface->format;
855 }
856
857 if (rt->pBaseAddress == swr_surface->pBaseAddress &&
858 rt->format == fmt &&
859 rt->lod == sf->u.tex.level &&
860 rt->arrayIndex == sf->u.tex.first_layer)
861 return false;
862
863 bool need_fence = false;
864
865 /* StoreTile for changed target */
866 if (rt->pBaseAddress) {
867 /* If changing attachment to a new target, mark tiles as
868 * INVALID so they are reloaded from surface. */
869 swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_INVALID);
870 need_fence = true;
871 }
872
873 /* Make new attachment */
874 *rt = *swr_surface;
875 rt->format = fmt;
876 rt->lod = sf->u.tex.level;
877 rt->arrayIndex = sf->u.tex.first_layer;
878
879 return need_fence;
880 }
881
882 static inline void
883 swr_user_vbuf_range(const struct pipe_draw_info *info,
884 const struct swr_vertex_element_state *velems,
885 const struct pipe_vertex_buffer *vb,
886 uint32_t i,
887 uint32_t *totelems,
888 uint32_t *base,
889 uint32_t *size)
890 {
891 /* FIXME: The size is too large - we don't access the full extra stride. */
892 unsigned elems;
893 if (velems->instanced_bufs & (1U << i)) {
894 elems = info->instance_count / velems->min_instance_div[i] + 1;
895 *totelems = info->start_instance + elems;
896 *base = info->start_instance * vb->stride;
897 *size = elems * vb->stride;
898 } else if (vb->stride) {
899 elems = info->max_index - info->min_index + 1;
900 *totelems = info->max_index + 1;
901 *base = info->min_index * vb->stride;
902 *size = elems * vb->stride;
903 } else {
904 *totelems = 1;
905 *base = 0;
906 *size = velems->stream_pitch[i];
907 }
908 }
909
910 void
911 swr_update_derived(struct pipe_context *pipe,
912 const struct pipe_draw_info *p_draw_info)
913 {
914 struct swr_context *ctx = swr_context(pipe);
915 struct swr_screen *screen = swr_screen(ctx->pipe.screen);
916
917 /* Update screen->pipe to current pipe context. */
918 if (screen->pipe != pipe)
919 screen->pipe = pipe;
920
921 /* Any state that requires dirty flags to be re-triggered sets this mask */
922 /* For example, user_buffer vertex and index buffers. */
923 unsigned post_update_dirty_flags = 0;
924
925 /* Render Targets */
926 if (ctx->dirty & SWR_NEW_FRAMEBUFFER) {
927 struct pipe_framebuffer_state *fb = &ctx->framebuffer;
928 const struct util_format_description *desc = NULL;
929 bool need_fence = false;
930
931 /* colorbuffer targets */
932 if (fb->nr_cbufs) {
933 for (unsigned i = 0; i < fb->nr_cbufs; ++i)
934 need_fence |= swr_change_rt(
935 ctx, SWR_ATTACHMENT_COLOR0 + i, fb->cbufs[i]);
936 }
937 for (unsigned i = fb->nr_cbufs; i < SWR_NUM_RENDERTARGETS; ++i)
938 need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_COLOR0 + i, NULL);
939
940 /* depth/stencil target */
941 if (fb->zsbuf)
942 desc = util_format_description(fb->zsbuf->format);
943 if (fb->zsbuf && util_format_has_depth(desc))
944 need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, fb->zsbuf);
945 else
946 need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, NULL);
947
948 if (fb->zsbuf && util_format_has_stencil(desc))
949 need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, fb->zsbuf);
950 else
951 need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, NULL);
952
953 /* This fence ensures any attachment changes are resolved before the
954 * next draw */
955 if (need_fence)
956 swr_fence_submit(ctx, screen->flush_fence);
957 }
958
959 /* Raster state */
960 if (ctx->dirty & (SWR_NEW_RASTERIZER |
961 SWR_NEW_VS | // clipping
962 SWR_NEW_FRAMEBUFFER)) {
963 pipe_rasterizer_state *rasterizer = ctx->rasterizer;
964 pipe_framebuffer_state *fb = &ctx->framebuffer;
965
966 SWR_RASTSTATE *rastState = &ctx->derived.rastState;
967 rastState->cullMode = swr_convert_cull_mode(rasterizer->cull_face);
968 rastState->frontWinding = rasterizer->front_ccw
969 ? SWR_FRONTWINDING_CCW
970 : SWR_FRONTWINDING_CW;
971 rastState->scissorEnable = rasterizer->scissor;
972 rastState->pointSize = rasterizer->point_size > 0.0f
973 ? rasterizer->point_size
974 : 1.0f;
975 rastState->lineWidth = rasterizer->line_width > 0.0f
976 ? rasterizer->line_width
977 : 1.0f;
978
979 rastState->pointParam = rasterizer->point_size_per_vertex;
980
981 rastState->pointSpriteEnable = rasterizer->sprite_coord_enable;
982 rastState->pointSpriteTopOrigin =
983 rasterizer->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT;
984
985 /* XXX TODO: Add multisample */
986 rastState->msaaRastEnable = false;
987 rastState->rastMode = SWR_MSAA_RASTMODE_OFF_PIXEL;
988 rastState->sampleCount = SWR_MULTISAMPLE_1X;
989 rastState->forcedSampleCount = false;
990
991 bool do_offset = false;
992 switch (rasterizer->fill_front) {
993 case PIPE_POLYGON_MODE_FILL:
994 do_offset = rasterizer->offset_tri;
995 break;
996 case PIPE_POLYGON_MODE_LINE:
997 do_offset = rasterizer->offset_line;
998 break;
999 case PIPE_POLYGON_MODE_POINT:
1000 do_offset = rasterizer->offset_point;
1001 break;
1002 }
1003
1004 if (do_offset) {
1005 rastState->depthBias = rasterizer->offset_units;
1006 rastState->slopeScaledDepthBias = rasterizer->offset_scale;
1007 rastState->depthBiasClamp = rasterizer->offset_clamp;
1008 } else {
1009 rastState->depthBias = 0;
1010 rastState->slopeScaledDepthBias = 0;
1011 rastState->depthBiasClamp = 0;
1012 }
1013 struct pipe_surface *zb = fb->zsbuf;
1014 if (zb && swr_resource(zb->texture)->has_depth)
1015 rastState->depthFormat = swr_resource(zb->texture)->swr.format;
1016
1017 rastState->depthClipEnable = rasterizer->depth_clip;
1018 rastState->clipHalfZ = rasterizer->clip_halfz;
1019
1020 rastState->clipDistanceMask =
1021 ctx->vs->info.base.num_written_clipdistance ?
1022 ctx->vs->info.base.clipdist_writemask & rasterizer->clip_plane_enable :
1023 rasterizer->clip_plane_enable;
1024
1025 rastState->cullDistanceMask =
1026 ctx->vs->info.base.culldist_writemask << ctx->vs->info.base.num_written_clipdistance;
1027
1028 SwrSetRastState(ctx->swrContext, rastState);
1029 }
1030
1031 /* Scissor */
1032 if (ctx->dirty & SWR_NEW_SCISSOR) {
1033 SwrSetScissorRects(ctx->swrContext, 1, &ctx->swr_scissor);
1034 }
1035
1036 /* Viewport */
1037 if (ctx->dirty & (SWR_NEW_VIEWPORT | SWR_NEW_FRAMEBUFFER
1038 | SWR_NEW_RASTERIZER)) {
1039 pipe_viewport_state *state = &ctx->viewport;
1040 pipe_framebuffer_state *fb = &ctx->framebuffer;
1041 pipe_rasterizer_state *rasterizer = ctx->rasterizer;
1042
1043 SWR_VIEWPORT *vp = &ctx->derived.vp;
1044 SWR_VIEWPORT_MATRICES *vpm = &ctx->derived.vpm;
1045
1046 vp->x = state->translate[0] - state->scale[0];
1047 vp->width = 2 * state->scale[0];
1048 vp->y = state->translate[1] - fabs(state->scale[1]);
1049 vp->height = 2 * fabs(state->scale[1]);
1050 util_viewport_zmin_zmax(state, rasterizer->clip_halfz,
1051 &vp->minZ, &vp->maxZ);
1052
1053 vpm->m00[0] = state->scale[0];
1054 vpm->m11[0] = state->scale[1];
1055 vpm->m22[0] = state->scale[2];
1056 vpm->m30[0] = state->translate[0];
1057 vpm->m31[0] = state->translate[1];
1058 vpm->m32[0] = state->translate[2];
1059
1060 /* Now that the matrix is calculated, clip the view coords to screen
1061 * size. OpenGL allows for -ve x,y in the viewport. */
1062 if (vp->x < 0.0f) {
1063 vp->width += vp->x;
1064 vp->x = 0.0f;
1065 }
1066 if (vp->y < 0.0f) {
1067 vp->height += vp->y;
1068 vp->y = 0.0f;
1069 }
1070 vp->width = std::min(vp->width, (float)fb->width - vp->x);
1071 vp->height = std::min(vp->height, (float)fb->height - vp->y);
1072
1073 SwrSetViewports(ctx->swrContext, 1, vp, vpm);
1074 }
1075
1076 /* Set vertex & index buffers */
1077 /* (using draw info if called by swr_draw_vbo) */
1078 if (ctx->dirty & SWR_NEW_VERTEX) {
1079 uint32_t scratch_total;
1080 uint8_t *scratch = NULL;
1081
1082 /* If being called by swr_draw_vbo, copy draw details */
1083 struct pipe_draw_info info = {0};
1084 if (p_draw_info)
1085 info = *p_draw_info;
1086
1087 /* We must get all the scratch space in one go */
1088 scratch_total = 0;
1089 for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
1090 struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
1091
1092 if (!vb->user_buffer)
1093 continue;
1094
1095 uint32_t elems, base, size;
1096 swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size);
1097 scratch_total += AlignUp(size, 4);
1098 }
1099
1100 if (scratch_total) {
1101 scratch = (uint8_t *)swr_copy_to_scratch_space(
1102 ctx, &ctx->scratch->vertex_buffer, NULL, scratch_total);
1103 }
1104
1105 /* vertex buffers */
1106 SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS];
1107 for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
1108 uint32_t size, pitch, elems, partial_inbounds;
1109 const uint8_t *p_data;
1110 struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
1111
1112 pitch = vb->stride;
1113 if (!vb->user_buffer) {
1114 /* VBO
1115 * size is based on buffer->width0 rather than info.max_index
1116 * to prevent having to validate VBO on each draw */
1117 size = vb->buffer->width0;
1118 elems = size / pitch;
1119 partial_inbounds = size % pitch;
1120
1121 p_data = swr_resource_data(vb->buffer) + vb->buffer_offset;
1122 } else {
1123 /* Client buffer
1124 * client memory is one-time use, re-trigger SWR_NEW_VERTEX to
1125 * revalidate on each draw */
1126 post_update_dirty_flags |= SWR_NEW_VERTEX;
1127
1128 uint32_t base;
1129 swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size);
1130 partial_inbounds = 0;
1131
1132 /* Copy only needed vertices to scratch space */
1133 size = AlignUp(size, 4);
1134 const void *ptr = (const uint8_t *) vb->user_buffer + base;
1135 memcpy(scratch, ptr, size);
1136 ptr = scratch;
1137 scratch += size;
1138 p_data = (const uint8_t *)ptr - base;
1139 }
1140
1141 swrVertexBuffers[i] = {0};
1142 swrVertexBuffers[i].index = i;
1143 swrVertexBuffers[i].pitch = pitch;
1144 swrVertexBuffers[i].pData = p_data;
1145 swrVertexBuffers[i].size = size;
1146 swrVertexBuffers[i].maxVertex = elems;
1147 swrVertexBuffers[i].partialInboundsSize = partial_inbounds;
1148 }
1149
1150 SwrSetVertexBuffers(
1151 ctx->swrContext, ctx->num_vertex_buffers, swrVertexBuffers);
1152
1153 /* index buffer, if required (info passed in by swr_draw_vbo) */
1154 SWR_FORMAT index_type = R32_UINT; /* Default for non-indexed draws */
1155 if (info.indexed) {
1156 const uint8_t *p_data;
1157 uint32_t size, pitch;
1158 struct pipe_index_buffer *ib = &ctx->index_buffer;
1159
1160 pitch = ib->index_size ? ib->index_size : sizeof(uint32_t);
1161 index_type = swr_convert_index_type(pitch);
1162
1163 if (!ib->user_buffer) {
1164 /* VBO
1165 * size is based on buffer->width0 rather than info.count
1166 * to prevent having to validate VBO on each draw */
1167 size = ib->buffer->width0;
1168 p_data = swr_resource_data(ib->buffer) + ib->offset;
1169 } else {
1170 /* Client buffer
1171 * client memory is one-time use, re-trigger SWR_NEW_VERTEX to
1172 * revalidate on each draw */
1173 post_update_dirty_flags |= SWR_NEW_VERTEX;
1174
1175 size = info.count * pitch;
1176 size = AlignUp(size, 4);
1177
1178 /* Copy indices to scratch space */
1179 const void *ptr = ib->user_buffer;
1180 ptr = swr_copy_to_scratch_space(
1181 ctx, &ctx->scratch->index_buffer, ptr, size);
1182 p_data = (const uint8_t *)ptr;
1183 }
1184
1185 SWR_INDEX_BUFFER_STATE swrIndexBuffer;
1186 swrIndexBuffer.format = swr_convert_index_type(ib->index_size);
1187 swrIndexBuffer.pIndices = p_data;
1188 swrIndexBuffer.size = size;
1189
1190 SwrSetIndexBuffer(ctx->swrContext, &swrIndexBuffer);
1191 }
1192
1193 struct swr_vertex_element_state *velems = ctx->velems;
1194 if (velems && velems->fsState.indexType != index_type) {
1195 velems->fsFunc = NULL;
1196 velems->fsState.indexType = index_type;
1197 }
1198 }
1199
1200 /* VertexShader */
1201 if (ctx->dirty & (SWR_NEW_VS |
1202 SWR_NEW_RASTERIZER | // for clip planes
1203 SWR_NEW_SAMPLER |
1204 SWR_NEW_SAMPLER_VIEW |
1205 SWR_NEW_FRAMEBUFFER)) {
1206 swr_jit_vs_key key;
1207 swr_generate_vs_key(key, ctx, ctx->vs);
1208 auto search = ctx->vs->map.find(key);
1209 PFN_VERTEX_FUNC func;
1210 if (search != ctx->vs->map.end()) {
1211 func = search->second->shader;
1212 } else {
1213 func = swr_compile_vs(ctx, key);
1214 }
1215 SwrSetVertexFunc(ctx->swrContext, func);
1216
1217 /* JIT sampler state */
1218 if (ctx->dirty & SWR_NEW_SAMPLER) {
1219 swr_update_sampler_state(ctx,
1220 PIPE_SHADER_VERTEX,
1221 key.nr_samplers,
1222 ctx->swrDC.samplersVS);
1223 }
1224
1225 /* JIT sampler view state */
1226 if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
1227 swr_update_texture_state(ctx,
1228 PIPE_SHADER_VERTEX,
1229 key.nr_sampler_views,
1230 ctx->swrDC.texturesVS);
1231 }
1232 }
1233
1234 /* FragmentShader */
1235 if (ctx->dirty & (SWR_NEW_FS | SWR_NEW_SAMPLER | SWR_NEW_SAMPLER_VIEW
1236 | SWR_NEW_RASTERIZER | SWR_NEW_FRAMEBUFFER)) {
1237 swr_jit_fs_key key;
1238 swr_generate_fs_key(key, ctx, ctx->fs);
1239 auto search = ctx->fs->map.find(key);
1240 PFN_PIXEL_KERNEL func;
1241 if (search != ctx->fs->map.end()) {
1242 func = search->second->shader;
1243 } else {
1244 func = swr_compile_fs(ctx, key);
1245 }
1246 SWR_PS_STATE psState = {0};
1247 psState.pfnPixelShader = func;
1248 psState.killsPixel = ctx->fs->info.base.uses_kill;
1249 psState.inputCoverage = SWR_INPUT_COVERAGE_NORMAL;
1250 psState.writesODepth = ctx->fs->info.base.writes_z;
1251 psState.usesSourceDepth = ctx->fs->info.base.reads_z;
1252 psState.shadingRate = SWR_SHADING_RATE_PIXEL; // XXX
1253 psState.numRenderTargets = ctx->framebuffer.nr_cbufs;
1254 psState.posOffset = SWR_PS_POSITION_SAMPLE_NONE; // XXX msaa
1255 uint32_t barycentricsMask = 0;
1256 #if 0
1257 // when we switch to mesa-master
1258 if (ctx->fs->info.base.uses_persp_center ||
1259 ctx->fs->info.base.uses_linear_center)
1260 barycentricsMask |= SWR_BARYCENTRIC_PER_PIXEL_MASK;
1261 if (ctx->fs->info.base.uses_persp_centroid ||
1262 ctx->fs->info.base.uses_linear_centroid)
1263 barycentricsMask |= SWR_BARYCENTRIC_CENTROID_MASK;
1264 if (ctx->fs->info.base.uses_persp_sample ||
1265 ctx->fs->info.base.uses_linear_sample)
1266 barycentricsMask |= SWR_BARYCENTRIC_PER_SAMPLE_MASK;
1267 #else
1268 for (unsigned i = 0; i < ctx->fs->info.base.num_inputs; i++) {
1269 switch (ctx->fs->info.base.input_interpolate_loc[i]) {
1270 case TGSI_INTERPOLATE_LOC_CENTER:
1271 barycentricsMask |= SWR_BARYCENTRIC_PER_PIXEL_MASK;
1272 break;
1273 case TGSI_INTERPOLATE_LOC_CENTROID:
1274 barycentricsMask |= SWR_BARYCENTRIC_CENTROID_MASK;
1275 break;
1276 case TGSI_INTERPOLATE_LOC_SAMPLE:
1277 barycentricsMask |= SWR_BARYCENTRIC_PER_SAMPLE_MASK;
1278 break;
1279 }
1280 }
1281 #endif
1282 psState.barycentricsMask = barycentricsMask;
1283 psState.usesUAV = false; // XXX
1284 psState.forceEarlyZ = false;
1285 SwrSetPixelShaderState(ctx->swrContext, &psState);
1286
1287 /* JIT sampler state */
1288 if (ctx->dirty & SWR_NEW_SAMPLER) {
1289 swr_update_sampler_state(ctx,
1290 PIPE_SHADER_FRAGMENT,
1291 key.nr_samplers,
1292 ctx->swrDC.samplersFS);
1293 }
1294
1295 /* JIT sampler view state */
1296 if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
1297 swr_update_texture_state(ctx,
1298 PIPE_SHADER_FRAGMENT,
1299 key.nr_sampler_views,
1300 ctx->swrDC.texturesFS);
1301 }
1302 }
1303
1304
1305 /* VertexShader Constants */
1306 if (ctx->dirty & SWR_NEW_VSCONSTANTS) {
1307 swr_update_constants(ctx, PIPE_SHADER_VERTEX);
1308 }
1309
1310 /* FragmentShader Constants */
1311 if (ctx->dirty & SWR_NEW_FSCONSTANTS) {
1312 swr_update_constants(ctx, PIPE_SHADER_FRAGMENT);
1313 }
1314
1315 /* Depth/stencil state */
1316 if (ctx->dirty & (SWR_NEW_DEPTH_STENCIL_ALPHA | SWR_NEW_FRAMEBUFFER)) {
1317 struct pipe_depth_state *depth = &(ctx->depth_stencil->depth);
1318 struct pipe_stencil_state *stencil = ctx->depth_stencil->stencil;
1319 SWR_DEPTH_STENCIL_STATE depthStencilState = {{0}};
1320 SWR_DEPTH_BOUNDS_STATE depthBoundsState = {0};
1321
1322 /* XXX, incomplete. Need to flesh out stencil & alpha test state
1323 struct pipe_stencil_state *front_stencil =
1324 ctx->depth_stencil.stencil[0];
1325 struct pipe_stencil_state *back_stencil = ctx->depth_stencil.stencil[1];
1326 struct pipe_alpha_state alpha;
1327 */
1328 if (stencil[0].enabled) {
1329 depthStencilState.stencilWriteEnable = 1;
1330 depthStencilState.stencilTestEnable = 1;
1331 depthStencilState.stencilTestFunc =
1332 swr_convert_depth_func(stencil[0].func);
1333
1334 depthStencilState.stencilPassDepthPassOp =
1335 swr_convert_stencil_op(stencil[0].zpass_op);
1336 depthStencilState.stencilPassDepthFailOp =
1337 swr_convert_stencil_op(stencil[0].zfail_op);
1338 depthStencilState.stencilFailOp =
1339 swr_convert_stencil_op(stencil[0].fail_op);
1340 depthStencilState.stencilWriteMask = stencil[0].writemask;
1341 depthStencilState.stencilTestMask = stencil[0].valuemask;
1342 depthStencilState.stencilRefValue = ctx->stencil_ref.ref_value[0];
1343 }
1344 if (stencil[1].enabled) {
1345 depthStencilState.doubleSidedStencilTestEnable = 1;
1346
1347 depthStencilState.backfaceStencilTestFunc =
1348 swr_convert_depth_func(stencil[1].func);
1349
1350 depthStencilState.backfaceStencilPassDepthPassOp =
1351 swr_convert_stencil_op(stencil[1].zpass_op);
1352 depthStencilState.backfaceStencilPassDepthFailOp =
1353 swr_convert_stencil_op(stencil[1].zfail_op);
1354 depthStencilState.backfaceStencilFailOp =
1355 swr_convert_stencil_op(stencil[1].fail_op);
1356 depthStencilState.backfaceStencilWriteMask = stencil[1].writemask;
1357 depthStencilState.backfaceStencilTestMask = stencil[1].valuemask;
1358
1359 depthStencilState.backfaceStencilRefValue =
1360 ctx->stencil_ref.ref_value[1];
1361 }
1362
1363 depthStencilState.depthTestEnable = depth->enabled;
1364 depthStencilState.depthTestFunc = swr_convert_depth_func(depth->func);
1365 depthStencilState.depthWriteEnable = depth->writemask;
1366 SwrSetDepthStencilState(ctx->swrContext, &depthStencilState);
1367
1368 depthBoundsState.depthBoundsTestEnable = depth->bounds_test;
1369 depthBoundsState.depthBoundsTestMinValue = depth->bounds_min;
1370 depthBoundsState.depthBoundsTestMaxValue = depth->bounds_max;
1371 SwrSetDepthBoundsState(ctx->swrContext, &depthBoundsState);
1372 }
1373
1374 /* Blend State */
1375 if (ctx->dirty & (SWR_NEW_BLEND |
1376 SWR_NEW_FRAMEBUFFER |
1377 SWR_NEW_DEPTH_STENCIL_ALPHA)) {
1378 struct pipe_framebuffer_state *fb = &ctx->framebuffer;
1379
1380 SWR_BLEND_STATE blendState;
1381 memcpy(&blendState, &ctx->blend->blendState, sizeof(blendState));
1382 blendState.constantColor[0] = ctx->blend_color.color[0];
1383 blendState.constantColor[1] = ctx->blend_color.color[1];
1384 blendState.constantColor[2] = ctx->blend_color.color[2];
1385 blendState.constantColor[3] = ctx->blend_color.color[3];
1386 blendState.alphaTestReference =
1387 *((uint32_t*)&ctx->depth_stencil->alpha.ref_value);
1388
1389 // XXX MSAA
1390 blendState.sampleMask = 0;
1391 blendState.sampleCount = SWR_MULTISAMPLE_1X;
1392
1393 /* If there are no color buffers bound, disable writes on RT0
1394 * and skip loop */
1395 if (fb->nr_cbufs == 0) {
1396 blendState.renderTarget[0].writeDisableRed = 1;
1397 blendState.renderTarget[0].writeDisableGreen = 1;
1398 blendState.renderTarget[0].writeDisableBlue = 1;
1399 blendState.renderTarget[0].writeDisableAlpha = 1;
1400 SwrSetBlendFunc(ctx->swrContext, 0, NULL);
1401 }
1402 else
1403 for (int target = 0;
1404 target < std::min(SWR_NUM_RENDERTARGETS,
1405 PIPE_MAX_COLOR_BUFS);
1406 target++) {
1407 if (!fb->cbufs[target])
1408 continue;
1409
1410 struct swr_resource *colorBuffer =
1411 swr_resource(fb->cbufs[target]->texture);
1412
1413 BLEND_COMPILE_STATE compileState;
1414 memset(&compileState, 0, sizeof(compileState));
1415 compileState.format = colorBuffer->swr.format;
1416 memcpy(&compileState.blendState,
1417 &ctx->blend->compileState[target],
1418 sizeof(compileState.blendState));
1419
1420 const SWR_FORMAT_INFO& info = GetFormatInfo(compileState.format);
1421 if (compileState.blendState.logicOpEnable &&
1422 ((info.type[0] == SWR_TYPE_FLOAT) || info.isSRGB)) {
1423 compileState.blendState.logicOpEnable = false;
1424 }
1425
1426 if (info.type[0] == SWR_TYPE_SINT || info.type[0] == SWR_TYPE_UINT)
1427 compileState.blendState.blendEnable = false;
1428
1429 if (compileState.blendState.blendEnable == false &&
1430 compileState.blendState.logicOpEnable == false &&
1431 ctx->depth_stencil->alpha.enabled == 0) {
1432 SwrSetBlendFunc(ctx->swrContext, target, NULL);
1433 continue;
1434 }
1435
1436 compileState.desc.alphaTestEnable =
1437 ctx->depth_stencil->alpha.enabled;
1438 compileState.desc.independentAlphaBlendEnable =
1439 (compileState.blendState.sourceBlendFactor !=
1440 compileState.blendState.sourceAlphaBlendFactor) ||
1441 (compileState.blendState.destBlendFactor !=
1442 compileState.blendState.destAlphaBlendFactor) ||
1443 (compileState.blendState.colorBlendFunc !=
1444 compileState.blendState.alphaBlendFunc);
1445 compileState.desc.alphaToCoverageEnable =
1446 ctx->blend->pipe.alpha_to_coverage;
1447 compileState.desc.sampleMaskEnable = 0; // XXX
1448 compileState.desc.numSamples = 1; // XXX
1449
1450 compileState.alphaTestFunction =
1451 swr_convert_depth_func(ctx->depth_stencil->alpha.func);
1452 compileState.alphaTestFormat = ALPHA_TEST_FLOAT32; // xxx
1453
1454 compileState.Canonicalize();
1455
1456 PFN_BLEND_JIT_FUNC func = NULL;
1457 auto search = ctx->blendJIT->find(compileState);
1458 if (search != ctx->blendJIT->end()) {
1459 func = search->second;
1460 } else {
1461 HANDLE hJitMgr = screen->hJitMgr;
1462 func = JitCompileBlend(hJitMgr, compileState);
1463 debug_printf("BLEND shader %p\n", func);
1464 assert(func && "Error: BlendShader = NULL");
1465
1466 ctx->blendJIT->insert(std::make_pair(compileState, func));
1467 }
1468 SwrSetBlendFunc(ctx->swrContext, target, func);
1469 }
1470
1471 SwrSetBlendState(ctx->swrContext, &blendState);
1472 }
1473
1474 if (ctx->dirty & SWR_NEW_STIPPLE) {
1475 /* XXX What to do with this one??? SWR doesn't stipple */
1476 }
1477
1478 if (ctx->dirty & (SWR_NEW_VS | SWR_NEW_SO | SWR_NEW_RASTERIZER)) {
1479 ctx->vs->soState.rasterizerDisable =
1480 ctx->rasterizer->rasterizer_discard;
1481 SwrSetSoState(ctx->swrContext, &ctx->vs->soState);
1482
1483 pipe_stream_output_info *stream_output = &ctx->vs->pipe.stream_output;
1484
1485 for (uint32_t i = 0; i < ctx->num_so_targets; i++) {
1486 SWR_STREAMOUT_BUFFER buffer = {0};
1487 if (!ctx->so_targets[i])
1488 continue;
1489 buffer.enable = true;
1490 buffer.pBuffer =
1491 (uint32_t *)(swr_resource_data(ctx->so_targets[i]->buffer) +
1492 ctx->so_targets[i]->buffer_offset);
1493 buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2;
1494 buffer.pitch = stream_output->stride[i];
1495 buffer.streamOffset = 0;
1496
1497 SwrSetSoBuffers(ctx->swrContext, &buffer, i);
1498 }
1499 }
1500
1501 if (ctx->dirty & SWR_NEW_CLIP) {
1502 // shader exporting clip distances overrides all user clip planes
1503 if (ctx->rasterizer->clip_plane_enable &&
1504 !ctx->vs->info.base.num_written_clipdistance)
1505 {
1506 swr_draw_context *pDC = &ctx->swrDC;
1507 memcpy(pDC->userClipPlanes,
1508 ctx->clip.ucp,
1509 sizeof(pDC->userClipPlanes));
1510 }
1511 }
1512
1513 // set up backend state
1514 SWR_BACKEND_STATE backendState = {0};
1515 backendState.numAttributes =
1516 ctx->vs->info.base.num_outputs - 1 +
1517 (ctx->rasterizer->sprite_coord_enable ? 1 : 0);
1518 for (unsigned i = 0; i < backendState.numAttributes; i++)
1519 backendState.numComponents[i] = 4;
1520 backendState.constantInterpolationMask = ctx->fs->constantMask |
1521 (ctx->rasterizer->flatshade ? ctx->fs->flatConstantMask : 0);
1522 backendState.pointSpriteTexCoordMask = ctx->fs->pointSpriteMask;
1523
1524 SwrSetBackendState(ctx->swrContext, &backendState);
1525
1526 /* Ensure that any in-progress attachment change StoreTiles finish */
1527 if (swr_is_fence_pending(screen->flush_fence))
1528 swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
1529
1530 /* Finally, update the in-use status of all resources involved in draw */
1531 swr_update_resource_status(pipe, p_draw_info);
1532
1533 ctx->dirty = post_update_dirty_flags;
1534 }
1535
1536
1537 static struct pipe_stream_output_target *
1538 swr_create_so_target(struct pipe_context *pipe,
1539 struct pipe_resource *buffer,
1540 unsigned buffer_offset,
1541 unsigned buffer_size)
1542 {
1543 struct pipe_stream_output_target *target;
1544
1545 target = CALLOC_STRUCT(pipe_stream_output_target);
1546 if (!target)
1547 return NULL;
1548
1549 target->context = pipe;
1550 target->reference.count = 1;
1551 pipe_resource_reference(&target->buffer, buffer);
1552 target->buffer_offset = buffer_offset;
1553 target->buffer_size = buffer_size;
1554 return target;
1555 }
1556
1557 static void
1558 swr_destroy_so_target(struct pipe_context *pipe,
1559 struct pipe_stream_output_target *target)
1560 {
1561 pipe_resource_reference(&target->buffer, NULL);
1562 FREE(target);
1563 }
1564
1565 static void
1566 swr_set_so_targets(struct pipe_context *pipe,
1567 unsigned num_targets,
1568 struct pipe_stream_output_target **targets,
1569 const unsigned *offsets)
1570 {
1571 struct swr_context *swr = swr_context(pipe);
1572 uint32_t i;
1573
1574 assert(num_targets <= MAX_SO_STREAMS);
1575
1576 for (i = 0; i < num_targets; i++) {
1577 pipe_so_target_reference(
1578 (struct pipe_stream_output_target **)&swr->so_targets[i],
1579 targets[i]);
1580 }
1581
1582 for (/* fall-through */; i < swr->num_so_targets; i++) {
1583 pipe_so_target_reference(
1584 (struct pipe_stream_output_target **)&swr->so_targets[i], NULL);
1585 }
1586
1587 swr->num_so_targets = num_targets;
1588
1589 swr->dirty |= SWR_NEW_SO;
1590 }
1591
1592
1593 void
1594 swr_state_init(struct pipe_context *pipe)
1595 {
1596 pipe->create_blend_state = swr_create_blend_state;
1597 pipe->bind_blend_state = swr_bind_blend_state;
1598 pipe->delete_blend_state = swr_delete_blend_state;
1599
1600 pipe->create_depth_stencil_alpha_state = swr_create_depth_stencil_state;
1601 pipe->bind_depth_stencil_alpha_state = swr_bind_depth_stencil_state;
1602 pipe->delete_depth_stencil_alpha_state = swr_delete_depth_stencil_state;
1603
1604 pipe->create_rasterizer_state = swr_create_rasterizer_state;
1605 pipe->bind_rasterizer_state = swr_bind_rasterizer_state;
1606 pipe->delete_rasterizer_state = swr_delete_rasterizer_state;
1607
1608 pipe->create_sampler_state = swr_create_sampler_state;
1609 pipe->bind_sampler_states = swr_bind_sampler_states;
1610 pipe->delete_sampler_state = swr_delete_sampler_state;
1611
1612 pipe->create_sampler_view = swr_create_sampler_view;
1613 pipe->set_sampler_views = swr_set_sampler_views;
1614 pipe->sampler_view_destroy = swr_sampler_view_destroy;
1615
1616 pipe->create_vs_state = swr_create_vs_state;
1617 pipe->bind_vs_state = swr_bind_vs_state;
1618 pipe->delete_vs_state = swr_delete_vs_state;
1619
1620 pipe->create_fs_state = swr_create_fs_state;
1621 pipe->bind_fs_state = swr_bind_fs_state;
1622 pipe->delete_fs_state = swr_delete_fs_state;
1623
1624 pipe->set_constant_buffer = swr_set_constant_buffer;
1625
1626 pipe->create_vertex_elements_state = swr_create_vertex_elements_state;
1627 pipe->bind_vertex_elements_state = swr_bind_vertex_elements_state;
1628 pipe->delete_vertex_elements_state = swr_delete_vertex_elements_state;
1629
1630 pipe->set_vertex_buffers = swr_set_vertex_buffers;
1631 pipe->set_index_buffer = swr_set_index_buffer;
1632
1633 pipe->set_polygon_stipple = swr_set_polygon_stipple;
1634 pipe->set_clip_state = swr_set_clip_state;
1635 pipe->set_scissor_states = swr_set_scissor_states;
1636 pipe->set_viewport_states = swr_set_viewport_states;
1637
1638 pipe->set_framebuffer_state = swr_set_framebuffer_state;
1639
1640 pipe->set_blend_color = swr_set_blend_color;
1641 pipe->set_stencil_ref = swr_set_stencil_ref;
1642
1643 pipe->set_sample_mask = swr_set_sample_mask;
1644
1645 pipe->create_stream_output_target = swr_create_so_target;
1646 pipe->stream_output_target_destroy = swr_destroy_so_target;
1647 pipe->set_stream_output_targets = swr_set_so_targets;
1648 }