swr: fix index buffers with non-zero indices
[mesa.git] / src / gallium / drivers / swr / swr_state.cpp
1 /****************************************************************************
2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ***************************************************************************/
23
24 // llvm redefines DEBUG
25 #pragma push_macro("DEBUG")
26 #undef DEBUG
27 #include "JitManager.h"
28 #pragma pop_macro("DEBUG")
29
30 #include "common/os.h"
31 #include "jit_api.h"
32 #include "state_llvm.h"
33
34 #include "gallivm/lp_bld_tgsi.h"
35 #include "util/u_format.h"
36
37 #include "util/u_memory.h"
38 #include "util/u_inlines.h"
39 #include "util/u_helpers.h"
40 #include "util/u_framebuffer.h"
41 #include "util/u_viewport.h"
42
43 #include "swr_state.h"
44 #include "swr_context.h"
45 #include "swr_context_llvm.h"
46 #include "swr_screen.h"
47 #include "swr_resource.h"
48 #include "swr_tex_sample.h"
49 #include "swr_scratch.h"
50 #include "swr_shader.h"
51 #include "swr_fence.h"
52
53 /* These should be pulled out into separate files as necessary
54 * Just initializing everything here to get going. */
55
56 static void *
57 swr_create_blend_state(struct pipe_context *pipe,
58 const struct pipe_blend_state *blend)
59 {
60 struct swr_blend_state *state = CALLOC_STRUCT(swr_blend_state);
61
62 memcpy(&state->pipe, blend, sizeof(*blend));
63
64 struct pipe_blend_state *pipe_blend = &state->pipe;
65
66 for (int target = 0;
67 target < std::min(SWR_NUM_RENDERTARGETS, PIPE_MAX_COLOR_BUFS);
68 target++) {
69
70 struct pipe_rt_blend_state *rt_blend = &pipe_blend->rt[target];
71 SWR_RENDER_TARGET_BLEND_STATE &blendState =
72 state->blendState.renderTarget[target];
73 RENDER_TARGET_BLEND_COMPILE_STATE &compileState =
74 state->compileState[target];
75
76 if (target != 0 && !pipe_blend->independent_blend_enable) {
77 memcpy(&compileState,
78 &state->compileState[0],
79 sizeof(RENDER_TARGET_BLEND_COMPILE_STATE));
80 continue;
81 }
82
83 compileState.blendEnable = rt_blend->blend_enable;
84 if (compileState.blendEnable) {
85 compileState.sourceAlphaBlendFactor =
86 swr_convert_blend_factor(rt_blend->alpha_src_factor);
87 compileState.destAlphaBlendFactor =
88 swr_convert_blend_factor(rt_blend->alpha_dst_factor);
89 compileState.sourceBlendFactor =
90 swr_convert_blend_factor(rt_blend->rgb_src_factor);
91 compileState.destBlendFactor =
92 swr_convert_blend_factor(rt_blend->rgb_dst_factor);
93
94 compileState.colorBlendFunc =
95 swr_convert_blend_func(rt_blend->rgb_func);
96 compileState.alphaBlendFunc =
97 swr_convert_blend_func(rt_blend->alpha_func);
98 }
99 compileState.logicOpEnable = state->pipe.logicop_enable;
100 if (compileState.logicOpEnable) {
101 compileState.logicOpFunc =
102 swr_convert_logic_op(state->pipe.logicop_func);
103 }
104
105 blendState.writeDisableRed =
106 (rt_blend->colormask & PIPE_MASK_R) ? 0 : 1;
107 blendState.writeDisableGreen =
108 (rt_blend->colormask & PIPE_MASK_G) ? 0 : 1;
109 blendState.writeDisableBlue =
110 (rt_blend->colormask & PIPE_MASK_B) ? 0 : 1;
111 blendState.writeDisableAlpha =
112 (rt_blend->colormask & PIPE_MASK_A) ? 0 : 1;
113
114 if (rt_blend->colormask == 0)
115 compileState.blendEnable = false;
116 }
117
118 return state;
119 }
120
121 static void
122 swr_bind_blend_state(struct pipe_context *pipe, void *blend)
123 {
124 struct swr_context *ctx = swr_context(pipe);
125
126 if (ctx->blend == blend)
127 return;
128
129 ctx->blend = (swr_blend_state *)blend;
130
131 ctx->dirty |= SWR_NEW_BLEND;
132 }
133
134 static void
135 swr_delete_blend_state(struct pipe_context *pipe, void *blend)
136 {
137 FREE(blend);
138 }
139
140 static void
141 swr_set_blend_color(struct pipe_context *pipe,
142 const struct pipe_blend_color *color)
143 {
144 struct swr_context *ctx = swr_context(pipe);
145
146 ctx->blend_color = *color;
147
148 ctx->dirty |= SWR_NEW_BLEND;
149 }
150
151 static void
152 swr_set_stencil_ref(struct pipe_context *pipe,
153 const struct pipe_stencil_ref *ref)
154 {
155 struct swr_context *ctx = swr_context(pipe);
156
157 ctx->stencil_ref = *ref;
158
159 ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA;
160 }
161
162 static void *
163 swr_create_depth_stencil_state(
164 struct pipe_context *pipe,
165 const struct pipe_depth_stencil_alpha_state *depth_stencil)
166 {
167 struct pipe_depth_stencil_alpha_state *state;
168
169 state = (pipe_depth_stencil_alpha_state *)mem_dup(depth_stencil,
170 sizeof *depth_stencil);
171
172 return state;
173 }
174
175 static void
176 swr_bind_depth_stencil_state(struct pipe_context *pipe, void *depth_stencil)
177 {
178 struct swr_context *ctx = swr_context(pipe);
179
180 if (ctx->depth_stencil == (pipe_depth_stencil_alpha_state *)depth_stencil)
181 return;
182
183 ctx->depth_stencil = (pipe_depth_stencil_alpha_state *)depth_stencil;
184
185 ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA;
186 }
187
188 static void
189 swr_delete_depth_stencil_state(struct pipe_context *pipe, void *depth)
190 {
191 FREE(depth);
192 }
193
194
195 static void *
196 swr_create_rasterizer_state(struct pipe_context *pipe,
197 const struct pipe_rasterizer_state *rast)
198 {
199 struct pipe_rasterizer_state *state;
200 state = (pipe_rasterizer_state *)mem_dup(rast, sizeof *rast);
201
202 return state;
203 }
204
205 static void
206 swr_bind_rasterizer_state(struct pipe_context *pipe, void *handle)
207 {
208 struct swr_context *ctx = swr_context(pipe);
209 const struct pipe_rasterizer_state *rasterizer =
210 (const struct pipe_rasterizer_state *)handle;
211
212 if (ctx->rasterizer == (pipe_rasterizer_state *)rasterizer)
213 return;
214
215 ctx->rasterizer = (pipe_rasterizer_state *)rasterizer;
216
217 ctx->dirty |= SWR_NEW_RASTERIZER;
218 }
219
220 static void
221 swr_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer)
222 {
223 FREE(rasterizer);
224 }
225
226
227 static void *
228 swr_create_sampler_state(struct pipe_context *pipe,
229 const struct pipe_sampler_state *sampler)
230 {
231 struct pipe_sampler_state *state =
232 (pipe_sampler_state *)mem_dup(sampler, sizeof *sampler);
233
234 return state;
235 }
236
237 static void
238 swr_bind_sampler_states(struct pipe_context *pipe,
239 enum pipe_shader_type shader,
240 unsigned start,
241 unsigned num,
242 void **samplers)
243 {
244 struct swr_context *ctx = swr_context(pipe);
245 unsigned i;
246
247 assert(shader < PIPE_SHADER_TYPES);
248 assert(start + num <= ARRAY_SIZE(ctx->samplers[shader]));
249
250 /* set the new samplers */
251 ctx->num_samplers[shader] = num;
252 for (i = 0; i < num; i++) {
253 ctx->samplers[shader][start + i] = (pipe_sampler_state *)samplers[i];
254 }
255
256 ctx->dirty |= SWR_NEW_SAMPLER;
257 }
258
259 static void
260 swr_delete_sampler_state(struct pipe_context *pipe, void *sampler)
261 {
262 FREE(sampler);
263 }
264
265
266 static struct pipe_sampler_view *
267 swr_create_sampler_view(struct pipe_context *pipe,
268 struct pipe_resource *texture,
269 const struct pipe_sampler_view *templ)
270 {
271 struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);
272
273 if (view) {
274 *view = *templ;
275 view->reference.count = 1;
276 view->texture = NULL;
277 pipe_resource_reference(&view->texture, texture);
278 view->context = pipe;
279 }
280
281 return view;
282 }
283
284 static void
285 swr_set_sampler_views(struct pipe_context *pipe,
286 enum pipe_shader_type shader,
287 unsigned start,
288 unsigned num,
289 struct pipe_sampler_view **views)
290 {
291 struct swr_context *ctx = swr_context(pipe);
292 uint i;
293
294 assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
295
296 assert(shader < PIPE_SHADER_TYPES);
297 assert(start + num <= ARRAY_SIZE(ctx->sampler_views[shader]));
298
299 /* set the new sampler views */
300 ctx->num_sampler_views[shader] = num;
301 for (i = 0; i < num; i++) {
302 /* Note: we're using pipe_sampler_view_release() here to work around
303 * a possible crash when the old view belongs to another context that
304 * was already destroyed.
305 */
306 pipe_sampler_view_release(pipe, &ctx->sampler_views[shader][start + i]);
307 pipe_sampler_view_reference(&ctx->sampler_views[shader][start + i],
308 views[i]);
309 }
310
311 ctx->dirty |= SWR_NEW_SAMPLER_VIEW;
312 }
313
314 static void
315 swr_sampler_view_destroy(struct pipe_context *pipe,
316 struct pipe_sampler_view *view)
317 {
318 pipe_resource_reference(&view->texture, NULL);
319 FREE(view);
320 }
321
322 static void *
323 swr_create_vs_state(struct pipe_context *pipe,
324 const struct pipe_shader_state *vs)
325 {
326 struct swr_vertex_shader *swr_vs = new swr_vertex_shader;
327 if (!swr_vs)
328 return NULL;
329
330 swr_vs->pipe.tokens = tgsi_dup_tokens(vs->tokens);
331 swr_vs->pipe.stream_output = vs->stream_output;
332
333 lp_build_tgsi_info(vs->tokens, &swr_vs->info);
334
335 swr_vs->soState = {0};
336
337 if (swr_vs->pipe.stream_output.num_outputs) {
338 pipe_stream_output_info *stream_output = &swr_vs->pipe.stream_output;
339
340 swr_vs->soState.soEnable = true;
341 // soState.rasterizerDisable set on state dirty
342 // soState.streamToRasterizer not used
343
344 for (uint32_t i = 0; i < stream_output->num_outputs; i++) {
345 swr_vs->soState.streamMasks[stream_output->output[i].stream] |=
346 1 << (stream_output->output[i].register_index - 1);
347 }
348 for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) {
349 swr_vs->soState.streamNumEntries[i] =
350 _mm_popcnt_u32(swr_vs->soState.streamMasks[i]);
351 }
352 }
353
354 return swr_vs;
355 }
356
357 static void
358 swr_bind_vs_state(struct pipe_context *pipe, void *vs)
359 {
360 struct swr_context *ctx = swr_context(pipe);
361
362 if (ctx->vs == vs)
363 return;
364
365 ctx->vs = (swr_vertex_shader *)vs;
366 ctx->dirty |= SWR_NEW_VS;
367 }
368
369 static void
370 swr_delete_vs_state(struct pipe_context *pipe, void *vs)
371 {
372 struct swr_vertex_shader *swr_vs = (swr_vertex_shader *)vs;
373 FREE((void *)swr_vs->pipe.tokens);
374 struct swr_screen *screen = swr_screen(pipe->screen);
375
376 /* Defer deletion of vs state */
377 swr_fence_work_delete_vs(screen->flush_fence, swr_vs);
378 }
379
380 static void *
381 swr_create_fs_state(struct pipe_context *pipe,
382 const struct pipe_shader_state *fs)
383 {
384 struct swr_fragment_shader *swr_fs = new swr_fragment_shader;
385 if (!swr_fs)
386 return NULL;
387
388 swr_fs->pipe.tokens = tgsi_dup_tokens(fs->tokens);
389
390 lp_build_tgsi_info(fs->tokens, &swr_fs->info);
391
392 return swr_fs;
393 }
394
395
396 static void
397 swr_bind_fs_state(struct pipe_context *pipe, void *fs)
398 {
399 struct swr_context *ctx = swr_context(pipe);
400
401 if (ctx->fs == fs)
402 return;
403
404 ctx->fs = (swr_fragment_shader *)fs;
405 ctx->dirty |= SWR_NEW_FS;
406 }
407
408 static void
409 swr_delete_fs_state(struct pipe_context *pipe, void *fs)
410 {
411 struct swr_fragment_shader *swr_fs = (swr_fragment_shader *)fs;
412 FREE((void *)swr_fs->pipe.tokens);
413 struct swr_screen *screen = swr_screen(pipe->screen);
414
415 /* Defer deleton of fs state */
416 swr_fence_work_delete_fs(screen->flush_fence, swr_fs);
417 }
418
419
420 static void
421 swr_set_constant_buffer(struct pipe_context *pipe,
422 uint shader,
423 uint index,
424 const struct pipe_constant_buffer *cb)
425 {
426 struct swr_context *ctx = swr_context(pipe);
427 struct pipe_resource *constants = cb ? cb->buffer : NULL;
428
429 assert(shader < PIPE_SHADER_TYPES);
430 assert(index < ARRAY_SIZE(ctx->constants[shader]));
431
432 /* note: reference counting */
433 util_copy_constant_buffer(&ctx->constants[shader][index], cb);
434
435 if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY) {
436 ctx->dirty |= SWR_NEW_VSCONSTANTS;
437 } else if (shader == PIPE_SHADER_FRAGMENT) {
438 ctx->dirty |= SWR_NEW_FSCONSTANTS;
439 }
440
441 if (cb && cb->user_buffer) {
442 pipe_resource_reference(&constants, NULL);
443 }
444 }
445
446
447 static void *
448 swr_create_vertex_elements_state(struct pipe_context *pipe,
449 unsigned num_elements,
450 const struct pipe_vertex_element *attribs)
451 {
452 struct swr_vertex_element_state *velems;
453 assert(num_elements <= PIPE_MAX_ATTRIBS);
454 velems = new swr_vertex_element_state;
455 if (velems) {
456 velems->fsState.bVertexIDOffsetEnable = true;
457 velems->fsState.numAttribs = num_elements;
458 for (unsigned i = 0; i < num_elements; i++) {
459 // XXX: we should do this keyed on the VS usage info
460
461 const struct util_format_description *desc =
462 util_format_description(attribs[i].src_format);
463
464 velems->fsState.layout[i].AlignedByteOffset = attribs[i].src_offset;
465 velems->fsState.layout[i].Format =
466 mesa_to_swr_format(attribs[i].src_format);
467 velems->fsState.layout[i].StreamIndex =
468 attribs[i].vertex_buffer_index;
469 velems->fsState.layout[i].InstanceEnable =
470 attribs[i].instance_divisor != 0;
471 velems->fsState.layout[i].ComponentControl0 =
472 desc->channel[0].type != UTIL_FORMAT_TYPE_VOID
473 ? ComponentControl::StoreSrc
474 : ComponentControl::Store0;
475 velems->fsState.layout[i].ComponentControl1 =
476 desc->channel[1].type != UTIL_FORMAT_TYPE_VOID
477 ? ComponentControl::StoreSrc
478 : ComponentControl::Store0;
479 velems->fsState.layout[i].ComponentControl2 =
480 desc->channel[2].type != UTIL_FORMAT_TYPE_VOID
481 ? ComponentControl::StoreSrc
482 : ComponentControl::Store0;
483 velems->fsState.layout[i].ComponentControl3 =
484 desc->channel[3].type != UTIL_FORMAT_TYPE_VOID
485 ? ComponentControl::StoreSrc
486 : ComponentControl::Store1Fp;
487 velems->fsState.layout[i].ComponentPacking = ComponentEnable::XYZW;
488 velems->fsState.layout[i].InstanceDataStepRate =
489 attribs[i].instance_divisor;
490
491 /* Calculate the pitch of each stream */
492 const SWR_FORMAT_INFO &swr_desc = GetFormatInfo(
493 mesa_to_swr_format(attribs[i].src_format));
494 velems->stream_pitch[attribs[i].vertex_buffer_index] += swr_desc.Bpp;
495
496 if (attribs[i].instance_divisor != 0) {
497 velems->instanced_bufs |= 1U << attribs[i].vertex_buffer_index;
498 uint32_t *min_instance_div =
499 &velems->min_instance_div[attribs[i].vertex_buffer_index];
500 if (!*min_instance_div ||
501 attribs[i].instance_divisor < *min_instance_div)
502 *min_instance_div = attribs[i].instance_divisor;
503 }
504 }
505 }
506
507 return velems;
508 }
509
510 static void
511 swr_bind_vertex_elements_state(struct pipe_context *pipe, void *velems)
512 {
513 struct swr_context *ctx = swr_context(pipe);
514 struct swr_vertex_element_state *swr_velems =
515 (struct swr_vertex_element_state *)velems;
516
517 ctx->velems = swr_velems;
518 ctx->dirty |= SWR_NEW_VERTEX;
519 }
520
/**
 * Delete a vertex-elements state object.
 *
 * \param pipe    owning context (not used here)
 * \param velems  object to delete
 */
static void
swr_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
{
   /* XXX Need to destroy fetch shader? */
   delete (struct swr_vertex_element_state *)velems;
}
529
530
531 static void
532 swr_set_vertex_buffers(struct pipe_context *pipe,
533 unsigned start_slot,
534 unsigned num_elements,
535 const struct pipe_vertex_buffer *buffers)
536 {
537 struct swr_context *ctx = swr_context(pipe);
538
539 assert(num_elements <= PIPE_MAX_ATTRIBS);
540
541 util_set_vertex_buffers_count(ctx->vertex_buffer,
542 &ctx->num_vertex_buffers,
543 buffers,
544 start_slot,
545 num_elements);
546
547 ctx->dirty |= SWR_NEW_VERTEX;
548 }
549
550
551 static void
552 swr_set_index_buffer(struct pipe_context *pipe,
553 const struct pipe_index_buffer *ib)
554 {
555 struct swr_context *ctx = swr_context(pipe);
556
557 if (ib)
558 memcpy(&ctx->index_buffer, ib, sizeof(ctx->index_buffer));
559 else
560 memset(&ctx->index_buffer, 0, sizeof(ctx->index_buffer));
561
562 ctx->dirty |= SWR_NEW_VERTEX;
563 }
564
565 static void
566 swr_set_polygon_stipple(struct pipe_context *pipe,
567 const struct pipe_poly_stipple *stipple)
568 {
569 struct swr_context *ctx = swr_context(pipe);
570
571 ctx->poly_stipple = *stipple; /* struct copy */
572 ctx->dirty |= SWR_NEW_STIPPLE;
573 }
574
575 static void
576 swr_set_clip_state(struct pipe_context *pipe,
577 const struct pipe_clip_state *clip)
578 {
579 struct swr_context *ctx = swr_context(pipe);
580
581 ctx->clip = *clip;
582 /* XXX Unimplemented, but prevents crash */
583
584 ctx->dirty |= SWR_NEW_CLIP;
585 }
586
587
588 static void
589 swr_set_scissor_states(struct pipe_context *pipe,
590 unsigned start_slot,
591 unsigned num_viewports,
592 const struct pipe_scissor_state *scissor)
593 {
594 struct swr_context *ctx = swr_context(pipe);
595
596 ctx->scissor = *scissor;
597 ctx->swr_scissor.xmin = scissor->minx;
598 ctx->swr_scissor.xmax = scissor->maxx;
599 ctx->swr_scissor.ymin = scissor->miny;
600 ctx->swr_scissor.ymax = scissor->maxy;
601 ctx->dirty |= SWR_NEW_SCISSOR;
602 }
603
604 static void
605 swr_set_viewport_states(struct pipe_context *pipe,
606 unsigned start_slot,
607 unsigned num_viewports,
608 const struct pipe_viewport_state *vpt)
609 {
610 struct swr_context *ctx = swr_context(pipe);
611
612 ctx->viewport = *vpt;
613 ctx->dirty |= SWR_NEW_VIEWPORT;
614 }
615
616
617 static void
618 swr_set_framebuffer_state(struct pipe_context *pipe,
619 const struct pipe_framebuffer_state *fb)
620 {
621 struct swr_context *ctx = swr_context(pipe);
622
623 boolean changed = !util_framebuffer_state_equal(&ctx->framebuffer, fb);
624
625 assert(fb->width <= KNOB_GUARDBAND_WIDTH);
626 assert(fb->height <= KNOB_GUARDBAND_HEIGHT);
627
628 if (changed) {
629 util_copy_framebuffer_state(&ctx->framebuffer, fb);
630
631 ctx->dirty |= SWR_NEW_FRAMEBUFFER;
632 }
633 }
634
635
636 static void
637 swr_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
638 {
639 struct swr_context *ctx = swr_context(pipe);
640
641 if (sample_mask != ctx->sample_mask) {
642 ctx->sample_mask = sample_mask;
643 ctx->dirty |= SWR_NEW_RASTERIZER;
644 }
645 }
646
647 /*
648 * Update resource in-use status
649 * All resources bound to color or depth targets marked as WRITE resources.
650 * VBO Vertex/index buffers and texture views marked as READ resources.
651 */
652 void
653 swr_update_resource_status(struct pipe_context *pipe,
654 const struct pipe_draw_info *p_draw_info)
655 {
656 struct swr_context *ctx = swr_context(pipe);
657 struct pipe_framebuffer_state *fb = &ctx->framebuffer;
658
659 /* colorbuffer targets */
660 if (fb->nr_cbufs)
661 for (uint32_t i = 0; i < fb->nr_cbufs; ++i)
662 if (fb->cbufs[i])
663 swr_resource_write(fb->cbufs[i]->texture);
664
665 /* depth/stencil target */
666 if (fb->zsbuf)
667 swr_resource_write(fb->zsbuf->texture);
668
669 /* VBO vertex buffers */
670 for (uint32_t i = 0; i < ctx->num_vertex_buffers; i++) {
671 struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
672 if (!vb->user_buffer)
673 swr_resource_read(vb->buffer);
674 }
675
676 /* VBO index buffer */
677 if (p_draw_info && p_draw_info->indexed) {
678 struct pipe_index_buffer *ib = &ctx->index_buffer;
679 if (!ib->user_buffer)
680 swr_resource_read(ib->buffer);
681 }
682
683 /* transform feedback buffers */
684 for (uint32_t i = 0; i < ctx->num_so_targets; i++) {
685 struct pipe_stream_output_target *target = ctx->so_targets[i];
686 if (target && target->buffer)
687 swr_resource_write(target->buffer);
688 }
689
690 /* texture sampler views */
691 for (uint32_t j : {PIPE_SHADER_VERTEX, PIPE_SHADER_FRAGMENT}) {
692 for (uint32_t i = 0; i < ctx->num_sampler_views[j]; i++) {
693 struct pipe_sampler_view *view = ctx->sampler_views[j][i];
694 if (view)
695 swr_resource_read(view->texture);
696 }
697 }
698
699 /* constant buffers */
700 for (uint32_t j : {PIPE_SHADER_VERTEX, PIPE_SHADER_FRAGMENT}) {
701 for (uint32_t i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
702 struct pipe_constant_buffer *cb = &ctx->constants[j][i];
703 if (cb->buffer)
704 swr_resource_read(cb->buffer);
705 }
706 }
707 }
708
709 static void
710 swr_update_texture_state(struct swr_context *ctx,
711 unsigned shader_type,
712 unsigned num_sampler_views,
713 swr_jit_texture *textures)
714 {
715 for (unsigned i = 0; i < num_sampler_views; i++) {
716 struct pipe_sampler_view *view =
717 ctx->sampler_views[shader_type][i];
718 struct swr_jit_texture *jit_tex = &textures[i];
719
720 memset(jit_tex, 0, sizeof(*jit_tex));
721 if (view) {
722 struct pipe_resource *res = view->texture;
723 struct swr_resource *swr_res = swr_resource(res);
724 SWR_SURFACE_STATE *swr = &swr_res->swr;
725 size_t *mip_offsets = swr_res->mip_offsets;
726 if (swr_res->has_depth && swr_res->has_stencil &&
727 !util_format_has_depth(util_format_description(view->format))) {
728 swr = &swr_res->secondary;
729 mip_offsets = swr_res->secondary_mip_offsets;
730 }
731
732 jit_tex->width = res->width0;
733 jit_tex->height = res->height0;
734 jit_tex->base_ptr = swr->pBaseAddress;
735 if (view->target != PIPE_BUFFER) {
736 jit_tex->first_level = view->u.tex.first_level;
737 jit_tex->last_level = view->u.tex.last_level;
738 if (view->target == PIPE_TEXTURE_3D)
739 jit_tex->depth = res->depth0;
740 else
741 jit_tex->depth =
742 view->u.tex.last_layer - view->u.tex.first_layer + 1;
743 jit_tex->base_ptr += view->u.tex.first_layer *
744 swr->qpitch * swr->pitch;
745 } else {
746 unsigned view_blocksize = util_format_get_blocksize(view->format);
747 jit_tex->base_ptr += view->u.buf.offset;
748 jit_tex->width = view->u.buf.size / view_blocksize;
749 jit_tex->depth = 1;
750 }
751
752 for (unsigned level = jit_tex->first_level;
753 level <= jit_tex->last_level;
754 level++) {
755 jit_tex->row_stride[level] = swr->pitch;
756 jit_tex->img_stride[level] = swr->qpitch * swr->pitch;
757 jit_tex->mip_offsets[level] = mip_offsets[level];
758 }
759 }
760 }
761 }
762
763 static void
764 swr_update_sampler_state(struct swr_context *ctx,
765 unsigned shader_type,
766 unsigned num_samplers,
767 swr_jit_sampler *samplers)
768 {
769 for (unsigned i = 0; i < num_samplers; i++) {
770 const struct pipe_sampler_state *sampler =
771 ctx->samplers[shader_type][i];
772
773 if (sampler) {
774 samplers[i].min_lod = sampler->min_lod;
775 samplers[i].max_lod = sampler->max_lod;
776 samplers[i].lod_bias = sampler->lod_bias;
777 COPY_4V(samplers[i].border_color, sampler->border_color.f);
778 }
779 }
780 }
781
782 static void
783 swr_update_constants(struct swr_context *ctx, enum pipe_shader_type shaderType)
784 {
785 swr_draw_context *pDC = &ctx->swrDC;
786
787 const float **constant;
788 uint32_t *num_constants;
789 struct swr_scratch_space *scratch;
790
791 switch (shaderType) {
792 case PIPE_SHADER_VERTEX:
793 constant = pDC->constantVS;
794 num_constants = pDC->num_constantsVS;
795 scratch = &ctx->scratch->vs_constants;
796 break;
797 case PIPE_SHADER_FRAGMENT:
798 constant = pDC->constantFS;
799 num_constants = pDC->num_constantsFS;
800 scratch = &ctx->scratch->fs_constants;
801 break;
802 default:
803 debug_printf("Unsupported shader type constants\n");
804 return;
805 }
806
807 for (UINT i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
808 const pipe_constant_buffer *cb = &ctx->constants[shaderType][i];
809 num_constants[i] = cb->buffer_size;
810 if (cb->buffer) {
811 constant[i] =
812 (const float *)(swr_resource_data(cb->buffer) +
813 cb->buffer_offset);
814 } else {
815 /* Need to copy these constants to scratch space */
816 if (cb->user_buffer && cb->buffer_size) {
817 const void *ptr =
818 ((const uint8_t *)cb->user_buffer + cb->buffer_offset);
819 uint32_t size = AlignUp(cb->buffer_size, 4);
820 ptr = swr_copy_to_scratch_space(ctx, scratch, ptr, size);
821 constant[i] = (const float *)ptr;
822 }
823 }
824 }
825 }
826
827 static bool
828 swr_change_rt(struct swr_context *ctx,
829 unsigned attachment,
830 const struct pipe_surface *sf)
831 {
832 swr_draw_context *pDC = &ctx->swrDC;
833 struct SWR_SURFACE_STATE *rt = &pDC->renderTargets[attachment];
834
835 /* Do nothing if the render target hasn't changed */
836 if ((!sf || !sf->texture) && rt->pBaseAddress == nullptr)
837 return false;
838
839 /* Deal with disabling RT up front */
840 if (!sf || !sf->texture) {
841 /* If detaching attachment, mark tiles as RESOLVED so core
842 * won't try to load from non-existent target. */
843 swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_RESOLVED);
844 *rt = {0};
845 return true;
846 }
847
848 const struct swr_resource *swr = swr_resource(sf->texture);
849 const SWR_SURFACE_STATE *swr_surface = &swr->swr;
850 SWR_FORMAT fmt = mesa_to_swr_format(sf->format);
851
852 if (attachment == SWR_ATTACHMENT_STENCIL && swr->secondary.pBaseAddress) {
853 swr_surface = &swr->secondary;
854 fmt = swr_surface->format;
855 }
856
857 if (rt->pBaseAddress == swr_surface->pBaseAddress &&
858 rt->format == fmt &&
859 rt->lod == sf->u.tex.level &&
860 rt->arrayIndex == sf->u.tex.first_layer)
861 return false;
862
863 bool need_fence = false;
864
865 /* StoreTile for changed target */
866 if (rt->pBaseAddress) {
867 /* If changing attachment to a new target, mark tiles as
868 * INVALID so they are reloaded from surface. */
869 swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_INVALID);
870 need_fence = true;
871 }
872
873 /* Make new attachment */
874 *rt = *swr_surface;
875 rt->format = fmt;
876 rt->lod = sf->u.tex.level;
877 rt->arrayIndex = sf->u.tex.first_layer;
878
879 return need_fence;
880 }
881
882 static inline void
883 swr_user_vbuf_range(const struct pipe_draw_info *info,
884 const struct swr_vertex_element_state *velems,
885 const struct pipe_vertex_buffer *vb,
886 uint32_t i,
887 uint32_t *totelems,
888 uint32_t *base,
889 uint32_t *size)
890 {
891 /* FIXME: The size is too large - we don't access the full extra stride. */
892 unsigned elems;
893 if (velems->instanced_bufs & (1U << i)) {
894 elems = info->instance_count / velems->min_instance_div[i] + 1;
895 *totelems = info->start_instance + elems;
896 *base = info->start_instance * vb->stride;
897 *size = elems * vb->stride;
898 } else if (vb->stride) {
899 elems = info->max_index - info->min_index + 1;
900 *totelems = info->max_index + 1;
901 *base = info->min_index * vb->stride;
902 *size = elems * vb->stride;
903 } else {
904 *totelems = 1;
905 *base = 0;
906 *size = velems->stream_pitch[i];
907 }
908 }
909
910 void
911 swr_update_derived(struct pipe_context *pipe,
912 const struct pipe_draw_info *p_draw_info)
913 {
914 struct swr_context *ctx = swr_context(pipe);
915 struct swr_screen *screen = swr_screen(pipe->screen);
916
917 /* Update screen->pipe to current pipe context. */
918 if (screen->pipe != pipe)
919 screen->pipe = pipe;
920
921 /* Any state that requires dirty flags to be re-triggered sets this mask */
922 /* For example, user_buffer vertex and index buffers. */
923 unsigned post_update_dirty_flags = 0;
924
925 /* Render Targets */
926 if (ctx->dirty & SWR_NEW_FRAMEBUFFER) {
927 struct pipe_framebuffer_state *fb = &ctx->framebuffer;
928 const struct util_format_description *desc = NULL;
929 bool need_fence = false;
930
931 /* colorbuffer targets */
932 if (fb->nr_cbufs) {
933 for (unsigned i = 0; i < fb->nr_cbufs; ++i)
934 need_fence |= swr_change_rt(
935 ctx, SWR_ATTACHMENT_COLOR0 + i, fb->cbufs[i]);
936 }
937 for (unsigned i = fb->nr_cbufs; i < SWR_NUM_RENDERTARGETS; ++i)
938 need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_COLOR0 + i, NULL);
939
940 /* depth/stencil target */
941 if (fb->zsbuf)
942 desc = util_format_description(fb->zsbuf->format);
943 if (fb->zsbuf && util_format_has_depth(desc))
944 need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, fb->zsbuf);
945 else
946 need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, NULL);
947
948 if (fb->zsbuf && util_format_has_stencil(desc))
949 need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, fb->zsbuf);
950 else
951 need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, NULL);
952
953 /* This fence ensures any attachment changes are resolved before the
954 * next draw */
955 if (need_fence)
956 swr_fence_submit(ctx, screen->flush_fence);
957 }
958
959 /* Raster state */
960 if (ctx->dirty & (SWR_NEW_RASTERIZER |
961 SWR_NEW_VS | // clipping
962 SWR_NEW_FRAMEBUFFER)) {
963 pipe_rasterizer_state *rasterizer = ctx->rasterizer;
964 pipe_framebuffer_state *fb = &ctx->framebuffer;
965
966 SWR_RASTSTATE *rastState = &ctx->derived.rastState;
967 rastState->cullMode = swr_convert_cull_mode(rasterizer->cull_face);
968 rastState->frontWinding = rasterizer->front_ccw
969 ? SWR_FRONTWINDING_CCW
970 : SWR_FRONTWINDING_CW;
971 rastState->scissorEnable = rasterizer->scissor;
972 rastState->pointSize = rasterizer->point_size > 0.0f
973 ? rasterizer->point_size
974 : 1.0f;
975 rastState->lineWidth = rasterizer->line_width > 0.0f
976 ? rasterizer->line_width
977 : 1.0f;
978
979 rastState->pointParam = rasterizer->point_size_per_vertex;
980
981 rastState->pointSpriteEnable = rasterizer->sprite_coord_enable;
982 rastState->pointSpriteTopOrigin =
983 rasterizer->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT;
984
985 /* XXX TODO: Add multisample */
986 rastState->msaaRastEnable = false;
987 rastState->rastMode = SWR_MSAA_RASTMODE_OFF_PIXEL;
988 rastState->sampleCount = SWR_MULTISAMPLE_1X;
989 rastState->forcedSampleCount = false;
990
991 bool do_offset = false;
992 switch (rasterizer->fill_front) {
993 case PIPE_POLYGON_MODE_FILL:
994 do_offset = rasterizer->offset_tri;
995 break;
996 case PIPE_POLYGON_MODE_LINE:
997 do_offset = rasterizer->offset_line;
998 break;
999 case PIPE_POLYGON_MODE_POINT:
1000 do_offset = rasterizer->offset_point;
1001 break;
1002 }
1003
1004 if (do_offset) {
1005 rastState->depthBias = rasterizer->offset_units;
1006 rastState->slopeScaledDepthBias = rasterizer->offset_scale;
1007 rastState->depthBiasClamp = rasterizer->offset_clamp;
1008 } else {
1009 rastState->depthBias = 0;
1010 rastState->slopeScaledDepthBias = 0;
1011 rastState->depthBiasClamp = 0;
1012 }
1013 struct pipe_surface *zb = fb->zsbuf;
1014 if (zb && swr_resource(zb->texture)->has_depth)
1015 rastState->depthFormat = swr_resource(zb->texture)->swr.format;
1016
1017 rastState->depthClipEnable = rasterizer->depth_clip;
1018 rastState->clipHalfZ = rasterizer->clip_halfz;
1019
1020 rastState->clipDistanceMask =
1021 ctx->vs->info.base.num_written_clipdistance ?
1022 ctx->vs->info.base.clipdist_writemask & rasterizer->clip_plane_enable :
1023 rasterizer->clip_plane_enable;
1024
1025 rastState->cullDistanceMask =
1026 ctx->vs->info.base.culldist_writemask << ctx->vs->info.base.num_written_clipdistance;
1027
1028 SwrSetRastState(ctx->swrContext, rastState);
1029 }
1030
1031 /* Scissor */
1032 if (ctx->dirty & SWR_NEW_SCISSOR) {
1033 SwrSetScissorRects(ctx->swrContext, 1, &ctx->swr_scissor);
1034 }
1035
1036 /* Viewport */
1037 if (ctx->dirty & (SWR_NEW_VIEWPORT | SWR_NEW_FRAMEBUFFER
1038 | SWR_NEW_RASTERIZER)) {
1039 pipe_viewport_state *state = &ctx->viewport;
1040 pipe_framebuffer_state *fb = &ctx->framebuffer;
1041 pipe_rasterizer_state *rasterizer = ctx->rasterizer;
1042
1043 SWR_VIEWPORT *vp = &ctx->derived.vp;
1044 SWR_VIEWPORT_MATRICES *vpm = &ctx->derived.vpm;
1045
1046 vp->x = state->translate[0] - state->scale[0];
1047 vp->width = 2 * state->scale[0];
1048 vp->y = state->translate[1] - fabs(state->scale[1]);
1049 vp->height = 2 * fabs(state->scale[1]);
1050 util_viewport_zmin_zmax(state, rasterizer->clip_halfz,
1051 &vp->minZ, &vp->maxZ);
1052
1053 vpm->m00[0] = state->scale[0];
1054 vpm->m11[0] = state->scale[1];
1055 vpm->m22[0] = state->scale[2];
1056 vpm->m30[0] = state->translate[0];
1057 vpm->m31[0] = state->translate[1];
1058 vpm->m32[0] = state->translate[2];
1059
1060 /* Now that the matrix is calculated, clip the view coords to screen
1061 * size. OpenGL allows for -ve x,y in the viewport. */
1062 if (vp->x < 0.0f) {
1063 vp->width += vp->x;
1064 vp->x = 0.0f;
1065 }
1066 if (vp->y < 0.0f) {
1067 vp->height += vp->y;
1068 vp->y = 0.0f;
1069 }
1070 vp->width = std::min(vp->width, (float)fb->width - vp->x);
1071 vp->height = std::min(vp->height, (float)fb->height - vp->y);
1072
1073 SwrSetViewports(ctx->swrContext, 1, vp, vpm);
1074 }
1075
1076 /* Set vertex & index buffers */
1077 /* (using draw info if called by swr_draw_vbo) */
1078 if (ctx->dirty & SWR_NEW_VERTEX) {
1079 uint32_t scratch_total;
1080 uint8_t *scratch = NULL;
1081
1082 /* If being called by swr_draw_vbo, copy draw details */
1083 struct pipe_draw_info info = {0};
1084 if (p_draw_info)
1085 info = *p_draw_info;
1086
1087 /* We must get all the scratch space in one go */
1088 scratch_total = 0;
1089 for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
1090 struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
1091
1092 if (!vb->user_buffer)
1093 continue;
1094
1095 uint32_t elems, base, size;
1096 swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size);
1097 scratch_total += AlignUp(size, 4);
1098 }
1099
1100 if (scratch_total) {
1101 scratch = (uint8_t *)swr_copy_to_scratch_space(
1102 ctx, &ctx->scratch->vertex_buffer, NULL, scratch_total);
1103 }
1104
1105 /* vertex buffers */
1106 SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS];
1107 for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
1108 uint32_t size, pitch, elems, partial_inbounds;
1109 uint32_t min_vertex_index;
1110 const uint8_t *p_data;
1111 struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
1112
1113 pitch = vb->stride;
1114 if (!vb->user_buffer) {
1115 /* VBO
1116 * size is based on buffer->width0 rather than info.max_index
1117 * to prevent having to validate VBO on each draw */
1118 size = vb->buffer->width0;
1119 elems = size / pitch;
1120 partial_inbounds = size % pitch;
1121 min_vertex_index = 0;
1122
1123 p_data = swr_resource_data(vb->buffer) + vb->buffer_offset;
1124 } else {
1125 /* Client buffer
1126 * client memory is one-time use, re-trigger SWR_NEW_VERTEX to
1127 * revalidate on each draw */
1128 post_update_dirty_flags |= SWR_NEW_VERTEX;
1129
1130 uint32_t base;
1131 swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size);
1132 partial_inbounds = 0;
1133 min_vertex_index = info.min_index;
1134
1135 /* Copy only needed vertices to scratch space */
1136 size = AlignUp(size, 4);
1137 const void *ptr = (const uint8_t *) vb->user_buffer + base;
1138 memcpy(scratch, ptr, size);
1139 ptr = scratch;
1140 scratch += size;
1141 p_data = (const uint8_t *)ptr - base;
1142 }
1143
1144 swrVertexBuffers[i] = {0};
1145 swrVertexBuffers[i].index = i;
1146 swrVertexBuffers[i].pitch = pitch;
1147 swrVertexBuffers[i].pData = p_data;
1148 swrVertexBuffers[i].size = size;
1149 swrVertexBuffers[i].minVertex = min_vertex_index;
1150 swrVertexBuffers[i].maxVertex = elems;
1151 swrVertexBuffers[i].partialInboundsSize = partial_inbounds;
1152 }
1153
1154 SwrSetVertexBuffers(
1155 ctx->swrContext, ctx->num_vertex_buffers, swrVertexBuffers);
1156
1157 /* index buffer, if required (info passed in by swr_draw_vbo) */
1158 SWR_FORMAT index_type = R32_UINT; /* Default for non-indexed draws */
1159 if (info.indexed) {
1160 const uint8_t *p_data;
1161 uint32_t size, pitch;
1162 struct pipe_index_buffer *ib = &ctx->index_buffer;
1163
1164 pitch = ib->index_size ? ib->index_size : sizeof(uint32_t);
1165 index_type = swr_convert_index_type(pitch);
1166
1167 if (!ib->user_buffer) {
1168 /* VBO
1169 * size is based on buffer->width0 rather than info.count
1170 * to prevent having to validate VBO on each draw */
1171 size = ib->buffer->width0;
1172 p_data = swr_resource_data(ib->buffer) + ib->offset;
1173 } else {
1174 /* Client buffer
1175 * client memory is one-time use, re-trigger SWR_NEW_VERTEX to
1176 * revalidate on each draw */
1177 post_update_dirty_flags |= SWR_NEW_VERTEX;
1178
1179 size = info.count * pitch;
1180 size = AlignUp(size, 4);
1181
1182 /* Copy indices to scratch space */
1183 const void *ptr = ib->user_buffer;
1184 ptr = swr_copy_to_scratch_space(
1185 ctx, &ctx->scratch->index_buffer, ptr, size);
1186 p_data = (const uint8_t *)ptr;
1187 }
1188
1189 SWR_INDEX_BUFFER_STATE swrIndexBuffer;
1190 swrIndexBuffer.format = swr_convert_index_type(ib->index_size);
1191 swrIndexBuffer.pIndices = p_data;
1192 swrIndexBuffer.size = size;
1193
1194 SwrSetIndexBuffer(ctx->swrContext, &swrIndexBuffer);
1195 }
1196
1197 struct swr_vertex_element_state *velems = ctx->velems;
1198 if (velems && velems->fsState.indexType != index_type) {
1199 velems->fsFunc = NULL;
1200 velems->fsState.indexType = index_type;
1201 }
1202 }
1203
1204 /* VertexShader */
1205 if (ctx->dirty & (SWR_NEW_VS |
1206 SWR_NEW_RASTERIZER | // for clip planes
1207 SWR_NEW_SAMPLER |
1208 SWR_NEW_SAMPLER_VIEW |
1209 SWR_NEW_FRAMEBUFFER)) {
1210 swr_jit_vs_key key;
1211 swr_generate_vs_key(key, ctx, ctx->vs);
1212 auto search = ctx->vs->map.find(key);
1213 PFN_VERTEX_FUNC func;
1214 if (search != ctx->vs->map.end()) {
1215 func = search->second->shader;
1216 } else {
1217 func = swr_compile_vs(ctx, key);
1218 }
1219 SwrSetVertexFunc(ctx->swrContext, func);
1220
1221 /* JIT sampler state */
1222 if (ctx->dirty & SWR_NEW_SAMPLER) {
1223 swr_update_sampler_state(ctx,
1224 PIPE_SHADER_VERTEX,
1225 key.nr_samplers,
1226 ctx->swrDC.samplersVS);
1227 }
1228
1229 /* JIT sampler view state */
1230 if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
1231 swr_update_texture_state(ctx,
1232 PIPE_SHADER_VERTEX,
1233 key.nr_sampler_views,
1234 ctx->swrDC.texturesVS);
1235 }
1236 }
1237
1238 /* FragmentShader */
1239 if (ctx->dirty & (SWR_NEW_FS | SWR_NEW_SAMPLER | SWR_NEW_SAMPLER_VIEW
1240 | SWR_NEW_RASTERIZER | SWR_NEW_FRAMEBUFFER)) {
1241 swr_jit_fs_key key;
1242 swr_generate_fs_key(key, ctx, ctx->fs);
1243 auto search = ctx->fs->map.find(key);
1244 PFN_PIXEL_KERNEL func;
1245 if (search != ctx->fs->map.end()) {
1246 func = search->second->shader;
1247 } else {
1248 func = swr_compile_fs(ctx, key);
1249 }
1250 SWR_PS_STATE psState = {0};
1251 psState.pfnPixelShader = func;
1252 psState.killsPixel = ctx->fs->info.base.uses_kill;
1253 psState.inputCoverage = SWR_INPUT_COVERAGE_NORMAL;
1254 psState.writesODepth = ctx->fs->info.base.writes_z;
1255 psState.usesSourceDepth = ctx->fs->info.base.reads_z;
1256 psState.shadingRate = SWR_SHADING_RATE_PIXEL; // XXX
1257 psState.numRenderTargets = ctx->framebuffer.nr_cbufs;
1258 psState.posOffset = SWR_PS_POSITION_SAMPLE_NONE; // XXX msaa
1259 uint32_t barycentricsMask = 0;
1260 #if 0
1261 // when we switch to mesa-master
1262 if (ctx->fs->info.base.uses_persp_center ||
1263 ctx->fs->info.base.uses_linear_center)
1264 barycentricsMask |= SWR_BARYCENTRIC_PER_PIXEL_MASK;
1265 if (ctx->fs->info.base.uses_persp_centroid ||
1266 ctx->fs->info.base.uses_linear_centroid)
1267 barycentricsMask |= SWR_BARYCENTRIC_CENTROID_MASK;
1268 if (ctx->fs->info.base.uses_persp_sample ||
1269 ctx->fs->info.base.uses_linear_sample)
1270 barycentricsMask |= SWR_BARYCENTRIC_PER_SAMPLE_MASK;
1271 #else
1272 for (unsigned i = 0; i < ctx->fs->info.base.num_inputs; i++) {
1273 switch (ctx->fs->info.base.input_interpolate_loc[i]) {
1274 case TGSI_INTERPOLATE_LOC_CENTER:
1275 barycentricsMask |= SWR_BARYCENTRIC_PER_PIXEL_MASK;
1276 break;
1277 case TGSI_INTERPOLATE_LOC_CENTROID:
1278 barycentricsMask |= SWR_BARYCENTRIC_CENTROID_MASK;
1279 break;
1280 case TGSI_INTERPOLATE_LOC_SAMPLE:
1281 barycentricsMask |= SWR_BARYCENTRIC_PER_SAMPLE_MASK;
1282 break;
1283 }
1284 }
1285 #endif
1286 psState.barycentricsMask = barycentricsMask;
1287 psState.usesUAV = false; // XXX
1288 psState.forceEarlyZ = false;
1289 SwrSetPixelShaderState(ctx->swrContext, &psState);
1290
1291 /* JIT sampler state */
1292 if (ctx->dirty & (SWR_NEW_SAMPLER |
1293 SWR_NEW_FS)) {
1294 swr_update_sampler_state(ctx,
1295 PIPE_SHADER_FRAGMENT,
1296 key.nr_samplers,
1297 ctx->swrDC.samplersFS);
1298 }
1299
1300 /* JIT sampler view state */
1301 if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW |
1302 SWR_NEW_FRAMEBUFFER |
1303 SWR_NEW_FS)) {
1304 swr_update_texture_state(ctx,
1305 PIPE_SHADER_FRAGMENT,
1306 key.nr_sampler_views,
1307 ctx->swrDC.texturesFS);
1308 }
1309 }
1310
1311
1312 /* VertexShader Constants */
1313 if (ctx->dirty & SWR_NEW_VSCONSTANTS) {
1314 swr_update_constants(ctx, PIPE_SHADER_VERTEX);
1315 }
1316
1317 /* FragmentShader Constants */
1318 if (ctx->dirty & SWR_NEW_FSCONSTANTS) {
1319 swr_update_constants(ctx, PIPE_SHADER_FRAGMENT);
1320 }
1321
1322 /* Depth/stencil state */
1323 if (ctx->dirty & (SWR_NEW_DEPTH_STENCIL_ALPHA | SWR_NEW_FRAMEBUFFER)) {
1324 struct pipe_depth_state *depth = &(ctx->depth_stencil->depth);
1325 struct pipe_stencil_state *stencil = ctx->depth_stencil->stencil;
1326 SWR_DEPTH_STENCIL_STATE depthStencilState = {{0}};
1327 SWR_DEPTH_BOUNDS_STATE depthBoundsState = {0};
1328
1329 /* XXX, incomplete. Need to flesh out stencil & alpha test state
1330 struct pipe_stencil_state *front_stencil =
1331 ctx->depth_stencil.stencil[0];
1332 struct pipe_stencil_state *back_stencil = ctx->depth_stencil.stencil[1];
1333 struct pipe_alpha_state alpha;
1334 */
1335 if (stencil[0].enabled) {
1336 depthStencilState.stencilWriteEnable = 1;
1337 depthStencilState.stencilTestEnable = 1;
1338 depthStencilState.stencilTestFunc =
1339 swr_convert_depth_func(stencil[0].func);
1340
1341 depthStencilState.stencilPassDepthPassOp =
1342 swr_convert_stencil_op(stencil[0].zpass_op);
1343 depthStencilState.stencilPassDepthFailOp =
1344 swr_convert_stencil_op(stencil[0].zfail_op);
1345 depthStencilState.stencilFailOp =
1346 swr_convert_stencil_op(stencil[0].fail_op);
1347 depthStencilState.stencilWriteMask = stencil[0].writemask;
1348 depthStencilState.stencilTestMask = stencil[0].valuemask;
1349 depthStencilState.stencilRefValue = ctx->stencil_ref.ref_value[0];
1350 }
1351 if (stencil[1].enabled) {
1352 depthStencilState.doubleSidedStencilTestEnable = 1;
1353
1354 depthStencilState.backfaceStencilTestFunc =
1355 swr_convert_depth_func(stencil[1].func);
1356
1357 depthStencilState.backfaceStencilPassDepthPassOp =
1358 swr_convert_stencil_op(stencil[1].zpass_op);
1359 depthStencilState.backfaceStencilPassDepthFailOp =
1360 swr_convert_stencil_op(stencil[1].zfail_op);
1361 depthStencilState.backfaceStencilFailOp =
1362 swr_convert_stencil_op(stencil[1].fail_op);
1363 depthStencilState.backfaceStencilWriteMask = stencil[1].writemask;
1364 depthStencilState.backfaceStencilTestMask = stencil[1].valuemask;
1365
1366 depthStencilState.backfaceStencilRefValue =
1367 ctx->stencil_ref.ref_value[1];
1368 }
1369
1370 depthStencilState.depthTestEnable = depth->enabled;
1371 depthStencilState.depthTestFunc = swr_convert_depth_func(depth->func);
1372 depthStencilState.depthWriteEnable = depth->writemask;
1373 SwrSetDepthStencilState(ctx->swrContext, &depthStencilState);
1374
1375 depthBoundsState.depthBoundsTestEnable = depth->bounds_test;
1376 depthBoundsState.depthBoundsTestMinValue = depth->bounds_min;
1377 depthBoundsState.depthBoundsTestMaxValue = depth->bounds_max;
1378 SwrSetDepthBoundsState(ctx->swrContext, &depthBoundsState);
1379 }
1380
1381 /* Blend State */
1382 if (ctx->dirty & (SWR_NEW_BLEND |
1383 SWR_NEW_FRAMEBUFFER |
1384 SWR_NEW_DEPTH_STENCIL_ALPHA)) {
1385 struct pipe_framebuffer_state *fb = &ctx->framebuffer;
1386
1387 SWR_BLEND_STATE blendState;
1388 memcpy(&blendState, &ctx->blend->blendState, sizeof(blendState));
1389 blendState.constantColor[0] = ctx->blend_color.color[0];
1390 blendState.constantColor[1] = ctx->blend_color.color[1];
1391 blendState.constantColor[2] = ctx->blend_color.color[2];
1392 blendState.constantColor[3] = ctx->blend_color.color[3];
1393 blendState.alphaTestReference =
1394 *((uint32_t*)&ctx->depth_stencil->alpha.ref_value);
1395
1396 // XXX MSAA
1397 blendState.sampleMask = 0;
1398 blendState.sampleCount = SWR_MULTISAMPLE_1X;
1399
1400 /* If there are no color buffers bound, disable writes on RT0
1401 * and skip loop */
1402 if (fb->nr_cbufs == 0) {
1403 blendState.renderTarget[0].writeDisableRed = 1;
1404 blendState.renderTarget[0].writeDisableGreen = 1;
1405 blendState.renderTarget[0].writeDisableBlue = 1;
1406 blendState.renderTarget[0].writeDisableAlpha = 1;
1407 SwrSetBlendFunc(ctx->swrContext, 0, NULL);
1408 }
1409 else
1410 for (int target = 0;
1411 target < std::min(SWR_NUM_RENDERTARGETS,
1412 PIPE_MAX_COLOR_BUFS);
1413 target++) {
1414 if (!fb->cbufs[target])
1415 continue;
1416
1417 struct swr_resource *colorBuffer =
1418 swr_resource(fb->cbufs[target]->texture);
1419
1420 BLEND_COMPILE_STATE compileState;
1421 memset(&compileState, 0, sizeof(compileState));
1422 compileState.format = colorBuffer->swr.format;
1423 memcpy(&compileState.blendState,
1424 &ctx->blend->compileState[target],
1425 sizeof(compileState.blendState));
1426
1427 const SWR_FORMAT_INFO& info = GetFormatInfo(compileState.format);
1428 if (compileState.blendState.logicOpEnable &&
1429 ((info.type[0] == SWR_TYPE_FLOAT) || info.isSRGB)) {
1430 compileState.blendState.logicOpEnable = false;
1431 }
1432
1433 if (info.type[0] == SWR_TYPE_SINT || info.type[0] == SWR_TYPE_UINT)
1434 compileState.blendState.blendEnable = false;
1435
1436 if (compileState.blendState.blendEnable == false &&
1437 compileState.blendState.logicOpEnable == false &&
1438 ctx->depth_stencil->alpha.enabled == 0) {
1439 SwrSetBlendFunc(ctx->swrContext, target, NULL);
1440 continue;
1441 }
1442
1443 compileState.desc.alphaTestEnable =
1444 ctx->depth_stencil->alpha.enabled;
1445 compileState.desc.independentAlphaBlendEnable =
1446 (compileState.blendState.sourceBlendFactor !=
1447 compileState.blendState.sourceAlphaBlendFactor) ||
1448 (compileState.blendState.destBlendFactor !=
1449 compileState.blendState.destAlphaBlendFactor) ||
1450 (compileState.blendState.colorBlendFunc !=
1451 compileState.blendState.alphaBlendFunc);
1452 compileState.desc.alphaToCoverageEnable =
1453 ctx->blend->pipe.alpha_to_coverage;
1454 compileState.desc.sampleMaskEnable = 0; // XXX
1455 compileState.desc.numSamples = 1; // XXX
1456
1457 compileState.alphaTestFunction =
1458 swr_convert_depth_func(ctx->depth_stencil->alpha.func);
1459 compileState.alphaTestFormat = ALPHA_TEST_FLOAT32; // xxx
1460
1461 compileState.Canonicalize();
1462
1463 PFN_BLEND_JIT_FUNC func = NULL;
1464 auto search = ctx->blendJIT->find(compileState);
1465 if (search != ctx->blendJIT->end()) {
1466 func = search->second;
1467 } else {
1468 HANDLE hJitMgr = screen->hJitMgr;
1469 func = JitCompileBlend(hJitMgr, compileState);
1470 debug_printf("BLEND shader %p\n", func);
1471 assert(func && "Error: BlendShader = NULL");
1472
1473 ctx->blendJIT->insert(std::make_pair(compileState, func));
1474 }
1475 SwrSetBlendFunc(ctx->swrContext, target, func);
1476 }
1477
1478 SwrSetBlendState(ctx->swrContext, &blendState);
1479 }
1480
1481 if (ctx->dirty & SWR_NEW_STIPPLE) {
1482 /* XXX What to do with this one??? SWR doesn't stipple */
1483 }
1484
1485 if (ctx->dirty & (SWR_NEW_VS | SWR_NEW_SO | SWR_NEW_RASTERIZER)) {
1486 ctx->vs->soState.rasterizerDisable =
1487 ctx->rasterizer->rasterizer_discard;
1488 SwrSetSoState(ctx->swrContext, &ctx->vs->soState);
1489
1490 pipe_stream_output_info *stream_output = &ctx->vs->pipe.stream_output;
1491
1492 for (uint32_t i = 0; i < ctx->num_so_targets; i++) {
1493 SWR_STREAMOUT_BUFFER buffer = {0};
1494 if (!ctx->so_targets[i])
1495 continue;
1496 buffer.enable = true;
1497 buffer.pBuffer =
1498 (uint32_t *)(swr_resource_data(ctx->so_targets[i]->buffer) +
1499 ctx->so_targets[i]->buffer_offset);
1500 buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2;
1501 buffer.pitch = stream_output->stride[i];
1502 buffer.streamOffset = 0;
1503
1504 SwrSetSoBuffers(ctx->swrContext, &buffer, i);
1505 }
1506 }
1507
1508 if (ctx->dirty & SWR_NEW_CLIP) {
1509 // shader exporting clip distances overrides all user clip planes
1510 if (ctx->rasterizer->clip_plane_enable &&
1511 !ctx->vs->info.base.num_written_clipdistance)
1512 {
1513 swr_draw_context *pDC = &ctx->swrDC;
1514 memcpy(pDC->userClipPlanes,
1515 ctx->clip.ucp,
1516 sizeof(pDC->userClipPlanes));
1517 }
1518 }
1519
1520 // set up backend state
1521 SWR_BACKEND_STATE backendState = {0};
1522 backendState.numAttributes =
1523 ctx->vs->info.base.num_outputs - 1 +
1524 (ctx->rasterizer->sprite_coord_enable ? 1 : 0);
1525 for (unsigned i = 0; i < backendState.numAttributes; i++)
1526 backendState.numComponents[i] = 4;
1527 backendState.constantInterpolationMask = ctx->fs->constantMask |
1528 (ctx->rasterizer->flatshade ? ctx->fs->flatConstantMask : 0);
1529 backendState.pointSpriteTexCoordMask = ctx->fs->pointSpriteMask;
1530
1531 SwrSetBackendState(ctx->swrContext, &backendState);
1532
1533 /* Ensure that any in-progress attachment change StoreTiles finish */
1534 if (swr_is_fence_pending(screen->flush_fence))
1535 swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
1536
1537 /* Finally, update the in-use status of all resources involved in draw */
1538 swr_update_resource_status(pipe, p_draw_info);
1539
1540 ctx->dirty = post_update_dirty_flags;
1541 }
1542
1543
1544 static struct pipe_stream_output_target *
1545 swr_create_so_target(struct pipe_context *pipe,
1546 struct pipe_resource *buffer,
1547 unsigned buffer_offset,
1548 unsigned buffer_size)
1549 {
1550 struct pipe_stream_output_target *target;
1551
1552 target = CALLOC_STRUCT(pipe_stream_output_target);
1553 if (!target)
1554 return NULL;
1555
1556 target->context = pipe;
1557 target->reference.count = 1;
1558 pipe_resource_reference(&target->buffer, buffer);
1559 target->buffer_offset = buffer_offset;
1560 target->buffer_size = buffer_size;
1561 return target;
1562 }
1563
1564 static void
1565 swr_destroy_so_target(struct pipe_context *pipe,
1566 struct pipe_stream_output_target *target)
1567 {
1568 pipe_resource_reference(&target->buffer, NULL);
1569 FREE(target);
1570 }
1571
1572 static void
1573 swr_set_so_targets(struct pipe_context *pipe,
1574 unsigned num_targets,
1575 struct pipe_stream_output_target **targets,
1576 const unsigned *offsets)
1577 {
1578 struct swr_context *swr = swr_context(pipe);
1579 uint32_t i;
1580
1581 assert(num_targets <= MAX_SO_STREAMS);
1582
1583 for (i = 0; i < num_targets; i++) {
1584 pipe_so_target_reference(
1585 (struct pipe_stream_output_target **)&swr->so_targets[i],
1586 targets[i]);
1587 }
1588
1589 for (/* fall-through */; i < swr->num_so_targets; i++) {
1590 pipe_so_target_reference(
1591 (struct pipe_stream_output_target **)&swr->so_targets[i], NULL);
1592 }
1593
1594 swr->num_so_targets = num_targets;
1595
1596 swr->dirty |= SWR_NEW_SO;
1597 }
1598
1599
1600 void
1601 swr_state_init(struct pipe_context *pipe)
1602 {
1603 pipe->create_blend_state = swr_create_blend_state;
1604 pipe->bind_blend_state = swr_bind_blend_state;
1605 pipe->delete_blend_state = swr_delete_blend_state;
1606
1607 pipe->create_depth_stencil_alpha_state = swr_create_depth_stencil_state;
1608 pipe->bind_depth_stencil_alpha_state = swr_bind_depth_stencil_state;
1609 pipe->delete_depth_stencil_alpha_state = swr_delete_depth_stencil_state;
1610
1611 pipe->create_rasterizer_state = swr_create_rasterizer_state;
1612 pipe->bind_rasterizer_state = swr_bind_rasterizer_state;
1613 pipe->delete_rasterizer_state = swr_delete_rasterizer_state;
1614
1615 pipe->create_sampler_state = swr_create_sampler_state;
1616 pipe->bind_sampler_states = swr_bind_sampler_states;
1617 pipe->delete_sampler_state = swr_delete_sampler_state;
1618
1619 pipe->create_sampler_view = swr_create_sampler_view;
1620 pipe->set_sampler_views = swr_set_sampler_views;
1621 pipe->sampler_view_destroy = swr_sampler_view_destroy;
1622
1623 pipe->create_vs_state = swr_create_vs_state;
1624 pipe->bind_vs_state = swr_bind_vs_state;
1625 pipe->delete_vs_state = swr_delete_vs_state;
1626
1627 pipe->create_fs_state = swr_create_fs_state;
1628 pipe->bind_fs_state = swr_bind_fs_state;
1629 pipe->delete_fs_state = swr_delete_fs_state;
1630
1631 pipe->set_constant_buffer = swr_set_constant_buffer;
1632
1633 pipe->create_vertex_elements_state = swr_create_vertex_elements_state;
1634 pipe->bind_vertex_elements_state = swr_bind_vertex_elements_state;
1635 pipe->delete_vertex_elements_state = swr_delete_vertex_elements_state;
1636
1637 pipe->set_vertex_buffers = swr_set_vertex_buffers;
1638 pipe->set_index_buffer = swr_set_index_buffer;
1639
1640 pipe->set_polygon_stipple = swr_set_polygon_stipple;
1641 pipe->set_clip_state = swr_set_clip_state;
1642 pipe->set_scissor_states = swr_set_scissor_states;
1643 pipe->set_viewport_states = swr_set_viewport_states;
1644
1645 pipe->set_framebuffer_state = swr_set_framebuffer_state;
1646
1647 pipe->set_blend_color = swr_set_blend_color;
1648 pipe->set_stencil_ref = swr_set_stencil_ref;
1649
1650 pipe->set_sample_mask = swr_set_sample_mask;
1651
1652 pipe->create_stream_output_target = swr_create_so_target;
1653 pipe->stream_output_target_destroy = swr_destroy_so_target;
1654 pipe->set_stream_output_targets = swr_set_so_targets;
1655 }