swr: Update fs texture & sampler state logic
[mesa.git] / src / gallium / drivers / swr / swr_state.cpp
1 /****************************************************************************
2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 ***************************************************************************/
23
24 // llvm redefines DEBUG
25 #pragma push_macro("DEBUG")
26 #undef DEBUG
27 #include "JitManager.h"
28 #pragma pop_macro("DEBUG")
29
30 #include "common/os.h"
31 #include "jit_api.h"
32 #include "state_llvm.h"
33
34 #include "gallivm/lp_bld_tgsi.h"
35 #include "util/u_format.h"
36
37 #include "util/u_memory.h"
38 #include "util/u_inlines.h"
39 #include "util/u_helpers.h"
40 #include "util/u_framebuffer.h"
41 #include "util/u_viewport.h"
42
43 #include "swr_state.h"
44 #include "swr_context.h"
45 #include "swr_context_llvm.h"
46 #include "swr_screen.h"
47 #include "swr_resource.h"
48 #include "swr_tex_sample.h"
49 #include "swr_scratch.h"
50 #include "swr_shader.h"
51 #include "swr_fence.h"
52
53 /* These should be pulled out into separate files as necessary
54 * Just initializing everything here to get going. */
55
56 static void *
57 swr_create_blend_state(struct pipe_context *pipe,
58 const struct pipe_blend_state *blend)
59 {
60 struct swr_blend_state *state = CALLOC_STRUCT(swr_blend_state);
61
62 memcpy(&state->pipe, blend, sizeof(*blend));
63
64 struct pipe_blend_state *pipe_blend = &state->pipe;
65
66 for (int target = 0;
67 target < std::min(SWR_NUM_RENDERTARGETS, PIPE_MAX_COLOR_BUFS);
68 target++) {
69
70 struct pipe_rt_blend_state *rt_blend = &pipe_blend->rt[target];
71 SWR_RENDER_TARGET_BLEND_STATE &blendState =
72 state->blendState.renderTarget[target];
73 RENDER_TARGET_BLEND_COMPILE_STATE &compileState =
74 state->compileState[target];
75
76 if (target != 0 && !pipe_blend->independent_blend_enable) {
77 memcpy(&compileState,
78 &state->compileState[0],
79 sizeof(RENDER_TARGET_BLEND_COMPILE_STATE));
80 continue;
81 }
82
83 compileState.blendEnable = rt_blend->blend_enable;
84 if (compileState.blendEnable) {
85 compileState.sourceAlphaBlendFactor =
86 swr_convert_blend_factor(rt_blend->alpha_src_factor);
87 compileState.destAlphaBlendFactor =
88 swr_convert_blend_factor(rt_blend->alpha_dst_factor);
89 compileState.sourceBlendFactor =
90 swr_convert_blend_factor(rt_blend->rgb_src_factor);
91 compileState.destBlendFactor =
92 swr_convert_blend_factor(rt_blend->rgb_dst_factor);
93
94 compileState.colorBlendFunc =
95 swr_convert_blend_func(rt_blend->rgb_func);
96 compileState.alphaBlendFunc =
97 swr_convert_blend_func(rt_blend->alpha_func);
98 }
99 compileState.logicOpEnable = state->pipe.logicop_enable;
100 if (compileState.logicOpEnable) {
101 compileState.logicOpFunc =
102 swr_convert_logic_op(state->pipe.logicop_func);
103 }
104
105 blendState.writeDisableRed =
106 (rt_blend->colormask & PIPE_MASK_R) ? 0 : 1;
107 blendState.writeDisableGreen =
108 (rt_blend->colormask & PIPE_MASK_G) ? 0 : 1;
109 blendState.writeDisableBlue =
110 (rt_blend->colormask & PIPE_MASK_B) ? 0 : 1;
111 blendState.writeDisableAlpha =
112 (rt_blend->colormask & PIPE_MASK_A) ? 0 : 1;
113
114 if (rt_blend->colormask == 0)
115 compileState.blendEnable = false;
116 }
117
118 return state;
119 }
120
121 static void
122 swr_bind_blend_state(struct pipe_context *pipe, void *blend)
123 {
124 struct swr_context *ctx = swr_context(pipe);
125
126 if (ctx->blend == blend)
127 return;
128
129 ctx->blend = (swr_blend_state *)blend;
130
131 ctx->dirty |= SWR_NEW_BLEND;
132 }
133
134 static void
135 swr_delete_blend_state(struct pipe_context *pipe, void *blend)
136 {
137 FREE(blend);
138 }
139
140 static void
141 swr_set_blend_color(struct pipe_context *pipe,
142 const struct pipe_blend_color *color)
143 {
144 struct swr_context *ctx = swr_context(pipe);
145
146 ctx->blend_color = *color;
147
148 ctx->dirty |= SWR_NEW_BLEND;
149 }
150
151 static void
152 swr_set_stencil_ref(struct pipe_context *pipe,
153 const struct pipe_stencil_ref *ref)
154 {
155 struct swr_context *ctx = swr_context(pipe);
156
157 ctx->stencil_ref = *ref;
158
159 ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA;
160 }
161
162 static void *
163 swr_create_depth_stencil_state(
164 struct pipe_context *pipe,
165 const struct pipe_depth_stencil_alpha_state *depth_stencil)
166 {
167 struct pipe_depth_stencil_alpha_state *state;
168
169 state = (pipe_depth_stencil_alpha_state *)mem_dup(depth_stencil,
170 sizeof *depth_stencil);
171
172 return state;
173 }
174
175 static void
176 swr_bind_depth_stencil_state(struct pipe_context *pipe, void *depth_stencil)
177 {
178 struct swr_context *ctx = swr_context(pipe);
179
180 if (ctx->depth_stencil == (pipe_depth_stencil_alpha_state *)depth_stencil)
181 return;
182
183 ctx->depth_stencil = (pipe_depth_stencil_alpha_state *)depth_stencil;
184
185 ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA;
186 }
187
188 static void
189 swr_delete_depth_stencil_state(struct pipe_context *pipe, void *depth)
190 {
191 FREE(depth);
192 }
193
194
195 static void *
196 swr_create_rasterizer_state(struct pipe_context *pipe,
197 const struct pipe_rasterizer_state *rast)
198 {
199 struct pipe_rasterizer_state *state;
200 state = (pipe_rasterizer_state *)mem_dup(rast, sizeof *rast);
201
202 return state;
203 }
204
205 static void
206 swr_bind_rasterizer_state(struct pipe_context *pipe, void *handle)
207 {
208 struct swr_context *ctx = swr_context(pipe);
209 const struct pipe_rasterizer_state *rasterizer =
210 (const struct pipe_rasterizer_state *)handle;
211
212 if (ctx->rasterizer == (pipe_rasterizer_state *)rasterizer)
213 return;
214
215 ctx->rasterizer = (pipe_rasterizer_state *)rasterizer;
216
217 ctx->dirty |= SWR_NEW_RASTERIZER;
218 }
219
220 static void
221 swr_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer)
222 {
223 FREE(rasterizer);
224 }
225
226
227 static void *
228 swr_create_sampler_state(struct pipe_context *pipe,
229 const struct pipe_sampler_state *sampler)
230 {
231 struct pipe_sampler_state *state =
232 (pipe_sampler_state *)mem_dup(sampler, sizeof *sampler);
233
234 return state;
235 }
236
237 static void
238 swr_bind_sampler_states(struct pipe_context *pipe,
239 enum pipe_shader_type shader,
240 unsigned start,
241 unsigned num,
242 void **samplers)
243 {
244 struct swr_context *ctx = swr_context(pipe);
245 unsigned i;
246
247 assert(shader < PIPE_SHADER_TYPES);
248 assert(start + num <= ARRAY_SIZE(ctx->samplers[shader]));
249
250 /* set the new samplers */
251 ctx->num_samplers[shader] = num;
252 for (i = 0; i < num; i++) {
253 ctx->samplers[shader][start + i] = (pipe_sampler_state *)samplers[i];
254 }
255
256 ctx->dirty |= SWR_NEW_SAMPLER;
257 }
258
259 static void
260 swr_delete_sampler_state(struct pipe_context *pipe, void *sampler)
261 {
262 FREE(sampler);
263 }
264
265
266 static struct pipe_sampler_view *
267 swr_create_sampler_view(struct pipe_context *pipe,
268 struct pipe_resource *texture,
269 const struct pipe_sampler_view *templ)
270 {
271 struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);
272
273 if (view) {
274 *view = *templ;
275 view->reference.count = 1;
276 view->texture = NULL;
277 pipe_resource_reference(&view->texture, texture);
278 view->context = pipe;
279 }
280
281 return view;
282 }
283
284 static void
285 swr_set_sampler_views(struct pipe_context *pipe,
286 enum pipe_shader_type shader,
287 unsigned start,
288 unsigned num,
289 struct pipe_sampler_view **views)
290 {
291 struct swr_context *ctx = swr_context(pipe);
292 uint i;
293
294 assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
295
296 assert(shader < PIPE_SHADER_TYPES);
297 assert(start + num <= ARRAY_SIZE(ctx->sampler_views[shader]));
298
299 /* set the new sampler views */
300 ctx->num_sampler_views[shader] = num;
301 for (i = 0; i < num; i++) {
302 /* Note: we're using pipe_sampler_view_release() here to work around
303 * a possible crash when the old view belongs to another context that
304 * was already destroyed.
305 */
306 pipe_sampler_view_release(pipe, &ctx->sampler_views[shader][start + i]);
307 pipe_sampler_view_reference(&ctx->sampler_views[shader][start + i],
308 views[i]);
309 }
310
311 ctx->dirty |= SWR_NEW_SAMPLER_VIEW;
312 }
313
314 static void
315 swr_sampler_view_destroy(struct pipe_context *pipe,
316 struct pipe_sampler_view *view)
317 {
318 pipe_resource_reference(&view->texture, NULL);
319 FREE(view);
320 }
321
322 static void *
323 swr_create_vs_state(struct pipe_context *pipe,
324 const struct pipe_shader_state *vs)
325 {
326 struct swr_vertex_shader *swr_vs = new swr_vertex_shader;
327 if (!swr_vs)
328 return NULL;
329
330 swr_vs->pipe.tokens = tgsi_dup_tokens(vs->tokens);
331 swr_vs->pipe.stream_output = vs->stream_output;
332
333 lp_build_tgsi_info(vs->tokens, &swr_vs->info);
334
335 swr_vs->soState = {0};
336
337 if (swr_vs->pipe.stream_output.num_outputs) {
338 pipe_stream_output_info *stream_output = &swr_vs->pipe.stream_output;
339
340 swr_vs->soState.soEnable = true;
341 // soState.rasterizerDisable set on state dirty
342 // soState.streamToRasterizer not used
343
344 for (uint32_t i = 0; i < stream_output->num_outputs; i++) {
345 swr_vs->soState.streamMasks[stream_output->output[i].stream] |=
346 1 << (stream_output->output[i].register_index - 1);
347 }
348 for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) {
349 swr_vs->soState.streamNumEntries[i] =
350 _mm_popcnt_u32(swr_vs->soState.streamMasks[i]);
351 }
352 }
353
354 return swr_vs;
355 }
356
357 static void
358 swr_bind_vs_state(struct pipe_context *pipe, void *vs)
359 {
360 struct swr_context *ctx = swr_context(pipe);
361
362 if (ctx->vs == vs)
363 return;
364
365 ctx->vs = (swr_vertex_shader *)vs;
366 ctx->dirty |= SWR_NEW_VS;
367 }
368
369 static void
370 swr_delete_vs_state(struct pipe_context *pipe, void *vs)
371 {
372 struct swr_vertex_shader *swr_vs = (swr_vertex_shader *)vs;
373 FREE((void *)swr_vs->pipe.tokens);
374 struct swr_screen *screen = swr_screen(pipe->screen);
375
376 /* Defer deletion of vs state */
377 swr_fence_work_delete_vs(screen->flush_fence, swr_vs);
378 }
379
380 static void *
381 swr_create_fs_state(struct pipe_context *pipe,
382 const struct pipe_shader_state *fs)
383 {
384 struct swr_fragment_shader *swr_fs = new swr_fragment_shader;
385 if (!swr_fs)
386 return NULL;
387
388 swr_fs->pipe.tokens = tgsi_dup_tokens(fs->tokens);
389
390 lp_build_tgsi_info(fs->tokens, &swr_fs->info);
391
392 return swr_fs;
393 }
394
395
396 static void
397 swr_bind_fs_state(struct pipe_context *pipe, void *fs)
398 {
399 struct swr_context *ctx = swr_context(pipe);
400
401 if (ctx->fs == fs)
402 return;
403
404 ctx->fs = (swr_fragment_shader *)fs;
405 ctx->dirty |= SWR_NEW_FS;
406 }
407
408 static void
409 swr_delete_fs_state(struct pipe_context *pipe, void *fs)
410 {
411 struct swr_fragment_shader *swr_fs = (swr_fragment_shader *)fs;
412 FREE((void *)swr_fs->pipe.tokens);
413 struct swr_screen *screen = swr_screen(pipe->screen);
414
415 /* Defer deleton of fs state */
416 swr_fence_work_delete_fs(screen->flush_fence, swr_fs);
417 }
418
419
420 static void
421 swr_set_constant_buffer(struct pipe_context *pipe,
422 uint shader,
423 uint index,
424 const struct pipe_constant_buffer *cb)
425 {
426 struct swr_context *ctx = swr_context(pipe);
427 struct pipe_resource *constants = cb ? cb->buffer : NULL;
428
429 assert(shader < PIPE_SHADER_TYPES);
430 assert(index < ARRAY_SIZE(ctx->constants[shader]));
431
432 /* note: reference counting */
433 util_copy_constant_buffer(&ctx->constants[shader][index], cb);
434
435 if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY) {
436 ctx->dirty |= SWR_NEW_VSCONSTANTS;
437 } else if (shader == PIPE_SHADER_FRAGMENT) {
438 ctx->dirty |= SWR_NEW_FSCONSTANTS;
439 }
440
441 if (cb && cb->user_buffer) {
442 pipe_resource_reference(&constants, NULL);
443 }
444 }
445
446
447 static void *
448 swr_create_vertex_elements_state(struct pipe_context *pipe,
449 unsigned num_elements,
450 const struct pipe_vertex_element *attribs)
451 {
452 struct swr_vertex_element_state *velems;
453 assert(num_elements <= PIPE_MAX_ATTRIBS);
454 velems = CALLOC_STRUCT(swr_vertex_element_state);
455 if (velems) {
456 velems->fsState.bVertexIDOffsetEnable = true;
457 velems->fsState.numAttribs = num_elements;
458 for (unsigned i = 0; i < num_elements; i++) {
459 // XXX: we should do this keyed on the VS usage info
460
461 const struct util_format_description *desc =
462 util_format_description(attribs[i].src_format);
463
464 velems->fsState.layout[i].AlignedByteOffset = attribs[i].src_offset;
465 velems->fsState.layout[i].Format =
466 mesa_to_swr_format(attribs[i].src_format);
467 velems->fsState.layout[i].StreamIndex =
468 attribs[i].vertex_buffer_index;
469 velems->fsState.layout[i].InstanceEnable =
470 attribs[i].instance_divisor != 0;
471 velems->fsState.layout[i].ComponentControl0 =
472 desc->channel[0].type != UTIL_FORMAT_TYPE_VOID
473 ? ComponentControl::StoreSrc
474 : ComponentControl::Store0;
475 velems->fsState.layout[i].ComponentControl1 =
476 desc->channel[1].type != UTIL_FORMAT_TYPE_VOID
477 ? ComponentControl::StoreSrc
478 : ComponentControl::Store0;
479 velems->fsState.layout[i].ComponentControl2 =
480 desc->channel[2].type != UTIL_FORMAT_TYPE_VOID
481 ? ComponentControl::StoreSrc
482 : ComponentControl::Store0;
483 velems->fsState.layout[i].ComponentControl3 =
484 desc->channel[3].type != UTIL_FORMAT_TYPE_VOID
485 ? ComponentControl::StoreSrc
486 : ComponentControl::Store1Fp;
487 velems->fsState.layout[i].ComponentPacking = ComponentEnable::XYZW;
488 velems->fsState.layout[i].InstanceDataStepRate =
489 attribs[i].instance_divisor;
490
491 /* Calculate the pitch of each stream */
492 const SWR_FORMAT_INFO &swr_desc = GetFormatInfo(
493 mesa_to_swr_format(attribs[i].src_format));
494 velems->stream_pitch[attribs[i].vertex_buffer_index] += swr_desc.Bpp;
495
496 if (attribs[i].instance_divisor != 0) {
497 velems->instanced_bufs |= 1U << attribs[i].vertex_buffer_index;
498 uint32_t *min_instance_div =
499 &velems->min_instance_div[attribs[i].vertex_buffer_index];
500 if (!*min_instance_div ||
501 attribs[i].instance_divisor < *min_instance_div)
502 *min_instance_div = attribs[i].instance_divisor;
503 }
504 }
505 }
506
507 return velems;
508 }
509
510 static void
511 swr_bind_vertex_elements_state(struct pipe_context *pipe, void *velems)
512 {
513 struct swr_context *ctx = swr_context(pipe);
514 struct swr_vertex_element_state *swr_velems =
515 (struct swr_vertex_element_state *)velems;
516
517 ctx->velems = swr_velems;
518 ctx->dirty |= SWR_NEW_VERTEX;
519 }
520
521 static void
522 swr_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
523 {
524 /* XXX Need to destroy fetch shader? */
525 FREE(velems);
526 }
527
528
529 static void
530 swr_set_vertex_buffers(struct pipe_context *pipe,
531 unsigned start_slot,
532 unsigned num_elements,
533 const struct pipe_vertex_buffer *buffers)
534 {
535 struct swr_context *ctx = swr_context(pipe);
536
537 assert(num_elements <= PIPE_MAX_ATTRIBS);
538
539 util_set_vertex_buffers_count(ctx->vertex_buffer,
540 &ctx->num_vertex_buffers,
541 buffers,
542 start_slot,
543 num_elements);
544
545 ctx->dirty |= SWR_NEW_VERTEX;
546 }
547
548
549 static void
550 swr_set_index_buffer(struct pipe_context *pipe,
551 const struct pipe_index_buffer *ib)
552 {
553 struct swr_context *ctx = swr_context(pipe);
554
555 if (ib)
556 memcpy(&ctx->index_buffer, ib, sizeof(ctx->index_buffer));
557 else
558 memset(&ctx->index_buffer, 0, sizeof(ctx->index_buffer));
559
560 ctx->dirty |= SWR_NEW_VERTEX;
561 }
562
563 static void
564 swr_set_polygon_stipple(struct pipe_context *pipe,
565 const struct pipe_poly_stipple *stipple)
566 {
567 struct swr_context *ctx = swr_context(pipe);
568
569 ctx->poly_stipple = *stipple; /* struct copy */
570 ctx->dirty |= SWR_NEW_STIPPLE;
571 }
572
573 static void
574 swr_set_clip_state(struct pipe_context *pipe,
575 const struct pipe_clip_state *clip)
576 {
577 struct swr_context *ctx = swr_context(pipe);
578
579 ctx->clip = *clip;
580 /* XXX Unimplemented, but prevents crash */
581
582 ctx->dirty |= SWR_NEW_CLIP;
583 }
584
585
586 static void
587 swr_set_scissor_states(struct pipe_context *pipe,
588 unsigned start_slot,
589 unsigned num_viewports,
590 const struct pipe_scissor_state *scissor)
591 {
592 struct swr_context *ctx = swr_context(pipe);
593
594 ctx->scissor = *scissor;
595 ctx->swr_scissor.xmin = scissor->minx;
596 ctx->swr_scissor.xmax = scissor->maxx;
597 ctx->swr_scissor.ymin = scissor->miny;
598 ctx->swr_scissor.ymax = scissor->maxy;
599 ctx->dirty |= SWR_NEW_SCISSOR;
600 }
601
602 static void
603 swr_set_viewport_states(struct pipe_context *pipe,
604 unsigned start_slot,
605 unsigned num_viewports,
606 const struct pipe_viewport_state *vpt)
607 {
608 struct swr_context *ctx = swr_context(pipe);
609
610 ctx->viewport = *vpt;
611 ctx->dirty |= SWR_NEW_VIEWPORT;
612 }
613
614
615 static void
616 swr_set_framebuffer_state(struct pipe_context *pipe,
617 const struct pipe_framebuffer_state *fb)
618 {
619 struct swr_context *ctx = swr_context(pipe);
620
621 boolean changed = !util_framebuffer_state_equal(&ctx->framebuffer, fb);
622
623 assert(fb->width <= KNOB_GUARDBAND_WIDTH);
624 assert(fb->height <= KNOB_GUARDBAND_HEIGHT);
625
626 if (changed) {
627 util_copy_framebuffer_state(&ctx->framebuffer, fb);
628
629 ctx->dirty |= SWR_NEW_FRAMEBUFFER;
630 }
631 }
632
633
634 static void
635 swr_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
636 {
637 struct swr_context *ctx = swr_context(pipe);
638
639 if (sample_mask != ctx->sample_mask) {
640 ctx->sample_mask = sample_mask;
641 ctx->dirty |= SWR_NEW_RASTERIZER;
642 }
643 }
644
645 /*
646 * Update resource in-use status
647 * All resources bound to color or depth targets marked as WRITE resources.
648 * VBO Vertex/index buffers and texture views marked as READ resources.
649 */
650 void
651 swr_update_resource_status(struct pipe_context *pipe,
652 const struct pipe_draw_info *p_draw_info)
653 {
654 struct swr_context *ctx = swr_context(pipe);
655 struct pipe_framebuffer_state *fb = &ctx->framebuffer;
656
657 /* colorbuffer targets */
658 if (fb->nr_cbufs)
659 for (uint32_t i = 0; i < fb->nr_cbufs; ++i)
660 if (fb->cbufs[i])
661 swr_resource_write(fb->cbufs[i]->texture);
662
663 /* depth/stencil target */
664 if (fb->zsbuf)
665 swr_resource_write(fb->zsbuf->texture);
666
667 /* VBO vertex buffers */
668 for (uint32_t i = 0; i < ctx->num_vertex_buffers; i++) {
669 struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
670 if (!vb->user_buffer)
671 swr_resource_read(vb->buffer);
672 }
673
674 /* VBO index buffer */
675 if (p_draw_info && p_draw_info->indexed) {
676 struct pipe_index_buffer *ib = &ctx->index_buffer;
677 if (!ib->user_buffer)
678 swr_resource_read(ib->buffer);
679 }
680
681 /* transform feedback buffers */
682 for (uint32_t i = 0; i < ctx->num_so_targets; i++) {
683 struct pipe_stream_output_target *target = ctx->so_targets[i];
684 if (target && target->buffer)
685 swr_resource_write(target->buffer);
686 }
687
688 /* texture sampler views */
689 for (uint32_t j : {PIPE_SHADER_VERTEX, PIPE_SHADER_FRAGMENT}) {
690 for (uint32_t i = 0; i < ctx->num_sampler_views[j]; i++) {
691 struct pipe_sampler_view *view = ctx->sampler_views[j][i];
692 if (view)
693 swr_resource_read(view->texture);
694 }
695 }
696
697 /* constant buffers */
698 for (uint32_t j : {PIPE_SHADER_VERTEX, PIPE_SHADER_FRAGMENT}) {
699 for (uint32_t i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
700 struct pipe_constant_buffer *cb = &ctx->constants[j][i];
701 if (cb->buffer)
702 swr_resource_read(cb->buffer);
703 }
704 }
705 }
706
707 static void
708 swr_update_texture_state(struct swr_context *ctx,
709 unsigned shader_type,
710 unsigned num_sampler_views,
711 swr_jit_texture *textures)
712 {
713 for (unsigned i = 0; i < num_sampler_views; i++) {
714 struct pipe_sampler_view *view =
715 ctx->sampler_views[shader_type][i];
716 struct swr_jit_texture *jit_tex = &textures[i];
717
718 memset(jit_tex, 0, sizeof(*jit_tex));
719 if (view) {
720 struct pipe_resource *res = view->texture;
721 struct swr_resource *swr_res = swr_resource(res);
722 SWR_SURFACE_STATE *swr = &swr_res->swr;
723 size_t *mip_offsets = swr_res->mip_offsets;
724 if (swr_res->has_depth && swr_res->has_stencil &&
725 !util_format_has_depth(util_format_description(view->format))) {
726 swr = &swr_res->secondary;
727 mip_offsets = swr_res->secondary_mip_offsets;
728 }
729
730 jit_tex->width = res->width0;
731 jit_tex->height = res->height0;
732 jit_tex->base_ptr = swr->pBaseAddress;
733 if (view->target != PIPE_BUFFER) {
734 jit_tex->first_level = view->u.tex.first_level;
735 jit_tex->last_level = view->u.tex.last_level;
736 if (view->target == PIPE_TEXTURE_3D)
737 jit_tex->depth = res->depth0;
738 else
739 jit_tex->depth =
740 view->u.tex.last_layer - view->u.tex.first_layer + 1;
741 jit_tex->base_ptr += view->u.tex.first_layer *
742 swr->qpitch * swr->pitch;
743 } else {
744 unsigned view_blocksize = util_format_get_blocksize(view->format);
745 jit_tex->base_ptr += view->u.buf.offset;
746 jit_tex->width = view->u.buf.size / view_blocksize;
747 jit_tex->depth = 1;
748 }
749
750 for (unsigned level = jit_tex->first_level;
751 level <= jit_tex->last_level;
752 level++) {
753 jit_tex->row_stride[level] = swr->pitch;
754 jit_tex->img_stride[level] = swr->qpitch * swr->pitch;
755 jit_tex->mip_offsets[level] = mip_offsets[level];
756 }
757 }
758 }
759 }
760
761 static void
762 swr_update_sampler_state(struct swr_context *ctx,
763 unsigned shader_type,
764 unsigned num_samplers,
765 swr_jit_sampler *samplers)
766 {
767 for (unsigned i = 0; i < num_samplers; i++) {
768 const struct pipe_sampler_state *sampler =
769 ctx->samplers[shader_type][i];
770
771 if (sampler) {
772 samplers[i].min_lod = sampler->min_lod;
773 samplers[i].max_lod = sampler->max_lod;
774 samplers[i].lod_bias = sampler->lod_bias;
775 COPY_4V(samplers[i].border_color, sampler->border_color.f);
776 }
777 }
778 }
779
780 static void
781 swr_update_constants(struct swr_context *ctx, enum pipe_shader_type shaderType)
782 {
783 swr_draw_context *pDC = &ctx->swrDC;
784
785 const float **constant;
786 uint32_t *num_constants;
787 struct swr_scratch_space *scratch;
788
789 switch (shaderType) {
790 case PIPE_SHADER_VERTEX:
791 constant = pDC->constantVS;
792 num_constants = pDC->num_constantsVS;
793 scratch = &ctx->scratch->vs_constants;
794 break;
795 case PIPE_SHADER_FRAGMENT:
796 constant = pDC->constantFS;
797 num_constants = pDC->num_constantsFS;
798 scratch = &ctx->scratch->fs_constants;
799 break;
800 default:
801 debug_printf("Unsupported shader type constants\n");
802 return;
803 }
804
805 for (UINT i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
806 const pipe_constant_buffer *cb = &ctx->constants[shaderType][i];
807 num_constants[i] = cb->buffer_size;
808 if (cb->buffer) {
809 constant[i] =
810 (const float *)(swr_resource_data(cb->buffer) +
811 cb->buffer_offset);
812 } else {
813 /* Need to copy these constants to scratch space */
814 if (cb->user_buffer && cb->buffer_size) {
815 const void *ptr =
816 ((const uint8_t *)cb->user_buffer + cb->buffer_offset);
817 uint32_t size = AlignUp(cb->buffer_size, 4);
818 ptr = swr_copy_to_scratch_space(ctx, scratch, ptr, size);
819 constant[i] = (const float *)ptr;
820 }
821 }
822 }
823 }
824
825 static bool
826 swr_change_rt(struct swr_context *ctx,
827 unsigned attachment,
828 const struct pipe_surface *sf)
829 {
830 swr_draw_context *pDC = &ctx->swrDC;
831 struct SWR_SURFACE_STATE *rt = &pDC->renderTargets[attachment];
832
833 /* Do nothing if the render target hasn't changed */
834 if ((!sf || !sf->texture) && rt->pBaseAddress == nullptr)
835 return false;
836
837 /* Deal with disabling RT up front */
838 if (!sf || !sf->texture) {
839 /* If detaching attachment, mark tiles as RESOLVED so core
840 * won't try to load from non-existent target. */
841 swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_RESOLVED);
842 *rt = {0};
843 return true;
844 }
845
846 const struct swr_resource *swr = swr_resource(sf->texture);
847 const SWR_SURFACE_STATE *swr_surface = &swr->swr;
848 SWR_FORMAT fmt = mesa_to_swr_format(sf->format);
849
850 if (attachment == SWR_ATTACHMENT_STENCIL && swr->secondary.pBaseAddress) {
851 swr_surface = &swr->secondary;
852 fmt = swr_surface->format;
853 }
854
855 if (rt->pBaseAddress == swr_surface->pBaseAddress &&
856 rt->format == fmt &&
857 rt->lod == sf->u.tex.level &&
858 rt->arrayIndex == sf->u.tex.first_layer)
859 return false;
860
861 bool need_fence = false;
862
863 /* StoreTile for changed target */
864 if (rt->pBaseAddress) {
865 /* If changing attachment to a new target, mark tiles as
866 * INVALID so they are reloaded from surface. */
867 swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_INVALID);
868 need_fence = true;
869 }
870
871 /* Make new attachment */
872 *rt = *swr_surface;
873 rt->format = fmt;
874 rt->lod = sf->u.tex.level;
875 rt->arrayIndex = sf->u.tex.first_layer;
876
877 return need_fence;
878 }
879
880 static inline void
881 swr_user_vbuf_range(const struct pipe_draw_info *info,
882 const struct swr_vertex_element_state *velems,
883 const struct pipe_vertex_buffer *vb,
884 uint32_t i,
885 uint32_t *totelems,
886 uint32_t *base,
887 uint32_t *size)
888 {
889 /* FIXME: The size is too large - we don't access the full extra stride. */
890 unsigned elems;
891 if (velems->instanced_bufs & (1U << i)) {
892 elems = info->instance_count / velems->min_instance_div[i] + 1;
893 *totelems = info->start_instance + elems;
894 *base = info->start_instance * vb->stride;
895 *size = elems * vb->stride;
896 } else if (vb->stride) {
897 elems = info->max_index - info->min_index + 1;
898 *totelems = info->max_index + 1;
899 *base = info->min_index * vb->stride;
900 *size = elems * vb->stride;
901 } else {
902 *totelems = 1;
903 *base = 0;
904 *size = velems->stream_pitch[i];
905 }
906 }
907
908 void
909 swr_update_derived(struct pipe_context *pipe,
910 const struct pipe_draw_info *p_draw_info)
911 {
912 struct swr_context *ctx = swr_context(pipe);
913 struct swr_screen *screen = swr_screen(pipe->screen);
914
915 /* Update screen->pipe to current pipe context. */
916 if (screen->pipe != pipe)
917 screen->pipe = pipe;
918
919 /* Any state that requires dirty flags to be re-triggered sets this mask */
920 /* For example, user_buffer vertex and index buffers. */
921 unsigned post_update_dirty_flags = 0;
922
923 /* Render Targets */
924 if (ctx->dirty & SWR_NEW_FRAMEBUFFER) {
925 struct pipe_framebuffer_state *fb = &ctx->framebuffer;
926 const struct util_format_description *desc = NULL;
927 bool need_fence = false;
928
929 /* colorbuffer targets */
930 if (fb->nr_cbufs) {
931 for (unsigned i = 0; i < fb->nr_cbufs; ++i)
932 need_fence |= swr_change_rt(
933 ctx, SWR_ATTACHMENT_COLOR0 + i, fb->cbufs[i]);
934 }
935 for (unsigned i = fb->nr_cbufs; i < SWR_NUM_RENDERTARGETS; ++i)
936 need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_COLOR0 + i, NULL);
937
938 /* depth/stencil target */
939 if (fb->zsbuf)
940 desc = util_format_description(fb->zsbuf->format);
941 if (fb->zsbuf && util_format_has_depth(desc))
942 need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, fb->zsbuf);
943 else
944 need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, NULL);
945
946 if (fb->zsbuf && util_format_has_stencil(desc))
947 need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, fb->zsbuf);
948 else
949 need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, NULL);
950
951 /* This fence ensures any attachment changes are resolved before the
952 * next draw */
953 if (need_fence)
954 swr_fence_submit(ctx, screen->flush_fence);
955 }
956
957 /* Raster state */
958 if (ctx->dirty & (SWR_NEW_RASTERIZER |
959 SWR_NEW_VS | // clipping
960 SWR_NEW_FRAMEBUFFER)) {
961 pipe_rasterizer_state *rasterizer = ctx->rasterizer;
962 pipe_framebuffer_state *fb = &ctx->framebuffer;
963
964 SWR_RASTSTATE *rastState = &ctx->derived.rastState;
965 rastState->cullMode = swr_convert_cull_mode(rasterizer->cull_face);
966 rastState->frontWinding = rasterizer->front_ccw
967 ? SWR_FRONTWINDING_CCW
968 : SWR_FRONTWINDING_CW;
969 rastState->scissorEnable = rasterizer->scissor;
970 rastState->pointSize = rasterizer->point_size > 0.0f
971 ? rasterizer->point_size
972 : 1.0f;
973 rastState->lineWidth = rasterizer->line_width > 0.0f
974 ? rasterizer->line_width
975 : 1.0f;
976
977 rastState->pointParam = rasterizer->point_size_per_vertex;
978
979 rastState->pointSpriteEnable = rasterizer->sprite_coord_enable;
980 rastState->pointSpriteTopOrigin =
981 rasterizer->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT;
982
983 /* XXX TODO: Add multisample */
984 rastState->msaaRastEnable = false;
985 rastState->rastMode = SWR_MSAA_RASTMODE_OFF_PIXEL;
986 rastState->sampleCount = SWR_MULTISAMPLE_1X;
987 rastState->forcedSampleCount = false;
988
989 bool do_offset = false;
990 switch (rasterizer->fill_front) {
991 case PIPE_POLYGON_MODE_FILL:
992 do_offset = rasterizer->offset_tri;
993 break;
994 case PIPE_POLYGON_MODE_LINE:
995 do_offset = rasterizer->offset_line;
996 break;
997 case PIPE_POLYGON_MODE_POINT:
998 do_offset = rasterizer->offset_point;
999 break;
1000 }
1001
1002 if (do_offset) {
1003 rastState->depthBias = rasterizer->offset_units;
1004 rastState->slopeScaledDepthBias = rasterizer->offset_scale;
1005 rastState->depthBiasClamp = rasterizer->offset_clamp;
1006 } else {
1007 rastState->depthBias = 0;
1008 rastState->slopeScaledDepthBias = 0;
1009 rastState->depthBiasClamp = 0;
1010 }
1011 struct pipe_surface *zb = fb->zsbuf;
1012 if (zb && swr_resource(zb->texture)->has_depth)
1013 rastState->depthFormat = swr_resource(zb->texture)->swr.format;
1014
1015 rastState->depthClipEnable = rasterizer->depth_clip;
1016 rastState->clipHalfZ = rasterizer->clip_halfz;
1017
1018 rastState->clipDistanceMask =
1019 ctx->vs->info.base.num_written_clipdistance ?
1020 ctx->vs->info.base.clipdist_writemask & rasterizer->clip_plane_enable :
1021 rasterizer->clip_plane_enable;
1022
1023 rastState->cullDistanceMask =
1024 ctx->vs->info.base.culldist_writemask << ctx->vs->info.base.num_written_clipdistance;
1025
1026 SwrSetRastState(ctx->swrContext, rastState);
1027 }
1028
1029 /* Scissor */
1030 if (ctx->dirty & SWR_NEW_SCISSOR) {
1031 SwrSetScissorRects(ctx->swrContext, 1, &ctx->swr_scissor);
1032 }
1033
1034 /* Viewport */
1035 if (ctx->dirty & (SWR_NEW_VIEWPORT | SWR_NEW_FRAMEBUFFER
1036 | SWR_NEW_RASTERIZER)) {
1037 pipe_viewport_state *state = &ctx->viewport;
1038 pipe_framebuffer_state *fb = &ctx->framebuffer;
1039 pipe_rasterizer_state *rasterizer = ctx->rasterizer;
1040
1041 SWR_VIEWPORT *vp = &ctx->derived.vp;
1042 SWR_VIEWPORT_MATRICES *vpm = &ctx->derived.vpm;
1043
1044 vp->x = state->translate[0] - state->scale[0];
1045 vp->width = 2 * state->scale[0];
1046 vp->y = state->translate[1] - fabs(state->scale[1]);
1047 vp->height = 2 * fabs(state->scale[1]);
1048 util_viewport_zmin_zmax(state, rasterizer->clip_halfz,
1049 &vp->minZ, &vp->maxZ);
1050
1051 vpm->m00[0] = state->scale[0];
1052 vpm->m11[0] = state->scale[1];
1053 vpm->m22[0] = state->scale[2];
1054 vpm->m30[0] = state->translate[0];
1055 vpm->m31[0] = state->translate[1];
1056 vpm->m32[0] = state->translate[2];
1057
1058 /* Now that the matrix is calculated, clip the view coords to screen
1059 * size. OpenGL allows for -ve x,y in the viewport. */
1060 if (vp->x < 0.0f) {
1061 vp->width += vp->x;
1062 vp->x = 0.0f;
1063 }
1064 if (vp->y < 0.0f) {
1065 vp->height += vp->y;
1066 vp->y = 0.0f;
1067 }
1068 vp->width = std::min(vp->width, (float)fb->width - vp->x);
1069 vp->height = std::min(vp->height, (float)fb->height - vp->y);
1070
1071 SwrSetViewports(ctx->swrContext, 1, vp, vpm);
1072 }
1073
1074 /* Set vertex & index buffers */
1075 /* (using draw info if called by swr_draw_vbo) */
1076 if (ctx->dirty & SWR_NEW_VERTEX) {
1077 uint32_t scratch_total;
1078 uint8_t *scratch = NULL;
1079
1080 /* If being called by swr_draw_vbo, copy draw details */
1081 struct pipe_draw_info info = {0};
1082 if (p_draw_info)
1083 info = *p_draw_info;
1084
1085 /* We must get all the scratch space in one go */
1086 scratch_total = 0;
1087 for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
1088 struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
1089
1090 if (!vb->user_buffer)
1091 continue;
1092
1093 uint32_t elems, base, size;
1094 swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size);
1095 scratch_total += AlignUp(size, 4);
1096 }
1097
1098 if (scratch_total) {
1099 scratch = (uint8_t *)swr_copy_to_scratch_space(
1100 ctx, &ctx->scratch->vertex_buffer, NULL, scratch_total);
1101 }
1102
1103 /* vertex buffers */
1104 SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS];
1105 for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
1106 uint32_t size, pitch, elems, partial_inbounds;
1107 const uint8_t *p_data;
1108 struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
1109
1110 pitch = vb->stride;
1111 if (!vb->user_buffer) {
1112 /* VBO
1113 * size is based on buffer->width0 rather than info.max_index
1114 * to prevent having to validate VBO on each draw */
1115 size = vb->buffer->width0;
1116 elems = size / pitch;
1117 partial_inbounds = size % pitch;
1118
1119 p_data = swr_resource_data(vb->buffer) + vb->buffer_offset;
1120 } else {
1121 /* Client buffer
1122 * client memory is one-time use, re-trigger SWR_NEW_VERTEX to
1123 * revalidate on each draw */
1124 post_update_dirty_flags |= SWR_NEW_VERTEX;
1125
1126 uint32_t base;
1127 swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size);
1128 partial_inbounds = 0;
1129
1130 /* Copy only needed vertices to scratch space */
1131 size = AlignUp(size, 4);
1132 const void *ptr = (const uint8_t *) vb->user_buffer + base;
1133 memcpy(scratch, ptr, size);
1134 ptr = scratch;
1135 scratch += size;
1136 p_data = (const uint8_t *)ptr - base;
1137 }
1138
1139 swrVertexBuffers[i] = {0};
1140 swrVertexBuffers[i].index = i;
1141 swrVertexBuffers[i].pitch = pitch;
1142 swrVertexBuffers[i].pData = p_data;
1143 swrVertexBuffers[i].size = size;
1144 swrVertexBuffers[i].maxVertex = elems;
1145 swrVertexBuffers[i].partialInboundsSize = partial_inbounds;
1146 }
1147
1148 SwrSetVertexBuffers(
1149 ctx->swrContext, ctx->num_vertex_buffers, swrVertexBuffers);
1150
1151 /* index buffer, if required (info passed in by swr_draw_vbo) */
1152 SWR_FORMAT index_type = R32_UINT; /* Default for non-indexed draws */
1153 if (info.indexed) {
1154 const uint8_t *p_data;
1155 uint32_t size, pitch;
1156 struct pipe_index_buffer *ib = &ctx->index_buffer;
1157
1158 pitch = ib->index_size ? ib->index_size : sizeof(uint32_t);
1159 index_type = swr_convert_index_type(pitch);
1160
1161 if (!ib->user_buffer) {
1162 /* VBO
1163 * size is based on buffer->width0 rather than info.count
1164 * to prevent having to validate VBO on each draw */
1165 size = ib->buffer->width0;
1166 p_data = swr_resource_data(ib->buffer) + ib->offset;
1167 } else {
1168 /* Client buffer
1169 * client memory is one-time use, re-trigger SWR_NEW_VERTEX to
1170 * revalidate on each draw */
1171 post_update_dirty_flags |= SWR_NEW_VERTEX;
1172
1173 size = info.count * pitch;
1174 size = AlignUp(size, 4);
1175
1176 /* Copy indices to scratch space */
1177 const void *ptr = ib->user_buffer;
1178 ptr = swr_copy_to_scratch_space(
1179 ctx, &ctx->scratch->index_buffer, ptr, size);
1180 p_data = (const uint8_t *)ptr;
1181 }
1182
1183 SWR_INDEX_BUFFER_STATE swrIndexBuffer;
1184 swrIndexBuffer.format = swr_convert_index_type(ib->index_size);
1185 swrIndexBuffer.pIndices = p_data;
1186 swrIndexBuffer.size = size;
1187
1188 SwrSetIndexBuffer(ctx->swrContext, &swrIndexBuffer);
1189 }
1190
1191 struct swr_vertex_element_state *velems = ctx->velems;
1192 if (velems && velems->fsState.indexType != index_type) {
1193 velems->fsFunc = NULL;
1194 velems->fsState.indexType = index_type;
1195 }
1196 }
1197
1198 /* VertexShader */
1199 if (ctx->dirty & (SWR_NEW_VS |
1200 SWR_NEW_RASTERIZER | // for clip planes
1201 SWR_NEW_SAMPLER |
1202 SWR_NEW_SAMPLER_VIEW |
1203 SWR_NEW_FRAMEBUFFER)) {
1204 swr_jit_vs_key key;
1205 swr_generate_vs_key(key, ctx, ctx->vs);
1206 auto search = ctx->vs->map.find(key);
1207 PFN_VERTEX_FUNC func;
1208 if (search != ctx->vs->map.end()) {
1209 func = search->second->shader;
1210 } else {
1211 func = swr_compile_vs(ctx, key);
1212 }
1213 SwrSetVertexFunc(ctx->swrContext, func);
1214
1215 /* JIT sampler state */
1216 if (ctx->dirty & SWR_NEW_SAMPLER) {
1217 swr_update_sampler_state(ctx,
1218 PIPE_SHADER_VERTEX,
1219 key.nr_samplers,
1220 ctx->swrDC.samplersVS);
1221 }
1222
1223 /* JIT sampler view state */
1224 if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
1225 swr_update_texture_state(ctx,
1226 PIPE_SHADER_VERTEX,
1227 key.nr_sampler_views,
1228 ctx->swrDC.texturesVS);
1229 }
1230 }
1231
1232 /* FragmentShader */
1233 if (ctx->dirty & (SWR_NEW_FS | SWR_NEW_SAMPLER | SWR_NEW_SAMPLER_VIEW
1234 | SWR_NEW_RASTERIZER | SWR_NEW_FRAMEBUFFER)) {
1235 swr_jit_fs_key key;
1236 swr_generate_fs_key(key, ctx, ctx->fs);
1237 auto search = ctx->fs->map.find(key);
1238 PFN_PIXEL_KERNEL func;
1239 if (search != ctx->fs->map.end()) {
1240 func = search->second->shader;
1241 } else {
1242 func = swr_compile_fs(ctx, key);
1243 }
1244 SWR_PS_STATE psState = {0};
1245 psState.pfnPixelShader = func;
1246 psState.killsPixel = ctx->fs->info.base.uses_kill;
1247 psState.inputCoverage = SWR_INPUT_COVERAGE_NORMAL;
1248 psState.writesODepth = ctx->fs->info.base.writes_z;
1249 psState.usesSourceDepth = ctx->fs->info.base.reads_z;
1250 psState.shadingRate = SWR_SHADING_RATE_PIXEL; // XXX
1251 psState.numRenderTargets = ctx->framebuffer.nr_cbufs;
1252 psState.posOffset = SWR_PS_POSITION_SAMPLE_NONE; // XXX msaa
1253 uint32_t barycentricsMask = 0;
1254 #if 0
1255 // when we switch to mesa-master
1256 if (ctx->fs->info.base.uses_persp_center ||
1257 ctx->fs->info.base.uses_linear_center)
1258 barycentricsMask |= SWR_BARYCENTRIC_PER_PIXEL_MASK;
1259 if (ctx->fs->info.base.uses_persp_centroid ||
1260 ctx->fs->info.base.uses_linear_centroid)
1261 barycentricsMask |= SWR_BARYCENTRIC_CENTROID_MASK;
1262 if (ctx->fs->info.base.uses_persp_sample ||
1263 ctx->fs->info.base.uses_linear_sample)
1264 barycentricsMask |= SWR_BARYCENTRIC_PER_SAMPLE_MASK;
1265 #else
1266 for (unsigned i = 0; i < ctx->fs->info.base.num_inputs; i++) {
1267 switch (ctx->fs->info.base.input_interpolate_loc[i]) {
1268 case TGSI_INTERPOLATE_LOC_CENTER:
1269 barycentricsMask |= SWR_BARYCENTRIC_PER_PIXEL_MASK;
1270 break;
1271 case TGSI_INTERPOLATE_LOC_CENTROID:
1272 barycentricsMask |= SWR_BARYCENTRIC_CENTROID_MASK;
1273 break;
1274 case TGSI_INTERPOLATE_LOC_SAMPLE:
1275 barycentricsMask |= SWR_BARYCENTRIC_PER_SAMPLE_MASK;
1276 break;
1277 }
1278 }
1279 #endif
1280 psState.barycentricsMask = barycentricsMask;
1281 psState.usesUAV = false; // XXX
1282 psState.forceEarlyZ = false;
1283 SwrSetPixelShaderState(ctx->swrContext, &psState);
1284
1285 /* JIT sampler state */
1286 if (ctx->dirty & (SWR_NEW_SAMPLER |
1287 SWR_NEW_FS)) {
1288 swr_update_sampler_state(ctx,
1289 PIPE_SHADER_FRAGMENT,
1290 key.nr_samplers,
1291 ctx->swrDC.samplersFS);
1292 }
1293
1294 /* JIT sampler view state */
1295 if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW |
1296 SWR_NEW_FRAMEBUFFER |
1297 SWR_NEW_FS)) {
1298 swr_update_texture_state(ctx,
1299 PIPE_SHADER_FRAGMENT,
1300 key.nr_sampler_views,
1301 ctx->swrDC.texturesFS);
1302 }
1303 }
1304
1305
1306 /* VertexShader Constants */
1307 if (ctx->dirty & SWR_NEW_VSCONSTANTS) {
1308 swr_update_constants(ctx, PIPE_SHADER_VERTEX);
1309 }
1310
1311 /* FragmentShader Constants */
1312 if (ctx->dirty & SWR_NEW_FSCONSTANTS) {
1313 swr_update_constants(ctx, PIPE_SHADER_FRAGMENT);
1314 }
1315
1316 /* Depth/stencil state */
1317 if (ctx->dirty & (SWR_NEW_DEPTH_STENCIL_ALPHA | SWR_NEW_FRAMEBUFFER)) {
1318 struct pipe_depth_state *depth = &(ctx->depth_stencil->depth);
1319 struct pipe_stencil_state *stencil = ctx->depth_stencil->stencil;
1320 SWR_DEPTH_STENCIL_STATE depthStencilState = {{0}};
1321 SWR_DEPTH_BOUNDS_STATE depthBoundsState = {0};
1322
1323 /* XXX, incomplete. Need to flesh out stencil & alpha test state
1324 struct pipe_stencil_state *front_stencil =
1325 ctx->depth_stencil.stencil[0];
1326 struct pipe_stencil_state *back_stencil = ctx->depth_stencil.stencil[1];
1327 struct pipe_alpha_state alpha;
1328 */
1329 if (stencil[0].enabled) {
1330 depthStencilState.stencilWriteEnable = 1;
1331 depthStencilState.stencilTestEnable = 1;
1332 depthStencilState.stencilTestFunc =
1333 swr_convert_depth_func(stencil[0].func);
1334
1335 depthStencilState.stencilPassDepthPassOp =
1336 swr_convert_stencil_op(stencil[0].zpass_op);
1337 depthStencilState.stencilPassDepthFailOp =
1338 swr_convert_stencil_op(stencil[0].zfail_op);
1339 depthStencilState.stencilFailOp =
1340 swr_convert_stencil_op(stencil[0].fail_op);
1341 depthStencilState.stencilWriteMask = stencil[0].writemask;
1342 depthStencilState.stencilTestMask = stencil[0].valuemask;
1343 depthStencilState.stencilRefValue = ctx->stencil_ref.ref_value[0];
1344 }
1345 if (stencil[1].enabled) {
1346 depthStencilState.doubleSidedStencilTestEnable = 1;
1347
1348 depthStencilState.backfaceStencilTestFunc =
1349 swr_convert_depth_func(stencil[1].func);
1350
1351 depthStencilState.backfaceStencilPassDepthPassOp =
1352 swr_convert_stencil_op(stencil[1].zpass_op);
1353 depthStencilState.backfaceStencilPassDepthFailOp =
1354 swr_convert_stencil_op(stencil[1].zfail_op);
1355 depthStencilState.backfaceStencilFailOp =
1356 swr_convert_stencil_op(stencil[1].fail_op);
1357 depthStencilState.backfaceStencilWriteMask = stencil[1].writemask;
1358 depthStencilState.backfaceStencilTestMask = stencil[1].valuemask;
1359
1360 depthStencilState.backfaceStencilRefValue =
1361 ctx->stencil_ref.ref_value[1];
1362 }
1363
1364 depthStencilState.depthTestEnable = depth->enabled;
1365 depthStencilState.depthTestFunc = swr_convert_depth_func(depth->func);
1366 depthStencilState.depthWriteEnable = depth->writemask;
1367 SwrSetDepthStencilState(ctx->swrContext, &depthStencilState);
1368
1369 depthBoundsState.depthBoundsTestEnable = depth->bounds_test;
1370 depthBoundsState.depthBoundsTestMinValue = depth->bounds_min;
1371 depthBoundsState.depthBoundsTestMaxValue = depth->bounds_max;
1372 SwrSetDepthBoundsState(ctx->swrContext, &depthBoundsState);
1373 }
1374
1375 /* Blend State */
1376 if (ctx->dirty & (SWR_NEW_BLEND |
1377 SWR_NEW_FRAMEBUFFER |
1378 SWR_NEW_DEPTH_STENCIL_ALPHA)) {
1379 struct pipe_framebuffer_state *fb = &ctx->framebuffer;
1380
1381 SWR_BLEND_STATE blendState;
1382 memcpy(&blendState, &ctx->blend->blendState, sizeof(blendState));
1383 blendState.constantColor[0] = ctx->blend_color.color[0];
1384 blendState.constantColor[1] = ctx->blend_color.color[1];
1385 blendState.constantColor[2] = ctx->blend_color.color[2];
1386 blendState.constantColor[3] = ctx->blend_color.color[3];
1387 blendState.alphaTestReference =
1388 *((uint32_t*)&ctx->depth_stencil->alpha.ref_value);
1389
1390 // XXX MSAA
1391 blendState.sampleMask = 0;
1392 blendState.sampleCount = SWR_MULTISAMPLE_1X;
1393
1394 /* If there are no color buffers bound, disable writes on RT0
1395 * and skip loop */
1396 if (fb->nr_cbufs == 0) {
1397 blendState.renderTarget[0].writeDisableRed = 1;
1398 blendState.renderTarget[0].writeDisableGreen = 1;
1399 blendState.renderTarget[0].writeDisableBlue = 1;
1400 blendState.renderTarget[0].writeDisableAlpha = 1;
1401 SwrSetBlendFunc(ctx->swrContext, 0, NULL);
1402 }
1403 else
1404 for (int target = 0;
1405 target < std::min(SWR_NUM_RENDERTARGETS,
1406 PIPE_MAX_COLOR_BUFS);
1407 target++) {
1408 if (!fb->cbufs[target])
1409 continue;
1410
1411 struct swr_resource *colorBuffer =
1412 swr_resource(fb->cbufs[target]->texture);
1413
1414 BLEND_COMPILE_STATE compileState;
1415 memset(&compileState, 0, sizeof(compileState));
1416 compileState.format = colorBuffer->swr.format;
1417 memcpy(&compileState.blendState,
1418 &ctx->blend->compileState[target],
1419 sizeof(compileState.blendState));
1420
1421 const SWR_FORMAT_INFO& info = GetFormatInfo(compileState.format);
1422 if (compileState.blendState.logicOpEnable &&
1423 ((info.type[0] == SWR_TYPE_FLOAT) || info.isSRGB)) {
1424 compileState.blendState.logicOpEnable = false;
1425 }
1426
1427 if (info.type[0] == SWR_TYPE_SINT || info.type[0] == SWR_TYPE_UINT)
1428 compileState.blendState.blendEnable = false;
1429
1430 if (compileState.blendState.blendEnable == false &&
1431 compileState.blendState.logicOpEnable == false &&
1432 ctx->depth_stencil->alpha.enabled == 0) {
1433 SwrSetBlendFunc(ctx->swrContext, target, NULL);
1434 continue;
1435 }
1436
1437 compileState.desc.alphaTestEnable =
1438 ctx->depth_stencil->alpha.enabled;
1439 compileState.desc.independentAlphaBlendEnable =
1440 (compileState.blendState.sourceBlendFactor !=
1441 compileState.blendState.sourceAlphaBlendFactor) ||
1442 (compileState.blendState.destBlendFactor !=
1443 compileState.blendState.destAlphaBlendFactor) ||
1444 (compileState.blendState.colorBlendFunc !=
1445 compileState.blendState.alphaBlendFunc);
1446 compileState.desc.alphaToCoverageEnable =
1447 ctx->blend->pipe.alpha_to_coverage;
1448 compileState.desc.sampleMaskEnable = 0; // XXX
1449 compileState.desc.numSamples = 1; // XXX
1450
1451 compileState.alphaTestFunction =
1452 swr_convert_depth_func(ctx->depth_stencil->alpha.func);
1453 compileState.alphaTestFormat = ALPHA_TEST_FLOAT32; // xxx
1454
1455 compileState.Canonicalize();
1456
1457 PFN_BLEND_JIT_FUNC func = NULL;
1458 auto search = ctx->blendJIT->find(compileState);
1459 if (search != ctx->blendJIT->end()) {
1460 func = search->second;
1461 } else {
1462 HANDLE hJitMgr = screen->hJitMgr;
1463 func = JitCompileBlend(hJitMgr, compileState);
1464 debug_printf("BLEND shader %p\n", func);
1465 assert(func && "Error: BlendShader = NULL");
1466
1467 ctx->blendJIT->insert(std::make_pair(compileState, func));
1468 }
1469 SwrSetBlendFunc(ctx->swrContext, target, func);
1470 }
1471
1472 SwrSetBlendState(ctx->swrContext, &blendState);
1473 }
1474
1475 if (ctx->dirty & SWR_NEW_STIPPLE) {
1476 /* XXX What to do with this one??? SWR doesn't stipple */
1477 }
1478
1479 if (ctx->dirty & (SWR_NEW_VS | SWR_NEW_SO | SWR_NEW_RASTERIZER)) {
1480 ctx->vs->soState.rasterizerDisable =
1481 ctx->rasterizer->rasterizer_discard;
1482 SwrSetSoState(ctx->swrContext, &ctx->vs->soState);
1483
1484 pipe_stream_output_info *stream_output = &ctx->vs->pipe.stream_output;
1485
1486 for (uint32_t i = 0; i < ctx->num_so_targets; i++) {
1487 SWR_STREAMOUT_BUFFER buffer = {0};
1488 if (!ctx->so_targets[i])
1489 continue;
1490 buffer.enable = true;
1491 buffer.pBuffer =
1492 (uint32_t *)(swr_resource_data(ctx->so_targets[i]->buffer) +
1493 ctx->so_targets[i]->buffer_offset);
1494 buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2;
1495 buffer.pitch = stream_output->stride[i];
1496 buffer.streamOffset = 0;
1497
1498 SwrSetSoBuffers(ctx->swrContext, &buffer, i);
1499 }
1500 }
1501
1502 if (ctx->dirty & SWR_NEW_CLIP) {
1503 // shader exporting clip distances overrides all user clip planes
1504 if (ctx->rasterizer->clip_plane_enable &&
1505 !ctx->vs->info.base.num_written_clipdistance)
1506 {
1507 swr_draw_context *pDC = &ctx->swrDC;
1508 memcpy(pDC->userClipPlanes,
1509 ctx->clip.ucp,
1510 sizeof(pDC->userClipPlanes));
1511 }
1512 }
1513
1514 // set up backend state
1515 SWR_BACKEND_STATE backendState = {0};
1516 backendState.numAttributes =
1517 ctx->vs->info.base.num_outputs - 1 +
1518 (ctx->rasterizer->sprite_coord_enable ? 1 : 0);
1519 for (unsigned i = 0; i < backendState.numAttributes; i++)
1520 backendState.numComponents[i] = 4;
1521 backendState.constantInterpolationMask = ctx->fs->constantMask |
1522 (ctx->rasterizer->flatshade ? ctx->fs->flatConstantMask : 0);
1523 backendState.pointSpriteTexCoordMask = ctx->fs->pointSpriteMask;
1524
1525 SwrSetBackendState(ctx->swrContext, &backendState);
1526
1527 /* Ensure that any in-progress attachment change StoreTiles finish */
1528 if (swr_is_fence_pending(screen->flush_fence))
1529 swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
1530
1531 /* Finally, update the in-use status of all resources involved in draw */
1532 swr_update_resource_status(pipe, p_draw_info);
1533
1534 ctx->dirty = post_update_dirty_flags;
1535 }
1536
1537
1538 static struct pipe_stream_output_target *
1539 swr_create_so_target(struct pipe_context *pipe,
1540 struct pipe_resource *buffer,
1541 unsigned buffer_offset,
1542 unsigned buffer_size)
1543 {
1544 struct pipe_stream_output_target *target;
1545
1546 target = CALLOC_STRUCT(pipe_stream_output_target);
1547 if (!target)
1548 return NULL;
1549
1550 target->context = pipe;
1551 target->reference.count = 1;
1552 pipe_resource_reference(&target->buffer, buffer);
1553 target->buffer_offset = buffer_offset;
1554 target->buffer_size = buffer_size;
1555 return target;
1556 }
1557
1558 static void
1559 swr_destroy_so_target(struct pipe_context *pipe,
1560 struct pipe_stream_output_target *target)
1561 {
1562 pipe_resource_reference(&target->buffer, NULL);
1563 FREE(target);
1564 }
1565
1566 static void
1567 swr_set_so_targets(struct pipe_context *pipe,
1568 unsigned num_targets,
1569 struct pipe_stream_output_target **targets,
1570 const unsigned *offsets)
1571 {
1572 struct swr_context *swr = swr_context(pipe);
1573 uint32_t i;
1574
1575 assert(num_targets <= MAX_SO_STREAMS);
1576
1577 for (i = 0; i < num_targets; i++) {
1578 pipe_so_target_reference(
1579 (struct pipe_stream_output_target **)&swr->so_targets[i],
1580 targets[i]);
1581 }
1582
1583 for (/* fall-through */; i < swr->num_so_targets; i++) {
1584 pipe_so_target_reference(
1585 (struct pipe_stream_output_target **)&swr->so_targets[i], NULL);
1586 }
1587
1588 swr->num_so_targets = num_targets;
1589
1590 swr->dirty |= SWR_NEW_SO;
1591 }
1592
1593
1594 void
1595 swr_state_init(struct pipe_context *pipe)
1596 {
1597 pipe->create_blend_state = swr_create_blend_state;
1598 pipe->bind_blend_state = swr_bind_blend_state;
1599 pipe->delete_blend_state = swr_delete_blend_state;
1600
1601 pipe->create_depth_stencil_alpha_state = swr_create_depth_stencil_state;
1602 pipe->bind_depth_stencil_alpha_state = swr_bind_depth_stencil_state;
1603 pipe->delete_depth_stencil_alpha_state = swr_delete_depth_stencil_state;
1604
1605 pipe->create_rasterizer_state = swr_create_rasterizer_state;
1606 pipe->bind_rasterizer_state = swr_bind_rasterizer_state;
1607 pipe->delete_rasterizer_state = swr_delete_rasterizer_state;
1608
1609 pipe->create_sampler_state = swr_create_sampler_state;
1610 pipe->bind_sampler_states = swr_bind_sampler_states;
1611 pipe->delete_sampler_state = swr_delete_sampler_state;
1612
1613 pipe->create_sampler_view = swr_create_sampler_view;
1614 pipe->set_sampler_views = swr_set_sampler_views;
1615 pipe->sampler_view_destroy = swr_sampler_view_destroy;
1616
1617 pipe->create_vs_state = swr_create_vs_state;
1618 pipe->bind_vs_state = swr_bind_vs_state;
1619 pipe->delete_vs_state = swr_delete_vs_state;
1620
1621 pipe->create_fs_state = swr_create_fs_state;
1622 pipe->bind_fs_state = swr_bind_fs_state;
1623 pipe->delete_fs_state = swr_delete_fs_state;
1624
1625 pipe->set_constant_buffer = swr_set_constant_buffer;
1626
1627 pipe->create_vertex_elements_state = swr_create_vertex_elements_state;
1628 pipe->bind_vertex_elements_state = swr_bind_vertex_elements_state;
1629 pipe->delete_vertex_elements_state = swr_delete_vertex_elements_state;
1630
1631 pipe->set_vertex_buffers = swr_set_vertex_buffers;
1632 pipe->set_index_buffer = swr_set_index_buffer;
1633
1634 pipe->set_polygon_stipple = swr_set_polygon_stipple;
1635 pipe->set_clip_state = swr_set_clip_state;
1636 pipe->set_scissor_states = swr_set_scissor_states;
1637 pipe->set_viewport_states = swr_set_viewport_states;
1638
1639 pipe->set_framebuffer_state = swr_set_framebuffer_state;
1640
1641 pipe->set_blend_color = swr_set_blend_color;
1642 pipe->set_stencil_ref = swr_set_stencil_ref;
1643
1644 pipe->set_sample_mask = swr_set_sample_mask;
1645
1646 pipe->create_stream_output_target = swr_create_so_target;
1647 pipe->stream_output_target_destroy = swr_destroy_so_target;
1648 pipe->set_stream_output_targets = swr_set_so_targets;
1649 }