1 /**********************************************************
2 * Copyright 2008-2009 VMware, Inc. All rights reserved.
4 * Permission is hereby granted, free of charge, to any person
5 * obtaining a copy of this software and associated documentation
6 * files (the "Software"), to deal in the Software without
7 * restriction, including without limitation the rights to use, copy,
8 * modify, merge, publish, distribute, sublicense, and/or sell copies
9 * of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 **********************************************************/
26 #include "util/u_inlines.h"
27 #include "pipe/p_defines.h"
28 #include "util/u_math.h"
29 #include "util/u_format.h"
31 #include "svga_context.h"
32 #include "svga_state.h"
34 #include "svga_debug.h"
35 #include "svga_screen.h"
36 #include "svga_surface.h"
/*
 * Flush our command buffer after the 8th distinct render target.
 *
 * This helps improve the surface cache behaviour in the face of the
 * large number of single-use render targets generated by EXA and the xorg
 * state tracker.  Without this we can reference hundreds of individual
 * render targets from a command buffer, which leaves little scope for
 * sharing or reuse of those targets.
 */
48 #define MAX_RT_PER_BATCH 8
52 static enum pipe_error
53 emit_fb_vgpu9(struct svga_context
*svga
)
55 struct svga_screen
*svgascreen
= svga_screen(svga
->pipe
.screen
);
56 const struct pipe_framebuffer_state
*curr
= &svga
->curr
.framebuffer
;
57 struct pipe_framebuffer_state
*hw
= &svga
->state
.hw_clear
.framebuffer
;
58 boolean reemit
= svga
->rebind
.flags
.rendertargets
;
62 assert(!svga_have_vgpu10(svga
));
65 * We need to reemit non-null surface bindings, even when they are not
66 * dirty, to ensure that the resources are paged in.
69 for (i
= 0; i
< svgascreen
->max_color_buffers
; i
++) {
70 if ((curr
->cbufs
[i
] != hw
->cbufs
[i
]) || (reemit
&& hw
->cbufs
[i
])) {
71 if (svga
->curr
.nr_fbs
++ > MAX_RT_PER_BATCH
)
72 return PIPE_ERROR_OUT_OF_MEMORY
;
74 ret
= SVGA3D_SetRenderTarget(svga
->swc
, SVGA3D_RT_COLOR0
+ i
,
79 pipe_surface_reference(&hw
->cbufs
[i
], curr
->cbufs
[i
]);
83 if ((curr
->zsbuf
!= hw
->zsbuf
) || (reemit
&& hw
->zsbuf
)) {
84 ret
= SVGA3D_SetRenderTarget(svga
->swc
, SVGA3D_RT_DEPTH
, curr
->zsbuf
);
89 util_format_is_depth_and_stencil(curr
->zsbuf
->format
)) {
90 ret
= SVGA3D_SetRenderTarget(svga
->swc
, SVGA3D_RT_STENCIL
,
96 ret
= SVGA3D_SetRenderTarget(svga
->swc
, SVGA3D_RT_STENCIL
, NULL
);
101 pipe_surface_reference(&hw
->zsbuf
, curr
->zsbuf
);
109 * Rebind rendertargets.
111 * Similar to emit_framebuffer, but without any state checking/update.
113 * Called at the beginning of every new command buffer to ensure that
114 * non-dirty rendertargets are properly paged-in.
116 static enum pipe_error
117 svga_reemit_framebuffer_bindings_vgpu9(struct svga_context
*svga
)
119 struct svga_screen
*svgascreen
= svga_screen(svga
->pipe
.screen
);
120 struct pipe_framebuffer_state
*hw
= &svga
->state
.hw_clear
.framebuffer
;
124 assert(!svga_have_vgpu10(svga
));
126 for (i
= 0; i
< svgascreen
->max_color_buffers
; i
++) {
128 ret
= SVGA3D_SetRenderTarget(svga
->swc
, SVGA3D_RT_COLOR0
+ i
,
130 if (ret
!= PIPE_OK
) {
137 ret
= SVGA3D_SetRenderTarget(svga
->swc
, SVGA3D_RT_DEPTH
, hw
->zsbuf
);
138 if (ret
!= PIPE_OK
) {
143 util_format_is_depth_and_stencil(hw
->zsbuf
->format
)) {
144 ret
= SVGA3D_SetRenderTarget(svga
->swc
, SVGA3D_RT_STENCIL
, hw
->zsbuf
);
145 if (ret
!= PIPE_OK
) {
150 ret
= SVGA3D_SetRenderTarget(svga
->swc
, SVGA3D_RT_STENCIL
, NULL
);
151 if (ret
!= PIPE_OK
) {
162 static enum pipe_error
163 emit_fb_vgpu10(struct svga_context
*svga
)
165 const struct svga_screen
*ss
= svga_screen(svga
->pipe
.screen
);
166 struct pipe_surface
*rtv
[SVGA3D_MAX_RENDER_TARGETS
];
167 struct pipe_surface
*dsv
;
168 struct pipe_framebuffer_state
*curr
= &svga
->curr
.framebuffer
;
169 struct pipe_framebuffer_state
*hw
= &svga
->state
.hw_clear
.framebuffer
;
170 const unsigned num_color
= MAX2(curr
->nr_cbufs
, hw
->nr_cbufs
);
173 enum pipe_error ret
= PIPE_OK
;
175 assert(svga_have_vgpu10(svga
));
177 /* Setup render targets array. Note that we loop over the max of the
178 * number of previously bound buffers and the new buffers to unbind
179 * any previously bound buffers when the new number of buffers is less
180 * than the old number of buffers.
182 for (i
= 0; i
< num_color
; i
++) {
183 if (curr
->cbufs
[i
]) {
184 rtv
[i
] = svga_validate_surface_view(svga
,
185 svga_surface(curr
->cbufs
[i
]));
186 if (rtv
[i
] == NULL
) {
187 return PIPE_ERROR_OUT_OF_MEMORY
;
190 assert(svga_surface(rtv
[i
])->view_id
!= SVGA3D_INVALID_ID
);
198 /* Setup depth stencil view */
200 dsv
= svga_validate_surface_view(svga
, svga_surface(curr
->zsbuf
));
202 return PIPE_ERROR_OUT_OF_MEMORY
;
209 /* avoid emitting redundant SetRenderTargets command */
210 if ((num_color
!= svga
->state
.hw_draw
.num_rendertargets
) ||
211 (dsv
!= svga
->state
.hw_draw
.dsv
) ||
212 memcmp(rtv
, svga
->state
.hw_draw
.rtv
, num_color
* sizeof(rtv
[0]))) {
214 ret
= SVGA3D_vgpu10_SetRenderTargets(svga
->swc
, num_color
, rtv
, dsv
);
218 /* number of render targets sent to the device, not including trailing
219 * unbound render targets.
221 svga
->state
.hw_draw
.num_rendertargets
= last_rtv
+ 1;
222 svga
->state
.hw_draw
.dsv
= dsv
;
223 memcpy(svga
->state
.hw_draw
.rtv
, rtv
, num_color
* sizeof(rtv
[0]));
225 for (i
= 0; i
< ss
->max_color_buffers
; i
++) {
226 if (hw
->cbufs
[i
] != curr
->cbufs
[i
]) {
227 /* propagate the backed view surface before unbinding it */
228 if (hw
->cbufs
[i
] && svga_surface(hw
->cbufs
[i
])->backed
) {
229 svga_propagate_surface(svga
,
230 &svga_surface(hw
->cbufs
[i
])->backed
->base
);
232 pipe_surface_reference(&hw
->cbufs
[i
], curr
->cbufs
[i
]);
235 hw
->nr_cbufs
= curr
->nr_cbufs
;
237 if (hw
->zsbuf
!= curr
->zsbuf
) {
238 /* propagate the backed view surface before unbinding it */
239 if (hw
->zsbuf
&& svga_surface(hw
->zsbuf
)->backed
) {
240 svga_propagate_surface(svga
, &svga_surface(hw
->zsbuf
)->backed
->base
);
242 pipe_surface_reference(&hw
->zsbuf
, curr
->zsbuf
);
250 static enum pipe_error
251 emit_framebuffer(struct svga_context
*svga
, unsigned dirty
)
253 if (svga_have_vgpu10(svga
)) {
254 return emit_fb_vgpu10(svga
);
257 return emit_fb_vgpu9(svga
);
263 * Rebind rendertargets.
265 * Similar to emit_framebuffer, but without any state checking/update.
267 * Called at the beginning of every new command buffer to ensure that
268 * non-dirty rendertargets are properly paged-in.
271 svga_reemit_framebuffer_bindings(struct svga_context
*svga
)
275 assert(svga
->rebind
.flags
.rendertargets
);
277 if (svga_have_vgpu10(svga
)) {
278 ret
= emit_fb_vgpu10(svga
);
281 ret
= svga_reemit_framebuffer_bindings_vgpu9(svga
);
284 svga
->rebind
.flags
.rendertargets
= FALSE
;
291 * Send a private allocation command to page in rendertargets resource.
294 svga_rebind_framebuffer_bindings(struct svga_context
*svga
)
296 struct svga_hw_draw_state
*hw
= &svga
->state
.hw_draw
;
300 assert(svga_have_vgpu10(svga
));
302 if (!svga
->rebind
.flags
.rendertargets
)
305 for (i
= 0; i
< hw
->num_rendertargets
; i
++) {
307 ret
= svga
->swc
->resource_rebind(svga
->swc
,
308 svga_surface(hw
->rtv
[i
])->handle
,
317 ret
= svga
->swc
->resource_rebind(svga
->swc
,
318 svga_surface(hw
->dsv
)->handle
,
325 svga
->rebind
.flags
.rendertargets
= 0;
/* Tracked-state descriptor labelled "hw framebuffer state", keyed on the
 * SVGA_NEW_FRAME_BUFFER dirty flag.
 * NOTE(review): the update callback is presumably emit_framebuffer --
 * confirm against the complete initializer.
 */
331 struct svga_tracked_state svga_hw_framebuffer
=
333 "hw framebuffer state",
334 SVGA_NEW_FRAME_BUFFER
,
341 /***********************************************************************
/*
 * Translate the gallium viewport into device viewport state.
 *
 * Reads svga->curr.viewport (scale/translate) and the current framebuffer
 * width/height, and derives:
 *   - an integer screen rectangle (rect) from fx, fy, fw, fh, after the
 *     viewport has been cut back to the framebuffer bounds;
 *   - a depth range (range_min/range_max) from scale[2]/translate[2],
 *     swapped when min > max and re-based when min < 0;
 *   - a vertex "prescale" (scale/translate) that compensates for those
 *     adjustments and, when rast->templ.half_pixel_center is set, for the
 *     per-primitive sub-pixel offset.  When prescale is enabled its
 *     translate terms are converted to clip space as K = (T + (S-1)J) / H.
 *
 * The rectangle and depth range are only sent when they differ from the
 * values cached in svga->state.hw_clear: via SVGA3D_SetViewport() and
 * SVGA3D_SetZRange() on pre-VGPU10 devices, or one combined
 * SVGA3D_vgpu10_SetViewports() call on VGPU10.  If the resulting prescale
 * differs from the cached one, SVGA_NEW_PRESCALE is set in svga->dirty and
 * the cache is updated.
 *
 * NOTE(review): the statement order in this function is significant (each
 * clipping step feeds the next); do not reorder when editing.
 */
344 static enum pipe_error
345 emit_viewport( struct svga_context
*svga
,
348 const struct pipe_viewport_state
*viewport
= &svga
->curr
.viewport
;
349 struct svga_prescale prescale
;
351 /* Not sure if this state is relevant with POSITIONT. Probably
352 * not, but setting to 0,1 avoids some state pingponging.
354 float range_min
= 0.0;
355 float range_max
= 1.0;
357 boolean degenerate
= FALSE
;
358 boolean invertY
= FALSE
;
361 float fb_width
= (float) svga
->curr
.framebuffer
.width
;
362 float fb_height
= (float) svga
->curr
.framebuffer
.height
;
364 float fx
= viewport
->scale
[0] * -1.0f
+ viewport
->translate
[0];
365 float fy
= flip
* viewport
->scale
[1] * -1.0f
+ viewport
->translate
[1];
366 float fw
= viewport
->scale
[0] * 2.0f
;
367 float fh
= flip
* viewport
->scale
[1] * 2.0f
;
368 boolean emit_vgpu10_viewport
= FALSE
;
370 memset( &prescale
, 0, sizeof(prescale
) );
372 /* Examine gallium viewport transformation and produce a screen
373 * rectangle and possibly vertex shader pre-transformation to
374 * get the same results.
377 SVGA_DBG(DEBUG_VIEWPORT
,
378 "\ninitial %f,%f %fx%f\n",
384 prescale
.scale
[0] = 1.0;
385 prescale
.scale
[1] = 1.0;
386 prescale
.scale
[2] = 1.0;
387 prescale
.scale
[3] = 1.0;
388 prescale
.translate
[0] = 0;
389 prescale
.translate
[1] = 0;
390 prescale
.translate
[2] = 0;
391 prescale
.translate
[3] = 0;
393 /* Enable prescale to adjust vertex positions to match
394 VGPU10 convention only if rasterization is enabled.
396 if (svga
->curr
.rast
&& svga
->curr
.rast
->templ
.rasterizer_discard
) {
400 prescale
.enabled
= TRUE
;
404 prescale
.scale
[0] *= -1.0f
;
405 prescale
.translate
[0] += -fw
;
407 fx
= viewport
->scale
[0] * 1.0f
+ viewport
->translate
[0];
411 if (svga_have_vgpu10(svga
)) {
412 /* floating point viewport params below */
413 prescale
.translate
[1] = fh
+ fy
* 2.0f
;
416 /* integer viewport params below */
417 prescale
.translate
[1] = fh
- 1.0f
+ fy
* 2.0f
;
421 prescale
.scale
[1] = -1.0f
;
426 prescale
.translate
[0] += fx
;
427 prescale
.scale
[0] *= fw
/ (fw
+ fx
);
434 prescale
.translate
[1] -= fy
;
437 prescale
.translate
[1] += fy
;
439 prescale
.scale
[1] *= fh
/ (fh
+ fy
);
444 if (fx
+ fw
> fb_width
) {
445 prescale
.scale
[0] *= fw
/ (fb_width
- fx
);
446 prescale
.translate
[0] -= fx
* (fw
/ (fb_width
- fx
));
447 prescale
.translate
[0] += fx
;
451 if (fy
+ fh
> fb_height
) {
452 prescale
.scale
[1] *= fh
/ (fb_height
- fy
);
454 float in
= fb_height
- fy
; /* number of vp pixels inside view */
455 float out
= fy
+ fh
- fb_height
; /* number of vp pixels out of view */
456 prescale
.translate
[1] += fy
* out
/ in
;
459 prescale
.translate
[1] -= fy
* (fh
/ (fb_height
- fy
));
460 prescale
.translate
[1] += fy
;
465 if (fw
< 0 || fh
< 0) {
466 fw
= fh
= fx
= fy
= 0;
471 /* D3D viewport is integer space. Convert fx,fy,etc. to
474 * TODO: adjust pretranslate correct for any subpixel error
475 * introduced converting to integers.
477 rect
.x
= (uint32
) fx
;
478 rect
.y
= (uint32
) fy
;
479 rect
.w
= (uint32
) fw
;
480 rect
.h
= (uint32
) fh
;
482 SVGA_DBG(DEBUG_VIEWPORT
,
483 "viewport error %f,%f %fx%f\n",
484 fabs((float)rect
.x
- fx
),
485 fabs((float)rect
.y
- fy
),
486 fabs((float)rect
.w
- fw
),
487 fabs((float)rect
.h
- fh
));
489 SVGA_DBG(DEBUG_VIEWPORT
,
490 "viewport %d,%d %dx%d\n",
496 /* Finally, to get GL rasterization rules, need to tweak the
497 * screen-space coordinates slightly relative to D3D which is
498 * what hardware implements natively.
500 if (svga
->curr
.rast
&& svga
->curr
.rast
->templ
.half_pixel_center
) {
501 float adjust_x
= 0.0;
502 float adjust_y
= 0.0;
504 if (svga_have_vgpu10(svga
)) {
505 /* Normally, we don't have to do any sub-pixel coordinate
506 * adjustments for VGPU10. But when we draw wide points with
507 * a GS we need an X adjustment in order to be conformant.
509 if (svga
->curr
.reduced_prim
== PIPE_PRIM_POINTS
&&
510 svga
->curr
.rast
->pointsize
> 1.0f
) {
515 switch (svga
->curr
.reduced_prim
) {
516 case PIPE_PRIM_POINTS
:
520 case PIPE_PRIM_LINES
:
524 case PIPE_PRIM_TRIANGLES
:
535 adjust_y
= -adjust_y
;
537 prescale
.translate
[0] += adjust_x
;
538 prescale
.translate
[1] += adjust_y
;
539 prescale
.translate
[2] = 0.5; /* D3D clip space */
540 prescale
.scale
[2] = 0.5; /* D3D clip space */
543 range_min
= viewport
->scale
[2] * -1.0f
+ viewport
->translate
[2];
544 range_max
= viewport
->scale
[2] * 1.0f
+ viewport
->translate
[2];
546 /* D3D (and by implication SVGA) doesn't like dealing with zmax
547 * less than zmin. Detect that case, flip the depth range and
548 * invert our z-scale factor to achieve the same effect.
550 if (range_min
> range_max
) {
552 range_tmp
= range_min
;
553 range_min
= range_max
;
554 range_max
= range_tmp
;
555 prescale
.scale
[2] = -prescale
.scale
[2];
558 /* If zmin is less than 0, clamp zmin to 0 and adjust the prescale.
559 * zmin can be set to -1 when viewport->scale[2] is set to 1 and
560 * viewport->translate[2] is set to 0 in the blit code.
562 if (range_min
< 0.0f
) {
563 range_min
= -0.5f
* viewport
->scale
[2] + 0.5f
+ viewport
->translate
[2];
564 range_max
= 0.5f
* viewport
->scale
[2] + 0.5f
+ viewport
->translate
[2];
565 prescale
.scale
[2] *= 2.0f
;
566 prescale
.translate
[2] -= 0.5f
;
569 if (prescale
.enabled
) {
574 SVGA_DBG(DEBUG_VIEWPORT
,
575 "prescale %f,%f %fx%f\n",
576 prescale
.translate
[0],
577 prescale
.translate
[1],
581 H
[0] = (float)rect
.w
/ 2.0f
;
582 H
[1] = -(float)rect
.h
/ 2.0f
;
583 J
[0] = (float)rect
.x
+ (float)rect
.w
/ 2.0f
;
584 J
[1] = (float)rect
.y
+ (float)rect
.h
/ 2.0f
;
586 SVGA_DBG(DEBUG_VIEWPORT
,
594 /* Adjust prescale to take into account the fact that it is
595 * going to be applied prior to the perspective divide and
596 * viewport transformation.
598 * Vwin = H(Vc/Vc.w) + J
600 * We want to tweak Vwin with scale and translation from above,
605 * But we can only modify the values at Vc. Plugging all the
606 * above together, and rearranging, eventually we get:
608 * Vwin' = H(Vc'/Vc'.w) + J
611 * K = (T + (S-1)J) / H
613 * Overwrite prescale.translate with values for K:
615 for (i
= 0; i
< 2; i
++) {
616 prescale
.translate
[i
] = ((prescale
.translate
[i
] +
617 (prescale
.scale
[i
] - 1.0f
) * J
[i
]) / H
[i
]);
620 SVGA_DBG(DEBUG_VIEWPORT
,
621 "clipspace %f,%f %fx%f\n",
622 prescale
.translate
[0],
623 prescale
.translate
[1],
634 prescale
.enabled
= FALSE
;
637 if (!svga_rects_equal(&rect
, &svga
->state
.hw_clear
.viewport
)) {
638 if (svga_have_vgpu10(svga
)) {
639 emit_vgpu10_viewport
= TRUE
;
642 ret
= SVGA3D_SetViewport(svga
->swc
, &rect
);
646 svga
->state
.hw_clear
.viewport
= rect
;
650 if (svga
->state
.hw_clear
.depthrange
.zmin
!= range_min
||
651 svga
->state
.hw_clear
.depthrange
.zmax
!= range_max
)
653 if (svga_have_vgpu10(svga
)) {
654 emit_vgpu10_viewport
= TRUE
;
657 ret
= SVGA3D_SetZRange(svga
->swc
, range_min
, range_max
);
661 svga
->state
.hw_clear
.depthrange
.zmin
= range_min
;
662 svga
->state
.hw_clear
.depthrange
.zmax
= range_max
;
666 if (emit_vgpu10_viewport
) {
668 vp
.x
= (float) rect
.x
;
669 vp
.y
= (float) rect
.y
;
670 vp
.width
= (float) rect
.w
;
671 vp
.height
= (float) rect
.h
;
672 vp
.minDepth
= range_min
;
673 vp
.maxDepth
= range_max
;
674 ret
= SVGA3D_vgpu10_SetViewports(svga
->swc
, 1, &vp
);
678 svga
->state
.hw_clear
.viewport
= rect
;
680 svga
->state
.hw_clear
.depthrange
.zmin
= range_min
;
681 svga
->state
.hw_clear
.depthrange
.zmax
= range_max
;
684 if (memcmp(&prescale
, &svga
->state
.hw_clear
.prescale
, sizeof prescale
) != 0) {
685 svga
->dirty
|= SVGA_NEW_PRESCALE
;
686 svga
->state
.hw_clear
.prescale
= prescale
;
/* Tracked-state descriptor for the hardware viewport.  Its dirty mask
 * includes at least SVGA_NEW_FRAME_BUFFER and SVGA_NEW_REDUCED_PRIMITIVE.
 * NOTE(review): presumably paired with emit_viewport, which reads the
 * framebuffer size and the reduced primitive type -- confirm against the
 * complete initializer.
 */
693 struct svga_tracked_state svga_hw_viewport
=
696 ( SVGA_NEW_FRAME_BUFFER
|
699 SVGA_NEW_REDUCED_PRIMITIVE
),
704 /***********************************************************************
707 static enum pipe_error
708 emit_scissor_rect( struct svga_context
*svga
,
711 const struct pipe_scissor_state
*scissor
= &svga
->curr
.scissor
;
713 if (svga_have_vgpu10(svga
)) {
716 rect
.left
= scissor
->minx
;
717 rect
.top
= scissor
->miny
;
718 rect
.right
= scissor
->maxx
;
719 rect
.bottom
= scissor
->maxy
;
721 return SVGA3D_vgpu10_SetScissorRects(svga
->swc
, 1, &rect
);
726 rect
.x
= scissor
->minx
;
727 rect
.y
= scissor
->miny
;
728 rect
.w
= scissor
->maxx
- scissor
->minx
; /* + 1 ?? */
729 rect
.h
= scissor
->maxy
- scissor
->miny
; /* + 1 ?? */
731 return SVGA3D_SetScissorRect(svga
->swc
, &rect
);
/* Tracked-state descriptor for the hardware scissor rectangle.
 * NOTE(review): presumably paired with emit_scissor_rect -- confirm
 * against the complete initializer.
 */
736 struct svga_tracked_state svga_hw_scissor
=
744 /***********************************************************************
/*
 * Emit the user clip planes.
 *
 * Walks all SVGA3D_MAX_CLIP_PLANES slots and reads the four plane
 * coefficients (a, b, c, d) from svga->curr.clip.ucp[i].  On pre-VGPU10
 * devices a plane is sent with SVGA3D_SetClipPlane(svga->swc, i, plane);
 * the VGPU10 branch contains only a disabled debug print here.
 *
 * NOTE(review): the comments below describe a GL -> D3D coordinate
 * conversion applied to the coefficients before they are sent; verify the
 * computation of `plane` against the conversion matrix when editing.
 */
748 static enum pipe_error
749 emit_clip_planes( struct svga_context
*svga
,
755 /* TODO: just emit directly from svga_set_clip_state()?
757 for (i
= 0; i
< SVGA3D_MAX_CLIP_PLANES
; i
++) {
758 /* need to express the plane in D3D-style coordinate space.
759 * GL coords get converted to D3D coords with the matrix:
764 * Apply that matrix to our plane equation, and invert Y.
766 float a
= svga
->curr
.clip
.ucp
[i
][0];
767 float b
= svga
->curr
.clip
.ucp
[i
][1];
768 float c
= svga
->curr
.clip
.ucp
[i
][2];
769 float d
= svga
->curr
.clip
.ucp
[i
][3];
777 if (svga_have_vgpu10(svga
)) {
778 //debug_printf("XXX emit DX10 clip plane\n");
782 ret
= SVGA3D_SetClipPlane(svga
->swc
, i
, plane
);
792 struct svga_tracked_state svga_hw_clip_planes
=