1 #include "util/format/u_format.h"
2 #include "util/u_framebuffer.h"
3 #include "util/u_math.h"
4 #include "util/u_viewport.h"
6 #include "nvc0/nvc0_context.h"
/* NOTE(review): emits the zcull region setup for the bound depth surface —
 * allocates the zcull area right after the miptree storage (128 KiB aligned)
 * and programs region address/limit, size, width/height (width rounded up to
 * a multiple of 224 — presumably a HW granularity; TODO confirm), then
 * invalidates zcull. Text is extraction-mangled: original line numbers are
 * fused in and several lines (return type, braces, `size` declaration,
 * conditionals) are missing — code left byte-identical. */
10 nvc0_validate_zcull(struct nvc0_context
*nvc0
)
12 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
13 struct pipe_framebuffer_state
*fb
= &nvc0
->framebuffer
;
14 struct nv50_surface
*sf
= nv50_surface(fb
->zsbuf
);
15 struct nv50_miptree
*mt
= nv50_miptree(sf
->base
.texture
);
16 struct nouveau_bo
*bo
= mt
->base
.bo
;
/* zcull data is placed after the miptree, aligned to 1 << 17 (128 KiB) */
18 uint32_t offset
= align(mt
->total_size
, 1 << 17);
19 unsigned width
, height
;
/* only single-layer, non-array depth surfaces are supported here */
21 assert(mt
->base
.base
.depth0
== 1 && mt
->base
.base
.array_size
< 2);
23 size
= mt
->total_size
* 2;
25 height
= align(fb
->height
, 32);
26 width
= fb
->width
% 224;
28 width
= fb
->width
+ (224 - width
);
32 BEGIN_NVC0(push
, NVC0_3D(ZCULL_REGION
), 1);
34 BEGIN_NVC0(push
, NVC0_3D(ZCULL_ADDRESS_HIGH
), 2);
35 PUSH_DATAh(push
, bo
->offset
+ offset
);
36 PUSH_DATA (push
, bo
->offset
+ offset
);
38 BEGIN_NVC0(push
, NVC0_3D(ZCULL_LIMIT_HIGH
), 2);
39 PUSH_DATAh(push
, bo
->offset
+ offset
);
40 PUSH_DATA (push
, bo
->offset
+ offset
);
41 BEGIN_NVC0(push
, SUBC_3D(0x07e0), 2);
42 PUSH_DATA (push
, size
);
43 PUSH_DATA (push
, size
>> 16);
44 BEGIN_NVC0(push
, SUBC_3D(0x15c8), 1); /* bits 0x3 */
46 BEGIN_NVC0(push
, NVC0_3D(ZCULL_WIDTH
), 4);
47 PUSH_DATA (push
, width
);
48 PUSH_DATA (push
, height
);
51 BEGIN_NVC0(push
, NVC0_3D(ZCULL_WINDOW_OFFSET_X
), 2);
54 BEGIN_NVC0(push
, NVC0_3D(ZCULL_INVALIDATE
), 1);
/* NOTE(review): binds a "null" (64x0) render target at RT slot i so the
 * hardware has a valid RT descriptor when no color buffer is bound; `layers`
 * is forwarded so layered clears still behave. Text is extraction-mangled
 * (fused line numbers, missing address/brace lines) — code left byte-identical. */
60 nvc0_fb_set_null_rt(struct nouveau_pushbuf
*push
, unsigned i
, unsigned layers
)
62 BEGIN_NVC0(push
, NVC0_3D(RT_ADDRESS_HIGH(i
)), 9);
65 PUSH_DATA (push
, 64); // width
66 PUSH_DATA (push
, 0); // height
67 PUSH_DATA (push
, 0); // format
68 PUSH_DATA (push
, 0); // tile mode
69 PUSH_DATA (push
, layers
); // layers
70 PUSH_DATA (push
, 0); // layer stride
71 PUSH_DATA (push
, 0); // base layer
/* NOTE(review): packs one programmable sample location (x, y in 1/16ths of a
 * pixel) into the 32-bit constbuf encoding consumed by the shader: a remapped
 * (LUT) S0.12 form at bits 8/24 for INTERP_SAMPLE, plus the raw position at
 * bits 12/28 for SV_SAMPLE_POS. Extraction dropped the return type, braces,
 * `result` declaration and `return` — code left byte-identical. */
75 gm200_encode_cb_sample_location(uint8_t x
, uint8_t y
)
77 static const uint8_t lut
[] = {
78 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
79 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7};
81 /* S0.12 representation for TGSI_OPCODE_INTERP_SAMPLE */
82 result
|= lut
[x
] << 8 | lut
[y
] << 24;
83 /* fill in gaps with data in a representation for SV_SAMPLE_POS */
84 result
|= x
<< 12 | y
<< 28;
/* NOTE(review): GM200+ path for programmable sample locations. Builds a
 * 16-entry per-pixel-grid sample position table (either from the
 * user-supplied nvc0->sample_locations, flipped in Y, or from the default
 * nvc0_get_sample_locations table), uploads the shader-visible encoding to
 * the AUX constbuf (slot 4), and programs the packed HW locations via the
 * 0x11e0 method group. Extraction dropped braces, `else` lines and the
 * declarations of `x`, `y`, `cb` — code left byte-identical. */
89 gm200_validate_sample_locations(struct nvc0_context
*nvc0
, unsigned ms
)
91 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
92 struct nvc0_screen
*screen
= nvc0
->screen
;
93 unsigned grid_width
, grid_height
, hw_grid_width
;
94 uint8_t sample_locations
[16][2];
96 unsigned i
, pixel
, pixel_y
, pixel_x
, sample
;
97 uint32_t packed_locations
[4] = {};
99 screen
->base
.base
.get_sample_pixel_grid(
100 &screen
->base
.base
, ms
, &grid_width
, &grid_height
);
102 hw_grid_width
= grid_width
;
103 if (ms
== 1) /* get_sample_pixel_grid() exposes 2x4 for 1x msaa */
106 if (nvc0
->sample_locations_enabled
) {
107 uint8_t locations
[2 * 4 * 8];
108 memcpy(locations
, nvc0
->sample_locations
, sizeof(locations
));
109 util_sample_locations_flip_y(
110 &screen
->base
.base
, nvc0
->framebuffer
.height
, ms
, locations
);
/* repack user locations into the HW's pixel-grid order */
112 for (pixel
= 0; pixel
< hw_grid_width
*grid_height
; pixel
++) {
113 for (sample
= 0; sample
< ms
; sample
++) {
114 unsigned pixel_x
= pixel
% hw_grid_width
;
115 unsigned pixel_y
= pixel
/ hw_grid_width
;
116 unsigned wi
= pixel
* ms
+ sample
;
117 unsigned ri
= (pixel_y
* grid_width
+ pixel_x
% grid_width
);
118 ri
= ri
* ms
+ sample
;
119 sample_locations
[wi
][0] = locations
[ri
] & 0xf;
120 sample_locations
[wi
][1] = 16 - (locations
[ri
] >> 4);
/* default (non-user) sample positions, repeated across the table */
124 const uint8_t (*ptr
)[2] = nvc0_get_sample_locations(ms
);
125 for (i
= 0; i
< 16; i
++) {
126 sample_locations
[i
][0] = ptr
[i
% ms
][0];
127 sample_locations
[i
][1] = ptr
[i
% ms
][1];
/* upload the shader-visible sample info to the AUX constbuf (slot 4) */
131 BEGIN_NVC0(push
, NVC0_3D(CB_SIZE
), 3);
132 PUSH_DATA (push
, NVC0_CB_AUX_SIZE
);
133 PUSH_DATAh(push
, screen
->uniform_bo
->offset
+ NVC0_CB_AUX_INFO(4));
134 PUSH_DATA (push
, screen
->uniform_bo
->offset
+ NVC0_CB_AUX_INFO(4));
135 BEGIN_1IC0(push
, NVC0_3D(CB_POS
), 1 + 64);
136 PUSH_DATA (push
, NVC0_CB_AUX_SAMPLE_INFO
);
137 for (pixel_y
= 0; pixel_y
< 4; pixel_y
++) {
138 for (pixel_x
= 0; pixel_x
< 2; pixel_x
++) {
139 for (sample
= 0; sample
< ms
; sample
++) {
140 unsigned write_index
= (pixel_y
* 2 + pixel_x
) * 8 + sample
;
141 unsigned read_index
= pixel_y
% grid_height
* hw_grid_width
;
143 read_index
+= pixel_x
% grid_width
;
144 read_index
= read_index
* ms
+ sample
;
145 x
= sample_locations
[read_index
][0];
146 y
= sample_locations
[read_index
][1];
147 cb
[write_index
] = gm200_encode_cb_sample_location(x
, y
);
151 PUSH_DATAp(push
, cb
, 64);
/* pack 16 (x,y) nibble pairs into 4 words for the HW method */
153 for (i
= 0; i
< 16; i
++) {
154 packed_locations
[i
/ 4] |= sample_locations
[i
][0] << ((i
% 4) * 8);
155 packed_locations
[i
/ 4] |= sample_locations
[i
][1] << ((i
% 4) * 8 + 4);
158 BEGIN_NVC0(push
, SUBC_3D(0x11e0), 4);
159 PUSH_DATAp(push
, packed_locations
, 4);
/* NOTE(review): pre-GM200 path — uploads the standard per-sample positions
 * (queried via pipe->get_sample_position) as float pairs into the AUX
 * constbuf (slot 4) for shader consumption. Extraction dropped braces and
 * the `i`/`xy` declarations — code left byte-identical. */
163 nvc0_validate_sample_locations(struct nvc0_context
*nvc0
, unsigned ms
)
165 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
166 struct nvc0_screen
*screen
= nvc0
->screen
;
169 BEGIN_NVC0(push
, NVC0_3D(CB_SIZE
), 3);
170 PUSH_DATA (push
, NVC0_CB_AUX_SIZE
);
171 PUSH_DATAh(push
, screen
->uniform_bo
->offset
+ NVC0_CB_AUX_INFO(4));
172 PUSH_DATA (push
, screen
->uniform_bo
->offset
+ NVC0_CB_AUX_INFO(4));
173 BEGIN_1IC0(push
, NVC0_3D(CB_POS
), 1 + 2 * ms
);
174 PUSH_DATA (push
, NVC0_CB_AUX_SAMPLE_INFO
);
175 for (i
= 0; i
< ms
; i
++) {
177 nvc0
->base
.pipe
.get_sample_position(&nvc0
->base
.pipe
, ms
, i
, xy
);
178 PUSH_DATAf(push
, xy
[0]);
179 PUSH_DATAf(push
, xy
[1]);
/* NOTE(review): dispatcher — picks the GM200+ programmable-locations path or
 * the older fixed-locations path based on the screen's 3D class, using the
 * framebuffer's sample count. The `else` line between the two calls was
 * dropped by extraction — code left byte-identical. */
184 validate_sample_locations(struct nvc0_context
*nvc0
)
186 unsigned ms
= util_framebuffer_get_num_samples(&nvc0
->framebuffer
);
188 if (nvc0
->screen
->base
.class_3d
>= GM200_3D_CLASS
)
189 gm200_validate_sample_locations(nvc0
, ms
);
191 nvc0_validate_sample_locations(nvc0
, ms
);
/* NOTE(review): validates the framebuffer state: programs the screen scissor,
 * each color RT (miptree vs. linear/buffer path), the ZETA (depth/stencil)
 * surface, null RTs where needed, RT_CONTROL ordering and the multisample
 * mode, tracking GPU read->write status transitions to decide on a SERIALIZE.
 * Extraction dropped the return type, braces, several `if`/`else`/`continue`
 * lines and the `i` declaration — code left byte-identical. */
195 nvc0_validate_fb(struct nvc0_context
*nvc0
)
197 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
198 struct pipe_framebuffer_state
*fb
= &nvc0
->framebuffer
;
200 unsigned ms_mode
= NVC0_3D_MULTISAMPLE_MODE_MS1
;
201 unsigned nr_cbufs
= fb
->nr_cbufs
;
202 bool serialize
= false;
204 nouveau_bufctx_reset(nvc0
->bufctx_3d
, NVC0_BIND_3D_FB
);
206 BEGIN_NVC0(push
, NVC0_3D(SCREEN_SCISSOR_HORIZ
), 2);
207 PUSH_DATA (push
, fb
->width
<< 16);
208 PUSH_DATA (push
, fb
->height
<< 16);
/* per-color-buffer RT setup */
210 for (i
= 0; i
< fb
->nr_cbufs
; ++i
) {
211 struct nv50_surface
*sf
;
212 struct nv04_resource
*res
;
213 struct nouveau_bo
*bo
;
216 nvc0_fb_set_null_rt(push
, i
, 0);
220 sf
= nv50_surface(fb
->cbufs
[i
]);
221 res
= nv04_resource(sf
->base
.texture
);
224 BEGIN_NVC0(push
, NVC0_3D(RT_ADDRESS_HIGH(i
)), 9);
225 PUSH_DATAh(push
, res
->address
+ sf
->offset
);
226 PUSH_DATA (push
, res
->address
+ sf
->offset
);
/* tiled (VRAM memtype) surface: full miptree RT descriptor */
227 if (likely(nouveau_bo_memtype(bo
))) {
228 struct nv50_miptree
*mt
= nv50_miptree(sf
->base
.texture
);
230 assert(sf
->base
.texture
->target
!= PIPE_BUFFER
);
232 PUSH_DATA(push
, sf
->width
);
233 PUSH_DATA(push
, sf
->height
);
234 PUSH_DATA(push
, nvc0_format_table
[sf
->base
.format
].rt
);
235 PUSH_DATA(push
, (mt
->layout_3d
<< 16) |
236 mt
->level
[sf
->base
.u
.tex
.level
].tile_mode
);
237 PUSH_DATA(push
, sf
->base
.u
.tex
.first_layer
+ sf
->depth
);
238 PUSH_DATA(push
, mt
->layer_stride
>> 2);
239 PUSH_DATA(push
, sf
->base
.u
.tex
.first_layer
);
241 ms_mode
= mt
->ms_mode
;
/* linear / buffer-backed RT path */
243 if (res
->base
.target
== PIPE_BUFFER
) {
244 PUSH_DATA(push
, 262144);
247 PUSH_DATA(push
, nv50_miptree(sf
->base
.texture
)->level
[0].pitch
);
248 PUSH_DATA(push
, sf
->height
);
250 PUSH_DATA(push
, nvc0_format_table
[sf
->base
.format
].rt
);
251 PUSH_DATA(push
, 1 << 12);
256 nvc0_resource_fence(res
, NOUVEAU_BO_WR
);
/* read->write transition on this resource forces a serialize */
261 if (res
->status
& NOUVEAU_BUFFER_STATUS_GPU_READING
)
263 res
->status
|= NOUVEAU_BUFFER_STATUS_GPU_WRITING
;
264 res
->status
&= ~NOUVEAU_BUFFER_STATUS_GPU_READING
;
266 /* only register for writing, otherwise we'd always serialize here */
267 BCTX_REFN(nvc0
->bufctx_3d
, 3D_FB
, res
, WR
);
/* depth/stencil (ZETA) surface setup */
271 struct nv50_miptree
*mt
= nv50_miptree(fb
->zsbuf
->texture
);
272 struct nv50_surface
*sf
= nv50_surface(fb
->zsbuf
);
273 int unk
= mt
->base
.base
.target
== PIPE_TEXTURE_2D
;
275 BEGIN_NVC0(push
, NVC0_3D(ZETA_ADDRESS_HIGH
), 5);
276 PUSH_DATAh(push
, mt
->base
.address
+ sf
->offset
);
277 PUSH_DATA (push
, mt
->base
.address
+ sf
->offset
);
278 PUSH_DATA (push
, nvc0_format_table
[fb
->zsbuf
->format
].rt
);
279 PUSH_DATA (push
, mt
->level
[sf
->base
.u
.tex
.level
].tile_mode
);
280 PUSH_DATA (push
, mt
->layer_stride
>> 2);
281 BEGIN_NVC0(push
, NVC0_3D(ZETA_ENABLE
), 1);
283 BEGIN_NVC0(push
, NVC0_3D(ZETA_HORIZ
), 3);
284 PUSH_DATA (push
, sf
->width
);
285 PUSH_DATA (push
, sf
->height
);
286 PUSH_DATA (push
, (unk
<< 16) |
287 (sf
->base
.u
.tex
.first_layer
+ sf
->depth
));
288 BEGIN_NVC0(push
, NVC0_3D(ZETA_BASE_LAYER
), 1);
289 PUSH_DATA (push
, sf
->base
.u
.tex
.first_layer
);
291 ms_mode
= mt
->ms_mode
;
293 if (mt
->base
.status
& NOUVEAU_BUFFER_STATUS_GPU_READING
)
295 mt
->base
.status
|= NOUVEAU_BUFFER_STATUS_GPU_WRITING
;
296 mt
->base
.status
&= ~NOUVEAU_BUFFER_STATUS_GPU_READING
;
298 BCTX_REFN(nvc0
->bufctx_3d
, 3D_FB
, &mt
->base
, WR
);
300 BEGIN_NVC0(push
, NVC0_3D(ZETA_ENABLE
), 1);
/* no RTs at all: bind a null RT so sample count is still programmable */
304 if (nr_cbufs
== 0 && !fb
->zsbuf
) {
305 assert(util_is_power_of_two_or_zero(fb
->samples
));
306 assert(fb
->samples
<= 8);
308 nvc0_fb_set_null_rt(push
, 0, fb
->layers
);
311 ms_mode
= ffs(fb
->samples
) - 1;
315 BEGIN_NVC0(push
, NVC0_3D(RT_CONTROL
), 1);
316 PUSH_DATA (push
, (076543210 << 4) | nr_cbufs
);
317 IMMED_NVC0(push
, NVC0_3D(MULTISAMPLE_MODE
), ms_mode
);
320 IMMED_NVC0(push
, NVC0_3D(SERIALIZE
), 0);
322 NOUVEAU_DRV_STAT(&nvc0
->screen
->base
, gpu_serialize_count
, serialize
);
/* NOTE(review): emits the four blend-constant color components from
 * nvc0->blend_colour to BLEND_COLOR(0..3). Text is extraction-mangled
 * (fused line numbers, missing braces) — code left byte-identical. */
326 nvc0_validate_blend_colour(struct nvc0_context
*nvc0
)
328 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
330 BEGIN_NVC0(push
, NVC0_3D(BLEND_COLOR(0)), 4);
331 PUSH_DATAf(push
, nvc0
->blend_colour
.color
[0]);
332 PUSH_DATAf(push
, nvc0
->blend_colour
.color
[1]);
333 PUSH_DATAf(push
, nvc0
->blend_colour
.color
[2]);
334 PUSH_DATAf(push
, nvc0
->blend_colour
.color
[3]);
/* NOTE(review): programs front/back stencil reference values from
 * nvc0->stencil_ref. Extraction-mangled; code left byte-identical. */
338 nvc0_validate_stencil_ref(struct nvc0_context
*nvc0
)
340 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
341 const ubyte
*ref
= &nvc0
->stencil_ref
.ref_value
[0];
343 IMMED_NVC0(push
, NVC0_3D(STENCIL_FRONT_FUNC_REF
), ref
[0]);
344 IMMED_NVC0(push
, NVC0_3D(STENCIL_BACK_FUNC_REF
), ref
[1]);
/* NOTE(review): uploads the 32x32 polygon stipple pattern, byte-swapping each
 * row for the hardware's bit order. Extraction dropped the `i` declaration
 * and braces — code left byte-identical. */
348 nvc0_validate_stipple(struct nvc0_context
*nvc0
)
350 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
353 BEGIN_NVC0(push
, NVC0_3D(POLYGON_STIPPLE_PATTERN(0)), 32);
354 for (i
= 0; i
< 32; ++i
)
355 PUSH_DATA(push
, util_bswap32(nvc0
->stipple
.stipple
[i
]));
/* NOTE(review): validates per-viewport scissors. Early-outs when neither the
 * scissor state nor the rasterizer's scissor-enable changed; when the enable
 * toggles, all viewports' scissors are marked dirty. Each dirty slot is
 * programmed either with its pipe_scissor_state or with a full-screen rect
 * when scissoring is disabled. Extraction dropped `return`/`continue`/`else`
 * lines, braces and the `i` declaration — code left byte-identical. */
359 nvc0_validate_scissor(struct nvc0_context
*nvc0
)
362 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
364 if (!(nvc0
->dirty_3d
& NVC0_NEW_3D_SCISSOR
) &&
365 nvc0
->rast
->pipe
.scissor
== nvc0
->state
.scissor
)
/* scissor enable toggled: re-emit every viewport's scissor */
368 if (nvc0
->state
.scissor
!= nvc0
->rast
->pipe
.scissor
)
369 nvc0
->scissors_dirty
= (1 << NVC0_MAX_VIEWPORTS
) - 1;
371 nvc0
->state
.scissor
= nvc0
->rast
->pipe
.scissor
;
373 for (i
= 0; i
< NVC0_MAX_VIEWPORTS
; i
++) {
374 struct pipe_scissor_state
*s
= &nvc0
->scissors
[i
];
375 if (!(nvc0
->scissors_dirty
& (1 << i
)))
378 BEGIN_NVC0(push
, NVC0_3D(SCISSOR_HORIZ(i
)), 2);
379 if (nvc0
->rast
->pipe
.scissor
) {
380 PUSH_DATA(push
, (s
->maxx
<< 16) | s
->minx
);
381 PUSH_DATA(push
, (s
->maxy
<< 16) | s
->miny
);
/* scissor disabled: program a full-range rect */
383 PUSH_DATA(push
, (0xffff << 16) | 0);
384 PUSH_DATA(push
, (0xffff << 16) | 0);
387 nvc0
->scissors_dirty
= 0;
/* NOTE(review): validates dirty viewports: emits translate/scale, derives the
 * integer viewport rect from translate +/- |scale| for clipping, programs the
 * depth range from clip_halfz, and on GM200+ the viewport swizzle. Extraction
 * dropped `continue`, braces and the declarations of `i`, `x`, `y`, `w`, `h`,
 * `zmin`, `zmax`, plus the swizzle_y/z terms — code left byte-identical. */
391 nvc0_validate_viewport(struct nvc0_context
*nvc0
)
393 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
394 uint16_t class_3d
= nvc0
->screen
->base
.class_3d
;
398 for (i
= 0; i
< NVC0_MAX_VIEWPORTS
; i
++) {
399 struct pipe_viewport_state
*vp
= &nvc0
->viewports
[i
];
401 if (!(nvc0
->viewports_dirty
& (1 << i
)))
404 BEGIN_NVC0(push
, NVC0_3D(VIEWPORT_TRANSLATE_X(i
)), 3);
405 PUSH_DATAf(push
, vp
->translate
[0]);
406 PUSH_DATAf(push
, vp
->translate
[1]);
407 PUSH_DATAf(push
, vp
->translate
[2]);
409 BEGIN_NVC0(push
, NVC0_3D(VIEWPORT_SCALE_X(i
)), 3);
410 PUSH_DATAf(push
, vp
->scale
[0]);
411 PUSH_DATAf(push
, vp
->scale
[1]);
412 PUSH_DATAf(push
, vp
->scale
[2]);
414 /* now set the viewport rectangle to viewport dimensions for clipping */
416 x
= util_iround(MAX2(0.0f
, vp
->translate
[0] - fabsf(vp
->scale
[0])));
417 y
= util_iround(MAX2(0.0f
, vp
->translate
[1] - fabsf(vp
->scale
[1])));
418 w
= util_iround(vp
->translate
[0] + fabsf(vp
->scale
[0])) - x
;
419 h
= util_iround(vp
->translate
[1] + fabsf(vp
->scale
[1])) - y
;
421 BEGIN_NVC0(push
, NVC0_3D(VIEWPORT_HORIZ(i
)), 2);
422 PUSH_DATA (push
, (w
<< 16) | x
);
423 PUSH_DATA (push
, (h
<< 16) | y
);
425 /* If the halfz setting ever changes, the viewports will also get
426 * updated. The rast will get updated before the validate function has a
427 * chance to hit, so we can just use it directly without an atom
430 util_viewport_zmin_zmax(vp
, nvc0
->rast
->pipe
.clip_halfz
, &zmin
, &zmax
);
432 BEGIN_NVC0(push
, NVC0_3D(DEPTH_RANGE_NEAR(i
)), 2);
433 PUSH_DATAf(push
, zmin
);
434 PUSH_DATAf(push
, zmax
);
436 if (class_3d
>= GM200_3D_CLASS
) {
437 BEGIN_NVC0(push
, NVC0_3D(VIEWPORT_SWIZZLE(i
)), 1);
438 PUSH_DATA (push
, vp
->swizzle_x
<< 0 |
441 vp
->swizzle_w
<< 12);
444 nvc0
->viewports_dirty
= 0;
/* NOTE(review): programs window-rectangle (clip rect) state: enable flag,
 * inclusive/exclusive mode, the configured rects, and (presumably — the loop
 * body was dropped by extraction) zero-fill for unused slots. Code left
 * byte-identical. */
448 nvc0_validate_window_rects(struct nvc0_context
*nvc0
)
450 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
451 bool enable
= nvc0
->window_rect
.rects
> 0 || nvc0
->window_rect
.inclusive
;
454 IMMED_NVC0(push
, NVC0_3D(CLIP_RECTS_EN
), enable
);
458 IMMED_NVC0(push
, NVC0_3D(CLIP_RECTS_MODE
), !nvc0
->window_rect
.inclusive
);
459 BEGIN_NVC0(push
, NVC0_3D(CLIP_RECT_HORIZ(0)), NVC0_MAX_WINDOW_RECTANGLES
* 2);
460 for (i
= 0; i
< nvc0
->window_rect
.rects
; i
++) {
461 struct pipe_scissor_state
*s
= &nvc0
->window_rect
.rect
[i
];
462 PUSH_DATA(push
, (s
->maxx
<< 16) | s
->minx
);
463 PUSH_DATA(push
, (s
->maxy
<< 16) | s
->miny
);
465 for (; i
< NVC0_MAX_WINDOW_RECTANGLES
; i
++) {
/* NOTE(review): uploads all user clip planes for shader stage `s` into that
 * stage's AUX constbuf at NVC0_CB_AUX_UCP_INFO. Extraction-mangled; code
 * left byte-identical. */
472 nvc0_upload_uclip_planes(struct nvc0_context
*nvc0
, unsigned s
)
474 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
475 struct nvc0_screen
*screen
= nvc0
->screen
;
477 BEGIN_NVC0(push
, NVC0_3D(CB_SIZE
), 3);
478 PUSH_DATA (push
, NVC0_CB_AUX_SIZE
);
479 PUSH_DATAh(push
, screen
->uniform_bo
->offset
+ NVC0_CB_AUX_INFO(s
));
480 PUSH_DATA (push
, screen
->uniform_bo
->offset
+ NVC0_CB_AUX_INFO(s
));
481 BEGIN_1IC0(push
, NVC0_3D(CB_POS
), PIPE_MAX_CLIP_PLANES
* 4 + 1);
482 PUSH_DATA (push
, NVC0_CB_AUX_UCP_INFO
);
483 PUSH_DATAp(push
, &nvc0
->clip
.ucp
[0][0], PIPE_MAX_CLIP_PLANES
* 4);
/* NOTE(review): ensures the last-stage vertex program `vp` supports at least
 * as many user clip planes as `mask` requires; if not, destroys it so it is
 * rebuilt, then re-validates whichever stage it belonged to (vertprog,
 * gmtyprog, or — fallthrough — tevlprog). Extraction dropped `return`/`else`
 * lines and the num_ucps update — code left byte-identical. */
487 nvc0_check_program_ucps(struct nvc0_context
*nvc0
,
488 struct nvc0_program
*vp
, uint8_t mask
)
490 const unsigned n
= util_logbase2(mask
) + 1;
492 if (vp
->vp
.num_ucps
>= n
)
494 nvc0_program_destroy(nvc0
, vp
);
497 if (likely(vp
== nvc0
->vertprog
))
498 nvc0_vertprog_validate(nvc0
);
500 if (likely(vp
== nvc0
->gmtyprog
))
501 nvc0_gmtyprog_validate(nvc0
);
503 nvc0_tevlprog_validate(nvc0
);
/* NOTE(review): picks the last geometry-capable stage's program (`vp` is set
 * in branches whose assignments were dropped by extraction, along with
 * `stage`), grows its UCP support if needed, uploads clip planes when dirty,
 * and updates the cached CLIP_DISTANCE_ENABLE / CLIP_DISTANCE_MODE hardware
 * state. Code left byte-identical. */
507 nvc0_validate_clip(struct nvc0_context
*nvc0
)
509 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
510 struct nvc0_program
*vp
;
512 uint8_t clip_enable
= nvc0
->rast
->pipe
.clip_plane_enable
;
514 if (nvc0
->gmtyprog
) {
518 if (nvc0
->tevlprog
) {
526 if (clip_enable
&& vp
->vp
.num_ucps
< PIPE_MAX_CLIP_PLANES
)
527 nvc0_check_program_ucps(nvc0
, vp
, clip_enable
);
529 if (nvc0
->dirty_3d
& (NVC0_NEW_3D_CLIP
| (NVC0_NEW_3D_VERTPROG
<< stage
)))
530 if (vp
->vp
.num_ucps
> 0 && vp
->vp
.num_ucps
<= PIPE_MAX_CLIP_PLANES
)
531 nvc0_upload_uclip_planes(nvc0
, stage
);
533 clip_enable
&= vp
->vp
.clip_enable
;
534 clip_enable
|= vp
->vp
.cull_enable
;
536 if (nvc0
->state
.clip_enable
!= clip_enable
) {
537 nvc0
->state
.clip_enable
= clip_enable
;
538 IMMED_NVC0(push
, NVC0_3D(CLIP_DISTANCE_ENABLE
), clip_enable
);
540 if (nvc0
->state
.clip_mode
!= vp
->vp
.clip_mode
) {
541 nvc0
->state
.clip_mode
= vp
->vp
.clip_mode
;
542 BEGIN_NVC0(push
, NVC0_3D(CLIP_DISTANCE_MODE
), 1);
543 PUSH_DATA (push
, vp
->vp
.clip_mode
);
/* NOTE(review): replays the pre-baked blend CSO's command stream into the
 * pushbuf. Extraction-mangled; code left byte-identical. */
548 nvc0_validate_blend(struct nvc0_context
*nvc0
)
550 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
552 PUSH_SPACE(push
, nvc0
->blend
->size
);
553 PUSH_DATAp(push
, nvc0
->blend
->state
, nvc0
->blend
->size
);
/* NOTE(review): replays the pre-baked depth-stencil-alpha CSO's command
 * stream. Extraction-mangled; code left byte-identical. */
557 nvc0_validate_zsa(struct nvc0_context
*nvc0
)
559 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
561 PUSH_SPACE(push
, nvc0
->zsa
->size
);
562 PUSH_DATAp(push
, nvc0
->zsa
->state
, nvc0
->zsa
->size
);
/* NOTE(review): replays the pre-baked rasterizer CSO's command stream.
 * Extraction-mangled; code left byte-identical. */
566 nvc0_validate_rasterizer(struct nvc0_context
*nvc0
)
568 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
570 PUSH_SPACE(push
, nvc0
->rast
->size
);
571 PUSH_DATAp(push
, nvc0
->rast
->state
, nvc0
->rast
->size
);
/* NOTE(review): binds dirty constant buffers for the 5 graphics stages:
 * user-pointer CBs are pushed inline into the shared uniform_bo (slot 0
 * only), resource-backed CBs are bound by address; empty slots are unbound.
 * On pre-NVE4, 3D and compute constbufs alias, so compute bindings are
 * invalidated afterwards. Extraction dropped braces, `else` lines and the
 * `s` declaration — code left byte-identical. */
575 nvc0_constbufs_validate(struct nvc0_context
*nvc0
)
579 bool can_serialize
= true;
581 for (s
= 0; s
< 5; ++s
) {
582 while (nvc0
->constbuf_dirty
[s
]) {
583 int i
= ffs(nvc0
->constbuf_dirty
[s
]) - 1;
584 nvc0
->constbuf_dirty
[s
] &= ~(1 << i
);
/* user (CPU-pointer) constbuf: push contents into uniform_bo */
586 if (nvc0
->constbuf
[s
][i
].user
) {
587 struct nouveau_bo
*bo
= nvc0
->screen
->uniform_bo
;
588 const unsigned base
= NVC0_CB_USR_INFO(s
);
589 const unsigned size
= nvc0
->constbuf
[s
][0].size
;
590 assert(i
== 0); /* we really only want OpenGL uniforms here */
591 assert(nvc0
->constbuf
[s
][0].u
.data
);
593 if (!nvc0
->state
.uniform_buffer_bound
[s
]) {
594 nvc0
->state
.uniform_buffer_bound
[s
] = true;
596 nvc0_screen_bind_cb_3d(nvc0
->screen
, &can_serialize
, s
, i
,
597 NVC0_MAX_CONSTBUF_SIZE
, bo
->offset
+ base
);
599 nvc0_cb_bo_push(&nvc0
->base
, bo
, NV_VRAM_DOMAIN(&nvc0
->screen
->base
),
600 base
, NVC0_MAX_CONSTBUF_SIZE
,
602 nvc0
->constbuf
[s
][0].u
.data
);
/* resource-backed constbuf: bind by GPU address */
604 struct nv04_resource
*res
=
605 nv04_resource(nvc0
->constbuf
[s
][i
].u
.buf
);
607 nvc0_screen_bind_cb_3d(nvc0
->screen
, &can_serialize
, s
, i
,
608 nvc0
->constbuf
[s
][i
].size
,
609 res
->address
+ nvc0
->constbuf
[s
][i
].offset
);
611 BCTX_REFN(nvc0
->bufctx_3d
, 3D_CB(s
, i
), res
, RD
);
613 nvc0
->cb_dirty
= 1; /* Force cache flush for UBO. */
614 res
->cb_bindings
[s
] |= 1 << i
;
/* slot emptied: unbind */
617 nvc0
->state
.uniform_buffer_bound
[s
] = false;
619 nvc0_screen_bind_cb_3d(nvc0
->screen
, &can_serialize
, s
, i
, -1, 0);
625 if (nvc0
->screen
->base
.class_3d
< NVE4_3D_CLASS
) {
626 /* Invalidate all COMPUTE constbufs because they are aliased with 3D. */
627 nvc0
->dirty_cp
|= NVC0_NEW_CP_CONSTBUF
;
628 nvc0
->constbuf_dirty
[5] |= nvc0
->constbuf_valid
[5];
629 nvc0
->state
.uniform_buffer_bound
[5] = false;
/* NOTE(review): uploads SSBO descriptors (address lo/hi, size and presumably
 * a fourth word whose line was dropped) for every buffer slot of the 5
 * graphics stages into each stage's AUX constbuf, referencing bound buffers
 * RDWR and extending their valid ranges. Extraction dropped braces, `else`
 * zero-fill lines and the `s`/`i` declarations — code left byte-identical. */
634 nvc0_validate_buffers(struct nvc0_context
*nvc0
)
636 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
637 struct nvc0_screen
*screen
= nvc0
->screen
;
640 for (s
= 0; s
< 5; s
++) {
641 BEGIN_NVC0(push
, NVC0_3D(CB_SIZE
), 3);
642 PUSH_DATA (push
, NVC0_CB_AUX_SIZE
);
643 PUSH_DATAh(push
, screen
->uniform_bo
->offset
+ NVC0_CB_AUX_INFO(s
));
644 PUSH_DATA (push
, screen
->uniform_bo
->offset
+ NVC0_CB_AUX_INFO(s
));
645 BEGIN_1IC0(push
, NVC0_3D(CB_POS
), 1 + 4 * NVC0_MAX_BUFFERS
);
646 PUSH_DATA (push
, NVC0_CB_AUX_BUF_INFO(0));
647 for (i
= 0; i
< NVC0_MAX_BUFFERS
; i
++) {
648 if (nvc0
->buffers
[s
][i
].buffer
) {
649 struct nv04_resource
*res
=
650 nv04_resource(nvc0
->buffers
[s
][i
].buffer
);
651 PUSH_DATA (push
, res
->address
+ nvc0
->buffers
[s
][i
].buffer_offset
);
652 PUSH_DATAh(push
, res
->address
+ nvc0
->buffers
[s
][i
].buffer_offset
);
653 PUSH_DATA (push
, nvc0
->buffers
[s
][i
].buffer_size
);
655 BCTX_REFN(nvc0
->bufctx_3d
, 3D_BUF
, res
, RDWR
);
656 util_range_add(&res
->base
, &res
->valid_buffer_range
,
657 nvc0
->buffers
[s
][i
].buffer_offset
,
658 nvc0
->buffers
[s
][i
].buffer_offset
+
659 nvc0
->buffers
[s
][i
].buffer_size
);
/* NOTE(review): replicates the low 16 bits of nvc0->sample_mask into the four
 * MSAA_MASK words (the `mask[]` array declaration/initializer lines were
 * dropped by extraction). Code left byte-identical. */
672 nvc0_validate_sample_mask(struct nvc0_context
*nvc0
)
674 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
678 nvc0
->sample_mask
& 0xffff,
679 nvc0
->sample_mask
& 0xffff,
680 nvc0
->sample_mask
& 0xffff,
681 nvc0
->sample_mask
& 0xffff
684 BEGIN_NVC0(push
, NVC0_3D(MSAA_MASK(0)), 4);
685 PUSH_DATA (push
, mask
[0]);
686 PUSH_DATA (push
, mask
[1]);
687 PUSH_DATA (push
, mask
[2]);
688 PUSH_DATA (push
, mask
[3]);
/* NOTE(review): programs SAMPLE_SHADING from min_samples (rounded to a power
 * of two), forcing full per-sample shading when the fragment shader reads
 * gl_SampleMaskIn or the framebuffer. The `samples` declaration and the
 * min_samples>1 condition lines were dropped by extraction — code left
 * byte-identical. */
692 nvc0_validate_min_samples(struct nvc0_context
*nvc0
)
694 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
697 samples
= util_next_power_of_two(nvc0
->min_samples
);
699 // If we're using the incoming sample mask and doing sample shading, we
700 // have to do sample shading "to the max", otherwise there's no way to
701 // tell which sets of samples are covered by the current invocation.
702 // Similarly for reading the framebuffer.
703 if (nvc0
->fragprog
&& (
704 nvc0
->fragprog
->fp
.sample_mask_in
||
705 nvc0
->fragprog
->fp
.reads_framebuffer
))
706 samples
= util_framebuffer_get_num_samples(&nvc0
->framebuffer
);
707 samples
|= NVC0_3D_SAMPLE_SHADING_ENABLE
;
710 IMMED_NVC0(push
, NVC0_3D(SAMPLE_SHADING
), samples
);
/* NOTE(review): rebinds the driver's AUX constbuf (cb index 15) for all 5
 * graphics stages and marks the compute-side driver constants dirty.
 * Extraction dropped the `i` declaration and braces — code left
 * byte-identical. */
714 nvc0_validate_driverconst(struct nvc0_context
*nvc0
)
716 struct nvc0_screen
*screen
= nvc0
->screen
;
719 for (i
= 0; i
< 5; ++i
)
720 nvc0_screen_bind_cb_3d(screen
, NULL
, i
, 15, NVC0_CB_AUX_SIZE
,
721 screen
->uniform_bo
->offset
+ NVC0_CB_AUX_INFO(i
));
723 nvc0
->dirty_cp
|= NVC0_NEW_CP_DRIVERCONST
;
/* NOTE(review): computes effective rasterizer-discard: explicitly requested
 * via the rasterizer CSO, or implied when neither depth/stencil tests nor
 * fragment-shader color outputs (hdr[18]) are active; programs
 * RASTERIZE_ENABLE only on change. The `else` line between the two branches
 * was dropped by extraction — code left byte-identical. */
727 nvc0_validate_fp_zsa_rast(struct nvc0_context
*nvc0
)
729 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
730 bool rasterizer_discard
;
732 if (nvc0
->rast
&& nvc0
->rast
->pipe
.rasterizer_discard
) {
733 rasterizer_discard
= true;
735 bool zs
= nvc0
->zsa
&&
736 (nvc0
->zsa
->pipe
.depth
.enabled
|| nvc0
->zsa
->pipe
.stencil
[0].enabled
);
737 rasterizer_discard
= !zs
&&
738 (!nvc0
->fragprog
|| !nvc0
->fragprog
->hdr
[18]);
741 if (rasterizer_discard
!= nvc0
->state
.rasterizer_discard
) {
742 nvc0
->state
.rasterizer_discard
= rasterizer_discard
;
743 IMMED_NVC0(push
, NVC0_3D(RASTERIZE_ENABLE
), !rasterizer_discard
);
747 /* alpha test is disabled if there are no color RTs, so make sure we have at
748 * least one if alpha test is enabled. Note that this must run after
749 * nvc0_validate_fb, otherwise that will override the RT count setting.
/* NOTE(review): when alpha test is enabled with a zsbuf but zero color
 * buffers, binds one null RT and sets RT count to 1 so the alpha test still
 * runs (see the comment above this function). Extraction-mangled; code left
 * byte-identical. */
752 nvc0_validate_zsa_fb(struct nvc0_context
*nvc0
)
754 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
756 if (nvc0
->zsa
&& nvc0
->zsa
->pipe
.alpha
.enabled
&&
757 nvc0
->framebuffer
.zsbuf
&&
758 nvc0
->framebuffer
.nr_cbufs
== 0) {
759 nvc0_fb_set_null_rt(push
, 0, 0);
760 BEGIN_NVC0(push
, NVC0_3D(RT_CONTROL
), 1);
761 PUSH_DATA (push
, (076543210 << 4) | 1);
/* NOTE(review): for unscaled polygon-offset units, rescales the units by the
 * depth buffer's precision (2^16 for Z16, otherwise 2^24). The `else` line
 * between the two PUSH_DATAf calls was dropped by extraction — code left
 * byte-identical. */
766 nvc0_validate_rast_fb(struct nvc0_context
*nvc0
)
768 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
769 struct pipe_framebuffer_state
*fb
= &nvc0
->framebuffer
;
770 struct pipe_rasterizer_state
*rast
= &nvc0
->rast
->pipe
;
775 if (rast
->offset_units_unscaled
) {
776 BEGIN_NVC0(push
, NVC0_3D(POLYGON_OFFSET_UNITS
), 1);
777 if (fb
->zsbuf
&& fb
->zsbuf
->format
== PIPE_FORMAT_Z16_UNORM
)
778 PUSH_DATAf(push
, rast
->offset_units
* (1 << 16));
780 PUSH_DATAf(push
, rast
->offset_units
* (1 << 24));
/* NOTE(review): uploads the default outer (4) and inner (2) tessellation
 * levels. Extraction-mangled; code left byte-identical. */
786 nvc0_validate_tess_state(struct nvc0_context
*nvc0
)
788 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
790 BEGIN_NVC0(push
, NVC0_3D(TESS_LEVEL_OUTER(0)), 6);
791 PUSH_DATAp(push
, nvc0
->default_tess_outer
, 4);
792 PUSH_DATAp(push
, nvc0
->default_tess_inner
, 2);
795 /* If we have a frag shader bound which tries to read from the framebuffer, we
796 * have to make sure that the fb is bound as a texture in the expected
797 * location. For Fermi, that's in the special driver slot 16, while for Kepler
798 * it's a regular binding stored in the driver constbuf.
/* NOTE(review): when the fragment shader reads the framebuffer, (re)creates a
 * sampler view of cbufs[0] (skipping recreation when parameters match the
 * cached view), allocates and uploads its TIC entry, and binds it either via
 * the AUX constbuf (NVE4+) or the fixed TIC2 slot (Fermi), flushing the TIC
 * cache. Extraction dropped `return`s, braces and several guard lines —
 * code left byte-identical. */
801 nvc0_validate_fbread(struct nvc0_context
*nvc0
)
803 struct nouveau_pushbuf
*push
= nvc0
->base
.pushbuf
;
804 struct nvc0_screen
*screen
= nvc0
->screen
;
805 struct pipe_context
*pipe
= &nvc0
->base
.pipe
;
806 struct pipe_sampler_view
*old_view
= nvc0
->fbtexture
;
807 struct pipe_sampler_view
*new_view
= NULL
;
809 if (nvc0
->fragprog
&&
810 nvc0
->fragprog
->fp
.reads_framebuffer
&&
811 nvc0
->framebuffer
.nr_cbufs
&&
812 nvc0
->framebuffer
.cbufs
[0]) {
813 struct pipe_sampler_view tmpl
;
814 struct pipe_surface
*sf
= nvc0
->framebuffer
.cbufs
[0];
816 tmpl
.target
= PIPE_TEXTURE_2D_ARRAY
;
817 tmpl
.format
= sf
->format
;
818 tmpl
.u
.tex
.first_level
= tmpl
.u
.tex
.last_level
= sf
->u
.tex
.level
;
819 tmpl
.u
.tex
.first_layer
= sf
->u
.tex
.first_layer
;
820 tmpl
.u
.tex
.last_layer
= sf
->u
.tex
.last_layer
;
821 tmpl
.swizzle_r
= PIPE_SWIZZLE_X
;
822 tmpl
.swizzle_g
= PIPE_SWIZZLE_Y
;
823 tmpl
.swizzle_b
= PIPE_SWIZZLE_Z
;
824 tmpl
.swizzle_a
= PIPE_SWIZZLE_W
;
826 /* Bail if it's the same parameters */
827 if (old_view
&& old_view
->texture
== sf
->texture
&&
828 old_view
->format
== sf
->format
&&
829 old_view
->u
.tex
.first_level
== sf
->u
.tex
.level
&&
830 old_view
->u
.tex
.first_layer
== sf
->u
.tex
.first_layer
&&
831 old_view
->u
.tex
.last_layer
== sf
->u
.tex
.last_layer
)
834 new_view
= pipe
->create_sampler_view(pipe
, sf
->texture
, &tmpl
);
835 } else if (old_view
== NULL
) {
840 pipe_sampler_view_reference(&nvc0
->fbtexture
, NULL
);
841 nvc0
->fbtexture
= new_view
;
/* upload the new view's TIC entry and bind it for the FS */
844 struct nv50_tic_entry
*tic
= nv50_tic_entry(new_view
);
846 tic
->id
= nvc0_screen_tic_alloc(screen
, tic
);
847 nvc0
->base
.push_data(&nvc0
->base
, screen
->txc
, tic
->id
* 32,
848 NV_VRAM_DOMAIN(&screen
->base
), 32, tic
->tic
);
849 screen
->tic
.lock
[tic
->id
/ 32] |= 1 << (tic
->id
% 32);
851 if (screen
->base
.class_3d
>= NVE4_3D_CLASS
) {
852 BEGIN_NVC0(push
, NVC0_3D(CB_SIZE
), 3);
853 PUSH_DATA (push
, NVC0_CB_AUX_SIZE
);
854 PUSH_DATAh(push
, screen
->uniform_bo
->offset
+ NVC0_CB_AUX_INFO(4));
855 PUSH_DATA (push
, screen
->uniform_bo
->offset
+ NVC0_CB_AUX_INFO(4));
856 BEGIN_1IC0(push
, NVC0_3D(CB_POS
), 1 + 1);
857 PUSH_DATA (push
, NVC0_CB_AUX_FB_TEX_INFO
);
858 PUSH_DATA (push
, (0 << 20) | tic
->id
);
860 BEGIN_NVC0(push
, NVC0_3D(BIND_TIC2(0)), 1);
861 PUSH_DATA (push
, (tic
->id
<< 9) | 1);
864 IMMED_NVC0(push
, NVC0_3D(TIC_FLUSH
), 0);
/* NOTE(review): switches the screen's current context to `ctx_to`: saves or
 * restores cached hardware state, then marks everything dirty (3D, compute,
 * viewports, scissors, and all per-stage sampler/texture/constbuf/buffer/
 * image bits), clearing dirty bits whose CSOs are not bound so validation
 * won't dereference NULL. The conditionals guarding several of the mask
 * clears were dropped by extraction — code left byte-identical. */
869 nvc0_switch_pipe_context(struct nvc0_context
*ctx_to
)
871 struct nvc0_context
*ctx_from
= ctx_to
->screen
->cur_ctx
;
875 ctx_to
->state
= ctx_from
->state
;
877 ctx_to
->state
= ctx_to
->screen
->save_state
;
879 ctx_to
->dirty_3d
= ~0;
880 ctx_to
->dirty_cp
= ~0;
881 ctx_to
->viewports_dirty
= ~0;
882 ctx_to
->scissors_dirty
= ~0;
884 for (s
= 0; s
< 6; ++s
) {
885 ctx_to
->samplers_dirty
[s
] = ~0;
886 ctx_to
->textures_dirty
[s
] = ~0;
887 ctx_to
->constbuf_dirty
[s
] = (1 << NVC0_MAX_PIPE_CONSTBUFS
) - 1;
888 ctx_to
->buffers_dirty
[s
] = ~0;
889 ctx_to
->images_dirty
[s
] = ~0;
892 /* Reset tfb as the shader that owns it may have been deleted. */
893 ctx_to
->state
.tfb
= NULL
;
896 ctx_to
->dirty_3d
&= ~(NVC0_NEW_3D_VERTEX
| NVC0_NEW_3D_ARRAYS
);
898 if (!ctx_to
->vertprog
)
899 ctx_to
->dirty_3d
&= ~NVC0_NEW_3D_VERTPROG
;
900 if (!ctx_to
->fragprog
)
901 ctx_to
->dirty_3d
&= ~NVC0_NEW_3D_FRAGPROG
;
904 ctx_to
->dirty_3d
&= ~NVC0_NEW_3D_BLEND
;
906 ctx_to
->dirty_3d
&= ~(NVC0_NEW_3D_RASTERIZER
| NVC0_NEW_3D_SCISSOR
);
908 ctx_to
->dirty_3d
&= ~NVC0_NEW_3D_ZSA
;
910 ctx_to
->screen
->cur_ctx
= ctx_to
;
/* NOTE(review): ordered table of 3D validation functions and the dirty-state
 * bits that trigger them; nvc0_state_validate() walks this in order, so
 * entries with ordering constraints (e.g. zsa_fb after validate_fb) must
 * stay in sequence. Extraction-mangled (some OR'd state bits dropped);
 * text left byte-identical. */
913 static struct nvc0_state_validate
914 validate_list_3d
[] = {
915 { nvc0_validate_fb
, NVC0_NEW_3D_FRAMEBUFFER
},
916 { nvc0_validate_blend
, NVC0_NEW_3D_BLEND
},
917 { nvc0_validate_zsa
, NVC0_NEW_3D_ZSA
},
918 { nvc0_validate_sample_mask
, NVC0_NEW_3D_SAMPLE_MASK
},
919 { nvc0_validate_rasterizer
, NVC0_NEW_3D_RASTERIZER
},
920 { nvc0_validate_blend_colour
, NVC0_NEW_3D_BLEND_COLOUR
},
921 { nvc0_validate_stencil_ref
, NVC0_NEW_3D_STENCIL_REF
},
922 { nvc0_validate_stipple
, NVC0_NEW_3D_STIPPLE
},
923 { nvc0_validate_scissor
, NVC0_NEW_3D_SCISSOR
| NVC0_NEW_3D_RASTERIZER
},
924 { nvc0_validate_viewport
, NVC0_NEW_3D_VIEWPORT
},
925 { nvc0_validate_window_rects
, NVC0_NEW_3D_WINDOW_RECTS
},
926 { nvc0_vertprog_validate
, NVC0_NEW_3D_VERTPROG
},
927 { nvc0_tctlprog_validate
, NVC0_NEW_3D_TCTLPROG
},
928 { nvc0_tevlprog_validate
, NVC0_NEW_3D_TEVLPROG
},
929 { nvc0_validate_tess_state
, NVC0_NEW_3D_TESSFACTOR
},
930 { nvc0_gmtyprog_validate
, NVC0_NEW_3D_GMTYPROG
},
931 { nvc0_validate_min_samples
, NVC0_NEW_3D_MIN_SAMPLES
|
932 NVC0_NEW_3D_FRAGPROG
|
933 NVC0_NEW_3D_FRAMEBUFFER
},
934 { nvc0_fragprog_validate
, NVC0_NEW_3D_FRAGPROG
| NVC0_NEW_3D_RASTERIZER
},
935 { nvc0_validate_fp_zsa_rast
, NVC0_NEW_3D_FRAGPROG
| NVC0_NEW_3D_ZSA
|
936 NVC0_NEW_3D_RASTERIZER
},
937 { nvc0_validate_zsa_fb
, NVC0_NEW_3D_ZSA
| NVC0_NEW_3D_FRAMEBUFFER
},
938 { nvc0_validate_rast_fb
, NVC0_NEW_3D_RASTERIZER
| NVC0_NEW_3D_FRAMEBUFFER
},
939 { nvc0_validate_clip
, NVC0_NEW_3D_CLIP
| NVC0_NEW_3D_RASTERIZER
|
940 NVC0_NEW_3D_VERTPROG
|
941 NVC0_NEW_3D_TEVLPROG
|
942 NVC0_NEW_3D_GMTYPROG
},
943 { nvc0_constbufs_validate
, NVC0_NEW_3D_CONSTBUF
},
944 { nvc0_validate_textures
, NVC0_NEW_3D_TEXTURES
},
945 { nvc0_validate_samplers
, NVC0_NEW_3D_SAMPLERS
},
946 { nve4_set_tex_handles
, NVC0_NEW_3D_TEXTURES
| NVC0_NEW_3D_SAMPLERS
},
947 { nvc0_validate_fbread
, NVC0_NEW_3D_FRAGPROG
|
948 NVC0_NEW_3D_FRAMEBUFFER
},
949 { nvc0_vertex_arrays_validate
, NVC0_NEW_3D_VERTEX
| NVC0_NEW_3D_ARRAYS
},
950 { nvc0_validate_surfaces
, NVC0_NEW_3D_SURFACES
},
951 { nvc0_validate_buffers
, NVC0_NEW_3D_BUFFERS
},
952 { nvc0_tfb_validate
, NVC0_NEW_3D_TFB_TARGETS
| NVC0_NEW_3D_GMTYPROG
},
953 { nvc0_layer_validate
, NVC0_NEW_3D_VERTPROG
|
954 NVC0_NEW_3D_TEVLPROG
|
955 NVC0_NEW_3D_GMTYPROG
},
956 { nvc0_validate_driverconst
, NVC0_NEW_3D_DRIVERCONST
},
957 { validate_sample_locations
, NVC0_NEW_3D_SAMPLE_LOCATIONS
|
958 NVC0_NEW_3D_FRAMEBUFFER
},
/* NOTE(review): generic dirty-state walker: switches contexts if another
 * context owns the screen, intersects the dirty bits with `mask`, runs every
 * matching validate function in table order, clears the handled bits, fences
 * the bufctx and (re)validates the pushbuf. Extraction dropped the return
 * type, braces, the `i`/`state_mask`/`ret` declarations and the trailing
 * return — code left byte-identical. */
962 nvc0_state_validate(struct nvc0_context
*nvc0
, uint32_t mask
,
963 struct nvc0_state_validate
*validate_list
, int size
,
964 uint32_t *dirty
, struct nouveau_bufctx
*bufctx
)
970 if (nvc0
->screen
->cur_ctx
!= nvc0
)
971 nvc0_switch_pipe_context(nvc0
);
973 state_mask
= *dirty
& mask
;
976 for (i
= 0; i
< size
; ++i
) {
977 struct nvc0_state_validate
*validate
= &validate_list
[i
];
979 if (state_mask
& validate
->states
)
980 validate
->func(nvc0
);
982 *dirty
&= ~state_mask
;
984 nvc0_bufctx_fence(nvc0
, bufctx
, false);
987 nouveau_pushbuf_bufctx(nvc0
->base
.pushbuf
, bufctx
);
988 ret
= nouveau_pushbuf_validate(nvc0
->base
.pushbuf
);
994 nvc0_state_validate_3d(struct nvc0_context
*nvc0
, uint32_t mask
)
998 ret
= nvc0_state_validate(nvc0
, mask
, validate_list_3d
,
999 ARRAY_SIZE(validate_list_3d
), &nvc0
->dirty_3d
,
1002 if (unlikely(nvc0
->state
.flushed
)) {
1003 nvc0
->state
.flushed
= false;
1004 nvc0_bufctx_fence(nvc0
, nvc0
->bufctx_3d
, true);