2 * Copyright 2008 Ben Skeggs
3 * Copyright 2010 Christoph Bumiller
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
24 #include "pipe/p_context.h"
25 #include "pipe/p_defines.h"
26 #include "pipe/p_state.h"
27 #include "util/u_inlines.h"
29 #include "nv50/nv50_context.h"
30 #include "nv50/nv50_query_hw.h"
32 #include "nv50/nv50_compute.xml.h"
/*
 * nv50_constbufs_validate: walk the dirty constant-buffer slots of shader
 * stages s = 0..2 and re-emit their bindings on nv50->base.pushbuf.
 *
 * Per stage, a SET_PROGRAM_CB program selector p is chosen (FRAGMENT,
 * GEOMETRY or VERTEX). Slots are taken from nv50->constbuf_dirty[s], lowest
 * set bit first, and their dirty bit is cleared. Three outcomes are visible:
 *  - constbuf[s][i].user is set: words from u.data are pushed inline through
 *    CB_ADDR / CB_DATA into buffer b = NV50_CB_PVP + s, and the binding is
 *    enabled once, tracked by state.uniform_buffer_bound[s];
 *  - a resource (u.buf) is used: its address + offset and size are written
 *    via CB_DEF_ADDRESS_HIGH for buffer b = s * 16 + i, the binding is
 *    enabled, the resource is referenced in bufctx_3d for reading, and the
 *    slot is recorded in res->cb_bindings[s];
 *  - otherwise SET_PROGRAM_CB is written with the low bit 0.
 *
 * NOTE(review): this listing has lost lines (declarations of s, p and start,
 * braces, else/continue statements, loop headers) and every original line
 * carries a stray number, so the exact nesting of the branches below cannot
 * be confirmed from here.
 */
35 nv50_constbufs_validate(struct nv50_context
*nv50
)
37 struct nouveau_pushbuf
*push
= nv50
->base
.pushbuf
;
40 for (s
= 0; s
< 3; ++s
) {
43 if (s
== PIPE_SHADER_FRAGMENT
)
44 p
= NV50_3D_SET_PROGRAM_CB_PROGRAM_FRAGMENT
;
46 if (s
== PIPE_SHADER_GEOMETRY
)
47 p
= NV50_3D_SET_PROGRAM_CB_PROGRAM_GEOMETRY
;
49 p
= NV50_3D_SET_PROGRAM_CB_PROGRAM_VERTEX
;
/* Handle dirty slots one at a time, lowest set bit first (ffs() - 1). */
51 while (nv50
->constbuf_dirty
[s
]) {
52 const unsigned i
= (unsigned)ffs(nv50
->constbuf_dirty
[s
]) - 1;
54 assert(i
< NV50_MAX_PIPE_CONSTBUFS
);
55 nv50
->constbuf_dirty
[s
] &= ~(1 << i
);
/* User constant buffer: contents are pushed inline from u.data. */
57 if (nv50
->constbuf
[s
][i
].user
) {
58 const unsigned b
= NV50_CB_PVP
+ s
;
/* Size is read from slot 0 and converted from bytes to 32-bit words. */
60 unsigned words
= nv50
->constbuf
[s
][0].size
/ 4;
/* NOTE(review): the condition guarding this error is not visible here. */
62 NOUVEAU_ERR("user constbufs only supported in slot 0\n");
65 if (!nv50
->state
.uniform_buffer_bound
[s
]) {
66 nv50
->state
.uniform_buffer_bound
[s
] = true;
67 BEGIN_NV04(push
, NV50_3D(SET_PROGRAM_CB
), 1);
68 PUSH_DATA (push
, (b
<< 12) | (i
<< 8) | p
| 1);
/* Each upload is capped at NV04_PFIFO_MAX_PACKET_LEN words; the enclosing
 * loop that advances start is presumably among the missing lines. */
71 unsigned nr
= MIN2(words
, NV04_PFIFO_MAX_PACKET_LEN
);
73 PUSH_SPACE(push
, nr
+ 3);
74 BEGIN_NV04(push
, NV50_3D(CB_ADDR
), 1);
75 PUSH_DATA (push
, (start
<< 8) | b
);
76 BEGIN_NI04(push
, NV50_3D(CB_DATA(0)), nr
);
77 PUSH_DATAp(push
, &nv50
->constbuf
[s
][0].u
.data
[start
* 4], nr
);
/* Resource-backed constant buffer: program its GPU address range. */
83 struct nv04_resource
*res
=
84 nv04_resource(nv50
->constbuf
[s
][i
].u
.buf
);
86 /* TODO: allocate persistent bindings */
87 const unsigned b
= s
* 16 + i
;
89 assert(nouveau_resource_mapped_by_gpu(&res
->base
));
91 BEGIN_NV04(push
, NV50_3D(CB_DEF_ADDRESS_HIGH
), 3);
92 PUSH_DATAh(push
, res
->address
+ nv50
->constbuf
[s
][i
].offset
);
93 PUSH_DATA (push
, res
->address
+ nv50
->constbuf
[s
][i
].offset
);
94 PUSH_DATA (push
, (b
<< 16) |
95 (nv50
->constbuf
[s
][i
].size
& 0xffff));
96 BEGIN_NV04(push
, NV50_3D(SET_PROGRAM_CB
), 1);
97 PUSH_DATA (push
, (b
<< 12) | (i
<< 8) | p
| 1);
99 BCTX_REFN(nv50
->bufctx_3d
, 3D_CB(s
, i
), res
, RD
);
101 nv50
->cb_dirty
= 1; /* Force cache flush for UBO. */
102 res
->cb_bindings
[s
] |= 1 << i
;
/* Remaining case: same method written with the low bit 0 instead of 1. */
104 BEGIN_NV04(push
, NV50_3D(SET_PROGRAM_CB
), 1);
105 PUSH_DATA (push
, (i
<< 8) | p
| 0);
/* NOTE(review): the condition under which the flag is reset is not visible. */
108 nv50
->state
.uniform_buffer_bound
[s
] = false;
/*
 * nv50_program_validate: make sure prog has been translated, then upload it.
 *
 * If prog->translated is not yet set, nv50_program_translate() is called with
 * the device chipset and the context's debug object, and its result is stored
 * in prog->translated. The function returns the result of
 * nv50_program_upload_code(nv50, prog).
 *
 * NOTE(review): the statement executed when translation fails is missing
 * from this listing (presumably an early failure return) - confirm.
 */
115 nv50_program_validate(struct nv50_context
*nv50
, struct nv50_program
*prog
)
117 if (!prog
->translated
) {
118 prog
->translated
= nv50_program_translate(
119 prog
, nv50
->screen
->base
.device
->chipset
, &nv50
->base
.debug
);
120 if (!prog
->translated
)
126 return nv50_program_upload_code(nv50
, prog
);
/*
 * nv50_program_update_context_state: keep the TLS buffer reference in
 * bufctx_3d in step with which stages need it.
 *
 * stage is used as a bit index into nv50->state.tls_required. prog may be
 * NULL (it is checked before use).
 *
 * When prog is non-NULL and prog->tls_space is non-zero:
 *  - existing NV50_BIND_3D_TLS references are reset if state.new_tls_space;
 *  - screen->tls_bo is referenced (VRAM, read/write) if no stage required
 *    TLS so far or if state.new_tls_space is set;
 *  - new_tls_space is cleared and the stage bit is set in tls_required.
 *
 * The trailing statements reset the TLS references when this stage is the
 * only bit set in tls_required, then clear the stage bit.
 */
130 nv50_program_update_context_state(struct nv50_context
*nv50
,
131 struct nv50_program
*prog
, int stage
)
133 const unsigned flags
= NOUVEAU_BO_VRAM
| NOUVEAU_BO_RDWR
;
135 if (prog
&& prog
->tls_space
) {
136 if (nv50
->state
.new_tls_space
)
137 nouveau_bufctx_reset(nv50
->bufctx_3d
, NV50_BIND_3D_TLS
);
138 if (!nv50
->state
.tls_required
|| nv50
->state
.new_tls_space
)
139 BCTX_REFN_bo(nv50
->bufctx_3d
, 3D_TLS
, flags
, nv50
->screen
->tls_bo
);
140 nv50
->state
.new_tls_space
= false;
141 nv50
->state
.tls_required
|= 1 << stage
;
/* NOTE(review): presumably the else branch of the test above; the else
 * keyword and braces are not visible in this listing. */
143 if (nv50
->state
.tls_required
== (1 << stage
))
144 nouveau_bufctx_reset(nv50
->bufctx_3d
, NV50_BIND_3D_TLS
);
145 nv50
->state
.tls_required
&= ~(1 << stage
);
/*
 * nv50_vertprog_validate: validate nv50->vertprog and emit its hardware
 * state on nv50->base.pushbuf.
 *
 * After nv50_program_validate() and the TLS update for stage 0, the
 * following methods are written:
 *  - VP_ATTR_EN(0), two words: vp->vp.attrs[0] and vp->vp.attrs[1];
 *  - VP_REG_ALLOC_RESULT: vp->max_out;
 *  - VP_REG_ALLOC_TEMP:   vp->max_gpr;
 *  - VP_START_ID:         vp->code_base.
 *
 * NOTE(review): the statement taken when validation fails is missing from
 * this listing (presumably an early return) - confirm.
 */
150 nv50_vertprog_validate(struct nv50_context
*nv50
)
152 struct nouveau_pushbuf
*push
= nv50
->base
.pushbuf
;
153 struct nv50_program
*vp
= nv50
->vertprog
;
155 if (!nv50_program_validate(nv50
, vp
))
157 nv50_program_update_context_state(nv50
, vp
, 0);
159 BEGIN_NV04(push
, NV50_3D(VP_ATTR_EN(0)), 2);
160 PUSH_DATA (push
, vp
->vp
.attrs
[0]);
161 PUSH_DATA (push
, vp
->vp
.attrs
[1]);
162 BEGIN_NV04(push
, NV50_3D(VP_REG_ALLOC_RESULT
), 1);
163 PUSH_DATA (push
, vp
->max_out
);
164 BEGIN_NV04(push
, NV50_3D(VP_REG_ALLOC_TEMP
), 1);
165 PUSH_DATA (push
, vp
->max_gpr
);
166 BEGIN_NV04(push
, NV50_3D(VP_START_ID
), 1);
167 PUSH_DATA (push
, vp
->code_base
);
/*
 * nv50_fragprog_validate: validate nv50->fragprog and emit its hardware
 * state on nv50->base.pushbuf.
 *
 * Steps visible in this listing, in order:
 *  1. Alpha test. If the zsa state enables alpha test, "blendable" is true
 *     when there is no colour buffer 0 or when its format/target/sample
 *     count is reported as supporting PIPE_BIND_BLENDABLE. If the program
 *     already carries alphatest code or RT0 is not blendable,
 *     fp->fp.alphatest is updated (values are stored as PIPE_FUNC_* + 1);
 *     the program is destroyed when it had no alphatest before, or its code
 *     memory is freed when the function changed. If alpha test is disabled
 *     but the program holds a function other than ALWAYS, the code memory is
 *     freed and the function reset to PIPE_FUNC_ALWAYS + 1.
 *  2. If fp->fp.force_persample_interp differs from the rasterizer state,
 *     the code memory is freed and the new value recorded.
 *  3. A check on fp->mem together with the FRAGPROG / MIN_SAMPLES dirty bits
 *     (its consequence is not visible; presumably an early return).
 *  4. nv50_program_validate(), TLS update for stage 1, then
 *     FP_REG_ALLOC_TEMP, FP_RESULT_COUNT, FP_CONTROL, FP_CTRL_UNK196C and
 *     FP_START_ID are written from the program.
 *  5. On NVA3_3D_CLASS and newer, NVA3_3D_FP_MULTISAMPLE is written; the
 *     per-sample / sample-mask flags are used when min_samples > 1 or the
 *     program has a sample mask.
 *
 * NOTE(review): several lines are missing from this listing (braces, else
 * branches, returns, comment terminators, some PUSH_DATA words), so the
 * nesting described above is inferred from the surviving lines - confirm
 * against the complete file.
 */
171 nv50_fragprog_validate(struct nv50_context
*nv50
)
173 struct nouveau_pushbuf
*push
= nv50
->base
.pushbuf
;
174 struct nv50_program
*fp
= nv50
->fragprog
;
175 struct pipe_rasterizer_state
*rast
= &nv50
->rast
->pipe
;
/* Alpha-test handling only applies when a zsa state is bound and enables it. */
180 if (nv50
->zsa
&& nv50
->zsa
->pipe
.alpha
.enabled
) {
181 struct pipe_framebuffer_state
*fb
= &nv50
->framebuffer
;
182 bool blendable
= fb
->nr_cbufs
== 0 || !fb
->cbufs
[0] ||
183 nv50
->screen
->base
.base
.is_format_supported(
184 &nv50
->screen
->base
.base
,
185 fb
->cbufs
[0]->format
,
186 fb
->cbufs
[0]->texture
->target
,
187 fb
->cbufs
[0]->texture
->nr_samples
,
188 PIPE_BIND_BLENDABLE
);
189 /* If we already have alphatest code, we have to keep updating
190 * it. However we only have to have different code if the current RT0 is
191 * non-blendable. Otherwise we just set it to always pass and use the
192 * hardware alpha test.
194 if (fp
->fp
.alphatest
|| !blendable
) {
195 uint8_t alphatest
= PIPE_FUNC_ALWAYS
+ 1;
197 alphatest
= nv50
->zsa
->pipe
.alpha
.func
+ 1;
198 if (!fp
->fp
.alphatest
)
199 nv50_program_destroy(nv50
, fp
);
200 else if (fp
->mem
&& fp
->fp
.alphatest
!= alphatest
)
201 nouveau_heap_free(&fp
->mem
);
203 fp
->fp
.alphatest
= alphatest
;
205 } else if (fp
->fp
.alphatest
&& fp
->fp
.alphatest
!= PIPE_FUNC_ALWAYS
+ 1) {
206 /* Alpha test is disabled but we have a shader where it's filled
207 * in. Make sure to reset the function to 'always', otherwise it'll end
208 * up discarding fragments incorrectly.
211 nouveau_heap_free(&fp
->mem
);
213 fp
->fp
.alphatest
= PIPE_FUNC_ALWAYS
+ 1;
216 if (fp
->fp
.force_persample_interp
!= rast
->force_persample_interp
) {
217 /* Force the program to be reuploaded, which will trigger interp fixups
221 nouveau_heap_free(&fp
->mem
);
223 fp
->fp
.force_persample_interp
= rast
->force_persample_interp
;
226 if (fp
->mem
&& !(nv50
->dirty_3d
& (NV50_NEW_3D_FRAGPROG
| NV50_NEW_3D_MIN_SAMPLES
)))
229 if (!nv50_program_validate(nv50
, fp
))
231 nv50_program_update_context_state(nv50
, fp
, 1);
233 BEGIN_NV04(push
, NV50_3D(FP_REG_ALLOC_TEMP
), 1);
234 PUSH_DATA (push
, fp
->max_gpr
);
235 BEGIN_NV04(push
, NV50_3D(FP_RESULT_COUNT
), 1);
236 PUSH_DATA (push
, fp
->max_out
);
237 BEGIN_NV04(push
, NV50_3D(FP_CONTROL
), 1);
238 PUSH_DATA (push
, fp
->fp
.flags
[0]);
239 BEGIN_NV04(push
, NV50_3D(FP_CTRL_UNK196C
), 1);
240 PUSH_DATA (push
, fp
->fp
.flags
[1]);
241 BEGIN_NV04(push
, NV50_3D(FP_START_ID
), 1);
242 PUSH_DATA (push
, fp
->code_base
);
244 if (nv50
->screen
->tesla
->oclass
>= NVA3_3D_CLASS
) {
245 BEGIN_NV04(push
, SUBC_3D(NVA3_3D_FP_MULTISAMPLE
), 1);
246 if (nv50
->min_samples
> 1 || fp
->fp
.has_samplemask
)
248 NVA3_3D_FP_MULTISAMPLE_FORCE_PER_SAMPLE
|
249 (NVA3_3D_FP_MULTISAMPLE_EXPORT_SAMPLE_MASK
*
250 fp
->fp
.has_samplemask
));
/*
 * nv50_gmtyprog_validate: validate nv50->gmtyprog and emit its hardware
 * state on nv50->base.pushbuf.
 *
 * After nv50_program_validate(), the following methods are written from the
 * program: GP_REG_ALLOC_TEMP (max_gpr), GP_REG_ALLOC_RESULT (max_out),
 * GP_OUTPUT_PRIMITIVE_TYPE (gp.prim_type), GP_VERTEX_OUTPUT_COUNT
 * (gp.vert_count) and GP_START_ID (code_base). The output primitive type is
 * also cached in nv50->state.prim_size, and TLS state is updated for
 * stage 2.
 *
 * NOTE(review): braces and the failure-path statement are missing from this
 * listing, so it is not visible whether the register writes are guarded by a
 * NULL check on gp - confirm against the complete file.
 */
257 nv50_gmtyprog_validate(struct nv50_context
*nv50
)
259 struct nouveau_pushbuf
*push
= nv50
->base
.pushbuf
;
260 struct nv50_program
*gp
= nv50
->gmtyprog
;
263 if (!nv50_program_validate(nv50
, gp
))
265 BEGIN_NV04(push
, NV50_3D(GP_REG_ALLOC_TEMP
), 1);
266 PUSH_DATA (push
, gp
->max_gpr
);
267 BEGIN_NV04(push
, NV50_3D(GP_REG_ALLOC_RESULT
), 1);
268 PUSH_DATA (push
, gp
->max_out
);
269 BEGIN_NV04(push
, NV50_3D(GP_OUTPUT_PRIMITIVE_TYPE
), 1);
270 PUSH_DATA (push
, gp
->gp
.prim_type
);
271 BEGIN_NV04(push
, NV50_3D(GP_VERTEX_OUTPUT_COUNT
), 1);
272 PUSH_DATA (push
, gp
->gp
.vert_count
);
273 BEGIN_NV04(push
, NV50_3D(GP_START_ID
), 1);
274 PUSH_DATA (push
, gp
->code_base
);
276 nv50
->state
.prim_size
= gp
->gp
.prim_type
; /* enum matches vertex count */
278 nv50_program_update_context_state(nv50
, gp
, 2);
280 /* GP_ENABLE is updated in linkage validation */
/*
 * nv50_compprog_validate: validate the bound compute program, if any, and
 * emit a CODE_CB_FLUSH method on the compute subchannel.
 *
 * nv50->compprog may be NULL; nv50_program_validate() is only called when it
 * is not.
 *
 * NOTE(review): the statement taken when validation fails and the data word
 * pushed after CODE_CB_FLUSH are missing from this listing - confirm.
 */
284 nv50_compprog_validate(struct nv50_context
*nv50
)
286 struct nouveau_pushbuf
*push
= nv50
->base
.pushbuf
;
287 struct nv50_program
*cp
= nv50
->compprog
;
289 if (cp
&& !nv50_program_validate(nv50
, cp
))
292 BEGIN_NV04(push
, NV50_CP(CODE_CB_FLUSH
), 1);
/*
 * nv50_sprite_coords_validate: program point-sprite coordinate replacement
 * for the inputs of the bound fragment program.
 *
 * m starts at bits 8..15 of nv50->state.interpolant_ctrl and indexes 4-bit
 * fields inside the 8-word pntc[] array (word m / 8, nibble m % 8).
 *
 * When rast->pipe.point_quad_rasterization is off: if point sprites were
 * enabled in nv50->state, the 8 POINT_COORD_REPLACE_MAP words are rewritten
 * and state.point_sprite is cleared.
 *
 * Otherwise state.point_sprite is set, pntc[] is zeroed, and for each
 * fragment-program input that is TGSI_SEMANTIC_GENERIC and whose semantic
 * index is enabled in rast->pipe.sprite_coord_enable, every component c
 * present in the input mask gets the value c + 1 stored in its nibble.
 * Finally POINT_SPRITE_CTRL (mode) and the 8 POINT_COORD_REPLACE_MAP words
 * are emitted.
 *
 * NOTE(review): declarations of i and c, the updates of m, the values
 * assigned to mode, the words pushed in the reset loop and the
 * early-exit/continue statements are missing from this listing - confirm
 * against the complete file.
 */
297 nv50_sprite_coords_validate(struct nv50_context
*nv50
)
299 struct nouveau_pushbuf
*push
= nv50
->base
.pushbuf
;
300 uint32_t pntc
[8], mode
;
301 struct nv50_program
*fp
= nv50
->fragprog
;
303 unsigned m
= (nv50
->state
.interpolant_ctrl
>> 8) & 0xff;
305 if (!nv50
->rast
->pipe
.point_quad_rasterization
) {
306 if (nv50
->state
.point_sprite
) {
307 BEGIN_NV04(push
, NV50_3D(POINT_COORD_REPLACE_MAP(0)), 8);
308 for (i
= 0; i
< 8; ++i
)
311 nv50
->state
.point_sprite
= false;
315 nv50
->state
.point_sprite
= true;
318 memset(pntc
, 0, sizeof(pntc
));
/* Walk the fragment program inputs; n is the number of components in use. */
320 for (i
= 0; i
< fp
->in_nr
; i
++) {
321 unsigned n
= util_bitcount(fp
->in
[i
].mask
);
323 if (fp
->in
[i
].sn
!= TGSI_SEMANTIC_GENERIC
) {
327 if (!(nv50
->rast
->pipe
.sprite_coord_enable
& (1 << fp
->in
[i
].si
))) {
/* Each enabled component stores (c + 1) in its 4-bit slot of pntc[]. */
332 for (c
= 0; c
< 4; ++c
) {
333 if (fp
->in
[i
].mask
& (1 << c
)) {
334 pntc
[m
/ 8] |= (c
+ 1) << ((m
% 8) * 4);
340 if (nv50
->rast
->pipe
.sprite_coord_mode
== PIPE_SPRITE_COORD_LOWER_LEFT
)
345 BEGIN_NV04(push
, NV50_3D(POINT_SPRITE_CTRL
), 1);
346 PUSH_DATA (push
, mode
);
348 BEGIN_NV04(push
, NV50_3D(POINT_COORD_REPLACE_MAP(0)), 8);
349 PUSH_DATAp(push
, pntc
, 8);
352 /* Validate state derived from shaders and the rasterizer cso. */
/*
 * Steps visible in this listing:
 *  - nv50_sprite_coords_validate() is run first;
 *  - RASTERIZE_ENABLE is rewritten (as !rasterizer_discard) only when the
 *    cached nv50->state.rasterizer_discard differs from the rasterizer state;
 *  - a check on the NV50_NEW_3D_FRAGPROG dirty bit follows, whose consequence
 *    is not visible (presumably an early return, since linkage validation
 *    also writes semantic_color / semantic_psize);
 *  - SEMANTIC_COLOR is recomputed with the CLMP_EN bit taken from
 *    rast->pipe.clamp_vertex_color and re-emitted only if it changed;
 *  - SEMANTIC_PTSZ is recomputed with the PTSZ_EN bits taken from
 *    rast->pipe.point_size_per_vertex and re-emitted only if it changed.
 * Both cached values in nv50->state are updated when they are re-emitted.
 */
354 nv50_validate_derived_rs(struct nv50_context
*nv50
)
356 struct nouveau_pushbuf
*push
= nv50
->base
.pushbuf
;
357 uint32_t color
, psize
;
359 nv50_sprite_coords_validate(nv50
);
361 if (nv50
->state
.rasterizer_discard
!= nv50
->rast
->pipe
.rasterizer_discard
) {
362 nv50
->state
.rasterizer_discard
= nv50
->rast
->pipe
.rasterizer_discard
;
363 BEGIN_NV04(push
, NV50_3D(RASTERIZE_ENABLE
), 1);
364 PUSH_DATA (push
, !nv50
->rast
->pipe
.rasterizer_discard
);
367 if (nv50
->dirty_3d
& NV50_NEW_3D_FRAGPROG
)
/* Start from the cached values with the rasterizer-controlled bits cleared. */
369 psize
= nv50
->state
.semantic_psize
& ~NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK
;
370 color
= nv50
->state
.semantic_color
& ~NV50_3D_SEMANTIC_COLOR_CLMP_EN
;
372 if (nv50
->rast
->pipe
.clamp_vertex_color
)
373 color
|= NV50_3D_SEMANTIC_COLOR_CLMP_EN
;
375 if (color
!= nv50
->state
.semantic_color
) {
376 nv50
->state
.semantic_color
= color
;
377 BEGIN_NV04(push
, NV50_3D(SEMANTIC_COLOR
), 1);
378 PUSH_DATA (push
, color
);
381 if (nv50
->rast
->pipe
.point_size_per_vertex
)
382 psize
|= NV50_3D_SEMANTIC_PTSZ_PTSZ_EN__MASK
;
384 if (psize
!= nv50
->state
.semantic_psize
) {
385 nv50
->state
.semantic_psize
= psize
;
386 BEGIN_NV04(push
, NV50_3D(SEMANTIC_PTSZ
), 1);
387 PUSH_DATA (push
, psize
);
/*
 * nv50_vec4_map: helper used by nv50_fp_linkage_validate() to append the
 * components of one varying to the result map.
 *
 * Parameters:
 *   map  - byte array of result-map entries being filled by the caller
 *   mid  - current map index; also used as a bit position in lin[]
 *   lin  - 4-word bitmap; bit (mid % 32) of word (mid / 32) can be set here
 *   in   - consumer-side varying (its mask is read into mf)
 *   out  - producer-side varying (its mask is read into mv, its hw id
 *          into oid)
 *
 * The caller assigns the return value back to its running map index.
 *
 * NOTE(review): most of the loop body is missing from this listing (the
 * per-component tests, the writes to map[], the condition guarding the
 * lin[] update, the updates of mid/oid and the return statement), so the
 * exact mapping rules cannot be documented from here.
 */
392 nv50_vec4_map(uint8_t *map
, int mid
, uint32_t lin
[4],
393 struct nv50_varying
*in
, struct nv50_varying
*out
)
396 uint8_t mv
= out
->mask
, mf
= in
->mask
, oid
= out
->hw
;
398 for (c
= 0; c
< 4; ++c
) {
401 lin
[mid
/ 32] |= 1 << (mid
% 32);
/*
 * nv50_fp_linkage_validate: build the result map that links the outputs of
 * the last geometry-processing stage to the fragment program inputs, and
 * emit the related state.
 *
 * The producer "vp" is nv50->gmtyprog when one is bound, otherwise
 * nv50->vertprog. The map is built in this order (m is the running index):
 *   1. position (vp->out[0], all four components via a dummy input);
 *   2. clip/cull distances, clpd_nr entries taken from vp->vp.clpd[];
 *   3. back-face colours when rast->pipe.light_twoside is set;
 *   4. every fragment-program input, matched to a producer output with the
 *      same semantic name and index (the dummy varying is used when no
 *      output matches);
 *   5. layer, viewport index and point size entries when needed;
 *   6. extra entries for stream-output attributes that are not in the map
 *      yet, with so_map[] recording where each map slot is streamed.
 *
 * The function then writes the result map (GP_ or VP_RESULT_MAP, depending
 * on whether a geometry program is bound), the semantic registers,
 * FP_INTERPOLANT_CTRL, NOPERSPECTIVE_BITMAP, GP_ENABLE and, if used,
 * STRMOUT_MAP, and caches interp / colors / psiz in nv50->state.
 *
 * NOTE(review): many lines are missing from this listing (declarations of
 * map, lin, so_map, m, n, i, c, primid, ffc, bfc; braces; break/continue/
 * return statements; several PUSH_DATA words; comment terminators). The
 * description above follows the surviving lines only - confirm against the
 * complete file before relying on ordering details.
 */
419 nv50_fp_linkage_validate(struct nv50_context
*nv50
)
421 struct nouveau_pushbuf
*push
= nv50
->base
.pushbuf
;
422 struct nv50_program
*vp
= nv50
->gmtyprog
? nv50
->gmtyprog
: nv50
->vertprog
;
423 struct nv50_program
*fp
= nv50
->fragprog
;
424 struct nv50_varying dummy
;
427 uint32_t layerid
= 0;
428 uint32_t viewportid
= 0;
429 uint32_t psiz
= 0x000;
430 uint32_t interp
= fp
->fp
.interp
;
431 uint32_t colors
= fp
->fp
.colors
;
432 uint32_t clpd_nr
= util_last_bit(vp
->vp
.clip_enable
| vp
->vp
.cull_enable
);
/* With no shader-program dirty bit set, compare the cached FFC0/BFC0 ids
 * with the two-sided lighting state. NOTE(review): presumably the function
 * returns early when they already agree; the return is not visible here. */
437 if (!(nv50
->dirty_3d
& (NV50_NEW_3D_VERTPROG
|
438 NV50_NEW_3D_FRAGPROG
|
439 NV50_NEW_3D_GMTYPROG
))) {
441 ffc
= (nv50
->state
.semantic_color
& NV50_3D_SEMANTIC_COLOR_FFC0_ID__MASK
);
442 bfc
= (nv50
->state
.semantic_color
& NV50_3D_SEMANTIC_COLOR_BFC0_ID__MASK
)
444 if (nv50
->rast
->pipe
.light_twoside
== ((ffc
== bfc
) ? 0 : 1))
448 memset(lin
, 0x00, sizeof(lin
));
450 /* XXX: in buggy-endian mode, is the first element of map (u32)0x000000xx
451 * or is it the first byte ?
453 memset(map
, nv50
->gmtyprog
? 0x80 : 0x40, sizeof(map
));
455 dummy
.mask
= 0xf; /* map all components of HPOS */
457 m
= nv50_vec4_map(map
, 0, lin
, &dummy
, &vp
->out
[0]);
459 for (c
= 0; c
< clpd_nr
; ++c
)
460 map
[m
++] = vp
->vp
.clpd
[c
/ 4] + (c
% 4);
462 colors
|= m
<< 8; /* adjust BFC0 id */
466 /* if light_twoside is active, FFC0_ID == BFC0_ID is invalid */
467 if (nv50
->rast
->pipe
.light_twoside
) {
468 for (i
= 0; i
< 2; ++i
) {
470 if (fp
->vp
.bfc
[i
] >= fp
->in_nr
)
472 m
= nv50_vec4_map(map
, m
, lin
, &fp
->in
[fp
->vp
.bfc
[i
]],
473 (n
< vp
->out_nr
) ? &vp
->out
[n
] : &dummy
);
476 colors
+= m
- 4; /* adjust FFC0 id */
477 interp
|= m
<< 8; /* set map id where 'normal' FP inputs start */
479 for (i
= 0; i
< fp
->in_nr
; ++i
) {
480 for (n
= 0; n
< vp
->out_nr
; ++n
)
481 if (vp
->out
[n
].sn
== fp
->in
[i
].sn
&&
482 vp
->out
[n
].si
== fp
->in
[i
].si
)
484 switch (fp
->in
[i
].sn
) {
485 case TGSI_SEMANTIC_PRIMID
:
488 case TGSI_SEMANTIC_LAYER
:
491 case TGSI_SEMANTIC_VIEWPORT_INDEX
:
495 m
= nv50_vec4_map(map
, m
, lin
,
496 &fp
->in
[i
], (n
< vp
->out_nr
) ? &vp
->out
[n
] : &dummy
);
499 if (vp
->gp
.has_layer
&& !layerid
) {
501 map
[m
++] = vp
->gp
.layerid
;
504 if (vp
->gp
.has_viewport
&& !viewportid
) {
506 map
[m
++] = vp
->gp
.viewportid
;
509 if (nv50
->rast
->pipe
.point_size_per_vertex
) {
511 map
[m
++] = vp
->vp
.psiz
;
514 if (nv50
->rast
->pipe
.clamp_vertex_color
)
515 colors
|= NV50_3D_SEMANTIC_COLOR_CLMP_EN
;
517 if (unlikely(vp
->so
)) {
518 /* Slot i in STRMOUT_MAP specifies the offset where slot i in RESULT_MAP
522 * Inverting vp->so->map (output -> offset) would probably speed this up.
524 memset(so_map
, 0, sizeof(so_map
));
525 for (i
= 0; i
< vp
->so
->map_size
; ++i
) {
526 if (vp
->so
->map
[i
] == 0xff)
528 for (c
= 0; c
< m
; ++c
)
529 if (map
[c
] == vp
->so
->map
[i
] && !so_map
[c
])
533 map
[m
++] = vp
->so
->map
[i
];
535 so_map
[c
] = 0x80 | i
;
537 for (c
= m
; c
& 3; ++c
)
544 if (unlikely(nv50
->gmtyprog
)) {
545 BEGIN_NV04(push
, NV50_3D(GP_RESULT_MAP_SIZE
), 1);
547 BEGIN_NV04(push
, NV50_3D(GP_RESULT_MAP(0)), n
);
548 PUSH_DATAp(push
, map
, n
);
550 BEGIN_NV04(push
, NV50_3D(VP_GP_BUILTIN_ATTR_EN
), 1);
551 PUSH_DATA (push
, vp
->vp
.attrs
[2] | fp
->vp
.attrs
[2]);
553 BEGIN_NV04(push
, NV50_3D(SEMANTIC_PRIM_ID
), 1);
554 PUSH_DATA (push
, primid
);
557 BEGIN_NV04(push
, NV50_3D(VP_RESULT_MAP_SIZE
), 1);
559 BEGIN_NV04(push
, NV50_3D(VP_RESULT_MAP(0)), n
);
560 PUSH_DATAp(push
, map
, n
);
563 BEGIN_NV04(push
, NV50_3D(GP_VIEWPORT_ID_ENABLE
), 5);
564 PUSH_DATA (push
, vp
->gp
.has_viewport
);
565 PUSH_DATA (push
, colors
);
566 PUSH_DATA (push
, (clpd_nr
<< 8) | 4);
567 PUSH_DATA (push
, layerid
);
568 PUSH_DATA (push
, psiz
);
570 BEGIN_NV04(push
, NV50_3D(SEMANTIC_VIEWPORT
), 1);
571 PUSH_DATA (push
, viewportid
);
573 BEGIN_NV04(push
, NV50_3D(LAYER
), 1);
574 PUSH_DATA (push
, vp
->gp
.has_layer
<< 16);
576 BEGIN_NV04(push
, NV50_3D(FP_INTERPOLANT_CTRL
), 1);
577 PUSH_DATA (push
, interp
);
579 nv50
->state
.interpolant_ctrl
= interp
;
581 nv50
->state
.semantic_color
= colors
;
582 nv50
->state
.semantic_psize
= psiz
;
584 BEGIN_NV04(push
, NV50_3D(NOPERSPECTIVE_BITMAP(0)), 4);
585 PUSH_DATAp(push
, lin
, 4);
587 BEGIN_NV04(push
, NV50_3D(GP_ENABLE
), 1);
588 PUSH_DATA (push
, nv50
->gmtyprog
? 1 : 0);
591 BEGIN_NV04(push
, NV50_3D(STRMOUT_MAP(0)), n
);
592 PUSH_DATAp(push
, so_map
, n
);
/*
 * nv50_vp_gp_mapping: fill map[] with the vertex-program outputs that feed
 * the geometry-program inputs.
 *
 * Parameters:
 *   map - byte array of result-map entries, written from index m onwards
 *   m   - first map index to write
 *   vp  - vertex program (producer)
 *   gp  - geometry program (consumer)
 *
 * For each geometry-program input, the vertex-program outputs are searched
 * for one with the same semantic name (sn) and index (si); its mask is taken
 * as mv, while mg is the input's own mask. The four components are then
 * walked with both masks shifted right once per step. One visible store
 * writes 0x41 for component 3 and 0x40 for the others.
 *
 * The caller (nv50_gp_linkage_validate) assigns the return value back to
 * its running map index.
 *
 * NOTE(review): the tests on mg/mv inside the component loop, the
 * assignment of oid, the other map[] store and the return statement are
 * missing from this listing - confirm against the complete file.
 */
597 nv50_vp_gp_mapping(uint8_t *map
, int m
,
598 struct nv50_program
*vp
, struct nv50_program
*gp
)
602 for (i
= 0; i
< gp
->in_nr
; ++i
) {
603 uint8_t oid
= 0, mv
= 0, mg
= gp
->in
[i
].mask
;
605 for (j
= 0; j
< vp
->out_nr
; ++j
) {
606 if (vp
->out
[j
].sn
== gp
->in
[i
].sn
&&
607 vp
->out
[j
].si
== gp
->in
[i
].si
) {
608 mv
= vp
->out
[j
].mask
;
614 for (c
= 0; c
< 4; ++c
, mv
>>= 1, mg
>>= 1) {
619 map
[m
++] = (c
== 3) ? 0x41 : 0x40;
/*
 * nv50_gp_linkage_validate: link the vertex program to the geometry program
 * and emit the vertex-stage result map.
 *
 * The local map is zeroed and filled by nv50_vp_gp_mapping(). The function
 * then writes VP_GP_BUILTIN_ATTR_EN (attrs[2] of both programs OR-ed
 * together), VP_RESULT_MAP_SIZE and n words of VP_RESULT_MAP from map.
 *
 * NOTE(review): the declarations of map, m and n, the initial value of m,
 * the computation of n, the data word for VP_RESULT_MAP_SIZE and any early
 * return (for example when no geometry program is bound) are missing from
 * this listing - confirm against the complete file.
 */
629 nv50_gp_linkage_validate(struct nv50_context
*nv50
)
631 struct nouveau_pushbuf
*push
= nv50
->base
.pushbuf
;
632 struct nv50_program
*vp
= nv50
->vertprog
;
633 struct nv50_program
*gp
= nv50
->gmtyprog
;
640 memset(map
, 0, sizeof(map
));
642 m
= nv50_vp_gp_mapping(map
, m
, vp
, gp
);
646 BEGIN_NV04(push
, NV50_3D(VP_GP_BUILTIN_ATTR_EN
), 1);
647 PUSH_DATA (push
, vp
->vp
.attrs
[2] | gp
->vp
.attrs
[2]);
650 BEGIN_NV04(push
, NV50_3D(VP_RESULT_MAP_SIZE
), 1);
652 BEGIN_NV04(push
, NV50_3D(VP_RESULT_MAP(0)), n
);
653 PUSH_DATAp(push
, map
, n
);
/*
 * nv50_stream_output_validate: program transform-feedback (stream output)
 * state for the currently bound targets.
 *
 * The stream-output description "so" comes from nv50->gmtyprog when one is
 * bound, otherwise from nv50->vertprog.
 *
 * Visible flow:
 *  - STRMOUT_ENABLE is written first;
 *  - if there is no "so" or no bound target, STRMOUT_PRIMITIVE_LIMIT is
 *    written on classes older than NVA0_3D_CLASS, followed by
 *    STRMOUT_PARAMS_LATCH;
 *  - otherwise a NV50_GRAPH_SERIALIZE is issued on pre-NVA0 classes, and
 *    STRMOUT_BUFFERS_CTRL is written (with LIMIT_MODE_OFFSET on NVA0+);
 *  - each target gets its address (high/low), attribute count and, on NVA0+
 *    (n == 4), its buffer size; on NVA0+ a target that is not clean first
 *    waits on its query via nv84_hw_query_fifo_wait();
 *  - a primitive limit is derived from buffer_size / (stride * prim_size)
 *    and the minimum over targets is kept in prims;
 *  - each target buffer is referenced in bufctx_3d for writing, and
 *    targ->stride is updated;
 *  - finally STRMOUT_PRIMITIVE_LIMIT, STRMOUT_PARAMS_LATCH and
 *    STRMOUT_ENABLE are written.
 *
 * NOTE(review): the declarations of ctrl, prims and i, most data words, the
 * conditions choosing between the offset-query submit and the
 * STRMOUT_OFFSET write, and the early return of the "nothing bound" path are
 * missing from this listing; the function may also continue past the last
 * line shown. Confirm against the complete file.
 */
657 nv50_stream_output_validate(struct nv50_context
*nv50
)
659 struct nouveau_pushbuf
*push
= nv50
->base
.pushbuf
;
660 struct nv50_stream_output_state
*so
;
665 so
= nv50
->gmtyprog
? nv50
->gmtyprog
->so
: nv50
->vertprog
->so
;
667 BEGIN_NV04(push
, NV50_3D(STRMOUT_ENABLE
), 1);
669 if (!so
|| !nv50
->num_so_targets
) {
670 if (nv50
->screen
->base
.class_3d
< NVA0_3D_CLASS
) {
671 BEGIN_NV04(push
, NV50_3D(STRMOUT_PRIMITIVE_LIMIT
), 1);
674 BEGIN_NV04(push
, NV50_3D(STRMOUT_PARAMS_LATCH
), 1);
679 /* previous TFB needs to complete */
680 if (nv50
->screen
->base
.class_3d
< NVA0_3D_CLASS
) {
681 BEGIN_NV04(push
, SUBC_3D(NV50_GRAPH_SERIALIZE
), 1);
686 if (nv50
->screen
->base
.class_3d
>= NVA0_3D_CLASS
)
687 ctrl
|= NVA0_3D_STRMOUT_BUFFERS_CTRL_LIMIT_MODE_OFFSET
;
689 BEGIN_NV04(push
, NV50_3D(STRMOUT_BUFFERS_CTRL
), 1);
690 PUSH_DATA (push
, ctrl
);
/* Program every bound stream-output target. */
692 for (i
= 0; i
< nv50
->num_so_targets
; ++i
) {
693 struct nv50_so_target
*targ
= nv50_so_target(nv50
->so_target
[i
]);
694 struct nv04_resource
*buf
= nv04_resource(targ
->pipe
.buffer
);
/* NVA0 and newer take a fourth word (the buffer size) per target. */
696 const unsigned n
= nv50
->screen
->base
.class_3d
>= NVA0_3D_CLASS
? 4 : 3;
698 if (n
== 4 && !targ
->clean
)
699 nv84_hw_query_fifo_wait(push
, nv50_query(targ
->pq
));
700 BEGIN_NV04(push
, NV50_3D(STRMOUT_ADDRESS_HIGH(i
)), n
);
701 PUSH_DATAh(push
, buf
->address
+ targ
->pipe
.buffer_offset
);
702 PUSH_DATA (push
, buf
->address
+ targ
->pipe
.buffer_offset
);
703 PUSH_DATA (push
, so
->num_attribs
[i
]);
705 PUSH_DATA(push
, targ
->pipe
.buffer_size
);
708 nv50_hw_query_pushbuf_submit(push
, NVA0_3D_STRMOUT_OFFSET(i
),
709 nv50_query(targ
->pq
), 0x4);
711 BEGIN_NV04(push
, NVA0_3D(STRMOUT_OFFSET(i
)), 1);
/* Number of whole primitives that fit in this target; the smallest value
 * over all targets is kept in prims. */
716 const unsigned limit
= targ
->pipe
.buffer_size
/
717 (so
->stride
[i
] * nv50
->state
.prim_size
);
718 prims
= MIN2(prims
, limit
);
720 targ
->stride
= so
->stride
[i
];
721 BCTX_REFN(nv50
->bufctx_3d
, 3D_SO
, buf
, WR
);
724 BEGIN_NV04(push
, NV50_3D(STRMOUT_PRIMITIVE_LIMIT
), 1);
725 PUSH_DATA (push
, prims
);
727 BEGIN_NV04(push
, NV50_3D(STRMOUT_PARAMS_LATCH
), 1);
729 BEGIN_NV04(push
, NV50_3D(STRMOUT_ENABLE
), 1);