2 * Copyright 2008 Ben Skeggs
3 * Copyright 2010 Christoph Bumiller
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
20 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 #include "pipe/p_context.h"
25 #include "pipe/p_defines.h"
26 #include "pipe/p_state.h"
27 #include "util/u_inlines.h"
29 #include "nv50_context.h"
/*
 * Copy the contents of a constant buffer resource into hardware constant
 * buffer slot `cbi` by writing the data through the command ring.
 *
 * nv50: driver context; supplies the pipe context, channel and tesla object.
 * buf:  resource that is mapped for reading and used as the data source.
 * size: NOTE(review): not referenced on any visible line - confirm its use.
 * cbi:  constant buffer index; OR'ed into the CB_ADDR word and passed to
 *       nv50_screen_reloc_constbuf().
 *
 * NOTE(review): this listing has lost lines (return type, braces, the
 * declaration of `map`, loop/branch headers, the initialisation and update
 * of `start`).  The comments below describe the visible statements only.
 */
32 nv50_transfer_constbuf(struct nv50_context
*nv50
,
33 struct pipe_resource
*buf
, unsigned size
, unsigned cbi
)
35 struct pipe_context
*pipe
= &nv50
->pipe
;
36 struct pipe_transfer
*transfer
;
37 struct nouveau_channel
*chan
= nv50
->screen
->base
.channel
;
38 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
40 unsigned count
, start
;
/* Map the source buffer for reading; `transfer` is handed back to
 * pipe_buffer_unmap() at the end. */
45 map
= pipe_buffer_map(pipe
, buf
, PIPE_TRANSFER_READ
, &transfer
);
/* Buffer size (width0) rounded up to a whole number of 4-byte words. */
49 count
= (buf
->width0
+ 3) / 4;
53 unsigned nr
= AVAIL_RING(chan
);
/* Limit this chunk to the remaining word count, to the free ring space
 * minus 7 (presumably room for the method headers emitted below - confirm),
 * and to the maximum packet length. */
59 nr
= MIN2(count
, nr
- 7);
60 nr
= MIN2(nr
, NV04_PFIFO_MAX_PACKET_LEN
);
62 nv50_screen_reloc_constbuf(nv50
->screen
, cbi
);
/* Select the write position: `start` in the bits above bit 7, the buffer
 * index in the low bits; then stream `nr` words to CB_DATA. */
64 BEGIN_RING(chan
, tesla
, NV50TCL_CB_ADDR
, 1);
65 OUT_RING (chan
, (start
<< 8) | cbi
);
66 BEGIN_RING_NI(chan
, tesla
, NV50TCL_CB_DATA(0), nr
);
67 OUT_RINGp (chan
, map
, nr
);
74 pipe_buffer_unmap(pipe
, transfer
);
/*
 * Upload the data a program needs: its immediates (p->immd) are written to
 * the NV50_CB_PMISC constant buffer, and the user constant buffer bound for
 * the program's shader type is passed to nv50_transfer_constbuf().
 *
 * nv50: driver context (channel, tesla object, bound constant buffers).
 * p:    program whose immd/immd_size, parm_size and type fields are read.
 *
 * NOTE(review): this listing has lost lines (return type, braces, loop and
 * switch headers, the declarations of `start` and `cbi`, the bodies of the
 * case labels).  The comments below describe the visible statements only.
 */
78 nv50_program_validate_data(struct nv50_context
*nv50
, struct nv50_program
*p
)
80 struct nouveau_channel
*chan
= nv50
->screen
->base
.channel
;
81 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
85 uint32_t *data
= p
->immd
;
/* immd_size is divided by 4 to obtain the number of 32-bit words to send. */
86 unsigned count
= p
->immd_size
/ 4;
90 unsigned nr
= AVAIL_RING(chan
);
/* Same chunk limits as in nv50_transfer_constbuf(): remaining words,
 * free ring space minus 7, maximum packet length. */
96 nr
= MIN2(count
, nr
- 7);
97 nr
= MIN2(nr
, NV04_PFIFO_MAX_PACKET_LEN
);
99 nv50_screen_reloc_constbuf(nv50
->screen
, NV50_CB_PMISC
);
101 BEGIN_RING(chan
, tesla
, NV50TCL_CB_ADDR
, 1);
102 OUT_RING (chan
, (start
<< 8) | NV50_CB_PMISC
);
103 BEGIN_RING_NI(chan
, tesla
, NV50TCL_CB_DATA(0), nr
);
104 OUT_RINGp (chan
, data
, nr
);
112 /* If the state tracker doesn't change the constbuf, and it is first
113 * validated with a program that doesn't use it, this check prevents
114 * it from even being uploaded. */
116 if (p->parm_size == 0)
/* One case per shader type; the statements that presumably choose `cbi`
 * for each type are not visible here - confirm against the full file. */
121 case PIPE_SHADER_VERTEX
:
124 case PIPE_SHADER_FRAGMENT
:
127 case PIPE_SHADER_GEOMETRY
:
/* Upload the constant buffer bound for this program's shader type. */
135 nv50_transfer_constbuf(nv50
, nv50
->constbuf
[p
->type
], p
->parm_size
, cbi
);
/*
 * Make sure the program's machine code is present in a buffer object:
 * a VRAM buffer object is created for p->bo and p->code (p->code_size
 * bytes) is written into it through the 2D engine's SIFC data port,
 * followed by a CODE_CB_FLUSH method on the tesla object.
 *
 * nv50: driver context (channel, tesla and eng2d objects).
 * p:    program; must already be translated (asserted).  p->bo and
 *       p->code_start are written.
 *
 * NOTE(review): this listing has lost lines (return type, braces, the
 * declarations of `ret` and `offset`, loop/branch headers, several
 * OUT_RING data words, the updates of data/offset/size).  The comments
 * below describe the visible statements only.
 */
139 nv50_program_validate_code(struct nv50_context
*nv50
, struct nv50_program
*p
)
141 struct nouveau_channel
*chan
= nv50
->screen
->base
.channel
;
142 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
143 struct nouveau_grobj
*eng2d
= nv50
->screen
->eng2d
;
146 unsigned size
= p
->code_size
;
147 uint32_t *data
= p
->code
;
149 assert(p
->translated
);
151 /* TODO: use a single bo (for each type) for shader code */
/* Create a VRAM buffer object of `size` bytes; 0x100 is the third argument
 * (presumably the alignment - confirm against the nouveau_bo_new prototype). */
154 ret
= nouveau_bo_new(chan
->device
, NOUVEAU_BO_VRAM
, 0x100, size
, &p
->bo
);
/* The code is placed at the beginning of its own buffer object. */
157 offset
= p
->code_start
= 0;
/* Describe the 2D destination surface (R8_UNORM format, fixed pitch/width). */
159 BEGIN_RING(chan
, eng2d
, NV50_2D_DST_FORMAT
, 2);
160 OUT_RING (chan
, NV50_2D_DST_FORMAT_R8_UNORM
);
162 BEGIN_RING(chan
, eng2d
, NV50_2D_DST_PITCH
, 1);
163 OUT_RING (chan
, 0x40000);
164 BEGIN_RING(chan
, eng2d
, NV50_2D_DST_WIDTH
, 2);
165 OUT_RING (chan
, 0x10000);
/* Number of 32-bit words still to upload. */
169 unsigned nr
= size
/ 4;
171 if (AVAIL_RING(chan
) < 32)
/* Do not send more words than fit in the ring after leaving 18 free
 * (presumably for the setup methods emitted per chunk - confirm). */
174 nr
= MIN2(nr
, AVAIL_RING(chan
) - 18);
/* Destination address: high and low halves of p->bo + offset, relocated
 * as a VRAM write target. */
180 BEGIN_RING(chan
, eng2d
, NV50_2D_DST_ADDRESS_HIGH
, 2);
181 OUT_RELOCh(chan
, p
->bo
, offset
, NOUVEAU_BO_VRAM
| NOUVEAU_BO_WR
);
182 OUT_RELOCl(chan
, p
->bo
, offset
, NOUVEAU_BO_VRAM
| NOUVEAU_BO_WR
);
183 BEGIN_RING(chan
, eng2d
, NV50_2D_SIFC_BITMAP_ENABLE
, 2);
185 OUT_RING (chan
, NV50_2D_SIFC_FORMAT_R8_UNORM
);
/* The first SIFC_WIDTH word is the chunk size in bytes (nr words * 4). */
186 BEGIN_RING(chan
, eng2d
, NV50_2D_SIFC_WIDTH
, 10);
187 OUT_RING (chan
, nr
* 4);
198 BEGIN_RING_NI(chan
, eng2d
, NV50_2D_SIFC_DATA
, nr
);
199 OUT_RINGp (chan
, data
, nr
);
206 BEGIN_RING(chan
, tesla
, NV50TCL_CODE_CB_FLUSH
, 1);
/*
 * Build the state object for a vertex program: after validating the code
 * upload, it records the code address (high/low relocations on p->bo), the
 * two attribute enable words, the result and temp register allocations and
 * the start id.
 *
 * nv50: driver context (tesla object, passed to the code validation).
 * p:    vertex program; bo, vp.attrs[0..1], max_out, max_gpr and code_start
 *       are read.
 *
 * NOTE(review): this listing has lost lines (return type, braces and
 * whatever is done with `so` after it is filled) - confirm against the
 * full file.
 */
211 nv50_vp_update_stateobj(struct nv50_context
*nv50
, struct nv50_program
*p
)
213 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
214 struct nouveau_stateobj
*so
= so_new(5, 7, 2);
216 nv50_program_validate_code(nv50
, p
);
/* Code address, split into a HIGH and a LOW relocation against p->bo. */
218 so_method(so
, tesla
, NV50TCL_VP_ADDRESS_HIGH
, 2);
219 so_reloc (so
, p
->bo
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
220 NOUVEAU_BO_HIGH
, 0, 0);
221 so_reloc (so
, p
->bo
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
222 NOUVEAU_BO_LOW
, 0, 0);
223 so_method(so
, tesla
, NV50TCL_VP_ATTR_EN_0
, 2);
224 so_data (so
, p
->vp
.attrs
[0]);
225 so_data (so
, p
->vp
.attrs
[1]);
226 so_method(so
, tesla
, NV50TCL_VP_REG_ALLOC_RESULT
, 1);
227 so_data (so
, p
->max_out
);
228 so_method(so
, tesla
, NV50TCL_VP_REG_ALLOC_TEMP
, 1);
229 so_data (so
, p
->max_gpr
);
230 so_method(so
, tesla
, NV50TCL_VP_START_ID
, 1);
231 so_data (so
, p
->code_start
);
/*
 * Build the state object for a fragment program: after validating the code
 * upload, it records the code address (high/low relocations on p->bo), the
 * temp register allocation, the result count, the two control words taken
 * from p->fp.flags[] and the start id.
 *
 * nv50: driver context (tesla object, passed to the code validation).
 * p:    fragment program; bo, max_gpr, max_out, fp.flags[0..1] and
 *       code_start are read.
 *
 * NOTE(review): this listing has lost lines (return type, braces and
 * whatever is done with `so` after it is filled) - confirm against the
 * full file.
 */
238 nv50_fp_update_stateobj(struct nv50_context
*nv50
, struct nv50_program
*p
)
240 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
241 struct nouveau_stateobj
*so
= so_new(6, 7, 2);
243 nv50_program_validate_code(nv50
, p
);
/* Code address, split into a HIGH and a LOW relocation against p->bo. */
245 so_method(so
, tesla
, NV50TCL_FP_ADDRESS_HIGH
, 2);
246 so_reloc (so
, p
->bo
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
247 NOUVEAU_BO_HIGH
, 0, 0);
248 so_reloc (so
, p
->bo
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
249 NOUVEAU_BO_LOW
, 0, 0);
250 so_method(so
, tesla
, NV50TCL_FP_REG_ALLOC_TEMP
, 1);
251 so_data (so
, p
->max_gpr
);
252 so_method(so
, tesla
, NV50TCL_FP_RESULT_COUNT
, 1);
253 so_data (so
, p
->max_out
);
254 so_method(so
, tesla
, NV50TCL_FP_CONTROL
, 1);
255 so_data (so
, p
->fp
.flags
[0]);
256 so_method(so
, tesla
, NV50TCL_FP_CTRL_UNK196C
, 1);
257 so_data (so
, p
->fp
.flags
[1]);
258 so_method(so
, tesla
, NV50TCL_FP_START_ID
, 1);
259 so_data (so
, p
->code_start
);
/*
 * Build the state object for a geometry program: after validating the code
 * upload, it records the code address (high/low relocations on p->bo), the
 * temp and result register allocations, the output primitive type, the
 * vertex output count and the start id.
 *
 * nv50: driver context (tesla object, passed to the code validation).
 * p:    geometry program; bo, max_gpr, max_out, gp.prim_type,
 *       gp.vert_count and code_start are read.
 *
 * NOTE(review): this listing has lost lines (return type, braces and
 * whatever is done with `so` after it is filled) - confirm against the
 * full file.
 */
266 nv50_gp_update_stateobj(struct nv50_context
*nv50
, struct nv50_program
*p
)
268 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
269 struct nouveau_stateobj
*so
= so_new(6, 7, 2);
271 nv50_program_validate_code(nv50
, p
);
/* Code address, split into a HIGH and a LOW relocation against p->bo. */
273 so_method(so
, tesla
, NV50TCL_GP_ADDRESS_HIGH
, 2);
274 so_reloc (so
, p
->bo
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
275 NOUVEAU_BO_HIGH
, 0, 0);
276 so_reloc (so
, p
->bo
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
277 NOUVEAU_BO_LOW
, 0, 0);
278 so_method(so
, tesla
, NV50TCL_GP_REG_ALLOC_TEMP
, 1);
279 so_data (so
, p
->max_gpr
);
280 so_method(so
, tesla
, NV50TCL_GP_REG_ALLOC_RESULT
, 1);
281 so_data (so
, p
->max_out
);
282 so_method(so
, tesla
, NV50TCL_GP_OUTPUT_PRIMITIVE_TYPE
, 1);
283 so_data (so
, p
->gp
.prim_type
);
284 so_method(so
, tesla
, NV50TCL_GP_VERTEX_OUTPUT_COUNT
, 1);
285 so_data (so
, p
->gp
.vert_count
);
286 so_method(so
, tesla
, NV50TCL_GP_START_ID
, 1);
287 so_data (so
, p
->code_start
);
/*
 * Translate a program: stores the result of nv50_program_tx() in
 * p->translated, asserts that it is non-zero and returns it.
 *
 * NOTE(review): the return type and braces are missing from this listing.
 * When assert() is compiled out, a failed translation is only reported
 * through the return value.
 */
294 nv50_program_validate(struct nv50_program
*p
)
296 p
->translated
= nv50_program_tx(p
);
297 assert(p
->translated
);
298 return p
->translated
;
/*
 * Validation steps shared by all shader types: validates the program's code
 * upload and updates the bit for p->type in nv50->req_lmem.
 *
 * NOTE(review): the condition that chooses between setting and clearing the
 * req_lmem bit is missing from this listing (as are the return type and
 * braces) - confirm against the full file.
 */
302 nv50_program_validate_common(struct nv50_context
*nv50
, struct nv50_program
*p
)
304 nv50_program_validate_code(nv50
, p
);
/* Set the per-shader-type bit ... */
307 nv50
->req_lmem
|= 1 << p
->type
;
/* ... or clear it (the selecting condition is not visible). */
309 nv50
->req_lmem
&= ~(1 << p
->type
);
/*
 * Validate the currently bound vertex program (nv50->vertprog): translate
 * it and build its state object if it has not been translated yet, upload
 * its data when NV50_NEW_VERTPROG_CB is dirty, and run the common
 * validation.  Declared to return a struct nouveau_stateobj pointer.
 *
 * NOTE(review): this listing has lost lines (braces, the statement guarded
 * by the NV50_NEW_VERTPROG test, all return statements and any use of
 * `so`) - confirm against the full file.
 */
312 struct nouveau_stateobj
*
313 nv50_vertprog_validate(struct nv50_context
*nv50
)
315 struct nv50_program
*p
= nv50
->vertprog
;
316 struct nouveau_stateobj
*so
= NULL
;
/* First use of this program: translate it, then build its state object. */
318 if (!p
->translated
) {
319 if (nv50_program_validate(p
))
320 nv50_vp_update_stateobj(nv50
, p
);
/* Re-upload immediates/constants only when the constant buffer is dirty. */
325 if (nv50
->dirty
& NV50_NEW_VERTPROG_CB
)
326 nv50_program_validate_data(nv50
, p
);
328 if (!(nv50
->dirty
& NV50_NEW_VERTPROG
))
331 nv50_program_validate_common(nv50
, p
);
/*
 * Validate the currently bound fragment program (nv50->fragprog): translate
 * it and build its state object if it has not been translated yet, upload
 * its data when NV50_NEW_FRAGPROG_CB is dirty, and run the common
 * validation.  Declared to return a struct nouveau_stateobj pointer.
 *
 * NOTE(review): this listing has lost lines (braces, the statement guarded
 * by the NV50_NEW_FRAGPROG test, all return statements and any use of
 * `so`) - confirm against the full file.
 */
337 struct nouveau_stateobj
*
338 nv50_fragprog_validate(struct nv50_context
*nv50
)
340 struct nv50_program
*p
= nv50
->fragprog
;
341 struct nouveau_stateobj
*so
= NULL
;
/* First use of this program: translate it, then build its state object. */
343 if (!p
->translated
) {
344 if (nv50_program_validate(p
))
345 nv50_fp_update_stateobj(nv50
, p
);
/* Re-upload immediates/constants only when the constant buffer is dirty. */
350 if (nv50
->dirty
& NV50_NEW_FRAGPROG_CB
)
351 nv50_program_validate_data(nv50
, p
);
353 if (!(nv50
->dirty
& NV50_NEW_FRAGPROG
))
356 nv50_program_validate_common(nv50
, p
);
/*
 * Validate the currently bound geometry program (nv50->geomprog).  Unlike
 * the vertex and fragment variants there is an early `return NULL` path;
 * per the existing comments it covers the case of no geometry program being
 * bound.  Otherwise: translate and build the state object if needed, upload
 * data when NV50_NEW_GEOMPROG_CB is dirty, and run the common validation.
 *
 * NOTE(review): this listing has lost lines (braces, the condition guarding
 * the early return, the statement guarded by the NV50_NEW_GEOMPROG test,
 * the final return and any use of `so`) - confirm against the full file.
 */
362 struct nouveau_stateobj
*
363 nv50_geomprog_validate(struct nv50_context
*nv50
)
365 struct nv50_program
*p
= nv50
->geomprog
;
366 struct nouveau_stateobj
*so
= NULL
;
368 /* GP may be NULL, but VP and FP may not */
370 return NULL
; /* GP is deactivated in linkage validation */
/* First use of this program: translate it, then build its state object. */
372 if (!p
->translated
) {
373 if (nv50_program_validate(p
))
374 nv50_gp_update_stateobj(nv50
, p
);
/* Re-upload immediates/constants only when the constant buffer is dirty. */
379 if (nv50
->dirty
& NV50_NEW_GEOMPROG_CB
)
380 nv50_program_validate_data(nv50
, p
);
382 if (!(nv50
->dirty
& NV50_NEW_GEOMPROG
))
385 nv50_program_validate_common(nv50
, p
);
391 /* XXX: this might not work correctly in all cases yet: we assume that
392 * an FP generic input that is not written in the VP is gl_PointCoord.
/*
 * Fill the point-coordinate replacement map `pntc` (8 words, cleared first)
 * for the fragment program inputs, starting at map slot `m`.
 *
 * Each fragment program input is examined: its semantic is compared with
 * TGSI_SEMANTIC_GENERIC, a matching vertex program output (same sn and si)
 * is searched for, and the rasterizer's sprite_coord_enable mask is tested
 * against that output's semantic index.  For inputs that are replaced, each
 * enabled component c stores the value (c + 1) in a 4-bit field of pntc[],
 * eight fields per word.
 *
 * nv50: driver context (vertprog, fragprog, rasterizer state).
 * pntc: output array of 8 words.
 * m:    first slot index to use.
 *
 * NOTE(review): this listing has lost lines (return type, braces, the
 * declarations of `i` and `c`, the bodies of several branches, the updates
 * of `m`, what the sprite_coord_mode test selects, the return statement) -
 * confirm against the full file.
 */
395 nv50_pntc_replace(struct nv50_context
*nv50
, uint32_t pntc
[8], unsigned m
)
397 struct nv50_program
*vp
= nv50
->vertprog
;
398 struct nv50_program
*fp
= nv50
->fragprog
;
401 memset(pntc
, 0, 8 * sizeof(uint32_t));
406 for (i
= 0; i
< fp
->in_nr
; i
++) {
/* n = number of components enabled in this input's mask. */
407 unsigned j
, n
= util_bitcount(fp
->in
[i
].mask
);
409 if (fp
->in
[i
].sn
!= TGSI_SEMANTIC_GENERIC
) {
/* Look for a vertex program output with the same semantic name/index. */
414 for (j
= 0; j
< vp
->out_nr
; ++j
)
415 if (vp
->out
[j
].sn
== fp
->in
[i
].sn
&& vp
->out
[j
].si
== fp
->in
[i
].si
)
/* j < out_nr means the search above found a match. */
418 if (j
< vp
->out_nr
) {
419 uint32_t en
= nv50
->rasterizer
->pipe
.sprite_coord_enable
;
421 if (!(en
& (1 << vp
->out
[j
].si
))) {
427 /* this is either PointCoord or replaced by sprite coords */
428 for (c
= 0; c
< 4; c
++) {
429 if (!(fp
->in
[i
].mask
& (1 << c
)))
/* 4-bit field per slot, 8 slots per word; value is component + 1. */
431 pntc
[m
/ 8] |= (c
+ 1) << ((m
% 8) * 4);
435 if (nv50
->rasterizer
->pipe
.sprite_coord_mode
== PIPE_SPRITE_COORD_LOWER_LEFT
)
/*
 * Map the components of one varying from an output (`out`) to an input
 * (`in`), writing into the byte-addressed result map and starting at map
 * slot `mid`.
 *
 * map32: result map; accessed through a uint8_t pointer (one byte per slot).
 * mid:   first slot index to use.
 * lin:   4-word bitmap; bit (mid % 32) of word (mid / 32) is set for
 *        selected slots.
 * in:    input varying; its component mask is read.
 * out:   output varying; its component mask and hw register id are read.
 *
 * NOTE(review): this listing has lost lines (return type, braces, the
 * declaration of `c`, the per-component conditions, the map[] writes and
 * the return statement).  In particular the condition under which a `lin`
 * bit is set is not visible - confirm against the full file.  Callers in
 * this file assign the return value to their running slot index.
 */
441 nv50_vec4_map(uint32_t *map32
, int mid
, uint32_t lin
[4],
442 struct nv50_varying
*in
, struct nv50_varying
*out
)
445 uint8_t mv
= out
->mask
, mf
= in
->mask
, oid
= out
->hw
;
446 uint8_t *map
= (uint8_t *)map32
;
448 for (c
= 0; c
< 4; ++c
) {
451 lin
[mid
/ 32] |= 1 << (mid
% 32);
/*
 * Build the state object that links the outputs of the preceding shader
 * stage (`vp`) to the inputs of the bound fragment program.
 *
 * Visible steps:
 *  - the result map is pre-filled (0x80 or 0x40 bytes, next to a test of
 *    nv50->geomprog) and the `lin` bitmap is cleared;
 *  - output 0 of `vp` is mapped first with a full component mask;
 *  - clip distance slots are appended when vp->vp.clpd < 0x40;
 *  - back-face colours are mapped when light_twoside is enabled;
 *  - every fragment program input is matched by semantic name/index against
 *    the outputs of `vp`, falling back to `dummy` when no output matches;
 *  - primitive id and point size slots are patched into the map;
 *  - the collected words are emitted into a new state object, including the
 *    point sprite replacement map and the GP_ENABLE flag.
 *
 * Declared to return a struct nouveau_stateobj pointer.
 *
 * NOTE(review): this listing has lost lines (braces, the assignment of
 * `vp`, the declarations of m, n, i, c, primid and sysval, several data
 * words, the call that wraps nv50_pntc_replace(), the return statement).
 * Which fill value belongs to which geomprog branch cannot be read off the
 * visible lines - confirm against the full file.
 */
468 struct nouveau_stateobj
*
469 nv50_fp_linkage_validate(struct nv50_context
*nv50
)
471 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
472 struct nv50_program
*vp
;
473 struct nv50_program
*fp
= nv50
->fragprog
;
474 struct nouveau_stateobj
*so
;
475 struct nv50_varying dummy
;
478 uint32_t map
[16], lin
[4], pntc
[8];
480 uint32_t interp
= fp
->fp
.interp
;
481 uint32_t colors
= fp
->fp
.colors
;
482 uint32_t clip
= 0x04;
483 uint32_t psiz
= 0x000;
487 if (nv50
->geomprog
) {
489 memset(map
, 0x80, sizeof(map
));
492 memset(map
, 0x40, sizeof(map
));
494 memset(lin
, 0, sizeof(lin
));
497 dummy
.mask
= 0xf; /* map all components of HPOS */
498 m
= nv50_vec4_map(map
, 0, lin
, &dummy
, &vp
->out
[0]);
/* Append one byte-sized map entry per clip distance (4 entries per word). */
500 if (vp
->vp
.clpd
< 0x40) {
501 for (c
= 0; c
< vp
->vp
.clpd_nr
; ++c
) {
502 map
[m
/ 4] |= (vp
->vp
.clpd
+ c
) << ((m
% 4) * 8);
505 clip
|= vp
->vp
.clpd_nr
<< 8;
508 colors
|= m
<< 8; /* adjust BFC0 id */
510 /* if light_twoside is active, it seems FFC0_ID == BFC0_ID is bad */
511 if (nv50
->rasterizer
->pipe
.light_twoside
) {
512 for (i
= 0; i
< 2; ++i
)
513 m
= nv50_vec4_map(map
, m
, lin
,
514 &fp
->in
[fp
->vp
.bfc
[i
]],
515 &vp
->out
[vp
->vp
.bfc
[i
]]);
518 colors
+= m
- 4; /* adjust FFC0 id */
519 interp
|= m
<< 8; /* set mid where 'normal' FP inputs start */
/* Match each FP input to an output of `vp` with the same semantic
 * name/index; n == out_nr after the search means no match, in which case
 * `dummy` is used as the source. */
522 for (i
= 0; i
< fp
->in_nr
; i
++) {
523 for (n
= 0; n
< vp
->out_nr
; ++n
)
524 if (vp
->out
[n
].sn
== fp
->in
[i
].sn
&&
525 vp
->out
[n
].si
== fp
->in
[i
].si
)
528 m
= nv50_vec4_map(map
, m
, lin
,
529 &fp
->in
[i
], (n
< vp
->out_nr
) ? &vp
->out
[n
] : &dummy
);
532 /* PrimitiveID either is replaced by the system value, or
533 * written by the geometry shader into an output register
535 if (fp
->gp
.primid
< 0x40) {
537 map
[m
/ 4] = (map
[m
/ 4] & ~(0xff << i
)) | (vp
->gp
.primid
<< i
);
541 if (nv50
->rasterizer
->pipe
.point_size_per_vertex
) {
543 map
[m
/ 4] = (map
[m
/ 4] & ~(0xff << i
)) | (vp
->vp
.psiz
<< i
);
544 psiz
= (m
++ << 4) | 1;
547 /* now fill the stateobj (at most 28 so_data) */
548 so
= so_new(10, 54, 0);
/* The result map goes to the GP or the VP method range depending on the
 * type of the stage feeding the fragment program. */
552 if (vp
->type
== PIPE_SHADER_GEOMETRY
) {
553 so_method(so
, tesla
, NV50TCL_GP_RESULT_MAP_SIZE
, 1);
555 so_method(so
, tesla
, NV50TCL_GP_RESULT_MAP(0), n
);
556 so_datap (so
, map
, n
);
558 so_method(so
, tesla
, NV50TCL_VP_GP_BUILTIN_ATTR_EN
, 1);
559 so_data (so
, vp
->vp
.attrs
[2]);
561 so_method(so
, tesla
, NV50TCL_MAP_SEMANTIC_4
, 1);
562 so_data (so
, primid
);
564 so_method(so
, tesla
, NV50TCL_VP_RESULT_MAP_SIZE
, 1);
566 so_method(so
, tesla
, NV50TCL_VP_RESULT_MAP(0), n
);
567 so_datap (so
, map
, n
);
570 so_method(so
, tesla
, NV50TCL_MAP_SEMANTIC_0
, 4);
571 so_data (so
, colors
);
573 so_data (so
, sysval
);
576 so_method(so
, tesla
, NV50TCL_FP_INTERPOLANT_CTRL
, 1);
577 so_data (so
, interp
);
579 so_method(so
, tesla
, NV50TCL_NOPERSPECTIVE_BITMAP(0), 4);
580 so_datap (so
, lin
, 4);
/* Point sprites: the replacement map starts at the slot index stored in
 * bits 8..15 of `interp` above. */
582 if (nv50
->rasterizer
->pipe
.point_quad_rasterization
) {
583 so_method(so
, tesla
, NV50TCL_POINT_SPRITE_CTRL
, 1);
585 nv50_pntc_replace(nv50
, pntc
, (interp
>> 8) & 0xff));
587 so_method(so
, tesla
, NV50TCL_POINT_COORD_REPLACE_MAP(0), 8);
588 so_datap (so
, pntc
, 8);
591 so_method(so
, tesla
, NV50TCL_GP_ENABLE
, 1);
592 so_data (so
, (vp
->type
== PIPE_SHADER_GEOMETRY
) ? 1 : 0);
/*
 * Fill the vertex-program result map for the inputs of a geometry program,
 * starting at byte slot `m`.
 *
 * For every geometry program input the vertex program outputs are searched
 * for one with the same semantic name and index; its component mask is
 * taken as `mv` (it stays 0 when nothing matches).  The four components are
 * then walked with both masks shifted right once per step.
 *
 * map32: result map; accessed through a uint8_t pointer (one byte per slot).
 * m:     first slot index to use.
 * vp:    vertex program (outputs are read).
 * gp:    geometry program (inputs are read).
 *
 * NOTE(review): this listing has lost lines (return type, braces, the
 * declarations of i, j and c, the assignment of `oid`, the per-component
 * conditions, the other map[] writes, the return statement).  The caller in
 * this file assigns the return value to its slot counter.
 */
598 nv50_vp_gp_mapping(uint32_t *map32
, int m
,
599 struct nv50_program
*vp
, struct nv50_program
*gp
)
601 uint8_t *map
= (uint8_t *)map32
;
604 for (i
= 0; i
< gp
->in_nr
; ++i
) {
605 uint8_t oid
= 0, mv
= 0, mg
= gp
->in
[i
].mask
;
607 for (j
= 0; j
< vp
->out_nr
; ++j
) {
608 if (vp
->out
[j
].sn
== gp
->in
[i
].sn
&&
609 vp
->out
[j
].si
== gp
->in
[i
].si
) {
610 mv
= vp
->out
[j
].mask
;
616 for (c
= 0; c
< 4; ++c
, mv
>>= 1, mg
>>= 1) {
/* Constant entry: 0x41 for the fourth component, 0x40 for the others
 * (the condition selecting this write is not visible). */
621 map
[m
++] = (c
== 3) ? 0x41 : 0x40;
628 struct nouveau_stateobj
*
629 nv50_gp_linkage_validate(struct nv50_context
*nv50
)
631 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
632 struct nouveau_stateobj
*so
;
633 struct nv50_program
*vp
= nv50
->vertprog
;
634 struct nv50_program
*gp
= nv50
->geomprog
;
640 memset(map
, 0, sizeof(map
));
642 m
= nv50_vp_gp_mapping(map
, m
, vp
, gp
);
644 so
= so_new(3, 24 - 3, 0);
646 so_method(so
, tesla
, NV50TCL_VP_GP_BUILTIN_ATTR_EN
, 1);
647 so_data (so
, vp
->vp
.attrs
[2] | gp
->vp
.attrs
[2]);
650 so_method(so
, tesla
, NV50TCL_VP_RESULT_MAP_SIZE
, 1);
654 so_method(so
, tesla
, NV50TCL_VP_RESULT_MAP(0), m
);
655 so_datap (so
, map
, m
);