Move declarations before code: mixing declarations and statements is C99-only, and C89 compilers such as MSVC reject it.
Fix void pointer arithmetic: cast to a byte-sized pointer type (uint8_t*/char*) before adding offsets, since arithmetic on void* is a GNU extension.
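
For illustration, a minimal sketch of both fixes outside the driver (copy_at and its arguments are hypothetical, not code from this patch):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    static void
    copy_at(void* map, unsigned offset, const uint8_t* src, unsigned len)
    {
        uint8_t* dst;                  /* C89: all declarations precede the first statement */

        assert(map);
        dst = (uint8_t*)map + offset;  /* cast before arithmetic: void* arithmetic is a GNU extension */
        memcpy(dst, src, len);
    }
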
static inline int
nv04_region_is_contiguous(struct nv04_region* rgn, int w, int h)
{
+ int surf_min;
+ int rect_min;
+
if(rgn->pitch)
return rgn->pitch == w << rgn->bpps;
if(rgn->d > 1)
return 0;
- int surf_min = MIN2(rgn->w, rgn->h);
- int rect_min = MIN2(w, h);
+ surf_min = MIN2(rgn->w, rgn->h);
+ rect_min = MIN2(w, h);
if((rect_min == surf_min) || (w == h) || (w == 2 * h))
return 1;
{
if(rgn->pitch > 0)
{
+ int delta;
+
assert(!(rgn->offset & ((1 << rgn->bpps) - 1))); // fatal!
- int delta = rgn->offset & ((1 << shift) - 1);
+ delta = rgn->offset & ((1 << shift) - 1);
if(h <= 1)
{
}
else
{
+ int size;
+ int min;
+ int v;
+
// we don't care about the alignment of 3D surfaces since the 2D engine can't use them
if(rgn->d < 0)
return -1;
- int size;
- int min = MIN2(rgn->w, rgn->h);
+ min = MIN2(rgn->w, rgn->h);
size = min * min << rgn->bpps;
// this is unfixable, and should not be happening
if(rgn->offset & (size - 1))
return -1;
- int v = (rgn->offset & ((1 << shift) - 1)) / size;
+ v = (rgn->offset & ((1 << shift) - 1)) / size;
rgn->offset -= v * size;
if(rgn->h == min)
void
nv04_region_copy_cpu(struct nv04_region* dst, struct nv04_region* src, int w, int h)
{
+ uint8_t* mdst;
+ uint8_t* msrc;
+ int size;
+
if(dst->bo != src->bo)
{
nouveau_bo_map(dst->bo, NOUVEAU_BO_WR);
else
nouveau_bo_map(dst->bo, NOUVEAU_BO_WR | NOUVEAU_BO_RD);
- uint8_t* mdst = dst->bo->map + dst->offset;
- uint8_t* msrc = src->bo->map + src->offset;
+ mdst = (uint8_t*)dst->bo->map + dst->offset;
+ msrc = (uint8_t*)src->bo->map + src->offset;
- int size = w << dst->bpps;
+ size = w << dst->bpps;
nv04_region_assert(dst, w, h);
nv04_region_assert(src, w, h);
int* dswy;
int* sswx;
int* sswy;
+ int dir;
if(!dst->pitch)
{
sswy[iy] = nv04_swizzle_bits(0, src->y + iy, src->z, src->w, src->h, src->d);
}
- int dir = 1;
+ dir = 1;
/* do backwards copies for overlapping swizzled surfaces */
if(dst->pitch == src->pitch && dst->offset == src->offset)
{
void
nv04_region_fill_cpu(struct nv04_region* dst, int w, int h, unsigned value)
{
- uint8_t* mdst = (nouveau_bo_map(dst->bo, NOUVEAU_BO_WR), dst->bo->map + dst->offset);
+ uint8_t* mdst = (nouveau_bo_map(dst->bo, NOUVEAU_BO_WR), (uint8_t*)dst->bo->map + dst->offset);
#ifdef NV04_REGION_DEBUG
fprintf(stderr, "\tRGN_FILL_CPU ");
unsigned ex = (dst->x + w - 1) >> max_shift;
unsigned ey = (dst->y + h - 1) >> max_shift;
unsigned chunks = (ex - sx + 1) * (ey - sy + 1);
+ unsigned chunk_size;
if(dst->w < cw)
cw = dst->w;
if(dst->h < ch)
ch = dst->h;
- unsigned chunk_size = cw * ch << dst->bpps;
+ chunk_size = cw * ch << dst->bpps;
#ifdef NV04_REGION_DEBUG
fprintf(stderr, "\tRGN_COPY_SWIZZLE [%i, %i: %i] ", w, h, dst->bpps);
for (int cx = sx; cx <= ex; ++cx) {
int rx = MAX2(0, (int)(dst->x - cw * cx));
int rw = MIN2((int)cw, (int)(dst->x - cw * cx + w)) - rx;
+ unsigned dst_offset;
+ unsigned src_offset;
BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1);
- unsigned dst_offset = dst->offset + (nv04_swizzle_bits_2d(cx * cw, cy * ch, dst->w, dst->h) << dst->bpps);
+ dst_offset = dst->offset + (nv04_swizzle_bits_2d(cx * cw, cy * ch, dst->w, dst->h) << dst->bpps);
assert(dst_offset <= dst->bo->size);
assert(dst_offset + chunk_size <= dst->bo->size);
OUT_RELOCl(chan, dst->bo, dst_offset,
OUT_RING (chan, src->pitch |
NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER |
NV03_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE);
- unsigned src_offset = src->offset + (cy * ch + ry + src->y - dst->y) * src->pitch + ((cx * cw + rx + src->x - dst->x) << src->bpps);
+ src_offset = src->offset + (cy * ch + ry + src->y - dst->y) * src->pitch + ((cx * cw + rx + src->x - dst->x) << src->bpps);
assert(src_offset <= src->bo->size);
assert(src_offset + (src->pitch * (rh - 1)) + (rw << src->bpps) <= src->bo->size);
OUT_RELOCl(chan, src->bo, src_offset,
// TODO: may want to use a temporary in some cases
nouveau_bo_map(buffer->base.bo, NOUVEAU_BO_WR
| (buffer->dirty_unsynchronized ? NOUVEAU_BO_NOSYNC : 0));
- memcpy(buffer->base.bo->map + buffer->dirty_begin, buffer->data + buffer->dirty_begin, dirty);
+ memcpy((uint8_t*)buffer->base.bo->map + buffer->dirty_begin, buffer->data + buffer->dirty_begin, dirty);
nouveau_bo_unmap(buffer->base.bo);
buffer->dirty_begin = buffer->dirty_end = 0;
}
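
Note the mdst initializer in nv04_region_fill_cpu above: nouveau_bo_map() sits inside a comma expression, so the map call plus the pointer computation remain a single declaration-with-initializer and nothing needs hoisting. A hypothetical sketch of the same idiom (struct buf, buf_map and map_at are invented stand-ins for the nouveau calls):

    #include <stdint.h>

    struct buf { void* map; };

    static void buf_map(struct buf* b) { (void)b; /* would map b into memory */ }

    static uint8_t*
    map_at(struct buf* b, unsigned offset)
    {
        /* comma operator: buf_map() runs first, the pointer expression is the
           value, and the whole thing is still a single initializer */
        uint8_t* p = (buf_map(b), (uint8_t*)b->map + offset);
        return p;
    }
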
{
const struct nvfx_src none = nvfx_src(nvfx_reg(NVFXSR_NONE, 0));
struct nvfx_insn insn = arith(0, MOV, none.reg, NVFX_FP_MASK_X, src, none, none);
+ uint32_t *hw;
insn.cc_update = 1;
nvfx_fp_emit(fpc, insn);
fpc->inst_offset = fpc->fp->insn_len;
grow_insns(fpc, 4);
- uint32_t *hw = &fpc->fp->insn[fpc->inst_offset];
+ hw = &fpc->fp->insn[fpc->inst_offset];
/* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
hw[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) |
NV40_FP_OP_OUT_NONE |
nv40_fp_cal(struct nvfx_fpc *fpc, unsigned target)
{
struct nvfx_label_relocation reloc;
+ uint32_t *hw;
fpc->inst_offset = fpc->fp->insn_len;
grow_insns(fpc, 4);
- uint32_t *hw = &fpc->fp->insn[fpc->inst_offset];
+ hw = &fpc->fp->insn[fpc->inst_offset];
/* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
hw[0] = (NV40_FP_OP_BRA_OPCODE_CAL << NVFX_FP_OP_OPCODE_SHIFT);
/* Use .xxxx swizzle so that we check only src[0].x */
static void
nv40_fp_ret(struct nvfx_fpc *fpc)
{
+ uint32_t *hw;
fpc->inst_offset = fpc->fp->insn_len;
grow_insns(fpc, 4);
- uint32_t *hw = &fpc->fp->insn[fpc->inst_offset];
+ hw = &fpc->fp->insn[fpc->inst_offset];
/* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
hw[0] = (NV40_FP_OP_BRA_OPCODE_RET << NVFX_FP_OP_OPCODE_SHIFT);
/* Use .xxxx swizzle so that we check only src[0].x */
nv40_fp_rep(struct nvfx_fpc *fpc, unsigned count, unsigned target)
{
struct nvfx_label_relocation reloc;
+ uint32_t *hw;
fpc->inst_offset = fpc->fp->insn_len;
grow_insns(fpc, 4);
- uint32_t *hw = &fpc->fp->insn[fpc->inst_offset];
+ hw = &fpc->fp->insn[fpc->inst_offset];
/* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
hw[0] = (NV40_FP_OP_BRA_OPCODE_REP << NVFX_FP_OP_OPCODE_SHIFT) |
NV40_FP_OP_OUT_NONE |
nv40_fp_bra(struct nvfx_fpc *fpc, unsigned target)
{
struct nvfx_label_relocation reloc;
+ uint32_t *hw;
fpc->inst_offset = fpc->fp->insn_len;
grow_insns(fpc, 4);
- uint32_t *hw = &fpc->fp->insn[fpc->inst_offset];
+ hw = &fpc->fp->insn[fpc->inst_offset];
/* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
hw[0] = (NV40_FP_OP_BRA_OPCODE_IF << NVFX_FP_OP_OPCODE_SHIFT) |
NV40_FP_OP_OUT_NONE |
static void
nv40_fp_brk(struct nvfx_fpc *fpc)
{
+ uint32_t *hw;
fpc->inst_offset = fpc->fp->insn_len;
grow_insns(fpc, 4);
- uint32_t *hw = &fpc->fp->insn[fpc->inst_offset];
+ hw = &fpc->fp->insn[fpc->inst_offset];
/* I really wonder why fp16 precision is used. Presumably the hardware ignores it? */
hw[0] = (NV40_FP_OP_BRA_OPCODE_BRK << NVFX_FP_OP_OPCODE_SHIFT) |
NV40_FP_OP_OUT_NONE;
case TGSI_OPCODE_ELSE:
{
+ uint32_t *hw;
if(!nvfx->is_nv4x)
goto nv3x_cflow;
assert(util_dynarray_contains(&fpc->if_stack, unsigned));
- uint32_t *hw = &fpc->fp->insn[util_dynarray_top(&fpc->if_stack, unsigned)];
+ hw = &fpc->fp->insn[util_dynarray_top(&fpc->if_stack, unsigned)];
hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len;
break;
}
case TGSI_OPCODE_ENDIF:
{
+ uint32_t *hw;
if(!nvfx->is_nv4x)
goto nv3x_cflow;
assert(util_dynarray_contains(&fpc->if_stack, unsigned));
- uint32_t *hw = &fpc->fp->insn[util_dynarray_pop(&fpc->if_stack, unsigned)];
+ hw = &fpc->fp->insn[util_dynarray_pop(&fpc->if_stack, unsigned)];
if(!hw[2])
hw[2] = NV40_FP_OP_OPCODE_IS_BRANCH | fpc->fp->insn_len;
hw[3] = fpc->fp->insn_len;
struct nouveau_channel* chan = nvfx->screen->base.channel;
struct nvfx_fragment_program *fp = nvfx->fragprog;
int update = 0;
+ struct nvfx_vertex_program* vp;
+ unsigned sprite_coord_enable;
if (!fp->translated)
{
if (nvfx->dirty & (NVFX_NEW_FRAGCONST | NVFX_NEW_FRAGPROG))
update = TRUE;
- struct nvfx_vertex_program* vp = nvfx->render_mode == HW ? nvfx->vertprog : nvfx->swtnl.vertprog;
+ vp = nvfx->render_mode == HW ? nvfx->vertprog : nvfx->swtnl.vertprog;
if (fp->last_vp_id != vp->id) {
char* vp_sem_table = vp->generic_to_fp_input;
unsigned char* fp_semantics = fp->slot_to_generic;
unsigned diff = 0;
+ unsigned char* cur_slots;
fp->last_vp_id = nvfx->vertprog->id;
- unsigned char* cur_slots = fp->slot_to_fp_input;
+ cur_slots = fp->slot_to_fp_input;
for(unsigned i = 0; i < fp->num_slots; ++i) {
unsigned char slot_mask = vp_sem_table[fp_semantics[i]];
diff |= (slot_mask >> 4) & (slot_mask ^ cur_slots[i]);
}
// last_sprite_coord_enable
- unsigned sprite_coord_enable = nvfx->rasterizer->pipe.point_quad_rasterization * nvfx->rasterizer->pipe.sprite_coord_enable;
+ sprite_coord_enable = nvfx->rasterizer->pipe.point_quad_rasterization * nvfx->rasterizer->pipe.sprite_coord_enable;
if(fp->last_sprite_coord_enable != sprite_coord_enable)
{
unsigned texcoord_mask = vp->texcoord_ouput_mask;
}
if(update) {
+ int offset;
+ uint32_t* fpmap;
+
++fp->bo_prog_idx;
if(fp->bo_prog_idx >= fp->progs_per_bo)
{
else
{
struct nvfx_fragment_program_bo* fpbo = os_malloc_aligned(sizeof(struct nvfx_fragment_program) + (fp->prog_size + 8) * fp->progs_per_bo, 16);
+ uint8_t* map;
+ uint8_t* buf;
+
fpbo->slots = (unsigned char*)&fpbo->insn[(fp->prog_size) * fp->progs_per_bo];
memset(fpbo->slots, 0, 8 * fp->progs_per_bo);
if(fp->fpbo)
nouveau_bo_new(nvfx->screen->base.device, NOUVEAU_BO_VRAM | NOUVEAU_BO_MAP, 64, fp->prog_size * fp->progs_per_bo, &fpbo->bo);
nouveau_bo_map(fpbo->bo, NOUVEAU_BO_NOSYNC);
- uint8_t* map = fpbo->bo->map;
- uint8_t* buf = (uint8_t*)fpbo->insn;
+ map = fpbo->bo->map;
+ buf = (uint8_t*)fpbo->insn;
for(unsigned i = 0; i < fp->progs_per_bo; ++i)
{
memcpy(buf, fp->insn, fp->insn_len * 4);
fp->bo_prog_idx = 0;
}
- int offset = fp->bo_prog_idx * fp->prog_size;
- uint32_t* fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset);
+ offset = fp->bo_prog_idx * fp->prog_size;
+ fpmap = (uint32_t*)((char*)fp->fpbo->bo->map + offset);
if(nvfx->constbuf[PIPE_SHADER_FRAGMENT]) {
struct pipe_resource* constbuf = nvfx->constbuf[PIPE_SHADER_FRAGMENT];
nvfx_miptree_create(struct pipe_screen *pscreen, const struct pipe_resource *pt)
{
struct nvfx_miptree* mt = nvfx_miptree_create_skeleton(pscreen, pt);
+ unsigned size;
nvfx_miptree_choose_format(mt);
- unsigned size = nvfx_miptree_layout(mt);
+ size = nvfx_miptree_layout(mt);
mt->base.bo = nouveau_screen_bo_new(pscreen, 256, pt->usage, pt->bind, size);
nvfx_miptree_from_handle(struct pipe_screen *pscreen, const struct pipe_resource *template, struct winsys_handle *whandle)
{
struct nvfx_miptree* mt = nvfx_miptree_create_skeleton(pscreen, template);
+ unsigned stride;
if(whandle->stride) {
mt->linear_pitch = whandle->stride;
mt->base.base.flags |= NVFX_RESOURCE_FLAG_LINEAR;
nvfx_miptree_layout(mt);
- unsigned stride;
mt->base.bo = nouveau_screen_bo_from_handle(pscreen, whandle, &stride);
if (mt->base.bo == NULL) {
FREE(mt);
unsigned still_dirty = 0;
int all_swizzled = -1;
boolean flush_tex_cache = FALSE;
+ unsigned render_temps;
if(nvfx != nvfx->screen->cur_ctx)
{
nvfx->dirty = dirty & still_dirty;
- unsigned render_temps = nvfx->state.render_temps;
+ render_temps = nvfx->state.render_temps;
if(render_temps)
{
for(int i = 0; i < nvfx->framebuffer.nr_cbufs; ++i)
rgn->bpps = 2;
break;
default:
- assert(util_is_pot(bits));
- int shift = util_logbase2(bits) - 3;
- assert(shift >= 2);
- rgn->bpps = 2;
- shift -= 2;
-
- rgn->x = util_format_get_nblocksx(format, rgn->x) << shift;
- rgn->y = util_format_get_nblocksy(format, rgn->y);
+ {
+ int shift;
+ assert(util_is_pot(bits));
+ shift = util_logbase2(bits) - 3;
+ assert(shift >= 2);
+ rgn->bpps = 2;
+ shift -= 2;
+
+ rgn->x = util_format_get_nblocksx(format, rgn->x) << shift;
+ rgn->y = util_format_get_nblocksy(format, rgn->y);
+ }
}
}
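
The default case above gains a braced block purely so that shift can be declared at the top of a scope; that is the standard C89 idiom for introducing locals inside a switch case. A small self-contained sketch (log2i and block_shift are hypothetical stand-ins for util_logbase2 and the region code):

    #include <assert.h>

    static int log2i(unsigned v)   /* stand-in for util_logbase2 */
    {
        int r = -1;
        while (v) { ++r; v >>= 1; }
        return r;
    }

    static int block_shift(unsigned bits)
    {
        switch (bits) {
        case 8:
            return 0;
        default:
        {
            int shift;             /* the braces open a scope, so this declaration
                                      legally precedes any statement */
            assert(bits && !(bits & (bits - 1))); /* power of two, like util_is_pot() */
            shift = log2i(bits) - 3;
            return shift;
        }
        }
    }
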
unsigned srcx, unsigned srcy, unsigned srcz,
unsigned w, unsigned h)
{
+ static int copy_threshold = -1;
struct nv04_2d_context *ctx = nvfx_screen(pipe->screen)->eng2d;
struct nv04_region dst, src;
+ int dst_to_gpu;
+ int src_on_gpu;
+ boolean small;
+ int ret;
if(!w || !h)
return;
- static int copy_threshold = -1;
if(copy_threshold < 0)
copy_threshold = debug_get_num_option("NOUVEAU_COPY_THRESHOLD", 4);
- int dst_to_gpu = dstr->usage != PIPE_USAGE_DYNAMIC && dstr->usage != PIPE_USAGE_STAGING;
- int src_on_gpu = nvfx_resource_on_gpu(srcr);
+ dst_to_gpu = dstr->usage != PIPE_USAGE_DYNAMIC && dstr->usage != PIPE_USAGE_STAGING;
+ src_on_gpu = nvfx_resource_on_gpu(srcr);
nvfx_region_init_for_subresource(&dst, dstr, subdst, dstx, dsty, dstz, TRUE);
nvfx_region_init_for_subresource(&src, srcr, subsrc, srcx, srcy, srcz, FALSE);
w = util_format_get_stride(dstr->format, w) >> dst.bpps;
h = util_format_get_nblocksy(dstr->format, h);
- int ret;
- boolean small = (w * h <= copy_threshold);
+ small = (w * h <= copy_threshold);
if((!dst_to_gpu || !src_on_gpu) && small)
ret = -1; /* use the CPU */
else
{
struct nv04_2d_context *ctx = nvfx_screen(pipe->screen)->eng2d;
struct nv04_region dst;
+ int ret;
/* Always try to use the GPU right now, if possible
* If the user wanted the surface data on the CPU, he would have cleared with memset (hopefully) */
w = util_format_get_stride(dsts->format, w) >> dst.bpps;
h = util_format_get_nblocksy(dsts->format, h);
- int ret = nv04_region_fill_2d(ctx, &dst, w, h, value);
+ ret = nv04_region_fill_2d(ctx, &dst, w, h, value);
if(ret > 0 && dsts->texture->bind & PIPE_BIND_RENDER_TARGET)
return 1;
else if(ret)
unsigned inline_cost = 0;
unsigned unique_vertices;
unsigned upload_mode;
+ float best_index_cost_for_hardware_vertices_as_inline_cost;
+ boolean prefer_hardware_indices;
+ unsigned index_inline_cost;
+ unsigned index_hardware_cost;
if (info->indexed)
unique_vertices = util_guess_unique_indices_count(info->mode, info->count);
else
inline_cost += vbi->per_vertex_size * info->count;
}
- float best_index_cost_for_hardware_vertices_as_inline_cost = 0.0f;
- boolean prefer_hardware_indices = FALSE;
- unsigned index_inline_cost = 0;
- unsigned index_hardware_cost = 0;
+ best_index_cost_for_hardware_vertices_as_inline_cost = 0.0f;
+ prefer_hardware_indices = FALSE;
+ index_inline_cost = 0;
+ index_hardware_cost = 0;
if (info->indexed)
{
void
nvfx_vbo_relocate(struct nvfx_context *nvfx)
{
+ struct nouveau_channel* chan;
+ unsigned vb_flags;
+ int i;
+
if(!nvfx->use_vertex_buffers)
return;
- struct nouveau_channel* chan = nvfx->screen->base.channel;
- unsigned vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
- int i;
+ chan = nvfx->screen->base.channel;
+ vb_flags = nvfx->screen->vertex_buffer_reloc_flags | NOUVEAU_BO_RD | NOUVEAU_BO_DUMMY;
MARK_RING(chan, 2 * 16 + 3, 2 * 16 + 3);
for (i = 0; i < nvfx->vtxelt->num_per_vertex; i++) {
struct nvfx_vtxelt_state *cso = CALLOC_STRUCT(nvfx_vtxelt_state);
struct translate_key transkey;
unsigned per_vertex_size[16];
- memset(per_vertex_size, 0, sizeof(per_vertex_size));
-
unsigned vb_compacted_index[16];
+ memset(per_vertex_size, 0, sizeof(per_vertex_size));
+
assert(num_elements < 16); /* not doing fallbacks yet */
memcpy(cso->pipe, elements, num_elements * sizeof(elements[0]));
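
The last hunk shows the subtler side of the same rule: even an innocuous memset placed between two declarations counts as a statement, so it has to move below every declaration in the block. A minimal sketch (init_tables is hypothetical):

    #include <string.h>

    static void
    init_tables(void)
    {
        unsigned a[16];
        unsigned b[16];          /* C89: b must still be declared before any statement... */

        memset(a, 0, sizeof(a)); /* ...so the memset slides below both declarations */
        memset(b, 0, sizeof(b));
    }
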