/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/
#include "radeon_common.h"
#include "radeon_fog.h"
#include "util/simple_list.h"
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )                                     \
do {                                                                    \
    int __tmp;                                                          \
    __asm__ __volatile__( "rep ; movsl"                                 \
                          : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)     \
                          : "0" (nr),                                   \
                            "D" ((long)dst),                            \
                            "S" ((long)src) );                          \
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )     \
do {                                    \
    int j;                              \
    for ( j = 0 ; j < nr ; j++ )        \
        dst[j] = ((int *)src)[j];       \
    dst += nr;                          \
} while (0)
#endif
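/* The radeonEmitVec* helpers below gather one vertex attribute from a
 * (possibly strided) user array into a tightly packed DMA buffer,
 * copying 1, 2, 3 or 4 dwords per vertex.  When the source stride
 * equals the element size the data is already contiguous and
 * COPY_DWORDS bulk-copies it; otherwise the slow path walks the array
 * element by element.
 *
 * A minimal usage sketch (the 'dst' and 'texcoords' buffers are
 * hypothetical, not from this file): packing 'count' contiguous
 * 2-dword texcoords:
 *
 *     radeonEmitVec8(dst, texcoords, 8, count);
 */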
void radeonEmitVec4(uint32_t *out, const GLvoid *data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __func__, count, stride, (void *)out, (void *)data);

    if (stride == 4)
        COPY_DWORDS(out, data, count);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out++;
            data += stride;
        }
}
void radeonEmitVec8(uint32_t *out, const GLvoid *data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __func__, count, stride, (void *)out, (void *)data);

    if (stride == 8)
        COPY_DWORDS(out, data, count * 2);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out += 2;
            data += stride;
        }
}
void radeonEmitVec12(uint32_t *out, const GLvoid *data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __func__, count, stride, (void *)out, (void *)data);

    if (stride == 12)
        COPY_DWORDS(out, data, count * 3);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out[2] = *(int *)(data + 8);
            out += 3;
            data += stride;
        }
}
void radeonEmitVec16(uint32_t *out, const GLvoid *data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __func__, count, stride, (void *)out, (void *)data);

    if (stride == 16)
        COPY_DWORDS(out, data, count * 4);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out[2] = *(int *)(data + 8);
            out[3] = *(int *)(data + 12);
            out += 4;
            data += stride;
        }
}
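/* Upload one vertex attribute into a freshly allocated DMA region and
 * describe its layout in *aos for the TCL setup code.  A stride of 0
 * denotes a constant attribute: a single element is uploaded and
 * aos->stride is left at 0 so the same value is used for every vertex.
 */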
void rcommon_emit_vector(struct gl_context *ctx, struct radeon_aos *aos,
                         const GLvoid *data, int size, int stride, int count)
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    uint32_t *out;

    if (stride == 0) {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
        count = 1;
        aos->stride = 0;
    } else {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
        aos->stride = size;
    }

    aos->components = size;
    aos->count = count;

    radeon_bo_map(aos->bo, 1);
    out = (uint32_t *)((char *)aos->bo->ptr + aos->offset);
    switch (size) {
    case 1: radeonEmitVec4(out, data, stride, count); break;
    case 2: radeonEmitVec8(out, data, stride, count); break;
    case 3: radeonEmitVec12(out, data, stride, count); break;
    case 4: radeonEmitVec16(out, data, stride, count); break;
    default:
        assert(0);
        break;
    }
    radeon_bo_unmap(aos->bo);
}
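/* Fog is emitted as a single float per vertex.  Unlike
 * rcommon_emit_vector this does not copy raw data: each source fog
 * coordinate is first folded through radeonComputeFogBlendFactor for
 * the current fog mode.
 */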
void rcommon_emit_vecfog(struct gl_context *ctx, struct radeon_aos *aos,
                         GLvoid *data, int stride, int count)
{
    int i;
    float *out;
    int size = 1;
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d\n",
                __func__, count, stride);

    if (stride == 0) {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
        count = 1;
        aos->stride = 0;
    } else {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
        aos->stride = size;
    }

    aos->components = size;
    aos->count = count;

    /* Emit the data */
    radeon_bo_map(aos->bo, 1);
    out = (float *)((char *)aos->bo->ptr + aos->offset);
    for (i = 0; i < count; i++) {
        out[0] = radeonComputeFogBlendFactor(ctx, *(GLfloat *)data);
        out++;
        data += stride;
    }
    radeon_bo_unmap(aos->bo);
}
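/* The DMA buffer pool is managed as three lists:
 *
 *   reserved - buffers currently being filled by the CPU
 *   wait     - buffers handed to the GPU, cooling off before reuse
 *   free     - idle buffers, ready to be recycled or eventually freed
 *
 * radeonReleaseDmaRegions below rotates buffers through
 * reserved -> wait -> free as rendering completes.
 */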
void radeon_init_dma(radeonContextPtr rmesa)
{
    make_empty_list(&rmesa->dma.free);
    make_empty_list(&rmesa->dma.wait);
    make_empty_list(&rmesa->dma.reserved);
    rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}
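/* Make a buffer current: recycle the last buffer on the free list if
 * it is large enough, otherwise open a new BO in the GTT domain.  If
 * the kernel cannot satisfy the allocation, the command buffer is
 * flushed and the allocation retried.
 */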
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
    struct radeon_dma_bo *dma_bo = NULL;

    /* Raise the minimum size to at least the requested size,
       aligned to the next 16 bytes. */
    if (size > rmesa->dma.minimum_size)
        rmesa->dma.minimum_size = (size + 15) & (~15);

    radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %Zi\n",
                 __func__, size, rmesa->dma.minimum_size);

    if (is_empty_list(&rmesa->dma.free)
          || last_elem(&rmesa->dma.free)->bo->size < size) {
        dma_bo = CALLOC_STRUCT(radeon_dma_bo);
        assert(dma_bo);

again_alloc:
        dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
                                    0, rmesa->dma.minimum_size, 4,
                                    RADEON_GEM_DOMAIN_GTT, 0);
        if (!dma_bo->bo) {
            /* allocation failed: flush pending commands and retry */
            rcommonFlushCmdBuf(rmesa, __func__);
            goto again_alloc;
        }
        insert_at_head(&rmesa->dma.reserved, dma_bo);
    } else {
        /* We push and pop buffers from the end of the list so we can
           keep a counter of unused buffers for later freeing them
           from the beginning of the list. */
        dma_bo = last_elem(&rmesa->dma.free);
        remove_from_list(dma_bo);
        insert_at_head(&rmesa->dma.reserved, dma_bo);
    }

    rmesa->dma.current_used = 0;
    rmesa->dma.current_vertexptr = 0;

    if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
                                      first_elem(&rmesa->dma.reserved)->bo,
                                      RADEON_GEM_DOMAIN_GTT, 0))
        fprintf(stderr, "failure to revalidate BOs - badness\n");

    if (is_empty_list(&rmesa->dma.reserved)) {
        /* the command buffer was flushed in radeon_revalidate_bos */
        goto again_alloc;
    }
    radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}
/* Allocates a region from rmesa->dma.current.  If there isn't enough
 * space in current, grab a new buffer (and discard what was left of
 * current).
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
                          struct radeon_bo **pbo, int *poffset,
                          int bytes, int alignment)
{
    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s %d\n", __func__, bytes);

    if (rmesa->dma.flush)
        rmesa->dma.flush(&rmesa->glCtx);

    assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

    /* round current_used up to the requested (power-of-two) alignment */
    alignment--;
    rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

    if (is_empty_list(&rmesa->dma.reserved)
          || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
        radeonRefillCurrentDmaRegion(rmesa, bytes);

    *poffset = rmesa->dma.current_used;
    *pbo = first_elem(&rmesa->dma.reserved)->bo;
    radeon_bo_ref(*pbo);

    /* Always align to at least 16 bytes */
    rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
    rmesa->dma.current_vertexptr = rmesa->dma.current_used;

    assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}
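/* A minimal usage sketch for radeonAllocDmaRegion (the 'normals' array
 * and 'count' are hypothetical, not from this file); rcommon_emit_vector
 * above is the real in-tree user of this pattern.  The BO comes back
 * referenced, so the caller owns one reference:
 *
 *     struct radeon_bo *bo;
 *     int offset;
 *
 *     radeonAllocDmaRegion(rmesa, &bo, &offset, count * 12, 32);
 *     radeon_bo_map(bo, 1);
 *     memcpy((char *)bo->ptr + offset, normals, count * 12);
 *     radeon_bo_unmap(bo);
 */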
void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
    struct radeon_dma_bo *dma_bo;
    struct radeon_dma_bo *temp;

    if (RADEON_DEBUG & RADEON_DMA)
        fprintf(stderr, "%s\n", __func__);

    foreach_s(dma_bo, temp, &rmesa->dma.free) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        free(dma_bo);
    }

    foreach_s(dma_bo, temp, &rmesa->dma.wait) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        free(dma_bo);
    }

    foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        free(dma_bo);
    }
}
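/* Give back the unused tail of the current region, e.g. when fewer
 * bytes were emitted than the caller reserved space for.
 */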
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
    if (is_empty_list(&rmesa->dma.reserved))
        return;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s %d\n", __func__, return_bytes);

    rmesa->dma.current_used -= return_bytes;
    rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}
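/* radeon_bo_is_busy() reports -EBUSY while the GPU still references
 * the BO.  -EINVAL means the kernel/libdrm combination lacks the busy
 * query entirely; in that case buffers are treated as idle, which is
 * safe but may recycle them earlier than ideal.
 */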
static int radeon_bo_is_idle(struct radeon_bo *bo)
{
    uint32_t domain;
    int ret = radeon_bo_is_busy(bo, &domain);

    if (ret == -EINVAL) {
        WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
                  "This may cause a small performance drop for you.\n");
    }
    return ret != -EBUSY;
}
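/* Buffer aging: dma.free.expire_counter acts as a coarse clock that
 * advances once per call.  Buffers leaving the reserved list are
 * stamped with (clock + DMA_BO_FREE_TIME) and migrate
 * reserved -> wait -> free; once the stamp comes due without the
 * buffer having been recycled, it is destroyed.
 */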
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
    struct radeon_dma_bo *dma_bo;
    struct radeon_dma_bo *temp;
    const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
    const int time = rmesa->dma.free.expire_counter;

    if (RADEON_DEBUG & RADEON_DMA) {
        size_t free = 0,
               wait = 0,
               reserved = 0;

        foreach(dma_bo, &rmesa->dma.free)
            ++free;

        foreach(dma_bo, &rmesa->dma.wait)
            ++wait;

        foreach(dma_bo, &rmesa->dma.reserved)
            ++reserved;

        fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
                __func__, free, wait, reserved, rmesa->dma.minimum_size);
    }

    /* Move waiting bos to the free list.  The wait list gives the gpu
       time to finish with the data before the buffer is reused. */
    foreach_s(dma_bo, temp, &rmesa->dma.wait) {
        if (dma_bo->expire_counter == time) {
            WARN_ONCE("Leaking dma buffer object!\n");
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            free(dma_bo);
            continue;
        }
        /* free objects that are too small to be used because of large request */
        if (dma_bo->bo->size < rmesa->dma.minimum_size) {
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            free(dma_bo);
            continue;
        }

        if (!radeon_bo_is_idle(dma_bo->bo))
            break;

        remove_from_list(dma_bo);
        dma_bo->expire_counter = expire_at;
        insert_at_tail(&rmesa->dma.free, dma_bo);
    }

    /* move reserved to wait list */
    foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
        radeon_bo_unmap(dma_bo->bo);
        /* free objects that are too small to be used because of large request */
        if (dma_bo->bo->size < rmesa->dma.minimum_size) {
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            free(dma_bo);
            continue;
        }

        remove_from_list(dma_bo);
        dma_bo->expire_counter = expire_at;
        insert_at_tail(&rmesa->dma.wait, dma_bo);
    }

    /* free bos that have been unused for some time */
    foreach_s(dma_bo, temp, &rmesa->dma.free) {
        if (dma_bo->expire_counter != time)
            break;
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        free(dma_bo);
    }
}
/* Flush vertices in the current dma region.
 */
void rcommon_flush_last_swtcl_prim(struct gl_context *ctx)
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    struct radeon_dma *dma = &rmesa->dma;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __func__);
    dma->flush = NULL;

    radeon_bo_unmap(rmesa->swtcl.bo);

    if (!is_empty_list(&dma->reserved)) {
        GLuint current_offset = dma->current_used;

        assert(dma->current_used +
               rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
               dma->current_vertexptr);

        if (dma->current_used != dma->current_vertexptr) {
            dma->current_used = dma->current_vertexptr;

            rmesa->vtbl.swtcl_flush(ctx, current_offset);
        }
        rmesa->swtcl.numverts = 0;
    }
    radeon_bo_unref(rmesa->swtcl.bo);
    rmesa->swtcl.bo = NULL;
}
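/* The dma.flush hook installed below is the contract between the
 * software-TCL vertex path and the DMA allocator: anyone who needs to
 * reuse or replace the current region (e.g. radeonAllocDmaRegion)
 * invokes dma.flush first, so buffered vertices reach the hardware
 * before their storage is recycled.
 */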
/* Alloc space in the current dma region.
 */
void *
rcommonAllocDmaLowVerts(radeonContextPtr rmesa, int nverts, int vsize)
{
    GLuint bytes = vsize * nverts;
    void *head;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __func__);

    if (is_empty_list(&rmesa->dma.reserved)
          || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
        if (rmesa->dma.flush) {
            rmesa->dma.flush(&rmesa->glCtx);
        }

        radeonRefillCurrentDmaRegion(rmesa, bytes);

        /* signal the caller to retry with the freshly refilled region */
        return NULL;
    }

    if (!rmesa->dma.flush) {
        /* the cmdbuf was flushed, so restart DMA: mark stored vertices
           for flushing and reinstall the flush hook */
        rmesa->glCtx.Driver.NeedFlush |= FLUSH_STORED_VERTICES;
        rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
    }

    assert(vsize == rmesa->swtcl.vertex_size * 4);
    assert(rmesa->dma.flush == rcommon_flush_last_swtcl_prim);
    assert(rmesa->dma.current_used +
           rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
           rmesa->dma.current_vertexptr);

    if (!rmesa->swtcl.bo) {
        rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo;
        radeon_bo_ref(rmesa->swtcl.bo);
        radeon_bo_map(rmesa->swtcl.bo, 1);
    }

    head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr);
    rmesa->dma.current_vertexptr += bytes;
    rmesa->swtcl.numverts += nverts;
    return head;
}
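/* Release the references that rcommon_emit_vector took on the AoS BOs
 * once the draw call is finished.  Pending software-TCL vertices are
 * flushed first.  (The newinputs mask is part of the driver interface
 * but is not consulted here.)
 */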
void radeonReleaseArrays(struct gl_context *ctx, GLuint newinputs)
{
    radeonContextPtr radeon = RADEON_CONTEXT(ctx);
    int i;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __func__);

    if (radeon->dma.flush) {
        radeon->dma.flush(&radeon->glCtx);
    }

    for (i = 0; i < radeon->tcl.aos_count; i++) {
        if (radeon->tcl.aos[i].bo) {
            radeon_bo_unref(radeon->tcl.aos[i].bo);
            radeon->tcl.aos[i].bo = NULL;
        }
    }
}