git.libre-soc.org Git - mesa.git/blob - src/mesa/drivers/dri/intel/intel

1 /**************************************************************************

2 *

5 *

6 * Permission is hereby granted, free of charge, to any person obtaining a

7 * copy of this software and associated documentation files (the

8 * "Software"), to deal in the Software without restriction, including

9 * without limitation the rights to use, copy, modify, merge, publish,

10 * distribute, sub license, and/or sell copies of the Software, and to

11 * permit persons to whom the Software is furnished to do so, subject to

12 * the following conditions:

13 *

14 * The above copyright notice and this permission notice (including the

15 * next paragraph) shall be included in all copies or substantial portions

16 * of the Software.

17 *

18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS

19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.

21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR

22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,

23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE

24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

25 *

26 **************************************************************************/

28 #include "intel_context.h"

29 #include "intel_batchbuffer.h"

30 #include "intel_buffer_objects.h"

31 #include "intel_decode.h"

32 #include "intel_reg.h"

33 #include "intel_bufmgr.h"

34 #include "intel_buffers.h"

/**
 * One node of the singly linked list hanging off intel->batch.cached_items.
 *
 * Each node records where a previously emitted packet sits in the batch
 * so that intel_batchbuffer_cached_advance() can compare a newly emitted
 * packet against it.
 */
struct cached_batch_item {
   /* Next node in the list, or NULL at the tail. */
   struct cached_batch_item *next;

   /* Index of the packet's first dword, relative to intel->batch.map. */
   uint16_t header;

   /* Length of the packet in bytes. */
   uint16_t size;
};

 static void clear_cache( struct intel_context *intel )

43 {

    struct cached_batch_item *item = intel->batch.cached_items;

46 while (item) {

       struct cached_batch_item *next = item->next;

48 free(item);

49 item = next;

50 }

52 intel->batch.cached_items = NULL;

53 }

55 void

 intel_batchbuffer_init(struct intel_context *intel)

57 {

58 intel_batchbuffer_reset(intel);

    if (intel->gen == 6) {

61 /* We can't just use brw_state_batch to get a chunk of space for

62 * the gen6 workaround because it involves actually writing to

63 * the buffer, and the kernel doesn't let us write to the batch.

64 */

       intel->batch.workaround_bo = drm_intel_bo_alloc(intel->bufmgr,

66 "gen6 workaround",

67 4096, 4096);

68 }

69 }

71 void

 intel_batchbuffer_reset(struct intel_context *intel)

73 {

    if (intel->batch.last_bo != NULL) {

       drm_intel_bo_unreference(intel->batch.last_bo);

76 intel->batch.last_bo = NULL;

77 }

    intel->batch.last_bo = intel->batch.bo;

80 clear_cache(intel);

    intel->batch.bo = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer",

83 intel->maxBatchSize, 4096);

85 intel->batch.reserved_space = BATCH_RESERVED;

    intel->batch.state_batch_offset = intel->batch.bo->size;

    intel->batch.used = 0;

88 }

90 void

 intel_batchbuffer_free(struct intel_context *intel)

92 {

    drm_intel_bo_unreference(intel->batch.last_bo);

    drm_intel_bo_unreference(intel->batch.bo);

    drm_intel_bo_unreference(intel->batch.workaround_bo);

96 clear_cache(intel);

97 }

100 /* TODO: Push this whole function into bufmgr.

101 */

102 static void

 do_flush_locked(struct intel_context *intel)

104 {

    struct intel_batchbuffer *batch = &intel->batch;

106 int ret = 0;

107

    ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);

    if (ret == 0 && batch->state_batch_offset != batch->bo->size) {

       ret = drm_intel_bo_subdata(batch->bo,

111 batch->state_batch_offset,

                                  batch->bo->size - batch->state_batch_offset,

                                  (char *)batch->map + batch->state_batch_offset);

114 }

115

    if (!intel->intelScreen->no_hw) {

117 int ring;

118

       if (intel->gen < 6 || !batch->is_blit) {

120 ring = I915_EXEC_RENDER;

121 } else {

122 ring = I915_EXEC_BLT;

123 }

124

       if (ret == 0)

          ret = drm_intel_bo_mrb_exec(batch->bo, 4*batch->used, NULL, 0, 0, ring);

127 }

128

    if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {

       drm_intel_bo_map(batch->bo, false);

       intel_decode(batch->bo->virtual, batch->used,

132 batch->bo->offset,

133 intel->intelScreen->deviceID, GL_TRUE);

134 drm_intel_bo_unmap(batch->bo);

135

       if (intel->vtbl.debug_batch != NULL)

          intel->vtbl.debug_batch(intel);

138 }

139

    if (ret != 0) {

       fprintf(stderr, "intel_do_flush_locked failed: %s\n", strerror(ret));

142 exit(1);

143 }

    intel->vtbl.new_batch(intel);

145 }

146

147 void

 _intel_batchbuffer_flush(struct intel_context *intel,

                          const char *file, int line)

150 {

    if (intel->batch.used == 0)

152 return;

153

    if (intel->first_post_swapbuffers_batch == NULL) {

       intel->first_post_swapbuffers_batch = intel->batch.bo;

156 drm_intel_bo_reference(intel->first_post_swapbuffers_batch);

157 }

158

    if (unlikely(INTEL_DEBUG & DEBUG_BATCH))

       fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,

               4*intel->batch.used);

162

    intel->batch.reserved_space = 0;

164

165 if (intel->always_flush_cache) {

166 intel_batchbuffer_emit_mi_flush(intel);

167 }

168

169 /* Mark the end of the buffer. */

170 intel_batchbuffer_emit_dword(intel, MI_BATCH_BUFFER_END);

    if (intel->batch.used & 1) {

172 /* Round batchbuffer usage to 2 DWORDs. */

173 intel_batchbuffer_emit_dword(intel, MI_NOOP);

174 }

175

    if (intel->vtbl.finish_batch)

       intel->vtbl.finish_batch(intel);

178

179 intel_upload_finish(intel);

180

181 /* Check that we didn't just wrap our batchbuffer at a bad time. */

182 assert(!intel->no_batch_wrap);

183

184 do_flush_locked(intel);

185

    if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {

       fprintf(stderr, "waiting for idle\n");

       drm_intel_bo_wait_rendering(intel->batch.bo);

189 }

190

191 /* Reset the buffer:

192 */

193 intel_batchbuffer_reset(intel);

194 }

195

196

197 /* This is the only way buffers get added to the validate list.

198 */

199 GLboolean

 intel_batchbuffer_emit_reloc(struct intel_context *intel,

201 drm_intel_bo *buffer,

202 uint32_t read_domains, uint32_t write_domain,

203 uint32_t delta)

204 {

205 int ret;

206

    ret = drm_intel_bo_emit_reloc(intel->batch.bo, 4*intel->batch.used,

208 buffer, delta,

209 read_domains, write_domain);

    assert(ret == 0);

211 (void)ret;

212

213 /*

214 * Using the old buffer offset, write in what the right data would be, in case

215 * the buffer doesn't move and we can short-circuit the relocation processing

216 * in the kernel

217 */

    intel_batchbuffer_emit_dword(intel, buffer->offset + delta);

219

220 return GL_TRUE;

221 }

222

223 GLboolean

 intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel,

225 drm_intel_bo *buffer,

226 uint32_t read_domains,

227 uint32_t write_domain,

228 uint32_t delta)

229 {

230 int ret;

231

    ret = drm_intel_bo_emit_reloc_fence(intel->batch.bo, 4*intel->batch.used,

233 buffer, delta,

234 read_domains, write_domain);

    assert(ret == 0);

236 (void)ret;

237

238 /*

239 * Using the old buffer offset, write in what the right data would

240 * be, in case the buffer doesn't move and we can short-circuit the

241 * relocation processing in the kernel

242 */

    intel_batchbuffer_emit_dword(intel, buffer->offset + delta);

244

245 return GL_TRUE;

246 }

247

248 void

 intel_batchbuffer_data(struct intel_context *intel,

                        const void *data, GLuint bytes, bool is_blit)

251 {

    assert((bytes & 3) == 0);

    intel_batchbuffer_require_space(intel, bytes, is_blit);

    __memcpy(intel->batch.map + intel->batch.used, data, bytes);

    intel->batch.used += bytes >> 2;

256 }

257

258 void

 intel_batchbuffer_cached_advance(struct intel_context *intel)

260 {

    struct cached_batch_item **prev = &intel->batch.cached_items, *item;

    uint32_t sz = (intel->batch.used - intel->batch.emit) * sizeof(uint32_t);

    uint32_t *start = intel->batch.map + intel->batch.emit;

    uint16_t op = *start >> 16;

265

266 while (*prev) {

267 uint32_t *old;

268

269 item = *prev;

       old = intel->batch.map + item->header;

       if (op == *old >> 16) {

          if (item->size == sz && memcmp(old, start, sz) == 0) {

             if (prev != &intel->batch.cached_items) {

274 *prev = item->next;

                item->next = intel->batch.cached_items;

276 intel->batch.cached_items = item;

277 }

             intel->batch.used = intel->batch.emit;

279 return;

280 }

281

282 goto emit;

283 }

284 prev = &item->next;

285 }

286

    item = malloc(sizeof(struct cached_batch_item));

288 if (item == NULL)

289 return;

290

    item->next = intel->batch.cached_items;

292 intel->batch.cached_items = item;

293

294 emit:

295 item->size = sz;

    item->header = intel->batch.emit;

297 }

298

299 /**

300 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for

301 * implementing two workarounds on gen6. From section 1.4.7.1

302 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:

303 *

304 * [DevSNB-C+{W/A}] Before any depth stall flush (including those

305 * produced by non-pipelined state commands), software needs to first

306 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=

307 * 0.

308 *

309 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable

310 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.

311 *

312 * And the workaround for these two requires this workaround first:

313 *

314 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent

315 * BEFORE the pipe-control with a post-sync op and no write-cache

316 * flushes.

317 *

318 * And this last workaround is tricky because of the requirements on

319 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM

320 * volume 2 part 1:

321 *

322 * "1 of the following must also be set:

323 * - Render Target Cache Flush Enable ([12] of DW1)

324 * - Depth Cache Flush Enable ([0] of DW1)

325 * - Stall at Pixel Scoreboard ([1] of DW1)

326 * - Depth Stall ([13] of DW1)

327 * - Post-Sync Operation ([13] of DW1)

328 * - Notify Enable ([8] of DW1)"

329 *

330 * The cache flushes require the workaround flush that triggered this

331 * one, so we can't use it. Depth stall would trigger the same.

332 * Post-sync nonzero is what triggered this second workaround, so we

333 * can't use that one either. Notify enable is IRQs, which aren't

334 * really our business. That leaves only stall at scoreboard.

335 */

336 void

 intel_emit_post_sync_nonzero_flush(struct intel_context *intel)

338 {

    if (!intel->batch.need_workaround_flush)

340 return;

341

342 BEGIN_BATCH(4);

343 OUT_BATCH(_3DSTATE_PIPE_CONTROL);

344 OUT_BATCH(PIPE_CONTROL_CS_STALL |

345 PIPE_CONTROL_STALL_AT_SCOREBOARD);

    OUT_BATCH(0); /* address */

    OUT_BATCH(0); /* write data */

348 ADVANCE_BATCH();

349

350 BEGIN_BATCH(4);

351 OUT_BATCH(_3DSTATE_PIPE_CONTROL);

352 OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);

    OUT_RELOC(intel->batch.workaround_bo,

354 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);

    OUT_BATCH(0); /* write data */

356 ADVANCE_BATCH();

357

    intel->batch.need_workaround_flush = false;

359 }

360

361 /* Emit a pipelined flush to either flush render and texture cache for

362 * reading from a FBO-drawn texture, or flush so that frontbuffer

363 * render appears on the screen in DRI1.

364 *

365 * This is also used for the always_flush_cache driconf debug option.

366 */

367 void

 intel_batchbuffer_emit_mi_flush(struct intel_context *intel)

369 {

    if (intel->gen >= 6) {

       if (intel->batch.is_blit) {

372 BEGIN_BATCH_BLT(4);

373 OUT_BATCH(MI_FLUSH_DW);

374 OUT_BATCH(0);

375 OUT_BATCH(0);

376 OUT_BATCH(0);

377 ADVANCE_BATCH();

378 } else {

          if (intel->gen == 6) {

380 /* Hardware workaround: SNB B-Spec says:

381 *

382 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache

383 * Flush Enable =1, a PIPE_CONTROL with any non-zero

384 * post-sync-op is required.

385 */

386 intel_emit_post_sync_nonzero_flush(intel);

387 }

388

389 BEGIN_BATCH(4);

390 OUT_BATCH(_3DSTATE_PIPE_CONTROL);

391 OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |

392 PIPE_CONTROL_WRITE_FLUSH |

393 PIPE_CONTROL_DEPTH_CACHE_FLUSH |

394 PIPE_CONTROL_TC_FLUSH |

395 PIPE_CONTROL_NO_WRITE);

          OUT_BATCH(0); /* write address */

          OUT_BATCH(0); /* write data */

398 ADVANCE_BATCH();

399 }

    } else if (intel->gen >= 4) {

401 BEGIN_BATCH(4);

402 OUT_BATCH(_3DSTATE_PIPE_CONTROL |

403 PIPE_CONTROL_WRITE_FLUSH |

404 PIPE_CONTROL_NO_WRITE);

       OUT_BATCH(0); /* write address */

       OUT_BATCH(0); /* write data */

       OUT_BATCH(0); /* write data */

408 ADVANCE_BATCH();

409 } else {

410 BEGIN_BATCH(1);

411 OUT_BATCH(MI_FLUSH);

412 ADVANCE_BATCH();

413 }

414 }