git.libre-soc.org Git - mesa.git/blob - src/mesa/drivers/dri/intel/intel

1 /**************************************************************************

2 *

5 *

6 * Permission is hereby granted, free of charge, to any person obtaining a

7 * copy of this software and associated documentation files (the

8 * "Software"), to deal in the Software without restriction, including

9 * without limitation the rights to use, copy, modify, merge, publish,

10 * distribute, sub license, and/or sell copies of the Software, and to

11 * permit persons to whom the Software is furnished to do so, subject to

12 * the following conditions:

13 *

14 * The above copyright notice and this permission notice (including the

15 * next paragraph) shall be included in all copies or substantial portions

16 * of the Software.

17 *

18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS

19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.

21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR

22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,

23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE

24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

25 *

26 **************************************************************************/

28 #include "intel_context.h"

29 #include "intel_batchbuffer.h"

30 #include "intel_buffer_objects.h"

31 #include "intel_decode.h"

32 #include "intel_reg.h"

33 #include "intel_bufmgr.h"

34 #include "intel_buffers.h"

/**
 * One node of the per-batch list of packets that were already emitted.
 *
 * intel_batchbuffer_cached_advance() walks this list to decide whether a
 * freshly written packet repeats an earlier one.
 */
struct cached_batch_item {
   /* Next node of the singly linked list, or NULL at the tail. */
   struct cached_batch_item *next;
   /* Position of the packet's first dword, as an index into batch.map. */
   uint16_t header;
   /* Length of the packet in bytes. */
   uint16_t size;
};

 static void clear_cache( struct intel_context *intel )

43 {

    struct cached_batch_item *item = intel->batch.cached_items;

46 while (item) {

       struct cached_batch_item *next = item->next;

48 free(item);

49 item = next;

50 }

52 intel->batch.cached_items = NULL;

53 }

55 void

 intel_batchbuffer_init(struct intel_context *intel)

57 {

58 intel_batchbuffer_reset(intel);

    if (intel->gen == 6) {

61 /* We can't just use brw_state_batch to get a chunk of space for

62 * the gen6 workaround because it involves actually writing to

63 * the buffer, and the kernel doesn't let us write to the batch.

64 */

       intel->batch.workaround_bo = drm_intel_bo_alloc(intel->bufmgr,

66 "gen6 workaround",

67 4096, 4096);

68 }

69 }

71 void

 intel_batchbuffer_reset(struct intel_context *intel)

73 {

    if (intel->batch.last_bo != NULL) {

       drm_intel_bo_unreference(intel->batch.last_bo);

76 intel->batch.last_bo = NULL;

77 }

    intel->batch.last_bo = intel->batch.bo;

80 clear_cache(intel);

    intel->batch.bo = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer",

83 intel->maxBatchSize, 4096);

85 intel->batch.reserved_space = BATCH_RESERVED;

    intel->batch.state_batch_offset = intel->batch.bo->size;

    intel->batch.used = 0;

88 }

90 void

 intel_batchbuffer_free(struct intel_context *intel)

92 {

    drm_intel_bo_unreference(intel->batch.last_bo);

    drm_intel_bo_unreference(intel->batch.bo);

    drm_intel_bo_unreference(intel->batch.workaround_bo);

96 clear_cache(intel);

97 }

100 /* TODO: Push this whole function into bufmgr.

101 */

102 static void

 do_flush_locked(struct intel_context *intel)

104 {

    struct intel_batchbuffer *batch = &intel->batch;

106 int ret = 0;

107

    if (!intel->intelScreen->no_hw) {

109 int ring;

110

       if (intel->gen < 6 || !batch->is_blit) {

112 ring = I915_EXEC_RENDER;

113 } else {

114 ring = I915_EXEC_BLT;

115 }

116

       ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);

       if (ret == 0 && batch->state_batch_offset != batch->bo->size) {

          ret = drm_intel_bo_subdata(batch->bo,

120 batch->state_batch_offset,

                                     batch->bo->size - batch->state_batch_offset,

                                     (char *)batch->map + batch->state_batch_offset);

123 }

124

       if (ret == 0)

          ret = drm_intel_bo_mrb_exec(batch->bo, 4*batch->used, NULL, 0, 0, ring);

127 }

128

    if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {

       drm_intel_bo_map(batch->bo, false);

       intel_decode(batch->bo->virtual, batch->used,

132 batch->bo->offset,

133 intel->intelScreen->deviceID, GL_TRUE);

134 drm_intel_bo_unmap(batch->bo);

135

       if (intel->vtbl.debug_batch != NULL)

          intel->vtbl.debug_batch(intel);

138 }

139

    if (ret != 0) {

141 exit(1);

142 }

    intel->vtbl.new_batch(intel);

144 }

145

146 void

 _intel_batchbuffer_flush(struct intel_context *intel,

                          const char *file, int line)

149 {

    if (intel->batch.used == 0)

151 return;

152

    if (intel->first_post_swapbuffers_batch == NULL) {

       intel->first_post_swapbuffers_batch = intel->batch.bo;

155 drm_intel_bo_reference(intel->first_post_swapbuffers_batch);

156 }

157

    if (unlikely(INTEL_DEBUG & DEBUG_BATCH))

       fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,

               4*intel->batch.used);

161

    intel->batch.reserved_space = 0;

163

164 if (intel->always_flush_cache) {

165 intel_batchbuffer_emit_mi_flush(intel);

166 }

167

168 /* Mark the end of the buffer. */

169 intel_batchbuffer_emit_dword(intel, MI_BATCH_BUFFER_END);

    if (intel->batch.used & 1) {

171 /* Round batchbuffer usage to 2 DWORDs. */

172 intel_batchbuffer_emit_dword(intel, MI_NOOP);

173 }

174

    if (intel->vtbl.finish_batch)

       intel->vtbl.finish_batch(intel);

177

178 intel_upload_finish(intel);

179

180 /* Check that we didn't just wrap our batchbuffer at a bad time. */

181 assert(!intel->no_batch_wrap);

182

183 do_flush_locked(intel);

184

    if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {

       fprintf(stderr, "waiting for idle\n");

       drm_intel_bo_wait_rendering(intel->batch.bo);

188 }

189

190 /* Reset the buffer:

191 */

192 intel_batchbuffer_reset(intel);

193 }

194

195

196 /* This is the only way buffers get added to the validate list.

197 */

198 GLboolean

 intel_batchbuffer_emit_reloc(struct intel_context *intel,

200 drm_intel_bo *buffer,

201 uint32_t read_domains, uint32_t write_domain,

202 uint32_t delta)

203 {

204 int ret;

205

    ret = drm_intel_bo_emit_reloc(intel->batch.bo, 4*intel->batch.used,

207 buffer, delta,

208 read_domains, write_domain);

    assert(ret == 0);

210 (void)ret;

211

212 /*

213 * Using the old buffer offset, write in what the right data would be, in case

214 * the buffer doesn't move and we can short-circuit the relocation processing

215 * in the kernel

216 */

    intel_batchbuffer_emit_dword(intel, buffer->offset + delta);

218

219 return GL_TRUE;

220 }

221

222 GLboolean

 intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel,

224 drm_intel_bo *buffer,

225 uint32_t read_domains,

226 uint32_t write_domain,

227 uint32_t delta)

228 {

229 int ret;

230

    ret = drm_intel_bo_emit_reloc_fence(intel->batch.bo, 4*intel->batch.used,

232 buffer, delta,

233 read_domains, write_domain);

    assert(ret == 0);

235 (void)ret;

236

237 /*

238 * Using the old buffer offset, write in what the right data would

239 * be, in case the buffer doesn't move and we can short-circuit the

240 * relocation processing in the kernel

241 */

    intel_batchbuffer_emit_dword(intel, buffer->offset + delta);

243

244 return GL_TRUE;

245 }

246

247 void

 intel_batchbuffer_data(struct intel_context *intel,

                        const void *data, GLuint bytes, bool is_blit)

250 {

    assert((bytes & 3) == 0);

    intel_batchbuffer_require_space(intel, bytes, is_blit);

    __memcpy(intel->batch.map + intel->batch.used, data, bytes);

    intel->batch.used += bytes >> 2;

255 }

256

257 void

 intel_batchbuffer_cached_advance(struct intel_context *intel)

259 {

    struct cached_batch_item **prev = &intel->batch.cached_items, *item;

    uint32_t sz = (intel->batch.used - intel->batch.emit) * sizeof(uint32_t);

    uint32_t *start = intel->batch.map + intel->batch.emit;

    uint16_t op = *start >> 16;

264

265 while (*prev) {

266 uint32_t *old;

267

268 item = *prev;

       old = intel->batch.map + item->header;

       if (op == *old >> 16) {

          if (item->size == sz && memcmp(old, start, sz) == 0) {

             if (prev != &intel->batch.cached_items) {

273 *prev = item->next;

                item->next = intel->batch.cached_items;

275 intel->batch.cached_items = item;

276 }

             intel->batch.used = intel->batch.emit;

278 return;

279 }

280

281 goto emit;

282 }

283 prev = &item->next;

284 }

285

    item = malloc(sizeof(struct cached_batch_item));

287 if (item == NULL)

288 return;

289

    item->next = intel->batch.cached_items;

291 intel->batch.cached_items = item;

292

293 emit:

294 item->size = sz;

    item->header = intel->batch.emit;

296 }

297

298 /**

299 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for

300 * implementing two workarounds on gen6. From section 1.4.7.1

301 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:

302 *

303 * [DevSNB-C+{W/A}] Before any depth stall flush (including those

304 * produced by non-pipelined state commands), software needs to first

305 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=

306 * 0.

307 *

308 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable

309 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.

310 *

311 * And the workaround for these two requires this workaround first:

312 *

313 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent

314 * BEFORE the pipe-control with a post-sync op and no write-cache

315 * flushes.

316 *

317 * And this last workaround is tricky because of the requirements on

318 * that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM

319 * volume 2 part 1:

320 *

321 * "1 of the following must also be set:

322 * - Render Target Cache Flush Enable ([12] of DW1)

323 * - Depth Cache Flush Enable ([0] of DW1)

324 * - Stall at Pixel Scoreboard ([1] of DW1)

325 * - Depth Stall ([13] of DW1)

326 * - Post-Sync Operation ([13] of DW1)

327 * - Notify Enable ([8] of DW1)"

328 *

329 * The cache flushes require the workaround flush that triggered this

330 * one, so we can't use it. Depth stall would trigger the same.

331 * Post-sync nonzero is what triggered this second workaround, so we

332 * can't use that one either. Notify enable is IRQs, which aren't

333 * really our business. That leaves only stall at scoreboard.

334 */

335 void

 intel_emit_post_sync_nonzero_flush(struct intel_context *intel)

337 {

    if (!intel->batch.need_workaround_flush)

339 return;

340

341 BEGIN_BATCH(4);

342 OUT_BATCH(_3DSTATE_PIPE_CONTROL);

343 OUT_BATCH(PIPE_CONTROL_CS_STALL |

344 PIPE_CONTROL_STALL_AT_SCOREBOARD);

    OUT_BATCH(0); /* address */

    OUT_BATCH(0); /* write data */

347 ADVANCE_BATCH();

348

349 BEGIN_BATCH(4);

350 OUT_BATCH(_3DSTATE_PIPE_CONTROL);

351 OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);

    OUT_RELOC(intel->batch.workaround_bo,

353 I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);

    OUT_BATCH(0); /* write data */

355 ADVANCE_BATCH();

356

    intel->batch.need_workaround_flush = false;

358 }

359

360 /* Emit a pipelined flush to either flush render and texture cache for

361 * reading from a FBO-drawn texture, or flush so that frontbuffer

362 * render appears on the screen in DRI1.

363 *

364 * This is also used for the always_flush_cache driconf debug option.

365 */

366 void

 intel_batchbuffer_emit_mi_flush(struct intel_context *intel)

368 {

    if (intel->gen >= 6) {

       if (intel->batch.is_blit) {

371 BEGIN_BATCH_BLT(4);

372 OUT_BATCH(MI_FLUSH_DW);

373 OUT_BATCH(0);

374 OUT_BATCH(0);

375 OUT_BATCH(0);

376 ADVANCE_BATCH();

377 } else {

          if (intel->gen == 6) {

379 /* Hardware workaround: SNB B-Spec says:

380 *

381 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache

382 * Flush Enable =1, a PIPE_CONTROL with any non-zero

383 * post-sync-op is required.

384 */

385 intel_emit_post_sync_nonzero_flush(intel);

386 }

387

388 BEGIN_BATCH(4);

389 OUT_BATCH(_3DSTATE_PIPE_CONTROL);

390 OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |

391 PIPE_CONTROL_WRITE_FLUSH |

392 PIPE_CONTROL_DEPTH_CACHE_FLUSH |

393 PIPE_CONTROL_TC_FLUSH |

394 PIPE_CONTROL_NO_WRITE);

          OUT_BATCH(0); /* write address */

          OUT_BATCH(0); /* write data */

397 ADVANCE_BATCH();

398 }

    } else if (intel->gen >= 4) {

400 BEGIN_BATCH(4);

401 OUT_BATCH(_3DSTATE_PIPE_CONTROL |

402 PIPE_CONTROL_WRITE_FLUSH |

403 PIPE_CONTROL_NO_WRITE);

       OUT_BATCH(0); /* write address */

       OUT_BATCH(0); /* write data */

       OUT_BATCH(0); /* write data */

407 ADVANCE_BATCH();

408 } else {

409 BEGIN_BATCH(1);

410 OUT_BATCH(MI_FLUSH);

411 ADVANCE_BATCH();

412 }

413 }