A game framework written with osu! in mind.
// Copyright (c) ppy Pty Ltd <contact@ppy.sh>. Licensed under the MIT Licence.
// See the LICENCE file in the repository root for full licence text.

using FFmpeg.AutoGen;
using osuTK;
using osu.Framework.Graphics.Textures;
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using osu.Framework.Allocation;
using osu.Framework.Logging;
using osu.Framework.Platform;
using AGffmpeg = FFmpeg.AutoGen.ffmpeg;

namespace osu.Framework.Graphics.Video
{
    /// <summary>
    /// Represents a video decoder that can be used to convert video streams and files into textures.
    /// </summary>
    public unsafe class VideoDecoder : IDisposable
    {
        /// <summary>
        /// The duration of the video that is being decoded, in milliseconds. Can only be queried after the decoder has started decoding.
        /// This value may be an estimate by FFmpeg, depending on the video loaded.
        /// </summary>
        public double Duration => stream == null ? 0 : duration * timeBaseInSeconds * 1000;

        /// <summary>
        /// True if the decoder is currently running and decoding frames, false otherwise.
        /// </summary>
        public bool IsRunning => State == DecoderState.Running;

        /// <summary>
        /// True if the decoder has faulted after starting to decode. You can try to restart a failed decoder by invoking <see cref="StartDecoding"/> again.
        /// </summary>
        public bool IsFaulted => State == DecoderState.Faulted;

        /// <summary>
        /// The timestamp of the last frame that was decoded by this video decoder, or 0 if no frames have been decoded.
        /// </summary>
        public float LastDecodedFrameTime => lastDecodedFrameTime;

        /// <summary>
        /// The frame rate of the video stream this decoder is decoding.
        /// </summary>
        public double FrameRate => stream == null ? 0 : stream->avg_frame_rate.GetValue();

        /// <summary>
        /// True if the decoder can seek, false otherwise. Determined by the stream this decoder was created with.
        /// </summary>
        public bool CanSeek => videoStream?.CanSeek == true;

        /// <summary>
        /// The current state of the decoding process.
        /// </summary>
        public DecoderState State { get; private set; }

        // libav-context-related
        private AVFormatContext* formatContext;
        private AVStream* stream;
        private AVCodecParameters codecParams;
        private byte* contextBuffer;
        private byte[] managedContextBuffer;

        // Delegates passed to libav for custom I/O. Kept in fields so the GC cannot
        // collect them while native code still holds the function pointers.
        private avio_alloc_context_read_packet readPacketCallback;
        private avio_alloc_context_seek seekCallback;

        private bool inputOpened;
        private bool isDisposed;
        private Stream videoStream;

        // Multiplier converting stream timestamps into seconds (stream->time_base).
        private double timeBaseInSeconds;
        private long duration;

        // Conversion context used to convert non-YUV420P sources into YUV420P (see prepareFilters()).
        private SwsContext* convCtx;
        private bool convert = true;

        // active decoder state
        private volatile float lastDecodedFrameTime;

        private Task decodingTask;
        private CancellationTokenSource decodingTaskCancellationTokenSource;

        // When set (by Seek()), decoded frames earlier than this timestamp are discarded rather than output.
        private double? skipOutputUntilTime;

        private readonly ConcurrentQueue<DecodedFrame> decodedFrames;
        private readonly ConcurrentQueue<Action> decoderCommands;

        private readonly ConcurrentQueue<Texture> availableTextures;

        // GC handle passed as the opaque pointer to the static libav callbacks so they can
        // recover this instance from native code.
        private ObjectHandle<VideoDecoder> handle;

        private readonly FFmpegFuncs ffmpeg;

        internal bool Looping;

        /// <summary>
        /// Creates a new video decoder that decodes the given video file.
        /// </summary>
        /// <param name="filename">The path to the file that should be decoded.</param>
        public VideoDecoder(string filename)
            : this(File.OpenRead(filename))
        {
        }

        /// <summary>
        /// Creates a new video decoder that decodes the given video stream.
        /// </summary>
        /// <param name="videoStream">The stream that should be decoded.</param>
        /// <exception cref="InvalidOperationException">Thrown if the given stream is not readable.</exception>
        public VideoDecoder(Stream videoStream)
        {
            ffmpeg = CreateFuncs();

            this.videoStream = videoStream;
            if (!videoStream.CanRead)
                throw new InvalidOperationException($"The given stream does not support reading. A stream used for a {nameof(VideoDecoder)} must support reading.");

            State = DecoderState.Ready;
            decodedFrames = new ConcurrentQueue<DecodedFrame>();
            decoderCommands = new ConcurrentQueue<Action>();
            availableTextures = new ConcurrentQueue<Texture>(); // TODO: use "real" object pool when there's some public pool supporting disposables
            handle = new ObjectHandle<VideoDecoder>(this, GCHandleType.Normal);
        }

        /// <summary>
        /// Seek the decoder to the given timestamp. This will fail if <see cref="CanSeek"/> is false.
        /// </summary>
        /// <param name="targetTimestamp">The timestamp to seek to, in milliseconds.</param>
        /// <exception cref="InvalidOperationException">Thrown if the underlying stream does not support seeking.</exception>
        public void Seek(double targetTimestamp)
        {
            if (!CanSeek)
                throw new InvalidOperationException("This decoder cannot seek because the underlying stream used to decode the video does not support seeking.");

            // The seek is executed on the decoder thread (via the command queue) since libav state
            // must not be touched concurrently with the decoding loop.
            decoderCommands.Enqueue(() =>
            {
                // AVSEEK_FLAG_BACKWARD seeks to the nearest keyframe before the target;
                // frames between the keyframe and the target are then skipped via skipOutputUntilTime.
                ffmpeg.av_seek_frame(formatContext, stream->index, (long)(targetTimestamp / timeBaseInSeconds / 1000.0), AGffmpeg.AVSEEK_FLAG_BACKWARD);
                skipOutputUntilTime = targetTimestamp;
                State = DecoderState.Ready;
            });
        }

        /// <summary>
        /// Returns the given frames back to the decoder, allowing the decoder to reuse the textures contained in the frames to draw new frames.
        /// </summary>
        /// <param name="frames">The frames that should be returned to the decoder.</param>
        public void ReturnFrames(IEnumerable<DecodedFrame> frames)
        {
            foreach (var f in frames)
            {
                ((VideoTexture)f.Texture.TextureGL).FlushUploads();
                availableTextures.Enqueue(f.Texture);
            }
        }

        /// <summary>
        /// Starts the decoding process. The decoding will happen asynchronously in a separate thread. The decoded frames can be retrieved by using <see cref="GetDecodedFrames"/>.
        /// </summary>
        /// <exception cref="InvalidOperationException">Thrown if decoding has already been started.</exception>
        public void StartDecoding()
        {
            if (decodingTask != null)
                throw new InvalidOperationException($"Cannot start decoding once already started. Call {nameof(StopDecoding)} first.");

            // only prepare for decoding if this is our first time starting the decoding process
            if (formatContext == null)
            {
                try
                {
                    prepareDecoding();
                }
                catch (Exception e)
                {
                    // A failure here (e.g. missing native libraries, unreadable stream) faults the
                    // decoder instead of throwing; callers can check IsFaulted and retry.
                    Logger.Log($"VideoDecoder faulted: {e}");
                    State = DecoderState.Faulted;
                    return;
                }
            }

            decodingTaskCancellationTokenSource = new CancellationTokenSource();
            decodingTask = Task.Factory.StartNew(() => decodingLoop(decodingTaskCancellationTokenSource.Token), decodingTaskCancellationTokenSource.Token, TaskCreationOptions.LongRunning, TaskScheduler.Default);
        }

        /// <summary>
        /// Stops the decoding process. Optionally waits for the decoder thread to terminate.
        /// </summary>
        /// <param name="waitForDecoderExit">True if this method should wait for the decoder thread to terminate, false otherwise.</param>
        public void StopDecoding(bool waitForDecoderExit)
        {
            if (decodingTask == null)
                return;

            decodingTaskCancellationTokenSource.Cancel();
            if (waitForDecoderExit)
                decodingTask.Wait();

            decodingTask = null;
            decodingTaskCancellationTokenSource.Dispose();
            decodingTaskCancellationTokenSource = null;

            State = DecoderState.Ready;
        }

        /// <summary>
        /// Gets all frames that have been decoded by the decoder up until the point in time when this method was called.
        /// Retrieving decoded frames using this method consumes them, ie calling this method again will never retrieve the same frame twice.
        /// </summary>
        /// <returns>The frames that have been decoded up until the point in time this method was called.</returns>
        public IEnumerable<DecodedFrame> GetDecodedFrames()
        {
            var frames = new List<DecodedFrame>(decodedFrames.Count);
            while (decodedFrames.TryDequeue(out var df))
                frames.Add(df);

            return frames;
        }

        /// <summary>
        /// Returns the YCbCr→RGB conversion matrix appropriate for this video's colour space.
        /// </summary>
        // https://en.wikipedia.org/wiki/YCbCr
        public Matrix3 GetConversionMatrix()
        {
            if (stream == null)
                return Matrix3.Zero;

            switch (stream->codec->colorspace)
            {
                case AVColorSpace.AVCOL_SPC_BT709:
                    return new Matrix3(1.164f, 1.164f, 1.164f,
                        0.000f, -0.213f, 2.112f,
                        1.793f, -0.533f, 0.000f);

                case AVColorSpace.AVCOL_SPC_UNSPECIFIED:
                case AVColorSpace.AVCOL_SPC_SMPTE170M:
                case AVColorSpace.AVCOL_SPC_SMPTE240M:
                default:
                    // BT.601 coefficients, used as the fallback for unspecified colour spaces.
                    return new Matrix3(1.164f, 1.164f, 1.164f,
                        0.000f, -0.392f, 2.017f,
                        1.596f, -0.813f, 0.000f);
            }
        }

        // Static read callback invoked by libav's custom AVIO context; recovers the decoder
        // instance through the opaque GC handle and reads from the managed stream.
        [MonoPInvokeCallback(typeof(avio_alloc_context_read_packet))]
        private static int readPacket(void* opaque, byte* bufferPtr, int bufferSize)
        {
            var handle = new ObjectHandle<VideoDecoder>((IntPtr)opaque);
            if (!handle.GetTarget(out VideoDecoder decoder))
                return 0;

            // Resize the managed staging buffer if libav requests a different amount than last time.
            if (bufferSize != decoder.managedContextBuffer.Length)
                decoder.managedContextBuffer = new byte[bufferSize];

            var bytesRead = decoder.videoStream.Read(decoder.managedContextBuffer, 0, bufferSize);
            Marshal.Copy(decoder.managedContextBuffer, 0, (IntPtr)bufferPtr, bytesRead);
            return bytesRead;
        }

        // Static seek callback invoked by libav's custom AVIO context. "whence" follows
        // the C stdio conventions plus libav's AVSEEK_SIZE extension.
        [MonoPInvokeCallback(typeof(avio_alloc_context_seek))]
        private static long streamSeekCallbacks(void* opaque, long offset, int whence)
        {
            var handle = new ObjectHandle<VideoDecoder>((IntPtr)opaque);
            if (!handle.GetTarget(out VideoDecoder decoder))
                return -1;

            if (!decoder.videoStream.CanSeek)
                throw new InvalidOperationException("Tried seeking on a video sourced by a non-seekable stream.");

            switch (whence)
            {
                case StdIo.SEEK_CUR:
                    decoder.videoStream.Seek(offset, SeekOrigin.Current);
                    break;

                case StdIo.SEEK_END:
                    decoder.videoStream.Seek(offset, SeekOrigin.End);
                    break;

                case StdIo.SEEK_SET:
                    decoder.videoStream.Seek(offset, SeekOrigin.Begin);
                    break;

                case AGffmpeg.AVSEEK_SIZE:
                    // libav queries the total stream size with this pseudo-whence value.
                    return decoder.videoStream.Length;

                default:
                    return -1;
            }

            return decoder.videoStream.Position;
        }

        // Sets up the swscale conversion context if the source is not already YUV420P.
        private void prepareFilters()
        {
            // only convert if needed
            if (stream->codec->pix_fmt == AVPixelFormat.AV_PIX_FMT_YUV420P)
            {
                convert = false;
                return;
            }

            // 1 = SWS_FAST_BILINEAR
            // https://www.ffmpeg.org/doxygen/3.1/swscale_8h_source.html#l00056
            convCtx = ffmpeg.sws_getContext(stream->codec->width, stream->codec->height, stream->codec->pix_fmt, stream->codec->width, stream->codec->height,
                AVPixelFormat.AV_PIX_FMT_YUV420P, 1, null, null, null);
        }

        // sets up libavformat state: creates the AVFormatContext, the frames, etc. to start decoding, but does not actually start the decodingLoop
        private void prepareDecoding()
        {
            const int context_buffer_size = 4096;

            // the first call to FFmpeg will throw an exception if the libraries cannot be found
            // this will be safely handled in StartDecoding()
            var fcPtr = ffmpeg.avformat_alloc_context();
            formatContext = fcPtr;
            contextBuffer = (byte*)ffmpeg.av_malloc(context_buffer_size);
            managedContextBuffer = new byte[context_buffer_size];
            readPacketCallback = readPacket;
            seekCallback = streamSeekCallbacks;
            formatContext->pb = ffmpeg.avio_alloc_context(contextBuffer, context_buffer_size, 0, (void*)handle.Handle, readPacketCallback, null, seekCallback);

            // "dummy" filename: all I/O goes through the custom AVIO context above.
            int openInputResult = ffmpeg.avformat_open_input(&fcPtr, "dummy", null, null);
            inputOpened = openInputResult >= 0;
            if (!inputOpened)
                throw new InvalidOperationException($"Error opening file or stream: {getErrorMessage(openInputResult)}");

            int findStreamInfoResult = ffmpeg.avformat_find_stream_info(formatContext, null);
            if (findStreamInfoResult < 0)
                throw new InvalidOperationException($"Error finding stream info: {getErrorMessage(findStreamInfoResult)}");

            var nStreams = formatContext->nb_streams;

            for (var i = 0; i < nStreams; ++i)
            {
                stream = formatContext->streams[i];

                codecParams = *stream->codecpar;

                if (codecParams.codec_type == AVMediaType.AVMEDIA_TYPE_VIDEO)
                {
                    // fall back to the container duration if the stream itself doesn't declare one.
                    duration = stream->duration <= 0 ? formatContext->duration : stream->duration;

                    timeBaseInSeconds = stream->time_base.GetValue();
                    var codecPtr = ffmpeg.avcodec_find_decoder(codecParams.codec_id);
                    if (codecPtr == null)
                        throw new InvalidOperationException($"Couldn't find codec with id: {codecParams.codec_id}");

                    int openCodecResult = ffmpeg.avcodec_open2(stream->codec, codecPtr, null);
                    if (openCodecResult < 0)
                        throw new InvalidOperationException($"Error trying to open codec with id {codecParams.codec_id}: {getErrorMessage(openCodecResult)}");

                    // only the first video stream is used; others are ignored.
                    break;
                }
            }

            prepareFilters();
        }

        // Main loop of the decoding thread: decodes frames while buffer space is available,
        // services queued commands (e.g. seeks), and idles at end of stream.
        private void decodingLoop(CancellationToken cancellationToken)
        {
            var packet = ffmpeg.av_packet_alloc();

            // cap on frames buffered ahead of consumption, to bound texture/memory usage.
            const int max_pending_frames = 3;

            try
            {
                while (!cancellationToken.IsCancellationRequested)
                {
                    switch (State)
                    {
                        case DecoderState.Ready:
                        case DecoderState.Running:
                            if (decodedFrames.Count < max_pending_frames)
                            {
                                decodeNextFrame(packet);
                            }
                            else
                            {
                                // wait until existing buffers are consumed.
                                State = DecoderState.Ready;
                                Thread.Sleep(1);
                            }

                            break;

                        case DecoderState.EndOfStream:
                            // While at the end of the stream, avoid attempting to read further as this comes with a non-negligible overhead.
                            // A Seek() operation will trigger a state change, allowing decoding to potentially start again.
                            Thread.Sleep(50);
                            break;

                        default:
                            Debug.Fail($"Video decoder should never be in a \"{State}\" state during decode.");
                            return;
                    }

                    while (!decoderCommands.IsEmpty)
                    {
                        if (cancellationToken.IsCancellationRequested)
                            return;

                        if (decoderCommands.TryDequeue(out var cmd))
                            cmd();
                    }
                }
            }
            catch (Exception e)
            {
                Logger.Log($"VideoDecoder faulted: {e}");
                State = DecoderState.Faulted;
            }
            finally
            {
                ffmpeg.av_packet_free(&packet);

                if (State != DecoderState.Faulted)
                    State = DecoderState.Stopped;
            }
        }

        // Reads one packet from the container and, if it belongs to the video stream,
        // decodes it into a frame, converts to YUV420P if necessary and enqueues a texture upload.
        private void decodeNextFrame(AVPacket* packet)
        {
            int readFrameResult = ffmpeg.av_read_frame(formatContext, packet);

            if (readFrameResult >= 0)
            {
                State = DecoderState.Running;

                if (packet->stream_index == stream->index)
                {
                    int sendPacketResult = ffmpeg.avcodec_send_packet(stream->codec, packet);

                    if (sendPacketResult == 0)
                    {
                        AVFrame* frame = ffmpeg.av_frame_alloc();
                        AVFrame* outFrame = null;

                        var result = ffmpeg.avcodec_receive_frame(stream->codec, frame);

                        if (result == 0)
                        {
                            // frame time in milliseconds, relative to the stream start.
                            var frameTime = (frame->best_effort_timestamp - stream->start_time) * timeBaseInSeconds * 1000;

                            // frames decoded before a seek target are dropped (they only exist because
                            // the seek landed on an earlier keyframe).
                            if (!skipOutputUntilTime.HasValue || skipOutputUntilTime.Value < frameTime)
                            {
                                skipOutputUntilTime = null;

                                if (convert)
                                {
                                    outFrame = ffmpeg.av_frame_alloc();
                                    outFrame->format = (int)AVPixelFormat.AV_PIX_FMT_YUV420P;
                                    outFrame->width = stream->codec->width;
                                    outFrame->height = stream->codec->height;

                                    var ret = ffmpeg.av_frame_get_buffer(outFrame, 32);
                                    if (ret < 0)
                                        throw new InvalidOperationException($"Error allocating video frame: {getErrorMessage(ret)}");

                                    ffmpeg.sws_scale(convCtx, frame->data, frame->linesize, 0, stream->codec->height,
                                        outFrame->data, outFrame->linesize);
                                }
                                else
                                    outFrame = frame;

                                if (!availableTextures.TryDequeue(out var tex))
                                    tex = new Texture(new VideoTexture(codecParams.width, codecParams.height));

                                var upload = new VideoTextureUpload(outFrame, ffmpeg.av_frame_free);

                                tex.SetData(upload);
                                decodedFrames.Enqueue(new DecodedFrame { Time = frameTime, Texture = tex });
                            }

                            lastDecodedFrameTime = (float)frameTime;
                        }

                        // There are two cases: outFrame could be null in which case the above decode hasn't run, or the outFrame doesn't match the input frame,
                        // in which case it won't be automatically freed by the texture upload. In both cases we need to free the input frame.
                        if (outFrame != frame)
                            ffmpeg.av_frame_free(&frame);
                    }
                    else
                        Logger.Log($"Error {sendPacketResult} sending packet in VideoDecoder");
                }

                ffmpeg.av_packet_unref(packet);
            }
            else if (readFrameResult == AGffmpeg.AVERROR_EOF)
            {
                if (Looping)
                {
                    Seek(0);
                }
                else
                {
                    // This marks the video stream as no longer relevant (until a future potential Seek operation).
                    State = DecoderState.EndOfStream;
                }
            }
            else
            {
                State = DecoderState.Ready;
                Thread.Sleep(1);
            }
        }

        // Translates a libav error code into a human-readable message via av_strerror.
        private string getErrorMessage(int errorCode)
        {
            const ulong buffer_size = 256;
            byte[] buffer = new byte[buffer_size];

            int strErrorCode;

            fixed (byte* bufPtr = buffer)
            {
                strErrorCode = ffmpeg.av_strerror(errorCode, bufPtr, buffer_size);
            }

            if (strErrorCode < 0)
                return $"{errorCode} (av_strerror failed with code {strErrorCode})";

            // av_strerror writes a NUL-terminated string; trim at the first NUL.
            var messageLength = Math.Max(0, Array.IndexOf(buffer, (byte)0));
            return Encoding.ASCII.GetString(buffer[..messageLength]);
        }

        // Builds the table of FFmpeg function pointers used by this decoder.
        // Virtual so that tests or platform-specific implementations can substitute their own bindings.
        protected virtual FFmpegFuncs CreateFuncs()
        {
            // other frameworks should handle native libraries themselves
#if NET5_0
            AGffmpeg.GetOrLoadLibrary = name =>
            {
                int version = AGffmpeg.LibraryVersionMap[name];

                string libraryName = null;

                // "lib" prefix and extensions are resolved by .net core
                switch (RuntimeInfo.OS)
                {
                    case RuntimeInfo.Platform.macOS:
                        libraryName = $"{name}.{version}";
                        break;

                    case RuntimeInfo.Platform.Windows:
                        libraryName = $"{name}-{version}";
                        break;

                    case RuntimeInfo.Platform.Linux:
                        libraryName = name;
                        break;
                }

                return NativeLibrary.Load(libraryName, System.Reflection.Assembly.GetEntryAssembly(), DllImportSearchPath.UseDllDirectoryForDependencies | DllImportSearchPath.SafeDirectories);
            };
#endif

            return new FFmpegFuncs
            {
                av_frame_alloc = AGffmpeg.av_frame_alloc,
                av_frame_free = AGffmpeg.av_frame_free,
                av_frame_unref = AGffmpeg.av_frame_unref,
                av_frame_get_buffer = AGffmpeg.av_frame_get_buffer,
                av_strdup = AGffmpeg.av_strdup,
                av_strerror = AGffmpeg.av_strerror,
                av_malloc = AGffmpeg.av_malloc,
                av_packet_alloc = AGffmpeg.av_packet_alloc,
                av_packet_unref = AGffmpeg.av_packet_unref,
                av_packet_free = AGffmpeg.av_packet_free,
                av_read_frame = AGffmpeg.av_read_frame,
                av_seek_frame = AGffmpeg.av_seek_frame,
                avcodec_find_decoder = AGffmpeg.avcodec_find_decoder,
                avcodec_open2 = AGffmpeg.avcodec_open2,
                avcodec_receive_frame = AGffmpeg.avcodec_receive_frame,
                avcodec_send_packet = AGffmpeg.avcodec_send_packet,
                avformat_alloc_context = AGffmpeg.avformat_alloc_context,
                avformat_close_input = AGffmpeg.avformat_close_input,
                avformat_find_stream_info = AGffmpeg.avformat_find_stream_info,
                avformat_open_input = AGffmpeg.avformat_open_input,
                avio_alloc_context = AGffmpeg.avio_alloc_context,
                sws_freeContext = AGffmpeg.sws_freeContext,
                sws_getContext = AGffmpeg.sws_getContext,
                sws_scale = AGffmpeg.sws_scale
            };
        }

        #region Disposal

        ~VideoDecoder()
        {
            Dispose(false);
        }

        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        protected virtual void Dispose(bool disposing)
        {
            if (isDisposed)
                return;

            isDisposed = true;

            decoderCommands.Clear();

            // stop the decode thread before tearing down native state it may still be using.
            StopDecoding(true);

            if (formatContext != null && inputOpened)
            {
                fixed (AVFormatContext** ptr = &formatContext)
                    ffmpeg.avformat_close_input(ptr);
            }

            seekCallback = null;
            readPacketCallback = null;
            managedContextBuffer = null;

            videoStream.Dispose();
            videoStream = null;

            // gets freed by libavformat when closing the input
            contextBuffer = null;

            if (convCtx != null)
                ffmpeg.sws_freeContext(convCtx);

            while (decodedFrames.TryDequeue(out var f))
                f.Texture.Dispose();

            while (availableTextures.TryDequeue(out var t))
                t.Dispose();

            handle.Dispose();
        }

        #endregion

        /// <summary>
        /// Represents the possible states the decoder can be in.
        /// </summary>
        public enum DecoderState
        {
            /// <summary>
            /// The decoder is ready to begin decoding. This is the default state before the decoder starts operations.
            /// </summary>
            Ready = 0,

            /// <summary>
            /// The decoder is currently running and decoding frames.
            /// </summary>
            Running = 1,

            /// <summary>
            /// The decoder has faulted with an exception.
            /// </summary>
            Faulted = 2,

            /// <summary>
            /// The decoder has reached the end of the video data.
            /// </summary>
            EndOfStream = 3,

            /// <summary>
            /// The decoder has been completely stopped and cannot be resumed.
            /// </summary>
            Stopped = 4,
        }
    }
}