// A game framework written with osu! in mind.
// Copyright (c) ppy Pty Ltd <contact@ppy.sh>. Licensed under the MIT Licence.
// See the LICENCE file in the repository root for full licence text.
3
4using FFmpeg.AutoGen;
5using osuTK;
6using osu.Framework.Graphics.Textures;
7using System;
8using System.Collections.Concurrent;
9using System.Collections.Generic;
10using System.Diagnostics;
11using System.IO;
12using System.Runtime.InteropServices;
13using System.Text;
14using System.Threading;
15using System.Threading.Tasks;
16using osu.Framework.Allocation;
17using osu.Framework.Logging;
18using osu.Framework.Platform;
19using AGffmpeg = FFmpeg.AutoGen.ffmpeg;
20
21namespace osu.Framework.Graphics.Video
22{
    /// <summary>
    /// Represents a video decoder that can be used to convert video streams and files into textures.
    /// </summary>
26 public unsafe class VideoDecoder : IDisposable
27 {
        /// <summary>
        /// The duration of the video that is being decoded, in milliseconds. Can only be queried after the decoder has started decoding.
        /// This value may be an estimate by FFmpeg, depending on the video loaded.
        /// </summary>
        public double Duration => stream == null ? 0 : duration * timeBaseInSeconds * 1000;

        /// <summary>
        /// True if the decoder currently does not decode any more frames, false otherwise.
        /// </summary>
        public bool IsRunning => State == DecoderState.Running;

        /// <summary>
        /// True if the decoder has faulted after starting to decode. You can try to restart a failed decoder by invoking <see cref="StartDecoding"/> again.
        /// </summary>
        public bool IsFaulted => State == DecoderState.Faulted;

        /// <summary>
        /// The timestamp of the last frame that was decoded by this video decoder, or 0 if no frames have been decoded.
        /// </summary>
        public float LastDecodedFrameTime => lastDecodedFrameTime;

        /// <summary>
        /// The frame rate of the video stream this decoder is decoding, as reported by FFmpeg's average frame rate.
        /// </summary>
        public double FrameRate => stream == null ? 0 : stream->avg_frame_rate.GetValue();

        /// <summary>
        /// True if the decoder can seek, false otherwise. Determined by the stream this decoder was created with.
        /// </summary>
        public bool CanSeek => videoStream?.CanSeek == true;

        /// <summary>
        /// The current state of the decoding process.
        /// </summary>
        public DecoderState State { get; private set; }
62
        // libav-context-related
        private AVFormatContext* formatContext;
        private AVStream* stream;
        private AVCodecParameters codecParams;

        // unmanaged buffer handed to the custom AVIO context, and its managed staging counterpart used by readPacket.
        private byte* contextBuffer;
        private byte[] managedContextBuffer;

        // delegates are kept in fields so they are not garbage collected while native code still holds their function pointers.
        private avio_alloc_context_read_packet readPacketCallback;
        private avio_alloc_context_seek seekCallback;

        private bool inputOpened;
        private bool isDisposed;
        private Stream videoStream;

        // factor to convert stream timestamps (in time_base units) into seconds.
        private double timeBaseInSeconds;
        private long duration;

        // swscale conversion context; only used when the source pixel format is not already YUV420P.
        private SwsContext* convCtx;
        private bool convert = true;

        // active decoder state
        private volatile float lastDecodedFrameTime;

        private Task decodingTask;
        private CancellationTokenSource decodingTaskCancellationTokenSource;

        // when set (by Seek), frames with earlier timestamps are decoded but not surfaced.
        private double? skipOutputUntilTime;

        private readonly ConcurrentQueue<DecodedFrame> decodedFrames;
        private readonly ConcurrentQueue<Action> decoderCommands;

        private readonly ConcurrentQueue<Texture> availableTextures;

        // GC handle passed to native callbacks as the opaque pointer, so static callbacks can recover this instance.
        private ObjectHandle<VideoDecoder> handle;

        private readonly FFmpegFuncs ffmpeg;

        internal bool Looping;
101
        /// <summary>
        /// Creates a new video decoder that decodes the given video file.
        /// </summary>
        /// <param name="filename">The path to the file that should be decoded.</param>
        // the opened FileStream is owned by this decoder and disposed with it (see Dispose(bool)).
        public VideoDecoder(string filename)
            : this(File.OpenRead(filename))
        {
        }
110
111 /// <summary>
112 /// Creates a new video decoder that decodes the given video stream.
113 /// </summary>
114 /// <param name="videoStream">The stream that should be decoded.</param>
115 public VideoDecoder(Stream videoStream)
116 {
117 ffmpeg = CreateFuncs();
118
119 this.videoStream = videoStream;
120 if (!videoStream.CanRead)
121 throw new InvalidOperationException($"The given stream does not support reading. A stream used for a {nameof(VideoDecoder)} must support reading.");
122
123 State = DecoderState.Ready;
124 decodedFrames = new ConcurrentQueue<DecodedFrame>();
125 decoderCommands = new ConcurrentQueue<Action>();
126 availableTextures = new ConcurrentQueue<Texture>(); // TODO: use "real" object pool when there's some public pool supporting disposables
127 handle = new ObjectHandle<VideoDecoder>(this, GCHandleType.Normal);
128 }
129
        /// <summary>
        /// Seek the decoder to the given timestamp. This will fail if <see cref="CanSeek"/> is false.
        /// </summary>
        /// <param name="targetTimestamp">The timestamp to seek to, in milliseconds.</param>
        /// <exception cref="InvalidOperationException">The underlying stream does not support seeking.</exception>
        public void Seek(double targetTimestamp)
        {
            if (!CanSeek)
                throw new InvalidOperationException("This decoder cannot seek because the underlying stream used to decode the video does not support seeking.");

            // the seek is performed asynchronously on the decoder thread (commands are drained in decodingLoop).
            decoderCommands.Enqueue(() =>
            {
                // convert milliseconds back into stream time_base units; seek to the nearest preceding keyframe.
                ffmpeg.av_seek_frame(formatContext, stream->index, (long)(targetTimestamp / timeBaseInSeconds / 1000.0), AGffmpeg.AVSEEK_FLAG_BACKWARD);

                // frames between the keyframe and the target are decoded but discarded (see decodeNextFrame).
                skipOutputUntilTime = targetTimestamp;
                State = DecoderState.Ready;
            });
        }
146
147 /// <summary>
148 /// Returns the given frames back to the decoder, allowing the decoder to reuse the textures contained in the frames to draw new frames.
149 /// </summary>
150 /// <param name="frames">The frames that should be returned to the decoder.</param>
151 public void ReturnFrames(IEnumerable<DecodedFrame> frames)
152 {
153 foreach (var f in frames)
154 {
155 ((VideoTexture)f.Texture.TextureGL).FlushUploads();
156 availableTextures.Enqueue(f.Texture);
157 }
158 }
159
        /// <summary>
        /// Starts the decoding process. The decoding will happen asynchronously in a separate thread. The decoded frames can be retrieved by using <see cref="GetDecodedFrames"/>.
        /// </summary>
        /// <exception cref="InvalidOperationException">Decoding was already started.</exception>
        public void StartDecoding()
        {
            if (decodingTask != null)
                throw new InvalidOperationException($"Cannot start decoding once already started. Call {nameof(StopDecoding)} first.");

            // only prepare for decoding if this is our first time starting the decoding process
            if (formatContext == null)
            {
                try
                {
                    prepareDecoding();
                }
                catch (Exception e)
                {
                    // failure to initialise libav (e.g. missing native libraries) faults the decoder instead of throwing to the caller.
                    Logger.Log($"VideoDecoder faulted: {e}");
                    State = DecoderState.Faulted;
                    return;
                }
            }

            decodingTaskCancellationTokenSource = new CancellationTokenSource();

            // LongRunning hints at a dedicated thread, since the loop runs for the lifetime of playback.
            decodingTask = Task.Factory.StartNew(() => decodingLoop(decodingTaskCancellationTokenSource.Token), decodingTaskCancellationTokenSource.Token, TaskCreationOptions.LongRunning, TaskScheduler.Default);
        }
186
        /// <summary>
        /// Stops the decoding process. Optionally waits for the decoder thread to terminate.
        /// </summary>
        /// <param name="waitForDecoderExit">True if this method should wait for the decoder thread to terminate, false otherwise.</param>
        public void StopDecoding(bool waitForDecoderExit)
        {
            // no-op if decoding was never started (or already stopped).
            if (decodingTask == null)
                return;

            decodingTaskCancellationTokenSource.Cancel();
            if (waitForDecoderExit)
                decodingTask.Wait();

            decodingTask = null;
            decodingTaskCancellationTokenSource.Dispose();
            decodingTaskCancellationTokenSource = null;

            // the decoder can be restarted via StartDecoding() after being stopped.
            State = DecoderState.Ready;
        }
206
207 /// <summary>
208 /// Gets all frames that have been decoded by the decoder up until the point in time when this method was called.
209 /// Retrieving decoded frames using this method consumes them, ie calling this method again will never retrieve the same frame twice.
210 /// </summary>
211 /// <returns>The frames that have been decoded up until the point in time this method was called.</returns>
212 public IEnumerable<DecodedFrame> GetDecodedFrames()
213 {
214 var frames = new List<DecodedFrame>(decodedFrames.Count);
215 while (decodedFrames.TryDequeue(out var df))
216 frames.Add(df);
217
218 return frames;
219 }
220
221 // https://en.wikipedia.org/wiki/YCbCr
222 public Matrix3 GetConversionMatrix()
223 {
224 if (stream == null)
225 return Matrix3.Zero;
226
227 switch (stream->codec->colorspace)
228 {
229 case AVColorSpace.AVCOL_SPC_BT709:
230 return new Matrix3(1.164f, 1.164f, 1.164f,
231 0.000f, -0.213f, 2.112f,
232 1.793f, -0.533f, 0.000f);
233
234 case AVColorSpace.AVCOL_SPC_UNSPECIFIED:
235 case AVColorSpace.AVCOL_SPC_SMPTE170M:
236 case AVColorSpace.AVCOL_SPC_SMPTE240M:
237 default:
238 return new Matrix3(1.164f, 1.164f, 1.164f,
239 0.000f, -0.392f, 2.017f,
240 1.596f, -0.813f, 0.000f);
241 }
242 }
243
244 [MonoPInvokeCallback(typeof(avio_alloc_context_read_packet))]
245 private static int readPacket(void* opaque, byte* bufferPtr, int bufferSize)
246 {
247 var handle = new ObjectHandle<VideoDecoder>((IntPtr)opaque);
248 if (!handle.GetTarget(out VideoDecoder decoder))
249 return 0;
250
251 if (bufferSize != decoder.managedContextBuffer.Length)
252 decoder.managedContextBuffer = new byte[bufferSize];
253
254 var bytesRead = decoder.videoStream.Read(decoder.managedContextBuffer, 0, bufferSize);
255 Marshal.Copy(decoder.managedContextBuffer, 0, (IntPtr)bufferPtr, bytesRead);
256 return bytesRead;
257 }
258
259 [MonoPInvokeCallback(typeof(avio_alloc_context_seek))]
260 private static long streamSeekCallbacks(void* opaque, long offset, int whence)
261 {
262 var handle = new ObjectHandle<VideoDecoder>((IntPtr)opaque);
263 if (!handle.GetTarget(out VideoDecoder decoder))
264 return -1;
265
266 if (!decoder.videoStream.CanSeek)
267 throw new InvalidOperationException("Tried seeking on a video sourced by a non-seekable stream.");
268
269 switch (whence)
270 {
271 case StdIo.SEEK_CUR:
272 decoder.videoStream.Seek(offset, SeekOrigin.Current);
273 break;
274
275 case StdIo.SEEK_END:
276 decoder.videoStream.Seek(offset, SeekOrigin.End);
277 break;
278
279 case StdIo.SEEK_SET:
280 decoder.videoStream.Seek(offset, SeekOrigin.Begin);
281 break;
282
283 case AGffmpeg.AVSEEK_SIZE:
284 return decoder.videoStream.Length;
285
286 default:
287 return -1;
288 }
289
290 return decoder.videoStream.Position;
291 }
292
        /// <summary>
        /// Sets up the swscale conversion context used to convert decoded frames to YUV420P,
        /// or disables conversion entirely if the source is already in that format.
        /// </summary>
        private void prepareFilters()
        {
            // only convert if needed
            if (stream->codec->pix_fmt == AVPixelFormat.AV_PIX_FMT_YUV420P)
            {
                convert = false;
                return;
            }

            // 1 = SWS_FAST_BILINEAR
            // https://www.ffmpeg.org/doxygen/3.1/swscale_8h_source.html#l00056
            // source and destination dimensions are identical; only the pixel format changes.
            convCtx = ffmpeg.sws_getContext(stream->codec->width, stream->codec->height, stream->codec->pix_fmt, stream->codec->width, stream->codec->height,
                AVPixelFormat.AV_PIX_FMT_YUV420P, 1, null, null, null);
        }
307
        // sets up libavformat state: creates the AVFormatContext, the frames, etc. to start decoding, but does not actually start the decodingLoop
        private void prepareDecoding()
        {
            // size of the unmanaged buffer given to the custom AVIO context for read callbacks.
            const int context_buffer_size = 4096;

            // the first call to FFmpeg will throw an exception if the libraries cannot be found
            // this will be safely handled in StartDecoding()
            var fcPtr = ffmpeg.avformat_alloc_context();
            formatContext = fcPtr;
            contextBuffer = (byte*)ffmpeg.av_malloc(context_buffer_size);
            managedContextBuffer = new byte[context_buffer_size];
            // assigning to fields keeps the delegates alive while native code holds their pointers.
            readPacketCallback = readPacket;
            seekCallback = streamSeekCallbacks;
            // handle.Handle is passed as the opaque pointer so the static callbacks can recover this instance.
            formatContext->pb = ffmpeg.avio_alloc_context(contextBuffer, context_buffer_size, 0, (void*)handle.Handle, readPacketCallback, null, seekCallback);

            // the filename is a dummy since all I/O goes through the custom AVIO context above.
            int openInputResult = ffmpeg.avformat_open_input(&fcPtr, "dummy", null, null);
            inputOpened = openInputResult >= 0;
            if (!inputOpened)
                throw new InvalidOperationException($"Error opening file or stream: {getErrorMessage(openInputResult)}");

            int findStreamInfoResult = ffmpeg.avformat_find_stream_info(formatContext, null);
            if (findStreamInfoResult < 0)
                throw new InvalidOperationException($"Error finding stream info: {getErrorMessage(findStreamInfoResult)}");

            var nStreams = formatContext->nb_streams;

            for (var i = 0; i < nStreams; ++i)
            {
                stream = formatContext->streams[i];

                codecParams = *stream->codecpar;

                if (codecParams.codec_type == AVMediaType.AVMEDIA_TYPE_VIDEO)
                {
                    // some containers don't report a per-stream duration; fall back to the container-level duration.
                    duration = stream->duration <= 0 ? formatContext->duration : stream->duration;

                    timeBaseInSeconds = stream->time_base.GetValue();
                    var codecPtr = ffmpeg.avcodec_find_decoder(codecParams.codec_id);
                    if (codecPtr == null)
                        throw new InvalidOperationException($"Couldn't find codec with id: {codecParams.codec_id}");

                    int openCodecResult = ffmpeg.avcodec_open2(stream->codec, codecPtr, null);
                    if (openCodecResult < 0)
                        throw new InvalidOperationException($"Error trying to open codec with id {codecParams.codec_id}: {getErrorMessage(openCodecResult)}");

                    // only the first video stream found is decoded; any further streams are ignored.
                    break;
                }
            }

            prepareFilters();
        }
359
        /// <summary>
        /// The main decoder loop, run on a dedicated long-running task. Decodes frames while fewer than
        /// a fixed number of frames are pending consumption, drains queued decoder commands (e.g. seeks),
        /// and transitions the decoder state machine until cancelled or faulted.
        /// </summary>
        private void decodingLoop(CancellationToken cancellationToken)
        {
            // a single packet is allocated once and reused for every av_read_frame call.
            var packet = ffmpeg.av_packet_alloc();

            // upper bound on frames buffered ahead of the consumer before decoding pauses.
            const int max_pending_frames = 3;

            try
            {
                while (!cancellationToken.IsCancellationRequested)
                {
                    switch (State)
                    {
                        case DecoderState.Ready:
                        case DecoderState.Running:
                            if (decodedFrames.Count < max_pending_frames)
                            {
                                decodeNextFrame(packet);
                            }
                            else
                            {
                                // wait until existing buffers are consumed.
                                State = DecoderState.Ready;
                                Thread.Sleep(1);
                            }

                            break;

                        case DecoderState.EndOfStream:
                            // While at the end of the stream, avoid attempting to read further as this comes with a non-negligible overhead.
                            // A Seek() operation will trigger a state change, allowing decoding to potentially start again.
                            Thread.Sleep(50);
                            break;

                        default:
                            Debug.Fail($"Video decoder should never be in a \"{State}\" state during decode.");
                            return;
                    }

                    // execute any commands (currently only seeks) queued by the consumer thread.
                    while (!decoderCommands.IsEmpty)
                    {
                        if (cancellationToken.IsCancellationRequested)
                            return;

                        if (decoderCommands.TryDequeue(out var cmd))
                            cmd();
                    }
                }
            }
            catch (Exception e)
            {
                Logger.Log($"VideoDecoder faulted: {e}");
                State = DecoderState.Faulted;
            }
            finally
            {
                ffmpeg.av_packet_free(&packet);

                // a faulted state survives loop exit; otherwise the decoder ends up stopped.
                if (State != DecoderState.Faulted)
                    State = DecoderState.Stopped;
            }
        }
421
        /// <summary>
        /// Reads and decodes a single packet from the stream, converting the resulting frame to YUV420P if
        /// necessary and enqueueing it (with a pooled texture) onto <see cref="decodedFrames"/>.
        /// Handles end-of-stream (looping or transitioning to <see cref="DecoderState.EndOfStream"/>) and read errors.
        /// </summary>
        /// <param name="packet">A reusable packet allocated by the caller; unreferenced after each successful read.</param>
        private void decodeNextFrame(AVPacket* packet)
        {
            int readFrameResult = ffmpeg.av_read_frame(formatContext, packet);

            if (readFrameResult >= 0)
            {
                State = DecoderState.Running;

                // packets belonging to other (e.g. audio) streams are ignored.
                if (packet->stream_index == stream->index)
                {
                    int sendPacketResult = ffmpeg.avcodec_send_packet(stream->codec, packet);

                    if (sendPacketResult == 0)
                    {
                        AVFrame* frame = ffmpeg.av_frame_alloc();
                        AVFrame* outFrame = null;

                        var result = ffmpeg.avcodec_receive_frame(stream->codec, frame);

                        if (result == 0)
                        {
                            // frame time in milliseconds, relative to the stream's start time.
                            var frameTime = (frame->best_effort_timestamp - stream->start_time) * timeBaseInSeconds * 1000;

                            // frames decoded while catching up after a seek are dropped until the target time is reached.
                            if (!skipOutputUntilTime.HasValue || skipOutputUntilTime.Value < frameTime)
                            {
                                skipOutputUntilTime = null;

                                if (convert)
                                {
                                    // allocate a YUV420P frame and convert into it via swscale.
                                    outFrame = ffmpeg.av_frame_alloc();
                                    outFrame->format = (int)AVPixelFormat.AV_PIX_FMT_YUV420P;
                                    outFrame->width = stream->codec->width;
                                    outFrame->height = stream->codec->height;

                                    var ret = ffmpeg.av_frame_get_buffer(outFrame, 32);
                                    if (ret < 0)
                                        throw new InvalidOperationException($"Error allocating video frame: {getErrorMessage(ret)}");

                                    ffmpeg.sws_scale(convCtx, frame->data, frame->linesize, 0, stream->codec->height,
                                        outFrame->data, outFrame->linesize);
                                }
                                else
                                    outFrame = frame;

                                // reuse a returned texture if available; otherwise create a new one.
                                if (!availableTextures.TryDequeue(out var tex))
                                    tex = new Texture(new VideoTexture(codecParams.width, codecParams.height));

                                // ownership of outFrame transfers to the upload, which frees it via av_frame_free.
                                var upload = new VideoTextureUpload(outFrame, ffmpeg.av_frame_free);

                                tex.SetData(upload);
                                decodedFrames.Enqueue(new DecodedFrame { Time = frameTime, Texture = tex });
                            }

                            lastDecodedFrameTime = (float)frameTime;
                        }

                        // There are two cases: outFrame could be null in which case the above decode hasn't run, or the outFrame doesn't match the input frame,
                        // in which case it won't be automatically freed by the texture upload. In both cases we need to free the input frame.
                        if (outFrame != frame)
                            ffmpeg.av_frame_free(&frame);
                    }
                    else
                        Logger.Log($"Error {sendPacketResult} sending packet in VideoDecoder");
                }

                ffmpeg.av_packet_unref(packet);
            }
            else if (readFrameResult == AGffmpeg.AVERROR_EOF)
            {
                if (Looping)
                {
                    Seek(0);
                }
                else
                {
                    // This marks the video stream as no longer relevant (until a future potential Seek operation).
                    State = DecoderState.EndOfStream;
                }
            }
            else
            {
                // transient read failure: back off briefly and retry on the next loop iteration.
                State = DecoderState.Ready;
                Thread.Sleep(1);
            }
        }
507
508 private string getErrorMessage(int errorCode)
509 {
510 const ulong buffer_size = 256;
511 byte[] buffer = new byte[buffer_size];
512
513 int strErrorCode;
514
515 fixed (byte* bufPtr = buffer)
516 {
517 strErrorCode = ffmpeg.av_strerror(errorCode, bufPtr, buffer_size);
518 }
519
520 if (strErrorCode < 0)
521 return $"{errorCode} (av_strerror failed with code {strErrorCode})";
522
523 var messageLength = Math.Max(0, Array.IndexOf(buffer, (byte)0));
524 return Encoding.ASCII.GetString(buffer[..messageLength]);
525 }
526
        /// <summary>
        /// Creates the function-pointer table used for all FFmpeg calls. Overridable so tests or
        /// other platforms can substitute their own bindings.
        /// </summary>
        protected virtual FFmpegFuncs CreateFuncs()
        {
            // other frameworks should handle native libraries themselves
            // NOTE(review): this condition matches .NET 5 only — presumably it should also cover later
            // target frameworks (e.g. NET5_0_OR_GREATER); confirm against the project's target frameworks.
#if NET5_0
            AGffmpeg.GetOrLoadLibrary = name =>
            {
                int version = AGffmpeg.LibraryVersionMap[name];

                string libraryName = null;

                // "lib" prefix and extensions are resolved by .net core
                switch (RuntimeInfo.OS)
                {
                    case RuntimeInfo.Platform.macOS:
                        libraryName = $"{name}.{version}";
                        break;

                    case RuntimeInfo.Platform.Windows:
                        libraryName = $"{name}-{version}";
                        break;

                    case RuntimeInfo.Platform.Linux:
                        libraryName = name;
                        break;
                }

                return NativeLibrary.Load(libraryName, System.Reflection.Assembly.GetEntryAssembly(), DllImportSearchPath.UseDllDirectoryForDependencies | DllImportSearchPath.SafeDirectories);
            };
#endif

            return new FFmpegFuncs
            {
                av_frame_alloc = AGffmpeg.av_frame_alloc,
                av_frame_free = AGffmpeg.av_frame_free,
                av_frame_unref = AGffmpeg.av_frame_unref,
                av_frame_get_buffer = AGffmpeg.av_frame_get_buffer,
                av_strdup = AGffmpeg.av_strdup,
                av_strerror = AGffmpeg.av_strerror,
                av_malloc = AGffmpeg.av_malloc,
                av_packet_alloc = AGffmpeg.av_packet_alloc,
                av_packet_unref = AGffmpeg.av_packet_unref,
                av_packet_free = AGffmpeg.av_packet_free,
                av_read_frame = AGffmpeg.av_read_frame,
                av_seek_frame = AGffmpeg.av_seek_frame,
                avcodec_find_decoder = AGffmpeg.avcodec_find_decoder,
                avcodec_open2 = AGffmpeg.avcodec_open2,
                avcodec_receive_frame = AGffmpeg.avcodec_receive_frame,
                avcodec_send_packet = AGffmpeg.avcodec_send_packet,
                avformat_alloc_context = AGffmpeg.avformat_alloc_context,
                avformat_close_input = AGffmpeg.avformat_close_input,
                avformat_find_stream_info = AGffmpeg.avformat_find_stream_info,
                avformat_open_input = AGffmpeg.avformat_open_input,
                avio_alloc_context = AGffmpeg.avio_alloc_context,
                sws_freeContext = AGffmpeg.sws_freeContext,
                sws_getContext = AGffmpeg.sws_getContext,
                sws_scale = AGffmpeg.sws_scale
            };
        }
585
586 #region Disposal
587
        // finalizer safety net in case Dispose() is never called; unmanaged libav state must still be released.
        ~VideoDecoder()
        {
            Dispose(false);
        }
592
        /// <summary>
        /// Disposes this decoder, stopping the decode thread and releasing all native and texture resources.
        /// </summary>
        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }
598
        /// <summary>
        /// Releases all resources held by this decoder. Safe to call multiple times.
        /// </summary>
        /// <param name="disposing">True when called from <see cref="Dispose()"/>, false when called from the finalizer.</param>
        protected virtual void Dispose(bool disposing)
        {
            if (isDisposed)
                return;

            isDisposed = true;

            decoderCommands.Clear();

            // NOTE(review): this runs on the finalizer path too (disposing == false), where touching managed
            // objects such as the decoding task and the stream is generally unsafe — confirm intended.
            StopDecoding(true);

            if (formatContext != null && inputOpened)
            {
                fixed (AVFormatContext** ptr = &formatContext)
                    ffmpeg.avformat_close_input(ptr);
            }

            // drop the callback delegates so the GC can collect them once native code no longer references them.
            seekCallback = null;
            readPacketCallback = null;
            managedContextBuffer = null;

            videoStream.Dispose();
            videoStream = null;

            // gets freed by libavformat when closing the input
            contextBuffer = null;

            if (convCtx != null)
                ffmpeg.sws_freeContext(convCtx);

            // dispose any textures still in flight or pooled for reuse.
            while (decodedFrames.TryDequeue(out var f))
                f.Texture.Dispose();

            while (availableTextures.TryDequeue(out var t))
                t.Dispose();

            handle.Dispose();
        }
637
638 #endregion
639
        /// <summary>
        /// Represents the possible states the decoder can be in.
        /// </summary>
        public enum DecoderState
        {
            /// <summary>
            /// The decoder is ready to begin decoding. This is the default state before the decoder starts operations.
            /// </summary>
            Ready = 0,

            /// <summary>
            /// The decoder is currently running and decoding frames.
            /// </summary>
            Running = 1,

            /// <summary>
            /// The decoder has faulted with an exception.
            /// </summary>
            Faulted = 2,

            /// <summary>
            /// The decoder has reached the end of the video data.
            /// </summary>
            EndOfStream = 3,

            /// <summary>
            /// The decoder has been completely stopped and cannot be resumed.
            /// </summary>
            Stopped = 4,
        }
670 }
671}