jellyfin/MediaBrowser.MediaEncoding/Subtitles/SrtParser.cs

90 lines
3.7 KiB
C#
Raw Normal View History

2015-01-17 13:12:02 -07:00
using MediaBrowser.Model.Extensions;
2015-01-21 12:21:19 -07:00
using MediaBrowser.Model.Logging;
2015-01-17 13:12:02 -07:00
using System;
2014-05-05 07:45:45 -07:00
using System.Collections.Generic;
2014-05-10 19:29:34 -07:00
using System.Globalization;
2014-05-05 07:45:45 -07:00
using System.IO;
2014-05-10 19:29:34 -07:00
using System.Text.RegularExpressions;
using System.Threading;
2014-05-05 07:45:45 -07:00
namespace MediaBrowser.MediaEncoding.Subtitles
{
2014-05-06 19:28:19 -07:00
/// <summary>
/// Parses SubRip (.srt) subtitle data into a <see cref="SubtitleTrackInfo"/>.
/// </summary>
public class SrtParser : ISubtitleParser
{
    // Separates the start and end timestamps on a cue timing line.
    private static readonly Regex TimingSeparatorRegex = new Regex(@"[\t ]*-->[\t ]*");

    // Matches brace-delimited backslash override blocks (for example {\an8}); these are removed from the text.
    private static readonly Regex OverrideBlockRegex = new Regex(@"\{(\\[\w]+\(?([\w\d]+,?)+\)?)+\}", RegexOptions.IgnoreCase);

    // Matches an already HTML-escaped font/b/u/i/s tag so it can be turned back into real markup.
    private static readonly Regex EscapedTagRegex = new Regex("&lt;(\\/?(font|b|u|i|s))((\\s+(\\w|\\w[\\w\\-]*\\w)(\\s*=\\s*(?:\\\".*?\\\"|'.*?'|[^'\\\">\\s]+))?)+\\s*|\\s*)(\\/?)&gt;", RegexOptions.IgnoreCase);

    // Accepted timestamp layouts: '.' or ',' as the millisecond separator.
    private static readonly string[] TimestampFormats = { @"hh\:mm\:ss\.fff", @"hh\:mm\:ss\,fff" };

    // Characters that end the timestamp when extra data follows the end time on the timing line.
    private static readonly char[] TimestampTerminators = { ' ', '\t' };

    private readonly ILogger _logger;
    private readonly CultureInfo _usCulture = new CultureInfo("en-US");

    /// <summary>
    /// Initializes a new instance of the <see cref="SrtParser"/> class.
    /// </summary>
    /// <param name="logger">Logger used to report timing lines that cannot be recognized.</param>
    public SrtParser(ILogger logger)
    {
        _logger = logger;
    }

    /// <summary>
    /// Reads the stream line by line and converts every cue (identifier line, timing line,
    /// text lines up to the next empty line) into a track event.
    /// </summary>
    /// <param name="stream">The subtitle data. It is wrapped in a <see cref="StreamReader"/> that is disposed before returning.</param>
    /// <param name="cancellationToken">Checked once per cue; cancelling aborts the parse.</param>
    /// <returns>The track info holding the parsed events in file order.</returns>
    /// <exception cref="OperationCanceledException">The token was cancelled while parsing.</exception>
    public SubtitleTrackInfo Parse(Stream stream, CancellationToken cancellationToken)
    {
        var trackInfo = new SubtitleTrackInfo();

        using (var reader = new StreamReader(stream))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                cancellationToken.ThrowIfCancellationRequested();

                // Skip blank separator lines until the next cue identifier.
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                var subEvent = new SubtitleTrackEvent { Id = line };

                // The line after the identifier is expected to be the timing line.
                line = reader.ReadLine();
                if (string.IsNullOrWhiteSpace(line))
                {
                    continue;
                }

                var time = TimingSeparatorRegex.Split(line);
                if (time.Length < 2)
                {
                    // Known limitation: this occurs when the subtitle text itself contains an
                    // empty line, because the text loop below stops at the first empty line and
                    // the remainder is then misread as a new cue.
                    _logger.Warn("Unrecognized line in srt: {0}", line);
                    continue;
                }

                subEvent.StartPositionTicks = GetTicks(time[0]);

                // Anything following the end timestamp (separated by a space or tab) is dropped.
                var endTime = time[1].Trim();
                var idx = endTime.IndexOfAny(TimestampTerminators);
                if (idx > 0)
                {
                    endTime = endTime.Substring(0, idx);
                }

                subEvent.EndPositionTicks = GetTicks(endTime);

                // Collect text lines until an empty line or the end of the stream.
                var multiline = new List<string>();
                while ((line = reader.ReadLine()) != null)
                {
                    if (string.IsNullOrEmpty(line))
                    {
                        break;
                    }

                    multiline.Add(line);
                }

                var text = string.Join(ParserValues.NewLine, multiline);
                text = text.Replace(@"\N", ParserValues.NewLine, StringComparison.OrdinalIgnoreCase);
                text = OverrideBlockRegex.Replace(text, string.Empty);

                // Escape all angle brackets first, then restore only the allowed formatting tags.
                text = text.Replace("<", "&lt;").Replace(">", "&gt;");
                text = EscapedTagRegex.Replace(text, "<$1$3$7>");

                subEvent.Text = text;
                trackInfo.TrackEvents.Add(subEvent);
            }
        }

        return trackInfo;
    }

    /// <summary>
    /// Converts an "hh:mm:ss.fff" or "hh:mm:ss,fff" timestamp into ticks.
    /// </summary>
    /// <param name="time">The timestamp text; surrounding whitespace is ignored.</param>
    /// <returns>The tick count, or 0 when the text matches neither format.</returns>
    private long GetTicks(string time)
    {
        TimeSpan span;
        return TimeSpan.TryParseExact(time.Trim(), TimestampFormats, _usCulture, out span)
            ? span.Ticks
            : 0;
    }
}
}