Use original media input for Faster-Whisper - thx Purfview :)

Fix #8097
This commit is contained in:
Nikolaj Olsson 2024-03-30 13:02:21 +01:00
parent 1dd0228c70
commit bb3e583f31
5 changed files with 132 additions and 7 deletions

View File

@ -0,0 +1,87 @@
using Nikse.SubtitleEdit.Core.Common;
using Nikse.SubtitleEdit.Core.SubtitleFormats;
using Nikse.SubtitleEdit.Core.Translate;
using System;
using System.Collections.Generic;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
namespace Nikse.SubtitleEdit.Core.AutoTranslate
{
/// <summary>
/// Auto-translator that posts text to the Anthropic (Claude) Messages API over HTTP
/// and returns the translated text from the reply.
/// Call <see cref="Initialize"/> before <see cref="Translate"/>.
/// </summary>
public class AnthropicTranslate : IAutoTranslator
{
    // Fallback endpoint used when no URL has been configured.
    private const string DefaultApiUrl = "https://api.anthropic.com/v1/messages";

    // Fallback model used when no model has been configured.
    private const string DefaultModel = "claude-3-opus-20240229";

    // API version sent in the "anthropic-version" header (required by the Messages API).
    private const string ApiVersion = "2023-06-01";

    // Upper bound for generated tokens; "max_tokens" is a required request field.
    private const int MaxOutputTokens = 1024;

    private HttpClient _httpClient;

    /// <summary>Display name shared by all instances.</summary>
    public static string StaticName { get; set; } = "Anthropic Claude";

    /// <summary>Display name of this translator.</summary>
    public string Name => StaticName;

    /// <summary>Home page of the service.</summary>
    public string Url => "https://www.anthropic.com/";

    /// <summary>Raw response body of the last failed API call, if any.</summary>
    public string Error { get; set; }

    /// <summary>Maximum number of characters to send per request.</summary>
    public int MaxCharacters => 1500;

    /// <summary>
    /// (Re)creates the HTTP client from the current settings: API URL, API key and
    /// the headers sent with every request. Any previous client is disposed.
    /// </summary>
    public void Initialize()
    {
        // Fall back to the public endpoint instead of letting "new Uri(null)" throw,
        // mirroring how Translate falls back to a default model.
        var apiUrl = Configuration.Settings.Tools.AnthropicApiUrl;
        if (string.IsNullOrWhiteSpace(apiUrl))
        {
            apiUrl = DefaultApiUrl;
            Configuration.Settings.Tools.AnthropicApiUrl = apiUrl;
        }

        _httpClient?.Dispose();
        _httpClient = new HttpClient();
        _httpClient.DefaultRequestHeaders.TryAddWithoutValidation("Content-Type", "application/json");
        _httpClient.DefaultRequestHeaders.TryAddWithoutValidation("accept", "application/json");
        _httpClient.DefaultRequestHeaders.TryAddWithoutValidation("anthropic-version", ApiVersion);
        _httpClient.BaseAddress = new Uri(apiUrl);

        if (!string.IsNullOrEmpty(Configuration.Settings.Tools.AnthropicApiKey))
        {
            _httpClient.DefaultRequestHeaders.TryAddWithoutValidation("x-api-key", Configuration.Settings.Tools.AnthropicApiKey);
        }
    }

    /// <summary>Returns the source languages offered (same list as ChatGptTranslate).</summary>
    public List<TranslationPair> GetSupportedSourceLanguages()
    {
        return ChatGptTranslate.ListLanguages();
    }

    /// <summary>Returns the target languages offered (same list as ChatGptTranslate).</summary>
    public List<TranslationPair> GetSupportedTargetLanguages()
    {
        return ChatGptTranslate.ListLanguages();
    }

    /// <summary>
    /// Translates <paramref name="text"/> by sending a single user message to the API.
    /// </summary>
    /// <param name="text">Text to translate; null or blank input yields an empty string without calling the API.</param>
    /// <param name="sourceLanguageCode">Language to translate from (inserted into the prompt).</param>
    /// <param name="targetLanguageCode">Language to translate to (inserted into the prompt).</param>
    /// <param name="cancellationToken">Token used to cancel the HTTP request.</param>
    /// <returns>The translated text, or an empty string when the reply holds no text.</returns>
    /// <exception cref="HttpRequestException">
    /// The API returned a non-success status code; the response body is stored in <see cref="Error"/> and logged first.
    /// </exception>
    public async Task<string> Translate(string text, string sourceLanguageCode, string targetLanguageCode, CancellationToken cancellationToken)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return string.Empty;
        }

        var model = Configuration.Settings.Tools.AnthropicApiModel;
        if (string.IsNullOrEmpty(model))
        {
            model = DefaultModel;
            Configuration.Settings.Tools.AnthropicApiModel = model;
        }

        // Every dynamic value is JSON-escaped so a quote or backslash in a setting
        // cannot break the request body.
        var prompt = "Please translate the following text from " + sourceLanguageCode + " to " + targetLanguageCode + ", only write the result: ";
        var input = "{ \"model\": \"" + Json.EncodeJsonText(model) + "\", " +
                    "\"max_tokens\": " + MaxOutputTokens + ", " +
                    "\"messages\": [{ \"role\": \"user\", \"content\": \"" + Json.EncodeJsonText(prompt) + "\\n\\n" + Json.EncodeJsonText(text.Trim()) + "\" }]}";

        string json;
        using (var content = new StringContent(input, Encoding.UTF8))
        {
            content.Headers.ContentType = MediaTypeHeaderValue.Parse("application/json");
            using (var result = await _httpClient.PostAsync(string.Empty, content, cancellationToken))
            {
                var bytes = await result.Content.ReadAsByteArrayAsync();
                json = Encoding.UTF8.GetString(bytes).Trim();
                if (!result.IsSuccessStatusCode)
                {
                    Error = json;
                    SeLogger.Error("Anthropic Translate failed calling API: Status code=" + result.StatusCode + Environment.NewLine + json);
                }

                result.EnsureSuccessStatusCode();
            }
        }

        // In a Messages API reply, "content" is an array of blocks such as
        // { "type": "text", "text": "..." }; the translated string is the "text" value.
        var parser = new SeJsonParser();
        var resultText = parser.GetFirstObject(json, "text");
        if (resultText == null)
        {
            return string.Empty;
        }

        var outputText = Json.DecodeJsonText(resultText).Trim();

        // Strip quotes the model wrapped around the answer, unless the input was quoted too.
        if (outputText.StartsWith('"') && outputText.EndsWith('"') && !text.StartsWith('"'))
        {
            outputText = outputText.Trim('"').Trim();
        }

        return outputText;
    }
}
}

View File

@ -84,7 +84,7 @@ namespace Nikse.SubtitleEdit.Core.AutoTranslate
return outputText;
}
private static List<TranslationPair> ListLanguages()
public static List<TranslationPair> ListLanguages()
{
return new List<TranslationPair>
{

View File

@ -176,6 +176,9 @@ namespace Nikse.SubtitleEdit.Core.Common
public string ChatGptUrl { get; set; }
public string ChatGptApiKey { get; set; }
public string ChatGptModel { get; set; }
public string AnthropicApiUrl { get; set; }
public string AnthropicApiKey { get; set; }
public string AnthropicApiModel { get; set; }
public int AutoTranslateDelaySeconds { get; set; }
public string GeminiProApiKey { get; set; }
public bool DisableVidoInfoViaLabel { get; set; }
@ -5351,6 +5354,24 @@ $HorzAlign = Center
settings.Tools.ChatGptModel = subNode.InnerText;
}
subNode = node.SelectSingleNode("AnthropicApiUrl");
if (subNode != null)
{
settings.Tools.AnthropicApiUrl = subNode.InnerText;
}
subNode = node.SelectSingleNode("AnthropicApiKey");
if (subNode != null)
{
settings.Tools.AnthropicApiKey = subNode.InnerText;
}
subNode = node.SelectSingleNode("AnthropicApiModel");
if (subNode != null)
{
settings.Tools.AnthropicApiModel = subNode.InnerText;
}
subNode = node.SelectSingleNode("AutoTranslateDelaySeconds");
if (subNode != null)
{
@ -11858,6 +11879,9 @@ $HorzAlign = Center
textWriter.WriteElementString("ChatGptUrl", settings.Tools.ChatGptUrl);
textWriter.WriteElementString("ChatGptApiKey", settings.Tools.ChatGptApiKey);
textWriter.WriteElementString("ChatGptModel", settings.Tools.ChatGptModel);
textWriter.WriteElementString("AnthropicApiUrl", settings.Tools.AnthropicApiUrl);
textWriter.WriteElementString("AnthropicApiKey", settings.Tools.AnthropicApiKey);
textWriter.WriteElementString("AnthropicApiModel", settings.Tools.AnthropicApiModel);
textWriter.WriteElementString("AutoTranslateDelaySeconds", settings.Tools.AutoTranslateDelaySeconds.ToString(CultureInfo.InvariantCulture));
textWriter.WriteElementString("GeminiProApiKey", settings.Tools.GeminiProApiKey);
textWriter.WriteElementString("DisableVidoInfoViaLabel", settings.Tools.DisableVidoInfoViaLabel.ToString(CultureInfo.InvariantCulture));

View File

@ -800,7 +800,16 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
labelProgress.Refresh();
Application.DoEvents();
_resultList = new List<ResultText>();
var process = GetWhisperProcess(waveFileName, model.Name, _languageCode, checkBoxTranslateToEnglish.Checked, OutputHandler);
var inputFile = waveFileName;
if (!_useCenterChannelOnly &&
comboBoxWhisperEngine.Text == WhisperChoice.PurfviewFasterWhisper &&
_audioTrackNumber == 0)
{
inputFile = _videoFileName;
}
var process = GetWhisperProcess(inputFile, model.Name, _languageCode, checkBoxTranslateToEnglish.Checked, OutputHandler);
var sw = Stopwatch.StartNew();
_outputText.Add($"Calling whisper ({Configuration.Settings.Tools.WhisperChoice}) with : {process.StartInfo.FileName} {process.StartInfo.Arguments}{Environment.NewLine}");
_startTicks = DateTime.UtcNow.Ticks;
@ -867,7 +876,7 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
process.Dispose();
if (GetResultFromSrt(waveFileName, out var resultTexts, _outputText, _filesToDelete))
if (GetResultFromSrt(waveFileName, _videoFileName, out var resultTexts, _outputText, _filesToDelete))
{
var subtitle = new Subtitle();
subtitle.Paragraphs.AddRange(resultTexts.Select(p => new Paragraph(p.Text, (double)p.Start * 1000.0, (double)p.End * 1000.0)).ToList());
@ -881,7 +890,7 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
return sub;
}
public static bool GetResultFromSrt(string waveFileName, out List<ResultText> resultTexts, ConcurrentBag<string> outputText, List<string> filesToDelete)
public static bool GetResultFromSrt(string waveFileName, string videoFileName, out List<ResultText> resultTexts, ConcurrentBag<string> outputText, List<string> filesToDelete)
{
var srtFileName = waveFileName + ".srt";
if (!File.Exists(srtFileName) && waveFileName.EndsWith(".wav"))
@ -890,6 +899,11 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
}
var whisperFolder = WhisperHelper.GetWhisperFolder() ?? string.Empty;
if (!string.IsNullOrEmpty(whisperFolder) && !File.Exists(whisperFolder) && !string.IsNullOrEmpty(videoFileName))
{
srtFileName = Path.Combine(whisperFolder, Path.GetFileNameWithoutExtension(videoFileName)) + ".srt";
}
if (!File.Exists(srtFileName))
{
srtFileName = Path.Combine(whisperFolder, Path.GetFileNameWithoutExtension(waveFileName)) + ".srt";

View File

@ -155,7 +155,7 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
var waveFileName = videoFileName;
_outputText.Add(string.Empty);
var transcript = TranscribeViaWhisper(waveFileName);
var transcript = TranscribeViaWhisper(waveFileName, videoFileName);
if (_cancel)
{
TaskbarList.SetProgressState(_parentForm.Handle, TaskbarButtonProgressFlags.NoProgress);
@ -185,7 +185,7 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
DialogResult = DialogResult.OK;
}
public List<ResultText> TranscribeViaWhisper(string waveFileName)
public List<ResultText> TranscribeViaWhisper(string waveFileName, string videoFileName)
{
var model = comboBoxModels.Items[comboBoxModels.SelectedIndex] as WhisperModel;
if (model == null)
@ -242,7 +242,7 @@ namespace Nikse.SubtitleEdit.Forms.AudioToText
System.Threading.Thread.Sleep(50);
}
if (WhisperAudioToText.GetResultFromSrt(waveFileName, out var resultTexts, _outputText, null))
if (WhisperAudioToText.GetResultFromSrt(waveFileName, videoFileName, out var resultTexts, _outputText, null))
{
return resultTexts;
}