import {
  pipeline,
  env,
  AutomaticSpeechRecognitionPipeline,
  WhisperTextStreamer,
  WhisperTokenizer,
  type AutomaticSpeechRecognitionOutput,
  ProgressCallback,
  ProgressInfo,
} from "@huggingface/transformers"

// Library-wide transformers.js settings, applied once when this module is loaded.
// Model files are requested from the host below (remote loading is switched on explicitly).
env.remoteHost = "https://zimu-cdn.youyi.dev"
env.allowRemoteModels = true
// Disabled alternatives, kept for reference:
// env.allowLocalModels = true
// env.backends.onnx.wasm.wasmPaths = "/wasm/"

// Type declarations for the functions this module assigns onto `window`
// (presumably so that the page's UI code can call them — confirm against the caller).
// Every function receives the shared UI state object and resolves when its work is done.
declare global {
  interface Window {
    // Runs speech recognition on data.files[0] and writes the output into data.result.
    transcribe: (data: UIData) => Promise<any>
    // Builds an .srt subtitle file from data.result[0] and triggers a download.
    exportSubtitles: (data: UIData) => Promise<any>
    // Concatenates the recognised text of data.result[0] and triggers a .txt download.
    exportText: (data: UIData) => Promise<any>
  }
}

/**
 * Creates an automatic-speech-recognition pipeline for `model`.
 *
 * WebGPU (fp16 encoder, q4 merged decoder) is selected only when the browser
 * exposes `navigator.gpu`, an adapter can actually be obtained, and that adapter
 * reports the `shader-f16` feature. In every other case the pipeline runs on
 * WASM with q8 weights.
 *
 * @param model Model identifier passed through to `pipeline`.
 * @param progress_callback Forwarded to `pipeline`; receives loading progress events.
 * @returns The pipeline instance resolved by `pipeline`.
 * @throws Whatever `pipeline` rejects with (the error is not caught here).
 */
export async function loadModel(model: string, progress_callback: ProgressCallback) {
  console.time("load model")
  console.log("start load model")

  // requestAdapter() resolves with null when no suitable adapter exists, so the
  // result must be null-checked before reading `features`; otherwise machines
  // that expose navigator.gpu without a usable adapter crash instead of
  // falling back to WASM.
  const gpu = navigator["gpu"]
  const adapter = gpu ? await gpu.requestAdapter() : null
  const supportsF16Shaders = Boolean(adapter?.features.has("shader-f16"))

  const modelOptions: Parameters<typeof pipeline>[2] = supportsF16Shaders
    ? {
        device: "webgpu",
        dtype: {
          encoder_model: "fp16",
          // Kept at q4: an earlier note in this file recorded fp16 as broken for the merged decoder.
          decoder_model_merged: "q4",
        },
      }
    : {
        device: "wasm",
        dtype: "q8",
      }

  try {
    return await pipeline("automatic-speech-recognition", model, {
      ...modelOptions,
      progress_callback,
    })
  } finally {
    // Always close the timer, otherwise a retry after a failed load logs a
    // "timer already exists" warning.
    console.timeEnd("load model")
  }
}

/**
 * Mutable state object shared with the UI. The functions in this file both read
 * from it (files, options) and write to it (progress, partial and final results).
 */
type UIData = {
  /** Language chosen in the UI. NOTE(review): `transcribe` currently hard-codes "zh" and does not read this. */
  language: string
  /**
   * When true, the "translate" task is requested instead of "transcribe"
   * (translate into English, per the original comment).
   * Typed as `boolean`: the previous literal type `false` made the translate
   * branch impossible to enable in a type-safe way.
   */
  translate: boolean
  /** Files picked by the user; only the first entry is processed. */
  files: FileList
  /** Latest model-download progress event, keyed by the file name reported in the event. */
  progress: { [filename: string]: ProgressInfo }
  /** Model state label; not read by the code in this file — presumably maintained by the UI. */
  model_status: "ready" | string
  /** Text streamed while a transcription is running; reset to "" when it starts and when it finishes. */
  temp_result: string
  /** Maximum characters per subtitle line, as entered in the UI (parsed as an integer on export). */
  maxCharsPerLine: string
  /** Transcription output, always stored as an array; the export functions read the first entry. */
  result: AutomaticSpeechRecognitionOutput[]
}

/** Lazily created pipeline: loaded on the first `window.transcribe` call and reused afterwards. */
let transcriber: AutomaticSpeechRecognitionPipeline | undefined

/**
 * Transcribes `data.files[0]` and stores the outcome on `data`.
 *
 * - `data.temp_result` receives the streamed text while decoding runs and is
 *   cleared again once the final output is available.
 * - `data.result` is emptied at the start and set to the pipeline output
 *   (always wrapped in an array) at the end.
 * - `data.progress` is updated per file while the model is being downloaded.
 *
 * @param data Shared UI state; mutated in place.
 * @throws TypeError when no file has been selected; otherwise whatever model
 *   loading or the pipeline call rejects with.
 */
window.transcribe = async function (data: UIData) {
  data.temp_result = ""
  data.result = []

  // Fail fast with a clear message. Previously a missing file only surfaced as
  // a TypeError from URL.createObjectURL after the whole model had been loaded.
  const file = data.files[0]
  if (!file) {
    throw new TypeError("transcribe: no file selected")
  }

  if (!transcriber) {
    const loadModelStart = Date.now()

    transcriber = await loadModel("onnx-community/whisper-large-v3-turbo_timestamped", (progressInfo) => {
      // Only events that carry a file name are tracked.
      if ("file" in progressInfo) {
        data.progress[progressInfo.file] = progressInfo
      }
    })
    // Optional analytics hook: only called when `window.umami` is present.
    // NOTE(review): "gpu" is reported whenever navigator.gpu exists, even though
    // loadModel falls back to WASM without shader-f16 support — confirm intent.
    window["umami"]?.track("load-model-done", {
      duration: Date.now() - loadModelStart,
      type: navigator["gpu"] ? "gpu" : "wasm",
    })
  }
  const asr = transcriber

  // The object URL keeps the file's blob alive until it is revoked, so it is
  // released in `finally` on success and on failure alike.
  const audioUrl = URL.createObjectURL(file)
  try {
    const timeStart = Date.now()
    const out = await asr(audioUrl, {
      // NOTE(review): hard-coded; data.language is not consulted — confirm whether that is intended.
      language: "zh",
      return_timestamps: "word",
      chunk_length_s: 30,
      stride_length_s: 5,
      task: data.translate ? "translate" : "transcribe",
      output_attentions: true,
      streamer: new WhisperTextStreamer(asr.tokenizer as WhisperTokenizer, {
        // Append each streamed piece so the UI can show text while decoding is still running.
        callback_function: (x) => {
          data.temp_result += x
        },
      }),
    })
    const transcribeDuration = Date.now() - timeStart
    window["umami"]?.track("transcribe-done", { duration: transcribeDuration, type: navigator["gpu"] ? "gpu" : "wasm" })
    console.log("transcribe duration:" + transcribeDuration)
    data.temp_result = ""
    data.result = Array.isArray(out) ? out : [out]
  } finally {
    URL.revokeObjectURL(audioUrl)
  }
}

/**
 * Downloads the recognised text of `data.result[0]` as a plain-text file named
 * after the source file with ".txt" appended.
 *
 * Does nothing (apart from a console warning) when there is no result or no
 * file yet. `transcribe` empties `data.result` while it runs, and reading
 * `.chunks` of the missing first entry used to throw a TypeError.
 *
 * @param data Shared UI state; only read.
 */
window.exportText = async function exportText(data: UIData) {
  const result = data.result[0]
  const file = data.files[0]
  if (!result || !file) {
    console.warn("exportText: nothing to export")
    return
  }

  // Chunk texts are joined without a separator, exactly as the pipeline produced them.
  const text = result.chunks?.map((item) => item.text).join("") ?? ""
  downloadFile(text, file.name + ".txt", "text/plain")
}

/**
 * Builds an SRT file from the chunks of `data.result[0]` and downloads it as
 * "<source file name without its extension>.srt".
 *
 * Chunks are packed greedily into one subtitle line until adding the next chunk
 * would exceed `data.maxCharsPerLine` characters. A cue starts at the start time
 * of its first chunk and ends at the end time of its last chunk.
 *
 * Fixes compared with the previous implementation:
 * - the first chunk is no longer written twice;
 * - only the last extension is stripped from the file name ("a.b.mp4" -> "a.b.srt",
 *   previously "a.srt");
 * - a missing result or file is a warned no-op instead of a TypeError;
 * - cues whose text is empty after trimming are skipped instead of being emitted.
 *
 * @param data Shared UI state; only read.
 */
window.exportSubtitles = async function exportSubtitles(data: UIData) {
  const result = data.result[0]
  const file = data.files[0]
  if (!result || !file) {
    console.warn("exportSubtitles: nothing to export")
    return
  }

  // A non-numeric limit means "no limit": everything ends up in a single cue,
  // which is what the NaN comparison did implicitly before.
  const parsedLimit = parseInt(data.maxCharsPerLine, 10)
  const maxCharsPerLine = Number.isNaN(parsedLimit) ? Infinity : parsedLimit

  const cues: string[] = []
  let currentLine = ""
  let lineStart = 0
  let lineEnd = 0

  // Emits the pending line as the next numbered cue (if it has visible text) and resets it.
  const flushLine = () => {
    const text = currentLine.trim()
    if (text) {
      cues.push(`${cues.length + 1}\n${formatTime(lineStart)} --> ${formatTime(lineEnd)}\n${text}`)
    }
    currentLine = ""
  }

  for (const { text: word, timestamp } of result.chunks ?? []) {
    const [start, end] = timestamp

    // Start a new cue when this chunk would push the current line over the limit.
    // An empty line is never flushed, so a single over-long chunk still gets its own cue.
    if (currentLine !== "" && (currentLine + word).length > maxCharsPerLine) {
      flushLine()
    }
    if (currentLine === "") {
      lineStart = start
    }
    currentLine += word
    // Defensive: if an end time is missing (presumably possible for a final,
    // cut-off chunk — TODO confirm), fall back to the chunk's start time so the
    // cue does not end at 00:00:00,000.
    lineEnd = end ?? start
  }
  // Emit the final, partially filled line.
  flushLine()

  // Strip only the last extension; a leading dot (e.g. ".hidden") is not treated as one.
  const dotIndex = file.name.lastIndexOf(".")
  const baseName = dotIndex > 0 ? file.name.slice(0, dotIndex) : file.name

  // downloadFile tags the content as UTF-8.
  downloadFile(cues.join("\n\n"), baseName + ".srt", "text/plain")
}

/**
 * Formats a duration in seconds as an SRT timestamp, `HH:MM:SS,mmm`.
 *
 * The value is rounded to the nearest millisecond using integer arithmetic.
 * Going through `Date` truncated fractional milliseconds (so 1.001 s, which is
 * 1000.9999999999999 ms in floating point, became ",000") and wrapped the hour
 * field at 24.
 *
 * @param seconds Offset from the start of the media, in seconds. Negative or
 *   non-finite values are treated as 0.
 * @returns The timestamp; the hour field grows beyond two digits if needed.
 */
function formatTime(seconds: number): string {
  const totalMs = Number.isFinite(seconds) ? Math.max(0, Math.round(seconds * 1000)) : 0
  const totalSeconds = Math.floor(totalMs / 1000)

  const pad = (value: number, width: number) => String(value).padStart(width, "0")
  const hh = pad(Math.floor(totalSeconds / 3600), 2)
  const mm = pad(Math.floor(totalSeconds / 60) % 60, 2)
  const ss = pad(totalSeconds % 60, 2)
  const ms = pad(totalMs % 1000, 3)
  return `${hh}:${mm}:${ss},${ms}`
}

/**
 * Offers `content` to the user as a file download.
 *
 * The text is wrapped in a Blob whose MIME type is `contentType` with
 * `charset=utf-8` appended, exposed through a temporary object URL, and
 * "clicked" via a throw-away anchor element that is attached to the document
 * body only for the duration of the click.
 *
 * @param content Text to put into the file.
 * @param fileName Name suggested to the browser for the saved file.
 * @param contentType MIME type without the charset suffix, e.g. "text/plain".
 */
function downloadFile(content: string, fileName: string, contentType: string) {
  const objectUrl = URL.createObjectURL(new Blob([content], { type: `${contentType};charset=utf-8` }))

  const link = document.createElement("a")
  link.download = fileName
  link.href = objectUrl

  const { body } = document
  body.appendChild(link)
  link.click()
  body.removeChild(link)

  // Release the object URL once the click has been dispatched.
  URL.revokeObjectURL(objectUrl)
}
