import {
  pipeline,
  env,
  AutomaticSpeechRecognitionPipeline,
  WhisperTextStreamer,
  WhisperTokenizer,
  type AutomaticSpeechRecognitionOutput,
} from "@huggingface/transformers"

// Turn on the library's `allowRemoteModels` setting (presumably lets model files be
// downloaded from the remote hub at runtime — confirm against the transformers.js `env` docs).
env.allowRemoteModels = true
// Disabled alternative: load models and the ONNX wasm binaries from self-hosted paths.
// env.allowLocalModels = true
// env.localModelPath = "/models/"
// env.backends.onnx.wasm.wasmPaths = "/wasm/"

declare global {
  /**
   * Functions this module attaches to `window` so that UI code outside the
   * module can call them. Each one receives the shared `UIData` state object.
   */
  interface Window {
    /** Transcribes `data.files[0]` and stores the output in `data.result`. */
    transcribe: (data: UIData) => Promise<any>
    /** Downloads `data.result[0]` as a `.srt` subtitle file. */
    exportSubtitles: (data: UIData) => Promise<any>
    /** Downloads the text of `data.result[0]` as a `.txt` file. */
    exportText: (data: UIData) => Promise<any>
  }
}

//#region type Progress
/**
 * Event passed to the `progress_callback` handed to `loadModel`.
 *
 * Discriminated by `status`. Every variant except `"ready"` carries a `file`
 * key, which `window.transcribe` uses (via `"file" in progress`) to index
 * `UIData.progress`.
 */
type Progress =
  // Per-file event with status "initiate".
  | {
      file: string
      name: string
      status: "initiate"
    }
  // Per-file event with status "download".
  | {
      file: string
      name: string
      status: "download"
    }
  // Per-file event with status "progress"; the only variant carrying numeric counters.
  | {
      file: string
      name: string
      status: "progress"
      // presumably a completion percentage — TODO confirm against the library docs
      progress: number
      // presumably bytes received so far and expected in total — TODO confirm
      loaded: number
      total: number
    }
  // Per-file event with status "done".
  | {
      file: string
      name: string
      status: "done"
    }
  // Pipeline-level event: no `file`, identifies the model and task instead.
  | {
      model: string
      status: "ready"
      task: "automatic-speech-recognition"
    }
//#endregion

/**
 * Creates an automatic-speech-recognition pipeline for `model`.
 *
 * When `navigator.gpu` is available the pipeline is created on the `webgpu`
 * device with an fp16 encoder and a q4 merged decoder; otherwise it is
 * created on the `wasm` device with q8 weights.
 *
 * The elapsed time is reported through `console.time("load model")`. The
 * timer is always closed, even when loading fails, so a later retry does not
 * trip over a still-running timer with the same label.
 *
 * @param model Model identifier passed straight to `pipeline()`.
 * @param progress_callback Receives the progress events emitted while loading.
 * @returns The pipeline returned by `pipeline()`.
 * @throws Whatever `pipeline()` rejects with.
 */
export async function loadModel(model: string, progress_callback: (progress: Progress) => any) {
  console.time("load model")
  console.log("start load model")
  try {
    return await pipeline(
      "automatic-speech-recognition",
      model,
      navigator["gpu"]
        ? {
            dtype: {
              encoder_model: "fp16",
              // q4 (or 'fp32') for the decoder; 'fp16' was noted as broken here.
              decoder_model_merged: "q4",
            },
            device: "webgpu",
            progress_callback,
          }
        : {
            dtype: "q8",
            device: "wasm",
            progress_callback,
          },
    )
  } finally {
    console.timeEnd("load model")
  }
}

/**
 * State object shared between the UI and the functions this module exposes
 * on `window`. The functions read their inputs from it and write progress
 * and results back into it.
 */
type UIData = {
  /** Language chosen in the UI. */
  language: string
  /** When true the recognition task is "translate" (translate to English) instead of "transcribe". */
  translate: boolean
  /** Files picked by the user; only `files[0]` is processed. */
  files: FileList
  /** Latest model-loading progress event per file, keyed by the event's `file`. */
  progress: { [filename: string]: Progress }
  model_status: "ready" | string
  /** Text streamed so far while a transcription is running; cleared when it finishes. */
  temp_result: string
  /** Maximum length of one subtitle line when exporting `.srt`. */
  maxCharsPerLine: number
  /** Final recognition output(s); reset at the start of every transcription. */
  result: AutomaticSpeechRecognitionOutput[]
}

/** Pipeline created on first use and reused by later `window.transcribe` calls. */
let transcriber: AutomaticSpeechRecognitionPipeline | undefined

/**
 * Transcribes `data.files[0]` with the Whisper pipeline, loading the model on
 * the first call.
 *
 * While running, streamed text is appended to `data.temp_result` and model
 * download progress is written to `data.progress`. On success
 * `data.temp_result` is cleared and `data.result` holds the output as an array.
 * Timings are reported to `window.umami` when that object exists.
 *
 * @param data Shared UI state; `files`, `language` and `translate` are read,
 *   `temp_result`, `result` and `progress` are written.
 * @throws Error when no file has been selected; otherwise whatever model
 *   loading or inference rejects with.
 */
window.transcribe = async function (data: UIData) {
  data.temp_result = ""
  data.result = []

  // Fail before the (large) model download instead of inside createObjectURL.
  const file = data.files[0]
  if (!file) {
    throw new Error("No audio file selected")
  }

  const backend = navigator["gpu"] ? "gpu" : "wasm"

  if (!transcriber) {
    const loadModelStart = Date.now()

    transcriber = await loadModel("onnx-community/whisper-large-v3-turbo_timestamped", (progress) => {
      // Only per-file events carry `file`; the final "ready" event does not.
      if ("file" in progress) {
        data.progress[progress.file] = progress
      }
    })
    window["umami"]?.track("firstLoadModel", {
      duration: Date.now() - loadModelStart,
      type: backend,
    })
  }

  const audioUrl = URL.createObjectURL(file)
  try {
    const timeStart = Date.now()
    const out = await transcriber(audioUrl, {
      // NOTE(review): assumes data.language holds a language code/name Whisper
      // accepts; falls back to the previous hard-coded "zh" when it is empty.
      language: data.language || "zh",
      return_timestamps: "word",
      chunk_length_s: 30,
      stride_length_s: 5,
      task: data.translate ? "translate" : "transcribe",
      output_attentions: true,
      // @ts-ignore
      streamer: new WhisperTextStreamer(transcriber.tokenizer as WhisperTokenizer, {
        callback_function: (x) => {
          data.temp_result += x
        },
      }),
    })
    const transcribeDuration = Date.now() - timeStart
    window["umami"]?.track("transcribe", { duration: transcribeDuration, type: backend })
    console.log("transcribe duration:" + transcribeDuration)
    console.log(out)
    data.temp_result = ""
    data.result = Array.isArray(out) ? out : [out]
  } finally {
    // Release the blob URL whether inference succeeded or failed.
    URL.revokeObjectURL(audioUrl)
  }
}

/**
 * Downloads the text of the first transcription result as `<file name>.txt`.
 *
 * The text is the concatenation of all chunk texts; when the result has no
 * `chunks`, its `text` field is used instead. Does nothing when there is no
 * result or no selected file yet.
 *
 * @param data Shared UI state; reads `result[0]` and `files[0]`.
 */
window.exportText = async function exportText(data: UIData) {
  const result = data.result[0]
  const file = data.files[0]
  if (!result || !file) {
    return
  }

  const text = result.chunks?.map((item) => item.text).join("") ?? result.text ?? ""
  downloadFile(text, file.name + ".txt", "text/plain")
}

/**
 * Downloads the first transcription result as `<file name>.srt`.
 *
 * Every chunk becomes one or more numbered cues. A chunk's text is split on
 * spaces and wrapped so that a line does not grow past `data.maxCharsPerLine`;
 * all cues produced from one chunk share that chunk's start and end time.
 * Cues whose text would be empty are skipped. Does nothing when there is no
 * result or no selected file yet.
 *
 * @param data Shared UI state; reads `result[0]`, `files[0]` and `maxCharsPerLine`.
 */
window.exportSubtitles = async function exportSubtitles(data: UIData) {
  const result = data.result[0]
  const file = data.files[0]
  if (!result || !file) {
    return
  }

  const maxCharsPerLine = data.maxCharsPerLine
  let srtContent = ""
  let index = 1

  // Appends one cue, unless the line is blank (e.g. an over-long first word
  // would otherwise flush an empty buffer and produce a cue without text).
  const emitCue = (startTime: string, endTime: string, line: string) => {
    const text = line.trim()
    if (!text) {
      return
    }
    srtContent += `${index}\n${startTime} --> ${endTime}\n${text}\n\n`
    index++
  }

  for (const item of result.chunks ?? []) {
    const startTime = formatTime(item.timestamp[0])
    const endTime = formatTime(item.timestamp[1])
    // The buffer is per chunk so leftover whitespace never leaks into the next one.
    let currentLine = ""

    for (const word of item.text.split(" ")) {
      if ((currentLine + word).length > maxCharsPerLine) {
        emitCue(startTime, endTime, currentLine)
        currentLine = ""
      }
      currentLine += word + " "
    }

    emitCue(startTime, endTime, currentLine)
  }

  downloadFile(srtContent.trim(), file.name + ".srt", "text/plain")
}

/**
 * Formats a time offset as an SRT timestamp, `HH:MM:SS,mmm`.
 *
 * The value is rounded to the nearest millisecond, so floating-point noise
 * such as `1.005 * 1000 === 1004.9999999999999` does not lose a millisecond.
 * Hours are not wrapped at 24. Negative or non-finite input is formatted as
 * zero.
 *
 * @param seconds Offset in seconds.
 * @returns The timestamp string; hours use at least two digits.
 */
function formatTime(seconds: number): string {
  const totalMs = Number.isFinite(seconds) ? Math.max(0, Math.round(seconds * 1000)) : 0

  const ms = totalMs % 1000
  const totalSeconds = Math.floor(totalMs / 1000)
  const ss = totalSeconds % 60
  const totalMinutes = Math.floor(totalSeconds / 60)
  const mm = totalMinutes % 60
  const hh = Math.floor(totalMinutes / 60)

  const pad = (value: number, width: number) => String(value).padStart(width, "0")
  return `${pad(hh, 2)}:${pad(mm, 2)}:${pad(ss, 2)},${pad(ms, 3)}`
}

/**
 * Offers `content` to the user as a file download.
 *
 * Wraps the content in a Blob, points a temporary `<a download>` element at
 * it, clicks that element and then removes it and revokes the blob URL.
 *
 * @param content Text to save.
 * @param fileName Suggested name for the downloaded file.
 * @param contentType MIME type given to the Blob.
 */
function downloadFile(content: string, fileName: string, contentType: string) {
  const objectUrl = URL.createObjectURL(new Blob([content], { type: contentType }))

  const link = Object.assign(document.createElement("a"), {
    href: objectUrl,
    download: fileName,
  })

  // The anchor is attached to the document only for the duration of the click.
  document.body.appendChild(link)
  link.click()
  link.remove()

  URL.revokeObjectURL(objectUrl)
}
