import { assertExists } from '../../utils/type.utils';
import { isGeminiChatLlm } from '../chat.consts';
import { Llm, SupportedLlm } from '../llm.consts';

/**
 * Name of the form-data field that contains the file passed to the
 * Express file upload route.
 */
export const EXPRESS_UPLOAD_FILE_DATA_FORM_FIELD_NAME = 'data_file';

/** Base Enums/Consts/Types */
/**
 * File types known to this module.
 *
 * Each member's string value equals its name, except `THREE_GPP`, whose
 * value is `'3GPP'`.
 *
 * IMPORTANT: Corresponds with values in llama_indexer; be sure to update both
 * when editing.
 */
export enum FileType {
  AAC = 'AAC',
  CSV = 'CSV',
  DOC = 'DOC',
  DOCX = 'DOCX',
  FLAC = 'FLAC',
  FLV = 'FLV',
  GIF = 'GIF',
  GOOGLE_DOC = 'GOOGLE_DOC',
  GOOGLE_DRAWINGS = 'GOOGLE_DRAWINGS',
  GOOGLE_SHEETS = 'GOOGLE_SHEETS',
  GOOGLE_SLIDES = 'GOOGLE_SLIDES',
  HEIC = 'HEIC',
  HEIF = 'HEIF',
  HTML = 'HTML',
  JPEG = 'JPEG',
  JSON = 'JSON',
  M4A = 'M4A',
  M4V = 'M4V',
  MD = 'MD',
  MOV = 'MOV',
  MP3 = 'MP3',
  MP4 = 'MP4',
  MPA = 'MPA',
  MPEG = 'MPEG',
  MPEGPS = 'MPEGPS',
  MPG = 'MPG',
  MPGA = 'MPGA',
  OGG = 'OGG',
  OPUS = 'OPUS',
  PCM = 'PCM',
  PDF = 'PDF',
  PNG = 'PNG',
  PPT = 'PPT',
  PPTX = 'PPTX',
  RTF = 'RTF',
  THREE_GPP = '3GPP',
  TXT = 'TXT',
  WAV = 'WAV',
  WEBM = 'WEBM',
  WEBP = 'WEBP',
  WMV = 'WMV',
  XLS = 'XLS',
  XLSX = 'XLSX'
}

/**
 * Lower-case file extensions (without a leading dot) accepted for each file
 * type. The first entry of each list is the one used when a single extension
 * is needed (see `getFileTypeExtensionOrThrow`).
 *
 * Deliberately not asserted `as const`: that would produce `readonly` tuples,
 * which are not assignable to the mutable `string[]` in the declared type.
 */
export const FILE_EXTENSIONS: Record<FileType, string[]> = {
  [FileType.AAC]: ['aac'],
  [FileType.CSV]: ['csv'],
  [FileType.DOC]: ['doc'],
  [FileType.DOCX]: ['docx'],
  [FileType.FLAC]: ['flac'],
  [FileType.FLV]: ['flv'],
  [FileType.GIF]: ['gif'],
  [FileType.GOOGLE_DOC]: ['gdoc'],
  [FileType.GOOGLE_DRAWINGS]: ['gdraw'],
  [FileType.GOOGLE_SHEETS]: ['gsheet'],
  [FileType.GOOGLE_SLIDES]: ['gslides'],
  [FileType.HEIC]: ['heic'],
  [FileType.HEIF]: ['heif'],
  [FileType.HTML]: ['html', 'htm'],
  [FileType.JPEG]: ['jpeg', 'jpg'],
  [FileType.JSON]: ['json'],
  [FileType.M4A]: ['m4a'],
  [FileType.M4V]: ['m4v'],
  [FileType.MD]: ['md'],
  [FileType.MOV]: ['mov'],
  [FileType.MP3]: ['mp3'],
  [FileType.MP4]: ['mp4'],
  [FileType.MPEG]: ['mpeg'],
  [FileType.MPEGPS]: ['mpegps'],
  [FileType.MPG]: ['mpg'],
  [FileType.MPA]: ['mpa'],
  [FileType.MPGA]: ['mpga'],
  [FileType.OGG]: ['ogg'],
  [FileType.OPUS]: ['opus'],
  [FileType.PDF]: ['pdf'],
  [FileType.PCM]: ['pcm'],
  [FileType.PNG]: ['png'],
  [FileType.PPT]: ['ppt'],
  [FileType.PPTX]: ['pptx'],
  [FileType.RTF]: ['rtf'],
  [FileType.TXT]: ['txt'],
  [FileType.WAV]: ['wav'],
  [FileType.WEBM]: ['webm'],
  [FileType.WEBP]: ['webp'],
  [FileType.WMV]: ['wmv'],
  [FileType.XLS]: ['xls'],
  [FileType.XLSX]: ['xlsx'],
  [FileType.THREE_GPP]: ['3gpp']
};

/**
 * The single MIME type associated with each file type.
 *
 * When modifying MIME types here, make sure to update the MIME types in
 * file_consts.py as well.
 */
export const FILE_MIME_TYPES: Record<FileType, string> = {
  [FileType.AAC]: 'audio/aac',
  [FileType.CSV]: 'text/csv',
  [FileType.DOC]: 'application/msword',
  [FileType.DOCX]:
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  [FileType.FLAC]: 'audio/flac',
  [FileType.FLV]: 'video/x-flv',
  [FileType.GIF]: 'image/gif',
  [FileType.GOOGLE_DOC]: 'application/vnd.google-apps.document',
  [FileType.GOOGLE_DRAWINGS]: 'application/vnd.google-apps.drawing',
  [FileType.GOOGLE_SHEETS]: 'application/vnd.google-apps.spreadsheet',
  [FileType.GOOGLE_SLIDES]: 'application/vnd.google-apps.presentation',
  [FileType.HEIC]: 'image/heic',
  [FileType.HEIF]: 'image/heif',
  [FileType.HTML]: 'text/html',
  [FileType.JPEG]: 'image/jpeg',
  [FileType.JSON]: 'application/json',
  [FileType.M4A]: 'audio/x-m4a',
  [FileType.M4V]: 'video/x-m4v',
  [FileType.MD]: 'text/markdown',
  [FileType.MOV]: 'video/quicktime',
  [FileType.MP3]: 'audio/mpeg',
  [FileType.MP4]: 'video/mp4',
  [FileType.MPEG]: 'video/mpeg',
  [FileType.MPEGPS]: 'video/mpegps',
  [FileType.MPG]: 'video/mpg',
  // NOTE(review): MPA maps to 'audio/m4a' while M4A maps to 'audio/x-m4a' —
  // confirm this is intentional (and matches file_consts.py).
  [FileType.MPA]: 'audio/m4a',
  [FileType.MPGA]: 'audio/mpga',
  [FileType.OGG]: 'audio/ogg',
  [FileType.OPUS]: 'audio/opus',
  [FileType.PDF]: 'application/pdf',
  [FileType.PCM]: 'audio/pcm',
  [FileType.PNG]: 'image/png',
  [FileType.PPT]: 'application/vnd.ms-powerpoint',
  [FileType.PPTX]:
    'application/vnd.openxmlformats-officedocument.presentationml.presentation',
  [FileType.RTF]: 'application/rtf',
  [FileType.TXT]: 'text/plain',
  [FileType.WAV]: 'audio/wav',
  // NOTE(review): WEBM is listed in VIDEO_FILE_TYPES but mapped to an audio/*
  // MIME type here, so 'video/webm' does not resolve to any file type —
  // confirm this is intentional.
  [FileType.WEBM]: 'audio/webm',
  [FileType.WEBP]: 'image/webp',
  [FileType.WMV]: 'video/wmv',
  [FileType.XLS]: 'application/vnd.ms-excel',
  [FileType.XLSX]:
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
  [FileType.THREE_GPP]: 'video/3gpp'
} as const;

// The MIME type of every entry in FILE_MIME_TYPES
export const PROCESSABLE_FILE_MIME_TYPES = Object.values(FILE_MIME_TYPES);

/** Utility functions  */

/**
 * Extension -> File Type
 *
 * Matching is case-insensitive against the lists in `FILE_EXTENSIONS`.
 *
 * @param extension - File extension without a leading dot (e.g. `pdf`).
 * @returns The first file type that lists the extension, or `undefined`.
 */
export function getFileTypeFromExtension(
  extension: string
): FileType | undefined {
  const normalizedExtension = extension.toLowerCase();

  for (const [fileType, knownExtensions] of Object.entries(FILE_EXTENSIONS)) {
    if (knownExtensions.includes(normalizedExtension)) {
      return fileType as FileType;
    }
  }

  return undefined;
}

/**
 * Extension -> File Type (throws if not found)
 *
 * Same lookup as `getFileTypeFromExtension`, but the result is passed through
 * `assertExists` so callers always receive a `FileType`.
 *
 * @param extension - File extension without a leading dot.
 */
export function getFileTypeFromExtensionOrThrow(extension: string): FileType {
  const resolvedFileType = getFileTypeFromExtension(extension);
  assertExists(
    resolvedFileType,
    `Unknown file type for extension: ${extension}`
  );
  return resolvedFileType;
}

/**
 * Mime Type -> File Type
 *
 * Compares case-insensitively against the values of `FILE_MIME_TYPES`.
 *
 * @param mimeType - Mime type to resolve; `null`/`undefined` never match.
 * @returns The first file type whose mime type matches, or `undefined`.
 */
export function getFileTypeFromMimeType(
  mimeType: string | undefined | null
): FileType | undefined {
  const normalizedMimeType = mimeType?.toLowerCase();

  for (const [fileType, knownMimeType] of Object.entries(FILE_MIME_TYPES)) {
    if (knownMimeType.toLowerCase() === normalizedMimeType) {
      return fileType as FileType;
    }
  }

  return undefined;
}

/**
 * Mime Type -> File Type (throws if not found)
 *
 * Same lookup as `getFileTypeFromMimeType`, but the result is passed through
 * `assertExists` so callers always receive a `FileType`.
 *
 * @param mimeType - Mime type to resolve.
 */
export function getFileTypeFromMimeTypeOrThrow(mimeType: string): FileType {
  const resolvedFileType = getFileTypeFromMimeType(mimeType);
  assertExists(
    resolvedFileType,
    `Unknown file type for mime type: ${mimeType}`
  );
  return resolvedFileType;
}

/**
 * Extension -> Mime Type
 *
 * Resolves the extension with `getFileTypeFromExtensionOrThrow`, then reads
 * that file type's entry in `FILE_MIME_TYPES`.
 *
 * @param extension - File extension without a leading dot.
 */
export function getFileMimeTypeFromExtensionOrThrow(extension: string): string {
  const fileType = getFileTypeFromExtensionOrThrow(extension);
  return FILE_MIME_TYPES[fileType];
}

/**
 * Mime Type -> Extension[0] (throws if not found)
 *
 * Resolves the mime type with `getFileTypeFromMimeTypeOrThrow`, then returns
 * that file type's first extension via `getFileTypeExtensionOrThrow`.
 *
 * @param mimeType - Mime type to resolve.
 */
export function getFileExtensionFromMimeTypeOrThrow(mimeType: string): string {
  const fileType = getFileTypeFromMimeTypeOrThrow(mimeType);
  return getFileTypeExtensionOrThrow(fileType);
}

/**
 * File Type -> Mime Type
 *
 * @param fileType - File type to look up.
 * @returns The `FILE_MIME_TYPES` entry for `fileType`.
 */
export function getFileTypeMimeType(fileType: FileType): string {
  return FILE_MIME_TYPES[fileType];
}

/**
 * File Type -> Extensions
 *
 * @param fileType - File type to look up.
 * @returns The `FILE_EXTENSIONS` entry for `fileType`. This is the stored
 *   array itself, not a copy, so callers should not mutate it.
 */
export function getFileTypeExtensions(fileType: FileType): string[] {
  return FILE_EXTENSIONS[fileType];
}

/**
 * File Type -> Extensions[0] (throws if not found)
 *
 * Returns the first extension listed for the file type; the value is passed
 * through `assertExists` to guard against an empty list.
 *
 * @param fileType - File type to look up.
 */
export function getFileTypeExtensionOrThrow(fileType: FileType): string {
  const [primaryExtension] = getFileTypeExtensions(fileType);
  assertExists(
    primaryExtension,
    `Unknown extension for file type: ${fileType}`
  );
  return primaryExtension;
}

/**
 * Extension or Mime Type -> File Type
 *
 * For cases where bad data returned from an external provider may mix up
 * extensions and mime types. The value is tried as an extension first and as
 * a mime type second.
 *
 * @param extensionOrMimeType - Either a file extension or a mime type.
 * @returns The matching file type, or `undefined` when neither lookup matches.
 */
export function getFileTypeFromExtensionOrMimeType(
  extensionOrMimeType: string
) {
  const fileTypeByExtension = getFileTypeFromExtension(extensionOrMimeType);
  if (fileTypeByExtension !== undefined) {
    return fileTypeByExtension;
  }

  return getFileTypeFromMimeType(extensionOrMimeType);
}

/**
 * Extension or Mime Type -> File Type (throws if not found)
 *
 * Same lookup as `getFileTypeFromExtensionOrMimeType`, but the result is
 * passed through `assertExists` so callers always receive a `FileType`.
 *
 * @param extensionOrMimeType - Either a file extension or a mime type.
 */
export function getFileTypeFromExtensionOrMimeTypeOrThrow(
  extensionOrMimeType: string
): FileType {
  const resolvedFileType =
    getFileTypeFromExtensionOrMimeType(extensionOrMimeType);
  assertExists(
    resolvedFileType,
    `Unknown file type for extension or mime type: ${extensionOrMimeType}`
  );
  return resolvedFileType;
}

/**
 * Given a file name and mime type, validate that the name ends with one of
 * the extensions registered for that mime type (case-insensitive).
 * If not, return the file name with the file type's first extension appended.
 *
 * @param fileName - File name to validate.
 * @param mimeType - Mime type used to look up the accepted extensions; an
 *   unknown mime type is rejected by `getFileTypeFromMimeTypeOrThrow`.
 * @returns `fileName` unchanged when its extension is already accepted,
 *   otherwise `fileName` followed by `.` and the first accepted extension.
 */
export function getFileNameWithCorrectExtension({
  fileName,
  mimeType
}: {
  fileName: string;
  mimeType: string;
}) {
  const fileType = getFileTypeFromMimeTypeOrThrow(mimeType);
  const extensions = getFileTypeExtensions(fileType);

  // Only the text after the last dot counts as an extension. A name without
  // any dot has no extension, even if the whole name happens to equal one
  // (e.g. a file literally named "pdf" must become "pdf.pdf").
  const lastDotIndex = fileName.lastIndexOf('.');
  const fileExtension =
    lastDotIndex === -1 ? undefined : fileName.slice(lastDotIndex + 1);

  if (fileExtension && extensions.includes(fileExtension.toLowerCase())) {
    return fileName;
  }

  return `${fileName}.${extensions[0]}`;
}

/** FileType Type Groupings (Videos, Images, etc) */

// Images
// File types treated as images by `isImageFileType`.
// IMPORTANT: Corresponds with values in llama_indexer
export const IMAGE_FILE_TYPES = [
  FileType.PNG,
  FileType.JPEG,
  FileType.GIF,
  FileType.WEBP,
  FileType.HEIC,
  FileType.HEIF
] as const;

// Mime types of the image file types, in the same order as IMAGE_FILE_TYPES
export const IMAGE_MIME_TYPES = IMAGE_FILE_TYPES.map((fileType) =>
  getFileTypeMimeType(fileType)
);

/**
 * Whether the given file type is one of `IMAGE_FILE_TYPES`.
 *
 * @param fileType - File type to test; `undefined` yields `false`.
 */
export function isImageFileType(fileType: FileType | undefined) {
  // Compare element-wise rather than casting to `any` for `includes`, so the
  // check stays type-safe against the narrow `as const` tuple.
  return IMAGE_FILE_TYPES.some((imageType) => imageType === fileType);
}

// Videos
// File types treated as videos by `isVideoFileType`.
export const VIDEO_FILE_TYPES = [
  FileType.MOV,
  FileType.MP4,
  FileType.MPEG,
  FileType.M4V,
  FileType.FLV,
  FileType.MPEGPS,
  FileType.MPG,
  FileType.WEBM,
  FileType.WMV,
  FileType.THREE_GPP
] as const;

/**
 * Whether the given file type is one of `VIDEO_FILE_TYPES`.
 *
 * @param fileType - File type to test.
 */
export function isVideoFileType(fileType: FileType) {
  // Compare element-wise rather than casting to `any` for `includes`, so the
  // check stays type-safe against the narrow `as const` tuple.
  return VIDEO_FILE_TYPES.some((videoType) => videoType === fileType);
}

// Audio
// File types treated as audio by `isAudioFileType`.
// NOTE(review): OGG has an audio/* mime type in FILE_MIME_TYPES but is not
// listed here — confirm whether that omission is intentional.
export const AUDIO_FILE_TYPES = [
  FileType.AAC,
  FileType.FLAC,
  FileType.MP3,
  FileType.MPA,
  FileType.M4A,
  FileType.MPGA,
  FileType.OPUS,
  FileType.PCM,
  FileType.WAV
] as const;

/**
 * Whether the given file type is one of `AUDIO_FILE_TYPES`.
 *
 * @param fileType - File type to test.
 */
export function isAudioFileType(fileType: FileType) {
  // Compare element-wise rather than casting to `any` for `includes`, so the
  // check stays type-safe against the narrow `as const` tuple.
  return AUDIO_FILE_TYPES.some((audioType) => audioType === fileType);
}

// Text
// File types treated as text by `isTextFileType`.
export const TEXT_FILE_TYPES = [
  FileType.CSV,
  FileType.MD,
  FileType.HTML,
  FileType.RTF,
  FileType.TXT
] as const;

/**
 * Whether the given file type is one of `TEXT_FILE_TYPES`.
 *
 * @param fileType - File type to test.
 */
export function isTextFileType(fileType: FileType) {
  // Compare element-wise rather than casting to `any` for `includes`, so the
  // check stays type-safe against the narrow `as const` tuple.
  return TEXT_FILE_TYPES.some((textType) => textType === fileType);
}

/** Other FileType Groupings */

// File types flagged for conversion to plaintext:
// every audio file type followed by every video file type.
export const FILE_TYPES_TO_CONVERT_TO_PLAINTEXT = [
  ...AUDIO_FILE_TYPES,
  ...VIDEO_FILE_TYPES
] as const;

/**
 * Whether the given file type is one of `FILE_TYPES_TO_CONVERT_TO_PLAINTEXT`
 * (i.e. an audio or video file type).
 *
 * @param fileType - File type to test.
 */
export function shouldConvertFileTypeToPlaintext(fileType: FileType) {
  // Compare element-wise rather than casting to `any` for `includes`, so the
  // check stays type-safe against the narrow `as const` tuple.
  return FILE_TYPES_TO_CONVERT_TO_PLAINTEXT.some(
    (convertibleType) => convertibleType === fileType
  );
}

// Mime types of the file types in FILE_TYPES_TO_CONVERT_TO_PLAINTEXT,
// in the same order.
export const MIME_TYPES_TO_CONVERT_TO_PLAINTEXT =
  FILE_TYPES_TO_CONVERT_TO_PLAINTEXT.map((fileType) =>
    getFileTypeMimeType(fileType)
  );

// File types that are downloaded from Merge as a different type,
// rather than their original file type.
// E.g. Google Docs are downloaded as PDFs.
// Keys are the original file type; values are the file type actually downloaded.
// https://help.merge.dev/en/articles/8615316-file-export-and-download-specification
const MERGE_DOWNLOAD_FILE_TYPES_CONVERSIONS: Partial<
  Record<FileType, FileType>
> = {
  [FileType.GOOGLE_DOC]: FileType.PDF,
  [FileType.GOOGLE_SHEETS]: FileType.PDF,
  [FileType.GOOGLE_DRAWINGS]: FileType.PDF,
  [FileType.GOOGLE_SLIDES]: FileType.PDF
};

/**
 * Resolve the file type a Merge file will have once downloaded.
 *
 * Merge's provided "mimeType" field can be either the file's extension or its
 * actual mime type, so both interpretations are attempted.
 *
 * @param extensionOrMimeType - Value of Merge's "mimeType" field.
 * @returns The converted file type when `MERGE_DOWNLOAD_FILE_TYPES_CONVERSIONS`
 *   has an entry for the resolved type, otherwise the resolved type itself.
 */
export function getMergeDownloadFileTypeFromExtensionOrMimeTypeOrThrow(
  extensionOrMimeType: string
): FileType {
  const originalFileType =
    getFileTypeFromExtensionOrMimeTypeOrThrow(extensionOrMimeType);

  // Fall back to the original type when Merge applies no conversion to it
  return (
    MERGE_DOWNLOAD_FILE_TYPES_CONVERSIONS[originalFileType] ?? originalFileType
  );
}

// Gemini 1.5 Pro Preview accepted file types
// https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/send-multimodal-prompts#gemini-send-multimodal-samples-images-nodejs
//
// Deliberately not asserted `as const`: a readonly tuple is not assignable to
// the mutable `FileType[]` declared here (and returned to callers by
// `getModelSupportedFileTypes`).
export const GEMINI_1_5_PRO_ACCEPTED_FILE_TYPES: FileType[] = [
  // Image
  FileType.PNG,
  FileType.JPEG,
  // Audio
  FileType.AAC,
  FileType.FLAC,
  FileType.MP3,
  FileType.MPA,
  FileType.MPGA,
  FileType.OPUS,
  FileType.PCM,
  FileType.WAV,
  // Video
  FileType.MOV,
  FileType.MPEG,
  FileType.MPEGPS,
  FileType.MPG,
  FileType.MP4,
  FileType.WEBM,
  FileType.WMV,
  FileType.THREE_GPP,
  // PDF
  FileType.PDF
];

/**
 * Whether the given file type is one of `GEMINI_1_5_PRO_ACCEPTED_FILE_TYPES`.
 *
 * @param fileType - File type to test.
 */
export function isGemini15ProAcceptedFileType(fileType: FileType) {
  // The list is declared as `FileType[]`, so no cast is needed for `includes`
  return GEMINI_1_5_PRO_ACCEPTED_FILE_TYPES.includes(fileType);
}

// File types that the frontend can render previews of
export const PREVIEWABLE_FILE_TYPES: readonly FileType[] = [
  FileType.TXT,
  FileType.PDF,
  FileType.PNG,
  FileType.JPEG,
  FileType.HEIC,
  FileType.HEIF,
  FileType.WEBP
] as const;

// File types that can NOT be indexed by LlamaIndexer and used for vector search:
// every image file type plus the spreadsheet-style types listed below.
// IMPORTANT: Corresponds with values in llama_indexer
export const NON_INDEXABLE_VECTOR_SEARCH_FILE_TYPES = [
  ...IMAGE_FILE_TYPES,
  FileType.CSV,
  FileType.GOOGLE_SHEETS,
  FileType.XLSX,
  FileType.XLS
] as const satisfies readonly FileType[];

/**
 * Whether a file type can be indexed by LlamaIndexer and used for vector
 * search, i.e. it is not listed in `NON_INDEXABLE_VECTOR_SEARCH_FILE_TYPES`.
 *
 * @param fileType - File type to test.
 */
export const isIndexableVectorSearchFileType = (fileType: FileType) =>
  // Compare element-wise rather than casting to `any` for `includes`, so the
  // check stays type-safe against the narrow `as const` tuple.
  !NON_INDEXABLE_VECTOR_SEARCH_FILE_TYPES.some(
    (nonIndexableType) => nonIndexableType === fileType
  );

// All file types except images: every FileType enum value for which
// isImageFileType returns false.
export const NON_IMAGE_FILE_TYPES: FileType[] = Object.values(FileType).filter(
  (type) => !isImageFileType(type)
);

/**
 * Get the file types that are supported by a model.
 *
 * @param model - Model to look up.
 * @returns `GEMINI_1_5_PRO_ACCEPTED_FILE_TYPES` when `isGeminiChatLlm` accepts
 *   the model, otherwise `NON_IMAGE_FILE_TYPES`.
 */
export function getModelSupportedFileTypes(model: Llm) {
  if (isGeminiChatLlm(model as SupportedLlm)) {
    return GEMINI_1_5_PRO_ACCEPTED_FILE_TYPES;
  }

  return NON_IMAGE_FILE_TYPES;
}
