
    ǄgD                        d dl Z d dlmZ d dlmZmZ d dlmZmZ d dl	Z
d dlZd dlmc mZ ddlmZ dZdZd	Zd
Zeez  Z eee      Zedz  Z eee      Z eee      ZefdedefdZefdddedefdZ ed      dedej>                  fd       Z 	 	 	 ddeee
jB                  ej>                  f   dededeeeejD                  f      fdZ#y)    N)	lru_cache)CalledProcessErrorrun)OptionalUnion   )	exact_divi>  i           filesrc                 |   ddddd| dddd	d
ddt        |      dg}	 t        |dd      j                  }t        j                  |t        j                        j                         j                  t        j                        dz  S # t        $ r,}t	        d|j
                  j                                |d}~ww xY w)a?  
    Open an audio file and read as mono waveform, resampling as necessary

    Parameters
    ----------
    file: str
        The audio file to open

    sr: int
        The sample rate to resample the audio if necessary

    Returns
    -------
    A NumPy array containing the audio waveform, in float32 dtype.
    ffmpegz-nostdinz-threads0z-iz-fs16lez-ac1z-acodec	pcm_s16lez-ar-T)capture_outputcheckzFailed to load audio: Ng      @)strr   stdoutr   RuntimeErrorstderrdecodenp
frombufferint16flattenastypefloat32)r   r   cmdoutes        U/home/mcse/projects/flask_80/flask-venv/lib/python3.12/site-packages/whisper/audio.py
load_audior'      s    * 	Cdgs;s2w
CP#d$7>> ==bhh'//188DwNN  P3AHHOO4E3FGHaOPs   B 	B;'B66B;)axislengthr)   c          	         t        j                  |       r| j                  |   |kD  r2| j                  |t        j                  || j
                              } | j                  |   |k  rZdg| j                  z  }d|| j                  |   z
  f||<   t        j                  | |ddd   D cg c]  }|D ]  }|  c}}      } | S | j                  |   |kD  r| j                  t        |      |      } | j                  |   |k  r=dg| j                  z  }d|| j                  |   z
  f||<   t        j                  | |      } | S c c}}w )zO
    Pad or trim the audio array to N_SAMPLES, as expected by the encoder.
    )device)dimindex)r   r   r   Nr(   )indicesr)   )torch	is_tensorshapeindex_selectaranger,   ndimFpadtakeranger   )arrayr*   r)   
pad_widthssizesr7   s         r&   pad_or_trimr=   A   sC    u;;tv%&&VELL I ' E ;;tv% EJJ.J !6EKK,=#=>JtEE%:dd3C!U%u!U#!U#!UVE L ;;tv%JJuV}4J@E;;tv% EJJ.J !6EKK,=#=>JtFF5*-EL "Vs   .E)maxsizen_melsreturnc                 P   |dv s
J d|        t         j                  j                  t         j                  j                  t              dd      }t        j                  |d      5 }t        j                  |d|          j                  |       cddd       S # 1 sw Y   yxY w)	ad  
    load the mel filterbank matrix for projecting STFT into a Mel spectrogram.
    Allows decoupling librosa dependency; saved using:

        np.savez_compressed(
            "mel_filters.npz",
            mel_80=librosa.filters.mel(sr=16000, n_fft=400, n_mels=80),
            mel_128=librosa.filters.mel(sr=16000, n_fft=400, n_mels=128),
        )
    >   P      zUnsupported n_mels: assetszmel_filters.npzF)allow_picklemel_N)
ospathjoindirname__file__r   loadr0   
from_numpyto)r,   r?   filters_pathfs       r&   mel_filtersrQ   [   s     Y?"6vh ??77<< 98EVWL	E	2 ?aD/ 2366v>? ? ?s   (*BB%audiopaddingr,   c                    t        j                  |       s0t        | t              rt	        |       } t        j
                  |       } || j                  |      } |dkD  rt        j                  | d|f      } t        j                  t              j                  | j                        }t        j                  | t        t        |d      }|dddf   j                         dz  }t        | j                  |      }||z  }t        j                   |d	      j#                         }	t        j$                  |	|	j'                         d
z
        }	|	dz   dz  }	|	S )ap  
    Compute the log-Mel spectrogram of

    Parameters
    ----------
    audio: Union[str, np.ndarray, torch.Tensor], shape = (*)
        The path to audio or either a NumPy array or Tensor containing the audio waveform in 16 kHz

    n_mels: int
        The number of Mel-frequency filters, only 80 is supported

    padding: int
        Number of zero samples to pad to the right

    device: Optional[Union[str, torch.device]]
        If given, the audio tensor is moved to this device before STFT

    Returns
    -------
    torch.Tensor, shape = (80, n_frames)
        A Tensor that contains the Mel spectrogram
    Nr   T)windowreturn_complex.r(   r   g|=)ming       @g      @)r0   r1   
isinstancer   r'   rM   rN   r6   r7   hann_windowN_FFTr,   stft
HOP_LENGTHabsrQ   clamplog10maximummax)
rR   r?   rS   r,   rU   r[   
magnitudesfiltersmel_speclog_specs
             r&   log_mel_spectrogramrf   n   s   8 ??5!eS!u%E  ' {ea\*u%((6F::eUJvdSDc3B3h##%*J%,,/G#H{{8/557H}}Xx||~';<H3#%HO    )rB   r   N)$rG   	functoolsr   
subprocessr   r   typingr   r   numpyr   r0   torch.nn.functionalnn
functionalr6   utilsr	   SAMPLE_RATErZ   r\   CHUNK_LENGTH	N_SAMPLESN_FRAMESN_SAMPLES_PER_TOKENFRAMES_PER_SECONDTOKENS_PER_SECONDr   intr'   r=   TensorrQ   ndarrayr,   rf    rg   r&   <module>r{      s2   	  . "      
;&	Y
+ 1n k:6 k+>?  %0 %OS %Oc %OP &/ r s  4 4? ? ? ?( 15	/bjj%,,./// / U3,-.	/rg   