
    Ǆg}                        d dl mZmZmZ d dlmZmZmZmZm	Z	m
Z
mZmZ d dlZd dlZd dlmc mZ d dlmZ d dlmZ ddlmZ ddlmZmZ dd	lmZ erdd
lmZ  ej@                         	 d1dddededeeee!   f   fd       Z" ed       G d d             Z# ed       G d d             Z$ G d d      Z% G d de%      Z& G d d      Z' G d de'      Z( G d d       Z) G d! d"e)      Z* G d# d$e)      Z+ G d% d&      Z, G d' d(e,      Z- G d) d*e,      Z. G d+ d,e,      Z/ G d- d.      Z0 ej@                          e#       fddded/e#dee$ee$   f   fd0       Z1y)2    )	dataclassfieldreplace)TYPE_CHECKINGDictIterableListOptionalSequenceTupleUnionN)Tensor)Categorical   )CHUNK_LENGTH)	Tokenizerget_tokenizer)compression_ratio)Whispermodelr   mel	tokenizerreturnc                 r   |!t        | j                  | j                        }|j                  |j                  |j
                  vrt        d      |j                  dk(  }|r|j                  d      }|j                  dd | j                  j                  | j                  j                  fk7  r| j                  |      }|j                  d   }t        j                  |j                   gg|z        j#                  |j$                        }| j'                  ||      dddf   }t        j(                  |j                  d   t        j*                        }d	|t-        |j.                        <   t0        j2                   |dd|f<   |j5                  d
      }|j7                  d
      j9                         }	t;        |      D 
cg c]I  }
t=        |j.                  |j>                        D ci c]  \  }}||	|
|f   jA                          c}}K }}}
}|r
|d   }|d   }||fS c c}}w c c}}}
w )ao  
    Detect the spoken language in the audio, and return them as list of strings, along with the ids
    of the most probable language tokens and the probability distribution over all language tokens.
    This is performed outside the main decode loop in order to not interfere with kv-caching.

    Returns
    -------
    language_tokens : Tensor, shape = (n_audio,)
        ids of the most probable language tokens, which appears after the startoftranscript token.
    language_probs : List[Dict[str, float]], length = n_audio
        list of dictionaries containing the probability distribution over all languages.
    N)num_languageszCThis model doesn't have language tokens so it can't perform lang id   r   )dtypeFdim)!r   is_multilingualr   languagelanguage_tokensot_sequence
ValueErrorndim	unsqueezeshapedimsn_audio_ctxn_audio_stateencodertorchtensorsottodevicelogitsonesboollistall_language_tokensnpinfargmaxsoftmaxcpurangezipall_language_codesitem)r   r   r   singlen_audioxr3   masklanguage_tokenslanguage_token_probsijclanguage_probss                 X/home/mcse/projects/flask_80/flask-venv/lib/python3.12/site-packages/whisper/decoding.pydetect_languagerL      s     !!!1D1D
	 	"##9+A+AAQ
 	
 XX]FmmA yy~%**00%**2J2JKKmmC  iilGy}}o&0144SZZ@A\\!S!!Q$'F ::fll2&ejj9D05Di++	,-vvgF1d7Ommm+O!>>b>1557 w 
  I999;W;WX	
1 #AqD)..00	
N  )!,'*N**	
s   
(H22 H,H2,H2T)frozenc                   L   e Zd ZU dZeed<   dZee   ed<   dZe	ed<   dZ
ee   ed<   dZee   ed<   dZee   ed	<   dZee	   ed
<   dZee	   ed<   dZeeeee   f      ed<   dZeeeee   f      ed<   dZeeeee   f      ed<   dZeed<   dZeed<   dZee	   ed<   dZeed<   y)DecodingOptions
transcribetaskNr#   g        temperature
sample_lenbest_of	beam_sizepatiencelength_penaltypromptprefixz-1suppress_tokensTsuppress_blankFwithout_timestamps      ?max_initial_timestampfp16)__name__
__module____qualname__rQ   str__annotations__r#   r
   rR   floatrS   intrT   rU   rV   rW   rX   r   r	   rY   rZ   r   r[   r5   r\   r^   r_        rK   rO   rO   P   s     D# #Hhsm" K $J$!GXc]!#Ix}# $Hhuo$ '+NHUO* /3FHU3S	>*+2.2FHU3S	>*+2 <@OXeC#$678?ND  %$-08E?0 D$rh   rO   c                       e Zd ZU eed<   eed<   dZeeee	f      ed<    e
e      Zee   ed<   dZeed<   ej"                  Ze	ed	<   ej"                  Ze	ed
<   ej"                  Ze	ed<   ej"                  Ze	ed<   y)DecodingResultaudio_featuresr#   NrJ   )default_factorytokens textavg_logprobno_speech_probrR   r   )r`   ra   rb   r   rd   rc   rJ   r
   r   re   r   r6   rm   r	   rf   ro   r8   nanrp   rq   rR   r   rg   rh   rK   rj   rj   u   sz    M15NHT#u*-.5d3FDI3D#NKFFNE"K!vvu%rh   rj   c                   0    e Zd ZdededefdZddZddZy)		Inferencerm   rk   r   c                     t         )zAPerform a forward pass on the decoder and return per-token logitsNotImplementedErrorselfrm   rk   s      rK   r3   zInference.logits       !!rh   Nc                     t         )z9Update the key-value cache according to the updated beamsrv   )ry   source_indicess     rK   rearrange_kv_cachezInference.rearrange_kv_cache   rz   rh   c                      y)z:Clean up any resources or hooks after decoding is finishedNrg   ry   s    rK   cleanup_cachingzInference.cleanup_caching   s    rh   )r   N)r`   ra   rb   r   r3   r}   r   rg   rh   rK   rt   rt      s&    "V "V " ""rh   rt   c                   <    e Zd ZdddefdZdededefdZd	 Zd
 Zy)PyTorchInferencer   r   initial_token_lengthc                 h   || _         || _        i | _        g | _        | j                   j                  j
                  D cg c]  }|j                  j                   }}| j                   j                  j
                  D cg c]  }|j                  j                   }}||z   | _	        y c c}w c c}w N)
r   r   kv_cachehooksdecoderblocksattnkeyvalue
kv_modules)ry   r   r   blockkey_modulesvalue_moduless         rK   __init__zPyTorchInference.__init__   s     %
$8!
37::3E3E3L3LM%uzz~~MM7;zz7I7I7P7PQe))QQ%5 NQs   B* B/rm   rk   r   c                    | j                   s'| j                  j                         \  | _         | _        |j                  d   | j
                  kD  r|d d dd f   }| j                  j                  ||| j                         S )Nr   )r   )r   r   install_kv_cache_hooksr   r)   r   r   rx   s      rK   r3   zPyTorchInference.logits   sj    }}(,

(I(I(K%DM4:<<d777ArsF^Fzz!!&.4==!QQrh   c                 b    | j                   D ]  }|j                           i | _        g | _         y r   )r   remover   )ry   hooks     rK   r   z PyTorchInference.cleanup_caching   s.    JJ 	DKKM	 
rh   c                     |t        t        t        |                  k7  r?| j                  D ]/  }| j                  |   |   j                         | j                  |<   1 y y r   )r6   r=   lenr   r   detach)ry   r|   modules      rK   r}   z#PyTorchInference.rearrange_kv_cache   sX    T%N(;"<==// W(,f(=n(M(T(T(Vf%W >rh   N)	r`   ra   rb   rf   r   r   r3   r   r}   rg   rh   rK   r   r      s>    6i 6s 6RV RV R RWrh   r   c                   >    e Zd Zdeee      deee      dee   fdZy)SequenceRankerrm   sum_logprobsr   c                     t         )z
        Given a list of groups of samples and their cumulative log probabilities,
        return the indices of the samples in each group to select as the final result
        rv   ry   rm   r   s      rK   rankzSequenceRanker.rank   s
     "!rh   N)r`   ra   rb   r	   r   re   rf   r   rg   rh   rK   r   r      s5    "4<("8<T%[8I"	c"rh   r   c                   J    e Zd ZdZdee   fdZdeee      deee      fdZ	y)MaximumLikelihoodRankerz
    Select the sample with the highest log probabilities, penalized using either
    a simple length normalization or Google NMT paper's length penalty
    rW   c                     || _         y r   )rW   )ry   rW   s     rK   r   z MaximumLikelihoodRanker.__init__   s
    ,rh   rm   r   c           
            fd}|D cg c]  }|D cg c]  }t        |       c} }}}t        ||      D cg c]!  \  }}t        j                   |||            # c}}S c c}w c c}}w c c}}w )Nc                     g }t        | |      D ]=  \  }}j                  |}nd|z   dz  j                  z  }|j                  ||z         ? |S )N      )r>   rW   append)logprobslengthsresultlogproblengthpenaltyry   s         rK   scoresz,MaximumLikelihoodRanker.rank.<locals>.scores   sf    F#&x#9 1&&.$G !"F
a/D4G4GGGg/01 Mrh   )r   r>   r8   r:   )	ry   rm   r   r   str   pls	   `        rK   r   zMaximumLikelihoodRanker.rank   sb    		 1771A&qCF&7747g4NODAq		&A,'OO '7Os   	A-A(A-&A3(A-N)
r`   ra   rb   __doc__r
   re   r   r	   r   r   rg   rh   rK   r   r      s?    
-x -P4V- PT$u+=N Prh   r   c            
       j    e Zd Zd Zdedededeeef   fdZdededeeee      e	e	e
      f   fdZy)	TokenDecoderc                      y)z=Initialize any stateful variables for decoding a new sequenceNrg   r   s    rK   resetzTokenDecoder.reset   s    rh   rm   r3   r   r   c                     t         )a  Specify how to select the next token, based on the current trace and logits

        Parameters
        ----------
        tokens : Tensor, shape = (n_batch, current_sequence_length)
            all tokens in the context so far, including the prefix and sot_sequence tokens

        logits : Tensor, shape = (n_batch, vocab_size)
            per-token logits of the probability distribution at the current step

        sum_logprobs : Tensor, shape = (n_batch)
            cumulative log probabilities for each sequence

        Returns
        -------
        tokens : Tensor, shape = (n_batch, current_sequence_length + 1)
            the tokens, appended with the selected next token

        completed : bool
            True if all sequences has reached the end of text

        rv   )ry   rm   r3   r   s       rK   updatezTokenDecoder.update   s
    2 "!rh   c                     t         )a  Finalize search and return the final candidate sequences

        Parameters
        ----------
        tokens : Tensor, shape = (n_audio, n_group, current_sequence_length)
            all tokens in the context so far, including the prefix and sot_sequence

        sum_logprobs : Tensor, shape = (n_audio, n_group)
            cumulative log probabilities for each sequence

        Returns
        -------
        tokens : Sequence[Sequence[Tensor]], length = n_audio
            sequence of Tensors containing candidate token sequences, for each audio input

        sum_logprobs : List[List[float]], length = n_audio
            sequence of cumulative log probabilities corresponding to the above

        rv   r   s      rK   finalizezTokenDecoder.finalize   s
    , "!rh   N)r`   ra   rb   r   r   r   r5   r   r   r	   re   r   rg   rh   rK   r   r      sp    L""&,"<B"	vt|	"6"",2"	x()4U+<<	="rh   r   c            
       N    e Zd ZdedefdZdedededeeef   fdZ	dedefd	Z
y
)GreedyDecoderrR   eotc                      || _         || _        y r   )rR   r   )ry   rR   r   s      rK   r   zGreedyDecoder.__init__  s    &rh   rm   r3   r   r   c                 :   | j                   dk(  r|j                  d      }n't        || j                   z        j                         }t	        j
                  |j                         d      }|t        j                  |j                  d         |f   }|||d d df   | j                  k7  z  z  }| j                  ||d d df   | j                  k(  <   t        j                  ||d d d f   gd      }|d d df   | j                  k(  j                         }||fS )Nr   r   r    )r3   )rR   r:   r   sampleFlog_softmaxre   r.   aranger)   r   catall)ry   rm   r3   r   next_tokensr   current_logprobs	completeds           rK   r   zGreedyDecoder.update  s     q  --B-/K%Vd6F6F-FGNNPK==R8#ELL1B$C[$PQ(F1b5MTXX,EFF15F1b5MTXX-.FK4$89rBArE]dhh.335	y  rh   c                 j    t        j                  |d| j                        }||j                         fS )N)r   r   )r   )r   padr   tolistr   s      rK   r   zGreedyDecoder.finalize'  s,    vvTXX6|**,,,rh   N)r`   ra   rb   re   rf   r   r   r   r5   r   r   rg   rh   rK   r   r     sV    E  !!&,!<B!	vt|	!$-v -V -rh   r   c            
       f    e Zd Z	 ddedededee   fdZd Zde	d	e	d
e	de
e	ef   fdZde	d
e	fdZy)BeamSearchDecoderNrU   r   	inferencerV   c                     || _         || _        || _        |xs d| _        t	        || j                  z        | _        d | _        | j
                  dkD  sJ d| d| d       y )Nr]   r   zInvalid beam size (z) or patience ())rU   r   r   rV   roundmax_candidatesfinished_sequences)ry   rU   r   r   rV   s        rK   r   zBeamSearchDecoder.__init__.  ss     #" C#(T]])B#C"& !#	G ?8*AF	G#rh   c                     d | _         y r   )r   r   s    rK   r   zBeamSearchDecoder.reset@  s
    "&rh   rm   r3   r   r   c                     |j                   d    j                  z  dk7  r%t        |j                    d j                   d      |j                   d    j                  z  } j                  t	        |      D cg c]  }i  c} _        t        j                  |j                         d      }g g g }	}}t	        |      D ]K  }
i i i }}}t	         j                        D ]  }|
 j                  z  |z   }||   j                         }t        ||   j                   j                  dz          D ]B  \  }}||   |z   j                         }t        ||j                         gz         }|||<   |||<   D  d}t        ||j                  d      D ]i  }|d    j                  k(  r	||   ||<   ||   |t!        |      <   |j#                  |       |j#                  ||          |dz  }| j                  k(  si n |	j#                  |       N t%        j&                  ||j(                  	      } j*                  j-                  |       t!         j                        t!        |	      k(  sJ t         j                  |	      D ]D  \  }}t        ||j                  d      D ]$  }t!        |       j.                  k\  r ;||   ||<   & F t1         fd
 j                  D              }||fS c c}w )Nr   z[0] % z != 0r   r    r   T)r   reverser2   c              3   N   K   | ]  }t        |      j                  k\    y wr   )r   r   ).0	sequencesry   s     rK   	<genexpr>z+BeamSearchDecoder.update.<locals>.<genexpr>z  s(      
 	Nd111
s   "%)r)   rU   r&   r   r=   r   r   re   r   r>   topkr@   tuplesortedgetr   r   r   r.   r/   r2   r   r}   r   r   )ry   rm   r3   r   rB   _r   r   r|   r   rG   r   sourcesfinishedrH   idxrY   r   tokennew_logprobsequencesavedpreviously_finishednewly_finishedseqr   s   `                         rK   r   zBeamSearchDecoder.updateC  s    <<?T^^+q0~VDNN3C5IJJ,,q/T^^3""*38>&Bar&BD#==R8:<b"%7^w 	0A(*BXGF 4>>* ,$..(1,++-&)8C=+=+=dnnq>P+Q&R ,NGU#/#4w#>"D"D"FK$Vuzz|n%<=H'2F8$(+GH%	,, E"6vzz4H 
B<488+)/)9HX&5;H5EL[!12&&x0"))'(*;<QJE.
 %%h/7	0: k&--@)).9 4**+s3E/FFFF36##%74
 	?/ n.2D2DdS ?*+t/B/BB+9#+>#C(?	?  
!44
 
	 y  g 'Cs   ;	K/preceding_tokensc           
         |j                         }t        | j                        D ]  \  }}t        |      | j                  k  st        t        j                  ||               d d d   D ]a  }|||f   j                         | j                  gz   }||   |   j                         |t        |      <   t        |      | j                  k\  sa   | j                  D cg c]3  }|j                         D cg c]  }t        j                  |       c}5 }}}| j                  D cg c]  }t        |j                                }}||fS c c}w c c}}w c c}w )Nr   )r<   	enumerater   r   rU   r6   r8   argsortr   r   r@   r   keysr.   r/   values)	ry   r   r   rG   r   rH   r   r   rm   s	            rK   r   zBeamSearchDecoder.finalize  sI   #'')%d&=&=> 	LAyI/bjja9:4R4@ A/15<<>$((KH1=a1C1H1H1JIeHo.9~7			 "44&
 +4..*:;3U\\#;&
 &

 7;6M6M+
)2D!!#$+
 +
 |## <&
+
s   !E8EE* EEr   )r`   ra   rb   rf   rt   r
   re   r   r   r   r   r5   r   r   rg   rh   rK   r   r   -  s     %)GG G 	G
 5/G$';!;!&,;!<B;!	vt|	;!z$ $v $rh   r   c                        e Zd ZdededdfdZy)LogitFilterr3   rm   r   Nc                     t         )a  Apply any filtering or masking to logits in-place

        Parameters
        ----------
        logits : Tensor, shape = (n_batch, vocab_size)
            per-token logits of the probability distribution at the current step

        tokens : Tensor, shape = (n_batch, current_sequence_length)
            all tokens in the context so far, including the prefix and sot_sequence tokens

        rv   ry   r3   rm   s      rK   applyzLogitFilter.apply  s
     "!rh   )r`   ra   rb   r   r   rg   rh   rK   r   r     s    "F "F "t "rh   r   c                   ,    e Zd ZdedefdZdedefdZy)SuppressBlankr   sample_beginc                      || _         || _        y r   )r   r   )ry   r   r   s      rK   r   zSuppressBlank.__init__  s    "(rh   r3   rm   c                     |j                   d   | j                  k(  rJt        j                   |d d | j                  j                  d      | j                  j                  gz   f<   y y )Nr    )r)   r   r8   r9   r   encoder   r   s      rK   r   zSuppressBlank.apply  sS    <<?d///LNFF7F1dnn++C0DNN4F4F3GGGH 0rh   N)r`   ra   rb   r   rf   r   r   r   rg   rh   rK   r   r     s+    )) )3 )SF SF Srh   r   c                   .    e Zd Zdee   fdZdedefdZy)SuppressTokensrZ   c                 $    t        |      | _        y r   )r6   rZ   )ry   rZ   s     rK   r   zSuppressTokens.__init__  s    #O4rh   r3   rm   c                 H    t         j                   |d d | j                  f<   y r   )r8   r9   rZ   r   s      rK   r   zSuppressTokens.apply  s    +-66'q$&&&'rh   N)r`   ra   rb   r   rf   r   r   r   rg   rh   rK   r   r     s%    5 52F 2F 2rh   r   c                   6    e Zd Zdededee   fdZdedefdZy)	ApplyTimestampRulesr   r   max_initial_timestamp_indexc                 .    || _         || _        || _        y r   )r   r   r  )ry   r   r   r  s       rK   r   zApplyTimestampRules.__init__  s     #(+F(rh   r3   rm   c                 T   | j                   j                  ,t        j                   |d d | j                   j                  f<   t	        |j
                  d         D ]c  }||| j                  d f   }|j                         D cg c]  }| }}t        |      dk\  xr |d   | j                   j                  k\  }t        |      dk  xs |d   | j                   j                  k\  }|r[|r-t        j                   ||| j                   j                  d f<   n,t        j                   ||d | j                   j                  f<   ||j                  | j                   j                           }	|	j                         dkD  s&|r|s|	d   }
n|	d   dz   }
t        j                   ||| j                   j                  |
f<   f |j
                  d   | j                  k(  rzt        j                   |d d d | j                   j                  f<   | j                  @| j                   j                  | j                  z   }t        j                   |d d |dz   d f<   t        j                  |j!                         d      }t	        |j
                  d         D ]  }||| j                   j                  d f   j#                  d      }||d | j                   j                  f   j%                         }||kD  sat        j                   ||d | j                   j                  f<    y c c}w )Nr   r   r   r   r   r    )r   no_timestampsr8   r9   r=   r)   r   r   r   timestamp_beginr   genumelr  r   r   re   	logsumexpmax)ry   r3   rm   ksampled_tokensr   r   last_was_timestamppenultimate_was_timestamp
timestampstimestamp_lastlast_allowedr   timestamp_logprobmax_text_token_logprobs                  rK   r   zApplyTimestampRules.apply  s   >>''379vvgF1dnn2223 v||A' 	UA#At'8'8':$:;N,335616C6CAK#b'T^^-K-K"K  C1IB4>>+I+I I & ",CE66'F1dnn<<>>?79vvgF12 2 2223'!!$.."@"@AJ !A% &.G%/^N%/^a%7NNPffWq$..88>IIJ5	U8 <<?d///;=66'F1666667 //;NN22T5U5UU  24q,*,,- ==R8v||A' 	FA (DNN,J,J,L)L M W W !X ! &.a1Q4>>3Q3Q1Q.Q%R%V%V%X" #99?Avvgq:DNN::::;	FO 7s   	L%N)	r`   ra   rb   r   rf   r
   r   r   r   rg   rh   rK   r  r    sA    GG G &.c]	G5FF 5FF 5Frh   r  c                       e Zd ZU eed<   eed<   eed<   ee   ed<   ddde	fdZ
de	d	e	fd
Zd	ee   fdZd	ee   fdZdefdZdedefdZdedefdZ ej*                         ded	ee   fd       Zy)DecodingTaskr   sequence_rankerr   logit_filtersr   r   optionsc                 b   || _         |j                  xs d}t        |j                  |j                  ||j
                        }|| _        | j                  |      | _        |j                  xs |j                  xs d| _        |j                  j                  | _        |j                  xs |j                  j                  dz  | _        |j                   | _        | j                  j"                  r|j$                  | _        | j'                         | _        t+        | j(                        | _        | j(                  j/                  |j0                        | _        t5        |t+        | j(                              | _        t9        |j:                        | _        |j                  <t?        |j                  |j@                  | j6                  |jB                        | _"        n%tG        |jH                  |j@                        | _"        g | _%        | j                  jL                  r9| jJ                  jO                  tQ        | j                  | j,                               | j                  jR                  r2| jJ                  jO                  tU        | jW                                      |j"                  s~tX        |j                  jZ                  z  }d }|j\                  r"t_        | j                  j\                  |z        }| jJ                  jO                  ta        || j,                  |             y y )Nen)r   r#   rQ   r   r   )1r   r#   r   r"   r   rQ   r   _verify_optionsr  rU   rT   n_groupr*   
n_text_ctxn_ctxrS   r%   r\   #sot_sequence_including_notimestamps_get_initial_tokensinitial_tokensr   r   indexr0   	sot_indexr   r   r   rW   r  r   r   rV   r   r   rR   r  r[   r   r   rZ   r   _get_suppress_tokensr   r+   r^   r   r  )ry   r   r  r#   r   	precisionr  s          rK   r   zDecodingTask.__init__  sv   
##+t!!!--	
	 %.(,(<(<W(E#--EEA**//
&11OUZZ5J5Ja5O(1(>(><<** ) M MD*.*B*B*D!$T%8%8!9"1177	F *%T5H5H1IJ  7w7M7MN (,!!9==$..'BRBRDL ))<)<immLDL  <<&&%%mDNNDDUDU&VW<<''%%nT5N5N5P&QR))$uzz'='==I*.',,.3LL66B/+ %%#t002M *rh   r   c                 N   |j                   |j                  t        d      |j                  dk(  r|j                  t        d      |j                  |j                   t        d      |j
                  ,d|j
                  cxk  rdk  st        d       t        d      |S )Nz-beam_size and best_of can't be given togetherr   z4best_of with greedy sampling (T=0) is not compatiblez'patience requires beam_size to be givenr   z8length_penalty (alpha) should be a value between 0 and 1)rU   rT   r&   rR   rV   rW   )ry   r  s     rK   r  zDecodingTask._verify_options<  s    (W__-HLMM!#* !WXX'G,=,=,EFGG!!-'',1,WXX -WXXrh   c                 b   t        | j                        }| j                  j                  x}rqt	        |t
              r,| j                  j                  d|j                         z         n|}| j                  "| j                  dz  | j                  z
  }|| d  }||z   }| j                  j                  x}rot	        |t
              r,| j                  j                  d|j                         z         n|}| j                  j                  g|| j                  dz  dz
   d  z   |z   }t        |      S )Nr   r   r   )r6   r%   r  rY   
isinstancerc   r   r   striprS   r!  rX   sot_prevr   )ry   rm   rY   prefix_tokensmax_prefix_lenrX   prompt_tokenss          rK   r#  z DecodingTask._get_initial_tokensK  s(   d''(\\(((6( fc* %%cFLLN&:; 
 *!%q4??!B -~o.> ?m+F\\(((6( fc* %%cFLLN&:;  (()$**/A"5 6 89:  V}rh   c                 &   | j                   j                  }t        |t              r'|j	                  d      D cg c]  }t        |       }}d|v r;|D cg c]
  }|dk\  s	| }}|j                  | j                  j                         n*|t        |      dk(  rg }nt        |t              sJ d       |j                  | j                  j                  | j                  j                  | j                  j                  | j                  j                  | j                  j                  g       | j                  j                   %|j#                  | j                  j                          t%        t'        t)        |                  S c c}w c c}w )N,r   r   zsuppress_tokens must be a list)r  rZ   r+  rc   splitrf   extendr   non_speech_tokensr   r6   rP   	translater0   r-  sot_lm	no_speechr   r   r   set)ry   rZ   r   s      rK   r'  z!DecodingTask._get_suppress_tokensg  sB   ,,66os+/>/D/DS/IJ!s1vJOJ *9DQQ!VqDOD""4>>#C#CD$O(<(A Oot4V6VV4))((""''%%	
 >>##/""4>>#;#;<VC0122/ K Es   F	
F!Fr   c                    | j                   j                  r|j                         }|j                  dd  | j                  j
                  j                  | j                  j
                  j                  fk(  r|}n| j                  j                  |      }|j                  | j                   j                  rt        j                  nt        j                  k7  rt        d|j                         S |S )Nr   z'audio_features has an incorrect dtype: )r  r_   halfr)   r   r*   r+   r,   r-   r   r.   float16float32	TypeError)ry   r   rk   s      rK   _get_audio_featuresz DecodingTask._get_audio_features  s    <<((*C99RS>JJOO''JJOO))
 

 !N!ZZ//4N!\\..EMMEMM
 9.:N:N9OP  rh   rk   rm   c                    | j                   j                  g|j                  d   z  }d }| j                   j                  | j                   j                  dk(  ry| j                  j                  || j                        \  }}|D cg c]  }t        ||j                         }}| j                   j                  ||d d | j                  dz   f<   ||fS c c}w )Nr   lang_id)r   r   )
r  r#   r)   rQ   r   rL   r   r  r   r&  )ry   rk   rm   	languages
lang_probslang_tokensprobss          rK   _detect_languagezDecodingTask._detect_language  s    \\**+n.B.B1.EE	
<<  (DLL,=,=,J&*jj&@&@'#K AKKuU		2KIK||$$,0;q$..1,,-*$$	 Ls   Cc                 >   |j                   d   }t        j                  ||j                        }t        j
                  g|z  }	 t        | j                        D ]  }| j                  j                  ||      }|dk(  rr| j                  j                  \|d d | j                  f   j                         j                  d      }|d d | j                  j                  f   j                         }|d d df   }| j                   D ]  }	|	j#                  ||        | j$                  j'                  |||      \  }}
|
s|j                   d   | j(                  kD  s n | j                  j+                          |||fS # | j                  j+                          w xY w)Nr   r   r   r    )r)   r.   zerosr2   r8   rr   r=   rS   r   r3   r   r8  r&  re   r;   r   r  r   r   r   r!  r   )ry   rk   rm   n_batchr   no_speech_probsrG   r3   probs_at_sotlogit_filterr   s              rK   
_main_loopzDecodingTask._main_loop  so   ,,q/${{7>;P;PQ66(W,	-4??+ ..v~F Ft~~77C#)!T^^*;#<#B#B#D#L#LQS#L#TL&21dnn6N6N3N&O&V&V&XO  2 %)$6$6 7L &&vv67 %)LL$7$7$U!	R 04:: =), NN**,|_44 NN**,s   DF  F   Fc                 V   | j                   j                          | j                  }|j                  d   }| j	                  |      }t        j                  | j                  g      j                  |d      }| j                  ||      \  }}| j                  j                  dk(  r/t        |||      D 	
cg c]  \  }}	}
t        ||	|
       c}
}	}S |j                  | j                  d      j!                  |j"                        }| j%                  ||      \  }}}|d d | j                     }|d d | j                     }|j                  d   t'        |      cxk(  r|k(  sJ  J |j)                  || j                  d      }|j)                  || j                        }| j                   j+                  ||      \  }}|D cg c]=  }|D cg c]/  }|| j,                  ||j.                  k(  j1                         d    1 c}? }}}| j2                  j5                  ||      }t        ||      D cg c]  \  }}||   j7                          }}}|D cg c]!  }|j9                  |      j;                         # }}t        ||      D cg c]
  \  }}||    }}}t        ||      D cg c]  \  }}|t'        |      dz   z   }}}||||||f}t'        t=        t?        t&        |                  dk7  r%tA        dtC        t?        t&        |                   t        | D 	cg c]9  \  }}	}}}}t        ||	||||| j                  jD                  tG        |      	      ; c}}}}}	}S c c}
}	}w c c}w c c}}w c c}}w c c}w c c}}w c c}}w c c}}}}}	}w )
Nr   r   rA  )rk   r#   rJ   r    r   )r   r   zinconsistent result lengths: )rk   r#   rm   ro   rp   rq   rR   r   )$r   r   r   r)   r?  r.   r/   r$  repeatrF  r  rQ   r>   rj   repeat_interleaver  r1   r2   rM  r   reshaper   r   r   nonzeror  r   r   decoder,  r9  mapRuntimeErrorr6   rR   r   )ry   r   r   rB   rk   rm   rB  rJ   featuresr#   rE  r   rJ  r   r   selectedrG   textslpavg_logprobsfieldsro   rp   rq   s                           rK   runzDecodingTask.run  s   #~~	yy|!%!9!9#!>t':':&;<CCGQO %)$9$9.&$Q!	><<	)
 25"I~2	  .Hh #+hu  ))$,,A)>AA.BWBWX 15PV0W-o (4<<8)/T\\/:##A&#o*>I'IIIIIr:#++GT\\B  $||44V\J &
 STTQQt  A$6#?#?#A$#GHT&
 &
 '',,V\B=@6=R"STQ1Q4;;="S"SAGHAI,,Q/557HH8;Hl8S$Tuq"RU$T$T+.v|+D%
"'!RB#a&1*%
 %

 
 s3sF#$%*!>tCVDT?U>VWXX RUR
 
 Nh+~ '!'- LL44"3D"9	
 	
g4 U&
 #TH$T%

s<   6M8	N4M?N7N
&NN3N2>N!?NN)r`   ra   rb   rt   rd   r   r   r	   r   rO   r   r  r   rf   r#  r'  r   r?  rF  rM  r.   no_gradrj   r\  rg   rh   rK   r  r    s    ##$$8i 8/ 8t ? U3Z 83eCj 3:v ,%v %v %5 5 5@ U]]_L
v L
$~"6 L
 L
rh   r  r  c                     |j                   dk(  x}r|j                  d      }|rt        |fi |}t        | |      j	                  |      }|r|d   S |S )a;  
    Performs decoding of 30-second audio segment(s), provided as Mel spectrogram(s).

    Parameters
    ----------
    model: Whisper
        the Whisper model instance

    mel: torch.Tensor, shape = (80, 3000) or (*, 80, 3000)
        A tensor containing the Mel spectrogram(s)

    options: DecodingOptions
        A dataclass that contains all necessary options for decoding 30-second segments

    Returns
    -------
    result: Union[DecodingResult, List[DecodingResult]]
        The result(s) of decoding contained in `DecodingResult` dataclass instance(s)
    r   r   )r'   r(   r   r  r\  )r   r   r  kwargsrA   r   s         rK   rS  rS    s_    4 QvmmA',V,%)--c2F6!9*F*rh   r   )2dataclassesr   r   r   typingr   r   r   r	   r
   r   r   r   numpyr8   r.   torch.nn.functionalnn
functionalr   r   torch.distributionsr   audior   r   r   r   utilsr   r   r   r]  dictrL   rO   rj   rt   r   r   r   r   r   r   r   r   r   r  r  rS  rg   rh   rK   <module>rj     s   1 1 X X X      +  / $ :>:+:+!:+.7:+
64::+ :+z $! ! !H $	& 	& 	&  Wy  WF" "Pn P45" 5"p-L -:g$ g$T" " SK S2[ 2@F+ @FFY
 Y
x   /0!+!+	!+ !+
 >4//0!+ !+rh   