
    Ǆg,              	          d dl Z d dlZd dlZd dlZd dlZd dlmZmZmZm	Z	  ej                         Zedk7  rd Znd Zd Zd Zd Zd	 Zd
efdZ	 d$dededefdZdee   d
ee   fdZdee   d
ee   fdZ G d d      Z G d de      Z G d de      Z G d de      Z G d de      Z G d de      Z G d d e      Zd!ed"ed
eee	egdf   fd#Z y)%    N)CallableListOptionalTextIOutf-8c                 V    | j                  t        d      j                  t              S )Nreplace)errors)encodesystem_encodingdecodestrings    U/home/mcse/projects/flask_80/flask-venv/lib/python3.12/site-packages/whisper/utils.py	make_safer      s"     }}_Y}?FFWW    c                     | S N r   s    r   r   r      s    r   c                      | |z  dk(  sJ | |z  S )Nr   r   )xys     r   	exact_divr      s    q5A::6Mr   c                 n    ddd}| |v r||    S t        dt        |j                                d|        )NTF)TrueFalsezExpected one of z, got )
ValueErrorsetkeys)r   str2vals     r   str2boolr!      sC    e,Gv+C,?+@vhOPPr   c                 &    | dk(  rd S t        |       S NNone)intr   s    r   optional_intr&   %   s    V#44V4r   c                 &    | dk(  rd S t        |       S r#   )floatr   s    r   optional_floatr)   )   s    V#46v6r   returnc                 x    | j                  d      }t        |      t        t        j                  |            z  S )Nr   )r   lenzlibcompress)text
text_bytess     r   compression_ratior1   -   s-    W%Jz?Sz!:;;;r   secondsalways_include_hoursdecimal_markerc                     | dk\  sJ d       t        | dz        }|dz  }||dz  z  }|dz  }||dz  z  }|dz  } || dz  z  }|s|dkD  r|ddnd	}| |dd| d| |d
S )Nr   znon-negative timestamp expectedg     @@i6 i`    02d: 03d)round)r2   r3   r4   millisecondshoursminuteshours_markers          r   format_timestampr@   2   s     a<:::<6)*LI%EEI%%Lf$GGf$$Le#GGeO#L&:eaieC[?RL.QwsmN3CLQTCUVr   segmentsc                 D    t        d | D        | r| d   d         S d       S )Nc              3   :   K   | ]  }|d    D ]	  }|d      yw)wordsstartNr   .0sws      r   	<genexpr>zget_start.<locals>.<genexpr>I   s$     :qz:!7::s   r   rE   )nextrA   s    r   	get_startrM   G   s2    :X: (G .2 r   c                 V    t        d t        |       D        | r| d   d         S d       S )Nc              3   L   K   | ]  }t        |d          D ]	  }|d      yw)rD   endN)reversedrF   s      r   rJ   zget_end.<locals>.<genexpr>P   s)     Lax'
7KL!5LLs   "$rP   )rK   rQ   rL   s    r   get_endrS   N   s7    L8H-L'U -1 r   c                   `    e Zd ZU eed<   defdZ	 ddededee   fdZ	 dded	e	dee   fd
Z
y)ResultWriter	extension
output_dirc                     || _         y r   )rW   )selfrW   s     r   __init__zResultWriter.__init__X   s	    $r   Nresult
audio_pathoptionsc                 l   t         j                  j                  |      }t         j                  j                  |      d   }t         j                  j	                  | j
                  |dz   | j                  z         }t        |dd      5 } | j                  |f||d| d d d        y # 1 sw Y   y xY w)Nr   .rI   r   )encoding)filer]   )	ospathbasenamesplitextjoinrW   rV   openwrite_result)rY   r[   r\   r]   kwargsaudio_basenameoutput_pathfs           r   __call__zResultWriter.__call__[   s     ))*5)).9!<ggllOO^c1DNNB
 +sW5 	IDfH1gHH	I 	I 	Is   
B**B3ra   c                     t         r   )NotImplementedErrorrY   r[   ra   r]   ri   s        r   rh   zResultWriter.write_resultg   s
     "!r   r   )__name__
__module____qualname__str__annotations__rZ   dictr   rm   r   rh   r   r   r   rU   rU   U   sg    N%3 % HL
I
I(+
I6>tn
I EI"""("3;D>"r   rU   c                   :    e Zd ZU dZeed<   	 ddededee   fdZ	y)	WriteTXTtxtrV   Nr[   ra   r]   c                 V    |d   D ]!  }t        |d   j                         |d       # y )NrA   r/   Tra   flush)printstriprY   r[   ra   r]   ri   segments         r   rh   zWriteTXT.write_resultp   s3     j) 	BG'&/'')DA	Br   r   
rq   rr   rs   rV   rt   ru   rv   r   r   rh   r   r   r   rx   rx   m   s8    Is EIBB"(B3;D>Br   rx   c                   v    e Zd ZU eed<   eed<   	 dddddddedee   dee   d	ee   d
edee   fdZ	de
fdZy)SubtitlesWriterr3   r4   NF)max_line_widthmax_line_counthighlight_wordsmax_words_per_liner[   r]   r   r   r   r   c             #   F  K   |xs i }xs |j                  d      xs |j                  d      |xs |j                  dd      }xs |j                  d      d u xs d u xs dxs dfd}t        d         d	kD  r d
d   d	   v r |       D ]	  }| j                  |d	   d         }	| j                  |d   d         }
dj                  |D cg c]  }|d   	 c}      }|r|	}|D cg c]  }|d   	 }}t	        |      D ]  \  }}| j                  |d         }| j                  |d         }||k7  r|||f ||dj                  t	        |      D cg c]#  \  }}||k(  rt        j                  dd|      n|% c}}      f |} |	|
|f  y d   D ]T  }| j                  |d         }| j                  |d         }|d   j                         j                  dd      }|||f V y c c}w c c}w c c}}w w)Nr   r   r   Fr   r6   c               3   0  K   d} d}g }t        d         xs d}d   D ]]  }d}}|t        |d         k  st        |d         |z
  }t        |d         |z
  kD  r|}t        |d   |||z          D ]  \  }}	|	j                         }
 xr |
d   |z
  dkD  }| t        |
d         z   k  }|dk(  xr t        |      dkD  xr }| dkD  r|r|s|s| t        |
d         z  } ni|
d   j	                         |
d<   t        |      dkD  r	|s|k\  s|r	| g }d}n| dkD  r|dz  }d	|
d   z   |
d<   t        |
d   j	                               } |j                  |
       |
d   } |z  }|t        |d         k  rD` t        |      dkD  r| y y w)
Nr      rA   g        rD   rE   g      @word
)rM   r,   	enumeratecopyr~   append)line_len
line_countsubtitlelastr   chunk_indexwords_countremaining_wordsioriginal_timingtiming
long_pausehas_room	seg_breakr   r   r   preserve_segmentsr[   s                 r   iterate_subtitlesz9SubtitlesWriter.iterate_result.<locals>.iterate_subtitles   s    HJ#%H#F:$67>3D!*- ,60!C(8$99&)''*:&;k&IO)C0@,AK,OO&5.7({[7PQ/ $/*? "1!5!5!7 11RfWo6Ls6R # $,c&..A#A^#S$%F$Vs8}q/@$VEV	$qL ($.$- %F6N(;;H .4F^-A-A-CF6N #H 1$2$>%/:3O#, '/+--.
!)A *a
15v1Fv'*6&>+?+?+A'BH /%gI$/J  #55KS "C(8$99,6Z 8}q  !s   7FEF?FrA   r   rD   rE   rR   rP   r9   r   z^(\s*)(.*)$z\1<u>\2</u>r/   z-->z->)	getr,   r@   rf   r   resubr~   r	   )rY   r[   r]   r   r   r   r   r   r   subtitle_startsubtitle_endr   subtitle_textr   r   	all_wordsr   	this_wordrE   rP   jr   segment_startsegment_endsegment_textr   s    ` `` `                  @r   iterate_resultzSubtitlesWriter.iterate_result{   s     -R'H7;;7G+H'H7;;7G+H)RW[[9JE-R/T7;;?S3T*d2Ln6L'/4/744	 4	l vj!"Q&7fZ6H6K+K-/ F!%!6!6x{77K!L#44Xb\%5HI "((K$f(K L")D>F GF GI G(1((; #9 $ 5 5i6H I"33Ie4DE5="&}"<<#S"''
 09/C	 %,At $%6 !#~~t L%)!*+    ##  ),EE/F2 "*- ? $ 5 5gg6F G"33GENC&v446>>udK#[,>>	?- )L !Hs,   C H!'H
3H!HA"H!0(HB	H!r2   c                 F    t        || j                  | j                        S )N)r2   r3   r4   )r@   r3   r4   )rY   r2   s     r   r@   z SubtitlesWriter.format_timestamp   s$    !%!:!:..
 	
r   r   )rq   rr   rs   boolru   rt   rv   r   r%   r   r(   r@   r   r   r   r   r   w   s    
 #'g?
 )-(, %,0g?g? $g?
 !g? !g? g? %SMg?R
 
r   r   c                   V    e Zd ZU dZeed<   dZeed<   dZeed<   	 dde	d	e
d
ee	   fdZy)WriteVTTvttrV   Fr3   r_   r4   Nr[   ra   r]   c           	          t        d|        | j                  ||fi |D ]  \  }}}t        | d| d| d|d        y )NzWEBVTT
)ra    --> r   Tr{   )r}   r   )rY   r[   ra   r]   ri   rE   rP   r/   s           r   rh   zWriteVTT.write_result   sY     	jt$ 3 3 3FG Nv N 	IE3UG5RvR0t4H	Ir   r   rq   rr   rs   rV   rt   ru   r3   r   r4   rv   r   r   rh   r   r   r   r   r      sM    Is!&$&NC EIII"(I3;D>Ir   r   c                   V    e Zd ZU dZeed<   dZeed<   dZeed<   	 dde	d	e
d
ee	   fdZy)WriteSRTsrtrV   Tr3   ,r4   Nr[   ra   r]   c                     t         | j                  ||fi |d      D ]#  \  }\  }}}t        | d| d| d| d|d       % y )Nr   )rE   r   r   Tr{   )r   r   r}   )	rY   r[   ra   r]   ri   r   rE   rP   r/   s	            r   rh   zWriteSRT.write_result   sf     &/D:6:!&
 	N!A!sD QCr%cU"TF"5DM	Nr   r   r   r   r   r   r   r      sM    Is!%$%NC EINN"(N3;D>Nr   r   c                   >    e Zd ZU dZdZeed<   	 d	dedede	e   fdZ
y)
WriteTSVa  
    Write a transcript to a file in TSV (tab-separated values) format containing lines like:
    <start time in integer milliseconds>	<end time in integer milliseconds>	<transcript text>

    Using integer milliseconds as start and end times means there's no chance of interference from
    an environment setting a language encoding that causes the decimal in a floating point number
    to appear as a comma; also is faster and more efficient to parse & store, e.g., in C++.
    tsvrV   Nr[   ra   r]   c                 
   t        dddd|       |d   D ]k  }t        t        d|d   z        |d       t        t        d|d   z        |d       t        |d   j                         j                  dd	      |d
       m y )NrE   rP   r/   	)sepra   rA   r6   )ra   rP    Tr{   )r}   r;   r~   r	   r   s         r   rh   zWriteTSV.write_result  s     	guf$T:j) 	UG%ww//0tF%wu~-.TtD'&/'')11$<4tT	Ur   r   )rq   rr   rs   __doc__rV   rt   ru   rv   r   r   rh   r   r   r   r   r     s?     Is EIUU"(U3;D>Ur   r   c                   :    e Zd ZU dZeed<   	 ddededee   fdZ	y)		WriteJSONjsonrV   Nr[   ra   r]   c                 0    t        j                  ||       y r   )r   dumprp   s        r   rh   zWriteJSON.write_result   s     			&$r   r   r   r   r   r   r   r     s4    Is EI  "( 3;D> r   r   output_formatrW   c                     t         t        t        t        t        d}| dk(  rG|j                         D cg c]
  } ||       c}	 ddt        dt        dt        t           ffd}|S  ||    |      S c c}w )N)ry   r   r   r   r   allr[   ra   r]   c                 *    D ]  } || ||fi |  y r   r   )r[   ra   r]   ri   writerall_writerss        r   	write_allzget_writer.<locals>.write_all4  s%     & 8vtW778r   r   )	rx   r   r   r   r   valuesrv   r   r   )r   rW   writersr   r   r   s        @r   
get_writerr   &  s     G 8?8HIfvj)I CG	8	8 &	819$	8 !7=!*-- Js   A5)Fr_   )!r   rb   r   sysr-   typingr   r   r   r   getdefaultencodingr   r   r   r!   r&   r)   r(   r1   r   rt   r@   rv   rM   rS   rU   rx   r   r   r   r   r   r   r   r   r   <module>r      sC    	 	 
  3 3(#((*gX

Q57<u < OR*.HK*T
 x d4j Xe_ " "0B| Br
l r
j
I 
IN NU| U,   ..$'.tVT"D().r   