
    }iG                        d dl mZmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
mZmZmZmZmZ d dlmZ d dlZd dlmZmZmZ d	d
lmZ d	dlmZmZmZ d	dlmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+ e G d d             Z,e G d d             Z-e G d d             Z. G d de/e      Z0 G d de/e      Z1dede/defdZ2 G d d      Z3 G d d      Z4 G d d       Z5 G d! d"      Z6y)#    )	dataclassasdict)Enum)chain)unescape)ListDictIteratorIterablePatternOptional)ElementTreeN)	HTTPErrorSessionResponse   )ProxyConfig)	WATCH_URLINNERTUBE_CONTEXTINNERTUBE_API_URL)VideoUnavailableYouTubeRequestFailedNoTranscriptFoundTranscriptsDisabledNotTranslatableTranslationLanguageNotAvailableFailedToCreateConsentCookieInvalidVideoId	IpBlockedRequestBlockedAgeRestrictedVideoUnplayableYouTubeDataUnparsablePoTokenRequiredc                   .    e Zd ZU eed<   eed<   	 eed<   y)FetchedTranscriptSnippettextstartdurationN)__name__
__module____qualname__str__annotations__float     N/usr/local/lib/python3.12/dist-packages/youtube_transcript_api/_transcripts.pyr&   r&   "   s    
IL Or1   r&   c                       e Zd ZU dZee   ed<   eed<   eed<   eed<   eed<   de	e   fdZ
defd	Zdefd
Zdee   fdZy)FetchedTranscriptz
    Represents a fetched transcript. This object is iterable, which allows you to
    iterate over the transcript snippets.
    snippetsvideo_idlanguagelanguage_codeis_generatedreturnc                 ,    t        | j                        S N)iterr5   selfs    r2   __iter__zFetchedTranscript.__iter__>   s    DMM""r1   c                      | j                   |   S r<   )r5   )r?   indexs     r2   __getitem__zFetchedTranscript.__getitem__A   s    }}U##r1   c                 ,    t        | j                        S r<   )lenr5   r>   s    r2   __len__zFetchedTranscript.__len__D   s    4==!!r1   c                 >    | D cg c]  }t        |       c}S c c}w r<   )r   )r?   snippets     r2   to_raw_datazFetchedTranscript.to_raw_dataG   s    /34Gw444s   N)r*   r+   r,   __doc__r   r&   r.   r-   boolr
   r@   rC   intrF   r	   rI   r0   r1   r2   r4   r4   1   sf    
 +,,MM#(#;< #$$< $" "5T$Z 5r1   r4   c                   "    e Zd ZU eed<   eed<   y)_TranslationLanguager7   r8   N)r*   r+   r,   r-   r.   r0   r1   r2   rN   rN   K   s    Mr1   rN   c                       e Zd ZdZdZdZy)_PlayabilityStatusOKERRORLOGIN_REQUIREDN)r*   r+   r,   rQ   rR   rS   r0   r1   r2   rP   rP   Q   s    	BE%Nr1   rP   c                       e Zd ZdZdZdZy)_PlayabilityFailedReasonu%   Sign in to confirm you’re not a botz/This video may be inappropriate for some users.zThis video is unavailableN)r*   r+   r,   BOT_DETECTEDAGE_RESTRICTEDVIDEO_UNAVAILABLEr0   r1   r2   rU   rU   W   s    :LFN3r1   rU   responser6   r:   c                     	 | j                   dk(  rt        |      | j                          | S # t        $ r}t	        ||      d }~ww xY w)Ni  )status_coder   raise_for_statusr   r   )rY   r6   errors      r2   _raise_http_errorsr^   ]   sN    43&H%%!!# 4"8U334s   +. 	AAAc                   z    e Zd Zdededededededee   fdZdd	ed
e	fdZ
d
efdZed
efd       Zded
d fdZy)
Transcripthttp_clientr6   urlr7   r8   r9   translation_languagesc                     || _         || _        || _        || _        || _        || _        || _        |D ci c]  }|j                  |j                   c}| _        yc c}w )z
        You probably don't want to initialize this directly. Usually you'll access Transcript objects using a
        TranscriptList.
        N)_http_clientr6   _urlr7   r8   r9   rc   _translation_languages_dict)	r?   ra   r6   rb   r7   r8   r9   rc   translation_languages	            r2   __init__zTranscript.__init__h   sm     ( 	 *(%:" )>,
$ !..0D0M0MM,
( ,
s   Apreserve_formattingr:   c                 t   d| j                   v rt        | j                        | j                  j	                  | j                         }t        |      j                  t        || j                        j                        }t        || j                  | j                  | j                  | j                        S )z
        Loads the actual transcript data.
        :param preserve_formatting: whether to keep select HTML text formatting
        z&exp=xpe)rj   )r5   r6   r7   r8   r9   )rf   r$   r6   re   get_TranscriptParserparser^   r'   r4   r7   r8   r9   )r?   rj   rY   r5   s       r2   fetchzTranscript.fetch   s    
 "!$--00$$((3$9LMSSx7<<
 !]]]],,**
 	
r1   c                 x    dj                  | j                  | j                  | j                  rd      S d      S )Nz7{language_code} ("{language}"){translation_description}z[TRANSLATABLE] )r7   r8   translation_description)formatr7   r8   is_translatabler>   s    r2   __str__zTranscript.__str__   sN    HOO]],,8<8L8L$4 P 
 	
 SU P 
 	
r1   c                 2    t        | j                        dkD  S )Nr   )rE   rc   r>   s    r2   rt   zTranscript.is_translatable   s    4--.22r1   c           	      $   | j                   st        | j                        || j                  vrt	        | j                        t        | j                  | j                  dj                  | j                  |      | j                  |   |dg       S )Nz{url}&tlang={language_code})rb   r8   T)	rt   r   r6   rg   r   r`   re   rs   rf   )r?   r8   s     r2   	translatezTranscript.translate   s    ##!$--00 @ @@1$--@@MM)00II] 1  ,,];

 
	
r1   NF)r*   r+   r,   r   r-   rK   r   rN   ri   r4   ro   ru   propertyrt   rx   r0   r1   r2   r`   r`   g   s    

 
 	

 
 
 
  $$89
4
 
:K 
&
 
 3 3 3
s 
| 
r1   r`   c            	          e Zd ZdZdedeeef   deeef   dee   fdZ	e
dededed	d fd
       Zd	ee   fdZdee   d	efdZdee   d	efdZdee   d	efdZdee   deeeef      d	efdZd	efdZdee   d	efdZy)TranscriptListz
    This object represents a list of transcripts. It can be iterated over to list all transcripts which are available
    for a given YouTube video. Also, it provides functionality to search for a transcript in a given language.
    r6   manually_created_transcriptsgenerated_transcriptsrc   c                 <    || _         || _        || _        || _        y)a  
        The constructor is only for internal use. Use the static build method instead.

        :param video_id: the id of the video this TranscriptList is for
        :param manually_created_transcripts: dict mapping language codes to the manually created transcripts
        :param generated_transcripts: dict mapping language codes to the generated transcripts
        :param translation_languages: list of languages which can be used for translatable languages
        N)r6   _manually_created_transcripts_generated_transcripts_translation_languages)r?   r6   r}   r~   rc   s        r2   ri   zTranscriptList.__init__   s#     !-I*&;#&;#r1   ra   captions_jsonr:   c                    |j                  dg       D cg c]  }t        |d   d   d   d   |d           }}i }i }|d   D ]|  }|j                  d	d
      dk(  r|}n|}t        | ||d   j                  dd
      |d   d   d   d   |d   |j                  d	d
      dk(  |j                  dd      r|ng       ||d   <   ~ t	        ||||      S c c}w )a]  
        Factory method for TranscriptList.

        :param http_client: http client which is used to make the transcript retrieving http calls
        :param video_id: the id of the video this TranscriptList is for
        :param captions_json: the JSON parsed from the YouTube pages static HTML
        :return: the created TranscriptList
        translationLanguageslanguageNamerunsr   r'   languageCoder7   r8   captionTrackskindrq   asrbaseUrlz	&fmt=srv3nameisTranslatableF)rl   rN   r`   replacer|   )	ra   r6   r   rh   rc   r}   r~   captiontranscript_dicts	            r2   buildzTranscriptList.build   s+   " )6(9(9:PRT(U!

 %	 !-n=fEaHP2>B!
 !
 (*$ "$_5 	G{{62&%/"7">7A	"**;;'*62'FB'50)05Eu)M%SU8OGN34	  (!!	
 	
7!
s   #Cc                 z    t        | j                  j                         | j                  j                               S r<   )r   r   valuesr   r>   s    r2   r@   zTranscriptList.__iter__   s3    ..557''..0
 	
r1   language_codesc                 R    | j                  || j                  | j                  g      S )a>  
        Finds a transcript for a given language code. Manually created transcripts are returned first and only if none
        are found, generated transcripts are used. If you only want generated transcripts use
        `find_manually_created_transcript` instead.

        :param language_codes: A list of language codes in a descending priority. For example, if this is set to
        ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if
        it fails to do so.
        :return: the found Transcript
        )_find_transcriptr   r   r?   r   s     r2   find_transcriptzTranscriptList.find_transcript  s.     $$//1L1LM
 	
r1   c                 <    | j                  || j                  g      S )a  
        Finds an automatically generated transcript for a given language code.

        :param language_codes: A list of language codes in a descending priority. For example, if this is set to
        ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if
        it fails to do so.
        :return: the found Transcript
        )r   r   r   s     r2   find_generated_transcriptz(TranscriptList.find_generated_transcript  s      $$^d6Q6Q5RSSr1   c                 <    | j                  || j                  g      S )a|  
        Finds a manually created transcript for a given language code.

        :param language_codes: A list of language codes in a descending priority. For example, if this is set to
        ['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if
        it fails to do so.
        :return: the found Transcript
        )r   r   r   s     r2    find_manually_created_transcriptz/TranscriptList.find_manually_created_transcript  s%     $$T??@
 	
r1   transcript_dictsc                 f    |D ]  }|D ]  }||v s||   c c S   t        | j                  ||       r<   )r   r6   )r?   r   r   r8   r   s        r2   r   zTranscriptList._find_transcript,  sJ    
 , 	:M#3 : O3*=99:	:
  ~tDDr1   c           	      8   dj                  | j                  | j                  d | j                  j	                         D              | j                  d | j
                  j	                         D              | j                  d | j                  D                    S )Na  For this video ({video_id}) transcripts are available in the following languages:

(MANUALLY CREATED)
{available_manually_created_transcript_languages}

(GENERATED)
{available_generated_transcripts}

(TRANSLATION LANGUAGES)
{available_translation_languages}c              3   2   K   | ]  }t        |        y wr<   r-   .0
transcripts     r2   	<genexpr>z)TranscriptList.__str__.<locals>.<genexpr>C  s      [ J[   c              3   2   K   | ]  }t        |        y wr<   r   r   s     r2   r   z)TranscriptList.__str__.<locals>.<genexpr>G  s      K$.JKr   c              3   j   K   | ]+  }d j                  |j                  |j                         - yw)z{language_code} ("{language}")r   N)rs   r7   r8   )r   rh   s     r2   r   z)TranscriptList.__str__.<locals>.<genexpr>J  s>      K
 )	 1771::"6"D"D 8 Ks   13)r6   /available_manually_created_transcript_languagesavailable_generated_transcriptsavailable_translation_languages)rs   r6   _get_language_descriptionr   r   r   r   r>   s    r2   ru   zTranscriptList.__str__8  s    0 &]]<@<Z<Z ["&"D"D"K"K"M[ = -1,J,J K262M2M2T2T2VK - -1,J,J K
 -1,G,GK -  
	
r1   transcript_stringsc                 >    dj                  d |D              }|r|S dS )N
c              3   @   K   | ]  }d j                  |        yw)z - {transcript})r   N)rs   r   s     r2   r   z;TranscriptList._get_language_description.<locals>.<genexpr>T  s&       
 $$
$; 
s   None)join)r?   r   descriptions      r2   r   z(TranscriptList._get_language_descriptionS  s,    ii  
0 
 
 *{5v5r1   N)r*   r+   r,   rJ   r-   r	   r`   r   rN   ri   staticmethodr   r   r
   r@   r   r   r   r   r   ru   r   r0   r1   r2   r|   r|      s4   
<< '+3
?&;<  $CO4	<
  $$89<( +
+
(++
<@+
	+
 +
Z
(:. 

hsm 

 
 	T 	T* 	T
&sm
	

E 
E tCO45
E 
	
E
 
66HSM 6c 6r1   r|   c                       e Zd Zdedee   fdZdedefdZ	ddede
defdZd	ededefd
ZdededefdZdededdfdZd	ededdfdZdedefdZdedefdZdededefdZy)TranscriptListFetcherra   proxy_configc                      || _         || _        y r<   )re   _proxy_config)r?   ra   r   s      r2   ri   zTranscriptListFetcher.__init__\  s    ')r1   r6   r:   c                 b    t         j                  | j                  || j                  |            S r<   )r|   r   re   _fetch_captions_json)r?   r6   s     r2   ro   zTranscriptListFetcher.fetch`  s/    ##%%h/
 	
r1   
try_numberc                 x   	 | j                  |      }| j                  ||      }| j                  ||      }| j                  ||      S # t        $ rg}| j
                  dn| j
                  j                  }|dz   |k  r| j                  ||dz         cY d }~S |j                  | j
                        d }~ww xY w)Nr   r   )r   )	_fetch_video_html_extract_innertube_api_key_fetch_innertube_data_extract_captions_jsonr    r   retries_when_blockedr   with_proxy_config)r?   r6   r   htmlapi_keyinnertube_data	exceptionretriess           r2   r   z*TranscriptListFetcher._fetch_captions_jsong  s    	B))(3D55dHEG!77'JN..~xHH 	B %%- ''<< 
 A~'00jSTn0UU--d.@.@AA	Bs%   AA	 		B9AB4B9B44B9r   c                     d}t        j                  ||      }|r-t        |j                               dk(  r|j	                  d      S d|v rt        |      t        |      )Nz)"INNERTUBE_API_KEY":\s*"([a-zA-Z0-9_-]+)"r   zclass="g-recaptcha")researchrE   groupsgroupr   r#   )r?   r   r6   patternmatchs        r2   r   z0TranscriptListFetcher._extract_innertube_api_keyw  sX    >		'4(S(A-;;q>! D(H%%#H--r1   r   c                     | j                  |j                  d      |       |j                  di       j                  d      }|d|vrt        |      |S )NplayabilityStatuscaptionsplayerCaptionsTracklistRendererr   )_assert_playabilityrl   r   )r?   r   r6   r   s       r2   r   z,TranscriptListFetcher._extract_captions_json  s^      !3!34G!H(S&**:r:>>-
  O=$H%h//r1   playability_status_dataNc                 .   |j                  d      }|t        j                  j                  k7  ra|]|j                  d      }|t        j                  j                  k(  rP|t
        j                  j                  k(  rt        |      |t
        j                  j                  k(  rt        |      |t        j                  j                  k(  rU|t
        j                  j                  k(  r8|j                  d      s|j                  d      rt        |      t        |      |j                  di       j                  di       j                  di       j                  dg       }t        |||D cg c]  }|j                  d	d
       c}      y y c c}w )Nstatusreasonzhttp://zhttps://errorScreenplayerErrorMessageRenderer	subreasonr   r'   rq   )rl   rP   rQ   valuerS   rU   rV   r    rW   r!   rR   rX   
startswithr   r   r"   )r?   r   r6   playability_statusr   
subreasonsruns          r2   r   z)TranscriptListFetcher._assert_playability  sc   488B"4"7"7"="==".,00:F!%7%F%F%L%LL5BBHHH(225DDJJJ'11"&8&>&>&D&DD6HHNNN&&y1X5H5H5T(22&x00'++M2>126["%VR	  "&*"M337762#6"M + / >. #Ns   0Fc                     t        j                  d|      }|t        |      | j                  j                  j                  dd|j                  d      z   d       y )Nzname="v" value="(.*?)"CONSENTzYES+r   z.youtube.com)domain)r   r   r   re   cookiessetr   )r?   r   r6   r   s       r2   _create_consent_cookiez,TranscriptListFetcher._create_consent_cookie  sV    		2D9=-h77!!%%vA.~ 	& 	
r1   c                     | j                  |      }d|v r2| j                  ||       | j                  |      }d|v rt        |      |S )Nz&action="https://consent.youtube.com/s")_fetch_htmlr   r   )r?   r6   r   s      r2   r   z'TranscriptListFetcher._fetch_video_html  sR    )3t;''h7##H-D74?1(;;r1   c                     | j                   j                  t        j                  |            }t	        t        ||      j                        S )N)r6   )re   rl   r   rs   r   r^   r'   )r?   r6   rY   s      r2   r   z!TranscriptListFetcher._fetch_html  s<    $$(()9)98)LM*8X>CCDDr1   r   c                     | j                   j                  t        j                  |      t        |d      }t        ||      j                         }|S )N)r   )contextvideoId)json)re   postr   rs   r   r^   r   )r?   r6   r   rY   datas        r2   r   z+TranscriptListFetcher._fetch_innertube_data  sR    $$))$$W5,# * 
 "(H5::<r1   )r   )r*   r+   r,   r   r   r   ri   r-   r|   ro   rL   r	   r   r   r   r   r   r   r   r   r0   r1   r2   r   r   [  s    *G *8K;P *
c 
n 
BS Bc B$ B .s .c .c .	T 	S 	T 	4 3 SW :
3 
# 
$ 
# # EC EC E	c 	C 	D 	r1   r   c                   N    e Zd Zg dZd	defdZdedee   fdZdede	e
   fdZy)
rm   )
strongembimarksmalldelinssubsuprj   c                 0    | j                  |      | _        y r<   )_get_html_regex_html_regex)r?   rj   s     r2   ri   z_TranscriptParser.__init__  s    //0CDr1   r:   c                     |rIdj                  | j                        }d|z   dz   }t        j                  |t        j                        }|S t        j                  dt        j                        }|S )N|z<\/?(?!\/?(z
)\b).*?\b>z<[^>]*>)r   _FORMATTING_TAGSr   compile
IGNORECASE)r?   rj   formats_regex
html_regexs       r2   r   z!_TranscriptParser._get_html_regex  s`    HHT%:%:;M*]:]JMM2==AJ  J>Jr1   raw_datac                 X   t        j                  |      D cg c]  }|j                  yt        t	        j
                  | j                  dt        |j                              t        |j                  d         t        |j                  j                  dd                   c}S c c}w )Nrq   r(   durz0.0)r'   r(   r)   )r   
fromstringr'   r&   r   r   r   r   r/   attribrl   )r?   r  xml_elements      r2   rn   z_TranscriptParser.parse  s      +55h?
 + %VVD,,b(;;K;K2LMK..w78{1155eUCD
 	
 
s   BB'Nry   )r*   r+   r,   r  rK   ri   r   r-   r   r   r&   rn   r0   r1   r2   rm   rm     sH    ED E4 GCL 	
c 	
d+C&D 	
r1   rm   )7dataclassesr   r   enumr   	itertoolsr   r   r   typingr   r	   r
   r   r   r   
defusedxmlr   r   requestsr   r   r   proxiesr   	_settingsr   r   r   _errorsr   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r&   r4   rN   r-   rP   rU   r^   r`   r|   r   rm   r0   r1   r2   <module>r     s    )    D D " 	 1 1   F F   $    5 5 52   
&d &4sD 44 4S 4X 4J
 J
Zd6 d6Nk k\#
 #
r1   