
    }iP              
          d Z ddlZddlZddlZddlmZ ddlmZ ddlm	Z	m
Z
mZmZmZ ddlmZmZmZmZ ddlmZ ddlmZmZmZ dd	lmZ dd
lmZ ddlmZmZ  ej>                  e       Z!de"fdZ#d Z$d Z%de"de&fdZ'de(dee	e	f   fdZ)de"de"fdZ*de"de"fdZ+de"de"fdZ,de"de"fdZ-de"de"fdZ.de"de"de"fdZ/de"de"de"fd Z0d!ede"fd"Z1d#e"de"fd$Z2d%e"dee"ee"   f   fd&Z3d#e"de	fd'Z4d#e"de	fd(Z5d#e"de"fd)Z6d*e
d+e
d,e"ddfd-Z7d*e
d+e
de"d.e"ddf
d/Z8d0e
deee
      fd1Z9de"de(fd2Z:de"de"fd3Z;dee   fd4Z<y)5zBThis module contains all non-cipher related data extraction logic.    N)OrderedDict)datetime)AnyDictListOptionalTuple)parse_qsquote	urlencodeurlparse)Cipher)HTMLParseErrorLiveStreamErrorRegexMatchErrorregex_search)YouTubeMetadata)parse_for_objectparse_for_all_objects
watch_htmlc                     	 t        j                  d|       }|r$t        j                  |j	                  d            S y# t
        $ r Y yw xY w)zExtract publish date and return it as a datetime object
    :param str watch_html:
        The html contents of the watch page.
    :rtype: datetime
    :returns:
        Publish date of the video as a datetime object with timezone.
    z\(?<=itemprop=\"datePublished\" content=\")\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}[+-]\d{2}:\d{2}r   N)researchr   fromisoformatgroupAttributeError)r   results     </usr/local/lib/python3.12/dist-packages/pytubefix/extract.pypublish_dater       sP    k
 ))&,,q/::  s   ;? 	A
Ac                 $    dg}|D ]  }|| v s y y)zCheck if live stream recording is available.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: bool
    :returns:
        Whether or not the content is private.
    z,This live stream recording is not available.FT )r   unavailable_stringsstrings      r   recording_availabler%   &   s0     	7 & Z     c                 &    g d}|D ]  }|| v s y y)zCheck if content is private.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: bool
    :returns:
        Whether or not the content is private.
    )zFThis is a private video. Please sign in to verify that you may see it.z"simpleText":"Private video"zThis video is private.TFr"   )r   private_stringsr$   s      r   
is_privater)   8   s*    O
 " Z r&   returnc                 @    	 t        d| d       y# t        $ r Y yw xY w)zCheck if content is age restricted.

    :param str watch_html:
        The html contents of the watch page.
    :rtype: bool
    :returns:
        Whether or not the content is age restricted.
    zog:restrictions:ager   r   FT)r   r   )r   s    r   is_age_restrictedr-   L   s-    +ZqA   s    	player_responsec                     | j                  di       }d| v rd| d   v ryd|v rd|v r|d   |d   gfS d|v r
|d   |d   fS ddgfS )	a  Return the playability status and status explanation of a video.

    For example, a video may have a status of LOGIN_REQUIRED, and an explanation
    of "This is a private video. Please sign in to verify that you may see it."

    This explanation is what gets incorporated into the media player overlay.

    :param str player_response:
        Content of the player's response.
    :rtype: bool
    :returns:
        Playability status and reason of the video.
    playabilityStatusvideoDetailsisLive)LIVE_STREAMzVideo is a live stream.statusreasonmessagesN)get)r.   status_dicts     r   playability_statusr9   \   s     "%%&92>K (~66;;{"x(;x+@*AAA$x(+j*AAA$<r&   jsc                     t        d| d      S )NzsignatureTimestamp:(\d*)   r,   r   )r:   s    r   signature_timestampr=   z   s    3RqAAr&   response_contextc                     t        d| d      S )Nz>visitor_data[',\"\s]+value['\"]:\s?['\"]([a-zA-Z0-9_%-]+)['\"]r<   r,   r   )r>   s    r   visitor_datar@   ~   s    Y[kstuur&   urlc                     t        d| d      S )ar  Extract the ``video_id`` from a YouTube url.

    This function supports the following patterns:

    - :samp:`https://youtube.com/watch?v={video_id}`
    - :samp:`https://youtube.com/embed/{video_id}`
    - :samp:`https://youtu.be/{video_id}`

    :param str url:
        A YouTube url containing a video id.
    :rtype: str
    :returns:
        YouTube video id.
    z(?:v=|\/)([0-9A-Za-z_-]{11}).*r<   r,   r   )rA   s    r   video_idrC      s     93aHHr&   c                 v    t         j                  j                  |       }t        |j                        d   d   S )ao  Extract the ``playlist_id`` from a YouTube url.

    This function supports the following patterns:

    - :samp:`https://youtube.com/playlist?list={playlist_id}`
    - :samp:`https://youtube.com/watch?v={video_id}&list={playlist_id}`

    :param str url:
        A YouTube url containing a playlist id.
    :rtype: str
    :returns:
        YouTube playlist id.
    listr   )urllibparser   r
   query)rA   parseds     r   playlist_idrJ      s1     \\""3'FFLL!&)!,,r&   c                 "   g d}|D ]z  }t        j                  |      }|j                  |       }|s,t        j	                  d|       |j                  d      }|j                  d      }|dk7  r
d| d| c S d| | c S  t        dd	      )
a  Extract the ``channel_name`` or ``channel_id`` from a YouTube url.

    This function supports the following patterns:

    - :samp:`https://youtube.com/c/{channel_name}/*`
    - :samp:`https://youtube.com/channel/{channel_id}/*
    - :samp:`https://youtube.com/u/{channel_name}/*`
    - :samp:`https://youtube.com/user/{channel_id}/*
    - :samp:`https://youtube.com/@{channel_id}/*

    :param str url:
        A YouTube url containing a channel name.
    :rtype: str
    :returns:
        YouTube channel name.
    )z(?:\/(c)\/([%\d\w_\-]+)(\/.*)?)z%(?:\/(channel)\/([%\w\d_\-]+)(\/.*)?)z(?:\/(u)\/([%\d\w_\-]+)(\/.*)?)z"(?:\/(user)\/([%\w\d_\-]+)(\/.*)?)z (?:\/(\@)([%\d\w_\-\.]+)(\/.*)?)"finished regex search, matched: %sr<      @/channel_namepatternscallerpatternr   compiler   loggerdebugr   r   )rA   rQ   rT   regexfunction_match	uri_styleuri_identifiers          r   rP   rP      s    "H  m

7#c*LL=wG&,,Q/I+11!4N7@C7GQyk>"23lqQZP[\j[kMllm z r&   rC   	watch_urlc           	      V    t        d| fddt        |      fddddg      }t        |      S )a  Construct the video_info url.

    :param str video_id:
        A YouTube video identifier.
    :param str watch_url:
        A YouTube watch url.
    :rtype: str
    :returns:
        :samp:`https://youtube.com/get_video_info` with necessary GET
        parameters.
    rC   )psdefaulteurl)hlen_UShtml51cTVHTML5cverz
7.20201028)r   r   _video_info_url)rC   r]   paramss      r   video_info_urlrn      sD     "U9%&"	

F 6""r&   
embed_htmlc                     	 t        d|d      }d|  }t        d| fd|fd|fd	d
dg      }t        |      S # t        $ r d}Y 4w xY w)a<  Construct the video_info url.

    :param str video_id:
        A YouTube video identifier.
    :param str embed_html:
        The html contents of the embed page (for age restricted videos).
    :rtype: str
    :returns:
        :samp:`https://youtube.com/get_video_info` with necessary GET
        parameters.
    z"sts"\s*:\s*(\d+)r<   r,    z!https://youtube.googleapis.com/v/rC   ra   stsrd   rg   rj   )r   r   r   rl   )rC   ro   rr   ra   rm   s        r   video_info_url_age_restrictedrs      su    /1E
 /xj9D"TNCL"	
	F 6""  s   7 AArm   c                     dt        |        S )Nz'https://www.youtube.com/get_video_info?)r   )rm   s    r   rl   rl     s    4Yv5F4GHHr&   htmlc                 p    	 t        |       d   d   }d| S # t        t        f$ r t        |       }Y !w xY w)zGet the base JavaScript url.

    Construct the base JavaScript url, which contains the decipher
    "transforms".

    :param str html:
        The html contents of the watch page.
    assetsr:   zhttps://youtube.com)get_ytplayer_configKeyErrorr   get_ytplayer_js)ru   base_jss     r   js_urlr|   	  sJ    (%d+H5d; !	** o& (!$'(s    55mime_type_codecc                     d}t        j                  |      }|j                  |       }|st        d|      |j	                         \  }}||j                  d      D cg c]  }|j                          c}fS c c}w )a  Parse the type data.

    Breaks up the data in the ``type`` key of the manifest, which contains the
    mime type and codecs serialized together, and splits them into separate
    elements.

    **Example**:

    mime_type_codec('audio/webm; codecs="opus"') -> ('audio/webm', ['opus'])

    :param str mime_type_codec:
        String containing mime type and codecs.
    :rtype: tuple
    :returns:
        The mime type and a list of codecs.

    z,(\w+\/\w+)\;\scodecs=\"([a-zA-Z-0-9.,\s]*)\"r}   rR   ,)r   rV   r   r   groupssplitstrip)r}   rT   rY   results	mime_typecodecsrh   s          r   r}   r}     so    $ >GJJwEll?+G%6HH(Iv&,,s*;<Qqwwy<<<<s   A:c                    dg}|D ]m  }t        j                  |      }|j                  |       }|s,t        j	                  d|       |j                  d      }t        j	                  d|z          |c S  t        dd      )zGet the YouTube player base JavaScript path.

    :param str html
        The html contents of the watch page.
    :rtype: str
    :returns:
        Path to YouTube's base.js file.
    z'(/s/player/[\w\d]+/[\w\d_/.]+/base\.js)rL   r<   zplayer JS: rz   js_url_patternsrR   rU   )ru   r   rT   rY   rZ   yt_player_jss         r   rz   rz   4  s     	3O #  

7#d+LL=wG)//2LLL56   *; r&   c                 L   t         j                  d       ddg}|D ]  }	 t        | |      c S  dg}|D ]  }	 t        | |      c S  t	        dd	      # t        $ r7}t         j                  d|        t         j                  |       Y d}~sd}~ww xY w# t        $ r Y nw xY w)
a  Get the YouTube player configuration data from the watch html.

    Extract the ``ytplayer_config``, which is json data embedded within the
    watch html and serves as the primary source of obtaining the stream
    manifest data.

    :param str html:
        The html contents of the watch page.
    :rtype: str
    :returns:
        Substring of the html containing the encoded manifest data.
    zfinding initial function namezytplayer\.config\s*=\s*ytInitialPlayerResponse\s*=\s*zPattern failed: Nz,yt\.setConfig\(.*['\"]PLAYER_CONFIG['\"]:\s*rx   z#config_patterns, setconfig_patternsrR   )rW   rX   r   r   r   )ru   config_patternsrT   esetconfig_patternss        r   rx   rx   N  s     LL01")O # 	#D'22 	8 & 	#D'22 $.S '  	LL+G956LLO	   		s(   AB	B-BB	B#"B#c                     i }ddg}|D ]'  }	 t        | |      }|D ]  }|j                  |        ) |r|S t        dd      # t        $ r Y Fw xY w)a;  Get the entirety of the ytcfg object.

    This is built over multiple pieces, so we have to find all matches and
    combine the dicts together.

    :param str html:
        The html contents of the watch page.
    :rtype: str
    :returns:
        Substring of the html containing the encoded manifest data.
    z
ytcfg\s=\szytcfg\.set\(	get_ytcfgytcfg_pattenrsrR   )r   updater   r   )ru   ytcfgytcfg_patternsrT   found_objectsobjs         r   r   r   |  s     EN " 	1$@M$ "S!"	 
$4   		s   $A	AAstream_manifestvid_infopo_tokenc           	         t         j                  d       t        |       D ]  \  }}	 |d   }t              }t        t        |      j                        }|j                         D 	
ci c]  \  }	}
|	|
d    }}	}
||d<   |j                   d|j                   |j                   d	t        |       }|| |   d<    y
# t        $ r1 |j	                  di       j	                  d      }|rt        d      Y w xY wc c}
}	w )zApply the proof of origin token to the stream manifest

    :param dict stream_manifest:
        Details of the media streams available.
    :param str po_token:
        Proof of Origin Token.
    zApplying poTokenrA   r0   liveStreamabilityUNKNOWNr   pot://?N)rW   rX   	enumeratery   r7   r   r   r
   rH   itemsschemenetlocpathr   )r   r   r   istreamrA   live_stream
parsed_urlquery_paramskvs              r   apply_po_tokenr     s    LL#%/ (	6	1e}C c]
   3 34 , 2 2 4
1AqtG
 
 'U""#3z'8'8&9*//9J!IVbLcKde$'5!/(  	10"7()  %i00 	1
s   B?)C<?7C98C9url_jsc           	         t        ||      }t               }t        |       D ]n  \  }}	 |d   }t              }
t        t        |      j                        }|j                         D ci c]  \  }}||d    }}}d|v sd|vrd	|v sd
|v rt        j                  d       n3|j                  |d         }t        j                  d|d          ||d<   d|j                         v rm|d   }t        j                  d|        ||vr|j                  |      ||<   nt        j                  d       ||   }||d<   t        j                  d|        |
j                   d|
j                    |
j"                   dt%        |       }|| |   d<   q |j&                  j)                          |j*                  j)                          y# t        $ r2 |j	                  di       j	                  d      }	|	rt        d      Y w xY wc c}}w )a   Apply the decrypted signature to the stream manifest.

    :param dict stream_manifest:
        Details of the media streams available.
    :param str js:
        The contents of the base.js asset file.
    :param str url_js:
        Full base.js url

    )r:   r|   rA   r0   r   r   r   	signaturesz&sig=z&lsig=zsignature found, skip decipher)ciphered_signaturez+finished descrambling signature for itag=%sitagsignzParameter n is: z%Parameter n found skipping decryptionzParameter n deciphered: r   r   N)r   dictr   ry   r7   r   r   r
   rH   r   rW   rX   get_sigkeysget_nsigr   r   r   r   
runner_sigcloserunner_nsig)r   r   r:   r   cipherdiscovered_nr   r   rA   r   r   r   r   r   r   	initial_nnew_ns                    r   apply_signaturer     s)    r&)F6L/ 8(	6	1e}C c]
   3 34 , 2 2 4
1AqtG
 

 #6!w#~S
 LL9: &+FILL=vf~ #,L,##%% %S)ILL+I;78 ,*0//)*DY'DE +E %LLL3E7;<""#3z'8'8&9*//9J!IVbLcKde$'5!q8(t 
q  	10"7()  %i00 	1
s   G
,H
7HHstream_datac                    d| v ryg }d| j                         v r|j                  | d          d| j                         v r|j                  | d          |D ]`  }d|vr.d|v r*t        |d         }|d   d   |d<   |d   d   |d<   d|d	<   nd|vrd|vr| d
   |d<   d|d	<   |j                  d      dk(  |d<   b t        j                  d       |S )a-  Apply various in-place transforms to YouTube's media stream data.

    Creates a ``list`` of dictionaries by string splitting on commas, then
    taking each list item, parsing it as a query string, converting it to a
    ``dict`` and unquoting the value.

    :param dict stream_data:
        Dictionary containing query string encoded values.

    **Example**:

    >>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'}
    >>> apply_descrambler(d, 'foo')
    >>> print(d)
    {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}

    rA   NformatsadaptiveFormatssignatureCipherr   r   Fis_sabrserverAbrStreamingUrlTtypeFORMAT_STREAM_TYPE_OTFis_otfzapplying descrambler)r   extendr
   r7   rW   rX   )r   r   data
cipher_urls       r   apply_descramblerr     s   $  GK$$&&{9-.K,,..{#456  	F!2d!:!$'8"9:J$U+A.DK"3*DI#DO$#4D#@%&=>DK"DO&)-EEX	F LL'(Nr&   c                 l    ddg}|D ]  }	 t        | |      c S  t        dd      # t        $ r Y +w xY w)zExtract the ytInitialData json from the watch_html page.

    This mostly contains metadata necessary for rendering the page on-load,
    such as video information, copyright notices, etc.

    @param watch_html: Html of the watch page
    @return:
    z'window\[['\"]ytInitialData['\"]]\s*=\s*zytInitialData\s*=\s*initial_datainitial_data_patternrR   r   r   r   r   rQ   rT   s      r   r   r   8  sX     	3H  	#J88 9O
PP  		   '	33c                 l    ddg}|D ]  }	 t        | |      c S  t        dd      # t        $ r Y +w xY w)a  Extract the ytInitialPlayerResponse json from the watch_html page.

    This mostly contains metadata necessary for rendering the page on-load,
    such as video information, copyright notices, etc.

    @param watch_html: Html of the watch page
    @return:
    z1window\[['\"]ytInitialPlayerResponse['\"]]\s*=\s*r   initial_player_responseinitial_player_response_patternrR   r   r   s      r   r   r   N  s]     	=)H  	#J88 (1   		r   c                     	 | d   d   d   d   d   d   d   d   d   d   }t        d	 |      }|D cg c]  }|d
   	 }}t        |      S # t         t        f$ r t        g       cY S w xY wc c}w )u<  Get the informational metadata for the video.

    e.g.:
    [
        {
            'Song': '강남스타일(Gangnam Style)',
            'Artist': 'PSY',
            'Album': 'PSY SIX RULES Pt.1',
            'Licensed to YouTube by': 'YG Entertainment Inc. [...]'
        }
    ]

    :rtype: YouTubeMetadata
    contentstwoColumnWatchNextResultsr   r<   videoSecondaryInfoRenderermetadataRowContainermetadataRowContainerRendererrowsc                 &    d| j                         v S )NmetadataRowRenderer)r   )xs    r   <lambda>zmetadata.<locals>.<lambda>  s    '16683 r&   r   )ry   
IndexErrorr   filter)r   metadata_rowsr   s      r   metadatar   g  s    #*:67RS "",../11MO"$$BDDJL 3M 8EE!Q,-EME=)) j! #r""# Fs    A A,A)(A))=__doc__loggingurllib.parserF   r   collectionsr   r   typingr   r   r   r   r	   r
   r   r   r   pytubefix.cipherr   pytubefix.exceptionsr   r   r   pytubefix.helpersr   pytubefix.metadatar   pytubefix.parserr   r   	getLogger__name__rW   strr    r%   r)   boolr-   r   r9   r=   r@   rC   rJ   rP   rn   rs   rl   r|   r}   rz   rx   r   r   r   r   r   r   r   r"   r&   r   <module>r      sV   H   	 #  3 3 = = # Q Q * . D			8	$S &$(# $   sCx <BC BC Bv3 v3 vI# I# I$-S -S -$#c #c #J#S #S #S #4#C #S #S #@IK IC I+ + + =S =U3S	>-B =6# # 4+c +c +\C C D (D  (D  (C  (D  (FHT HT Hs HC HTX HV)4 )HT$Z,@ )XQS QT Q,  2"*h7 "*r&   