Notice
Recent Posts
Recent Comments
Link
«   2025/05   »
1 2 3
4 5 6 7 8 9 10
11 12 13 14 15 16 17
18 19 20 21 22 23 24
25 26 27 28 29 30 31
Tags
more
Archives
Today
Total
관리 메뉴

KJH

Youtube Playlist Auto Download 본문

Python

Youtube Playlist Auto Download

모이스쳐라이징 2021. 1. 1. 19:13

좋아요 해놓은 영상들을 보려고 가끔 들어가서 보면 다시 편집되거나 삭제되는 경우가 간혹 있어서 개인 소장하기 위해 만들어 보았습니다.

 

Update 2022-03-08
저작권 문제로 youtube_dl 사용을 지속적으로 막으려는 시도가 이어지고 있고, 크롤링을 통한 구글 로그인도 2차 인증으로 막히는 등 앞으로는 사용하지 못할 가능성이 높아 보입니다.
https://www.eff.org/deeplinks/2022/03/campaign-shut-down-crucial-documentary-tool-youtube-dl-continues-and-so-does-fight

 

아래는 필요한 모듈들이고 핵심은 [ youtube_dl ] 입니다.

beautifulsoup4==4.9.3
bs4==0.0.1
chromedriver==2.24.1
chromedriver-autoinstaller==0.2.2
fire==0.4.0
selenium==3.141.0
selenium-stealth==1.0.6
youtube-dl==2020.12.31

 

 

your google ID, your google PW에 본인 ID와 PW를 넣습니다.

## Click the first button inside the element with id "openid-buttons".
openid_button = driver.find_element_by_xpath('//*[@id="openid-buttons"]/button[1]')
openid_button.click()

## Type the account ID into the "identifierId" field.
id_field = driver.find_element_by_id('identifierId')
id_field.send_keys('your google ID')

## Click the button that leads to the password step, then wait 3 seconds
## before looking for the password field.
to_password_button = driver.find_element_by_xpath('//*[@id="identifierNext"]')
to_password_button.click()
sleep(3)

## Type the password into the password input.
password_field = driver.find_element_by_xpath('//input[@type="password"]')
password_field.send_keys('your goolge PW')

## Click the login button, then wait 2 seconds.
login_button = driver.find_element_by_xpath('//*[@id="passwordNext"]')
login_button.click()
sleep(2)

Ctrl+Shift+C를 누른 뒤 원하는 버튼을 클릭하면, 개발자 도구의 Elements 탭에서 아래와 같이 해당 버튼이나 input의 XPath를 추출할 수 있습니다. (추출하는 방법을 소개해 드린 것이며, 별도로 진행하실 것은 없습니다.)

XPath

 

google 로그인은 앞에서 했으니 youtube playlist로 접근합니다.

## Navigate the already logged-in browser to the playlist whose list ID is "LL".
liked_playlist_url = 'https://www.youtube.com/playlist?list=LL'
driver.get(liked_playlist_url)

 

youtube playlist 영상 url 추출 하는 과정 입니다.

## Every playlist entry currently present on the page.
videos = driver.find_elements_by_class_name('style-scope ytd-playlist-video-renderer')

## The href of each entry's thumbnail link, in page order.
playlist = [
    video.find_element_by_xpath('.//*[@id="thumbnail"]/a').get_attribute("href")
    for video in videos
]

 

추출된 url을 보면 & 뒤에 해당 영상의 순서(index) 등이 포함되어 있는데, 필요 없는 부분이기 때문에 & 앞까지만 잘라 주는 과정이 필요합니다.

def vidstrip(playlist):
    """Cut every URL in *playlist* just before its first "&".

    The list is modified in place and the same list object is returned.
    A URL that contains no "&" is left unchanged.

    Args:
        playlist: list of URL strings.

    Returns:
        The same list, with each entry shortened to the part before "&".
    """
    for i, url in enumerate(playlist):
        # partition() yields the whole string when "&" is absent, whereas
        # slicing with find()'s -1 result would drop the last character.
        playlist[i] = url.partition("&")[0]
    return playlist

 

youtube playlist는 한 페이지에 영상이 100개씩 load되기 때문에, 좋아요 한 영상이 100개 이상이면 아래와 같이 PAGE_DOWN을 입력하여 영상을 더 불러올 수 있도록 해야 합니다.

## Send PAGE_DOWN to the page body 50 times so that further playlist entries
## get loaded.
page_body = driver.find_element_by_css_selector('body')
for _ in range(50):
    page_body.send_keys(Keys.PAGE_DOWN)
    ## sleep(2)  (optional pause per key press; disabled in the original)

 

함수를 통해 url을 수정하고, 영상을 다운받을 path를 절대경로로 기입하면 끝입니다.

## Shorten each collected URL to the part before its first "&".
vidlist = vidstrip(playlist)

## Directory the videos are saved into (downloads go to the working directory).
os.chdir('C:/Users/mskjh/Downloads')

## youtube_dl options; empty means library defaults.
ydl_opts = {}

## Download the videos one at a time with youtube_dl.
## enumerate() numbers each video by its position in the list, so the number
## printed for a skipped video stays correct after earlier failures.
for count, link in enumerate(vidlist, start=1):
    try:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([link])
    except Exception:
        ## Best effort: report the failure and move on to the next video.
        ## "Exception" (not a bare except) lets Ctrl+C still stop the script.
        print("Exception occured. Either the video has no quality as set by you, or it is not available. Skipping video {number}".format(number = count))

driver.close()

 

 

chrome 보안정책 강화로 인해 stealth 기능 및 접속 url 변경

import os
import shutil
import subprocess
import sys
import time
from msvcrt import getch
from time import sleep

import chromedriver_autoinstaller
import youtube_dl
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.ui import WebDriverWait
from selenium_stealth import stealth

# Remove the Chrome profile directory left by a previous run (cookies / cache).
# A missing directory simply means there is nothing to clean up.
try:
    shutil.rmtree(r"c:\chrometemp")
except FileNotFoundError:
    pass

# Start Chrome with remote debugging on port 9222 and the temporary profile.
# Passing an argument list (instead of one command string) keeps the
# executable path, which contains a space, unambiguous.
subprocess.Popen([
    r'C:\Program Files\Google\Chrome\Application\chrome.exe',
    '--remote-debugging-port=9222',
    r'--user-data-dir=C:\chrometemp',
])

# Attach Selenium to the Chrome instance started above.
option = webdriver.ChromeOptions()
option.add_argument("headless")
option.add_experimental_option("debuggerAddress", "127.0.0.1:9222")

# Major version of the installed Chrome; used as the chromedriver folder name.
chrome_ver = chromedriver_autoinstaller.get_chrome_version().split('.')[0]

try:
    driver = webdriver.Chrome(f'./{chrome_ver}/chromedriver.exe', options=option)
except Exception:
    # Driver could not be started from ./<version>/ -- install one and retry.
    # NOTE(review): install(True) presumably installs under the current
    # working directory; confirm against chromedriver_autoinstaller.
    chromedriver_autoinstaller.install(True)
    driver = webdriver.Chrome(f'./{chrome_ver}/chromedriver.exe', options=option)
driver.implicitly_wait(10)


def vidstrip(playlist):
    """Cut every URL in *playlist* just before its first "&".

    The list is modified in place and the same list object is returned.
    A URL that contains no "&" is left unchanged.

    Args:
        playlist: list of URL strings.

    Returns:
        The same list, with each entry shortened to the part before "&".
    """
    for i, url in enumerate(playlist):
        # partition() yields the whole string when "&" is absent, whereas
        # slicing with find()'s -1 result would drop the last character.
        playlist[i] = url.partition("&")[0]
    return playlist


def download():
    """Collect the video links on the open playlist page and download them.

    Works on the module-level ``driver``: sends PAGE_DOWN five times, waits
    five seconds, reads the thumbnail link of every playlist entry, shortens
    each link with ``vidstrip`` and downloads the videos one by one into
    ``D:\\youtube``. The browser window is closed when the function ends,
    including when it ends because of an error.

    A video that youtube_dl fails to download is reported and skipped.
    Ctrl+C (or ``SystemExit``) ends the program with exit status 0.
    """
    ydl_opts = {}

    # Scroll a little, then give the page time to settle before reading it.
    body = driver.find_element_by_css_selector('body')
    for _ in range(5):
        body.send_keys(Keys.PAGE_DOWN)
    time.sleep(5)

    videos = driver.find_elements_by_class_name('style-scope ytd-playlist-video-renderer')
    playlist = [
        video.find_element_by_xpath('.//*[@id="thumbnail"]/a').get_attribute("href")
        for video in videos
    ]
    vidlist = vidstrip(playlist)

    try:
        # Raw string: "\y" is not a valid escape sequence in a normal literal.
        os.chdir(r'D:\youtube')

        for link in vidlist:
            try:
                with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                    ydl.download([link])
            except (KeyboardInterrupt, SystemExit):
                sys.exit(0)
            except youtube_dl.utils.DownloadError:
                # One unavailable video must not abort the remaining ones.
                print("Download failed, skipping {link}".format(link=link))
    finally:
        # Close the window on every exit path, not only after a clean run.
        driver.close()



def login(username, password):
    """Sign in to Google through the module-level ``driver`` and open the playlist.

    Loads the Google sign-in page, enters *username*, clicks NEXT, enters
    *password*, clicks the sign-in button and finally loads the YouTube
    playlist page with list ID "LL".

    If one of the guarded steps fails, the driver is quit and the function
    returns immediately, so the dead session is never used again here.
    The unguarded wait for the NEXT button still raises on timeout.

    Args:
        username: account name typed into the field with id "Email".
        password: password typed into the field with id "password".

    Returns:
        None, both on success and after a handled failure.
    """
    driver.get("https://accounts.google.com/ServiceLogin?service=youtube&uilel=3&passive=true&continue=https%3A%2F%2Fwww.youtube.com%2Fsignin%3Faction_handle_signin%3Dtrue%26app%3Ddesktop%26hl%3Den%26next%3Dhttps%253A%252F%252Fwww.youtube.com%252F&hl=en&ec=65620")

    try:
        WebDriverWait(driver, 60).until(expected_conditions.presence_of_element_located(
            (By.ID, "Email"))).send_keys(username)      # Enters username
    except TimeoutException:
        driver.quit()
        return      # The session is gone; nothing below can work.
    WebDriverWait(driver, 60).until(expected_conditions.element_to_be_clickable(
        (By.XPATH, "/html/body/div/div[2]/div[2]/div[1]/form/div/div/input"))).click()      # Clicks NEXT
    time.sleep(0.5)

    try:
        try:
            WebDriverWait(driver, 60).until(expected_conditions.presence_of_element_located(
                (By.ID, "password"))).send_keys(password)       # Enters password
        except TimeoutException:
            driver.quit()
            return
        WebDriverWait(driver, 5).until(expected_conditions.element_to_be_clickable(
            (By.ID, "submit"))).click()     # Clicks on Sign-in
    # A tuple is required here: "A or B" evaluates to A alone, which left
    # NoSuchElementException uncaught.
    except (TimeoutException, NoSuchElementException):
        print('\nUsername/Password seems to be incorrect, please re-check\nand Re-Run the program.')
        driver.quit()
        return

    try:
        print('\nLogin Successful!\n')

        driver.get("https://www.youtube.com/playlist?list=LL")
    except TimeoutException:
        print('\nUsername/Password seems to be incorrect, please re-check\nand Re-Run the program.')
        driver.quit()

# Placeholder credentials -- replace them with a real account before running.
USERNAME = "email"
PASSWORD = "password"

# Settings shared by both stealth() calls; only the user agent differs.
_stealth_common = dict(
    languages=["en-US", "en"],
    vendor="Google Inc.",
    platform="Win32",
    webgl_vendor="Intel Inc.",
    renderer="Intel Iris OpenGL Engine",
    fix_hairline=True,
)

# Before login: apply stealth with the user agent 'DN'.
stealth(driver, user_agent='DN', **_stealth_common)

# Sign in; login() also opens the playlist page.
login(USERNAME, PASSWORD)

# After login: switch back to a regular desktop Chrome user agent.
stealth(
    driver,
    user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36',
    **_stealth_common,
)

# Short pause before collecting and downloading the videos.
time.sleep(2)

download()

 

 

'Python' 카테고리의 다른 글

Slack API (fastapi - slack_bolt)  (0) 2024.07.19
azure openAI (GPT4.0, ada-002)  (0) 2023.11.26
Slack API (Legacy)  (0) 2023.10.20
비동기 ??? asyncio ???  (0) 2022.05.15
google spread sheet API  (0) 2021.03.30