본문 바로가기

Coding/파이썬

파이썬 셀레니움 크롤링 : 유튜브(제목, 조회수, 게시일자, 채널명, 본문)

유튜브에서 '코딩'을 검색한 결과 창에서 제목, 조회수, 게시일자, 채널명, 본문을 크롤링하고, 그 데이터들을 엑셀에서 열 수 있는 CSV 파일(youtube.csv)로 저장해 보았다.

 

코드

 

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import pandas as pd

# YouTube search-results page for the URL-encoded query "코딩".
SEARCH_URL = "https://www.youtube.com/results?search_query=%EC%BD%94%EB%94%A9"

# Number of result entries to scrape (ytd-video-renderer[1] .. [RESULT_COUNT]).
RESULT_COUNT = 14

# CSV file the scraped table is written to.
OUTPUT_PATH = "youtube.csv"

# Absolute XPath shared by every field of the i-th result; ``{0}`` is the
# 1-based position of the <ytd-video-renderer> element.
# NOTE(review): an absolute path that pins ytd-item-section-renderer[3] is
# tied to one specific page layout -- re-check it if lookups start failing.
VIDEO_XPATH = (
    '/html/body/ytd-app/div[1]/ytd-page-manager/ytd-search/div[1]'
    '/ytd-two-column-search-results-renderer/div'
    '/ytd-section-list-renderer/div[2]'
    '/ytd-item-section-renderer[3]/div[3]'
    '/ytd-video-renderer[{0}]/div[1]/div'
)

# Output column name -> XPath of that field relative to VIDEO_XPATH.
# The insertion order is both the print order and the CSV column order.
FIELD_XPATHS = {
    'video': '/div[1]/div/h3/a/yt-formatted-string',
    'view': '/div[1]/ytd-video-meta-block/div[1]/div[2]/span[1]',
    'date': '/div[1]/ytd-video-meta-block/div[1]/div[2]/span[2]',
    'channel': '/div[2]/ytd-channel-name/div/div/yt-formatted-string/a',
    'explanation': '/div[3]/yt-formatted-string',
}


def field_xpath(index: int, field: str) -> str:
    """Return the absolute XPath of *field* for the result at 1-based *index*.

    Raises:
        KeyError: if *field* is not one of the keys of ``FIELD_XPATHS``.
    """
    return (VIDEO_XPATH + FIELD_XPATHS[field]).format(index)


def scrape_results(driver, count: int = RESULT_COUNT):
    """Read the text of every field of the first *count* search results.

    Each element is looked up exactly once; its text is printed as soon as it
    is read and also stored.

    Args:
        driver: a Selenium WebDriver that has already loaded the results page.
        count: how many results to read, starting from the first one.

    Returns:
        A list with one dict per result, keyed by the names in
        ``FIELD_XPATHS``.
    """
    rows = []
    for index in range(1, count + 1):
        row = {}
        for field in FIELD_XPATHS:
            element = driver.find_element("xpath", field_xpath(index, field))
            row[field] = element.text
            print(row[field])
        rows.append(row)
    return rows


def build_frame(rows):
    """Return a DataFrame with one row per scraped result.

    The columns are the keys of ``FIELD_XPATHS`` in their declared order, so
    an empty *rows* still yields a frame with the full header.
    """
    return pd.DataFrame(rows, columns=list(FIELD_XPATHS))


def main():
    """Scrape the search results, print them, and save them to OUTPUT_PATH."""
    driver = webdriver.Chrome()
    try:
        driver.get(SEARCH_URL)
        # Let element lookups wait up to 10 seconds for the page to render.
        driver.implicitly_wait(10)
        rows = scrape_results(driver)
    finally:
        # Always release the browser, even when a lookup raised.
        driver.quit()

    # Echo the collected values once more, this time from memory.
    for row in rows:
        for field in FIELD_XPATHS:
            print(row[field])

    build_frame(rows).to_csv(OUTPUT_PATH, index=False)


if __name__ == "__main__":
    main()

 

실행 결과