유튜브에서 '코딩'을 검색한 결과 페이지에서 각 영상의 제목, 조회수, 게시일자, 채널명, 본문(영상 설명)을 크롤링하고, 그 데이터를 CSV 파일(youtube.csv)로 저장해 보았다.
코드
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import pandas as pd
# Scrape the first 14 video cards from the YouTube search results for "코딩",
# printing each field as it is read and collecting it into per-column lists.

# Absolute XPath of the container of one search-result card; {0} is the
# 1-based position of the ytd-video-renderer inside the result section.
# NOTE(review): this path is positional (e.g. ytd-item-section-renderer[3]),
# so it presumably breaks whenever YouTube changes its layout - confirm
# against the live page before relying on it.
CARD_XPATH = '/html/body/ytd-app/div[1]/ytd-page-manager/ytd-search/div[1]/ytd-two-column-search-results-renderer/div/ytd-section-list-renderer/div[2]/ytd-item-section-renderer[3]/div[3]/ytd-video-renderer[{0}]/div[1]/div'

# Paths of the scraped fields relative to CARD_XPATH, in the same order as
# the result lists below.
FIELD_XPATHS = (
    '/div[1]/div/h3/a/yt-formatted-string',                    # title
    '/div[1]/ytd-video-meta-block/div[1]/div[2]/span[1]',      # view count
    '/div[1]/ytd-video-meta-block/div[1]/div[2]/span[2]',      # upload date
    '/div[2]/ytd-channel-name/div/div/yt-formatted-string/a',  # channel name
    '/div[3]/yt-formatted-string',                             # description
)

video = []
view = []
date = []
channel = []
explanation = []
columns = (video, view, date, channel, explanation)

driver = webdriver.Chrome()
try:
    driver.get("https://www.youtube.com/results?search_query=%EC%BD%94%EB%94%A9")
    # With an implicit wait set, find_element keeps retrying for up to 10 s
    # instead of failing immediately while the page is still rendering.
    driver.implicitly_wait(10)

    for i in range(1, 15):
        card = CARD_XPATH.format(i)
        # Locate each element once and reuse its text for both the printout
        # and the result lists (previously every element was looked up twice).
        texts = [
            driver.find_element("xpath", card + suffix).text
            for suffix in FIELD_XPATHS
        ]
        for text in texts:
            print(text)
        for column, text in zip(columns, texts):
            column.append(text)
finally:
    # Always close the browser, even when a lookup raises, so no Chrome /
    # chromedriver process is left running.
    driver.quit()

# Echo the collected rows once more, now read back from the lists.
for row in zip(*columns):
    for text in row:
        print(text)
# Assemble the scraped columns into one table; dict insertion order fixes the
# column order as video, view, date, channel, explanation.
df = pd.DataFrame({
    'video': video,
    'view': view,
    'date': date,
    'channel': channel,
    'explanation': explanation,
})
# Write with a UTF-8 byte-order mark: without it Excel does not detect the
# encoding reliably and non-ASCII (e.g. Korean) text can show up garbled.
df.to_csv("youtube.csv", index=False, encoding="utf-8-sig")
실행 결과