This repository has been archived on 2024-09-10. You can view files and clone it, but cannot push or open issues or pull requests.
Douyin_Web_Live/scripts/webdriver.py

63 lines
2.3 KiB
Python

import requests
import json
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.proxy import Proxy, ProxyType
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.expected_conditions import presence_of_element_located
from config.helper import config
from store.mongo import MongoStore
def go(url):
    """Load ``url`` in headless Chrome and persist the room info found on the page.

    The page is opened through the proxy configured under
    ``config()['webdriver']['proxy']``. Once the element with id
    ``RENDER_DATA`` is present, its text content is URL-decoded, parsed as
    JSON, and selected fields of ``initialState.roomStore.roomInfo`` are
    inserted into the ``room`` collection via ``MongoStore``. The function
    then waits for an element with class ``oSu9Aw19`` before the browser is
    closed by the ``with`` block.

    Args:
        url: Address of the page to open.

    Raises:
        KeyError / IndexError: If the decoded JSON lacks an expected field.
        selenium.common.exceptions.TimeoutException: If a waited-for element
            does not appear.
    """
    # Read the webdriver section once instead of re-evaluating config()
    # for every setting.
    webdriver_config = config()['webdriver']
    proxy_address = webdriver_config['proxy']

    chrome_options = Options()
    chrome_options.add_argument('--proxy-server=%s' % proxy_address)
    chrome_options.add_argument('--headless')
    # 2022-04-09: ignore certificate errors.
    chrome_options.add_argument('-ignore-certificate-errors')
    # The original passed '-ignore -ssl-errors' (with an embedded space),
    # which is not a well-formed switch.
    chrome_options.add_argument('--ignore-ssl-errors')
    chrome_options.add_argument('--incognito')

    proxy = Proxy()
    proxy.proxy_type = ProxyType.MANUAL
    proxy.http_proxy = proxy_address
    proxy.ssl_proxy = proxy_address

    # DesiredCapabilities.CHROME is a shared class-level dict; work on a copy
    # so the proxy settings do not leak into other users of it.
    capabilities = DesiredCapabilities.CHROME.copy()
    proxy.add_to_capabilities(capabilities)

    with webdriver.Chrome(
        options=chrome_options,
        desired_capabilities=capabilities,
        executable_path=webdriver_config['bin'],
    ) as driver:
        wait = WebDriverWait(driver, 10)
        # NOTE(review): a 24-hour implicit wait makes each element lookup
        # block for up to a day, so the final wait below presumably serves to
        # keep the browser session open — confirm before changing.
        driver.implicitly_wait(24 * 60 * 60)
        driver.get(url)

        render_data = wait.until(
            presence_of_element_located((By.ID, "RENDER_DATA"))
        )
        # The element's text is URL-encoded JSON.
        json_str = requests.utils.unquote(
            render_data.get_attribute("textContent")
        )
        json_obj = json.loads(json_str)
        room_info = json_obj['initialState']['roomStore']['roomInfo']
        room = room_info['room']

        store = MongoStore()
        try:
            store.set_collection('room')
            store.insert_one({
                'roomId': room_info['roomId'],
                'web_rid': room_info['web_rid'],
                'title': room['title'],
                'user_count_str': room['user_count_str'],
                'cover': room['cover']['url_list'][0],
                'admin_user_ids': room['admin_user_ids'],
                'owner': room['owner'],
            })
        finally:
            # Always release the store, even if a field is missing or the
            # insert fails.
            store.close()

        wait.until(presence_of_element_located((By.CLASS_NAME, "oSu9Aw19")))