63 lines
2.3 KiB
Python
63 lines
2.3 KiB
Python
import requests
|
|
import json
|
|
|
|
from selenium import webdriver
|
|
from selenium.webdriver.chrome.options import Options
|
|
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
|
from selenium.webdriver.common.proxy import Proxy, ProxyType
|
|
from selenium.webdriver.common.by import By
|
|
from selenium.webdriver.support.ui import WebDriverWait
|
|
from selenium.webdriver.support.expected_conditions import presence_of_element_located
|
|
|
|
from config.helper import config
|
|
from store.mongo import MongoStore
|
|
|
|
def _room_document(room_info):
    """Return the subset of ``roomInfo`` that is stored in the ``room`` collection.

    Raises:
        KeyError / IndexError: if an expected field is missing from
            ``room_info`` (including an empty cover ``url_list``).
    """
    room = room_info['room']
    return {
        'roomId': room_info['roomId'],
        'web_rid': room_info['web_rid'],
        'title': room['title'],
        'user_count_str': room['user_count_str'],
        'cover': room['cover']['url_list'][0],
        'admin_user_ids': room['admin_user_ids'],
        'owner': room['owner'],
    }


def _save_room(document):
    """Insert ``document`` into the ``room`` collection, always closing the store."""
    store = MongoStore()
    try:
        store.set_collection('room')
        store.insert_one(document)
    finally:
        # Close the store even when the insert fails so it is not leaked.
        store.close()


def go(url):
    """Open ``url`` in headless Chrome behind the configured proxy and record the room.

    The page's ``RENDER_DATA`` element is read, URL-unquoted and parsed as
    JSON; selected fields of ``initialState.roomStore.roomInfo`` are inserted
    into the ``room`` collection. The browser session is then kept open while
    waiting for an element with class ``oSu9Aw19`` to appear.

    Args:
        url: Address of the page to load.

    Raises:
        KeyError / IndexError: if the parsed payload lacks an expected field.
        Exceptions raised by Selenium, ``json.loads`` or the store are
        propagated unchanged.
    """
    # Read the webdriver section once instead of calling config() repeatedly.
    webdriver_config = config()['webdriver']
    proxy_address = webdriver_config['proxy']

    chrome_options = Options()
    chrome_options.add_argument('--proxy-server=%s' % proxy_address)
    chrome_options.add_argument('--headless')

    # 2022-04-09: ignore certificate errors.
    chrome_options.add_argument('-ignore-certificate-errors')
    # Was '-ignore -ssl-errors': the embedded space made it a single bogus switch.
    chrome_options.add_argument('--ignore-ssl-errors')
    chrome_options.add_argument('--incognito')

    proxy = Proxy()
    proxy.proxy_type = ProxyType.MANUAL
    proxy.http_proxy = proxy_address
    proxy.ssl_proxy = proxy_address

    # Copy first: add_to_capabilities() mutates the dict it is given, and
    # DesiredCapabilities.CHROME is a class-level dict shared process-wide.
    capabilities = DesiredCapabilities.CHROME.copy()
    proxy.add_to_capabilities(capabilities)

    with webdriver.Chrome(options=chrome_options,
                          desired_capabilities=capabilities,
                          executable_path=webdriver_config['bin']
                          ) as driver:
        wait = WebDriverWait(driver, 10)

        # Implicit wait of 24 hours: a single element lookup may block that
        # long, which outlasts the 10-second explicit wait above.
        # NOTE(review): presumably deliberate, to keep the session open for
        # the lifetime of the room - confirm before shortening.
        driver.implicitly_wait(24 * 60 * 60)

        driver.get(url)

        first_result = wait.until(presence_of_element_located((By.ID, "RENDER_DATA")))
        json_str = requests.utils.unquote(first_result.get_attribute("textContent"))
        json_obj = json.loads(json_str)

        room_info = json_obj['initialState']['roomStore']['roomInfo']
        _save_room(_room_document(room_info))

        # Block until this element appears; the result itself is unused.
        # NOTE(review): "oSu9Aw19" looks like a generated class name - its
        # meaning cannot be determined from this file.
        wait.until(presence_of_element_located((By.CLASS_NAME, "oSu9Aw19")))
|
|
|