diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..b98b726 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "cSpell.words": [ + "douyin", + "mitmproxy" + ] +} \ No newline at end of file diff --git a/config/__pycache__/__init__.cpython-38.pyc b/config/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..8e9ddf5 Binary files /dev/null and b/config/__pycache__/__init__.cpython-38.pyc differ diff --git a/config/__pycache__/helper.cpython-38.pyc b/config/__pycache__/helper.cpython-38.pyc new file mode 100644 index 0000000..33ae2ff Binary files /dev/null and b/config/__pycache__/helper.cpython-38.pyc differ diff --git a/messages/__pycache__/__init__.cpython-38.pyc b/messages/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..6708204 Binary files /dev/null and b/messages/__pycache__/__init__.cpython-38.pyc differ diff --git a/messages/__pycache__/base.cpython-38.pyc b/messages/__pycache__/base.cpython-38.pyc new file mode 100644 index 0000000..6c0835d Binary files /dev/null and b/messages/__pycache__/base.cpython-38.pyc differ diff --git a/messages/__pycache__/chat.cpython-38.pyc b/messages/__pycache__/chat.cpython-38.pyc new file mode 100644 index 0000000..3286eb3 Binary files /dev/null and b/messages/__pycache__/chat.cpython-38.pyc differ diff --git a/messages/__pycache__/gift.cpython-38.pyc b/messages/__pycache__/gift.cpython-38.pyc new file mode 100644 index 0000000..1bab226 Binary files /dev/null and b/messages/__pycache__/gift.cpython-38.pyc differ diff --git a/messages/__pycache__/like.cpython-38.pyc b/messages/__pycache__/like.cpython-38.pyc new file mode 100644 index 0000000..7f6268d Binary files /dev/null and b/messages/__pycache__/like.cpython-38.pyc differ diff --git a/messages/__pycache__/member.cpython-38.pyc b/messages/__pycache__/member.cpython-38.pyc new file mode 100644 index 0000000..550c9f9 Binary files /dev/null and b/messages/__pycache__/member.cpython-38.pyc differ diff --git a/messages/__pycache__/roomuserseq.cpython-38.pyc b/messages/__pycache__/roomuserseq.cpython-38.pyc new file mode 100644 index 0000000..20474b2 Binary files /dev/null and b/messages/__pycache__/roomuserseq.cpython-38.pyc differ diff --git a/messages/__pycache__/social.cpython-38.pyc b/messages/__pycache__/social.cpython-38.pyc new file mode 100644 index 0000000..3a691c1 Binary files /dev/null and b/messages/__pycache__/social.cpython-38.pyc differ diff --git a/messages/__pycache__/utils.cpython-38.pyc b/messages/__pycache__/utils.cpython-38.pyc new file mode 100644 index 0000000..3d3ad16 Binary files /dev/null and b/messages/__pycache__/utils.cpython-38.pyc differ diff --git a/messages/utils.py b/messages/utils.py index 8a2a888..386fe52 100644 --- a/messages/utils.py +++ b/messages/utils.py @@ -1,6 +1,7 @@ import os from protobuf import message_pb2 - +from protobuf import wss_pb2 +import gzip from messages.member import MemberMessage from messages.like import LikeMessage from messages.roomuserseq import RoomUserSeqMessage @@ -22,13 +23,16 @@ init() def unpackMsgBin(filepath): response = message_pb2.Response() - + wss = wss_pb2.WssResponse() try: with open(filepath, 'rb') as f: - response.ParseFromString(f.read()) - + path_content = f.read() + wss.ParseFromString( path_content ) + decompressed = gzip.decompress(wss.data) + response.ParseFromString(decompressed) decodeMsg(response.messages) except Exception as e: + os.remove(filepath) pass finally: os.remove(filepath) diff --git a/protobuf/__pycache__/__init__.cpython-38.pyc b/protobuf/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..a98949a Binary files /dev/null and b/protobuf/__pycache__/__init__.cpython-38.pyc differ diff --git a/protobuf/__pycache__/message_pb2.cpython-38.pyc b/protobuf/__pycache__/message_pb2.cpython-38.pyc new file mode 100644 index 0000000..435b34f Binary files /dev/null and b/protobuf/__pycache__/message_pb2.cpython-38.pyc differ diff --git a/protobuf/__pycache__/wss_pb2.cpython-38.pyc b/protobuf/__pycache__/wss_pb2.cpython-38.pyc new file mode 100644 index 0000000..0cd79c0 Binary files /dev/null and b/protobuf/__pycache__/wss_pb2.cpython-38.pyc differ diff --git a/protobuf/wss.proto b/protobuf/wss.proto new file mode 100644 index 0000000..0f7f63c --- /dev/null +++ b/protobuf/wss.proto @@ -0,0 +1,14 @@ +syntax = "proto3"; + +message WssResponse{ + int64 wss_push_room_id = 1; + int64 wss_push_did = 2; + int64 wss_push_log_id = 3; + int64 wss_fetch_ms = 4; + int64 wss_push_ms = 5; + string wss_msg_type = 6; + string pb = 7; + bytes data = 8; + int64 server_time = 9; + string compress_type = 10; +} \ No newline at end of file diff --git a/protobuf/wss_pb2.py b/protobuf/wss_pb2.py new file mode 100644 index 0000000..1b3fbcc --- /dev/null +++ b/protobuf/wss_pb2.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: wss.proto +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\twss.proto\"\xdd\x01\n\x0bWssResponse\x12\x18\n\x10wss_push_room_id\x18\x01 \x01(\x03\x12\x14\n\x0cwss_push_did\x18\x02 \x01(\x03\x12\x17\n\x0fwss_push_log_id\x18\x03 \x01(\x03\x12\x14\n\x0cwss_fetch_ms\x18\x04 \x01(\x03\x12\x13\n\x0bwss_push_ms\x18\x05 \x01(\x03\x12\x14\n\x0cwss_msg_type\x18\x06 \x01(\t\x12\n\n\x02pb\x18\x07 \x01(\t\x12\x0c\n\x04\x64\x61ta\x18\x08 \x01(\x0c\x12\x13\n\x0bserver_time\x18\t \x01(\x03\x12\x15\n\rcompress_type\x18\n \x01(\tb\x06proto3') + + + +_WSSRESPONSE = DESCRIPTOR.message_types_by_name['WssResponse'] +WssResponse = _reflection.GeneratedProtocolMessageType('WssResponse', (_message.Message,), { + 'DESCRIPTOR' : _WSSRESPONSE, + '__module__' : 'wss_pb2' + # @@protoc_insertion_point(class_scope:WssResponse) + }) +_sym_db.RegisterMessage(WssResponse) + +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + _WSSRESPONSE._serialized_start=14 + _WSSRESPONSE._serialized_end=235 +# @@protoc_insertion_point(module_scope) diff --git a/scripts/__pycache__/__init__.cpython-38.pyc b/scripts/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..ffb1d1b Binary files /dev/null and b/scripts/__pycache__/__init__.cpython-38.pyc differ diff --git a/scripts/__pycache__/mitmproxy.cpython-38.pyc b/scripts/__pycache__/mitmproxy.cpython-38.pyc new file mode 100644 index 0000000..e450e26 Binary files /dev/null and b/scripts/__pycache__/mitmproxy.cpython-38.pyc differ diff --git a/scripts/__pycache__/watcher.cpython-38.pyc b/scripts/__pycache__/watcher.cpython-38.pyc new file mode 100644 index 0000000..30d739c Binary files /dev/null and b/scripts/__pycache__/watcher.cpython-38.pyc differ diff --git a/scripts/__pycache__/webdriver.cpython-38.pyc b/scripts/__pycache__/webdriver.cpython-38.pyc new file mode 100644 index 0000000..735c6d2 Binary files /dev/null and b/scripts/__pycache__/webdriver.cpython-38.pyc differ diff --git a/scripts/mitmproxy.py b/scripts/mitmproxy.py index 4ac654a..ae87cf7 100644 --- a/scripts/mitmproxy.py +++ b/scripts/mitmproxy.py @@ -3,11 +3,14 @@ import uuid from mitmproxy import http +import re class Writer: - def response(self, flow: http.HTTPFlow) -> None: - if flow.request.host == 'live.douyin.com': + def websocket_message(self, flow: http.HTTPFlow) : + re_c = re.search('webcast3-ws-web-.*\.douyin\.com', flow.request.host) + if re_c : with open('/Users/geng/douyin_live/' + uuid.uuid4().hex, 'wb') as f: - f.write(bytes(flow.response.content)) + mess = flow.websocket.messages[-1].content + f.write(bytes(mess)) addons = [Writer()] diff --git a/scripts/watcher.py b/scripts/watcher.py index 5c4beb6..e4a1c8e 100644 --- a/scripts/watcher.py +++ b/scripts/watcher.py @@ -1,6 +1,6 @@ import concurrent.futures import queue - +import time from watchdog.observers import Observer from watchdog.events import FileSystemEventHandler @@ -23,6 +23,7 @@ class Watcher: try: while True: with concurrent.futures.ThreadPoolExecutor() as executor: + time.sleep(0.2) executor.submit(unpackMsgBin, q.get()) except: self.observer.stop() diff --git a/scripts/webdriver.py b/scripts/webdriver.py index 6aeb69a..01e0f73 100644 --- a/scripts/webdriver.py +++ b/scripts/webdriver.py @@ -17,6 +17,11 @@ def go(url): chrome_options.add_argument('--proxy-server=%s' % config()['webdriver']['proxy']) chrome_options.add_argument('--headless') + # 2022-04-09 添加一个忽略证书 + chrome_options.add_argument('-ignore-certificate-errors') + chrome_options.add_argument('-ignore -ssl-errors') + chrome_options.add_argument('--incognito') + proxy = Proxy() proxy.proxy_type = ProxyType.MANUAL proxy.http_proxy = config()['webdriver']['proxy'] diff --git a/store/__pycache__/__init__.cpython-38.pyc b/store/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..ac04b65 Binary files /dev/null and b/store/__pycache__/__init__.cpython-38.pyc differ diff --git a/store/__pycache__/mongo.cpython-38.pyc b/store/__pycache__/mongo.cpython-38.pyc new file mode 100644 index 0000000..c2b65ac Binary files /dev/null and b/store/__pycache__/mongo.cpython-38.pyc differ