diff --git a/README.md b/README.md index 5092d28..c6d3947 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,26 @@ -**抖音直播间(web)弹幕抓取** + +抖音web直播间([live.douyin.com](https://live.douyin.com))弹幕抓取 +-- + +**屏幕效果截图** ![enter image description here](https://github.com/gll19920817/tiktok_live/blob/main/WX20211129-144919@2x.png?raw=true) -> Pre Requirements - - Python3 - - Charles - 1. `git clone https://github.com/gll19920817/tiktok_live` - 2. `pip install -r requirements.txt` - 3. `Open Charles > Tools > Mirror > Mirrors Setting` - - `Enable Mirror` - - `Save to a folder, eg:/Users/douyin/feeds/` - - `Add location: https://live.douyin.com/webcast/im/fetch/` -4. `change main.py Watcher directory parameter to folder that Step 3 choose, eg: /Users/douyin/feeds/` -5. `python3 main.py` +**项目思路** -Final thoughts : - 1. ~~Save data to mongodb~~ - 2. Charles alternative: maybe mitmproxy & scapy \ No newline at end of file +1. Selenium 无窗口且detach模式打开live直播间 +2. mitmproxy 捕获live.douyin.com http请求并保存响应为指定目录下文件 +3. watchdog监控步骤2指定目录下文件变化后反序列化文件(application/protobuf格式) +4. 反序列化弹幕消息分类后terminal输出且入库(mongodb) + +**使用方法** + +1. 安装[python3](https://www.python.org/downloads/) +2. clone本项目,terminal进入项目目录,执行 `pip install -r requirements.txt` +3. 安装[mitmproxy](https://mitmproxy.org/) terminal执行`mitmproxy -s scripts/mitmproxy.py` (scripts/mitmproxy.py见项目) +4. terminal执行 `python3 main.py` + +**注意事项** + +1. 本源代码改动仅可作学习目的!!! +2. emm... 🤔 diff --git a/Welcome file (3).md b/Welcome file (3).md new file mode 100644 index 0000000..cb1c796 --- /dev/null +++ b/Welcome file (3).md @@ -0,0 +1,26 @@ + +抖音web直播间([live.douyin.com](https://live.douyin.com))弹幕抓取 +-- + +**屏幕效果截图** + +![enter image description here](https://github.com/gll19920817/tiktok_live/blob/main/WX20211129-144919@2x.png?raw=true) + +**项目思路** + +1. Selenium 无窗口且detach模式打开live直播间 +2. mitmproxy 捕获live.douyin.com http请求并保存响应为指定目录下文件 +3. watchdog监控步骤2指定目录下文件变化后反序列化文件(application/protobuf格式) +4. 反序列化弹幕消息分类后terminal输出且入库(mongodb) + +**使用方法** + +5. 
安装[python3](https://www.python.org/downloads/) +6. clone本项目,terminal进入项目目录,执行 `pip install -r requirements.txt` +7. 安装[mitmproxy](https://mitmproxy.org/) terminal执行`mitmproxy -s scripts/mitmproxy.py` (scripts/mitmproxy.py见项目) +8. terminal执行 `python3 main.py` + +**注意事项** + +1. 本源代码仅可作学习目的修改! +2. emm... 🤔 diff --git a/__pycache__/selenium.cpython-39.pyc b/__pycache__/selenium.cpython-39.pyc new file mode 100644 index 0000000..a753417 Binary files /dev/null and b/__pycache__/selenium.cpython-39.pyc differ diff --git a/main.py b/main.py index 2856255..ee935f9 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,7 @@ -from scripts import watcher - +from scripts import watcher, webdriver if __name__ == '__main__': - w = watcher.Watcher(directory='/Users/geng/charles/autosaved') - w.run() \ No newline at end of file + webdriver.lunch() + + w = watcher.Watcher(directory='/Users/geng/douyin_live') + w.run() diff --git a/messages/__pycache__/base.cpython-39.pyc b/messages/__pycache__/base.cpython-39.pyc index 3409f45..0207048 100644 Binary files a/messages/__pycache__/base.cpython-39.pyc and b/messages/__pycache__/base.cpython-39.pyc differ diff --git a/messages/base.py b/messages/base.py index f144e17..f95beb7 100644 --- a/messages/base.py +++ b/messages/base.py @@ -6,7 +6,7 @@ class Base: def set_payload(self, payload): self.instance.ParseFromString(payload) - + def user(self): return self.instance.user diff --git a/requirements.txt b/requirements.txt index ba487b9..f1d62a5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,22 @@ +async-generator==1.10 +attrs==21.2.0 +certifi==2021.10.8 +cffi==1.15.0 colorama==0.4.4 +cryptography==36.0.0 +h11==0.12.0 +idna==3.3 +outcome==1.1.0 protobuf==3.19.1 +pycparser==2.21 pymongo==3.12.1 +pyOpenSSL==21.0.0 +selenium==4.1.0 +six==1.16.0 +sniffio==1.2.0 +sortedcontainers==2.4.0 +trio==0.19.0 +trio-websocket==0.9.2 +urllib3==1.26.7 watchdog==2.1.6 +wsproto==1.0.0 diff --git a/scripts/__pycache__/webdriver.cpython-39.pyc 
b/scripts/__pycache__/webdriver.cpython-39.pyc
new file mode 100644
index 0000000..d13bd27
Binary files /dev/null and b/scripts/__pycache__/webdriver.cpython-39.pyc differ
diff --git a/scripts/mitmproxy.py b/scripts/mitmproxy.py
new file mode 100644
index 0000000..4ac654a
--- /dev/null
+++ b/scripts/mitmproxy.py
@@ -0,0 +1,42 @@
+# IMPORTANT: this is a mitmproxy addon script; load it through mitmproxy,
+# e.g. `/path/to/mitmproxy -s mitmproxy.py`.
+
+import os
+import uuid
+
+from mitmproxy import http
+
+# Directory the captured response bodies are written to; main.py points its
+# Watcher at this same directory.
+OUTPUT_DIR = '/Users/geng/douyin_live/'
+# Only responses whose request host equals this value are saved.
+TARGET_HOST = 'live.douyin.com'
+
+
+class Writer:
+    """mitmproxy addon that saves matching response bodies to files."""
+
+    def __init__(self, directory: str = OUTPUT_DIR, host: str = TARGET_HOST) -> None:
+        self.directory = directory
+        self.host = host
+
+    def response(self, flow: http.HTTPFlow) -> None:
+        """Write the body of a response from the target host to a new file.
+
+        Responses for other hosts, and responses with no body, are skipped.
+        """
+        if flow.request.host != self.host:
+            return
+
+        # content may be None; bytes(None) would raise TypeError, so skip.
+        body = None if flow.response is None else flow.response.content
+        if body is None:
+            return
+
+        os.makedirs(self.directory, exist_ok=True)
+        # A random hex file name keeps responses from overwriting each other.
+        with open(os.path.join(self.directory, uuid.uuid4().hex), 'wb') as f:
+            f.write(bytes(body))
+
+
+addons = [Writer()]
diff --git a/scripts/webdriver.py b/scripts/webdriver.py
new file mode 100644
index 0000000..fcdb204
--- /dev/null
+++ b/scripts/webdriver.py
@@ -0,0 +1,25 @@
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+
+# Defaults: the proxy address (presumably the local mitmproxy listener) and
+# the live room to open.
+DEFAULT_PROXY = '127.0.0.1:8080'
+DEFAULT_URL = 'https://live.douyin.com/515848215303'
+
+
+def lunch(url=DEFAULT_URL, proxy=DEFAULT_PROXY):
+    """Open ``url`` in headless, detached Chrome routed through ``proxy``.
+
+    The name is a misspelling of "launch"; it is kept because main.py calls
+    ``webdriver.lunch()``.
+
+    Returns the Chrome driver so the caller can quit it later.
+    """
+    chrome_options = Options()
+    chrome_options.add_argument(f'--proxy-server={proxy}')
+    chrome_options.add_argument('--headless')
+    chrome_options.add_experimental_option('detach', True)
+
+    driver = webdriver.Chrome(options=chrome_options)
+    driver.get(url)
+    return driver