update
This commit is contained in:
parent
febaf8599a
commit
c5a38f00ae
37
README.md
37
README.md
@ -1,19 +1,26 @@
|
||||
**抖音直播间(web)弹幕抓取**
|
||||
|
||||
抖音web直播间([live.douyin.com](https://live.douyin.com))弹幕抓取
|
||||
--
|
||||
|
||||
**屏幕效果截图**
|
||||
|
||||

|
||||
> Pre Requirements
|
||||
- Python3
|
||||
- Charles
|
||||
|
||||
1. `git clone https://github.com/gll19920817/tiktok_live`
|
||||
2. `pip install -r requirements.txt`
|
||||
3. `Open Charles > Tools > Mirror > Mirrors Setting`
|
||||
- `Enable Mirror`
|
||||
- `Save to a folder, eg:/Users/douyin/feeds/`
|
||||
- `Add location: https://live.douyin.com/webcast/im/fetch/`
|
||||
4. `change main.py Watcher directory parameter to folder that Step 3 choose, eg: /Users/douyin/feeds/`
|
||||
5. `python3 main.py`
|
||||
**项目思路**
|
||||
|
||||
Final thoughts :
|
||||
1. ~~Save data to mongodb~~
|
||||
2. Charles alternative: maybe mitmproxy & scapy
|
||||
1. Selenium 无窗口且detach模式打开live直播间
|
||||
2. mitmproxy 捕获live.douyin.com http请求并保存响应为指定目录下文件
|
||||
3. watchdog监控步骤2指定目录下文件变化后反序列化文件(application/protobuf格式)
|
||||
4. 反序列化弹幕消息分类后terminal输出且入库(mongodb)
|
||||
|
||||
**使用方法**
|
||||
|
||||
1. 安装[python3](https://www.python.org/downloads/)
|
||||
2. clone本项目,terminal进入项目目录,执行 `pip install -r requirements.txt`
|
||||
3. 安装[mitmproxy](https://mitmproxy.org/),terminal执行 `mitmproxy -s scripts/mitmproxy.py` (scripts/mitmproxy.py见项目)
|
||||
4. terminal执行 `python3 main.py`
|
||||
|
||||
**注意事项**
|
||||
|
||||
1. 本源代码改动仅可作学习目的!!!
|
||||
2. emm... 🤔
|
||||
|
26
Welcome file (3).md
Normal file
26
Welcome file (3).md
Normal file
@ -0,0 +1,26 @@
|
||||
|
||||
抖音web直播间([live.douyin.com](https://live.douyin.com))弹幕抓取
|
||||
--
|
||||
|
||||
**屏幕效果截图**
|
||||
|
||||

|
||||
|
||||
**项目思路**
|
||||
|
||||
1. Selenium 无窗口且detach模式打开live直播间
|
||||
2. mitmproxy 捕获live.douyin.com http请求并保存响应为指定目录下文件
|
||||
3. watchdog监控步骤2指定目录下文件变化后反序列化文件(application/protobuf格式)
|
||||
4. 反序列化弹幕消息分类后terminal输出且入库(mongodb)
|
||||
|
||||
**使用方法**
|
||||
|
||||
1. 安装[python3](https://www.python.org/downloads/)
|
||||
2. clone本项目,terminal进入项目目录,执行 `pip install -r requirements.txt`
|
||||
3. 安装[mitmproxy](https://mitmproxy.org/),terminal执行 `mitmproxy -s scripts/mitmproxy.py` (scripts/mitmproxy.py见项目)
|
||||
4. terminal执行 `python3 main.py`
|
||||
|
||||
**注意事项**
|
||||
|
||||
1. 本源代码仅可作学习目的修改!
|
||||
2. emm... 🤔
|
BIN
__pycache__/selenium.cpython-39.pyc
Normal file
BIN
__pycache__/selenium.cpython-39.pyc
Normal file
Binary file not shown.
7
main.py
7
main.py
@ -1,6 +1,7 @@
|
||||
from scripts import watcher
|
||||
|
||||
from scripts import watcher, webdriver
|
||||
|
||||
if __name__ == '__main__':
|
||||
w = watcher.Watcher(directory='/Users/geng/charles/autosaved')
|
||||
webdriver.lunch()
|
||||
|
||||
w = watcher.Watcher(directory='/Users/geng/douyin_live')
|
||||
w.run()
|
Binary file not shown.
@ -1,4 +1,22 @@
|
||||
async-generator==1.10
|
||||
attrs==21.2.0
|
||||
certifi==2021.10.8
|
||||
cffi==1.15.0
|
||||
colorama==0.4.4
|
||||
cryptography==36.0.0
|
||||
h11==0.12.0
|
||||
idna==3.3
|
||||
outcome==1.1.0
|
||||
protobuf==3.19.1
|
||||
pycparser==2.21
|
||||
pymongo==3.12.1
|
||||
pyOpenSSL==21.0.0
|
||||
selenium==4.1.0
|
||||
six==1.16.0
|
||||
sniffio==1.2.0
|
||||
sortedcontainers==2.4.0
|
||||
trio==0.19.0
|
||||
trio-websocket==0.9.2
|
||||
urllib3==1.26.7
|
||||
watchdog==2.1.6
|
||||
wsproto==1.0.0
|
||||
|
BIN
scripts/__pycache__/webdriver.cpython-39.pyc
Normal file
BIN
scripts/__pycache__/webdriver.cpython-39.pyc
Normal file
Binary file not shown.
13
scripts/mitmproxy.py
Normal file
13
scripts/mitmproxy.py
Normal file
@ -0,0 +1,13 @@
|
||||
# ! IMPORTANT ! make sure you run mitmproxy with this script,
|
||||
# eg: `/path/to/mitmproxy -s mitmproxy.py`
|
||||
|
||||
import uuid
|
||||
from mitmproxy import http
|
||||
|
||||
class Writer:
    """mitmproxy addon that dumps matching response bodies to disk.

    Each response whose request host equals ``HOST`` is written to a new
    file under ``OUTPUT_DIR``; the file name is a random UUID hex string.
    """

    # Only responses to requests for exactly this host are saved.
    HOST = 'live.douyin.com'
    # The file name is appended by plain string concatenation, so this
    # value must end with a path separator.
    OUTPUT_DIR = '/Users/geng/douyin_live/'

    def response(self, flow: http.HTTPFlow) -> None:
        """Write the response body of a matching flow to a new file.

        Args:
            flow: The HTTP flow passed in by mitmproxy for this response.
        """
        if flow.request.host != self.HOST:
            return

        body = flow.response.content
        if body is None:
            # Nothing to save. Without this guard bytes(None) raises
            # TypeError after open() has already created an empty file.
            return

        with open(self.OUTPUT_DIR + uuid.uuid4().hex, 'wb') as f:
            f.write(bytes(body))
|
||||
|
||||
addons = [Writer()]
|
13
scripts/webdriver.py
Normal file
13
scripts/webdriver.py
Normal file
@ -0,0 +1,13 @@
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
|
||||
def lunch(url='https://live.douyin.com/515848215303', proxy='127.0.0.1:8080'):
    """Open a page in headless Chrome with traffic sent through a proxy.

    Called with no arguments it behaves as before: it loads the hard-coded
    live-room URL through the proxy at 127.0.0.1:8080.

    Args:
        url: Page to load once the browser has started.
        proxy: ``host:port`` passed to Chrome's ``--proxy-server`` switch
            (presumably the local mitmproxy listener -- confirm the port
            matches how mitmproxy is started).
    """
    chrome_options = Options()
    chrome_options.add_argument('--proxy-server=' + proxy)
    chrome_options.add_argument('--headless')
    # NOTE(review): 'detach' is intended to leave Chrome running after this
    # function returns -- confirm against the ChromeDriver capabilities docs.
    chrome_options.add_experimental_option('detach', True)

    driver = webdriver.Chrome(options=chrome_options)
    driver.get(url)
|
||||
|
Reference in New Issue
Block a user