update
This commit is contained in:
parent
febaf8599a
commit
c5a38f00ae
37
README.md
37
README.md
@ -1,19 +1,26 @@
|
|||||||
**抖音直播间(web)弹幕抓取**
|
|
||||||
|
抖音web直播间([live.douyin.com](https://live.douyin.com))弹幕抓取
|
||||||
|
--
|
||||||
|
|
||||||
|
**屏幕效果截图**
|
||||||
|
|
||||||

|

|
||||||
> Pre Requirements
|
|
||||||
- Python3
|
|
||||||
- Charles
|
|
||||||
|
|
||||||
1. `git clone https://github.com/gll19920817/tiktok_live`
|
**项目思路**
|
||||||
2. `pip install -r requirements.txt`
|
|
||||||
3. `Open Charles > Tools > Mirror > Mirrors Setting`
|
|
||||||
- `Enable Mirror`
|
|
||||||
- `Save to a folder, eg:/Users/douyin/feeds/`
|
|
||||||
- `Add location: https://live.douyin.com/webcast/im/fetch/`
|
|
||||||
4. `change main.py Watcher directory parameter to folder that Step 3 choose, eg: /Users/douyin/feeds/`
|
|
||||||
5. `python3 main.py`
|
|
||||||
|
|
||||||
Final thoughts :
|
1. Selenium 无窗口且detach模式打开live直播间
|
||||||
1. ~~Save data to mongodb~~
|
2. mitmproxy 捕获live.douyin.com http请求并保存响应为指定目录下文件
|
||||||
2. Charles alternative: maybe mitmproxy & scapy
|
3. watchdog监控步骤2指定目录下文件变化后反序列化文件(application/protobuf格式)
|
||||||
|
4. 反序列化弹幕消息分类后terminal输出且入库(mongodb)
|
||||||
|
|
||||||
|
**使用方法**
|
||||||
|
|
||||||
|
1. 安装[python3](https://www.python.org/downloads/)
|
||||||
|
2. clone本项目,terminal进入项目目录,执行 `pip install -r requirements.txt`
|
||||||
|
3. 安装[mitmproxy](https://mitmproxy.org/),terminal执行 `mitmproxy -s scripts/mitmproxy.py` (scripts/mitmproxy.py见项目)
|
||||||
|
4. terminal执行 `python3 main.py`
|
||||||
|
|
||||||
|
**注意事项**
|
||||||
|
|
||||||
|
1. 本源代码改动仅可作学习目的!!!
|
||||||
|
2. emm... 🤔
|
||||||
|
26
Welcome file (3).md
Normal file
26
Welcome file (3).md
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
|
||||||
|
抖音web直播间([live.douyin.com](https://live.douyin.com))弹幕抓取
|
||||||
|
--
|
||||||
|
|
||||||
|
**屏幕效果截图**
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
**项目思路**
|
||||||
|
|
||||||
|
1. Selenium 无窗口且detach模式打开live直播间
|
||||||
|
2. mitmproxy 捕获live.douyin.com http请求并保存响应为指定目录下文件
|
||||||
|
3. watchdog监控步骤2指定目录下文件变化后反序列化文件(application/protobuf格式)
|
||||||
|
4. 反序列化弹幕消息分类后terminal输出且入库(mongodb)
|
||||||
|
|
||||||
|
**使用方法**
|
||||||
|
|
||||||
|
5. 安装[python3](https://www.python.org/downloads/)
|
||||||
|
6. clone本项目,terminal进入项目目录,执行 `pip install -r requirements.txt`
|
||||||
|
7. 安装[mitmproxy](https://mitmproxy.org/),terminal执行 `mitmproxy -s scripts/mitmproxy.py` (scripts/mitmproxy.py见项目)
|
||||||
|
8. terminal执行 `python3 main.py`
|
||||||
|
|
||||||
|
**注意事项**
|
||||||
|
|
||||||
|
1. 本源代码仅可作学习目的修改!
|
||||||
|
2. emm... 🤔
|
BIN
__pycache__/selenium.cpython-39.pyc
Normal file
BIN
__pycache__/selenium.cpython-39.pyc
Normal file
Binary file not shown.
7
main.py
7
main.py
@ -1,6 +1,7 @@
|
|||||||
from scripts import watcher
|
from scripts import watcher, webdriver
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
w = watcher.Watcher(directory='/Users/geng/charles/autosaved')
|
webdriver.lunch()
|
||||||
|
|
||||||
|
w = watcher.Watcher(directory='/Users/geng/douyin_live')
|
||||||
w.run()
|
w.run()
|
Binary file not shown.
@ -1,4 +1,22 @@
|
|||||||
|
async-generator==1.10
|
||||||
|
attrs==21.2.0
|
||||||
|
certifi==2021.10.8
|
||||||
|
cffi==1.15.0
|
||||||
colorama==0.4.4
|
colorama==0.4.4
|
||||||
|
cryptography==36.0.0
|
||||||
|
h11==0.12.0
|
||||||
|
idna==3.3
|
||||||
|
outcome==1.1.0
|
||||||
protobuf==3.19.1
|
protobuf==3.19.1
|
||||||
|
pycparser==2.21
|
||||||
pymongo==3.12.1
|
pymongo==3.12.1
|
||||||
|
pyOpenSSL==21.0.0
|
||||||
|
selenium==4.1.0
|
||||||
|
six==1.16.0
|
||||||
|
sniffio==1.2.0
|
||||||
|
sortedcontainers==2.4.0
|
||||||
|
trio==0.19.0
|
||||||
|
trio-websocket==0.9.2
|
||||||
|
urllib3==1.26.7
|
||||||
watchdog==2.1.6
|
watchdog==2.1.6
|
||||||
|
wsproto==1.0.0
|
||||||
|
BIN
scripts/__pycache__/webdriver.cpython-39.pyc
Normal file
BIN
scripts/__pycache__/webdriver.cpython-39.pyc
Normal file
Binary file not shown.
13
scripts/mitmproxy.py
Normal file
13
scripts/mitmproxy.py
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
# ! IMPORTANT ! make sure you run mitmproxy with this script,
|
||||||
|
# eg: `/path/to/mitmproxy -s mitmproxy.py`
|
||||||
|
|
||||||
|
import uuid
|
||||||
|
from mitmproxy import http
|
||||||
|
|
||||||
|
class Writer:
    """mitmproxy addon that saves live.douyin.com response bodies to disk.

    Each matching response is written to its own file under
    /Users/geng/douyin_live/ (the directory main.py hands to the Watcher),
    named with a random UUID hex string.
    """

    def response(self, flow: http.HTTPFlow) -> None:
        """Write the body of a live.douyin.com response to a new file.

        Args:
            flow: The HTTP flow passed to this hook by mitmproxy.
        """
        if flow.request.host != 'live.douyin.com':
            return

        body = flow.response.content
        if body is None:
            # mitmproxy reports a missing body (e.g. a streamed response) as
            # None; bytes(None) would raise TypeError, so skip the flow.
            return

        with open('/Users/geng/douyin_live/' + uuid.uuid4().hex, 'wb') as f:
            f.write(body)
|
||||||
|
|
||||||
|
addons = [Writer()]
|
13
scripts/webdriver.py
Normal file
13
scripts/webdriver.py
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
from selenium import webdriver
|
||||||
|
from selenium.webdriver.chrome.options import Options
|
||||||
|
|
||||||
|
def lunch(url='https://live.douyin.com/515848215303', proxy='127.0.0.1:8080'):
    """Open a Douyin live room in headless Chrome routed through a proxy.

    The name is kept as-is (sic) because main.py calls ``webdriver.lunch()``.

    Args:
        url: Live-room page to open. Defaults to the room that was
            previously hard-coded.
        proxy: ``host:port`` passed to Chrome's ``--proxy-server`` flag.
            Defaults to the previously hard-coded 127.0.0.1:8080
            (presumably the local mitmproxy listener; confirm against
            your mitmproxy setup).
    """
    chrome_options = Options()
    chrome_options.add_argument('--proxy-server=' + proxy)
    # No visible browser window.
    chrome_options.add_argument('--headless')
    # ChromeDriver "detach" option: intended to keep Chrome running after
    # this function returns and the driver object goes away.
    chrome_options.add_experimental_option('detach', True)

    driver = webdriver.Chrome(options=chrome_options)
    driver.get(url)
|
||||||
|
|
Reference in New Issue
Block a user