From c5a38f00ae2c4a5a5f2bcebaaa334319cc8c3bd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=80=BF=E4=BC=A6=E4=BC=A6?= Date: Tue, 30 Nov 2021 16:41:23 +0800 Subject: [PATCH] update --- README.md | 37 +++++++++++-------- Welcome file (3).md | 26 +++++++++++++ __pycache__/selenium.cpython-39.pyc | Bin 0 -> 452 bytes main.py | 9 +++-- messages/__pycache__/base.cpython-39.pyc | Bin 1420 -> 1420 bytes messages/base.py | 2 +- requirements.txt | 18 +++++++++ scripts/__pycache__/webdriver.cpython-39.pyc | Bin 0 -> 601 bytes scripts/mitmproxy.py | 13 +++++++ scripts/webdriver.py | 13 +++++++ 10 files changed, 98 insertions(+), 20 deletions(-) create mode 100644 Welcome file (3).md create mode 100644 __pycache__/selenium.cpython-39.pyc create mode 100644 scripts/__pycache__/webdriver.cpython-39.pyc create mode 100644 scripts/mitmproxy.py create mode 100644 scripts/webdriver.py diff --git a/README.md b/README.md index 5092d28..c6d3947 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,26 @@ -**抖音直播间(web)弹幕抓取** + +抖音web直播间([live.douyin.com](https://live.douyin.com))弹幕抓取 +-- + +**屏幕效果截图** ![enter image description here](https://github.com/gll19920817/tiktok_live/blob/main/WX20211129-144919@2x.png?raw=true) -> Pre Requirements - - Python3 - - Charles - 1. `git clone https://github.com/gll19920817/tiktok_live` - 2. `pip install -r requirements.txt` - 3. `Open Charles > Tools > Mirror > Mirrors Setting` - - `Enable Mirror` - - `Save to a folder, eg:/Users/douyin/feeds/` - - `Add location: https://live.douyin.com/webcast/im/fetch/` -4. `change main.py Watcher directory parameter to folder that Step 3 choose, eg: /Users/douyin/feeds/` -5. `python3 main.py` +**项目思路** -Final thoughts : - 1. ~~Save data to mongodb~~ - 2. Charles alternative: maybe mitmproxy & scapy \ No newline at end of file +1. Selenium 无窗口且detach模式打开live直播间 +2. mitmproxy 捕获live.douyin.com http请求并保存响应为指定目录下文件 +3. watchdog监控步骤2指定目录下文件变化后反序列化文件(application/protobuf格式) +4. 
反序列化弹幕消息分类后terminal输出且入库(mongodb) + +**使用方法** + +1. 安装[python3](https://www.python.org/downloads/) +2. clone本项目,terminal进入项目目录,执行 `pip install -r requirements.txt` +3. 安装[mitmproxy](https://mitmproxy.org/) terminal执行`mitmproxy -s scripts/mitmproxy.py` (scripts/mitmproxy.py见项目) +4. terminal执行 `python3 main.py` + +**注意事项** + +1. 本源代码改动仅可作学习目的!!! +2. emm... 🤔 diff --git a/Welcome file (3).md b/Welcome file (3).md new file mode 100644 index 0000000..cb1c796 --- /dev/null +++ b/Welcome file (3).md @@ -0,0 +1,26 @@ + +抖音web直播间([live.douyin.com](https://live.douyin.com))弹幕抓取 +-- + +**屏幕效果截图** + +![enter image description here](https://github.com/gll19920817/tiktok_live/blob/main/WX20211129-144919@2x.png?raw=true) + +**项目思路** + +1. Selenium 无窗口且detach模式打开live直播间 +2. mitmproxy 捕获live.douyin.com http请求并保存响应为指定目录下文件 +3. watchdog监控步骤2指定目录下文件变化后反序列化文件(application/protobuf格式) +4. 反序列化弹幕消息分类后terminal输出且入库(mongodb) + +**使用方法** + +1. 安装[python3](https://www.python.org/downloads/) +2. clone本项目,terminal进入项目目录,执行 `pip install -r requirements.txt` +3. 安装[mitmproxy](https://mitmproxy.org/) terminal执行`mitmproxy -s scripts/mitmproxy.py` (scripts/mitmproxy.py见项目) +4. terminal执行 `python3 main.py` + +**注意事项** + +1. 本源代码仅可作学习目的修改! +2. emm... 
🤔 diff --git a/__pycache__/selenium.cpython-39.pyc b/__pycache__/selenium.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a7534176965a58ec51e12d28b7a6c04f225e4f23 GIT binary patch literal 452 zcmYjOy-ve05Vn&vNm1I4%t$O1QVc^VAcTw^4 z>$}N}+zd^OzbHM%Pc-Ud)qaLC z<=GQ(O`O|GW@RCiO+m&C0g%WhE{MVY-=5 z%h@6;{G`mu;CK+61gGc0;4J93l`DWjk2l>Msc&Q>u!S4cK&`$ zX$12Mc$PyUW7cGpeX~}qS6FYQ%n8U3#Cy8Q8h($+g8(kb1d0h6f|$y(A~Mr%8r9m# cPEoZ`?qy!m8G|eRLr8cUT$Xm|}06jwnN&o-= diff --git a/messages/base.py b/messages/base.py index f144e17..f95beb7 100644 --- a/messages/base.py +++ b/messages/base.py @@ -6,7 +6,7 @@ class Base: def set_payload(self, payload): self.instance.ParseFromString(payload) - + def user(self): return self.instance.user diff --git a/requirements.txt b/requirements.txt index ba487b9..f1d62a5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,22 @@ +async-generator==1.10 +attrs==21.2.0 +certifi==2021.10.8 +cffi==1.15.0 colorama==0.4.4 +cryptography==36.0.0 +h11==0.12.0 +idna==3.3 +outcome==1.1.0 protobuf==3.19.1 +pycparser==2.21 pymongo==3.12.1 +pyOpenSSL==21.0.0 +selenium==4.1.0 +six==1.16.0 +sniffio==1.2.0 +sortedcontainers==2.4.0 +trio==0.19.0 +trio-websocket==0.9.2 +urllib3==1.26.7 watchdog==2.1.6 +wsproto==1.0.0 diff --git a/scripts/__pycache__/webdriver.cpython-39.pyc b/scripts/__pycache__/webdriver.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d13bd27989da2b5edb3caab98e8efc55771385d0 GIT binary patch literal 601 zcmYjPJ#X7E5Is_slDJCOP8|Yd(IC-I;?^+QAb>NcLy|>{p^Aj1MqePQG&Y>6S@R$4 zAw&L>fJ~Y47dquAJ8h1*<2~?r@A1HBcee{@U!ESK6#;lQ;Jr}K_8>56AT*u zz%49bNjO;{Nt@A4Mh(r1642JK&H}i7Pj1PAEN$+X3JyrVbvFO(`;{zjX1)>q?Cn%TRg1%X*d;L0sl+&>sws zh6nw@Veim$q=j-DH{yH}$!VR7LWiFW5;v8Qslib;SvQsDouM;MWSNV`nhNdF@MGM< zO^MOcj_MKQ4p#p?Vwb32v8gDg>{O^LT~+%6Kn0wu+o9RL6T literal 0 HcmV?d00001 diff --git a/scripts/mitmproxy.py b/scripts/mitmproxy.py new file mode 100644 index 0000000..4ac654a --- /dev/null +++ 
b/scripts/mitmproxy.py @@ -0,0 +1,13 @@ +# IMPORTANT: make sure you run mitmproxy with this script, +# e.g. `/path/to/mitmproxy -s mitmproxy.py` + +import uuid +from mitmproxy import http + +class Writer: + def response(self, flow: http.HTTPFlow) -> None: + if flow.request.host == 'live.douyin.com': + with open('/Users/geng/douyin_live/' + uuid.uuid4().hex, 'wb') as f: + f.write(bytes(flow.response.content)) + +addons = [Writer()] diff --git a/scripts/webdriver.py b/scripts/webdriver.py new file mode 100644 index 0000000..fcdb204 --- /dev/null +++ b/scripts/webdriver.py @@ -0,0 +1,13 @@ +from selenium import webdriver +from selenium.webdriver.chrome.options import Options + +def lunch(): + chrome_options = Options() + chrome_options.add_argument('--proxy-server=127.0.0.1:8080') + chrome_options.add_argument('--headless') + chrome_options.add_experimental_option('detach', True) + + driver = webdriver.Chrome(options=chrome_options) + + driver.get('https://live.douyin.com/515848215303') +