架构修改,删除Edge浏览器支持,配置可以自己保存

This commit is contained in:
Jerry Yan 2022-07-13 15:05:42 +08:00
parent becd46e4f9
commit 134cf0186a
17 changed files with 283 additions and 235 deletions

View File

@ -1,15 +1,21 @@
import contextlib import contextlib
import logging
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
if TYPE_CHECKING: if TYPE_CHECKING:
from selenium.webdriver.remote.webdriver import WebDriver from selenium.webdriver.remote.webdriver import WebDriver
from config import ConfigManager
_log = logging.getLogger("IDriver")
_log.setLevel(logging.DEBUG)
class IDriver(): class IDriver():
browser: "WebDriver" browser: "WebDriver"
_config_manager: "ConfigManager"
def __del__(self): def __init__(self, config_manager):
self.terminate() self._config_manager = config_manager
def terminate(self): def terminate(self):
self.browser.quit() self.browser.quit()
@ -20,19 +26,25 @@ class IDriver():
def change_tab(self, tab_handler: str): def change_tab(self, tab_handler: str):
... ...
def close_tab(self, tab_handler: str):
...
def open_url(self, url: str, tab_handler: str = ""): def open_url(self, url: str, tab_handler: str = ""):
... ...
@contextlib.contextmanager @contextlib.contextmanager
def op_tab(self, tab_handler: str): def op_tab(self, tab_handler: str):
cur_handle = self.browser.current_window_handle cur_handle = self.browser.current_window_handle
_log.debug("切换Tab旧Tab%s新Tab%s", cur_handle, tab_handler)
if tab_handler == "": if tab_handler == "":
tab_handler = cur_handle tab_handler = cur_handle
try: try:
self.change_tab(tab_handler) self.change_tab(tab_handler)
_log.debug("切换至新Tab%s", tab_handler)
yield self yield self
finally: finally:
self.change_tab(cur_handle) self.change_tab(cur_handle)
_log.debug("切换至旧Tab%s", cur_handle)
def refresh(self, tab_handler: str = ""): def refresh(self, tab_handler: str = ""):
... ...

View File

@ -1,37 +1,41 @@
import logging
from selenium import webdriver from selenium import webdriver
from selenium.webdriver import Proxy, DesiredCapabilities from selenium.webdriver import Proxy, DesiredCapabilities
from selenium.webdriver.common.proxy import ProxyType from selenium.webdriver.common.proxy import ProxyType
from config.helper import config
from browser.IDriver import IDriver from browser.IDriver import IDriver
from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
_log = logging.getLogger("ChromeDriver")
_log.setLevel(logging.DEBUG)
class ChromeDriver(IDriver): class ChromeDriver(IDriver):
def __init__(self): def __init__(self, config_manager):
super(ChromeDriver, self).__init__() super(ChromeDriver, self).__init__(config_manager)
options = Options() options = Options()
if config()['webdriver']['headless']: if self._config_manager.config['webdriver']['headless']:
options.add_argument("--headless") options.add_argument("--headless")
options.add_argument("--window-size=1920,1080") options.add_argument("--window-size=1920,1080")
options.add_argument('--proxy-server=%s:%s' % (config()['mitm']['host'], config()['mitm']['port'])) options.add_argument('--proxy-server=%s:%s' % (self._config_manager.config['mitm']['host'], self._config_manager.config['mitm']['port']))
options.add_argument('--ignore-certificate-errors') options.add_argument('--ignore-certificate-errors')
options.add_argument('--ignore-ssl-errors') options.add_argument('--ignore-ssl-errors')
options.add_argument('--incognito') options.add_argument('--incognito')
options.add_experimental_option('excludeSwitches', ['ignore-certificate-errors']) options.add_experimental_option('excludeSwitches', ['ignore-certificate-errors'])
if config()['webdriver']['chrome']['no_sandbox']: if self._config_manager.config['webdriver']['chrome']['no_sandbox']:
_log.debug("添加启动参数NoSandbox")
options.add_argument('--no-sandbox') options.add_argument('--no-sandbox')
proxy = Proxy() proxy = Proxy()
proxy.proxy_type = ProxyType.MANUAL proxy.proxy_type = ProxyType.MANUAL
proxy.http_proxy = "%s:%s" % (config()['mitm']['host'], config()['mitm']['port']) proxy.http_proxy = "%s:%s" % (self._config_manager.config['mitm']['host'], self._config_manager.config['mitm']['port'])
proxy.ssl_proxy = "%s:%s" % (config()['mitm']['host'], config()['mitm']['port']) proxy.ssl_proxy = "%s:%s" % (self._config_manager.config['mitm']['host'], self._config_manager.config['mitm']['port'])
capabilities = DesiredCapabilities.CHROME capabilities = DesiredCapabilities.CHROME
proxy.add_to_capabilities(capabilities) proxy.add_to_capabilities(capabilities)
self.browser = webdriver.Chrome(options=options, self.browser = webdriver.Chrome(options=options,
desired_capabilities=capabilities, desired_capabilities=capabilities,
executable_path=config()['webdriver']['chrome']['bin'] executable_path=self._config_manager.config['webdriver']['chrome']['bin']
) )
_log.info("浏览器启动完毕")
def new_tab(self) -> str: def new_tab(self) -> str:
current_window_handles = self.browser.window_handles current_window_handles = self.browser.window_handles
@ -39,16 +43,20 @@ class ChromeDriver(IDriver):
new_window_handles = self.browser.window_handles new_window_handles = self.browser.window_handles
for _handle in new_window_handles: for _handle in new_window_handles:
if _handle not in current_window_handles: if _handle not in current_window_handles:
_log.debug("新窗口句柄:%s", _handle)
return _handle return _handle
_log.warning("打开新窗口,未发现新句柄")
return "" return ""
def change_tab(self, tab_handler: str): def change_tab(self, tab_handler: str):
if tab_handler not in self.browser.window_handles: if tab_handler not in self.browser.window_handles:
return return
if tab_handler == self.browser.current_window_handle:
return
self.browser.switch_to.window(tab_handler) self.browser.switch_to.window(tab_handler)
def close_tab(self, tab_handler: str):
with self.op_tab(tab_handler):
self.browser.close()
def open_url(self, url: str, tab_handler: str = ""): def open_url(self, url: str, tab_handler: str = ""):
with self.op_tab(tab_handler): with self.op_tab(tab_handler):
self.browser.get(url) self.browser.get(url)

View File

@ -1,59 +0,0 @@
from selenium import webdriver
from selenium.webdriver import Proxy, DesiredCapabilities
from selenium.webdriver.common.proxy import ProxyType
from config.helper import config
from browser.IDriver import IDriver
from selenium.webdriver.edge.options import Options
class EdgeDriver(IDriver):
def __init__(self):
super(EdgeDriver, self).__init__()
options = Options()
if config()['webdriver']['headless']:
options.add_argument("--headless")
options.add_argument("--window-size=1920,1080")
options.add_argument('--proxy-server=%s:%s' % (config()['mitm']['host'], config()['mitm']['port']))
options.add_argument('--ignore-certificate-errors')
options.add_argument('--ignore-ssl-errors')
options.add_argument('--incognito')
proxy = Proxy()
proxy.proxy_type = ProxyType.MANUAL
proxy.http_proxy = "%s:%s" % (config()['mitm']['host'], config()['mitm']['port'])
proxy.ssl_proxy = "%s:%s" % (config()['mitm']['host'], config()['mitm']['port'])
capabilities = DesiredCapabilities.EDGE
proxy.add_to_capabilities(capabilities)
self.browser = webdriver.Chrome(options=options,
desired_capabilities=capabilities,
executable_path=config()['webdriver']['edge']['bin']
)
def new_tab(self) -> str:
current_window_handles = self.browser.window_handles
self.browser.execute_script("window.open('')")
new_window_handles = self.browser.window_handles
for _handle in new_window_handles:
if _handle not in current_window_handles:
return _handle
return ""
def change_tab(self, tab_handler: str):
if tab_handler not in self.browser.window_handles:
return
if tab_handler == self.browser.current_window_handle:
return
self.browser.switch_to.window(tab_handler)
def open_url(self, url: str, tab_handler: str = ""):
with self.op_tab(tab_handler):
self.browser.get(url)
def refresh(self, tab_handler: str = ""):
with self.op_tab(tab_handler):
self.browser.refresh()
def screenshot(self, tab_handler: str = "") -> str:
with self.op_tab(tab_handler):
return self.browser.get_screenshot_as_base64()

View File

@ -1,102 +1,59 @@
import threading import logging
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from urllib.parse import urlparse
from browser.chrome import ChromeDriver from browser.chrome import ChromeDriver
from browser.edge import EdgeDriver from common import Singleton
from config.helper import config
if TYPE_CHECKING: if TYPE_CHECKING:
from typing import Type, Optional, List from typing import Type, List
from browser.IDriver import IDriver from browser.IDriver import IDriver
from config import ConfigManager
from common.items import TabInfo
_manager: "Optional[BrowserManager]" = None _log = logging.getLogger("BrowserManager")
class BrowserManager(): class BrowserManager(metaclass=Singleton):
_config_manager: "ConfigManager"
_mapping: "dict[str, Type[IDriver]]" = { _mapping: "dict[str, Type[IDriver]]" = {
"chrome": ChromeDriver, "chrome": ChromeDriver,
"edge": EdgeDriver
} }
def __init__(self): def __init__(self, config_manager: "ConfigManager"):
_config = config()["webdriver"]["use"] self._config_manager = config_manager
_config = self._config_manager.config["webdriver"]["use"]
if _config not in self._mapping: if _config not in self._mapping:
_log.error("不支持的浏览器:%s", _config)
raise Exception("不支持的浏览器") raise Exception("不支持的浏览器")
self._driver: IDriver = self._mapping[_config]() self._driver: IDriver = self._mapping[_config](self._config_manager)
self._tabs: "List[TabInfo]" = [] self._tabs: "List[TabInfo]" = []
_log.debug("初始化完毕")
def init_browser(self):
_live_config = config().get("live", {})
_users = _live_config.get("users", [])
if type(_users) is not list:
_users = [_users]
_rooms = _live_config.get("rooms", [])
if type(_rooms) is not list:
_rooms = [_rooms]
for _user in _users:
self.open_user_page(str(_user))
for _room in _rooms:
self.open_live_page(str(_room))
@property @property
def driver(self): def driver(self):
return self._driver return self._driver
def open_user_page(self, sec_user_id: str):
tab = TabInfo()
tab.tab_type = TabInfo.TAB_TYPE_USER
tab.user_id = sec_user_id
if urlparse(sec_user_id).scheme:
tab.url = sec_user_id
else:
# 单独的用户id
tab.url = "https://www.douyin.com/user/" + sec_user_id
self.open_tab(tab)
def open_live_page(self, live_url: str):
tab = TabInfo()
tab.tab_type = TabInfo.TAB_TYPE_LIVE
if not urlparse(live_url).scheme:
# 单独的房间号
live_url = "https://live.douyin.com/" + live_url
tab.url = live_url
self.open_tab(tab)
def open_tab(self, tab_info: "TabInfo"): def open_tab(self, tab_info: "TabInfo"):
tab_handler = self._driver.new_tab() if not tab_info.tab_handler:
tab_info.tab_handler = tab_handler tab_handler = self._driver.new_tab()
tab_info.tab_handler = tab_handler
if not tab_info.tab_type: if not tab_info.tab_type:
tab_info.tab_type = TabInfo.TAB_TYPE_OTHER tab_info.tab_type = TabInfo.TAB_TYPE_OTHER
self.driver.open_url(tab_info.url, tab_handler) _log.debug("打开URL%s】@%s", tab_info.url, tab_info.tab_handler)
self.driver.open_url(tab_info.url, tab_info.tab_handler)
_log.info("打开URL完毕%s】@%s", tab_info.url, tab_info.tab_handler)
if tab_info not in self._tabs: if tab_info not in self._tabs:
self._tabs.append(tab_info) self._tabs.append(tab_info)
def close_tab(self, tab_info: "TabInfo"):
if tab_info not in self._tabs:
_log.warning("提供的标签不在标签组中,不予执行")
return
_log.debug("关闭标签:%s", tab_info.tab_handler)
self._driver.close_tab(tab_info.tab_handler)
_log.info("关闭标签完毕:%s", tab_info.tab_handler)
self._tabs.remove(tab_info)
def terminate(self): def terminate(self):
if self._driver: if self._driver:
self._driver.terminate() self._driver.terminate()
class TabInfo(object):
TAB_TYPE_OTHER = "other"
TAB_TYPE_USER = "user"
TAB_TYPE_LIVE = "live"
def __init__(self):
self.tab_handler: str = ""
self.user_id: str = ""
self.url: str = ""
self.tab_type: str = self.TAB_TYPE_OTHER
def init_manager():
global _manager
_manager = BrowserManager()
threading.Thread(target=_manager.init_browser).start()
return _manager
def get_manager():
if _manager is None:
return init_manager()
return _manager

1
common/__init__.py Normal file
View File

@ -0,0 +1 @@
from .singleton import Singleton

1
common/items/__init__.py Normal file
View File

@ -0,0 +1 @@
from .tab_info import TabInfo

14
common/items/tab_info.py Normal file
View File

@ -0,0 +1,14 @@
class TabInfo(object):
    """Plain record describing one browser tab.

    Instances compare by identity (no ``__eq__`` is defined), so the same
    object must be used when looking a tab up in, or removing it from, a
    collection.
    """

    # Recognised values for ``tab_type``.
    TAB_TYPE_OTHER = "other"
    TAB_TYPE_USER = "user"
    TAB_TYPE_LIVE = "live"

    def __init__(self):
        # Handle ID of this tab inside the WebDriver session.
        self.tab_handler: str = ""

        # Address loaded in the tab.
        self.url: str = ""

        # Kind of tab; used for display purposes.
        self.tab_type: str = self.TAB_TYPE_OTHER

        # Marks the tab so it is not closed by mistake when tabs are closed.
        self.tab_keep: bool = False

10
common/singleton.py Normal file
View File

@ -0,0 +1,10 @@
class Singleton(type):
    """Metaclass implementing the singleton pattern.

    A class declared with ``metaclass=Singleton`` is instantiated at most
    once: the first call builds the instance with the given arguments, and
    every later call returns that same object, ignoring its arguments.
    """

    # Maps each singleton class to its one instance; shared by all classes
    # that use this metaclass.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        created = cls._instances
        if cls in created:
            return created[cls]
        instance = super().__call__(*args, **kwargs)
        created[cls] = instance
        return instance

View File

@ -0,0 +1 @@
from .manager import ConfigManager

View File

@ -1,9 +0,0 @@
from pathlib import Path
from ruamel import yaml
def config():
settings_file = str(Path(__file__).parent.absolute()) + '/settings.yml'
with open(settings_file, 'r') as f:
return yaml.load(f, Loader=yaml.UnsafeLoader)

77
config/manager.py Normal file
View File

@ -0,0 +1,77 @@
import logging
import os
from ruamel.yaml import YAML
from common import Singleton
_log = logging.getLogger("ConfigManager")
class ConfigManager(metaclass=Singleton):
    """Load, hold and persist the application's YAML configuration.

    On construction the configuration file is read if it exists; otherwise
    the built-in defaults are adopted and written to that path. The class
    uses the ``Singleton`` metaclass, so only the first construction's
    ``config_file`` argument takes effect.
    """

    # Built-in defaults, used when no configuration file exists yet.
    _default_config = {
        "mitm": {
            "host": "127.0.0.1",
            "port": 8080,
        },
        "webdriver": {
            "headless": False,
            "use": "chrome",
            "chrome": {
                "bin": "chromedriver",
                "no_sandbox": True,
            },
        },
        "output": {
            "use": [],
            "xml": {
                "save_path": "./",
                "file_pattern": "{room_id}_{ts}.xml",
            },
            "debug": {
                "save_path": "./debug",
                "known": False,
            },
        },
        "douyin": {
            "rooms": [],
            "users": [],
        },
    }

    # Path of the configuration file.
    _config_file: "os.PathLike[str] | str"

    # Configuration content held by the current instance (always replaced
    # by an instance attribute in ``__init__``).
    _current_config: "dict" = {}

    def __init__(self, config_file="settings.yml"):
        """Read ``config_file``, or create it from the defaults if missing.

        :param config_file: path of the YAML settings file.
        """
        _log.debug("配置文件路径:%s", config_file)
        self._config_file = config_file
        if not os.path.exists(config_file):
            _log.warning("配置文件不存在,写入初始化配置")
            # Work on a private copy so that later changes to the live
            # configuration never leak into the class-level defaults.
            self._current_config = self._fresh_defaults()
            self._write_config()
        else:
            self._read_config()

    @classmethod
    def _fresh_defaults(cls):
        """Return an independent deep copy of the built-in defaults."""
        import copy  # function-scope import keeps the module header untouched

        return copy.deepcopy(cls._default_config)

    def _read_config(self):
        """Load the configuration file into ``self._current_config``.

        An empty file (for which the YAML loader returns ``None``) falls
        back to the built-in defaults, so consumers can always index the
        result as a mapping.
        """
        _log.debug("读取文件%s的配置内容", self._config_file)
        with open(self._config_file, "r", encoding="UTF8") as _f:
            # NOTE(review): typ="unsafe" can construct arbitrary Python
            # objects from the file; only acceptable while the settings file
            # is trusted. Consider typ="safe" — confirm nothing relies on
            # Python-specific tags first.
            yaml = YAML(typ="unsafe", pure=True)
            loaded = yaml.load(_f)
        if loaded is None:
            _log.warning("配置文件%s内容为空,使用默认配置", self._config_file)
            loaded = self._fresh_defaults()
        self._current_config = loaded
        _log.debug("读取文件%s的配置内容完毕", self._config_file)

    def _write_config(self):
        """Dump ``self._current_config`` to the configuration file as YAML."""
        _log.debug("向文件%s写入配置", self._config_file)
        with open(self._config_file, "w", encoding="UTF8") as _f:
            # The placeholder is required: passing an argument without one
            # makes logging fail to format the record.
            _log.debug("配置内容:%s", self._current_config)
            yaml = YAML(typ="unsafe", pure=True)
            yaml.dump(self._current_config, _f)
        _log.debug("向文件%s写入配置完毕", self._config_file)

    @property
    def config(self):
        """The live configuration mapping (the object itself, not a copy)."""
        return self._current_config

    def get(self, key: str, default: str = None):
        """Placeholder accessor; not implemented yet and returns ``None``."""
        ...

View File

@ -1,35 +0,0 @@
mitm:
host: 127.0.0.1
port: 8080
webdriver:
headless: False
use: edge
edge:
bin: msedgedriver.exe
chrome:
bin: chromedriver
no_sandbox: True
output:
use:
- print
- xml
- debug
xml:
save_path: "./"
file_pattern: "{room_id}_{ts}.xml"
debug:
save_path:
error: "./error"
unknown: "./debug"
known: False
live:
rooms:
- ""
users:
- ""
api:
userinfo: 'https://live.douyin.com/webcast/user/?aid=6383&target_uid='

4
core/__init__.py Normal file
View File

@ -0,0 +1,4 @@
"""核心调度"""
from .manager import CoreManager

79
core/manager.py Normal file
View File

@ -0,0 +1,79 @@
import logging
from urllib.parse import urlparse
from common import Singleton
from common.items import TabInfo
from config import ConfigManager
from browser import BrowserManager
from proxy import ProxyManager
_log = logging.getLogger("CoreManager")
_log.setLevel(logging.DEBUG)
class CoreManager(metaclass=Singleton):
    """Core scheduler that creates and owns the other managers.

    Construction builds the configuration manager, then the mitm proxy
    manager (and starts its loop), then the browser manager, and finally
    opens one tab per configured live room.
    """

    config_manager: "ConfigManager"
    browser_manager: "BrowserManager"
    proxy_manager: "ProxyManager"

    def __del__(self):
        """Shut the browser and proxy managers down, best effort.

        Each step is isolated so that a failure in one (including a manager
        attribute that was never set because ``__init__`` failed part-way)
        does not prevent the other from being terminated. Only ``Exception``
        is absorbed; ``KeyboardInterrupt`` and ``SystemExit`` propagate,
        which matters when this is called explicitly from ``restart``.
        """
        _log.debug("析构开始")
        try:
            _log.debug("析构浏览器管理器")
            self.browser_manager.terminate()
        except Exception:
            _log.warning("析构浏览器管理器时出错", exc_info=True)
        finally:
            _log.debug("析构浏览器管理器完毕")
        try:
            _log.debug("析构Mitm代理管理器")
            self.proxy_manager.terminate()
        except Exception:
            _log.warning("析构Mitm代理管理器时出错", exc_info=True)
        finally:
            _log.debug("析构Mitm代理管理器完毕")

    def __init__(self):
        """Initialise every module and open the tabs listed in the config."""
        _log.debug("初始化配置管理器")
        self.config_manager = ConfigManager()
        _log.info("初始化配置管理器完毕")

        # The proxy loop is started before the browser manager is created.
        _log.debug("初始化Mitm代理管理器")
        self.proxy_manager = ProxyManager(self.config_manager)
        self.proxy_manager.start_loop()
        _log.info("初始化Mitm代理管理器完毕")

        _log.debug("初始化浏览器管理器")
        self.browser_manager = BrowserManager(self.config_manager)
        _log.info("初始化浏览器管理器完毕")

        self._open_config_tabs()

    def restart(self):
        """Tear everything down and initialise again on the same instance."""
        self.__del__()
        self.__init__()

    def open_tab(self, tab_info: "TabInfo"):
        """Open ``tab_info`` through the browser manager."""
        self.browser_manager.open_tab(tab_info)

    def close_tab(self, tab_info: "TabInfo"):
        """Close ``tab_info`` through the browser manager."""
        self.browser_manager.close_tab(tab_info)

    def _open_config_tabs(self):
        """Open a live-room tab for every entry of ``douyin.rooms``.

        The setting may be a list or a single value. Each entry is either a
        full URL (used as-is) or a bare room ID, which is appended to
        ``https://live.douyin.com/``.
        """
        rooms = self.config_manager.config["douyin"]["rooms"]
        if rooms is None:
            # A "rooms:" key with no value loads as None: nothing to open.
            rooms = []
        elif not isinstance(rooms, list):
            rooms = [rooms]
        for room in rooms:
            # YAML loads a purely numeric room ID as int; normalise to str so
            # both urlparse() and the concatenation below work.
            room = str(room)
            if urlparse(room).scheme:
                live_url = room
            else:
                # Bare room ID.
                live_url = "https://live.douyin.com/" + room
            tab_info = TabInfo()
            tab_info.url = live_url
            tab_info.tab_type = TabInfo.TAB_TYPE_LIVE
            self.open_tab(tab_info)

39
main.py
View File

@ -1,29 +1,18 @@
import logging
import atexit import atexit
import signal
from browser.manager import init_manager as init_browser_manager from core import CoreManager
from output.manager import OutputManager
from proxy.manager import init_manager as init_proxy_manager logging.basicConfig(level=logging.INFO)
def _on_exit():
c = CoreManager()
del c
atexit.register(_on_exit)
if __name__ == '__main__': if __name__ == '__main__':
proxy_manager = init_proxy_manager() c = CoreManager()
proxy_manager.start_loop() c.proxy_manager.join()
browser_manager = init_browser_manager()
output_manager = OutputManager()
def terminate(*_):
print("terminate")
browser_manager.terminate()
output_manager.terminate()
proxy_manager.terminate()
atexit.register(terminate)
signal.signal(signal.SIGTERM, terminate)
signal.signal(signal.SIGINT, terminate)
output_manager.start_loop()
try:
proxy_manager.join()
finally:
terminate()

View File

@ -0,0 +1 @@
from .manager import ProxyManager

View File

@ -1,44 +1,51 @@
import asyncio import asyncio
import logging
import threading import threading
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from mitmproxy.options import Options from mitmproxy.options import Options
from mitmproxy.tools.dump import DumpMaster from mitmproxy.tools.dump import DumpMaster
from config.helper import config from common import Singleton
from proxy.addon.danmaku_ws import DanmakuWebsocketAddon from proxy.addon.danmaku_ws import DanmakuWebsocketAddon
from proxy.queues import MESSAGE_QUEUE from proxy.queues import MESSAGE_QUEUE
if TYPE_CHECKING: if TYPE_CHECKING:
from typing import Optional from config import ConfigManager
_manager: "Optional[ProxyManager]" = None _log = logging.getLogger("ProxyManager")
class ProxyManager: class ProxyManager(metaclass=Singleton):
def __init__(self): _config_manager: "ConfigManager"
def __init__(self, config_manager):
self._config_manager = config_manager
self._mitm_instance = None self._mitm_instance = None
self._loop: "asyncio.AbstractEventLoop" = asyncio.new_event_loop() self._loop: "asyncio.AbstractEventLoop" = asyncio.new_event_loop()
opts = Options( opts = Options(
listen_host=config()['mitm']['host'], listen_host=self._config_manager.config['mitm']['host'],
listen_port=config()['mitm']['port'], listen_port=self._config_manager.config['mitm']['port'],
) )
async def _init_mitm_instance(): async def _init_mitm_instance():
_log.debug("初始化Mitm实例")
self._mitm_instance = DumpMaster(options=opts) self._mitm_instance = DumpMaster(options=opts)
self._load_addon() self._load_addon()
opts.update_defer( opts.update_defer(
flow_detail=0, flow_detail=0,
termlog_verbosity="error", termlog_verbosity="error",
) )
_log.debug("初始化Mitm实例完毕")
self._loop.run_until_complete(_init_mitm_instance()) self._loop.run_until_complete(_init_mitm_instance())
self._thread = None self._thread = None
def __del__(self):
self.terminate()
def terminate(self): def terminate(self):
if self._mitm_instance: if self._mitm_instance:
_log.debug("关闭mitm实例")
self._mitm_instance.shutdown() self._mitm_instance.shutdown()
_log.info("关闭mitm实例完成")
if self._loop: if self._loop:
if self._loop.is_running(): if self._loop.is_running():
self._loop.stop() self._loop.stop()
@ -52,21 +59,11 @@ class ProxyManager:
self._loop.run_until_complete(self._mitm_instance.run()) self._loop.run_until_complete(self._mitm_instance.run())
def start_loop(self): def start_loop(self):
_log.debug("新建进程运行mitm")
self._thread = threading.Thread(target=self._start, args=()) self._thread = threading.Thread(target=self._start, args=())
self._thread.start() self._thread.start()
_log.debug("新建进程已运行mitm")
def join(self): def join(self):
if self._thread: if self._thread:
self._thread.join() self._thread.join()
def init_manager():
global _manager
_manager = ProxyManager()
return _manager
def get_manager():
if _manager is None:
return init_manager()
return _manager