diff --git a/browser/IDriver.py b/browser/IDriver.py index a89c681..6db1941 100644 --- a/browser/IDriver.py +++ b/browser/IDriver.py @@ -1,15 +1,21 @@ import contextlib +import logging from typing import TYPE_CHECKING if TYPE_CHECKING: from selenium.webdriver.remote.webdriver import WebDriver + from config import ConfigManager + +_log = logging.getLogger("IDriver") +_log.setLevel(logging.DEBUG) class IDriver(): browser: "WebDriver" + _config_manager: "ConfigManager" - def __del__(self): - self.terminate() + def __init__(self, config_manager): + self._config_manager = config_manager def terminate(self): self.browser.quit() @@ -20,19 +26,25 @@ class IDriver(): def change_tab(self, tab_handler: str): ... + def close_tab(self, tab_handler: str): + ... + def open_url(self, url: str, tab_handler: str = ""): ... @contextlib.contextmanager def op_tab(self, tab_handler: str): cur_handle = self.browser.current_window_handle + _log.debug("切换Tab:旧Tab:%s,新Tab:%s", cur_handle, tab_handler) if tab_handler == "": tab_handler = cur_handle try: self.change_tab(tab_handler) + _log.debug("切换至新Tab:%s", tab_handler) yield self finally: self.change_tab(cur_handle) + _log.debug("切换至旧Tab:%s", cur_handle) def refresh(self, tab_handler: str = ""): ... diff --git a/browser/chrome.py b/browser/chrome.py index 5a44aa2..aefb478 100644 --- a/browser/chrome.py +++ b/browser/chrome.py @@ -1,37 +1,41 @@ +import logging from selenium import webdriver from selenium.webdriver import Proxy, DesiredCapabilities from selenium.webdriver.common.proxy import ProxyType -from config.helper import config from browser.IDriver import IDriver from selenium.webdriver.chrome.options import Options +_log = logging.getLogger("ChromeDriver") +_log.setLevel(logging.DEBUG) class ChromeDriver(IDriver): - def __init__(self): - super(ChromeDriver, self).__init__() + def __init__(self, config_manager): + super(ChromeDriver, self).__init__(config_manager) options = Options() - if config()['webdriver']['headless']: + if self._config_manager.config['webdriver']['headless']: options.add_argument("--headless") options.add_argument("--window-size=1920,1080") - options.add_argument('--proxy-server=%s:%s' % (config()['mitm']['host'], config()['mitm']['port'])) + options.add_argument('--proxy-server=%s:%s' % (self._config_manager.config['mitm']['host'], self._config_manager.config['mitm']['port'])) options.add_argument('--ignore-certificate-errors') options.add_argument('--ignore-ssl-errors') options.add_argument('--incognito') options.add_experimental_option('excludeSwitches', ['ignore-certificate-errors']) - if config()['webdriver']['chrome']['no_sandbox']: + if self._config_manager.config['webdriver']['chrome']['no_sandbox']: + _log.debug("添加启动参数NoSandbox") options.add_argument('--no-sandbox') proxy = Proxy() proxy.proxy_type = ProxyType.MANUAL - proxy.http_proxy = "%s:%s" % (config()['mitm']['host'], config()['mitm']['port']) - proxy.ssl_proxy = "%s:%s" % (config()['mitm']['host'], config()['mitm']['port']) + proxy.http_proxy = "%s:%s" % (self._config_manager.config['mitm']['host'], self._config_manager.config['mitm']['port']) + proxy.ssl_proxy = "%s:%s" % (self._config_manager.config['mitm']['host'], self._config_manager.config['mitm']['port']) capabilities = DesiredCapabilities.CHROME proxy.add_to_capabilities(capabilities) self.browser = webdriver.Chrome(options=options, desired_capabilities=capabilities, - executable_path=config()['webdriver']['chrome']['bin'] + executable_path=self._config_manager.config['webdriver']['chrome']['bin'] ) + _log.info("浏览器启动完毕") def new_tab(self) -> str: current_window_handles = self.browser.window_handles @@ -39,16 +43,20 @@ class ChromeDriver(IDriver): new_window_handles = self.browser.window_handles for _handle in new_window_handles: if _handle not in current_window_handles: + _log.debug("新窗口句柄:%s", _handle) return _handle + _log.warning("打开新窗口,未发现新句柄") return "" def change_tab(self, tab_handler: str): if tab_handler not in self.browser.window_handles: return - if tab_handler == self.browser.current_window_handle: - return self.browser.switch_to.window(tab_handler) + def close_tab(self, tab_handler: str): + with self.op_tab(tab_handler): + self.browser.close() + def open_url(self, url: str, tab_handler: str = ""): with self.op_tab(tab_handler): self.browser.get(url) diff --git a/browser/edge.py b/browser/edge.py deleted file mode 100644 index c3618c7..0000000 --- a/browser/edge.py +++ /dev/null @@ -1,59 +0,0 @@ -from selenium import webdriver -from selenium.webdriver import Proxy, DesiredCapabilities -from selenium.webdriver.common.proxy import ProxyType - -from config.helper import config -from browser.IDriver import IDriver -from selenium.webdriver.edge.options import Options - - -class EdgeDriver(IDriver): - def __init__(self): - super(EdgeDriver, self).__init__() - options = Options() - if config()['webdriver']['headless']: - options.add_argument("--headless") - options.add_argument("--window-size=1920,1080") - options.add_argument('--proxy-server=%s:%s' % (config()['mitm']['host'], config()['mitm']['port'])) - options.add_argument('--ignore-certificate-errors') - options.add_argument('--ignore-ssl-errors') - options.add_argument('--incognito') - proxy = Proxy() - proxy.proxy_type = ProxyType.MANUAL - proxy.http_proxy = "%s:%s" % (config()['mitm']['host'], config()['mitm']['port']) - proxy.ssl_proxy = "%s:%s" % (config()['mitm']['host'], config()['mitm']['port']) - capabilities = DesiredCapabilities.EDGE - proxy.add_to_capabilities(capabilities) - - self.browser = webdriver.Chrome(options=options, - desired_capabilities=capabilities, - executable_path=config()['webdriver']['edge']['bin'] - ) - - def new_tab(self) -> str: - current_window_handles = self.browser.window_handles - self.browser.execute_script("window.open('')") - new_window_handles = self.browser.window_handles - for _handle in new_window_handles: - if _handle not in current_window_handles: - return _handle - return "" - - def change_tab(self, tab_handler: str): - if tab_handler not in self.browser.window_handles: - return - if tab_handler == self.browser.current_window_handle: - return - self.browser.switch_to.window(tab_handler) - - def open_url(self, url: str, tab_handler: str = ""): - with self.op_tab(tab_handler): - self.browser.get(url) - - def refresh(self, tab_handler: str = ""): - with self.op_tab(tab_handler): - self.browser.refresh() - - def screenshot(self, tab_handler: str = "") -> str: - with self.op_tab(tab_handler): - return self.browser.get_screenshot_as_base64() diff --git a/browser/manager.py b/browser/manager.py index 751559e..b5b9d93 100644 --- a/browser/manager.py +++ b/browser/manager.py @@ -1,102 +1,59 @@ -import threading +import logging from typing import TYPE_CHECKING -from urllib.parse import urlparse from browser.chrome import ChromeDriver -from browser.edge import EdgeDriver -from config.helper import config +from common import Singleton if TYPE_CHECKING: - from typing import Type, Optional, List + from typing import Type, List from browser.IDriver import IDriver + from config import ConfigManager + from common.items import TabInfo -_manager: "Optional[BrowserManager]" = None +_log = logging.getLogger("BrowserManager") -class BrowserManager(): +class BrowserManager(metaclass=Singleton): + _config_manager: "ConfigManager" _mapping: "dict[str, Type[IDriver]]" = { "chrome": ChromeDriver, - "edge": EdgeDriver } - def __init__(self): - _config = config()["webdriver"]["use"] + def __init__(self, config_manager: "ConfigManager"): + self._config_manager = config_manager + _config = self._config_manager.config["webdriver"]["use"] if _config not in self._mapping: + _log.error("不支持的浏览器:%s", _config) raise Exception("不支持的浏览器") - self._driver: IDriver = self._mapping[_config]() + self._driver: IDriver = self._mapping[_config](self._config_manager) self._tabs: "List[TabInfo]" = [] - - def init_browser(self): - _live_config = config().get("live", {}) - _users = _live_config.get("users", []) - if type(_users) is not list: - _users = [_users] - _rooms = _live_config.get("rooms", []) - if type(_rooms) is not list: - _rooms = [_rooms] - for _user in _users: - self.open_user_page(str(_user)) - for _room in _rooms: - self.open_live_page(str(_room)) + _log.debug("初始化完毕") @property def driver(self): return self._driver - def open_user_page(self, sec_user_id: str): - tab = TabInfo() - tab.tab_type = TabInfo.TAB_TYPE_USER - tab.user_id = sec_user_id - if urlparse(sec_user_id).scheme: - tab.url = sec_user_id - else: - # 单独的用户id - tab.url = "https://www.douyin.com/user/" + sec_user_id - self.open_tab(tab) - - def open_live_page(self, live_url: str): - tab = TabInfo() - tab.tab_type = TabInfo.TAB_TYPE_LIVE - if not urlparse(live_url).scheme: - # 单独的房间号 - live_url = "https://live.douyin.com/" + live_url - tab.url = live_url - self.open_tab(tab) - def open_tab(self, tab_info: "TabInfo"): - tab_handler = self._driver.new_tab() - tab_info.tab_handler = tab_handler + if not tab_info.tab_handler: + tab_handler = self._driver.new_tab() + tab_info.tab_handler = tab_handler if not tab_info.tab_type: tab_info.tab_type = TabInfo.TAB_TYPE_OTHER - self.driver.open_url(tab_info.url, tab_handler) + _log.debug("打开URL:【%s】@%s", tab_info.url, tab_info.tab_handler) + self.driver.open_url(tab_info.url, tab_info.tab_handler) + _log.info("打开URL完毕:【%s】@%s", tab_info.url, tab_info.tab_handler) if tab_info not in self._tabs: self._tabs.append(tab_info) + def close_tab(self, tab_info: "TabInfo"): + if tab_info not in self._tabs: + _log.warning("提供的标签不在标签组中,不予执行") + return + _log.debug("关闭标签:%s", tab_info.tab_handler) + self._driver.close_tab(tab_info.tab_handler) + _log.info("关闭标签完毕:%s", tab_info.tab_handler) + self._tabs.remove(tab_info) + def terminate(self): if self._driver: self._driver.terminate() - - -class TabInfo(object): - TAB_TYPE_OTHER = "other" - TAB_TYPE_USER = "user" - TAB_TYPE_LIVE = "live" - - def __init__(self): - self.tab_handler: str = "" - self.user_id: str = "" - self.url: str = "" - self.tab_type: str = self.TAB_TYPE_OTHER - - -def init_manager(): - global _manager - _manager = BrowserManager() - threading.Thread(target=_manager.init_browser).start() - return _manager - - -def get_manager(): - if _manager is None: - return init_manager() - return _manager diff --git a/common/__init__.py b/common/__init__.py new file mode 100644 index 0000000..9079c19 --- /dev/null +++ b/common/__init__.py @@ -0,0 +1 @@ +from .singleton import Singleton \ No newline at end of file diff --git a/common/items/__init__.py b/common/items/__init__.py new file mode 100644 index 0000000..83a0d4a --- /dev/null +++ b/common/items/__init__.py @@ -0,0 +1 @@ +from .tab_info import TabInfo \ No newline at end of file diff --git a/common/items/tab_info.py b/common/items/tab_info.py new file mode 100644 index 0000000..055ae66 --- /dev/null +++ b/common/items/tab_info.py @@ -0,0 +1,14 @@ +class TabInfo(object): + TAB_TYPE_OTHER = "other" + TAB_TYPE_USER = "user" + TAB_TYPE_LIVE = "live" + + def __init__(self): + self.tab_handler: str = "" + """WebDriver中,该标签的句柄ID""" + self.url: str = "" + """标签地址""" + self.tab_type: str = self.TAB_TYPE_OTHER + """标签类型,展示用""" + self.tab_keep: bool = False + """关闭标签时,避免被误关""" diff --git a/common/singleton.py b/common/singleton.py new file mode 100644 index 0000000..1823aea --- /dev/null +++ b/common/singleton.py @@ -0,0 +1,10 @@ +class Singleton(type): + """ + 单例模式(以metaclass方式实现) + """ + _instances = {} + + def __call__(cls, *args, **kwargs): + if cls not in cls._instances: + cls._instances[cls] = super().__call__(*args, **kwargs) + return cls._instances[cls] \ No newline at end of file diff --git a/config/__init__.py b/config/__init__.py index e69de29..245d43a 100644 --- a/config/__init__.py +++ b/config/__init__.py @@ -0,0 +1 @@ +from .manager import ConfigManager \ No newline at end of file diff --git a/config/helper.py b/config/helper.py deleted file mode 100644 index 4c67e88..0000000 --- a/config/helper.py +++ /dev/null @@ -1,9 +0,0 @@ -from pathlib import Path -from ruamel import yaml - -def config(): - settings_file = str(Path(__file__).parent.absolute()) + '/settings.yml' - - with open(settings_file, 'r') as f: - return yaml.load(f, Loader=yaml.UnsafeLoader) - diff --git a/config/manager.py b/config/manager.py new file mode 100644 index 0000000..269ea53 --- /dev/null +++ b/config/manager.py @@ -0,0 +1,77 @@ +import logging +import os + +from ruamel.yaml import YAML + +from common import Singleton + +_log = logging.getLogger("ConfigManager") + + +class ConfigManager(metaclass=Singleton): + """默认配置""" + _default_config = { + "mitm": { + "host": "127.0.0.1", + "port": 8080, + }, + "webdriver": { + "headless": False, + "use": "chrome", + "chrome": { + "bin": "chromedriver", + "no_sandbox": True + } + }, + "output": { + "use": [], + "xml": { + "save_path": "./", + "file_pattern": "{room_id}_{ts}.xml" + }, + "debug": { + "save_path": "./debug", + "known": False + }, + }, + "douyin": { + "rooms": [], + "users": [], + }, + } + """配置文件路径""" + _config_file: "os.PathLike[str] || str" + """当前实例中,配置文件内容""" + _current_config: "dict" = {} + + def __init__(self, config_file="settings.yml"): + _log.debug("配置文件路径:%s", config_file) + self._config_file = config_file + if not os.path.exists(config_file): + _log.warning("配置文件不存在,写入初始化配置") + self._current_config = self._default_config + self._write_config() + else: + self._read_config() + + def _read_config(self): + _log.debug("读取文件%s的配置内容", self._config_file) + with open(self._config_file, "r", encoding="UTF8") as _f: + yaml = YAML(typ="unsafe", pure=True) + self._current_config = yaml.load(_f) + _log.debug("读取文件%s的配置内容完毕", self._config_file) + + def _write_config(self): + _log.debug("向文件%s写入配置", self._config_file) + with open(self._config_file, "w", encoding="UTF8") as _f: + _log.debug("配置内容:", self._current_config) + yaml = YAML(typ="unsafe", pure=True) + yaml.dump(self._current_config, _f) + _log.debug("向文件%s写入配置完毕", self._config_file) + + @property + def config(self): + return self._current_config + + def get(self, key: str, default: str = None): + ... diff --git a/config/settings.yml b/config/settings.yml deleted file mode 100644 index e727802..0000000 --- a/config/settings.yml +++ /dev/null @@ -1,35 +0,0 @@ -mitm: - host: 127.0.0.1 - port: 8080 - -webdriver: - headless: False - use: edge - edge: - bin: msedgedriver.exe - chrome: - bin: chromedriver - no_sandbox: True - -output: - use: - - print - - xml - - debug - xml: - save_path: "./" - file_pattern: "{room_id}_{ts}.xml" - debug: - save_path: - error: "./error" - unknown: "./debug" - known: False - -live: - rooms: - - "" - users: - - "" - -api: - userinfo: 'https://live.douyin.com/webcast/user/?aid=6383&target_uid=' \ No newline at end of file diff --git a/core/__init__.py b/core/__init__.py new file mode 100644 index 0000000..5e3b10d --- /dev/null +++ b/core/__init__.py @@ -0,0 +1,4 @@ +"""核心调度""" + + +from .manager import CoreManager diff --git a/core/manager.py b/core/manager.py new file mode 100644 index 0000000..df2d7ca --- /dev/null +++ b/core/manager.py @@ -0,0 +1,79 @@ +import logging +from urllib.parse import urlparse + +from common import Singleton +from common.items import TabInfo +from config import ConfigManager +from browser import BrowserManager +from proxy import ProxyManager + +_log = logging.getLogger("CoreManager") +_log.setLevel(logging.DEBUG) + + +class CoreManager(metaclass=Singleton): + config_manager: "ConfigManager" + browser_manager: "BrowserManager" + proxy_manager: "ProxyManager" + + def __del__(self): + """ + 析构CoreManager,需要gracefully shutdown + """ + _log.debug("析构开始") + try: + _log.debug("析构浏览器管理器") + self.browser_manager.terminate() + except: + pass + finally: + _log.debug("析构浏览器管理器完毕") + try: + _log.debug("析构Mitm代理管理器") + self.proxy_manager.terminate() + except: + pass + finally: + _log.debug("析构Mitm代理管理器完毕") + + def __init__(self): + """ + 初始化CoreManager,初始化所有模块 + """ + _log.debug("初始化配置管理器") + self.config_manager = ConfigManager() + _log.info("初始化配置管理器完毕") + _log.debug("初始化Mitm代理管理器") + self.proxy_manager = ProxyManager(self.config_manager) + self.proxy_manager.start_loop() + _log.info("初始化Mitm代理管理器完毕") + _log.debug("初始化浏览器管理器") + self.browser_manager = BrowserManager(self.config_manager) + _log.info("初始化浏览器管理器完毕") + self._open_config_tabs() + + def restart(self): + """服务有问题?重启一下把""" + self.__del__() + self.__init__() + + def open_tab(self, tab_info: "TabInfo"): + self.browser_manager.open_tab(tab_info) + + def close_tab(self, tab_info: "TabInfo"): + self.browser_manager.close_tab(tab_info) + + def _open_config_tabs(self): + rooms = self.config_manager.config["douyin"]["rooms"] + if type(rooms) is not list: + rooms = [rooms] + for room in rooms: + if not urlparse(room).scheme: + # 单独的房间号 + live_url = "https://live.douyin.com/" + room + else: + live_url = room + tab_info = TabInfo() + tab_info.url = live_url + tab_info.tab_type = TabInfo.TAB_TYPE_LIVE + self.open_tab(tab_info) diff --git a/main.py b/main.py index 56c492c..82c44da 100644 --- a/main.py +++ b/main.py @@ -1,29 +1,18 @@ +import logging import atexit -import signal -from browser.manager import init_manager as init_browser_manager -from output.manager import OutputManager -from proxy.manager import init_manager as init_proxy_manager +from core import CoreManager + +logging.basicConfig(level=logging.INFO) + + +def _on_exit(): + c = CoreManager() + del c + + +atexit.register(_on_exit) if __name__ == '__main__': - proxy_manager = init_proxy_manager() - proxy_manager.start_loop() - browser_manager = init_browser_manager() - output_manager = OutputManager() - - - def terminate(*_): - print("terminate") - browser_manager.terminate() - output_manager.terminate() - proxy_manager.terminate() - - - atexit.register(terminate) - signal.signal(signal.SIGTERM, terminate) - signal.signal(signal.SIGINT, terminate) - output_manager.start_loop() - try: - proxy_manager.join() - finally: - terminate() + c = CoreManager() + c.proxy_manager.join() diff --git a/proxy/__init__.py b/proxy/__init__.py index e69de29..2d5040e 100644 --- a/proxy/__init__.py +++ b/proxy/__init__.py @@ -0,0 +1 @@ +from .manager import ProxyManager \ No newline at end of file diff --git a/proxy/manager.py b/proxy/manager.py index b30e025..2a70d2c 100644 --- a/proxy/manager.py +++ b/proxy/manager.py @@ -1,44 +1,51 @@ import asyncio +import logging import threading from typing import TYPE_CHECKING from mitmproxy.options import Options from mitmproxy.tools.dump import DumpMaster -from config.helper import config +from common import Singleton from proxy.addon.danmaku_ws import DanmakuWebsocketAddon from proxy.queues import MESSAGE_QUEUE if TYPE_CHECKING: - from typing import Optional + from config import ConfigManager -_manager: "Optional[ProxyManager]" = None +_log = logging.getLogger("ProxyManager") -class ProxyManager: - def __init__(self): +class ProxyManager(metaclass=Singleton): + _config_manager: "ConfigManager" + + def __init__(self, config_manager): + self._config_manager = config_manager self._mitm_instance = None self._loop: "asyncio.AbstractEventLoop" = asyncio.new_event_loop() opts = Options( - listen_host=config()['mitm']['host'], - listen_port=config()['mitm']['port'], + listen_host=self._config_manager.config['mitm']['host'], + listen_port=self._config_manager.config['mitm']['port'], ) + async def _init_mitm_instance(): + _log.debug("初始化Mitm实例") self._mitm_instance = DumpMaster(options=opts) self._load_addon() opts.update_defer( flow_detail=0, termlog_verbosity="error", ) + _log.debug("初始化Mitm实例完毕") + self._loop.run_until_complete(_init_mitm_instance()) self._thread = None - def __del__(self): - self.terminate() - def terminate(self): if self._mitm_instance: + _log.debug("关闭mitm实例") self._mitm_instance.shutdown() + _log.info("关闭mitm实例完成") if self._loop: if self._loop.is_running(): self._loop.stop() @@ -52,21 +59,11 @@ class ProxyManager: self._loop.run_until_complete(self._mitm_instance.run()) def start_loop(self): + _log.debug("新建进程,运行mitm") self._thread = threading.Thread(target=self._start, args=()) self._thread.start() + _log.debug("新建进程,已运行mitm") def join(self): if self._thread: self._thread.join() - - -def init_manager(): - global _manager - _manager = ProxyManager() - return _manager - - -def get_manager(): - if _manager is None: - return init_manager() - return _manager