diff --git a/main.py b/main.py index ce2e464..d45d766 100644 --- a/main.py +++ b/main.py @@ -1,4 +1,5 @@ import sys +import threading from urllib.parse import urlparse from scripts import watcher, webdriver @@ -9,7 +10,9 @@ if __name__ == '__main__': print('Invalid url provided, please check...') sys.exit(1) - webdriver.go(sys.argv[1]) + t = threading.Thread(target=webdriver.go, args=(sys.argv[1],)) + t.start() w = watcher.Watcher(directory=config()['watchdog']['dir']) w.run() + \ No newline at end of file diff --git a/messages/__pycache__/utils.cpython-39.pyc b/messages/__pycache__/utils.cpython-39.pyc index 6b9e066..f5451f7 100644 Binary files a/messages/__pycache__/utils.cpython-39.pyc and b/messages/__pycache__/utils.cpython-39.pyc differ diff --git a/messages/utils.py b/messages/utils.py index 561e3b9..7eed281 100644 --- a/messages/utils.py +++ b/messages/utils.py @@ -1,3 +1,4 @@ +import os from protobuf import message_pb2 from messages.member import MemberMessage @@ -29,6 +30,8 @@ def unpackMsgBin(filepath): decodeMsg(response.messages) except Exception as e: pass + finally: + os.remove(filepath) def decodeMsg(messages): for message in messages: diff --git a/scripts/__pycache__/watcher.cpython-39.pyc b/scripts/__pycache__/watcher.cpython-39.pyc index 309aac4..7b44dd7 100644 Binary files a/scripts/__pycache__/watcher.cpython-39.pyc and b/scripts/__pycache__/watcher.cpython-39.pyc differ diff --git a/scripts/__pycache__/webdriver.cpython-39.pyc b/scripts/__pycache__/webdriver.cpython-39.pyc index 94d2a3f..d5278fe 100644 Binary files a/scripts/__pycache__/webdriver.cpython-39.pyc and b/scripts/__pycache__/webdriver.cpython-39.pyc differ diff --git a/scripts/watcher.py b/scripts/watcher.py index 5623741..10b3f8b 100644 --- a/scripts/watcher.py +++ b/scripts/watcher.py @@ -39,5 +39,6 @@ class Handler(FileSystemEventHandler): return None elif event.event_type == 'created': + print('here') q.put(event.src_path) diff --git a/scripts/webdriver.py b/scripts/webdriver.py index 7ae1ed2..9a49a00 100644 --- a/scripts/webdriver.py +++ b/scripts/webdriver.py @@ -1,15 +1,21 @@ +import requests +import json + from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.common.desired_capabilities import DesiredCapabilities from selenium.webdriver.common.proxy import Proxy, ProxyType +from selenium.webdriver.common.by import By +from selenium.webdriver.support.ui import WebDriverWait +from selenium.webdriver.support.expected_conditions import presence_of_element_located from config.helper import config +from store.mongo import MongoStore def go(url): chrome_options = Options() chrome_options.add_argument('--proxy-server=%s' % config()['webdriver']['proxy']) # chrome_options.add_argument('--headless') - chrome_options.add_experimental_option('detach', True) proxy = Proxy() proxy.proxy_type = ProxyType.MANUAL @@ -19,6 +25,33 @@ def go(url): capabilities = DesiredCapabilities.CHROME proxy.add_to_capabilities(capabilities) - driver = webdriver.Chrome(options=chrome_options, desired_capabilities=capabilities, executable_path=config()['webdriver']['bin']) + with webdriver.Chrome(options=chrome_options, + desired_capabilities=capabilities, + executable_path=config()['webdriver']['bin'] + ) as driver: + wait = WebDriverWait(driver, 10) - driver.get(url) + driver.implicitly_wait(24 * 60 * 60) + + driver.get(url) + + first_result = wait.until(presence_of_element_located((By.ID, "RENDER_DATA"))) + json_str = requests.utils.unquote(first_result.get_attribute("textContent")) + json_obj = json.loads(json_str) + + roomInfo = json_obj['initialState']['roomStore']['roomInfo'] + + store = MongoStore() + store.set_collection('room') + store.insert_one({ + 'roomId': roomInfo['roomId'], + 'web_rid': roomInfo['web_rid'], + 'title': roomInfo['room']['title'], + 'user_count_str': roomInfo['room']['user_count_str'], + 'cover': roomInfo['room']['cover']['url_list'][0], + 'admin_user_ids': roomInfo['room']['admin_user_ids'], + 'owner': roomInfo['room']['owner'] + }) + + wait.until(presence_of_element_located((By.CLASS_NAME, "oSu9Aw19"))) + \ No newline at end of file