Commit d260882e by yexing

u

parent 58d033ad
__pycache__
.vscode
celerybeat-*
.pytest_cache
log
pid
image
.idea
config
tmp
from time import sleep
import asyncio import asyncio
import json import json
import random import random
...@@ -78,13 +77,13 @@ class RedisSingleton: ...@@ -78,13 +77,13 @@ class RedisSingleton:
def __init__(self, redis_url=None): def __init__(self, redis_url=None):
self.redis_url = redis_url self.redis_url = redis_url
def get_connection(self): async def get_connection(self):
if self._redis_pool is None: if self._redis_pool is None:
if self.redis_url: if self.redis_url:
self._redis_pool = from_url(self.redis_url, decode_responses=True) self._redis_pool = await from_url(self.redis_url, decode_responses=True)
else: else:
# 默认连接地址 # 默认连接地址
self._redis_pool = from_url('redis://localhost', decode_responses=True) self._redis_pool = await from_url('redis://localhost', decode_responses=True)
return self._redis_pool return self._redis_pool
...@@ -93,7 +92,7 @@ class SiteType(Data): ...@@ -93,7 +92,7 @@ class SiteType(Data):
de = 2 de = 2
def callback(param): async def callback(param):
""" """
回调接口 回调接口
...@@ -108,46 +107,46 @@ def callback(param): ...@@ -108,46 +107,46 @@ def callback(param):
) )
def refresh_local_cookie(data: dict, site: str = "com"): async def refresh_local_cookie(data: dict, site: str = "com"):
""" """
刷新本地cookie 刷新本地cookie
""" """
redis = redis_singleton.get_connection() redis = await redis_singleton.get_connection()
redis.set(f"cookie:{site}", json.dumps(data)) await redis.set(f"cookie:{site}", json.dumps(data))
def get_cookie_error(): async def get_cookie_error():
""" """
获取cookie错误 获取cookie错误
:return: :return:
""" """
redis = redis_singleton.get_connection() redis = await redis_singleton.get_connection()
return redis.get("amazon:cookie-error") return await redis.get("amazon:cookie-error")
def delete_cookie_error(): async def delete_cookie_error():
""" """
删除cookie错误 删除cookie错误
:return: :return:
""" """
redis = redis_singleton.get_connection() redis = await redis_singleton.get_connection()
return redis.delete("amazon:cookie-error") return await redis.delete("amazon:cookie-error")
def input_postcode( async def input_postcode(
tab: WebPageTab, postcode: str, locator: str = "#GLUXZipUpdateInput" tab: WebPageTab, postcode: str, locator: str = "#GLUXZipUpdateInput"
): ):
location_input = tab.ele(locator, timeout=3) location_input = tab.ele(locator, timeout=3)
if location_input is None: if location_input is None:
raise Exception("未找到输入框") raise Exception("未找到输入框")
location_input.input(postcode) location_input.input(postcode)
sleep(1) await asyncio.sleep(1)
def get_cookie(tab: WebPageTab, site_type: int = 1): async def get_cookie(tab: WebPageTab, site_type: int = 1):
""" """
获取cookie 获取cookie
...@@ -162,21 +161,21 @@ def get_cookie(tab: WebPageTab, site_type: int = 1): ...@@ -162,21 +161,21 @@ def get_cookie(tab: WebPageTab, site_type: int = 1):
"time": int(time.time()), "time": int(time.time()),
} }
logger.info(f"获取到cookie: {json.dumps(response)}") logger.info(f"获取到cookie: {json.dumps(response)}")
callback({"type": site_type, "data": response}) await callback({"type": site_type, "data": response})
return cookie return cookie
def run(site: str = "com", postcode: str = "20001", site_type: int = 1): async def run(site: str = "com", postcode: str = "20001", site_type: int = 1):
def _close(): async def _close():
cookie = get_cookie(tab, site_type) cookie = await get_cookie(tab, site_type)
if IS_DEBUG: if IS_DEBUG:
refresh_local_cookie({"cookie": cookie, "user-agent": UA}, site=site) await refresh_local_cookie({"cookie": cookie, "user-agent": UA}, site=site)
chromium.clear_cache() chromium.clear_cache()
chromium.quit() chromium.quit()
delete_cookie_error() await delete_cookie_error()
if not IS_DEBUG: if not IS_DEBUG:
number = get_cookie_error() number = await get_cookie_error()
number = int(number) if number else 0 number = int(number) if number else 0
if number < 50: if number < 50:
logger.success("Cookie正常") logger.success("Cookie正常")
...@@ -198,7 +197,7 @@ def run(site: str = "com", postcode: str = "20001", site_type: int = 1): ...@@ -198,7 +197,7 @@ def run(site: str = "com", postcode: str = "20001", site_type: int = 1):
logger.info("邮编或语言错误, 开始设置邮编和语言") logger.info("邮编或语言错误, 开始设置邮编和语言")
else: else:
logger.info("邮编和语言正确") logger.info("邮编和语言正确")
_close() await _close()
return return
location = tab.ele("#nav-global-location-popover-link", timeout=3) location = tab.ele("#nav-global-location-popover-link", timeout=3)
...@@ -209,10 +208,10 @@ def run(site: str = "com", postcode: str = "20001", site_type: int = 1): ...@@ -209,10 +208,10 @@ def run(site: str = "com", postcode: str = "20001", site_type: int = 1):
postcode_parts = postcode.split("-") postcode_parts = postcode.split("-")
if len(postcode_parts) == 2: if len(postcode_parts) == 2:
input_postcode(tab, postcode_parts[0], "#GLUXZipUpdateInput_0") await input_postcode(tab, postcode_parts[0], "#GLUXZipUpdateInput_0")
input_postcode(tab, postcode_parts[1], "#GLUXZipUpdateInput_1") await input_postcode(tab, postcode_parts[1], "#GLUXZipUpdateInput_1")
else: else:
input_postcode(tab, postcode) await input_postcode(tab, postcode)
locs = [ locs = [
"#GLUXZipUpdate", "#GLUXZipUpdate",
...@@ -227,12 +226,12 @@ def run(site: str = "com", postcode: str = "20001", site_type: int = 1): ...@@ -227,12 +226,12 @@ def run(site: str = "com", postcode: str = "20001", site_type: int = 1):
ele.wait.clickable(timeout=3, raise_err=False).click() ele.wait.clickable(timeout=3, raise_err=False).click()
tab.wait(2) tab.wait(2)
_close() await _close()
except Exception as e: except Exception as e:
logger.error(e) logger.error(e)
def main(): async def main():
if IS_DEBUG: if IS_DEBUG:
items = random.choices(list(Site.zip(Postcode))) items = random.choices(list(Site.zip(Postcode)))
else: else:
...@@ -242,8 +241,8 @@ def main(): ...@@ -242,8 +241,8 @@ def main():
if site_type is None: if site_type is None:
continue continue
logger.info(f"开始获取cookie: {site} {postcode}") logger.info(f"开始获取cookie: {site} {postcode}")
run(site, postcode) await run(site, postcode)
sleep(10) await asyncio.sleep(10)
if IS_DEBUG: if IS_DEBUG:
exit() exit()
......
...@@ -33,7 +33,7 @@ app.conf.task_queues = ( ...@@ -33,7 +33,7 @@ app.conf.task_queues = (
) )
app.conf.task_routes = { app.conf.task_routes = {
"celery_tasks.detail_spider_task": {"queue": "detail"}, "celery_tasks.detail_spider_task": {"queue": "detail"},
"celery_tasks.monitor_spider_task": {"queue": "detail"}, "celery_tasks.monitor_spider_task": {"queue": "monitor"},
"celery_tasks.search_spider_task": {"queue": "search"}, "celery_tasks.search_spider_task": {"queue": "search"},
"celery_tasks.*_dial_task": {"queue": "dial"}, "celery_tasks.*_dial_task": {"queue": "dial"},
"celery_tasks.*": {"queue": "detail"}, "celery_tasks.*": {"queue": "detail"},
......
# import aioredis # import aioredis
from redis.asyncio import from_url from redis import from_url
class RedisSingleton: class RedisSingleton:
......
from time import sleep from time import sleep
import asyncio
import html import html
import json import json
import random import random
...@@ -869,9 +868,7 @@ class Goods: ...@@ -869,9 +868,7 @@ class Goods:
# 分批 # 分批
if len(collection_skus) > 0: if len(collection_skus) > 0:
for i in range(0, len(collection_skus), 8): for i in range(0, len(collection_skus), 8):
for response in asyncio.gather( for response in collection_skus[i : i + 8]:
*collection_skus[i : i + 8]
):
try: try:
if response.get("brand"): if response.get("brand"):
brand.append(response["brand"]) brand.append(response["brand"])
...@@ -1021,8 +1018,6 @@ class Goods: ...@@ -1021,8 +1018,6 @@ class Goods:
tasks = [json.loads(task) for task in tasks] tasks = [json.loads(task) for task in tasks]
for task in tasks: for task in tasks:
queue.append(self.run(task)) queue.append(self.run(task))
if queue:
asyncio.gather(*queue)
logger.info(f"任务耗时: {time.time() - start_time}") logger.info(f"任务耗时: {time.time() - start_time}")
if self.is_debug: if self.is_debug:
......
from time import sleep from time import sleep
import asyncio
import json import json
import re import re
import time import time
...@@ -420,7 +419,7 @@ class Monitoring: ...@@ -420,7 +419,7 @@ class Monitoring:
success_number = 0 success_number = 0
logger.info(f"任务数: {len(queue)}") logger.info(f"任务数: {len(queue)}")
if queue: if queue:
for items in asyncio.gather(*queue): for items in queue:
success_number += 1 success_number += 1
logger.info(f"任务耗时: {time.time() - start_time}, 成功数: {success_number}") logger.info(f"任务耗时: {time.time() - start_time}, 成功数: {success_number}")
from time import sleep from time import sleep
import asyncio
import functools import functools
import json import json
import os import os
...@@ -650,8 +649,6 @@ class Search: ...@@ -650,8 +649,6 @@ class Search:
tasks = [json.loads(task) for task in tasks] tasks = [json.loads(task) for task in tasks]
for task in tasks: for task in tasks:
queue.append(self.run(task)) queue.append(self.run(task))
if queue:
asyncio.gather(*queue)
logger.info(f"任务耗时: {time.time() - start_time}") logger.info(f"任务耗时: {time.time() - start_time}")
if self.is_debug: if self.is_debug:
......
from time import sleep
import asyncio import asyncio
import json import json
import os import os
...@@ -94,7 +93,7 @@ def run(task_config: dict = task_monitoring_config): ...@@ -94,7 +93,7 @@ def run(task_config: dict = task_monitoring_config):
time.sleep(5) time.sleep(5)
def cookie(): async def cookie():
for site in Site.values(): for site in Site.values():
time_key = cookie_config['cookie_time_key'] time_key = cookie_config['cookie_time_key']
time_key = f"{time_key}:{site}" time_key = f"{time_key}:{site}"
...@@ -108,10 +107,10 @@ def cookie(): ...@@ -108,10 +107,10 @@ def cookie():
logger.info(f"获取cookie") logger.info(f"获取cookie")
for site in Site.values(): for site in Site.values():
try: try:
task_manager.get_cookie(site) await task_manager.get_cookie(site)
except: except:
logger.error(f"获取cookie异常") logger.error(f"获取cookie异常")
sleep(5) await asyncio.sleep(5)
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -7,7 +7,7 @@ from datetime import datetime, timedelta ...@@ -7,7 +7,7 @@ from datetime import datetime, timedelta
from babel.dates import get_month_names, get_day_names from babel.dates import get_month_names, get_day_names
from curl_cffi import requests from curl_cffi import requests
from curl_cffi.requests import AsyncSession from curl_cffi.requests import Session
from dateutil import parser from dateutil import parser
from dateutil.relativedelta import relativedelta from dateutil.relativedelta import relativedelta
from loguru import logger from loguru import logger
...@@ -145,7 +145,7 @@ class Request: ...@@ -145,7 +145,7 @@ class Request:
:param url: :param url:
:return: :return:
""" """
with AsyncSession(max_clients=50) as s: with Session() as s:
# 清空 请求的值 # 清空 请求的值
s.headers.clear() s.headers.clear()
s.cookies.clear() s.cookies.clear()
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment