Commit 9fcc7b4f by yexing

u

parent a5474c0e
......@@ -8,3 +8,4 @@ image
.idea
config
tmp
test
import json
import re
import curl_cffi
from loguru import logger
from tenacity import retry, stop_after_attempt, wait_random
from lxml import etree
from const import Postcode, Site
from db import RedisSingleton
from proxy import ProxyManager
from tool import Request, Task
from tool import Fmt, Request, Task
from conf import config
# Module-wide Redis handle, built from the URL stored under config["redis"]["url"].
REDIS = RedisSingleton(redis_url=config["redis"]["url"])
# The "task-product-detail" section of the loaded configuration.
task_monitoring_config = config["task-product-detail"]
# The "task-info-detail" section of the loaded configuration.
task_info_config = config["task-info-detail"]
class Tool:
@staticmethod
......@@ -133,26 +140,52 @@ class Info(ProxyMixin):
# Class-level Task helper constructed on the module's REDIS handle.
task_manager = Task(REDIS)
def __init__(self):
    """Load this worker's settings from ``task_info_config``.

    Each attribute used to be assigned twice: first from
    ``task_monitoring_config`` and then, immediately, from
    ``task_info_config``.  Only the second value ever survived, so the
    first round of assignments was dead code and has been dropped.

    Attributes set:
        task_key, item_key: copied verbatim from the config section.
        task_number, request_timeout: config values converted with ``int``.
        enabled, is_debug: ``True`` only when the config value is exactly
            the string ``"True"``.
    """
    settings = task_info_config
    self.task_key = settings["task_key"]
    self.item_key = settings["item_key"]
    self.task_number = int(settings["task_number"])
    # Flags are compared against the literal string "True"; any other
    # value (including "true") yields False.
    self.enabled = settings["enabled"] == "True"
    self.request_timeout = int(settings["request_timeout"])
    self.is_debug = settings["is_debug"] == "True"
def format_content(self, text):
    """Extract the delivery date from a page's HTML.

    The text nodes under the element with id
    ``mir-layout-DELIVERY_BLOCK-slot-PRIMARY_DELIVERY_MESSAGE_LARGE``
    (``/span/span``) are looked up; the first one, if any, is passed to
    ``Fmt.parse_date``.

    :param text: HTML source handed to ``etree.HTML``.
    :return: ``{"free_delivery": value}`` where ``value`` is the result of
        ``Fmt.parse_date`` on the first matched text node, or ``""`` when
        nothing matched.
    """
    html = etree.HTML(text)
    delivery_texts = html.xpath(
        '//div[@id="mir-layout-DELIVERY_BLOCK-slot-PRIMARY_DELIVERY_MESSAGE_LARGE"]/span/span/text()'
    )
    # The former `detail_bullets` XPath query was never read, so it is
    # no longer evaluated.
    free_delivery = Fmt.parse_date(delivery_texts[0]) if delivery_texts else ""
    return {
        "free_delivery": free_delivery,
    }
@retry(stop=stop_after_attempt(20), wait=wait_random(3, 6), retry_error_cls=lambda *_:...)
def run(self, task):
    """Fetch one product page through a proxy and return its parsed fields.

    The ASIN is taken from ``task["url"]`` via ``Tool.get_url_asin`` and a
    canonical ``/dp/<asin>?th=1&psc=1`` URL is built for ``self.site``.

    :param task: mapping with an optional ``"url"`` entry (defaults to "").
    :return: the dict produced by ``format_content`` on success; ``None``
        when the request or parsing failed (the failure is logged, not
        re-raised).
    :raises Exception: ``"没有代理"`` when ``get_proxy()`` returns ``None``.
        Because this is raised outside the ``try``, it is what the
        ``@retry`` decorator reacts to (up to 20 attempts with a random
        wait between them).
    """
    # NOTE(review): `retry_error_cls` is a lambda returning Ellipsis rather
    # than an exception class -- confirm what callers see once the retry
    # attempts are exhausted.
    url = task.get("url", "")
    asin = Tool.get_url_asin(url)
    url = f"https://www.amazon.{self.site}/dp/" + asin + "?th=1&psc=1"
    _proxy = self.get_proxy()
    # Guard before any `_proxy[...]` access.  Previously a first request was
    # issued ahead of this check (failing with TypeError on a missing proxy)
    # and its response was thrown away, doubling traffic per task.
    if _proxy is None:
        raise Exception("没有代理")
    try:
        headers = self.task_manager.get_loca_cookie(site=self.site)
        text = Request.request_html(
            url,
            _proxy["proxy"],
            headers=headers,
            timeout=self.request_timeout,
            postcode=self.postcode,
        )
        return self.format_content(text)
    except curl_cffi.curl.CurlError:
        logger.error(f"请求超时: {url}")
    except Exception as e:
        # Failure kinds are recognised by their exact message text.
        if str(e) == "出现验证码":
            self.delete_proxy(_proxy["temp_proxy"])
        if str(e) == "采集邮编错误":
            self.cookie_error()
        logger.error(f"请求异常: {e} - {url}")
    finally:
        # Runs on success and on failure alike, including after
        # delete_proxy() above.
        self.join_proxy(_proxy["temp_proxy"])
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment