Commit ebd5d295 by yexing

整理文件

parent 48f21530
...@@ -5,3 +5,4 @@ fu_data ...@@ -5,3 +5,4 @@ fu_data
out out
.venv .venv
*.lock *.lock
test
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -13,6 +13,7 @@ dependencies = [ ...@@ -13,6 +13,7 @@ dependencies = [
"tenacity", "tenacity",
"redis>=7.0.1", "redis>=7.0.1",
"requests>=2.32.5", "requests>=2.32.5",
"walmartbot>=0.0.3",
] ]
[[tool.uv.index]] [[tool.uv.index]]
......
import platform
import subprocess
import time
import os
import json
import uuid
import requests
import hashlib
import traceback
from typing import Callable, List, Tuple
from DrissionPage import Chromium
from DrissionPage.items import MixTab
from DrissionPage.errors import ElementNotFoundError
from .const import BROWSER_PATH, DRIVER_PATH, SOCKET_PORT, logger
class SuperBrowserBridge:
    """Automation bridge for the Ziniao "SuperBrowser" desktop client.

    Drives the locally running client through its HTTP automation API
    (http://127.0.0.1:<SOCKET_PORT>) to start/stop store browser profiles,
    attaches a DrissionPage ``Chromium`` driver to the opened browser, and
    runs the queued automation tasks against each store.
    """
    def __init__(self, user_info: dict):
        # Account credentials; merged into every API request payload.
        self.user_info = user_info
        self.browser_list: list = []  # store profiles from get_browser_list()
        self.driver: Chromium = None  # set by get_driver() after a store opens
        self.tab: MixTab = None       # latest tab of the attached driver
        self._is_windows: bool = platform.system() == 'Windows'
        self._is_mac: bool = platform.system() == 'Darwin'
        self._opt_id: int = 0         # sequential id assigned to queued tasks
        self._opts: List[Tuple[int, Callable]] = []  # (id, callable) task queue
    def add_opt(self, opt: Callable):
        """Queue an automation task; it is invoked as ``opt(tab, driver)``
        by ``_run_task`` once a store page is open."""
        self._opt_id += 1
        self._opts.append((self._opt_id, opt))
    def encrypt_sha1(self, fpath: str) -> str:
        """Return the hex SHA-1 digest of the file at ``fpath``."""
        with open(fpath, 'rb') as f:
            return hashlib.new('sha1', f.read()).hexdigest()
    def download_file(self, url, save_path):
        """Stream ``url`` to ``save_path`` (overwrites an existing file)."""
        # Send a GET request for the file content.
        response = requests.get(url, stream=True)
        # Only write the file when the request succeeded.
        if response.status_code == 200:
            # Write the downloaded content chunk by chunk.
            with open(save_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
            logger.info(f"文件已成功下载并保存到:{save_path}")
        else:
            logger.error(f"下载失败,响应状态码为:{response.status_code}")
    def download_driver(self):
        """Download the chromedriver binaries listed in the platform's
        config.json manifest into DRIVER_PATH, skipping files whose SHA-1
        already matches. Silently returns on unsupported platforms.
        """
        if self._is_windows:
            config_url = "https://cdn-superbrowser-attachment.ziniao.com/webdriver/exe_32/config.json"
        elif self._is_mac:
            arch = platform.machine()
            if arch == 'x86_64':
                config_url = "https://cdn-superbrowser-attachment.ziniao.com/webdriver/mac/x64/config.json"
            elif arch == 'arm64':
                config_url = "https://cdn-superbrowser-attachment.ziniao.com/webdriver/mac/arm64/config.json"
            else:
                return
        else:
            return
        response = requests.get(config_url)
        # Abort the whole process if the driver manifest cannot be fetched.
        if response.status_code == 200:
            txt_content = response.text
            config = json.loads(txt_content)
        else:
            logger.error(f"下载驱动失败,状态码:{response.status_code}")
            exit()
        if not os.path.exists(DRIVER_PATH):
            os.makedirs(DRIVER_PATH)
        # All chromedriver files already present locally.
        driver_list = [filename for filename in os.listdir(DRIVER_PATH) if filename.startswith('chromedriver')]
        # NOTE(review): the "(unknown)" in the log strings below looks like a
        # mangled f-string placeholder (probably {filename}) — confirm.
        for item in config:
            filename = item['name']
            if self._is_windows:
                filename = filename + ".exe"
            local_file_path = os.path.join(DRIVER_PATH, filename)
            if filename in driver_list:
                # Re-download only when the SHA-1 checksum differs.
                file_sha1 = self.encrypt_sha1(local_file_path)
                if file_sha1 == item['sha1']:
                    logger.info(f"驱动(unknown)已存在,sha1校验通过...")
                else:
                    logger.info(f"驱动(unknown)的sha1不一致,重新下载...")
                    self.download_file(item['url'], local_file_path)
                    # On macOS make the freshly downloaded driver executable.
                    if self._is_mac:
                        cmd = ['chmod', '+x', local_file_path]
                        subprocess.Popen(cmd)
            else:
                logger.info(f"驱动(unknown)不存在,开始下载...")
                self.download_file(item['url'], local_file_path)
                # On macOS make the freshly downloaded driver executable.
                if self._is_mac:
                    cmd = ['chmod', '+x', local_file_path]
                    subprocess.Popen(cmd)
    def exit(self):
        """Ask the SuperBrowser client process to shut itself down."""
        data = {"action": "exit", "requestId": str(uuid.uuid4())}
        data.update(self.user_info)
        logger.info('browser exit ...' + json.dumps(data, ensure_ascii=False))
        self.send_http(data)
    def update_core(self):
        """Download all browser cores; call before opening a store.

        Requires client version 5.285.7+. The HTTP call has a timeout, so
        this action is polled in a loop until the client reports success.
        """
        data = {
            "action": "updataCore",
            "requestId": str(uuid.uuid4()),
        }
        data.update(self.user_info)
        while True:
            result = self.send_http(data)
            logger.info(result)
            if result is None:
                # Client not reachable yet (send_http returned None on error).
                logger.info("等待客户端启动...")
                time.sleep(2)
                continue
            if result.get("statusCode") is None or result.get("statusCode") == -10003:
                # Old client: action unsupported — give up, not fatal.
                logger.info("当前版本不支持此接口,请升级客户端")
                return
            elif result.get("statusCode") == 0:
                logger.info("更新内核完成")
                return
            else:
                logger.info(f"等待更新内核: {json.dumps(result)}")
                time.sleep(2)
    def send_http(self, data):
        """POST ``data`` as JSON to the local client API and return the
        parsed response dict, or None on any error (connection refused,
        timeout, invalid JSON) — callers must handle the None case.
        """
        try:
            url = 'http://127.0.0.1:{}'.format(SOCKET_PORT)
            response = requests.post(url, json.dumps(data).encode('utf-8'), timeout=120)
            return json.loads(response.text)
        except Exception as err:
            logger.error(err)
    def kill_process(self):
        """Force-kill any running SuperBrowser client process."""
        if self._is_windows:
            os.system('taskkill /f /t /im SuperBrowser.exe')
        elif self._is_mac:
            os.system('killall ziniao')
        # Give the OS a moment to release the process/port.
        time.sleep(3)
    def start_browser(self):
        """Launch the SuperBrowser client in web-driver (HTTP API) mode."""
        try:
            if self._is_windows:
                cmd = [BROWSER_PATH, '--run_type=web_driver', '--ipc_type=http', '--port=' + str(SOCKET_PORT)]
            elif self._is_mac:
                cmd = ['open', '-a', BROWSER_PATH, '--args', '--run_type=web_driver', '--ipc_type=http',
                       '--port=' + str(SOCKET_PORT)]
            else:
                logger.warning('platform not supported')
                exit()
            subprocess.Popen(cmd)
            # Crude startup wait; update_core() keeps polling afterwards.
            time.sleep(5)
        except Exception as e:
            logger.error('start browser process failed')
        logger.info('browser launch')
    def get_browser_list(self) -> list:
        """Return the account's store/browser profile list.

        Exits the process on authentication failure (-10003) or any other
        API error.
        """
        request_id = str(uuid.uuid4())
        data = {
            "action": "getBrowserList",
            "requestId": request_id
        }
        data.update(self.user_info)
        r = self.send_http(data)
        # NOTE(review): send_http may return None, which would make
        # r.get raise AttributeError here — confirm intended.
        if str(r.get("statusCode")) == "0":
            logger.debug(r)
            return r.get("browserList")
        elif str(r.get("statusCode")) == "-10003":
            logger.error(f"login Err {json.dumps(r, ensure_ascii=False)}")
            exit()
        else:
            logger.error(f"Fail {json.dumps(r, ensure_ascii=False)} ")
            exit()
    def open_store(self, store_info, isWebDriverReadOnlyMode=0, isprivacy=0, isHeadless=0, cookieTypeSave=0, jsInfo=""):
        """Open a store browser ("startBrowser") and return the API response.

        :param store_info: numeric browserId string, or a browserOauth id
        :param jsInfo: optional JS-injection config (sent when non-trivial)
        Exits the process on login/API failure.
        """
        request_id = str(uuid.uuid4())
        data = {
            "action": "startBrowser"
            , "isWaitPluginUpdate": 0
            , "isHeadless": isHeadless
            , "requestId": request_id
            , "isWebDriverReadOnlyMode": isWebDriverReadOnlyMode
            , "cookieTypeLoad": 0
            , "cookieTypeSave": cookieTypeSave
            , "runMode": "1"
            , "isLoadUserPlugin": False
            , "pluginIdType": 1
            , "privacyMode": isprivacy
        }
        data.update(self.user_info)
        # All digits => internal browserId, otherwise an OAuth identifier.
        if store_info.isdigit():
            data["browserId"] = store_info
        else:
            data["browserOauth"] = store_info
        # Anything longer than 2 chars is treated as a real inject config.
        if len(str(jsInfo)) > 2:
            data["injectJsInfo"] = json.dumps(jsInfo)
        r = self.send_http(data)
        if str(r.get("statusCode")) == "0":
            return r
        elif str(r.get("statusCode")) == "-10003":
            logger.error(f"login Err {json.dumps(r, ensure_ascii=False)}")
            exit()
        else:
            logger.error(f"Fail {json.dumps(r, ensure_ascii=False)} ")
            exit()
    def close_store(self, browser_oauth):
        """Stop the store browser identified by ``browser_oauth``.

        Exits the process on login/API failure.
        """
        request_id = str(uuid.uuid4())
        data = {
            "action": "stopBrowser"
            , "requestId": request_id
            , "duplicate": 0
            , "browserOauth": browser_oauth
        }
        data.update(self.user_info)
        r = self.send_http(data)
        if str(r.get("statusCode")) == "0":
            return r
        elif str(r.get("statusCode")) == "-10003":
            logger.info(f"login Err {json.dumps(r, ensure_ascii=False)}")
            exit()
        else:
            logger.info(f"Fail {json.dumps(r, ensure_ascii=False)} ")
            exit()
    def get_driver(self, open_ret_json):
        """Attach a DrissionPage Chromium driver to the opened store.

        Sets ``self.driver`` and ``self.tab``; returns None (driver left
        unset) for non-Chromium cores.
        """
        core_type = open_ret_json.get('core_type')
        if core_type == 'Chromium' or core_type == 0:
            major = open_ret_json.get('core_version').split('.')[0]
            # NOTE(review): chrome_driver_path is computed and logged but
            # never passed to Chromium() — DrissionPage attaches through the
            # debugging port instead. Confirm the path is still needed.
            if self._is_windows:
                chrome_driver_path = os.path.join(DRIVER_PATH, 'chromedriver%s.exe') % major
            else:
                chrome_driver_path = os.path.join(DRIVER_PATH, 'chromedriver%s') % major
            logger.info(f"chrome_driver_path: {chrome_driver_path}")
            port = open_ret_json.get('debuggingPort')
            self.driver = Chromium(port)
            self.tab = self.driver.latest_tab
            logger.info('webdriver 初始化完毕')
        else:
            return None
    def _run_task(self, browser):
        """Open one store, run the queued tasks against it, then close it.

        :param browser: store profile dict (an entry of ``browser_list``)
        """
        # To target a specific store ID: Ziniao client -> account manager ->
        # select the account -> "view account" detail page; the ID shown
        # after the account name is the store ID.
        store_id = browser.get('browserOauth')
        store_name = browser.get("browserName")
        # Open the store.
        logger.info(f"=====打开店铺:{store_name}=====")
        ret_json: dict = self.open_store(store_id)
        logger.info(ret_json)
        store_id = ret_json.get("browserOauth")
        if store_id is None:
            store_id = ret_json.get("browserId")
        # Start a driver session against the opened browser.
        self.get_driver(ret_json)
        if self.driver is None:
            logger.info(f"=====关闭店铺:{store_name}=====")
            self.close_store(store_id)
            return
        # The IP-check page URL is mandatory; old clients don't provide it.
        ip_check_url = ret_json.get("ipDetectionPage")
        if not ip_check_url:
            logger.error("ip检测页地址为空,请升级紫鸟浏览器到最新版")
            self.driver.quit()
            logger.info(f"=====关闭店铺:{store_name}=====")
            self.close_store(store_id)
            exit()
        # Run the queued automation tasks.
        try:
            ip_usable = self.open_ip_check(ip_check_url)
            if ip_usable:
                logger.info("ip检测通过,打开店铺平台主页")
                self.tab.get(ret_json.get("launcherPage"))
                # With the store home page open, run each queued task in order.
                for i, opt in self._opts:
                    if not callable(opt):
                        logger.error(f"{i}号任务不可调用")
                        continue
                    logger.info(f"开始执行{i}号任务")
                    try:
                        opt(self.tab, self.driver)  # run the task
                        logger.info(f"{i}号任务执行完毕")
                    except Exception as e:
                        logger.error(f"{i}号任务执行失败:{e}")
            else:
                logger.error("ip检测不通过,请检查")
        except:
            logger.error("脚本运行异常:" + traceback.format_exc())
        finally:
            # Always release the driver and close the store browser.
            self.driver.quit()
            logger.info(f"=====关闭店铺:{store_name}=====")
            self.close_store(store_id)
    def open_ip_check(self, ip_check_url):
        """Open the IP-detection page and report whether the IP is usable.

        :param ip_check_url: IP detection page URL
        :return: True when the success button is found, else False
        """
        try:
            self.tab.get(ip_check_url)
            # Locating the success button raises if it never appears.
            self.tab.ele('//button[contains(@class, "styles_btn--success")]')
            return True
        except ElementNotFoundError:
            logger.info("未找到ip检测成功元素")
            return False
        except Exception as e:
            logger.info("ip检测异常:" + traceback.format_exc())
            return False
    def init(self):
        """Full startup: fetch drivers, restart the client, update cores,
        and load the store list. Exits when no store profiles exist."""
        self.download_driver()
        self.kill_process()
        self.start_browser()
        self.update_core()
        self.browser_list = self.get_browser_list()
        if not self.browser_list:
            logger.error("browser list is empty")
            exit()
    def run(self, sn: int = None):
        """Run the queued tasks for every store, or only store ``sn``.

        :param sn: optional index into ``browser_list``
        """
        browsers = self.browser_list if sn is None else [self.browser_list[sn]]
        for browser in browsers:
            self._run_task(browser)
\ No newline at end of file
import os
from tool.log import create_logger
# --- SuperBrowser / WFS export configuration --------------------------------
# Every value may be overridden through an environment variable of the
# same name.
BROWSER_PATH = os.environ.get(
    'BROWSER_PATH',
    r"D:\SuperBrowser\starter.exe"
)
# Port of the SuperBrowser local HTTP automation API. Cast to int so the
# constant has the same type whether it comes from the environment
# (os.environ values are always str) or from the integer default.
SOCKET_PORT = int(os.environ.get(
    'SOCKET_PORT',
    16851
))
DRIVER_PATH = os.environ.get(
    'DRIVER_PATH',
    r"D:\webdriver"
)
EXPORT_PATH = os.environ.get(
    'EXPORT_PATH',
    r"E:\wfs_export_data"
)
# SECURITY: hard-coded account credentials checked into the repository —
# move these to environment variables or a secret store.
USER_INFO = {
    "company": "深圳市泰极电子商务有限公司",
    "username": "Leshiliuzong",
    "password": "leshi2023"
}
# Default headers for Walmart Seller Center (WFS) API requests.
WFS_HEADERS = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
    "wm_aurora.market": "US",
}
logger = create_logger("wfs_export")
from DrissionPage import Chromium
from DrissionPage.items import MixTab
from .browser import SuperBrowserBridge
from .const import USER_INFO, logger
class WFSExport:
    """Export WFS (Walmart Fulfillment Services) reports through a store
    browser opened via SuperBrowserBridge.

    NOTE(review): this class is clearly work-in-progress — it contains
    ``breakpoint()`` debug hooks and an unimplemented ``download_report``.
    """
    def __init__(self, start_date: str, end_date: str):
        # Report date range; presumably "YYYY-MM-DD" strings given the
        # T00:00:00 suffixing below — TODO confirm against callers.
        self._start_date = start_date
        self._end_date = end_date
        self.bridge = SuperBrowserBridge(USER_INFO)
        self.bridge.init()
        self.cookies: dict = None  # session cookies captured by pass_test()
        self.uri: str = None       # reports endpoint, set in download_reports()
    def download_report(self, names: dict, params: dict, ext: str = 'csv'):
        """Download a single report. Not implemented yet (stub)."""
        pass
    def download_reports(self, tab: MixTab, driver: Chromium):
        """Request the full WFS report set for the configured date range
        (orders, MCS orders, returns, inbound receipts, reconciliation,
        settlement, storage)."""
        self.uri = "https://seller.walmart.com/aurora/v1/wfs/reports"
        # NOTE(review): run_js() is called with no script — looks unfinished.
        tab.run_js()
        self.download_report({
            "apiName": "salesReport",
            "showName": "Orders"
        }, {
            "fromDate": self._start_date,
            "toDate": self._end_date,
        }, 'xlsx')
        self.download_report({
            "apiName": "salesReport",
            "showName": "Orders",
            "sheetName": "Multchannel_Fulfillment_Details"
        }, {
            "fromDate": self._start_date,
            "toDate": self._end_date,
            "type": "MCS"
        }, 'xlsx')
        self.download_report({
            "apiName": "returnsReport",
            "showName": "Customer returns"
        }, {
            "fromDate": f"{self._start_date}T00:00:00",
            "toDate": f"{self._end_date}T23:59:59",
        })
        self.download_report({
            "apiName": "poAudit",
            "showName": "Inbound receipts"
        }, {
            "fromDate": self._start_date,
            "toDate": self._end_date,
            "gtin": ""
        })
        self.download_report({
            "apiName": "inventoryReconciliation",
            "showName": "Inventory reconciliation"
        }, {
            "fromDate": self._start_date,
            "toDate": self._end_date,
        })
        # NOTE(review): leftover debugging hook — remove before production.
        breakpoint()
        # self.download_report({
        # "apiName": "inventoryHealthReport",
        # "showName": "Inventory health"
        # }, None)
        self.download_report({
            "apiName": "feeDetailReport",
            "showName": "Settlement"
        }, {
            "startDate": self._start_date,
            "endDate": self._end_date,
        })
        self.download_report({
            "apiName": "storageFeeReport",
            "showName": "Storage"
        }, {
            "startDate": self._start_date,
            "endDate": self._end_date,
        })
    def download_payments(self, tab: MixTab, driver: Chromium):
        """Download payment data. Not implemented (debug hook only)."""
        breakpoint()
    def pass_test(self, tab: MixTab, driver: Chromium):
        """Get past the login / bot check and capture session cookies."""
        # NOTE(review): leftover debugging hook — remove before production.
        breakpoint()
        logger.info("尝试通过人机检测")
        # tab.actions.move_to("#px-captcha").hold().release()
        tab.ele("@text()=登录").click()
        tab.wait.ele_displayed('@text()=WFS')
        cookies = tab.cookies()
        logger.debug(f"获取到cookies: {cookies.as_str()}")
        self.cookies = cookies.as_dict()
    def run(self):
        """Filter to Walmart-global stores, queue the export tasks, and run
        them; always asks the SuperBrowser client to exit afterwards."""
        try:
            self.bridge.browser_list = [
                browser for browser in self.bridge.browser_list
                if browser['platform_name'] == '沃尔玛-全球'
            ]
            self.bridge.add_opt(self.pass_test)
            self.bridge.add_opt(self.download_reports)
            self.bridge.add_opt(self.download_payments)
            # sn=1 runs only the second store profile.
            self.bridge.run(sn=1)
        finally:
            self.bridge.exit()
\ No newline at end of file
import asyncio
import json
import os
import random
import re
import sys
import threading
import traceback
import uuid
from abc import ABCMeta, abstractmethod
import aiofiles
from bs4 import BeautifulSoup
from curl_cffi.requests import AsyncSession
from fake_useragent import UserAgent
from loguru import logger
from oss2 import Auth, Bucket
from tenacity import retry, RetryError, stop_after_attempt
def ignore_exceptions(*args, **kwargs):
    """Excepthook replacement that discards every uncaught exception."""
    pass
# NOTE(review): this silences ALL uncaught exceptions process-wide,
# including real bugs — confirm this is intentional.
sys.excepthook = ignore_exceptions
# Random desktop user-agent generator used for crawl requests.
UA = UserAgent(platforms=['pc'])
# Task-queue / callback API base URL.
DOMAIN = "https://20tools.net"
# NOTE(review): OSS credentials are empty; Tool.oss_upload reads
# access_key_id / access_key_secret / endpoint / bucket_name from this dict
# and will raise KeyError until they are filled in.
OSS_CONFIG = {
}
class ProxiesError(Exception):
    """Raised when a proxy cannot be obtained or a fetch looks blocked."""
class Tool:
    """Stateless async helpers shared by the Walmart crawlers: image
    download/upload, string/HTML utilities, task-queue API calls and
    proxy management.
    """
    @staticmethod
    async def download_image(s, url):
        """
        Download an image to ../data/<uuid>.jpg through the local proxy.
        :param s: async HTTP session (curl_cffi AsyncSession)
        :param url: image URL
        :return: absolute path of the saved file, or None after 3 failures
        """
        name = str(uuid.uuid4()) + ".jpg"
        file_path = f'../data/{name}'
        headers = {
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'user-agent': UA.random,
        }
        for i in range(3):
            try:
                proxies = {
                    "http": "http://127.0.0.1:7890",
                    "https": "http://127.0.0.1:7890",
                }
                # Fix: ``headers`` was previously passed positionally and
                # landed in the ``params`` slot of Session.get; it must be
                # given as a keyword argument.
                response = await s.get(url, headers=headers, timeout=60, proxies=proxies)
                content = response.content
                async with aiofiles.open(file_path, 'wb') as file:
                    await file.write(content)
                return os.path.abspath(file_path)
            except Exception as e:
                logger.error(f"下载图片失败: {url} {e}")
    @staticmethod
    async def oss_upload(s, url):
        """
        Download an image and re-upload it to Aliyun OSS (5 attempts).
        :param s: async HTTP session
        :param url: source image URL
        :return: {source_url: oss_url} on success, else None
        """
        # Download the image first, then push it to the bucket.
        image = await Tool.download_image(s, url)
        if image:
            for i in range(5):
                try:
                    save_name = f'temp-image/{str(uuid.uuid4())}.jpg'
                    auth = Auth(OSS_CONFIG['access_key_id'], OSS_CONFIG['access_key_secret'])
                    bucket = Bucket(auth, OSS_CONFIG['endpoint'], OSS_CONFIG['bucket_name'])
                    bucket.put_object_from_file(save_name, image)
                    # Remove the local temp file once uploaded.
                    os.remove(image)
                    return {
                        url: "https://yunyi-live.oss-cn-hangzhou.aliyuncs.com/" + save_name
                    }
                except:
                    logger.error(f"上传图片失败: {url}")
    @staticmethod
    async def replace_case(_text, sub_str, replace_str):
        """
        Replace ``sub_str`` with ``replace_str`` in ``_text``,
        case-insensitively.
        :return: the resulting string
        """
        pattern = re.compile(re.escape(sub_str), re.IGNORECASE)
        return pattern.sub(replace_str, _text)
    @staticmethod
    async def remove_duplicate_images(image_list):
        """
        De-duplicate image URLs, ignoring Amazon-style size suffixes
        (_AC_SL1500_ / _AC_SX1500_) and dropping None entries. Keeps the
        first occurrence and the original order.
        """
        seen_images = set()
        unique_images = []
        for image in image_list:
            if image is None:
                continue
            # Normalize away the size-variant markers before comparing.
            new_image = image.replace('_AC_SL1500_', '').replace('_AC_SX1500_', '')
            if new_image not in seen_images:
                unique_images.append(image)
                seen_images.add(new_image)
        return unique_images
    @staticmethod
    async def remove_img_with_maximum(_html, _max=0):
        """
        Remove every <img> tag beyond the first ``_max`` from ``_html``.
        :param _html: HTML fragment
        :param _max: number of leading images to keep
        :return: the modified HTML string
        """
        soup = BeautifulSoup(_html, 'html.parser')
        images = soup.find_all('img')
        if len(images) > _max:
            # Drop images from the tail, keeping the first _max.
            for img in images[_max:]:
                img.decompose()
        return str(soup)
    @staticmethod
    async def get_default_headers():
        """Default JSON request headers for the task-queue API."""
        return {
            "Content-Type": "application/json",
            "Accept": "application/json",
        }
    @staticmethod
    @retry(stop=stop_after_attempt(3))
    async def get_task(s, task_key: str = "", number: int = 1):
        """
        Fetch up to ``number`` tasks from queue ``task_key``.
        :param s: async HTTP session
        :return: the task list (may be empty)
        :raises Exception: on non-200 responses (retried up to 3 times)
        """
        url = f"{DOMAIN}/api/collection/task?number={number}&queue={task_key}"
        res = await s.get(url)
        if res.status_code != 200:
            raise Exception(f"获取任务失败")
        response = res.json()
        return response.get('data', {}).get('list', {})
    @staticmethod
    async def get_html_to_json(text):
        """
        Extract the Next.js __NEXT_DATA__ JSON payload embedded in a page.
        :param text: page HTML
        :return: parsed JSON dict
        :raises Exception: when the script tag is missing
        """
        soup = BeautifulSoup(text, 'html.parser')
        # The page state lives in a dedicated application/json script tag.
        script_tag = soup.find('script', {'id': '__NEXT_DATA__', 'type': 'application/json'})
        if script_tag is None:
            raise Exception(f"获取JSON数据失败")
        return json.loads(script_tag.string)
    @staticmethod
    @retry(stop=stop_after_attempt(10))
    async def callback(s, data):
        """
        Report results back to the task-queue API.
        :param s: async HTTP session
        :param data: callback payload
        :return: the API response dict
        :raises Exception: when the API returns a non-zero code (retried)
        """
        default_header = await Tool.get_default_headers()
        url = f"{DOMAIN}/api/collection/task"
        response = await s.post(url, headers=default_header, data=json.dumps(data), timeout=10)
        data = response.json()
        if data["code"] != 0:
            raise Exception(f"回调接口失败")
        return data
    @staticmethod
    async def get_walmart_headers():
        """Request headers that make crawl requests look like a browser
        visit coming from walmart.com."""
        return {
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'referer': 'https://www.walmart.com/',
            'user-agent': UA.edge,
        }
    @staticmethod
    @retry(stop=stop_after_attempt(3))
    async def get_proxies(s):
        """
        Fetch a fresh proxy address from the proxy API.
        :param s: async HTTP session
        :return: requests-style proxies mapping, or None when the API
                 reports a status message instead of an address
        :raises ProxiesError: when the API returns an empty body (retried)
        """
        proxies_url = "https://20tools.net/api/proxies?sign=ftd*kcm.ygh4mjp7ERJ"
        response = await s.get(proxies_url)
        ip = response.text
        if not ip:
            raise ProxiesError(f"获取代理失败")
        # A body containing 'status' is an error message, not an address.
        if 'status' in ip:
            return None
        return {
            "http": f"http://{ip}",
            "https": f"http://{ip}"
        }
    @staticmethod
    async def get_impersonate():
        """Pick a random TLS-fingerprint impersonation target."""
        impersonates = [
            "edge99",
            "edge101",
        ]
        return random.choice(impersonates)
    @staticmethod
    @retry(stop=stop_after_attempt(3))
    async def get_html(s, url, header, proxies):
        """
        Fetch ``url`` with fresh Walmart headers through ``proxies``.
        :param s: async HTTP session
        :param header: unused; kept for call-compatibility with callers
        :param proxies: proxies mapping, or None for a direct connection
        :return: response text, or "" on failure
        """
        content = ""
        try:
            # Reset per-session state left over from the previous request.
            s.headers.clear()
            s.cookies.clear()
            s.proxies.clear()
            walmart_headers = await Tool.get_walmart_headers()
            response = await s.get(url, proxies=proxies, headers=walmart_headers, timeout=10, data={})
            content = response.text
        except Exception as e:
            logger.error(f"获取HTML失败: {url} {e}")
        return content
    @staticmethod
    async def check_content(content):
        """
        Validate fetched page content.
        :param content: response text (may be empty)
        :return: the content unchanged when it looks usable
        :raises ProxiesError: empty body, bot challenge, or rejection page
        """
        if not content:
            raise ProxiesError(f"不存在采集信息")
        if "Robot or human" in content:
            raise ProxiesError(f"出现验证")
        # Covers 'Request Rejected' pages and any 'Forbidden' response
        # (the former duplicate exact-match branch raised the same error).
        if 'Request Rejected' in content or 'Forbidden' in content:
            raise ProxiesError(f"请求被拒绝")
        return content
    @staticmethod
    async def replace_chinese(_text):
        """
        Strip all CJK (Chinese) characters from ``_text``.
        :return: the cleaned string
        """
        return re.sub('[\u4e00-\u9fa5]', '', _text)
class BaseWalmartCrawler(metaclass=ABCMeta):
    """Abstract base for the Walmart crawlers: subclasses define the task
    queue, the page parser and the batch loop; shared batch plumbing
    (task fetch, proxy bootstrap, content fetch, re-queue) lives here."""
    @abstractmethod
    async def get_task_type(self):
        # Queue name for this crawler's tasks.
        pass
    @abstractmethod
    async def format_(self, content):
        # Parse a fetched page into the upload payload.
        pass
    @abstractmethod
    async def run(self):
        # Endless main loop.
        pass
    @abstractmethod
    async def main(self, proxies=None):
        # One batch: fetch tasks, crawl, upload; returns current proxies.
        pass
    @staticmethod
    async def start(data, proxies, s):
        """
        Fetch a batch of tasks and make sure a proxy is available.
        :param data: dict with task_name, task_type and optional number
        :param proxies: current proxies mapping, or None to fetch a new one
        :param s: async HTTP session
        :return: (items, proxies) tuple
        """
        task_name = data['task_name']
        _type = data['task_type']
        number = data.get('number', 10)
        items = await Tool.get_task(s=s, task_key=_type, number=number)
        logger.debug(f"{task_name} - 获取到任务 {len(items)} 条")
        if len(items) == 0:
            # Nothing queued: back off before the caller polls again.
            await asyncio.sleep(10)
            logger.debug(f"{task_name} - 无任务")
            return items, proxies
        if proxies is None:
            proxies = await Tool.get_proxies(s)
            logger.debug(f"{task_name} - 代理IP: {proxies}")
            if not proxies:
                # No usable proxy this round; caller retries next batch.
                logger.error(f"{task_name} - 切换代理IP")
                return items, proxies
            # NOTE(review): get_proxies never returns the string 'error',
            # so this exit branch looks unreachable — confirm.
            if proxies == 'error':
                logger.error(f"{task_name} - 没有代理IP可用")
                sys.exit(0)
        return items, proxies
    @staticmethod
    async def get_content(s, item, walmart_headers, proxies):
        """
        Fetch item['url'] and validate the response body.
        :return: the page content
        :raises ProxiesError: when the page looks blocked or empty
        """
        url = item['url']
        content = await Tool.get_html(s, url, walmart_headers, proxies)
        return await Tool.check_content(content)
    @staticmethod
    async def retry(s, items, success_item, data):
        """
        Re-queue the items from a batch that did not complete.
        :param s: async HTTP session
        :param items: the batch that was attempted
        :param success_item: values (keyed by data['value']) that succeeded
        :param data: dict with queue, task_name and optional value key name
        """
        queue = data['queue']
        task_name = data['task_name']
        value = data.get('value', 'url')
        new_items = list()
        for item in items:
            if item[value] not in success_item:
                new_items.append(item)
        if new_items:
            data = {
                "data": new_items,
                "queue": queue,
                "type": 10,
            }
            logger.error(f"{task_name} - 上传失败的任务 {json.dumps(data)}")
            await Tool.callback(s, data)
class Monitoring(BaseWalmartCrawler):
    """
    Walmart product monitoring crawler: polls the task queue, fetches
    product pages, extracts per-SKU price/stock data and uploads it.
    """
    task_name = "沃尔玛商品监控"
    async def get_task_type(self):
        """Queue key for product-monitoring tasks."""
        return "walmart:product"
    async def format_(self, content):
        """
        Extract per-SKU monitoring data from a product page.
        :param content: product page HTML
        :return: dict with 'is_link_error' and a 'skus' list
        :raises ProxiesError: when the embedded JSON lacks product data
        """
        data_dict = await Tool.get_html_to_json(content)
        try:
            data = data_dict['props']['pageProps']['initialData']['data']
        except:
            raise ProxiesError(f"获取数据异常")
        product = data['product']
        product = {} if product is None else product
        skus = []
        is_link_error = ""
        free_delivery = ""
        freight_price = 0
        if not product:
            # Missing product node => dead / removed listing.
            logger.error("商品不存在")
            is_link_error = "Commodity nonexistence"
        fulfillment_type = ""
        buy_now_eligible = ""
        postal_code = product.get('location', {}).get('postalCode', '')
        if product:
            buy_now_eligible = product.get('buyNowEligible', False)
            buy_now_eligible = 'no' if buy_now_eligible is False else ''
            variant_criteria = product['variantCriteria']
            skus = []
            variant_list = {}
            # Map variant id -> human-readable variant name.
            for item in variant_criteria:
                item_variant_list = item.get('variantList', {})
                for variant in item_variant_list:
                    variant_list[variant['id']] = variant['name']
            variants_map = product['variantsMap']
            fulfillment_options = product.get('fulfillmentOptions', [])
            fulfillment_type = product.get('fulfillmentType', '')
            # The shipping option carries delivery date and freight price.
            for fulfillment_option in fulfillment_options:
                if fulfillment_option['__typename'] == 'ShippingOptionV2':
                    speed_details = fulfillment_option['speedDetails']
                    if speed_details:
                        free_delivery = speed_details.get('deliveryDate', '')
                        freight_price = speed_details.get('fulfillmentPrice', {})
                        if freight_price:
                            freight_price = freight_price.get('price', 0)
            brand = product.get('brand', "")
            # One SKU entry per variant in variantsMap.
            for (map_id, _map) in variants_map.items():
                _map = _map if _map is not None else {}
                variants = _map.get('variants', {})
                sku_name = []
                if not variants:
                    sku_name = ['default']
                else:
                    for sku in variants:
                        sku_name.append(variant_list[sku])
                current_price = _map.get('priceInfo', {}).get('currentPrice', {})
                current_price = current_price if current_price is not None else {}
                skus.append({
                    'price': current_price.get('price', 0),
                    "postal_code": postal_code,
                    'status': _map.get('availabilityStatus', ''),
                    "free_delivery": free_delivery,
                    "brand": brand,
                    "is_link_error": is_link_error,
                    "asin": _map.get('id', ''),
                    "sku_name": sku_name,
                    "star_level": product['averageRating'],
                    "freight_price": freight_price,
                    "fulfillment_type": fulfillment_type,
                    "buy_now_eligible": buy_now_eligible,
                    'ship_from': '',
                    'sold_by': '',
                })
        if not skus:
            # No variants: fall back to the top-level product fields.
            current_price = product.get('priceInfo', {}).get('currentPrice', {})
            current_price = current_price if current_price is not None else {}
            skus.append({
                'price': current_price.get('price', 0),
                'status': product.get('availabilityStatus', ''),
                "free_delivery": free_delivery,
                "is_link_error": is_link_error,
                "asin": product.get('id', ''),
                "sku_name": "default",
                "star_level": product.get('averageRating', 0),
                "freight_price": freight_price,
                "fulfillment_type": fulfillment_type,
                "buy_now_eligible": buy_now_eligible,
                "postal_code": postal_code,
                'ship_from': '',
                'sold_by': '',
            })
        return {
            'is_link_error': is_link_error,
            'skus': skus
        }
    async def main(self, proxies=None):
        """
        Process one batch of monitoring tasks and upload the results.
        :param proxies: proxies mapping carried over between batches
        :return: proxies to reuse (None forces a refresh next batch)
        """
        walmart_headers = await Tool.get_walmart_headers()
        async with AsyncSession(max_clients=50, timeout=10) as s:
            upload_list = list()
            items = list()
            success_item = []
            try:
                start_data = {
                    'task_name': self.task_name,
                    'task_type': await self.get_task_type(),
                    'number': 10,
                }
                items, proxies = await self.start(start_data, proxies, s)
                item_ids = [d['item_id'] for d in items]
                for item in items:
                    logger.debug(f"{self.task_name} - 采集 {item['url']}")
                    content = await self.get_content(s, item, walmart_headers, proxies)
                    response = await self.format_(content)
                    # Tag every SKU with the originating task's metadata.
                    for sku in response.get('skus', []):
                        sku['collection_type'] = item['collection_type']
                        sku['item_id'] = item['item_id']
                        sku['admin_users_id'] = item['admin_users_id']
                        sku['app_name'] = item.get('app_name', 'admin')
                        upload_list.append(sku)
                    success_item.append(item['url'])
            except ProxiesError as e:
                # Blocked / dead proxy: drop it so the next batch refreshes.
                logger.error(f"{self.task_name} - 代理异常 - {e}")
                proxies = None
            except Exception as e:
                logger.exception(f"{self.task_name} - 任务异常 - {e}")
            if items:
                data = {
                    "data": {
                        "error_items": item_ids,
                        "collection": upload_list,
                    },
                    "type": 5,
                }
                logger.success(f"{self.task_name} - 共采集到 {len(upload_list)} 条数据")
                try:
                    await Tool.callback(s, data)
                except RetryError:
                    logger.error(f"{self.task_name} - 回调失败")
            return proxies
    async def run(self):
        """Endless monitoring loop, carrying proxies between batches."""
        proxies = None
        while True:
            proxies = await self.main(proxies)
class Search(BaseWalmartCrawler):
    """Walmart search-results crawler: fetches search pages and uploads
    one product summary per result tile."""
    task_name = "沃尔玛搜索"
    async def get_task_type(self):
        """Queue key for search tasks."""
        return "walmart:search"
    async def format_(self, content):
        """
        Parse a search-results page into a list of product summaries.
        :param content: search page HTML
        :return: list of result dicts, or None when the search is empty
        :raises ProxiesError: when the embedded JSON lacks result data
        """
        if 'clear your filters and start over' in content:
            # Walmart's "no results" page.
            logger.error('搜索没有值')
            return None
        data_dict = await Tool.get_html_to_json(content)
        result = []
        try:
            items = data_dict['props']['pageProps']['initialData']['searchResult']['itemStacks'][0]['items']
        except:
            raise ProxiesError(f"获取数据异常")
        for item in items:
            # Skip ad slots and other non-product tiles.
            if item.get('__typename', 'AdPlaceholder') != 'Product':
                continue
            delivery = item['fulfillmentSummary'][0]['deliveryDate'] if item.get('fulfillmentSummary', None) else ""
            fulfillment_type = item.get('fulfillmentType', "")
            result.append({
                "title": item.get('name', ""),
                "price": item['priceInfo']['linePrice'],
                "delivery": delivery,
                "review_count": item['numberOfReviews'],
                "unique_value": item['usItemId'],
                "image": item['image'],
                "is_prime": "",
                "star_level": item['averageRating'],
                "fulfillment_type": fulfillment_type
            })
        return result
    async def main(self, proxies=None):
        """
        Process one batch of search tasks; items that did not complete are
        re-queued through ``retry``.
        :param proxies: proxies mapping carried over between batches
        :return: proxies to reuse (None forces a refresh next batch)
        """
        walmart_headers = await Tool.get_walmart_headers()
        async with AsyncSession(max_clients=50) as s:
            success_item = []
            items = []
            try:
                start_data = {
                    'task_name': self.task_name,
                    'task_type': await self.get_task_type(),
                    'number': 10,
                }
                items, proxies = await self.start(start_data, proxies, s)
                for item in items:
                    logger.debug(f"{self.task_name} - 采集 {item['url']}")
                    content = await self.get_content(s, item, walmart_headers, proxies)
                    result = await self.format_(content)
                    if result is None:
                        # Empty search result still counts as handled.
                        success_item.append(item['url'])
                        continue
                    data = {
                        "data": {
                            "task_id": item["task_id"],
                            "collection_data": result,
                            'platform_type': item.get('platform_type', 3)
                        },
                        "type": item['callback_type'],
                        "admin_users_id": item["admin_users_id"],
                        "app_name": item.get("app_name", "admin")
                    }
                    logger.success(f"{self.task_name} - 采集完成")
                    try:
                        await Tool.callback(s, data)
                    except RetryError:
                        logger.error(f"{self.task_name} - 回调失败")
                    success_item.append(item['url'])
            except ProxiesError as e:
                logger.error(f"{self.task_name} - 代理异常 - {e}")
                proxies = None
            except Exception as e:
                traceback.print_exc()
                logger.error(f"{self.task_name} - 任务异常 - {e}")
            # NOTE(review): start_data is referenced here; if an exception
            # fired before it was assigned this raises NameError — confirm.
            retry_data = {
                "queue": start_data['task_type'],
                "task_name": self.task_name,
            }
            await self.retry(s, items, success_item, retry_data)
            return proxies
    async def run(self):
        """Endless search loop, carrying proxies between batches."""
        proxies = None
        while True:
            proxies = await self.main(proxies)
class Goods(BaseWalmartCrawler):
    """Walmart product-detail crawler.

    Pulls product URLs from the ``walmart:product-detail`` queue, parses the
    embedded page JSON into normalised SKU records, mirrors all images to OSS,
    rewrites the description HTML, and posts the result back through the task
    callback API.
    """

    task_name = "沃尔玛商品"

    async def get_task_type(self):
        """Return the queue name this crawler consumes from.

        :return: queue identifier string.
        """
        return "walmart:product-detail"

    async def format_(self, content):
        """Parse a Walmart product page into a normalised product dict.

        :param content: raw HTML of the product page.
        :return: dict with title/images/descriptions/skus/brand/asin/sku_name,
            or ``None`` when the page carries no product (item removed).
        :raises ProxiesError: when the embedded JSON payload is missing, which
            typically means the proxy was blocked and should be rotated.
        """
        data_dict = await Tool.get_html_to_json(content)
        try:
            data = data_dict['props']['pageProps']['initialData']['data']
        except Exception:
            # Fix: was a bare ``except:`` (also swallowed BaseException).
            # A missing payload normally means a blocked/invalid response.
            raise ProxiesError(f"获取数据异常")
        product = data.get('product', {})
        if product is None:
            return None
        imageInfo = product.get('imageInfo', {})
        all_images = imageInfo.get('allImages', []) if imageInfo else []
        idml = data['idml']
        images = [image.get('url', '') for image in all_images]
        image_map = product['imageMap']
        variant_criteria = product['variantCriteria']
        skus = []
        variant_list = {}    # variant id -> human-readable option name
        variant_images = {}  # variant product id -> image map key
        for item in variant_criteria:
            item_variant_list = item.get('variantList', {})
            for variant in item_variant_list:
                variant_products = variant.get('products', [])
                for variant_product in variant_products:
                    if variant['images']:
                        variant_images[variant_product] = variant['images'][0]
                variant_list[variant['id']] = variant['name']
        fulfillment_type = product.get('fulfillmentType', '')
        variants_map = product['variantsMap']
        is_link_error = ""
        freight_price = ''
        free_delivery = ''
        fulfillment_options = product.get('fulfillmentOptions', [])
        buy_now_eligible = product.get('buyNowEligible', False)
        for fulfillment_option in fulfillment_options:
            if fulfillment_option['__typename'] == 'ShippingOptionV2':
                speed_details = fulfillment_option['speedDetails']
                if speed_details:
                    free_delivery = speed_details.get('deliveryDate', '')
                    freight_price = speed_details.get('fulfillmentPrice', {})
                    if freight_price:
                        freight_price = freight_price.get('price', 0)
        brand = product.get('brand', "")
        product_use_item_id = product.get('usItemId', '')
        product_sku_name = ''
        # Renamed loop vars from ``id``/``map``: don't shadow builtins.
        for (variant_id, variant_info) in variants_map.items():
            variants = variant_info.get('variants', [])
            sku_name = []
            # Re-order the option ids so the colour option always comes first.
            new_variants = []
            for sku in variants:
                if sku.find('actual_color') != -1:
                    new_variants.insert(0, sku)
                else:
                    new_variants.append(sku)
            for sku in new_variants:
                sku_name.append(variant_list[sku])
            image_name = variant_images.get(variant_id, "")
            if image_name:
                sku_image = image_map[image_name]['url']
            else:
                sku_image = variant_info.get('imageInfo', {}).get('thumbnailUrl', '')
            item_id = variant_info.get('usItemId', '')
            try:
                # ``currentPrice`` may be explicitly null, in which case
                # ``.get`` on None raises — fall back to 0.
                price = variant_info.get('priceInfo', {}).get('currentPrice', {}).get('price', 0)
            except Exception as e:
                logger.error(f"获取价格失败: {e}")
                price = 0
            skus.append({
                'price': price,
                'status': variant_info.get('availabilityStatus', ''),
                "free_delivery": free_delivery,
                "brand": brand,
                "is_link_error": is_link_error,
                "sku_image": sku_image,
                "asin": variant_info.get('id', ''),
                'item_id': item_id,
                "sku_name": sku_name,
                "star_level": product['averageRating'],
                "freight_price": freight_price,
                'fulfillment_type': fulfillment_type,
                'buy_now_eligible': buy_now_eligible,
                'ship_from': '',
                'sold_by': '',
                "delivery_info": '',
                "is_buy_now": '',
            })
            if product_use_item_id == item_id:
                product_sku_name = sku_name
        if not skus:
            # Single-variant product: synthesise one default SKU.
            default_sku_name = "default"
            skus.append({
                'price': product.get('priceInfo', {}).get('currentPrice', {}).get('price', 0),
                'status': product.get('availabilityStatus', ''),
                "free_delivery": free_delivery,
                "brand": brand,
                "is_link_error": is_link_error,
                # Fix: guard against an empty gallery (original raised
                # IndexError on ``images[0]`` when no images were present).
                "sku_image": images[0] if images else '',
                "asin": product.get('id', ''),
                'item_id': product_use_item_id,
                "sku_name": default_sku_name,
                "star_level": product['averageRating'],
                "freight_price": freight_price,
                'fulfillment_type': fulfillment_type,
                'buy_now_eligible': buy_now_eligible,
                'ship_from': '',
                'sold_by': '',
                "delivery_info": '',
                "is_buy_now": '',
            })
            product_sku_name = default_sku_name
        # Drop the last gallery image (original comment: "删除最后一张图片");
        # presumably a non-product trailing asset — TODO confirm.
        images = images[0:-1]
        return {
            "title": product.get('name', ''),
            "images": images,
            "point_description": idml['shortDescription'],
            "descriptions": idml['longDescription'],
            "skus": skus,
            'brand': brand,
            "asin": product_use_item_id,
            'sku_name': product_sku_name,
        }

    async def main(self, proxies=None):
        """Run one crawl batch: pull up to 5 tasks, scrape and format each
        product, mirror images to OSS, rewrite descriptions, then post the
        result through the callback API.

        :param proxies: proxy mapping carried over from the previous batch.
        :return: the proxy mapping to reuse next batch (``None`` after a
            proxy error, forcing a fresh proxy).
        """
        async with AsyncSession(max_clients=50) as s:
            success_item = []  # URLs considered finished (won't be requeued)
            items = []
            try:
                start_data = {
                    'task_name': self.task_name,
                    'task_type': await self.get_task_type(),
                    'number': 5,
                }
                items, proxies = await self.start(start_data, proxies, s)
                walmart_headers = await Tool.get_walmart_headers()
                for item in items:
                    logger.debug(f"{self.task_name} - 采集 {item['url']}")
                    content = await self.get_content(s, item, walmart_headers, proxies)
                    response = await self.format_(content)
                    if response is None:
                        # Product page no longer exists: mark as done.
                        logger.success(f"{item['url']} - 商品不存在")
                        success_item.append(item['url'])
                        continue
                    result = {
                        "default_info": {
                            "title": response['title'],
                            "images": response['images'],
                            "point_description": response['point_description'],
                            "descriptions": response['descriptions'],
                            'asin': response.get('asin', ''),
                            'sku_name': response.get('sku_name', ''),
                        }
                    }
                    images = result['default_info']['images']
                    title = await Tool.replace_chinese(result['default_info']['title'])
                    point_description = result['default_info']['point_description']
                    # Collect every image (gallery + per-SKU) for upload.
                    # Fix: dropped the original try/except around plain
                    # ``list.append`` calls — append cannot raise.
                    download_images = list(images)
                    for sku in response['skus']:
                        download_images.append(sku['sku_image'])
                    new_images = []
                    logger.debug(f"{self.task_name} - 开始下载图片")
                    download_images = await Tool.remove_duplicate_images(download_images)
                    if download_images:
                        # Fix: removed the no-op ``new_images = new_images``.
                        download_list = [Tool.oss_upload(s, image) for image in download_images]
                        new_images = await asyncio.gather(*download_list)
                    logger.debug(f"{self.task_name} - 图片下载完成")
                    new_skus = []
                    replace_image = []
                    # Swap the original Walmart URLs for the mirrored OSS URLs.
                    for new in new_images:
                        for sku in response['skus']:
                            if sku['sku_image'] in new:
                                sku['sku_image'] = new[sku['sku_image']]
                                new_skus.append(sku)
                        for image in images:
                            if image in new:
                                replace_image.append(new[image])
                    images = replace_image
                    # Fix: collapsed ``brand = list(); brand.append(...);
                    # brand = brand[0]`` into a direct assignment (same value).
                    brand = response['brand']
                    descriptions = response['descriptions']
                    if not descriptions:
                        descriptions = ""
                    for image in images:
                        descriptions += f"<img src='{image}' />"
                    descriptions = await Tool.remove_img_with_maximum(descriptions, 25)
                    new_point_description = point_description
                    if brand:
                        # Strip the brand name out of all user-facing text.
                        descriptions = await Tool.replace_case(descriptions, brand, '')
                        new_point_description = await Tool.replace_case(point_description, brand, '')
                        new_point_description = re.sub(r'<li></li>', '', new_point_description)
                        title = await Tool.replace_case(title, brand, '')
                    # Clamp the title to the 255-character column limit.
                    if len(title) > 255:
                        title = title[0:255]
                    descriptions = new_point_description + descriptions
                    descriptions = descriptions.replace('100%', '')
                    title = title.replace('100%', '')
                    result['default_info']['descriptions'] = descriptions
                    result['default_info']['point_description'] = new_point_description
                    result['skus'] = new_skus
                    result['brand'] = response['brand']
                    result['default_info']['title'] = title
                    result['default_info']['images'] = images
                    result['url'] = item.get('url', '')
                    data = {
                        "data": {
                            "task_id": item.get('id', 0),
                            "collection_data": result,
                            "log_id": item.get('log_id', 0),
                            'platform_type': item.get('platform_type', 3),
                            'export_type': item.get('export_type', 1),
                        },
                        "type": 4,
                        "admin_users_id": item.get('admin_users_id', 0),
                        "app_name": item.get("app_name", "admin"),
                    }
                    logger.success(f"{self.task_name} - 采集完成")
                    await Tool.callback(s, data)
                    success_item.append(item['url'])
            except ProxiesError as e:
                # Proxy was blocked or expired: drop it for the next batch.
                logger.error(f"{self.task_name} - 代理异常 - {e}")
                proxies = None
            except Exception as e:
                traceback.print_exc()
                logger.error(f"{self.task_name} - 任务异常 - {e}")
            # Requeue whatever did not make it into success_item.
            retry_data = {
                "queue": start_data['task_type'],
                "task_name": self.task_name,
            }
            await self.retry(s, items, success_item, retry_data)
            return proxies

    async def run(self):
        """Crawl forever, reusing the proxy returned by each batch."""
        proxies = None
        while True:
            proxies = await self.main(proxies)
class Test:
    """Debug worker (task type "4"): polls the proxy-binding API every 30
    seconds and logs the response. Used to verify proxy connectivity only."""

    async def run(self):
        """Loop forever, hitting the proxy bind endpoint every 30s."""
        # NOTE(review): the API key is hard-coded in the URL below; it should
        # be moved to configuration rather than committed to source.
        while True:
            try:
                async with AsyncSession() as s:
                    url = "http://www.zdopen.com/ShortProxy/BindIP?api=202406041824314753&akey=4fee8e19764876e1&i=1"
                    content = await s.get(url)
                    logger.success(content.json())
                    await asyncio.sleep(30)
            except asyncio.CancelledError:
                # Fix: the original bare ``except:`` also swallowed
                # CancelledError, making this task uncancellable — re-raise.
                raise
            except Exception:
                logger.error(f"获取代理失败")
                await asyncio.sleep(30)
async def run():
    """Spawn ``tread_number`` worker threads, each driving one crawler of the
    kind selected by the global ``task_type``, and wait for all of them."""
    # Map task-type codes to crawler classes; anything else falls back to Search.
    crawler_classes = {"1": Monitoring, "3": Goods, "4": Test}
    workers = []
    for index in range(tread_number):
        crawler = crawler_classes.get(task_type, Search)()
        crawler.task_name = "线程-" + str(index + 1)
        # Each crawler coroutine runs on its own event loop inside a thread.
        workers.append(threading.Thread(target=asyncio.run, args=(crawler.run(),)))
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
if __name__ == '__main__':
    # CLI: argv[1] = task type ("1" monitoring, "2" search, "3" goods,
    # "4" proxy test; defaults to "1"), argv[2] = thread count (default 1).
    task_type = sys.argv[1] if len(sys.argv) > 1 else None
    if not task_type or task_type not in ["1", "2", "3", "4"]:
        task_type = "1"
    tread_number = sys.argv[2] if len(sys.argv) > 2 else None
    if not tread_number:
        tread_number = 1
    else:
        tread_number = int(tread_number)
    logger.success(f"任务类型: {task_type}")
    # Fix: WindowsSelectorEventLoopPolicy only exists on Windows; the original
    # unconditional call crashed with AttributeError on Linux/macOS.
    if sys.platform == "win32":
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
    asyncio.get_event_loop().run_until_complete(run())
...@@ -104,8 +104,8 @@ class Tool: ...@@ -104,8 +104,8 @@ class Tool:
""" """
return { return {
"user-agent": UA.chrome, "user-agent": UA.chrome,
# "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7", "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
# "referer": "https://www.walmart.com/", "referer": "https://www.walmart.com/",
# 'Cookie': 'xptwj=uz:37e0877dc804c3221718:bdVasz6/73NdttPtcK27fPToos/4oZxy1UVlykJE0nkfLqVhmrKHI452MWIshP06rvlg6Oo4i/CAN+vCMS/BDqQPJtb1jF2UpHbJHuf+N3jupylUCiRaFrAtkQyytLu9SHMeQtQzWLBWK0mgSKk5GRqrGp86eHJ+TbhU//sz5ypzHMVRNtnq; ak_bmsc=2773E11742238181A0BCDEF70472B0E8~000000000000000000000000000000~YAAQjPw7F8Q79RiRAQAAJI6FIRhEoerjT5Fn46CibnTHiZ8nQJv6pLxGhTxu+OWA2qwUlBGLclYLJGQXToJ+BTKfwzjwI5+ud07a9A2L57hKMVbIX2vAo4ZGXA9p0BWAKKn/SbV4VHTnZa/i+pIMZB0ylq0shc7noTTr/tsto5DDd/FHf5vFFICqtEujI+2AckaTNJGYW8PPy9VW6DqXCNpVcgo3qVSntqYOT1bhJLjdYtWmspJGBhEFH5vRQdC7IdN3VqC3BxJLZWzVTaQsmsJYN2Pem7MKiHyk8/AgjKv9ZRs57VOCn/YXSPICVuP9SNUGIJcxZG8Le0VS+L4XcgyY3ngnOx8XBn8MNzpceQ0rKVCI44zb1SD11n6ympD4JV12cwN8L1JFpPs=; vtc=UcbKnqHGuhRWhgfWtgcr70; bstc=UcbKnqHGuhRWhgfWtgcr70; pxcts=85e47474-52ff-11ef-8dba-4f0e258ac181; _pxvid=85e46481-52ff-11ef-8dba-e4bc4df1a16b; adblocked=true; hasACID=true; assortmentStoreId=3081; _shcc=US; _intlbu=false; hasLocData=1; abqme=true; mobileweb=0; xpth=x-o-mart%2BB2C~x-o-mverified%2Bfalse; xpa=54G-6|CoEEB|D2oRZ|ELwUx|IuElO|McEea|MoRkL|NbX17|O8sIU|OFImx|Ocfr2|SqH-y|VyWly|XIItK|eo_el|nzZmL|rd3k-|zf8aF; exp-ck=54G-61D2oRZ1ELwUx1IuElO1NbX172O8sIU1OFImx1SqH-y2VyWly1XIItK2eo_el3nzZmL1zf8aF2; _pxhd=c8185cb38f153869ee089d6ab969bf1ed0ba0f4d4e66cd414b5dfb0daa85c913:85e46481-52ff-11ef-8dba-e4bc4df1a16b; bm_mi=ABB90B88348B58A9787BACA8B2B84DC5~YAAQjPw7F7w99RiRAQAA7KGFIRgasMlRZloJg1t00D254khXjSN/IrLyrciUo2TiMd/5dzEdpQ0rZdLkmWbcqhDvW4LcpJsY0/ViOAItAsERoIpacm5TGpo4+dliNw8JD8aa2peQ5nWBF46y0YMbmPatpEzPfi+SasMjQmt+oWQMr2Q9I3p9CBFvXsmAazCwcGDmXNtTShQbyQ9Gfq93Zgc1eh3WXmhtsDw7hanPmZF2kqaqIL1bBE46OKpVQzJKpiBZVtluHYILY+4LsIaKmwxNJmW1gbqIDx8Sbm4anTBTryfr26L/s3IA5mQ3yyk=~1; 
_px3=27f094085ac92f8a53a7507dbb323f50efb95f173b554348e0b72a5732857d78:2VRKXx+P0wIrkvwIM7+Xtfysy6oYDVs6V9uhgK8m88W6Ck43XPZSkDLlnFReenWMPrQ3MmpViErhyjVaXANA0A==:1000:5ZwO3UHAT/3uI8KmWYckrGicT4zhb/RLBnKTB2fZu7NK2BVIs9Tp4YrQEPmeQLr27F/Csvs7uj4SuQMN8cPuZyDda7XwJIqyx7V/BlbxwhefKls21slpn9Hkiz0U44U2DITgh0p/sfol2JVGAEXwS66TjQY9DEa3M2GGuD2Xf4+3KT5MAymWIMYp1w5P3Rqtv0KcYxURCTMZDW2B3Ol9/sKFAOeEgEWRfvTh0NaYVLI=; _astc=52d130b133cbd1b501460d9fdae93a97; xptc=_m%2B9~assortmentStoreId%2B3081; xpm=1%2B1722844296%2BUcbKnqHGuhRWhgfWtgcr70~%2B0; _pxde=c92ceb7d7d808ccee6d120ac60cabdac3b14ba9f42bfb2ff4ed5e6ef8f8a7396:eyJ0aW1lc3RhbXAiOjE3MjI4NDQyOTkyMjN9; xptwg=2769187247:ADDF1B60AE2118:1B17408:73DC67D1:89F8600E:2CEC42A1:; TS012768cf=01d032572a9131c004c984f1591f1050b2bc64767650396a370f20a1d0dcb0c458b394f0f12ffbd85b8ab44153a1cbf2c143166c54; TS01a90220=01d032572a9131c004c984f1591f1050b2bc64767650396a370f20a1d0dcb0c458b394f0f12ffbd85b8ab44153a1cbf2c143166c54; TS2a5e0c5c027=08edc6f644ab20005250728372d83aee067d8ef4429ed38ad3f72422cd7beb712284fa2bb6dcc53008e648c99a113000bbea3d56aaf743f8797d1fd537dfebae66e076aea8557039e6abbfe3d370af617c79b48e084bdcd637ffb8a8b7b06568; QMReplaySample=true; io_id=2b1e23f1-a177-4e38-86da-561e276b6abf; TS016ef4c8=018f75cfbcd4def242c1bbe08d5578972d0f66b599a484d002e1540db87e4ac90c4800be2ab90e9078fff48b5e8c5739eb3d440c3b; TS01f89308=018f75cfbcd4def242c1bbe08d5578972d0f66b599a484d002e1540db87e4ac90c4800be2ab90e9078fff48b5e8c5739eb3d440c3b; TS8cb5a80e027=08bd2f8669ab2000009af9c8550cbc249ae938bfdca0492f8d384c6808c0e90a144e4024b84fbf37082893210e113000ad32e6b74f355e50fc204aad58e20722d6ed74efd203ad1c6a356b2d93d18f547cc29e00ec15f9e4c59e73bb2f5fc352; bm_sv=60519E529ABF4EFE97D2B63408DD5BD1~YAAQjPw7F35D9RiRAQAAANaFIRionbimzr8LgiM2GAxwy+I6Bu2U7faKmNM03jfRJ1ukw3hFQzT+obDLwlGwWa4HEiO9wHosev0vkl9j46QR9DoFq+6/MAGwpf9A8wuMswRgYpSFSZvyAm8uCG9mGPzhuuN5sOmxMflboFyOm2+5jFgcDmBA3WzZRPhRRy1M0xYfthXmO5D7IppDKw8+Zbzj7sG6Wdg5pUBb5XzzWaDNDswJnHdONYEd7O7hOGbyIw==~1' # 'Cookie': 
'xptwj=uz:37e0877dc804c3221718:bdVasz6/73NdttPtcK27fPToos/4oZxy1UVlykJE0nkfLqVhmrKHI452MWIshP06rvlg6Oo4i/CAN+vCMS/BDqQPJtb1jF2UpHbJHuf+N3jupylUCiRaFrAtkQyytLu9SHMeQtQzWLBWK0mgSKk5GRqrGp86eHJ+TbhU//sz5ypzHMVRNtnq; ak_bmsc=2773E11742238181A0BCDEF70472B0E8~000000000000000000000000000000~YAAQjPw7F8Q79RiRAQAAJI6FIRhEoerjT5Fn46CibnTHiZ8nQJv6pLxGhTxu+OWA2qwUlBGLclYLJGQXToJ+BTKfwzjwI5+ud07a9A2L57hKMVbIX2vAo4ZGXA9p0BWAKKn/SbV4VHTnZa/i+pIMZB0ylq0shc7noTTr/tsto5DDd/FHf5vFFICqtEujI+2AckaTNJGYW8PPy9VW6DqXCNpVcgo3qVSntqYOT1bhJLjdYtWmspJGBhEFH5vRQdC7IdN3VqC3BxJLZWzVTaQsmsJYN2Pem7MKiHyk8/AgjKv9ZRs57VOCn/YXSPICVuP9SNUGIJcxZG8Le0VS+L4XcgyY3ngnOx8XBn8MNzpceQ0rKVCI44zb1SD11n6ympD4JV12cwN8L1JFpPs=; vtc=UcbKnqHGuhRWhgfWtgcr70; bstc=UcbKnqHGuhRWhgfWtgcr70; pxcts=85e47474-52ff-11ef-8dba-4f0e258ac181; _pxvid=85e46481-52ff-11ef-8dba-e4bc4df1a16b; adblocked=true; hasACID=true; assortmentStoreId=3081; _shcc=US; _intlbu=false; hasLocData=1; abqme=true; mobileweb=0; xpth=x-o-mart%2BB2C~x-o-mverified%2Bfalse; xpa=54G-6|CoEEB|D2oRZ|ELwUx|IuElO|McEea|MoRkL|NbX17|O8sIU|OFImx|Ocfr2|SqH-y|VyWly|XIItK|eo_el|nzZmL|rd3k-|zf8aF; exp-ck=54G-61D2oRZ1ELwUx1IuElO1NbX172O8sIU1OFImx1SqH-y2VyWly1XIItK2eo_el3nzZmL1zf8aF2; _pxhd=c8185cb38f153869ee089d6ab969bf1ed0ba0f4d4e66cd414b5dfb0daa85c913:85e46481-52ff-11ef-8dba-e4bc4df1a16b; bm_mi=ABB90B88348B58A9787BACA8B2B84DC5~YAAQjPw7F7w99RiRAQAA7KGFIRgasMlRZloJg1t00D254khXjSN/IrLyrciUo2TiMd/5dzEdpQ0rZdLkmWbcqhDvW4LcpJsY0/ViOAItAsERoIpacm5TGpo4+dliNw8JD8aa2peQ5nWBF46y0YMbmPatpEzPfi+SasMjQmt+oWQMr2Q9I3p9CBFvXsmAazCwcGDmXNtTShQbyQ9Gfq93Zgc1eh3WXmhtsDw7hanPmZF2kqaqIL1bBE46OKpVQzJKpiBZVtluHYILY+4LsIaKmwxNJmW1gbqIDx8Sbm4anTBTryfr26L/s3IA5mQ3yyk=~1; 
_px3=27f094085ac92f8a53a7507dbb323f50efb95f173b554348e0b72a5732857d78:2VRKXx+P0wIrkvwIM7+Xtfysy6oYDVs6V9uhgK8m88W6Ck43XPZSkDLlnFReenWMPrQ3MmpViErhyjVaXANA0A==:1000:5ZwO3UHAT/3uI8KmWYckrGicT4zhb/RLBnKTB2fZu7NK2BVIs9Tp4YrQEPmeQLr27F/Csvs7uj4SuQMN8cPuZyDda7XwJIqyx7V/BlbxwhefKls21slpn9Hkiz0U44U2DITgh0p/sfol2JVGAEXwS66TjQY9DEa3M2GGuD2Xf4+3KT5MAymWIMYp1w5P3Rqtv0KcYxURCTMZDW2B3Ol9/sKFAOeEgEWRfvTh0NaYVLI=; _astc=52d130b133cbd1b501460d9fdae93a97; xptc=_m%2B9~assortmentStoreId%2B3081; xpm=1%2B1722844296%2BUcbKnqHGuhRWhgfWtgcr70~%2B0; _pxde=c92ceb7d7d808ccee6d120ac60cabdac3b14ba9f42bfb2ff4ed5e6ef8f8a7396:eyJ0aW1lc3RhbXAiOjE3MjI4NDQyOTkyMjN9; xptwg=2769187247:ADDF1B60AE2118:1B17408:73DC67D1:89F8600E:2CEC42A1:; TS012768cf=01d032572a9131c004c984f1591f1050b2bc64767650396a370f20a1d0dcb0c458b394f0f12ffbd85b8ab44153a1cbf2c143166c54; TS01a90220=01d032572a9131c004c984f1591f1050b2bc64767650396a370f20a1d0dcb0c458b394f0f12ffbd85b8ab44153a1cbf2c143166c54; TS2a5e0c5c027=08edc6f644ab20005250728372d83aee067d8ef4429ed38ad3f72422cd7beb712284fa2bb6dcc53008e648c99a113000bbea3d56aaf743f8797d1fd537dfebae66e076aea8557039e6abbfe3d370af617c79b48e084bdcd637ffb8a8b7b06568; QMReplaySample=true; io_id=2b1e23f1-a177-4e38-86da-561e276b6abf; TS016ef4c8=018f75cfbcd4def242c1bbe08d5578972d0f66b599a484d002e1540db87e4ac90c4800be2ab90e9078fff48b5e8c5739eb3d440c3b; TS01f89308=018f75cfbcd4def242c1bbe08d5578972d0f66b599a484d002e1540db87e4ac90c4800be2ab90e9078fff48b5e8c5739eb3d440c3b; TS8cb5a80e027=08bd2f8669ab2000009af9c8550cbc249ae938bfdca0492f8d384c6808c0e90a144e4024b84fbf37082893210e113000ad32e6b74f355e50fc204aad58e20722d6ed74efd203ad1c6a356b2d93d18f547cc29e00ec15f9e4c59e73bb2f5fc352; bm_sv=60519E529ABF4EFE97D2B63408DD5BD1~YAAQjPw7F35D9RiRAQAAANaFIRionbimzr8LgiM2GAxwy+I6Bu2U7faKmNM03jfRJ1ukw3hFQzT+obDLwlGwWa4HEiO9wHosev0vkl9j46QR9DoFq+6/MAGwpf9A8wuMswRgYpSFSZvyAm8uCG9mGPzhuuN5sOmxMflboFyOm2+5jFgcDmBA3WzZRPhRRy1M0xYfthXmO5D7IppDKw8+Zbzj7sG6Wdg5pUBb5XzzWaDNDswJnHdONYEd7O7hOGbyIw==~1'
} }
...@@ -182,9 +182,9 @@ class Tool: ...@@ -182,9 +182,9 @@ class Tool:
s.headers.clear() s.headers.clear()
s.cookies.clear() s.cookies.clear()
s.proxies.clear() s.proxies.clear()
try:
proxies = {"http": f"http://{proxy_ip}", "https": f"http://{proxy_ip}"} proxies = {"http": f"http://{proxy_ip}", "https": f"http://{proxy_ip}"}
walmart_headers = Tool.get_walmart_headers() walmart_headers = Tool.get_walmart_headers()
try:
impersonate = Tool.get_impersonate() impersonate = Tool.get_impersonate()
response = await s.get( response = await s.get(
url, url,
...@@ -201,7 +201,7 @@ class Tool: ...@@ -201,7 +201,7 @@ class Tool:
@staticmethod @staticmethod
@retry(stop=stop_after_attempt(3), wait=wait_random(1, 2), reraise=True) @retry(stop=stop_after_attempt(3), wait=wait_random(1, 2), reraise=True)
async def get_tasks(s: AsyncSession) -> list: async def get_tasks(s: AsyncSession) -> dict:
"""获取任务 """获取任务
:param s: 异步会话 :param s: 异步会话
...@@ -215,8 +215,8 @@ class Tool: ...@@ -215,8 +215,8 @@ class Tool:
logger.info(resp.text) logger.info(resp.text)
data: dict = resp.json() data: dict = resp.json()
if data["status"].lower() == "ok": if data["status"].lower() == "ok":
return [it["itemId"] for it in data.get("shop_items", [{}])] return data
return [] return {}
class Goods: class Goods:
...@@ -345,14 +345,17 @@ class Goods: ...@@ -345,14 +345,17 @@ class Goods:
async with AsyncSession(max_clients=50) as s: async with AsyncSession(max_clients=50) as s:
try: try:
start = datetime.now().timestamp() start = datetime.now().timestamp()
task_ips = await Tool.get_tasks(s) tasks = await Tool.get_tasks(s)
if IS_DEBUG:
tasks = {"shop_items":[{"itemId":"17966764908"},{"itemId":"17968810088"},{"itemId":"17944620055"},{"itemId":"17926655039"},{"itemId":"17932960518"},{"itemId":"17846316295"},{"itemId":"17930914065"},{"itemId":"17909763301"},{"itemId":"17889919293"},{"itemId":"15260371868"},{"itemId":"15273504950"},{"itemId":"17887921270"},{"itemId":"17889869343"},{"itemId":"17946264703"},{"itemId":"17923362664"},{"itemId":"17893521905"},{"itemId":"17958760022"},{"itemId":"17865916920"},{"itemId":"17933200514"},{"itemId":"17944752349"},{"itemId":"17971364235"},{"itemId":"17917919511"},{"itemId":"17974264921"},{"itemId":"14926567281"},{"itemId":"17163956782"},{"itemId":"17955461898"},{"itemId":"17968065170"},{"itemId":"17925772724"},{"itemId":"17898659471"},{"itemId":"14795072711"},{"itemId":"17965313701"},{"itemId":"17971612570"},{"itemId":"14557971286"},{"itemId":"17935166374"},{"itemId":"14659255070"},{"itemId":"14571314339"},{"itemId":"18014104805"},{"itemId":"17935100549"},{"itemId":"18081707007"},{"itemId":"18090364461"},{"itemId":"18014316712"},{"itemId":"18058800132"},{"itemId":"18080403655"},{"itemId":"18090461119"},{"itemId":"18103607481"},{"itemId":"18108655242"},{"itemId":"18094400804"},{"itemId":"18071069409"},{"itemId":"18113402610"},{"itemId":"18124964013"},{"itemId":"18118057997"},{"itemId":"17978970129"},{"itemId":"17913866078"},{"itemId":"18085366416"},{"itemId":"18157961089"},{"itemId":"18124617250"},{"itemId":"18161620246"},{"itemId":"18181064906"},{"itemId":"745157482"},{"itemId":"18140374597"},{"itemId":"18227460354"},{"itemId":"18235754276"},{"itemId":"18235754269"},{"itemId":"18178124168"},{"itemId":"18217253826"},{"itemId":"17981354243"},{"itemId":"18050173911"},{"itemId":"17921511525"},{"itemId":"18210550677"},{"itemId":"18010310433"},{"itemId":"18143354723"},{"itemId":"18210258831"},{"itemId":"18225411651"},{"itemId":"18222708574"},{"itemId":"18250257775"},{"itemId":"18120872184"},{"itemId":"18119723857"},{"itemId":"18165415352"},{"itemId":"18219900341"},{"it
emId":"18102911251"},{"itemId":"18127069252"},{"itemId":"18042559640"},{"itemId":"18207567692"},{"itemId":"18138867224"},{"itemId":"18102561720"},{"itemId":"18071169546"},{"itemId":"18051724076"},{"itemId":"18217467344"},{"itemId":"18230711959"},{"itemId":"18093658467"},{"itemId":"18269754274"},{"itemId":"18211622283"},{"itemId":"18310504143"},{"itemId":"18284519554"},{"itemId":"18339870483"},{"itemId":"18310551433"},{"itemId":"18410011348"},{"itemId":"18458007697"},{"itemId":"18414951268"},{"itemId":"18414509346"},{"itemId":"18456973755"},{"itemId":"18447072366"},{"itemId":"18462016162"},{"itemId":"18484673910"},{"itemId":"18431213618"},{"itemId":"18380022060"},{"itemId":"18513919460"},{"itemId":"18561606844"},{"itemId":"18572904938"},{"itemId":"18547900817"},{"itemId":"18550258833"},{"itemId":"18563202342"},{"itemId":"18594417360"},{"itemId":"18622770295"},{"itemId":"18691661844"},{"itemId":"18633466546"},{"itemId":"18685120473"},{"itemId":"18719356870"},{"itemId":"18681701434"},{"itemId":"18705367358"},{"itemId":"18690071350"},{"itemId":"18707121636"},{"itemId":"18762909695"},{"itemId":"18878910433"},{"itemId":"18817623987"},{"itemId":"18842212571"},{"itemId":"18850308272"},{"itemId":"18869157593"},{"itemId":"18829859250"},{"itemId":"18893869606"},{"itemId":"18971156147"},{"itemId":"18980163360"},{"itemId":"18955316168"},{"itemId":"19045600542"}]}
pass
task_ips = [it["itemId"] for it in tasks.get("shop_items", [])]
if IS_DEBUG:
task_ips = task_ips[:3]
pass
logger.info(f"任务开始({len(task_ips)})") logger.info(f"任务开始({len(task_ips)})")
json_data = {"shop_items": []} json_data = {"shop_items": []}
shop_items = json_data["shop_items"] shop_items = json_data["shop_items"]
if IS_DEBUG:
# task_ips = random.choices(task_ips, k=5)
# task_ips = ["201027728", "277486534", "469411809"]
pass
success = 0 success = 0
sc = StrCounter() sc = StrCounter()
while task_ips: while task_ips:
...@@ -396,8 +399,6 @@ class Goods: ...@@ -396,8 +399,6 @@ class Goods:
except Exception as e: except Exception as e:
logger.exception(e) logger.exception(e)
logger.error(f"{self.task_name} - 任务异常 - {e}") logger.error(f"{self.task_name} - 任务异常 - {e}")
await AsyncRedisClient().close_redis()
if __name__ == "__main__": if __name__ == "__main__":
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
......
from src.export import WFSExport


def test_WFSExport():
    """Smoke test: run the WFS export end-to-end over a fixed 2025-02 window.

    NOTE(review): this hits whatever I/O WFSExport.run performs — presumably a
    live export; confirm it is safe to run outside production.
    """
    WFSExport("2025-02-01", "2025-02-10").run()


if __name__ == '__main__':
    test_WFSExport()
import requests

# Throwaway probe script: fetches one pre-signed Walmart order-report URL and
# prints the raw response for inspection.
#
# NOTE(review): the cookies and the signed `sig`/`se` query parameters below
# are short-lived credentials captured from a live session — they will expire
# and should not live in source control.
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    "Pragma": "no-cache",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "none",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36",
    "sec-ch-ua": "\"Chromium\";v=\"118\", \"Google Chrome\";v=\"118\", \"Not=A?Brand\";v=\"99\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "\"Windows\""
}
cookies = {
    "ak_bmsc": "8C5D234DDBC3C5BE5EE5189E38ABB2F2~000000000000000000000000000000~YAAQUWgBF5gQcu2UAQAAuEi3NhoIs5WcoaGgSWFfDyuXvhimUZ1tIWzJqAuKiTIQ0fdqkuqX5RpqriQM+hmZHRGHD7198YU87nB3nQo42wjSW8K+vT6liW8LPBa2EmEZJabBKcq9o06ZQsFNUO1wEBQ3mvk6//F3cZrYY5Now2cELIUfwLUnfjkUiSSvq1q/a0iuo16ZBZ3k4dPvdS2m2kYCH3rF9NNN2RBYlsD9msGUXLeOc8DHvfrSTn872APs9egOMDzHcilmqq4LYvuIe01LDo6XSHeJx5aYlKo68Umvm5RjGWrX4H3jJ42BKNfywn3UWQoOyyrnjldVUdL06zMDvpMHe7PJhrbwU2zVJrz/gqRVlGNDite6BVRcybiNnKyeUvs=",
    "bm_sv": "0AFB7E09ACE2B35BE403AEBA37713C7B~YAAQk+gyFwVufjWVAQAAkQvfNhoib4bJKFxcgodJZ9uxNCsQLEyxJNIBJrMEir0LB/sWMGFu7dimUNylRaADo9LZt6x2b0pzshf2n/VEvkDUguaublm8SZDVE9St8L3RtPAYQvjrXWeZygy4hqEny2lHCsv1wjT4xL5Mj9Ab4/sAODlJjRySpAVN85nXEjVsgP2hkTZZAUNYvrBfDyMlpw5WTfKaD0YHQBcmots82t5W4toZG5aDLL9HTf8SV5rBi7qhUDI=~1",
    "TSaf1ede28027": "0800b316f6ab2000520868722a76791b18fa14a9824dc2976821dad8905d7c43dc43711f69d8e2a00827a65cb61130008d29006038c400850fe9db236c7784ca48072ae069dde9fd37b9610a457640b557818e0b366e7b86cd37b1bb603ea5d4",
    "TS89e05807027": "08cb8c7367ab2000364e99bb60e31ce6fa69fe45e90e55cb31514801f124bb981208d52ba8c33d2f08e1c6b4f0113000363d3b498677d1a91f31970e0c58f80322671c2d354b603eeeb08503420532466767670670984eb0fc36ebd15325df8c"
}
url = "https://marketplace.walmartapis.com/v3/reports/getReport/order-report/PO_Data_100025456602025-02-24-06-10-43-622.xlsx"
params = {
    "sv": "2023-08-03",
    "se": "2025-02-24T07:54:57Z",
    "sr": "b",
    "sp": "r",
    "sig": "6MXgDrPOxGSSuHfxbpHDa3LiTT2pHLLHAS+Zrh+FyCs="
}
# Fix: requests has no default timeout, so the original call could hang
# forever on a stalled connection — bound it explicitly.
response = requests.get(url, headers=headers, cookies=cookies, params=params, timeout=30)
print(response.text)
print(response)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment