yexing / amazon-mult-site-sync · Commits

Commit d260882e
Authored Jun 13, 2025 by yexing
Parent commit: 58d033ad

Commit message: u

Showing 10 changed files with 50 additions and 51 deletions:

.gitignore               +10  -0
amazon_cookie.py         +31  -32
callback.py              +0   -0
celery_app.py            +1   -1
db.py                    +1   -1
spider/base_detail.py    +1   -6
spider/base_monitor.py   +1   -2
spider/base_search.py    +0   -3
task.py                  +3   -4
tool.py                  +2   -2
.gitignore  (new file, mode 100644)

+__pycache__
+.vscode
+celerybeat-*
+.pytest_cache
+log
+pid
+image
+.idea
+config
+tmp
amazon_cookie.py

-from time import sleep
 import asyncio
 import json
 import random
...
@@ -78,13 +77,13 @@ class RedisSingleton:
     def __init__(self, redis_url=None):
         self.redis_url = redis_url

-    def get_connection(self):
+    async def get_connection(self):
         if self._redis_pool is None:
             if self.redis_url:
-                self._redis_pool = from_url(self.redis_url, decode_responses=True)
+                self._redis_pool = await from_url(self.redis_url, decode_responses=True)
             else:
                 # Default connection address
-                self._redis_pool = from_url('redis://localhost', decode_responses=True)
+                self._redis_pool = await from_url('redis://localhost', decode_responses=True)
         return self._redis_pool
...
@@ -93,7 +92,7 @@ class SiteType(Data):
     de = 2

-def callback(param):
+async def callback(param):
     """
     Callback endpoint
...
@@ -108,46 +107,46 @@ def callback(param):
     )

-def refresh_local_cookie(data: dict, site: str = "com"):
+async def refresh_local_cookie(data: dict, site: str = "com"):
     """
     Refresh the local cookie
     """
-    redis = redis_singleton.get_connection()
-    redis.set(f"cookie:{site}", json.dumps(data))
+    redis = await redis_singleton.get_connection()
+    await redis.set(f"cookie:{site}", json.dumps(data))

-def get_cookie_error():
+async def get_cookie_error():
    """
    Get the cookie error counter
    :return:
    """
-    redis = redis_singleton.get_connection()
-    return redis.get("amazon:cookie-error")
+    redis = await redis_singleton.get_connection()
+    return await redis.get("amazon:cookie-error")

-def delete_cookie_error():
+async def delete_cookie_error():
    """
    Delete the cookie error counter
    :return:
    """
-    redis = redis_singleton.get_connection()
-    return redis.delete("amazon:cookie-error")
+    redis = await redis_singleton.get_connection()
+    return await redis.delete("amazon:cookie-error")

-def input_postcode(
+async def input_postcode(
     tab: WebPageTab,
     postcode: str,
     locator: str = "#GLUXZipUpdateInput"
 ):
     location_input = tab.ele(locator, timeout=3)
     if location_input is None:
         raise Exception("Input box not found")
     location_input.input(postcode)
-    sleep(1)
+    await asyncio.sleep(1)

-def get_cookie(tab: WebPageTab, site_type: int = 1):
+async def get_cookie(tab: WebPageTab, site_type: int = 1):
     """
     Get the cookie
...
@@ -162,21 +161,21 @@ def get_cookie(tab: WebPageTab, site_type: int = 1):
         "time": int(time.time()),
     }
     logger.info(f"Got cookie: {json.dumps(response)}")
-    callback({"type": site_type, "data": response})
+    await callback({"type": site_type, "data": response})
     return cookie

-def run(site: str = "com", postcode: str = "20001", site_type: int = 1):
+async def run(site: str = "com", postcode: str = "20001", site_type: int = 1):
-    def _close():
+    async def _close():
-        cookie = get_cookie(tab, site_type)
+        cookie = await get_cookie(tab, site_type)
         if IS_DEBUG:
-            refresh_local_cookie({"cookie": cookie, "user-agent": UA}, site=site)
+            await refresh_local_cookie({"cookie": cookie, "user-agent": UA}, site=site)
         chromium.clear_cache()
         chromium.quit()
-        delete_cookie_error()
+        await delete_cookie_error()

     if not IS_DEBUG:
-        number = get_cookie_error()
+        number = await get_cookie_error()
         number = int(number) if number else 0
         if number < 50:
             logger.success("Cookie OK")
...
@@ -198,7 +197,7 @@ def run(site: str = "com", postcode: str = "20001", site_type: int = 1):
            logger.info("Postcode or language wrong; start setting postcode and language")
        else:
            logger.info("Postcode and language correct")
-           _close()
+           await _close()
            return
        location = tab.ele("#nav-global-location-popover-link", timeout=3)
...
@@ -209,10 +208,10 @@ def run(site: str = "com", postcode: str = "20001", site_type: int = 1):
        postcode_parts = postcode.split("-")
        if len(postcode_parts) == 2:
-           input_postcode(tab, postcode_parts[0], "#GLUXZipUpdateInput_0")
-           input_postcode(tab, postcode_parts[1], "#GLUXZipUpdateInput_1")
+           await input_postcode(tab, postcode_parts[0], "#GLUXZipUpdateInput_0")
+           await input_postcode(tab, postcode_parts[1], "#GLUXZipUpdateInput_1")
        else:
-           input_postcode(tab, postcode)
+           await input_postcode(tab, postcode)
        locs = [
            "#GLUXZipUpdate",
...
@@ -227,12 +226,12 @@ def run(site: str = "com", postcode: str = "20001", site_type: int = 1):
                ele.wait.clickable(timeout=3, raise_err=False).click()
                tab.wait(2)
-        _close()
+        await _close()
     except Exception as e:
         logger.error(e)

-def main():
+async def main():
     if IS_DEBUG:
         items = random.choices(list(Site.zip(Postcode)))
     else:
...
@@ -242,8 +241,8 @@ def main():
        if site_type is None:
            continue
        logger.info(f"Start fetching cookie: {site} {postcode}")
-       run(site, postcode)
-       sleep(10)
+       await run(site, postcode)
+       await asyncio.sleep(10)
        if IS_DEBUG:
            exit()
...
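Note on the hunks above: every helper in amazon_cookie.py is now a coroutine, so a bare call only builds a coroutine object; nothing runs until it is awaited inside an event loop. A minimal sketch of driving the converted entry point (the import path is an assumption based on the file name):

import asyncio

from amazon_cookie import main  # import path assumed from the file name

if __name__ == "__main__":
    # main() is now `async def`; asyncio.run() creates an event loop,
    # runs the coroutine to completion, and closes the loop.
    asyncio.run(main())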
callback.py  (+0 -0; no content changes shown)
celery_app.py

...
@@ -33,7 +33,7 @@ app.conf.task_queues = (
 )
 app.conf.task_routes = {
     "celery_tasks.detail_spider_task": {"queue": "detail"},
-    "celery_tasks.monitor_spider_task": {"queue": "detail"},
+    "celery_tasks.monitor_spider_task": {"queue": "monitor"},
     "celery_tasks.search_spider_task": {"queue": "search"},
     "celery_tasks.*_dial_task": {"queue": "dial"},
     "celery_tasks.*": {"queue": "detail"},
...
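The celery_app.py hunk fixes a routing slip: monitor tasks were being sent to the detail queue. A hedged way to check the new routing without a broker, assuming Celery's documented Router API (the app and task names come from the diff; the check itself is illustrative):

from celery_app import app

# Resolve the route for the monitor task through Celery's router;
# with the fix applied it should land on the "monitor" queue.
route = app.amqp.router.route({}, "celery_tasks.monitor_spider_task")
print(route["queue"].name)  # expected: monitor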
db.py

 # import aioredis
-from redis.asyncio import from_url
+from redis import from_url

 class RedisSingleton:
...
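db.py now takes from_url from the synchronous redis package, while amazon_cookie.py keeps its own async pool. For reference, a minimal sketch of both redis-py clients (the URL and key are illustrative; note that redis.asyncio.from_url itself returns a client without being awaited):

import asyncio
from redis import from_url as sync_from_url
from redis.asyncio import from_url as async_from_url

# Synchronous client: calls block and return values directly.
r = sync_from_url("redis://localhost", decode_responses=True)
r.set("demo:key", "1")
print(r.get("demo:key"))

# Asynchronous client: the same operations must be awaited in a coroutine.
async def demo():
    ar = async_from_url("redis://localhost", decode_responses=True)
    await ar.set("demo:key", "2")
    print(await ar.get("demo:key"))

asyncio.run(demo())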
spider/base_detail.py

 from time import sleep
-import asyncio
 import html
 import json
 import random
...
@@ -869,9 +868,7 @@ class Goods:
        # Process in batches
        if len(collection_skus) > 0:
            for i in range(0, len(collection_skus), 8):
-               for response in asyncio.gather(
-                   *collection_skus[i : i + 8]
-               ):
+               for response in collection_skus[i : i + 8]:
                    try:
                        if response.get("brand"):
                            brand.append(response["brand"])
...
@@ -1021,8 +1018,6 @@ class Goods:
        tasks = [json.loads(task) for task in tasks]
        for task in tasks:
            queue.append(self.run(task))
-       if queue:
-           asyncio.gather(*queue)
        logger.info(f"Task took: {time.time() - start_time}")
        if self.is_debug:
...
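The removed asyncio.gather calls ran each batch of eight SKU coroutines concurrently; the new loop iterates the items directly. For comparison, a self-contained sketch of the batched-gather pattern (fetch is a hypothetical stand-in for a spider coroutine such as Goods.run):

import asyncio

async def fetch(task):
    # stand-in for one spider coroutine
    await asyncio.sleep(0.1)
    return {"task": task, "brand": "demo"}

async def run_in_batches(tasks, batch_size=8):
    results = []
    for i in range(0, len(tasks), batch_size):
        coros = [fetch(t) for t in tasks[i:i + batch_size]]
        # gather() schedules the whole batch concurrently and must be
        # awaited to collect the batch's results in order.
        results.extend(await asyncio.gather(*coros))
    return results

print(len(asyncio.run(run_in_batches(list(range(20))))))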
spider/base_monitor.py

 from time import sleep
-import asyncio
 import json
 import re
 import time
...
@@ -420,7 +419,7 @@ class Monitoring:
        success_number = 0
        logger.info(f"Task count: {len(queue)}")
        if queue:
-           for items in asyncio.gather(*queue):
+           for items in queue:
                success_number += 1
        logger.info(f"Task took: {time.time() - start_time}, successes: {success_number}")
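In the monitor hunk the gather wrapper is gone and the loop walks queue directly. If the queue were still holding pending coroutines, each one would need its own await before being counted; a hedged sketch of that sequential variant (check is a hypothetical stand-in for a Monitoring coroutine):

import asyncio

async def check(item):
    # stand-in for one monitoring coroutine
    await asyncio.sleep(0)
    return item % 2 == 0

async def count_successes(queue):
    success_number = 0
    for coro in queue:
        if await coro:  # awaited one at a time, unlike gather(*queue)
            success_number += 1
    return success_number

print(asyncio.run(count_successes([check(i) for i in range(10)])))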
spider/base_search.py

 from time import sleep
-import asyncio
 import functools
 import json
 import os
...
@@ -650,8 +649,6 @@ class Search:
        tasks = [json.loads(task) for task in tasks]
        for task in tasks:
            queue.append(self.run(task))
-       if queue:
-           asyncio.gather(*queue)
        logger.info(f"Task took: {time.time() - start_time}")
        if self.is_debug:
...
task.py

-from time import sleep
 import asyncio
 import json
 import os
...
@@ -94,7 +93,7 @@ def run(task_config: dict = task_monitoring_config):
        time.sleep(5)

-def cookie():
+async def cookie():
    for site in Site.values():
        time_key = cookie_config['cookie_time_key']
        time_key = f"{time_key}:{site}"
...
@@ -108,10 +107,10 @@ def cookie():
    logger.info(f"Fetching cookies")
    for site in Site.values():
        try:
-           task_manager.get_cookie(site)
+           await task_manager.get_cookie(site)
        except:
            logger.error(f"Failed to fetch cookie")
-   sleep(5)
+   await asyncio.sleep(5)

 if __name__ == '__main__':
...
tool.py

...
@@ -7,7 +7,7 @@ from datetime import datetime, timedelta
 from babel.dates import get_month_names, get_day_names
 from curl_cffi import requests
-from curl_cffi.requests import AsyncSession
+from curl_cffi.requests import Session
 from dateutil import parser
 from dateutil.relativedelta import relativedelta
 from loguru import logger
...
@@ -145,7 +145,7 @@ class Request:
        :param url:
        :return:
        """
-        with AsyncSession(max_clients=50) as s:
+        with Session() as s:
            # Clear the request defaults
            s.headers.clear()
            s.cookies.clear()
...
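tool.py's Request helper drops curl_cffi's AsyncSession for the blocking Session, which also fits the plain `with` statement already used there. A minimal sketch of both session types (the URL is illustrative):

import asyncio
from curl_cffi.requests import AsyncSession, Session

URL = "https://example.com"  # illustrative

# Blocking session: callable from ordinary synchronous code.
with Session() as s:
    print(s.get(URL).status_code)

# Async session: the same request, awaited inside an event loop.
async def fetch():
    async with AsyncSession(max_clients=50) as sess:
        resp = await sess.get(URL)
        return resp.status_code

print(asyncio.run(fetch()))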