Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
amazon-mult-site-sync
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
yexing
amazon-mult-site-sync
Commits
9fcc7b4f
Commit
9fcc7b4f
authored
Jun 13, 2025
by
yexing
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
u
parent
a5474c0e
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
53 additions
and
19 deletions
+53
-19
.gitignore
+1
-0
spider/base_info.py
+52
-19
No files found.
.gitignore
View file @
9fcc7b4f
...
...
@@ -8,3 +8,4 @@ image
.idea
config
tmp
test
spider/base_info.py
View file @
9fcc7b4f
import
json
import
re
import
curl_cffi
from
loguru
import
logger
from
tenacity
import
retry
,
stop_after_attempt
,
wait_random
from
lxml
import
etree
from
const
import
Postcode
,
Site
from
db
import
RedisSingleton
from
proxy
import
ProxyManager
from
tool
import
Request
,
Task
from
tool
import
Fmt
,
Request
,
Task
from
conf
import
config
# Shared Redis handle, built from the URL stored under the "redis" config section.
REDIS = RedisSingleton(redis_url=config["redis"]["url"])

# Config section for the product-detail task.
task_monitoring_config = config["task-product-detail"]

# Config section for the info-detail task; Info.__init__ reads its settings from it.
task_info_config = config["task-info-detail"]
class
Tool
:
@staticmethod
...
...
@@ -133,26 +140,52 @@ class Info(ProxyMixin):
# Class-level Task helper bound to the shared Redis handle; run() obtains its
# request headers through task_manager.get_loca_cookie().
task_manager = Task(REDIS)
def __init__(self):
    """Load this worker's settings from ``task_info_config``.

    Every attribute is read exactly once. The flags ``enabled`` and
    ``is_debug`` are true only when the configured value is exactly the
    string ``"True"``; ``task_number`` and ``request_timeout`` are
    converted with ``int()``.

    Raises:
        KeyError: if a required key is missing from the config section.
        ValueError: if ``task_number`` or ``request_timeout`` is not an
            integer literal.
    """
    settings = task_info_config

    self.task_key = settings["task_key"]
    self.item_key = settings["item_key"]
    self.task_number = int(settings["task_number"])
    # Config values are compared as strings, so only the exact text "True"
    # switches a flag on.
    self.enabled = settings["enabled"] == "True"
    self.request_timeout = int(settings["request_timeout"])
    self.is_debug = settings["is_debug"] == "True"
def format_content(self, text):
    """Extract the delivery date from a product page.

    Args:
        text: HTML source of the page, parsed with ``etree.HTML``.

    Returns:
        A dict with the single key ``"free_delivery"``. Its value is
        ``Fmt.parse_date`` applied to the first text node found under the
        primary delivery-message block, or ``""`` when no such node exists.
    """
    html = etree.HTML(text)
    delivery_nodes = html.xpath(
        '//div[@id="mir-layout-DELIVERY_BLOCK-slot-PRIMARY_DELIVERY_MESSAGE_LARGE"]/span/span/text()'
    )
    # xpath() returns a list; an empty list means the delivery block is absent.
    free_delivery = Fmt.parse_date(delivery_nodes[0]) if delivery_nodes else ""
    return {"free_delivery": free_delivery}
@retry(
    stop=stop_after_attempt(20),
    wait=wait_random(3, 6),
    # NOTE(review): tenacity documents `retry_error_cls` as the exception
    # class raised once attempts are exhausted. A lambda returning Ellipsis
    # looks like it was meant for `retry_error_callback` - confirm before
    # changing, since callers may depend on the current behavior.
    retry_error_cls=lambda *_: ...,
)
def run(self, task):
    """Fetch one product page through a proxy and parse it.

    The task's URL is normalised to the canonical
    ``https://www.amazon.<site>/dp/<asin>?th=1&psc=1`` form before the
    request is made.

    Args:
        task: Mapping with an optional ``"url"`` entry (defaults to ``""``).

    Returns:
        The dict produced by ``format_content`` on success, or ``None`` when
        the request or parsing fails (the failure is logged).

    Raises:
        Exception: ``"没有代理"`` ("no proxy") when ``get_proxy()`` returns
            ``None``. It is raised outside the ``try`` block, so it is the
            failure that the ``@retry`` decorator re-attempts.
    """
    url = task.get("url", "")
    asin = Tool.get_url_asin(url)
    url = f"https://www.amazon.{self.site}/dp/" + asin + "?th=1&psc=1"

    _proxy = self.get_proxy()
    # Check the proxy before any use of it: subscripting None would raise a
    # TypeError and hide the real "no proxy" condition.
    if _proxy is None:
        raise Exception("没有代理")

    try:
        headers = self.task_manager.get_loca_cookie(site=self.site)
        text = Request.request_html(
            url,
            _proxy["proxy"],
            **{
                "headers": headers,
                "timeout": self.request_timeout,
                "postcode": self.postcode,
            },
        )
        return self.format_content(text)
    except curl_cffi.curl.CurlError:
        logger.error(f"请求超时: {url}")
    except Exception as e:
        # Failures are recognised by their message text:
        # "出现验证码" = captcha shown, "采集邮编错误" = postcode error.
        if str(e) == "出现验证码":
            self.delete_proxy(_proxy["temp_proxy"])
        if str(e) == "采集邮编错误":
            self.cookie_error()
        logger.error(f"请求异常: {e} - {url}")
    finally:
        # Runs on every path, including after delete_proxy above.
        # NOTE(review): confirm that re-joining a proxy that was just
        # deleted for a captcha is intended.
        self.join_proxy(_proxy["temp_proxy"])
    return None
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment