1. JS代码salt与sign
salt、sign 加密有两种实现方式:
- 一种是通过前端 JS 实现
- 另一种是后台服务器生成加密串,并在返回响应信息时,将加密信息交给浏览器客户端。(通过预览响应信息可知,并没有涉及 salt、sign 的信息,因此可以排除这种方法。)
-
获取包括salt的js文件
获取JS
-
使用 Sources 选项卡将 fanyi.min.js 文件中的 JS 代码格式化输出,并使用 Ctrl+F 找到相应的 "salt" 位置
image.png
分析salt
1. 获取所有data
2. 获取r.salt
3.证实第2步中为salt
JS源码:
// Builds the per-request fields (ts, bv, salt, sign) from `e`, the text that is
// mixed into the signature (presumably the string to translate).
// `n` is defined outside this excerpt; n.md5 is presumably an MD5 hex-digest helper.
var r = function(e) {
// bv: digest of the browser's navigator.appVersion string
var t = n.md5(navigator.appVersion)
// ts: current time in milliseconds, coerced to a string
, r = "" + (new Date).getTime()
// salt: ts with one random digit (0-9) appended
, i = r + parseInt(10 * Math.random(), 10);
return {
ts: r,
bv: t,
salt: i,
// sign: digest of fixed prefix + text + salt + fixed suffix
sign: n.md5("fanyideskweb" + e + i + "Ygy_4c=r#e#4EX^NUGUc5")
}
};
2.对应的python脚本实现:
# var r = function(e) {
# var t = n.md5(navigator.appVersion)
# , r = "" + (new Date).getTime()
# , i = r + parseInt(10 * Math.random(), 10); # 从0(包括0)往上,但是不包括1(排除1)
# return {
# ts: r,
# bv: t,
# salt: i,
# sign: n.md5("fanyideskweb" + e + i + "Ygy_4c=r#e#4EX^NUGUc5")
# }
# };
"""
i: 测试12345
salt: 16618249339661
sign: 6de81b23ff84d559dacd3e336c71c49d
lts: 1661824933966 # 13位时间戳
from: AUTO
to: AUTO
smartresult: dict
client: fanyideskweb
bv: 50b61ff102560ebc7bb0148b22d7715c
doctype: json
version: 2.1
keyfrom: fanyi.web
action: FY_BY_REALTlME
"""
import random
import time
from hashlib import md5

# lts corresponds to r.ts in the JS: the current time as a millisecond timestamp string.
times = time.time()
lts = str(int(times * 1000))
print(lts)

# salt corresponds to r.salt: lts followed by one random digit in 0-9,
# mirroring parseInt(10 * Math.random(), 10) in the JS.
salt = lts + str(int(random.random() * 10))
print(salt)

# sign: MD5 hex digest of fixed prefix + text + salt + fixed suffix.
word = "测试"  # text to translate
s = "fanyideskweb" + word + salt + "Ygy_4c=r#e#4EX^NUGUc5"
print(s)
# Keep the hash object under its own name so the imported md5() constructor
# is not shadowed and stays callable after this point.
hasher = md5()
hasher.update(s.encode(encoding="utf-8"))
sign = hasher.hexdigest()
print(sign)
# Digests recorded by the author when encoding `s` with different codecs
# (presumably for one fixed salt value):
# UTF-8:   8506ed5d39137a95f1ff1d4b907ea25d
# GBK:     e0324fdc80ae27ef54808ad0b26f0b12
# GB2312:  ed535761cf57f69b2cfc18f89b1f4fd8
# GB18030: e380f52ecfc0c532438375392fbf44f1
3.爬虫
# -*- coding:utf-8 -*-
"""
@author:百草Lily
@file:test_youdao.py
@time:2022/8/30
"""
from hashlib import md5
import time
import random
import requests
from faker import Faker
# Build one Faker-generated User-Agent string at import time; YouDao sends it
# as the "User-Agent" request header.
fake = Faker(locale="zh_CN")
ua = fake.user_agent()
class YouDao:
    """Small client that posts one word to the Youdao web translation endpoint.

    The request carries the ``lts``/``salt``/``sign`` fields computed the same
    way as the site's JavaScript signing routine.

    Author's note: the OUTFOX_SEARCH_USER_ID cookie, Referer and User-Agent
    are also required (presumably for the signed ``translate_o`` endpoint;
    only User-Agent is sent here).
    """

    # Fixed strings concatenated around the word and salt before hashing.
    CLIENT = "fanyideskweb"
    SIGN_SECRET = "Ygy_4c=r#e#4EX^NUGUc5"
    # Seconds to wait for the server before giving up instead of hanging forever.
    REQUEST_TIMEOUT = 10

    def __init__(self, word):
        """Store the text to translate and prepare the URL and headers.

        :param word: text to translate
        """
        self.word = word
        # Author's note: use the POST address captured in the browser but drop
        # the "_o" from it. "_o" is an anti-crawler measure (page redirect);
        # requesting it directly returns {"errorCode":50}.
        # self.url = "https://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"
        self.url = "https://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule"
        self.headers = {
            # Form-encoded, not JSON, so the payload must be passed via data=, not json=.
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
            "User-Agent": ua,
            # Browser headers seen in the captured request, currently not sent:
            # "cookie": "td_cookie=4070516452; OUTFOX_SEARCH_USER_ID=782221499@10.110.96.159; OUTFOX_SEARCH_USER_ID_NCOO=1261010561.9584796; ___rl__test__cookies=<millisecond timestamp>",
            # "Referer": "https://fanyi.youdao.com/",
            # "Accept-Language": "zh-CN,zh;q=0.9",
            # "Cache-Control": "no-cache",
            # "Host": "fanyi.youdao.com",
            # "Origin": "https://fanyi.youdao.com",
            # "Pragma": "no-cache",
            # "Sec-Fetch-Dest": "empty",
            # "Sec-Fetch-Mode": "cors",
            # "Sec-Fetch-Site": "same-origin",
            # "X-Requested-With": "XMLHttpRequest",
            # "sec-ch-ua": '"Chromium";v="104", " Not A;Brand";v="99", "Google Chrome";v="104"',
            # "sec-ch-ua-mobile": "?0",
            # "sec-ch-ua-platform": "Windows"
        }

    def get_sign(self):
        """Compute the request fields that change on every call.

        :return: tuple ``(lts, salt, sign)`` of strings, where ``lts`` is the
            current millisecond timestamp, ``salt`` is ``lts`` plus one random
            digit, and ``sign`` is the MD5 hex digest of
            prefix + word + salt + suffix (UTF-8 encoded).
        """
        # lts corresponds to r.ts in the site's JS.
        lts = str(int(time.time() * 1000))
        print(lts)
        # salt mirrors r + parseInt(10 * Math.random(), 10).
        salt = lts + str(int(random.random() * 10))
        print(salt)
        raw = self.CLIENT + self.word + salt + self.SIGN_SECRET
        sign = md5(raw.encode()).hexdigest()
        print(sign)
        return lts, salt, sign

    def get_request(self):
        """Send the translation request, print the decoded JSON and return it.

        :return: the response body parsed from JSON
        """
        lts, salt, sign = self.get_sign()
        data = {
            "i": self.word,
            "from": "AUTO",  # source language, e.g. "zh-CHS"
            "to": "AUTO",  # target language, e.g. "en"; the reply reports e.g. type='EN2ZH_CN'
            "smartresult": "dict",
            "client": self.CLIENT,
            "salt": salt,
            "sign": sign,
            "lts": lts,
            # Hard-coded value from a captured browser request
            # (in the JS, bv is md5(navigator.appVersion)).
            "bv": "50b61ff102560ebc7bb0148b22d7715c",
            "doctype": "json",
            "version": "2.1",
            "keyfrom": "fanyi.web",
            "action": "FY_BY_REALTlME",
        }
        # The body is form-encoded (data=), matching the Content-Type header.
        resp = requests.post(
            self.url,
            data=data,
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        res = resp.json()
        print(res)
        return res
if __name__ == "__main__":
    # Translate a sample word; get_request prints the service's JSON reply.
    YouDao("test").get_request()
# Open issue: translating Chinese input comes back garbled, e.g.
# {'type': 'EN2ZH_CN', 'errorCode': 0, 'elapsedTime': 0, 'translateResult': [[{'src': '??', 'tgt': '??'}]]}
# Cause suspected by the author: the encoding applied to `s` before MD5 hashing => unresolved












网友评论