diff --git a/.gitignore b/.gitignore index 9882734..9212f9b 100644 --- a/.gitignore +++ b/.gitignore @@ -23,12 +23,12 @@ tools/toutiao.py news/middlewares/httpproxy_vps.py # config -config/* -!config/__init__.py -!config/default.py - -env_*.sh -!env_default.sh +#config/* +#!config/__init__.py +#!config/default.py +# +#env_*.sh +#!env_default.sh # gitbook diff --git a/README.md b/README.md index 445c210..c96a57a 100644 --- a/README.md +++ b/README.md @@ -10,9 +10,9 @@ - Redis - NodeJS -本项目依赖第三方验证码识别服务, 注册地址: http://www.ruokuai.com/ +本项目依赖第三方验证码识别服务 -更新配置 config/develop.py 用户名和密码 +更新配置 config/default.py 用户名和密码 ``` RK_CONFIG = { 'username': '******', @@ -20,6 +20,14 @@ RK_CONFIG = { 'soft_id': '93676', 'soft_key': '5d0e00b196c244cb9d8413809c62f9d5', } + +# 斐斐打码 +FF_CONFIG = { + 'pd_id': '******', + 'pd_key': '******', + 'app_id': '312451', + 'app_key': '5YuN+6isLserKBZti4hoaI6UR2N5UT2j', +} ``` ```bash @@ -28,7 +36,7 @@ virtualenv news_spider.env # 创建虚拟环境 # python3 virtualenv news_spider.env -p python3 # 创建虚拟环境 -source env_develop.sh # 激活虚拟环境 +source env_default.sh # 激活虚拟环境 pip install -r requirements-py2.txt # 安装环境依赖 # 开发环境 模拟单次抓取 python tasks/job_put_tasks.py wx # 初次创建任务 @@ -150,12 +158,16 @@ scrapy crawl weibo ### 验证码识别 -http://www.ruokuai.com/ +~~http://www.ruokuai.com/~~ + +~~http://wiki.ruokuai.com/~~ -http://wiki.ruokuai.com/ +~~价格类型:~~ +~~http://www.ruokuai.com/home/pricetype~~ -价格类型: -http://www.ruokuai.com/home/pricetype +热心网友反映`若快`已经关闭, 接下来会支持`斐斐打码`, 敬请期待 + +斐斐打码开发文档 [http://docs.fateadm.com](http://docs.fateadm.com) ### 索引说明 @@ -192,7 +204,9 @@ LongText | 2的32次方–1 | 4,294,967,295 | 4GB M端2个参数获取方法已公开, 参考蜘蛛 toutiao_m -PC端3个参数获取方法已破解, 由于公开之后会引起头条反爬机制更新, 故没有公开, 如有需要, 敬请私聊, 仅供学习, 谢绝商用 +~~PC端3个参数获取方法已破解, 由于公开之后会引起头条反爬机制更新, 故没有公开, 如有需要, 敬请私聊, 仅供学习, 谢绝商用~~ + +因M端已满足数据获取要求, 不再开源PC端签名破解 ### TODO diff --git a/apps/client_rk.py b/apps/client_rk.py index 1d26a94..e701100 100644 --- a/apps/client_rk.py +++ b/apps/client_rk.py @@ -9,7 +9,7 @@ """ -from libs.rk import 
RClient +from libs.rk import RKClient from libs.counter import CounterClient from apps.client_db import redis_client from tools.cookies import len_cookies @@ -21,7 +21,7 @@ RK_LIMIT_COUNT_DAILY = current_config.RK_LIMIT_COUNT_DAILY COOKIES_QUEUE_COUNT = current_config.COOKIES_QUEUE_COUNT -rc_client = RClient(**RK_CONFIG) +rc_client = RKClient(**RK_CONFIG) rk_counter_client = CounterClient(redis_client, 'rk') diff --git a/config/default.py b/config/default.py index 2c9f709..c2c5d8c 100644 --- a/config/default.py +++ b/config/default.py @@ -51,6 +51,14 @@ 'soft_key': '5d0e00b196c244cb9d8413809c62f9d5', } +# 斐斐打码 +FF_CONFIG = { + 'pd_id': '******', + 'pd_key': '******', + 'app_id': '312451', + 'app_key': '5YuN+6isLserKBZti4hoaI6UR2N5UT2j', +} + # 熔断机制 每天请求限制(200元==500000快豆) RK_LIMIT_COUNT_DAILY = 925 diff --git a/config/develop.py b/config/develop.py deleted file mode 100644 index d6cb549..0000000 --- a/config/develop.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 - -""" -@author: zhanghe -@software: PyCharm -@file: develop.py -@time: 2018-02-10 15:07 -""" - -from __future__ import print_function -from __future__ import unicode_literals - -import os - -BASE_DIR = os.path.dirname(os.path.dirname(__file__)) - -# requests 超时设置 -REQUESTS_TIME_OUT = (30, 30) - -HOST_IP = '0.0.0.0' - -# 数据库 MySQL -DB_MYSQL = { - 'host': HOST_IP, - 'user': 'root', - 'passwd': '123456', - 'port': 3306, - 'db': 'news_spider' -} - -SQLALCHEMY_DATABASE_URI_MYSQL = \ - 'mysql+mysqldb://%s:%s@%s:%s/%s?charset=utf8' % \ - (DB_MYSQL['user'], DB_MYSQL['passwd'], DB_MYSQL['host'], DB_MYSQL['port'], DB_MYSQL['db']) - -SQLALCHEMY_POOL_SIZE = 5 # 默认 pool_size=5 - -# 缓存,队列 -REDIS = { - 'host': HOST_IP, - 'port': 6379, - # 'password': '123456' # redis-cli AUTH 123456 -} - -# 若快验证码识别 -RK_CONFIG = { - 'username': '******', - 'password': '******', - 'soft_id': '93676', - 'soft_key': '5d0e00b196c244cb9d8413809c62f9d5', -} - -# 熔断机制 每天请求限制(200元==500000快豆) -RK_LIMIT_COUNT_DAILY = 925 
- -# 队列保留 cookies 数量 -COOKIES_QUEUE_COUNT = 5 - -# 分布式文件系统 -WEED_FS_URL = 'http://%s:9333' % HOST_IP - -# 优先级配置(深度优先) -DEPTH_PRIORITY = 1 -PRIORITY_CONFIG = { - 'list': 600, - 'next': 500, - 'detail': 800, -} - -# 启动时间(启动时间之前的内容不抓取, 适用于新闻) -START_TIME = '2018-01-01 00:00:00' diff --git a/config/product.py b/config/product.py deleted file mode 100644 index 529f2ba..0000000 --- a/config/product.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python -# encoding: utf-8 - -""" -@author: zhanghe -@software: PyCharm -@file: product.py -@time: 2018-02-10 15:09 -""" - -from __future__ import print_function -from __future__ import unicode_literals - -import os - -BASE_DIR = os.path.dirname(os.path.dirname(__file__)) - -# requests 超时设置 -REQUESTS_TIME_OUT = (30, 30) - -HOST_IP = '0.0.0.0' - -# 数据库 MySQL -DB_MYSQL = { - 'host': HOST_IP, - 'user': 'root', - 'passwd': '123456', - 'port': 3306, - 'db': 'news_spider' -} - -SQLALCHEMY_DATABASE_URI_MYSQL = \ - 'mysql+mysqldb://%s:%s@%s:%s/%s?charset=utf8' % \ - (DB_MYSQL['user'], DB_MYSQL['passwd'], DB_MYSQL['host'], DB_MYSQL['port'], DB_MYSQL['db']) - -SQLALCHEMY_POOL_SIZE = 5 # 默认 pool_size=5 - -# 缓存,队列 -REDIS = { - 'host': HOST_IP, - 'port': 6379, - # 'password': '123456' # redis-cli AUTH 123456 -} - -# 若快验证码识别 -RK_CONFIG = { - 'username': '******', - 'password': '******', - 'soft_id': '93676', - 'soft_key': '5d0e00b196c244cb9d8413809c62f9d5', -} - -# 熔断机制 每天请求限制(200元==500000快豆) -RK_LIMIT_COUNT_DAILY = 925 - -# 队列保留 cookies 数量 -COOKIES_QUEUE_COUNT = 5 - -# 分布式文件系统 -WEED_FS_URL = 'http://%s:9333' % HOST_IP - -# 优先级配置(深度优先) -DEPTH_PRIORITY = 1 -PRIORITY_CONFIG = { - 'list': 600, - 'next': 500, - 'detail': 800, -} - -# 启动时间(启动时间之前的内容不抓取, 适用于新闻) -START_TIME = '2018-01-01 00:00:00' diff --git a/db/data/mysql.sql b/db/data/mysql.sql index 54af50f..615c0b1 100644 --- a/db/data/mysql.sql +++ b/db/data/mysql.sql @@ -152,3 +152,4 @@ INSERT INTO `fetch_task` VALUES (131, 2, 0, '1006063481197561', '中国互联网 INSERT INTO `fetch_task` VALUES 
(132, 2, 0, '1002061768025224', '互联网周刊', '', '', 1, '', '2017-01-11 11:01:05', '2017-01-11 11:01:05'); INSERT INTO `fetch_task` VALUES (133, 2, 0, '1002063819805149', '互联网焦点网', '', '', 1, '', '2017-01-11 11:01:05', '2017-01-11 11:01:05'); INSERT INTO `fetch_task` VALUES (134, 3, 0, '55982516338', '奇文志怪', '', 'http://m.toutiao.com/profile/55982516338/', 1, '', '2018-09-06 14:01:05', '2018-09-06 14:01:05'); +INSERT INTO `fetch_task` VALUES (135, 3, 0, '6014591174', '鹏君读书', '', 'http://m.toutiao.com/profile/6014591174/', 1, '', '2017-01-11 11:01:05', '2017-01-11 11:01:05'); diff --git a/env_develop.sh b/env_develop.sh deleted file mode 100644 index 977f5c9..0000000 --- a/env_develop.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash - -source news_spider.env/bin/activate - -export PATH=${PWD}:${PATH} -export PYTHONPATH=${PWD} -export PYTHONIOENCODING=utf-8 -export MODE=develop diff --git a/env_product.sh b/env_product.sh deleted file mode 100644 index feb73b5..0000000 --- a/env_product.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env bash - -source news_spider.env/bin/activate - -export PATH=${PWD}:${PATH} -export PYTHONPATH=${PWD} -export PYTHONIOENCODING=utf-8 -export MODE=product diff --git a/etc/toutiao.ini b/etc/toutiao.ini index f0b31b8..8b39dbb 100644 --- a/etc/toutiao.ini +++ b/etc/toutiao.ini @@ -20,11 +20,11 @@ autorestart=true redirect_stderr=true stdout_logfile=logs/scrapy_toutiao.log -[program:reboot_net] -command=python tasks/run_job_reboot_net_china_net.py -startsecs=0 -stopwaitsecs=0 -autostart=false -autorestart=true -redirect_stderr=true -stdout_logfile=logs/reboot_net_china_net.log +;[program:reboot_net] +;command=python tasks/run_job_reboot_net_china_net.py +;startsecs=0 +;stopwaitsecs=0 +;autostart=false +;autorestart=true +;redirect_stderr=true +;stdout_logfile=logs/reboot_net_china_net.log diff --git a/libs/ft.py b/libs/ft.py new file mode 100644 index 0000000..c7e4be2 --- /dev/null +++ b/libs/ft.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python +# 
encoding: utf-8
+
+"""
+@author: zhanghe
+@software: PyCharm
+@file: ft.py
+@time: 2019-05-26 14:26
+"""
+
+import base64
+import hashlib
+import time
+import requests
+
+
+URL = "http://pred.fateadm.com"
+
+
+class FTClient(object):  # client for the fateadm captcha-recognition HTTP API
+    def __init__(self, pd_id, pd_key, app_id='', app_key=''):
+        self.pd_id = pd_id
+        self.pd_key = pd_key
+        self.app_id = app_id
+        self.app_key = app_key
+        self.host = URL
+        self.s = requests.session()
+        self.timeout = 30
+
+    @staticmethod
+    def calc_sign(pd_id, pd_key, timestamp):
+        md5 = hashlib.md5()
+        md5.update((timestamp + pd_key).encode('utf-8'))  # hashlib needs bytes on Python 3
+        sign_a = md5.hexdigest()
+
+        md5 = hashlib.md5()
+        md5.update((pd_id + timestamp + sign_a).encode('utf-8'))
+        sign_b = md5.hexdigest()
+        return sign_b
+
+    @staticmethod
+    def calc_card_sign(card_id, card_key, timestamp, pd_key):
+        md5 = hashlib.md5()
+        md5.update((pd_key + timestamp + card_id + card_key).encode('utf-8'))  # bytes for Python 3
+        return md5.hexdigest()
+
+    def query_balance(self):
+        """Query the account balance; returns the decoded JSON response."""
+        tm = str(int(time.time()))
+        sign = self.calc_sign(self.pd_id, self.pd_key, tm)
+        param = {
+            "user_id": self.pd_id,
+            "timestamp": tm,
+            "sign": sign
+        }
+        url = self.host + "/api/custval"
+        rsp = self.s.post(url, param, timeout=self.timeout).json()
+        return rsp
+
+    def query_tts(self, predict_type):
+        """Query the network latency for predict_type; returns the decoded JSON response."""
+        tm = str(int(time.time()))
+        sign = self.calc_sign(self.pd_id, self.pd_key, tm)
+        param = {
+            "user_id": self.pd_id,
+            "timestamp": tm,
+            "sign": sign,
+            "predict_type": predict_type,
+        }
+        if self.app_id != "":
+            asign = self.calc_sign(self.app_id, self.app_key, tm)
+            param["appid"] = self.app_id
+            param["asign"] = asign
+        url = self.host + "/api/qcrtt"
+        rsp = self.s.post(url, param, timeout=self.timeout).json()
+        return rsp
+
+    def predict(self, predict_type, img_data):
+        """Recognize a captcha from raw image bytes; returns the decoded JSON response."""
+        tm = str(int(time.time()))
+        sign = self.calc_sign(self.pd_id, self.pd_key, tm)
+        img_base64 = base64.b64encode(img_data)
+        param = {
+            "user_id": self.pd_id,
+            "timestamp": tm,
+            "sign": sign,
+            "predict_type": predict_type,
+            "img_data": img_base64,
+        }
+        if self.app_id != "":
+            asign = self.calc_sign(self.app_id, self.app_key, tm)
+            param["appid"] = self.app_id
+            param["asign"] = asign
+        url = self.host + "/api/capreg"
+        rsp = self.s.post(url, param, timeout=self.timeout).json()
+        return rsp
+
+    def predict_from_file(self, predict_type, file_name):
+        """Recognize a captcha read from the image file at file_name."""
+        with open(file_name, "rb") as f:
+            data = f.read()
+        return self.predict(predict_type, data)
+
+    def justice(self, request_id):
+        """Request a refund for a failed recognition; returns None for an empty request_id."""
+        if request_id == "":
+            return
+        tm = str(int(time.time()))
+        sign = self.calc_sign(self.pd_id, self.pd_key, tm)
+        param = {
+            "user_id": self.pd_id,
+            "timestamp": tm,
+            "sign": sign,
+            "request_id": request_id
+        }
+        url = self.host + "/api/capjust"
+        rsp = self.s.post(url, param, timeout=self.timeout).json()
+        return rsp
+
+    def charge(self, card_id, card_key):
+        """Top up the account with a recharge card; returns the decoded JSON response."""
+        tm = str(int(time.time()))
+        sign = self.calc_sign(self.pd_id, self.pd_key, tm)
+        card_sign = self.calc_card_sign(card_id, card_key, tm, self.pd_key)
+        param = {
+            "user_id": self.pd_id,
+            "timestamp": tm,
+            "sign": sign,
+            'cardid': card_id,
+            'csign': card_sign
+        }
+        url = self.host + "/api/charge"
+        rsp = self.s.post(url, param, timeout=self.timeout).json()
+        return rsp
+
+
+def test_ft():
+    """
+    Manual smoke test; example output of the two calls below:
+    {u'RspData': u'{"cust_val":1010}', u'RetCode': u'0', u'ErrMsg': u'succ', u'RequestId': u''}
+    {u'RspData': u'{"result": "8x4g"}', u'RetCode': u'0', u'ErrMsg': u'', u'RequestId': u'2019052615005042ad98b2000518d493'}
+    :return:
+    """
+    pd_id = "xxxxxx"
+    pd_key = "xxxxxx"
+    app_id = "312451"
+    app_key = "5YuN+6isLserKBZti4hoaI6UR2N5UT2j"
+    predict_type = "30400"
+    api = FTClient(pd_id, pd_key, app_id, app_key)
+    # balance query endpoint
+    res = api.query_balance()
+    print(res)
+    file_name = "img.jpg"
+    rsp = api.predict_from_file(predict_type, file_name)
+    print(rsp)
+
+if __name__ == "__main__":
+    test_ft()
diff --git a/libs/img.jpg b/libs/img.jpg
new file mode 100644
index 0000000..2a94859
Binary files /dev/null and b/libs/img.jpg differ
diff
--git a/libs/rk.py b/libs/rk.py index 0d86efc..5033a12 100644 --- a/libs/rk.py +++ b/libs/rk.py @@ -13,7 +13,7 @@ import requests -class RClient(object): +class RKClient(object): def __init__(self, username, password, soft_id, soft_key): self.username = username self.password = md5(password).hexdigest() @@ -69,6 +69,6 @@ def rk_report_error(self, im_id): if __name__ == '__main__': - rc = RClient('username', 'password', 'soft_id', 'soft_key') + rc = RKClient('username', 'password', 'soft_id', 'soft_key') im = open('a.jpg', 'rb').read() print(rc.rk_create(im, 3040)) diff --git a/requirements-py2.txt b/requirements-py2.txt index 27a5895..e4fad64 100644 --- a/requirements-py2.txt +++ b/requirements-py2.txt @@ -1,42 +1,42 @@ asn1crypto==0.24.0 -attrs==18.1.0 +attrs==19.1.0 Automat==0.7.0 -certifi==2018.8.24 -cffi==1.11.5 +certifi==2019.3.9 +cffi==1.12.3 chardet==3.0.4 constantly==15.1.0 -cryptography==2.3.1 +cryptography==2.6.1 cssselect==1.0.3 enum34==1.1.6 functools32==3.2.3.post2 -future==0.16.0 -hyperlink==18.0.0 -idna==2.7 +future==0.17.1 +hyperlink==19.0.0 +idna==2.8 incremental==17.5.0 -inflect==1.0.0 +inflect==2.1.0 ipaddress==1.0.22 -lxml==4.2.4 -mysqlclient==1.3.13 -parsel==1.5.0 -Pillow==5.2.0 -psutil==5.4.7 -pyasn1==0.4.4 -pyasn1-modules==0.2.2 -pycparser==2.18 +lxml==4.3.3 +mysqlclient==1.4.2.post1 +parsel==1.5.1 +Pillow==6.0.0 +psutil==5.6.2 +pyasn1==0.4.5 +pyasn1-modules==0.2.5 +pycparser==2.19 PyDispatcher==2.0.5 PyExecJS==1.5.1 PyHamcrest==1.9.0 -pyOpenSSL==18.0.0 +pyOpenSSL==19.0.0 queuelib==1.5.0 -redis==2.10.6 -requests==2.19.1 -schedule==0.5.0 -Scrapy==1.5.1 -service-identity==17.0.0 -six==1.11.0 +redis==3.2.1 +requests==2.22.0 +schedule==0.6.0 +Scrapy==1.6.0 +service-identity==18.1.0 +six==1.12.0 sqlacodegen==1.1.6 -SQLAlchemy==1.2.11 -Twisted==18.7.0 -urllib3==1.23 -w3lib==1.19.0 -zope.interface==4.5.0 +SQLAlchemy==1.3.3 +Twisted==19.2.0 +urllib3==1.25.3 +w3lib==1.20.0 +zope.interface==4.6.0 diff --git a/requirements-py3.txt b/requirements-py3.txt 
index 4f55946..733d443 100644 --- a/requirements-py3.txt +++ b/requirements-py3.txt @@ -1,39 +1,39 @@ asn1crypto==0.24.0 -attrs==18.1.0 +attrs==19.1.0 Automat==0.7.0 -certifi==2018.8.24 -cffi==1.11.5 +certifi==2019.3.9 +cffi==1.12.3 chardet==3.0.4 constantly==15.1.0 -cryptography==2.3.1 +cryptography==2.6.1 cssselect==1.0.3 -future==0.16.0 -hyperlink==18.0.0 -idna==2.7 +future==0.17.1 +hyperlink==19.0.0 +idna==2.8 incremental==17.5.0 -inflect==1.0.0 -lxml==4.2.4 -mysqlclient==1.3.13 -parsel==1.5.0 -Pillow==5.2.0 -psutil==5.4.7 -pyasn1==0.4.4 -pyasn1-modules==0.2.2 -pycparser==2.18 +inflect==2.1.0 +lxml==4.3.3 +mysqlclient==1.4.2.post1 +parsel==1.5.1 +Pillow==6.0.0 +psutil==5.6.2 +pyasn1==0.4.5 +pyasn1-modules==0.2.5 +pycparser==2.19 PyDispatcher==2.0.5 PyExecJS==1.5.1 PyHamcrest==1.9.0 -pyOpenSSL==18.0.0 +pyOpenSSL==19.0.0 queuelib==1.5.0 -redis==2.10.6 -requests==2.19.1 -schedule==0.5.0 -Scrapy==1.5.1 -service-identity==17.0.0 -six==1.11.0 +redis==3.2.1 +requests==2.22.0 +schedule==0.6.0 +Scrapy==1.6.0 +service-identity==18.1.0 +six==1.12.0 sqlacodegen==1.1.6 -SQLAlchemy==1.2.11 -Twisted==18.7.0 -urllib3==1.23 -w3lib==1.19.0 -zope.interface==4.5.0 +SQLAlchemy==1.3.3 +Twisted==19.2.0 +urllib3==1.25.3 +w3lib==1.20.0 +zope.interface==4.6.0