美文网首页
获取代理ip并检查可用性

获取代理ip并检查可用性

作者: 奈斯凸米特 | 来源:发表于2019-10-08 17:36 被阅读0次

1. 首先新建一个 proxy.txt 文件,将批量提取的代理ip保存进去,每行一个,格式为「ip:端口」,例如 122.96.51.9:8080

2. proxy.txt的来源可以到小幻代理批量提取: https://ip.ihuan.me/ti.html

再提供一个可免费获取ip的地址:http://www.xsdaili.com/

"""
该程序用来获取proxy.txt中可用的免费国外代理ip
"""
import os
import telnetlib


def create_files():
    """
    Make sure the two working files exist in the current directory.

    'proxy.txt' (input list) and 'useful.txt' (results) are created empty
    when missing; files that already exist are left untouched.

    :return: None
    """
    for filename in ('proxy.txt', 'useful.txt'):
        if os.path.exists(filename):
            continue
        with open(filename, 'w') as handle:
            handle.write('')


def deal_datas():
    """
    Parse proxy.txt into a list of proxy records.

    Every usable line is expected to look like ``ip:port``. Blank lines and
    lines that lack a ':' separator (or have an empty ip/port) are skipped
    instead of raising IndexError, so a trailing empty line or a stray
    comment in the file no longer aborts the run.

    :return: list of dicts, each with the keys 'ip' and '端口' (the port,
             kept as a string exactly as written in the file)
    :raises FileNotFoundError: if proxy.txt does not exist
    """
    ip_list = []
    with open('proxy.txt', 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split(':')
            if len(parts) < 2:
                # No separator at all (blank or malformed line): nothing to parse.
                continue
            ip = parts[0].strip()
            port = parts[1].strip()
            if not ip or not port:
                continue
            ip_list.append({
                'ip': ip,
                '端口': port,
            })
    return ip_list


def get_useful_ip(ip_list, timeout=20):
    """
    Probe every proxy with a plain TCP connect and keep the reachable ones.

    Each reachable proxy is printed, collected, and appended to useful.txt
    immediately, so partial results survive an interrupted run.

    :param ip_list: iterable of dicts with the keys 'ip' and '端口' (port)
    :param timeout: per-connection timeout in seconds (default 20)
    :return: list of ``{'http': 'ip:port'}`` dicts for the reachable proxies
    """
    # Local import: the check only needs a TCP connect, and telnetlib is no
    # longer available in the standard library from Python 3.13 on.
    import socket

    print('正在筛选可用ip...')
    useful_ip = []
    for msg in ip_list:
        ip = msg['ip']
        port = msg['端口']
        message = {'http': '%s:%s' % (ip, port)}
        try:
            # The context manager closes the probe socket right away instead
            # of leaking one open connection per reachable proxy.
            with socket.create_connection((ip, port), timeout=timeout):
                pass
        except (OSError, ValueError, OverflowError):
            # Connection/DNS/timeout failures and unusable host or port values.
            # KeyboardInterrupt is deliberately not caught so Ctrl-C still works.
            print('不可用:%s' % str(message))
            continue
        print('可用:%s' % message)
        useful_ip.append(message)
        with open('useful.txt', 'a', encoding='utf-8') as f:
            f.write(str(message) + '\n')
    if not useful_ip:
        print('本次筛选无可用ip')
    else:
        print('筛选完成,本次共筛选%s个ip,%s个可用' % (str(len(ip_list)), str(len(useful_ip))))
    return useful_ip


def run():
    """
    Run the whole pipeline: ensure the working files exist, load the
    proxies from proxy.txt, then filter out the reachable ones.
    """
    create_files()
    get_useful_ip(deal_datas())


# Run the proxy.txt filtering pipeline only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    run()

下面再提供一个脚本:从快代理网站抓取免费代理ip(使用正则表达式提取),并检测其可用性

import re
import requests
import time
import telnetlib

"""
该程序用来获取快代理的免费代理,并检测可用性
"""


def get_ip(pages=10, timeout=10):
    """
    Scrape free proxies from the kuaidaili listing pages.

    :param pages: number of listing pages to fetch, starting at page 1
                  (default 10, the value that used to be hard-coded)
    :param timeout: per-request timeout in seconds, so one stalled
                    connection cannot hang the whole scrape
    :return: list of 'ip:port' strings in the order they were found
    """
    url = 'https://www.kuaidaili.com/free/inha/'
    url_list = [url + str(i + 1) for i in range(pages)]
    # Raw string keeps \d and \. as regex escapes rather than (invalid) string
    # escapes; compiled once here instead of once per page.
    matcher = re.compile(
        r'<td.*?>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>.*?<td.*?>(\d{1,5})</td>',
        re.S,
    )
    ip_list = []
    print('正在筛选...')
    for page_url in url_list:
        try:
            html = requests.get(url=page_url, timeout=timeout).text
        except requests.RequestException as exc:
            # One unreachable page should not discard the pages already scraped.
            print('获取失败,已跳过:%s (%s)' % (page_url, exc))
        else:
            # Each match is an (ip, port) tuple; store it as 'ip:port'.
            ip_list.extend(ip + ':' + port for ip, port in matcher.findall(html))
        # Pause between requests to stay polite to the site.
        time.sleep(1)
    print('共获取到%d个代理ip' % len(ip_list))
    print(ip_list)
    return ip_list


def ip_check(ip_list, timeout=20):
    """
    Check which proxies accept a TCP connection.

    :param ip_list: iterable of 'ip:port' strings
    :param timeout: per-connection timeout in seconds (default 20)
    :return: list of the 'ip:port' strings that were reachable, in input order
    """
    # Local import: the check only needs a TCP connect, and telnetlib is no
    # longer available in the standard library from Python 3.13 on.
    import socket

    usable_ip = []
    for i in ip_list:
        ip = i.split(':')[0]
        port = i.split(':')[-1]
        try:
            # The context manager closes the probe socket right away instead
            # of leaking one open connection per reachable proxy.
            with socket.create_connection((ip, port), timeout=timeout):
                pass
        except (OSError, ValueError, OverflowError):
            # Connection/DNS/timeout failures and unusable host or port values.
            # KeyboardInterrupt is deliberately not caught so Ctrl-C still works.
            print(i + '不可用')
        else:
            print('可用ip:%s' % i)
            usable_ip.append(i)
    print('共%s个IP可用:' % len(usable_ip))
    print(usable_ip)
    return usable_ip


if __name__ == '__main__':
    # Scrape the proxy list, then probe every entry for reachability.
    ip_check(get_ip())

相关文章

网友评论

      本文标题:获取代理ip并检查可用性

      本文链接:https://www.haomeiwen.com/subject/hsfipctx.html