使用Timer定时抓取代理ip并检查有效性,检查通过的保存到Redis
C#有三种定时器,这里使用的是System.Threading命名空间下的Timer,它的回调会在线程池线程上执行。要抓取三个网站,所以定义了三个Timer对象。每一次抓取都会保存上一次抓取的集合,检查前先进行对比,只取出新增的(也就是与上一次不重复的)那部分进行检查。抓取到的ip有效率比较低,这里没有做统计;如果代码再优化一下,可以加上统计。来看看程序的主入口吧,最终的实现如下:
/// <summary>
/// Console entry point that periodically scrapes three proxy-list sites
/// (ip3366.net, xicidaili.com, 66ip.cn), checks every proxy that was not present
/// in the previous scrape of the same site, and stores the ones that pass the
/// check through <c>RedisHelper.InsertSet</c>.
/// </summary>
class Program
{
    // Re-entrancy guards: a timer tick is skipped while the previous run for the
    // same site has not finished yet.
    static bool timer_ip3366_isCompleted = true;
    static bool timer_xicidaili_isCompleted = true;
    static bool timer_66ip_isCompleted = true;

    // Held in static fields so the timers stay referenced while the program runs.
    static Timer timer_ip3366, timer_xicidaili, timer_66ip;

    // Proxies returned by the previous scrape of each site; the next scrape is
    // compared against them so that only new entries are checked.
    private static List<string> lastListip3366, lastList66ip, lastListxicidaili;

    /// <summary>
    /// Waits for the user to press Enter, starts one timer per site, then blocks
    /// on a second <c>Console.ReadLine</c> to keep the process alive.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static async Task Main(string[] args)
    {
        System.Net.ServicePointManager.DefaultConnectionLimit = 2000;
        Console.WriteLine("hellow proxyIp");
        Console.ReadLine();

        lastList66ip = new List<string>();
        lastListip3366 = new List<string>();
        lastListxicidaili = new List<string>();

        // dueTime 0 starts each job immediately; the last argument is the period in ms.
        // The lambdas are async void (TimerCallback returns void), so every job goes
        // through RunTimerJobAsync, which observes and logs exceptions.
        timer_ip3366 = new Timer(async (state) =>
        {
            await RunTimerJobAsync(TimerIp3366Async, state);
        }, "processing timer_ip3366 event", 0, 1000 * 30);
        timer_xicidaili = new Timer(async (state) =>
        {
            await RunTimerJobAsync(TimerXicidailiAsync, state);
        }, "processing timer_xicidaili event", 0, 1000 * 60);
        timer_66ip = new Timer(async (state) =>
        {
            await RunTimerJobAsync(Timer66ipAsync, state);
        }, "processing timer_66ip event", 0, 1000 * 30);

        Console.ReadLine();
    }

    /// <summary>
    /// Runs one timer job and logs any exception instead of letting it escape
    /// from the async void timer callback.
    /// </summary>
    /// <param name="job">The per-site job to run.</param>
    /// <param name="state">The timer state object; used as a label in the failure message.</param>
    private static async Task RunTimerJobAsync(Func<Task> job, object state)
    {
        try
        {
            await job();
        }
        catch (Exception ex)
        {
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine(state + " failed: " + ex);
        }
    }

    /// <summary>
    /// Returns the entries of <paramref name="current"/> that do not occur in
    /// <paramref name="previous"/>, preserving the order of <paramref name="current"/>.
    /// </summary>
    /// <param name="current">Proxies returned by the latest scrape.</param>
    /// <param name="previous">Proxies returned by the scrape before it; may be null.</param>
    /// <returns>A new list holding only the entries that were not seen last time.</returns>
    private static List<string> SelectNewProxies(List<string> current, List<string> previous)
    {
        // Hash lookup keeps the comparison linear instead of List.Contains per element.
        var seen = previous == null ? new HashSet<string>() : new HashSet<string>(previous);
        return current.FindAll(ip => !seen.Contains(ip));
    }

    /// <summary>
    /// Scrapes 66ip.cn, checks the proxies that are new since the previous scrape
    /// and stores the ones that pass the check. Does nothing if the previous run
    /// is still in progress.
    /// </summary>
    private static async Task Timer66ipAsync()
    {
        if (!timer_66ip_isCompleted)
        {
            return;
        }

        timer_66ip_isCompleted = false;
        try
        {
            var listProxyIp = ProxyIpHelper.Get66ipProxy();
            if (listProxyIp == null || listProxyIp.Count == 0)
            {
                Console.ForegroundColor = ConsoleColor.DarkCyan;
                Console.WriteLine("66ip.cn没有获取到代理ip");
                return;
            }

            Console.ForegroundColor = ConsoleColor.DarkCyan;
            Console.WriteLine("66ip.cn 抓取到" + listProxyIp.Count + "条记录,正在对比.........");
            List<string> checkList = SelectNewProxies(listProxyIp, lastList66ip);
            lastList66ip = listProxyIp;

            if (checkList.Count == 0)
            {
                Console.ForegroundColor = ConsoleColor.DarkCyan;
                Console.WriteLine("66ip.cn没有需要检查的代理ip");
                return;
            }

            Console.ForegroundColor = ConsoleColor.DarkCyan;
            Console.WriteLine("66ip.cn 需要检查" + checkList.Count + "条记录,正在进行检测是否有效..........");
            for (int i = 0; i < checkList.Count; i++)
            {
                string ipAddress = checkList[i];
                // Per-iteration copy: the callbacks must not observe a later value of i.
                int taskNumber = i;
                await ProxyIpHelper.CheckProxyIpAsync(ipAddress, () =>
                {
                    bool insertSuccess = RedisHelper.InsertSet(ipAddress);
                    Console.ForegroundColor = ConsoleColor.White;
                    Console.WriteLine("66ip.cn");
                    if (insertSuccess)
                    {
                        Console.WriteLine("success" + ipAddress + "任务编号:" + taskNumber + "当前任务线程:" + Thread.CurrentThread.ManagedThreadId);
                    }
                    else
                    {
                        // Only reported when the insert did not succeed.
                        Console.WriteLine("重复插入" + ipAddress + "任务编号:" + taskNumber + "当前任务线程:" + Thread.CurrentThread.ManagedThreadId);
                    }
                }, (error) =>
                {
                    Console.ForegroundColor = ConsoleColor.Green;
                    Console.WriteLine("66ip.cn");
                    Console.WriteLine("error:" + ipAddress + error + "任务编号:" + taskNumber + "当前任务线程:" + Thread.CurrentThread.ManagedThreadId);
                });
            }

            Console.ForegroundColor = ConsoleColor.DarkCyan;
            Console.WriteLine("66ip.cn" + checkList.Count + "条记录,已经检测完成,正在进行下一次检查");
        }
        finally
        {
            // Always release the guard, even when scraping or checking throws.
            timer_66ip_isCompleted = true;
        }
    }

    /// <summary>
    /// Scrapes xicidaili.com (argument 1 is passed to the scraper), checks the
    /// proxies that are new since the previous scrape and stores the ones that
    /// pass the check. Does nothing if the previous run is still in progress.
    /// </summary>
    private static async Task TimerXicidailiAsync()
    {
        if (!timer_xicidaili_isCompleted)
        {
            return;
        }

        timer_xicidaili_isCompleted = false;
        try
        {
            var listProxyIp = ProxyIpHelper.GetXicidailiProxy(1);
            if (listProxyIp == null || listProxyIp.Count == 0)
            {
                Console.ForegroundColor = ConsoleColor.DarkCyan;
                Console.WriteLine("xicidaili.com没有获取到代理ip");
                return;
            }

            Console.ForegroundColor = ConsoleColor.DarkCyan;
            Console.WriteLine("xicidaili.com 抓取到" + listProxyIp.Count + "条记录,正在对比............");

            // Only addresses that were not in the previous scrape are checked: if the
            // first scrape returns 100 records, all 100 are checked; if only 10 of the
            // next 100 are new, only those 10 are checked.
            List<string> checkList = SelectNewProxies(listProxyIp, lastListxicidaili);
            lastListxicidaili = listProxyIp;

            if (checkList.Count == 0)
            {
                Console.ForegroundColor = ConsoleColor.DarkCyan;
                Console.WriteLine("xicidaili.com没有需要检查的代理ip");
                return;
            }

            Console.ForegroundColor = ConsoleColor.DarkCyan;
            Console.WriteLine("xicidaili.com 需要检查" + checkList.Count + "条记录,正在进行检测是否有效..........");
            for (int i = 0; i < checkList.Count; i++)
            {
                string ipAddress = checkList[i];
                // Per-iteration copy: the callbacks must not observe a later value of i.
                int taskNumber = i;
                await ProxyIpHelper.CheckProxyIpAsync(ipAddress, () =>
                {
                    bool insertSuccess = RedisHelper.InsertSet(ipAddress);
                    Console.ForegroundColor = ConsoleColor.White;
                    Console.WriteLine("xicidaili.com");
                    if (insertSuccess)
                    {
                        Console.WriteLine("success" + ipAddress + "任务编号:" + taskNumber + "当前任务线程:" + Thread.CurrentThread.ManagedThreadId);
                    }
                    else
                    {
                        Console.WriteLine("重复插入" + ipAddress + "任务编号:" + taskNumber + "当前任务线程:" + Thread.CurrentThread.ManagedThreadId);
                    }
                }, (error) =>
                {
                    // Colour is switched before the header so both lines share it.
                    Console.ForegroundColor = ConsoleColor.Red;
                    Console.WriteLine("xicidaili.com");
                    Console.WriteLine("error:" + ipAddress + error + "任务编号:" + taskNumber + "当前任务线程:" + Thread.CurrentThread.ManagedThreadId);
                });
            }

            Console.ForegroundColor = ConsoleColor.DarkCyan;
            Console.WriteLine("xicidaili.com" + checkList.Count + "条记录,已经检测完成,正在进行下一次检查");
        }
        finally
        {
            // Always release the guard, even when scraping or checking throws.
            timer_xicidaili_isCompleted = true;
        }
    }

    /// <summary>
    /// Scrapes ip3366.net (argument 4 is passed to the scraper), checks the
    /// proxies that are new since the previous scrape and stores the ones that
    /// pass the check. Does nothing if the previous run is still in progress.
    /// </summary>
    private static async Task TimerIp3366Async()
    {
        if (!timer_ip3366_isCompleted)
        {
            return;
        }

        timer_ip3366_isCompleted = false;
        try
        {
            var listProxyIp = ProxyIpHelper.GetIp3366Proxy(4);
            if (listProxyIp == null || listProxyIp.Count == 0)
            {
                Console.ForegroundColor = ConsoleColor.DarkCyan;
                Console.WriteLine("ip3366.net没有获取到代理ip");
                return;
            }

            Console.ForegroundColor = ConsoleColor.DarkCyan;
            Console.WriteLine("ip3366.net 抓取到" + listProxyIp.Count + "条记录,正在进行检测是否有效..........");
            List<string> checkList = SelectNewProxies(listProxyIp, lastListip3366);
            lastListip3366 = listProxyIp;

            if (checkList.Count == 0)
            {
                Console.ForegroundColor = ConsoleColor.DarkCyan;
                Console.WriteLine("ip3366.net没有需要检查的代理ip");
                return;
            }

            Console.ForegroundColor = ConsoleColor.DarkCyan;
            Console.WriteLine("ip3366.net 需要检查" + checkList.Count + "条记录,正在进行检测是否有效..........");
            for (int i = 0; i < checkList.Count; i++)
            {
                string ipAddress = checkList[i];
                // Per-iteration copy: the callbacks must not observe a later value of i.
                int taskNumber = i;
                await ProxyIpHelper.CheckProxyIpAsync(ipAddress, () =>
                {
                    bool insertSuccess = RedisHelper.InsertSet(ipAddress);
                    Console.ForegroundColor = ConsoleColor.White;
                    Console.WriteLine("ip3366.net");
                    if (insertSuccess)
                    {
                        Console.WriteLine("success" + ipAddress + "任务编号:" + taskNumber + "当前任务线程:" + Thread.CurrentThread.ManagedThreadId);
                    }
                    else
                    {
                        Console.ForegroundColor = ConsoleColor.Red;
                        Console.WriteLine("重复插入" + ipAddress + "任务编号:" + taskNumber + "当前任务线程:" + Thread.CurrentThread.ManagedThreadId);
                    }
                }, (error) =>
                {
                    Console.ForegroundColor = ConsoleColor.Yellow;
                    Console.WriteLine("ip3366.net");
                    Console.WriteLine("error " + ipAddress + "任务编号:" + taskNumber + "当前任务线程:" + Thread.CurrentThread.ManagedThreadId);
                });
            }

            Console.ForegroundColor = ConsoleColor.DarkCyan;
            Console.WriteLine("ip3366.net" + checkList.Count + "条记录,已经检测完成,正在进行下一次检查");
        }
        finally
        {
            // Always release the guard, even when scraping or checking throws.
            timer_ip3366_isCompleted = true;
        }
    }
}










