import urllib2, socket
# Default timeout, in seconds, for every socket created from here on, so a
# request sent through an unresponsive proxy fails instead of blocking forever.
socket.setdefaulttimeout(180)
# Proxies to test, each written as a 'host:port' string.
proxyList = ['172.30.1.1:8080', '172.30.3.3:8080'] # two sample proxy addresses
def is_bad_proxy(pip):
    """Report whether the HTTP proxy *pip* fails to fetch the test page.

    Args:
        pip: Proxy address as a 'host:port' string.

    Returns:
        0 when the request through the proxy succeeds, the HTTP status code
        when the request ends in an ``urllib2.HTTPError``, and 1 for any
        other exception.  Every failure value is truthy, so the result can
        be used directly as a condition.
    """
    try:
        proxy_handler = urllib2.ProxyHandler({'http': pip})
        opener = urllib2.build_opener(proxy_handler)
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        req = urllib2.Request('http://www.google.com')  # change the url address here
        # Open through this opener directly rather than installing it with
        # urllib2.install_opener(): installing would replace the process-wide
        # default opener used by every later urllib2.urlopen() call.
        sock = opener.open(req)
    except urllib2.HTTPError as e:
        # Two spaces after the colon reproduce the original
        # `print 'Error code: ', e.code` output.
        print('Error code:  %s' % e.code)
        return e.code
    except Exception as detail:
        print("ERROR: %s" % detail)
        return 1
    # The response body is not needed; close it so the connection is released.
    sock.close()
    return 0
# Test every configured proxy and print one status line per proxy.
for candidate in proxyList:
    failed = is_bad_proxy(candidate)
    if not failed:
        print("%s is working" % candidate)
        continue
    print("Bad Proxy %s" % candidate)
# -- thread로: multithreaded version of the proxy checker --
#-*- coding: utf-8 -*-
#!/usr/bin/python
import time
import socket
import httplib
import urllib2
import threading
import Queue
import sys
import requests
# Socket timeout in seconds.  CheckThread.run also compares each check's
# elapsed time against twice this value before recording a proxy.
TIMEOUT = 10
# Apply the timeout to every socket created from here on.
socket.setdefaulttimeout(TIMEOUT)
def check(host, port):
    """Fetch the test URL through the proxy host:port and log it if it works.

    Args:
        host: Proxy host name or IP address.
        port: Proxy port (string or number; it is converted with ``str``).

    Returns:
        True when the request succeeds, in which case 'host:port' is appended
        as one line to 'check.txt'.  False when the request raises
        ``urllib2.HTTPError`` (silently) or any other exception (after
        printing a "Bad Proxy" line).
    """
    # Build the address once so the handler, the log line and the output file
    # all agree, and so a non-string port cannot raise inside the error path.
    address = str(host) + ":" + str(port)
    try:
        proxy_handler = urllib2.ProxyHandler({'http': address})
        opener = urllib2.build_opener(proxy_handler)
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        req = urllib2.Request('http://www.wenti.de')  # change the URL to test here
        # Open through this opener directly.  urllib2.install_opener() sets a
        # process-wide default, so when several threads run check() at once a
        # following urllib2.urlopen() could go through another thread's proxy.
        sock = opener.open(req)
    except urllib2.HTTPError:
        return False
    except Exception:
        print("Bad Proxy - " + address)
        return False
    # The response body is not needed; close it so the connection is released.
    sock.close()
    # NOTE(review): this append may run from many threads at once; each write
    # is a single short line -- confirm interleaving is acceptable.
    with open('check.txt', 'a') as out:
        out.write(address + "\n")
    return True
class CheckThread(threading.Thread):
    """Worker thread that pulls proxies from a queue and records working ones.

    Attributes are set from the constructor arguments:
        no: Identifier given to this worker (stored, not otherwise used here).
        q:  Queue of ``[host, port]`` lists to check.
        r:  Shared list; each working proxy is appended to it as
            ``[host, port, elapsed_seconds]``.
    """

    def __init__(self, no, q, r):
        threading.Thread.__init__(self)
        self.no = no
        self.q = q
        self.r = r

    def run(self):
        """Check queued proxies until the queue stays empty for 2 seconds."""
        while True:
            try:
                proxy = self.q.get(True, 2)
            except Queue.Empty:
                # Nothing arrived within the wait: the work is finished.
                # Only Queue.Empty is caught so real errors are not hidden.
                break
            if not proxy:
                # An empty item also ends the worker, as before.
                break
            tstart = time.time()
            # strip() removes the trailing newline and also a '\r' left by
            # CRLF files, which would otherwise corrupt the port.
            ret = check(proxy[0], proxy[1].strip())
            tuse = time.time() - tstart
            # Keep only proxies that worked and answered within 2 * TIMEOUT.
            if ret and tuse < TIMEOUT * 2:
                proxy.append(tuse)
                self.r.append(proxy)
class ProxyCheck:
    """Check every proxy listed in a text file using a pool of worker threads.

    Args:
        tnum: Number of CheckThread workers to start.
        file: Path of the input file, one 'host:port' entry per line.
    """

    def __init__(self, tnum, file):
        self.tnum = tnum
        self.file = file

    def run(self, file2):
        """Load the proxy list, run all workers, and print a summary.

        Args:
            file2: Accepted for compatibility with existing callers; it is
                not used by this method.

        Prints "All is OK!" followed by the number of proxies the workers
        recorded as working.
        """
        q = Queue.Queue()
        r = []
        # read file; the with-block closes it even if reading fails
        with open(self.file, 'r') as fd:
            for line in fd:
                arr = line.split(":")
                # Only lines with exactly one ':' are queued.
                if len(arr) == 2:
                    q.put(arr)
        tlist = []
        for i in xrange(self.tnum):
            cur = CheckThread(i, q, r)
            cur.start()
            tlist.append(cur)
        # Wait for every worker to finish before reporting.
        for cur in tlist:
            cur.join()
        print("All is OK!")
        print(len(r))
def main():
    """Run ProxyCheck on 'proxy.txt' with 100 worker threads."""
    source_path, result_path = 'proxy.txt', 'check.txt'
    checker = ProxyCheck(100, source_path)
    checker.run(result_path)


main()
'Coding > Python' 카테고리의 다른 글
Mechanize 로 로그인하기 (0) | 2016.02.26 |
---|---|
파이썬 정규식 정리 (0) | 2016.02.25 |
BeautifulSoup 웹파싱 (0) | 2016.01.29 |
Pytesser OCR (0) | 2016.01.27 |
소켓 통신할때 Response 못받아오는 문제 (0) | 2016.01.08 |