注册 登录
编程论坛 Python论坛

[急]python的多线程爬虫,跑着跑着就出现了“段错误”,求大虾指点?

sophiabing 发布于 2010-04-06 16:44, 1706 次点击
附上代码:
-------------
jobs=Queue.Queue()
limit=20

def thread():
    while True:
        i=jobs.get()
        line=linecache.getline('nick.txt',i)

        lj=('(.*?)\n')   

        mat=lj.match(line)

        if mat:

                nick=mat.groups()[0]
        try:
            f=urllib.urlopen('http://'+nick+'.').read()
            print str(i)+'\t'+nick+'\t'+f[:10]   
        except:
            pass   
        jobs.task_done()   


for n in xrange(limit):
        t = threading.Thread(target=thread)
        t.setDaemon(True)
        t.start()


for i in xrange(10000):
        jobs.put(i)

jobs.join()
----------

thread()部分还要做些其他的处理,这个程序问题在哪里啊?
跪求大虾指点!!
4 回复
#2
外部三电铃2010-04-06 20:06
只给这一小段段代码无法调试啊
#3
sophiabing2010-04-06 21:49
全部的代码。。。
import linecache
import re,urllib
import threading
import time
import Queue
from xml.sax import make_parser

from xml.sax import ContentHandler



class FriendHandler(ContentHandler):

        isFriend=""

        Friend=""

        mode=""

        dateCreated=""

        isBirth=""

        birth=""

        interests=""

        isposted=""

        yaposted=""

        def startElement(self,name,attrs):   

                if name=="rdf:RDF":

                        self.mode="person"

                elif name=="foaf:knows":

                        self.mode="knows"



                if name=="foaf:dateOfBirth":

                        self.isBirth=1

            

                if name=="foaf:weblog" and self.mode=='person':        

                        self.dateCreated=attrs.get('lj:dateCreated')

               

                elif self.mode=="knows" and name=="foaf:nick":

                        self.isFriend=1

                elif name=="ya:posted":

                        self.isposted=1



        def endElement(self,name):            

                if name=="foaf:nick" and self.mode=="knows":

                        self.isFriend=""

                        self.mode=""

                if name=="foaf:dateOfBirth":

                        self.isBirth=""

                if name=="ya:posted":

                        self.isposted=""

        

        

        def characters(self,content):         

                if self.isFriend:

                        self.Friend+=content+','

                elif self.isBirth:

                        self.birth=content

                elif self.isposted:

                        self.yaposted=content
################





def thread():
    while True:
        i=jobs.get()
        line=linecache.getline('nick50000.txt',i)

        lj=('(.*?)\n')   

        mat=lj.match(line)

        if mat:

                nick=mat.groups()[0]
        try:   

                    saxparser.parse('http://'+nick+'.')
            print i
                    f2=file('foaf.txt','a')        

                    f2.write(ch.data)

                    f2.close()   

                       ch.data=''

        except:
            pass
        jobs.task_done()   



jobs=Queue.Queue()
limit=10
ch = FriendHandler()                           

saxparser = make_parser()                     

saxparser.setContentHandler(ch)  

for n in xrange(limit):
        t = threading.Thread(target=thread)
        t.setDaemon(True)
        t.start()
   
for i in xrange(1,1001):
        jobs.put(i)
        
jobs.join()
#4
sophiabing2010-04-06 21:50
拜托啦!!
#5
wangfeng37692010-05-21 17:36
是不是thread 和 import thread 重名了
1