python爬虫糗事百科

获取不含图片的糗事内容

python+BeautifulSoup

# -*- coding=utf-8 -*-
# Usage: python pullingdata.py <count> <output_file>   e.g.  python pullingdata.py 100 data.txt
import requests
from BeautifulSoup import BeautifulSoup
import sys

# Command line: <count> <output file>.
try:
    num = int(sys.argv[1])
    out = sys.argv[2]
except (IndexError, ValueError) as e:
    # Nothing below can run without both arguments, so report and stop
    # instead of falling through to a NameError on `num` / `out`.
    print('args error %s' % e)
    sys.exit(1)

link = 'http://www.qiushibaike.com/'
session = requests.Session()
r = session.get(link)

count = 0  # number of text-only posts written so far

# Mode 'w' truncates any previous output; the handle is then reused for every
# page instead of reopening the file once per page.
with open(out, 'w') as fh:
    while count < num:
        soup = BeautifulSoup(r.text)
        entries = []
        for node in soup.findAll('div', {'class': 'content'}):
            # A following <div class="thumb"> sibling means the post carries
            # an image; only text-only posts are kept.
            if node.findNextSibling('div', {'class': 'thumb'}) is not None:
                continue
            count += 1
            # NOTE(review): assumes the first child of the content div holds
            # the post text (its .string is not None) - confirm against the
            # live markup.
            entries.append(
                str(count) + '. ' + node.contents[0].string.strip() + '\n\n')
            if count >= num:
                # Stop at exactly `num` posts instead of finishing the page.
                break
        fh.write(''.join(entries).encode('utf-8'))
        if count >= num:
            break

        next_link = soup.find('a', {'class': 'next'})
        if next_link is None:
            # Last page (or changed markup): no further page to follow.
            break
        # `link` ends with '/'; it is dropped before appending the href,
        # which presumably starts with '/' - verify against the site.
        r = session.get(link[:-1] + next_link.get('href'))

Sunday 算法的简单实现

# -*- coding: utf-8 -*-

def sunday(dst,sub):
    """Return the index of the first occurrence of ``sub`` in ``dst``.

    Implements the Sunday string-search heuristic: after a failed comparison
    at window start ``i``, the element just past the window, ``dst[i + ls]``,
    decides how far the window can safely jump.

    Args:
        dst: the sequence to search in (e.g. a string).
        sub: the pattern to search for.

    Returns:
        The smallest index ``i`` such that ``dst[i:i + len(sub)]`` equals
        ``sub``, or -1 when there is no such index. An empty ``sub`` matches
        at index 0.
    """
    ld = len(dst)
    ls = len(sub)
    if ls == 0:
        return 0

    i = 0  # start of the current window in dst
    while i + ls <= ld:
        # j counts how many leading elements of the window already matched.
        j = 0
        while j < ls and dst[i + j] == sub[j]:
            j += 1
        if j == ls:
            return i

        # The window is the last one that fits: there is no element past it
        # to inspect and no later window to try.
        if i + ls >= ld:
            return -1

        # Rightmost position of the element following the window inside sub
        # (-1 when it does not occur at all).
        nxt = dst[i + ls]
        k = ls - 1
        while k >= 0 and sub[k] != nxt:
            k -= 1

        # Align that occurrence with nxt; when nxt is absent (k == -1) the
        # window jumps completely past it.
        i += ls - k
    return -1

def find(sub,ls,ch):
    """Return the rightmost index below ``ls`` at which ``sub`` holds ``ch``.

    The scan walks backwards from index ``ls - 1`` down to 0 and stops at the
    first hit; -1 is returned when ``ch`` is not found in that range.
    """
    idx = ls - 1
    while idx >= 0:
        if sub[idx] == ch:
            return idx
        idx -= 1
    return -1

if __name__=="__main__":
    # Small demo: search a fixed pattern in a fixed text and print the result
    # returned by sunday().
    haystack = "abcxxxbaaaabaaaxbbaaabcdamno"
    needle = "aaab"
    print(sunday(haystack, needle))

shadow's Blog

Happy coding

python爬虫糗事百科

sunday算法的简单实现

shadow

分类

最新评论

最新留言

链接

RSS

功能

shadow's Blog

Happy coding

python爬虫糗事百科

sunday算法的简单实现

shadow

分类

最新评论

最新留言

链接

RSS

功能

python爬虫糗事百科