python爬虫 糗事百科
获取不含图片的糗事内容
python+BeautifulSoup
# -*- coding=utf-8 -*-
#python pullingdata.py 100 data.txt
import requests
from BeautifulSoup import BeautifulSoup
import sys
# Command line: <number of posts to collect> <output file path>.
try:
    num = int(sys.argv[1])
    out = sys.argv[2]
except (IndexError, ValueError) as e:
    print('args error %s' % e)
    # Without both arguments nothing below can run; stop here instead of
    # failing later with a NameError on num/out.
    sys.exit(1)

j = 0  # number of text-only posts written so far
s = requests.Session()
link = 'http://www.qiushibaike.com/'
r = s.get(link)

# Create or truncate the output file before appending page by page.
with open(out, 'w'):
    pass

while j < num:
    soup = BeautifulSoup(r.text)
    posts = soup.findAll('div', {'class': 'content'})
    entries = []
    for post in posts:
        # A following sibling <div class="thumb"> means the post carries an
        # image; only posts without one are kept.
        if post.findNextSibling('div', {'class': 'thumb'}) is not None:
            continue
        j += 1
        entries.append(str(j) + '. ' + post.contents[0].string.strip() + '\n\n')
        if j >= num:
            # Requested count reached: do not write extra posts.
            break
    with open(out, 'a') as fh:
        fh.write(''.join(entries).encode('utf-8'))
    if j >= num:
        # Done: skip the request for a page that would never be parsed.
        break
    next_link = soup.find('a', {'class': 'next'})
    if next_link is None:
        # No "next" anchor on this page, so there is nothing more to fetch.
        break
    # href is appended to the site root with its trailing slash removed.
    r = s.get(link[:-1] + next_link.get('href'))
数的表示
将一个整数转换为3的指数加减的形式,如7转换为 $3^0-3^1+3^2$。
# -*- coding: utf-8 -*-
def inttoint3(num):
    """Return the base-3 digits of a non-negative integer, lowest digit first.

    A single 0 is always appended after the most significant digit, so the
    result has one more entry than the plain ternary expansion (for example
    so a caller can propagate a carry into it).

    Examples: inttoint3(7) == [1, 2, 0] and inttoint3(0) == [0].

    Raises:
        ValueError: if num is negative. Repeated floor division of a
            negative number settles at -1 and never reaches 0, so the
            digit loop would not terminate.
    """
    if num < 0:
        raise ValueError('inttoint3 expects a non-negative integer')
    ret = []
    tmp = num
    while tmp != 0:
        # divmod uses floor division, so digits stay integers on both
        # Python 2 and Python 3 (plain "/" yields floats on Python 3).
        tmp, digit = divmod(tmp, 3)
        ret.append(digit)
    ret.append(0)  # padding digit above the most significant one
    return ret
def int3tostring3(num):
    """Return the balanced-ternary digits of an integer, lowest power first.

    Each entry is -1, 0 or 1 and entry k is the coefficient of 3**k, so
    sum(d * 3**k for k, d in enumerate(result)) == num. For example
    7 -> [1, -1, 1] (3**0 - 3**1 + 3**2) and 0 -> [0].

    Despite the name, the result is a list of ints, not a string.
    """
    negative = num < 0
    remaining = -num if negative else num

    digits = []
    while remaining:
        digit = remaining % 3
        if digit == 2:
            # A ternary 2 is written as -1 here plus a carry of 1 into the
            # next power; subtracting the digit below applies that carry.
            digit = -1
        digits.append(digit)
        remaining = (remaining - digit) // 3

    if not digits:
        # Zero is represented by a single 0 digit.
        digits.append(0)

    if negative:
        # The representation of -n is that of n with every digit negated.
        digits = [0 - d for d in digits]
    return digits
if __name__ == "__main__":
    # Demo run: print the converted digit list for a few sample integers.
    for sample in (-3, -7, 0, 1, 2, 3, 4, 7, 11, 28):
        print(int3tostring3(sample))
sunday算法的简单实现
# -*- coding: utf-8 -*-
def sunday(dst, sub):
    """Find the first occurrence of sub in dst with the Sunday algorithm.

    Args:
        dst: the sequence to search in (typically a string).
        sub: the pattern to look for; its elements must be hashable, which
            holds for the characters of a string.

    Returns:
        The lowest index at which sub occurs in dst, or -1 if it does not
        occur. An empty pattern matches at index 0.
    """
    ld = len(dst)
    ls = len(sub)
    if ls == 0:
        return 0

    # Rightmost position of every pattern element; later positions
    # overwrite earlier ones, so each lookup yields the last occurrence.
    last_index = {ch: idx for idx, ch in enumerate(sub)}

    i = 0  # start of the current alignment of sub inside dst
    while i + ls <= ld:
        if all(dst[i + k] == sub[k] for k in range(ls)):
            return i
        if i + ls == ld:
            # The window already touches the end of dst, so there is no
            # element after it to drive the next shift.
            break
        # Shift so that the element just past the window lines up with its
        # last occurrence in sub, or jump past it entirely when it does not
        # occur in sub at all (lookup yields -1, giving a shift of ls + 1).
        i += ls - last_index.get(dst[i + ls], -1)
    return -1
def find(sub, ls, ch):
    """Return the highest index below ls at which sub holds ch, or -1.

    Only the first ls elements of sub are examined, scanning from index
    ls - 1 down to 0.
    """
    pos = ls
    while pos > 0:
        pos -= 1
        if sub[pos] == ch:
            return pos
    return -1
if __name__ == "__main__":
    # Demo run: search a sample text for a short pattern and print the result.
    text = "abcxxxbaaaabaaaxbbaaabcdamno"
    pattern = "aaab"
    print(sunday(text, pattern))