5.Python实现爬取某瓣电影排行榜

Python是十分擅长开发爬虫程序的，今天我给大家带来一个小工具，可以快速爬取某瓣电影信息。

编写代码

下面是一个简单的Python脚本，可以快速爬取某瓣电影 TOP250 相关信息：

import requests
import re


def top250_crawer(url, sum):
    """Fetch one ranking page and print one line per movie found on it.

    Each printed line has the form ``<rank>.<title>,<country>,<year>``.

    Args:
        url: Address of the ranking page to download.
        sum: Number of movies already listed on earlier pages; the first
            movie printed here is numbered ``sum + 1``. (The name shadows
            the builtin but is kept so existing callers keep working.)
    """
    headers = {
        'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/1'
    }
    # A timeout keeps a stalled connection from blocking the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    html = response.text

    # Title spans containing the '&nbsp;/&nbsp' separator are dropped so
    # that only one title per movie is kept.
    titles = [
        t
        for t in re.findall(r'<span class="title">(.*?)</span>', html, re.S)
        if '&nbsp;/&nbsp' not in t
    ]

    years = []
    countries = []
    for raw in re.findall(r'<br>(.*?)</p>', html, re.S):
        # Strip layout whitespace only. Removing the letter "n" (as the
        # previous code did) corrupts the '&nbsp;' entities and makes the
        # split below fail.
        fields = raw.replace(' ', '').replace('\n', '').split('&nbsp;/&nbsp;')
        if len(fields) < 2:
            # Not a "year / country / ..." info line; skip instead of crashing.
            continue
        years.append(fields[0])
        countries.append(fields[1])

    # NOTE(review): titles and info lines are paired by position; this
    # assumes both regexes match movies in the same order - confirm against
    # the live page markup.
    rows = zip(titles, countries, years)
    for rank, (title, country, year) in enumerate(rows, start=sum + 1):
        print(str(rank) + '.' + title + ',' + country + ',' + year)


url = 'https://movie.douban.com/top250'
# Walk all 10 pages (25 movies each), 250 results in total.
# ``start`` is both the number of movies already listed and the value of
# the ``start`` query parameter; it replaces the old module-level ``sum``
# variable, which shadowed the builtin of the same name.
for start in range(0, 250, 25):
    if start == 0:
        # The first page is requested at the bare URL, without a query string.
        page_url = url
    else:
        page_url = url + '?start=' + str(start) + '&filter='
    top250_crawer(page_url, start)