爬取豆瓣动画电影排行榜的海报

1、需要爬取的页面展示

![]()

2、代码

import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.openqa.selenium.edge.EdgeDriver;

/**
 * Logs in to Douban with a Selenium-driven Edge browser, walks the "animation" movie
 * listing page by page and saves every poster image it finds to a local directory.
 */
public class SpiderPlaybill {
    /** Number of movies requested per listing page; also the step of the page offset. */
    private static final int PAGE_SIZE = 20;
    /** Offset of the last listing page that is fetched (inclusive). */
    private static final int LAST_PAGE_START = 20;
    /** Listing URL without the trailing page offset. */
    private static final String LIST_URL_PREFIX =
            "https://movie.douban.com/explore#!type=movie&tag=动画&sort=recommend&page_limit=20&page_start=";
    /** Directory the posters are written to. */
    private static final String SAVE_DIR = "D:\\image\\";
    /** Connect/read timeout used for each poster download, in milliseconds. */
    private static final int TIMEOUT_MS = 3 * 1000;
    /** Time given to the page scripts to render the listing, in milliseconds. */
    private static final long RENDER_DELAY_MS = 1000L;

    /**
     * Entry point: starts the browser, logs in, then downloads the posters of each listing page.
     *
     * @param args unused
     * @throws Exception if the browser cannot be started or a poster download fails
     */
    public static void main(String[] args) throws Exception {
        System.setProperty("webdriver.edge.driver", DriverCommon.getDriverName(DriverCommon.getOSType()));
        EdgeDriver driver = new EdgeDriver();
        try {
            // Log in to Douban first.
            Crawler crawler = new Crawler(driver);
            crawler.start();

            for (int page = 0; page <= LAST_PAGE_START; page += PAGE_SIZE) {
                // The offset must be part of the URL, otherwise every iteration loads page 0 again.
                driver.get(LIST_URL_PREFIX + page);
                if (page > 0) {
                    // Consecutive listing URLs differ only in their fragment, which does not
                    // trigger a page load by itself; reload so the new offset is applied.
                    driver.navigate().refresh();
                }
                pause(RENDER_DELAY_MS);

                Document doc = Jsoup.parse(driver.getPageSource());
                // Drill down: list wrapper -> movie item links -> poster image.
                Elements items = doc.select("div[class=list-wp]").select("a[class=item]");
                for (Element item : items) {
                    Element img = item.select("img").first();
                    if (img == null) {
                        continue; // item without a poster image
                    }
                    String imageUrl = img.attr("old-src");
                    if (imageUrl.isEmpty()) {
                        continue; // nothing to download for this item
                    }
                    String videoName = img.attr("alt") + ".jpg";
                    downloadFileFromUrl(imageUrl, videoName, SAVE_DIR);
                    System.out.println("video_name" + videoName);
                }
            }
        } finally {
            // Always end the browser session, even when crawling fails half-way.
            driver.quit();
        }
    }

    /**
     * Sleeps for the given time; if interrupted, restores the thread's interrupt flag
     * instead of swallowing the interruption.
     */
    private static void pause(long millis) {
        try {
            Thread.sleep(millis);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Replaces characters that are not allowed in file names (path separators and
     * {@code : * ? " < > |}) with an underscore so a movie title can be used as a file name.
     */
    private static String sanitizeFileName(String fileName) {
        return fileName.replaceAll("[\\\\/:*?\"<>|]", "_");
    }

    /**
     * Downloads a file over HTTP and stores it in the given directory.
     *
     * @param fileUrl  absolute URL of the file to download
     * @param fileName name of the target file; illegal file-name characters are replaced
     * @param savePath directory to store the file in; created if it does not exist
     * @throws Exception if the URL is malformed, the directory cannot be created,
     *                   or the transfer fails
     */
    private static void downloadFileFromUrl(String fileUrl, String fileName, String savePath) throws Exception {
        URL url = new URL(fileUrl);
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        connection.setConnectTimeout(TIMEOUT_MS);
        // Without a read timeout a stalled server would block the crawler forever.
        connection.setReadTimeout(TIMEOUT_MS);
        connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.110 Safari/537.36");

        File saveDir = new File(savePath);
        if (!saveDir.isDirectory() && !saveDir.mkdirs()) {
            throw new java.io.IOException("Cannot create directory: " + saveDir);
        }
        File target = new File(saveDir, sanitizeFileName(fileName));

        // try-with-resources closes both streams even when the copy fails.
        try (InputStream in = connection.getInputStream();
             OutputStream out = new FileOutputStream(target)) {
            byte[] buffer = new byte[8192];
            int len;
            while ((len = in.read(buffer)) != -1) {
                out.write(buffer, 0, len);
            }
        } finally {
            connection.disconnect();
        }
    }
}
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

/**
 * Helpers for choosing the browser driver binary that matches the current operating system.
 */
public class DriverCommon {
    /**
     * Determines the operating system type from the {@code os.name} system property.
     * On systems that are neither Mac nor Windows, {@code getconf LONG_BIT} is run to
     * tell 32-bit from 64-bit.
     *
     * @return {@code "mac"}, {@code "win"}, {@code "linux32"} or {@code "linux64"};
     *         {@code "linux64"} when the word size cannot be determined
     */
    public static String getOSType() {
        String osName = System.getProperty("os.name", "");
        if (osName.contains("Mac")) {
            return "mac";
        }
        if (osName.contains("Win")) {
            return "win";
        }
        return detectLinuxType();
    }

    /**
     * Runs {@code getconf LONG_BIT} and maps its first output line to a Linux type,
     * falling back to 64-bit when the command fails or prints nothing.
     */
    private static String detectLinuxType() {
        try {
            Process process = new ProcessBuilder("getconf", "LONG_BIT").start();
            try (BufferedReader reader =
                    new BufferedReader(new InputStreamReader(process.getInputStream()))) {
                String line = reader.readLine();
                // readLine() returns null when the command produced no output.
                if (line != null && !line.contains("64")) {
                    return "linux32";
                }
                return "linux64";
            }
        } catch (IOException e) {
            e.printStackTrace();
            return "linux64"; // default to 64-bit Linux
        }
    }

    /**
     * Returns the driver binary to use for the given operating system type.
     * Both the values produced by {@link #getOSType()} ({@code "linux32"}, {@code "linux64"})
     * and the underscore spellings ({@code "linux_32"}, {@code "linux_64"}) are accepted.
     *
     * @param os operating system type, may be {@code null}
     * @return path or name of the driver binary; {@code null} if {@code os} is {@code null};
     *         the 64-bit Linux driver for any unrecognized value
     * @throws IOException declared for callers; not thrown by the current implementation
     */
    public static String getDriverName(String os) throws IOException {
        if (os == null) {
            return null;
        }
        switch (os) {
        case "win":
            return "C:/myworkspace/spiderMovie/msedgedriver.exe";
        case "mac":
            return "chromedriver_mac";
        case "linux32":
        case "linux_32":
            return "chromedriver_linux32";
        case "linux64":
        case "linux_64":
        default:
            return "chromedriver_linux64";
        }
    }
}
import org.openqa.selenium.By;
import org.openqa.selenium.edge.EdgeDriver;

/**
 * Logs in to Douban through a Selenium-driven Edge browser using the password login form.
 *
 * <p>The credentials default to the placeholders below and can be overridden without editing
 * the source through the system properties {@code douban.username} and {@code douban.password}.
 */
public class Crawler {
    /** Placeholder account (phone number) used when {@code douban.username} is not set. */
    private static final String DEFAULT_USERNAME = "139000000";
    /** Placeholder password used when {@code douban.password} is not set. */
    private static final String DEFAULT_PASSWORD = "*********";
    /** Pause between form steps, in milliseconds. */
    private static final long STEP_DELAY_MS = 1000L;

    // Login page of the site.
    private final String baseUrl = "https://accounts.douban.com/passport/login?redir=https%3A%2F%2Fwww.douban.com%2Fgroup%2F";
    private final EdgeDriver edgeDriver;

    /** Creates a crawler without a browser; {@link #start()} cannot be used on it. */
    public Crawler() {
        this.edgeDriver = null;
    }

    /**
     * Creates a crawler that drives the given browser.
     *
     * @param driver browser session used for the login
     */
    public Crawler(EdgeDriver driver) {
        this.edgeDriver = driver;
    }

    /**
     * Opens the login page, switches to the password tab, fills in the credentials
     * and submits the form.
     *
     * @throws IllegalStateException if this crawler was created without a driver
     */
    public void start() {
        if (edgeDriver == null) {
            throw new IllegalStateException("Crawler was created without an EdgeDriver");
        }
        // Open the login page.
        edgeDriver.get(baseUrl);
        // Switch to the "password login" tab first, otherwise the password field does not exist.
        edgeDriver.findElement(By.cssSelector("ul.tab-start > li:nth-child(2)")).click();
        // Fill in the account (phone number) and the password.
        edgeDriver.findElement(By.id("username")).sendKeys(System.getProperty("douban.username", DEFAULT_USERNAME));
        edgeDriver.findElement(By.id("password")).sendKeys(System.getProperty("douban.password", DEFAULT_PASSWORD));
        pause();
        edgeDriver.findElement(By.cssSelector("div.account-form-field-submit  > a")).click();
        pause();
    }

    /**
     * Waits between form steps; if interrupted, restores the thread's interrupt flag
     * instead of swallowing the interruption.
     */
    private static void pause() {
        try {
            Thread.sleep(STEP_DELAY_MS);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }
}

3、代码所需要的 jar 包

链接:https://pan.baidu.com/s/1Cm\_36caoq\_UEFwYYxTR2qA
提取码:g6j8
复制这段内容后打开百度网盘手机App,操作更方便哦

![]()

4、爬取结果

![]()

声明:该文章系转载,转载该文章的目的在于更广泛的传递信息,并不代表本网站赞同其观点,文章内容仅供参考。

本站是一个个人学习和交流平台,网站上部分文章为网站管理员和网友从相关媒体转载而来,并不用于任何商业目的,内容为作者个人观点, 并不代表本网站赞同其观点和对其真实性负责。

我们已经尽可能地对作者和来源进行了标注,但可能由于能力有限或疏忽,导致作者和来源有误,亦可能您并不期望您的作品在我们的网站上发布。我们为这些问题向您致歉,如果您在我站上发现此类问题,请及时联系我们,我们将根据您的要求,立即更正或者删除有关内容。本站拥有对此声明的最终解释权。