爬取京东品牌和分类信息

如果你不相信努力和时光,那么成果就会是第一个选择辜负你的。不要去否定你自己的过去,也不要用你的过去牵扯你现在的努力和对未来的展望。不是因为拥有希望你才去努力,而是去努力了,你才有可能看到希望的光芒。爬取京东品牌和分类信息,希望对大家有帮助,欢迎收藏,转发!站点地址:www.bmabk.com,来源:原文

爬取并存入数据库

/**
 * Scrapes category and brand listings from JD.com and persists them through the injected
 * services.
 *
 * <p>Requires the jsoup HTML parser:
 *
 * <pre>{@code
 * <dependency>
 *   <groupId>org.jsoup</groupId>
 *   <artifactId>jsoup</artifactId>
 *   <version>1.11.3</version>
 * </dependency>
 * }</pre>
 *
 * <p>Entries whose markup lacks an expected element are skipped rather than aborting the whole
 * import with an {@code IndexOutOfBoundsException} after part of the data was already saved.
 */
@RestController
public class DemoController {

    @Autowired
    CategoryService categoryService;

    @Autowired
    BrandService brandService;

    /**
     * Fetches the JD "all categories" page and saves a three-level category tree.
     *
     * <p>Level 1 comes from the {@code span} text of each {@code category-item}, level 2 from the
     * first link inside each {@code dl}'s {@code dt}, and level 3 from every link inside that
     * {@code dl}'s first {@code dd}.
     *
     * @return a success result once every parseable category has been saved
     * @throws IOException if the page cannot be fetched
     */
    @GetMapping("getCategoryFromJD")
    public AxiosResult<Void> setData() throws IOException {
        Document document = Jsoup.connect("https://www.jd.com/allSort.aspx").get();

        for (Element group : document.getElementsByClass("category-items")) {
            for (Element item : group.getElementsByClass("category-item")) {
                Category firstCategory = newCategory(item.getElementsByTag("span").text(), 1);
                firstCategory.setParentId(0L);
                categoryService.save(firstCategory);

                for (Element dl : item.getElementsByTag("dl")) {
                    saveSubCategories(firstCategory, dl);
                }
            }
        }
        return AxiosResult.success();
    }

    /**
     * Fetches the JD brand page and saves one {@code Brand} per list item.
     *
     * <p>The brand name is the text of the first link inside the item's second {@code span}; the
     * logo and description come from the first image's {@code src} and {@code alt} attributes.
     * Items missing the image, the second span, or the link are skipped.
     *
     * @return a success result once every parseable brand has been saved
     * @throws Exception if the page cannot be fetched
     */
    @GetMapping("getBrandFromJD")
    public AxiosResult<Void> getBrandFromJd() throws Exception {
        Document document = Jsoup.connect("https://www.jd.com/brand.aspx").get();

        for (Element brandList : document.getElementsByClass("brandslist")) {
            for (Element item : brandList.getElementsByTag("li")) {
                Element img = item.getElementsByTag("img").first();
                Elements spans = item.getElementsByTag("span");
                if (img == null || spans.size() < 2) {
                    continue;
                }
                Element nameLink = spans.get(1).getElementsByTag("a").first();
                if (nameLink == null) {
                    continue;
                }

                Brand brand = new Brand();
                brand.setBrandName(nameLink.text());
                brand.setBrandDesc(img.attr("alt"));
                brand.setBrandLogo(img.attr("src"));
                // The page offers no per-brand site here, so a fixed placeholder is stored.
                brand.setBrandSite("http://www.baidu.com");
                brandService.save(brand);
            }
        }
        return AxiosResult.success();
    }

    /** Saves the second-level category described by {@code dl} and its third-level children. */
    private void saveSubCategories(Category firstCategory, Element dl) {
        Element dt = dl.getElementsByTag("dt").first();
        Element secondLink = dt == null ? null : dt.getElementsByTag("a").first();
        if (secondLink == null) {
            return;
        }

        Category secondCategory = newCategory(secondLink.text(), 2);
        // Relies on save() having populated the parent's id - TODO confirm for the service in use.
        secondCategory.setParentId(firstCategory.getId());
        categoryService.save(secondCategory);

        Element dd = dl.getElementsByTag("dd").first();
        if (dd == null) {
            return;
        }
        for (Element thirdLink : dd.getElementsByTag("a")) {
            Category thirdCategory = newCategory(thirdLink.text(), 3);
            thirdCategory.setParentId(secondCategory.getId());
            categoryService.save(thirdCategory);
        }
    }

    /** Creates an unsaved category with the given name and level; the caller sets the parent. */
    private static Category newCategory(String name, int level) {
        Category category = new Category();
        category.setCatetoryName(name);
        category.setCategoryLevel(level);
        return category;
    }
}

爬取并输出到 txt 文件

 /**
  * Fetches the JD "all categories" page and writes the third-level category names to a text
  * file, echoing them to standard output as it goes.
  *
  * <p>Each {@code dl} produces one CRLF-terminated line in the file, holding the text of every
  * link in that {@code dl}'s first {@code dd}, each followed by a space. A {@code dl} with no
  * {@code dd} produces an empty line. The file is written as UTF-8 and is created even when the
  * page yields no categories.
  *
  * @throws IOException if the page cannot be fetched or the file cannot be written
  */
 public static void setData() throws IOException {
        Document document = Jsoup.connect("https://www.jd.com/allSort.aspx").get();
        // One buffer only: the original kept a second, initially-null alias to it that was
        // dereferenced before its first assignment whenever no link had been appended yet.
        StringBuilder listing = new StringBuilder();

        for (Element group : document.getElementsByClass("category-items")) {
            for (Element item : group.getElementsByClass("category-item")) {
                for (Element dl : item.getElementsByTag("dl")) {
                    Element dd = dl.getElementsByTag("dd").first();
                    if (dd != null) {
                        for (Element link : dd.getElementsByTag("a")) {
                            String text = link.text();
                            System.out.print(text + " ");
                            listing.append(text).append(' ');
                        }
                    }
                    System.out.println("\n");
                    listing.append("\r\n");
                }
            }
        }

        // try-with-resources closes the stream even if write() throws.
        try (FileOutputStream stream = new FileOutputStream("C://Users//Desktop//京东分类目录.txt")) {
            stream.write(listing.toString().getBytes(StandardCharsets.UTF_8));
        }
    }

 

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 举报,一经查实,本站将立刻删除。

文章由极客之音整理,本文链接:https://www.bmabk.com/index.php/post/192885.html

(0)
飞熊的头像飞熊bm

相关推荐

发表回复

登录后才能评论
极客之音——专业性很强的中文编程技术网站,欢迎收藏到浏览器,订阅我们!