Maven依赖
<!--jsoup 是一款 Java 的HTML 解析器--> <dependency> <groupId>org.jsoup</groupId> <artifactId>jsoup</artifactId> <version>1.11.3</version> </dependency>
package com.sm.cn.test;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
/**
 * Downloads the category menus of several shopping sites and writes each one to a
 * UTF-8 text file, one menu entry per line. Taobao, JD and Amazon are covered.
 */
public class Test01 {

    /** Line terminator appended after every menu entry. */
    private static final String LINE_BREAK = "\r\n";

    /**
     * Scrapes the three sites in order; the first failure aborts the remaining ones.
     *
     * @param args unused
     * @throws IOException if a page cannot be fetched or an output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        f1();
        f2();
        f3();
    }

    /**
     * Taobao: collects the link text of every {@code li} inside elements with class
     * {@code service-bd} and writes it to the Taobao output file.
     *
     * @throws IOException if the page cannot be fetched or the file cannot be written
     */
    public static void f1() throws IOException {
        Document document = Jsoup.connect("https://www.taobao.com/").get();
        String menu = collectMenuLinks(document, "service-bd");
        writeUtf8("C://Users//Desktop//淘宝菜单目录.txt", menu);
    }

    /**
     * JD: collects the link text of every {@code li} inside elements with class
     * {@code cate_menu} and writes it to the JD output file.
     *
     * @throws IOException if the page cannot be fetched or the file cannot be written
     */
    public static void f2() throws IOException {
        Document document = Jsoup.connect("https://www.jd.com/").get();
        String menu = collectMenuLinks(document, "cate_menu");
        // Written as UTF-8 bytes (not via FileWriter's platform-default charset) so the
        // Chinese text is encoded the same way as in f1() and f3().
        writeUtf8("C://Users//Desktop//京东商城目录.txt", menu);
    }

    /**
     * Amazon: collects the text of every {@code option} inside the element with id
     * {@code searchDropdownBox} and writes it to the Amazon output file. If the page
     * has no such element, an empty file is written.
     *
     * @throws IOException if the page cannot be fetched or the file cannot be written
     */
    public static void f3() throws IOException {
        Document document = Jsoup.connect("https://www.amazon.cn/").get();
        Element searchDropdownBox = document.getElementById("searchDropdownBox");
        StringBuilder lines = new StringBuilder();
        // getElementById returns null when the id is absent (e.g. the site served a
        // different layout), so guard before walking its children.
        if (searchDropdownBox != null) {
            for (Element option : searchDropdownBox.getElementsByTag("option")) {
                lines.append(option.text()).append(LINE_BREAK);
            }
        }
        writeUtf8("C://Users//Desktop//亚马逊目录.txt", lines.toString());
    }

    /**
     * Builds one line per {@code li} found under elements carrying {@code className};
     * each line is the combined text of the {@code a} tags inside that {@code li}.
     *
     * @param document  parsed page to search
     * @param className CSS class of the menu container elements
     * @return the collected lines, or an empty string when nothing matched
     */
    private static String collectMenuLinks(Document document, String className) {
        StringBuilder lines = new StringBuilder();
        for (Element container : document.getElementsByClass(className)) {
            for (Element item : container.getElementsByTag("li")) {
                lines.append(item.getElementsByTag("a").text()).append(LINE_BREAK);
            }
        }
        return lines.toString();
    }

    /**
     * Writes {@code content} to {@code path} encoded as UTF-8, replacing any existing file.
     *
     * @param path    destination file path
     * @param content text to write
     * @throws IOException if the file cannot be opened or written
     */
    private static void writeUtf8(String path, String content) throws IOException {
        // try-with-resources closes the stream even when write() throws.
        try (FileOutputStream out = new FileOutputStream(path)) {
            out.write(content.getBytes(StandardCharsets.UTF_8));
        }
    }
}
版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 举报,一经查实,本站将立刻删除。
文章由极客之音整理,本文链接:https://www.bmabk.com/index.php/post/192886.html