最近有一些学习资料,大概几个 G,全部是 PDF 格式,没办法编辑,所以想转成 Word。但是搜了很多软件,都没有批量转换功能,只能一个一个处理,太浪费时间,而且基本都要收费。所以决定自己写一个批量转换工具,节省时间。
需要注意的事项:jar包必须破解,如果不是破解版每个文档只能转换4页。
怎么破解jar包网上有教程感兴趣的可以自己破解一下。我就不写了。
jar包资源:aspose-pdf.zip-互联网文档类资源-CSDN下载
1.将需要的aspose.pdf.jar包引入项目
2.封装递归读取文件夹里面所有文件路径的工具类(是否为 PDF 在后面的 PDF 处理工具类中判断)
package com.question.syncdemo.utils;
import cn.hutool.core.io.file.FileReader;
import cn.hutool.core.util.StrUtil;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
/**
* description: FileUtils 文件操作工具类<br>
*
* @date: 2020/11/17 0017 下午 5:06 <br>
* @author: William <br>
* version: 1.0 <br>
*/
public class FileUtils {
//因为我这个是临时用所以没有考虑并发,如果并发自己修改一下就好了
public static List<String> resultList = new ArrayList<>();
/**
*@description: 通过文件路径,修改该路径下所有文件的名字
* @param path 文件夹路径
* @return:
* @author: William
* @date 2019/8/8 14:52
*/
public static List<String> getFilesPaths(String path,List<String> stringList){
File file = new File(path);
if(file.exists()){
File[] files = file.listFiles();
if (null == files || files.length == 0) {
System.out.println("文件夹是空的!");
} else {
for (File file2 : files) {
if (file2.isDirectory()) {
getFilesPaths(file2.getAbsolutePath(),stringList);
} else {
String filePath = file2.getAbsolutePath();
stringList.add(filePath);
}
}
}
}else{
System.out.println("该路径不存在");
}
return stringList;
}
}
3.封装PDF处理工具类
package com.question.syncdemo.utils;
import com.aspose.pdf.Document;
import com.aspose.pdf.License;
import com.aspose.pdf.SaveFormat;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicReference;
/**
* description: PDFUtil <br>
*
* @date: 2021/2/4 0004 上午 10:09 <br>
* @author: William <br>
* version: 1.0 <br>
*/
public class PDFUtil {
    /** Upper bound on the number of worker threads used by {@link #produceData(List)}. */
    private static final int MAX_THREADS = 3;

    /** Directory scanned by the no-argument {@link #pdf2word()}. */
    private static final String DEFAULT_SOURCE_DIR = "D:\\work\\temp\\中学学段2019科目二";

    /** File extension (including the dot) that marks a path as a convertible PDF. */
    private static final String PDF_EXTENSION = ".pdf";

    private static InputStream license;

    /**
     * Command-line entry point.
     *
     * @param args optional; when present, {@code args[0]} is the directory to scan, otherwise
     *             the built-in default directory is used
     * @throws Exception if the batch conversion is interrupted
     */
    public static void main(String[] args) throws Exception {
        if (args != null && args.length > 0) {
            pdf2word(args[0]);
        } else {
            pdf2word();
        }
    }

    /**
     * Converts every PDF path in {@code list} to a {@code .docx} file placed next to the source,
     * spreading the work over at most {@value #MAX_THREADS} threads.
     *
     * <p>Entries that do not end in {@code .pdf} (case-insensitive) are skipped. A failure on one
     * file is reported and does not stop the remaining conversions.
     *
     * @param list paths to process; {@code null} or empty means there is nothing to do
     * @throws InterruptedException if the calling thread is interrupted while waiting for workers
     */
    public static void produceData(List<String> list) throws InterruptedException {
        if (list == null || list.isEmpty()) {
            // Avoids the division by zero / zero-sized pool an empty batch would otherwise cause.
            return;
        }

        int total = list.size();
        int workers = Math.min(MAX_THREADS, total);
        // Ceiling division so that `workers` slices always cover the whole list.
        int sliceSize = (total + workers - 1) / workers;

        long begin = System.currentTimeMillis();

        List<Callable<Integer>> tasks = new ArrayList<>();
        for (int from = 0; from < total; from += sliceSize) {
            final List<String> slice = list.subList(from, Math.min(from + sliceSize, total));
            tasks.add(() -> convertAll(slice));
        }

        ExecutorService pool = Executors.newFixedThreadPool(tasks.size());
        try {
            for (Future<Integer> outcome : pool.invokeAll(tasks)) {
                try {
                    outcome.get();
                } catch (java.util.concurrent.ExecutionException e) {
                    // Per-file errors are handled inside the task; this only fires for
                    // unexpected failures that escaped it.
                    e.printStackTrace();
                }
            }
        } finally {
            // Always release the worker threads, even when invokeAll is interrupted.
            pool.shutdown();
        }

        long end = System.currentTimeMillis();
        System.out.println("执行耗时:" + (end - begin));
    }

    /**
     * Scans the built-in default directory and converts every PDF found in it.
     *
     * @throws Exception if the batch conversion is interrupted
     */
    public static void pdf2word() throws Exception {
        pdf2word(DEFAULT_SOURCE_DIR);
    }

    /**
     * Scans {@code directory} recursively and converts every PDF found in it.
     *
     * @param directory root directory to scan
     * @throws Exception if the batch conversion is interrupted
     */
    public static void pdf2word(String directory) throws Exception {
        List<String> filesPaths = FileUtils.getFilesPaths(directory, new ArrayList<>());
        produceData(filesPaths);
    }

    /**
     * Saves a single PDF as a Word document.
     *
     * @param targetFile path of the source PDF; must be an existing regular file
     * @param newFile    path of the {@code .docx} file to write
     * @throws java.io.FileNotFoundException if {@code targetFile} is not an existing file
     * @throws Exception                     if reading, converting or writing fails
     */
    public static void saveAsWord(String targetFile, String newFile) throws Exception {
        File source = new File(targetFile);
        if (!source.isFile()) {
            // Never create anything at the source path; a missing input is an error.
            throw new java.io.FileNotFoundException("PDF source file not found: " + targetFile);
        }

        // Make sure the destination directory exists before the library tries to write to it.
        File outputDir = new File(newFile).getAbsoluteFile().getParentFile();
        if (outputDir != null && !outputDir.mkdirs() && !outputDir.isDirectory()) {
            throw new java.io.IOException("Cannot create output directory: " + outputDir);
        }

        // Apply the license before loading the document.
        if (!getLicense()) {
            System.out.println("获取验证失败");
        }

        try (FileInputStream sourceStream = new FileInputStream(source)) {
            Document document = new Document(sourceStream);
            try {
                document.save(newFile, SaveFormat.DocX);
            } finally {
                // Release the document even when save() throws.
                document.close();
            }
        }
    }

    /**
     * Applies the embedded license XML to the Aspose library.
     *
     * <p>NOTE(review): the embedded XML appears to be a placeholder rather than a vendor-issued
     * license (its signature is the literal {@code 111}); load a valid license file before
     * using this outside of experiments.
     *
     * @return {@code true} when the license call completed without throwing, {@code false}
     *         otherwise
     */
    public static synchronized boolean getLicense() {
        boolean result = false;
        try {
            String license2 = "<License>\n"
                    + " <Data>\n"
                    + " <Products>\n"
                    + " <Product>Aspose.Total for Java</Product>\n"
                    + " <Product>Aspose.Words for Java</Product>\n"
                    + " </Products>\n"
                    + " <EditionType>Enterprise</EditionType>\n"
                    + " <SubscriptionExpiry>20991231</SubscriptionExpiry>\n"
                    + " <LicenseExpiry>20991231</LicenseExpiry>\n"
                    + " <SerialNumber>8bfe198c-7f0c-4ef8-8ff0-acc3237bf0d7</SerialNumber>\n"
                    + " </Data>\n"
                    + " <Signature>111</Signature>\n"
                    + "</License>";
            license = new ByteArrayInputStream(license2.getBytes("UTF-8"));
            License asposeLicense = new License();
            asposeLicense.setLicense(license);
            result = true;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return result;
    }

    /** Converts each PDF path in {@code paths}; returns how many conversions failed. */
    private static int convertAll(List<String> paths) {
        int failures = 0;
        for (String pdfPath : paths) {
            if (!isPdf(pdfPath)) {
                continue;
            }
            String docxPath = toDocxPath(pdfPath);
            System.out.println(docxPath);
            try {
                saveAsWord(pdfPath, docxPath);
            } catch (Exception e) {
                // Keep going: one broken file must not abort the rest of the batch.
                failures++;
                System.err.println("转换失败: " + pdfPath);
                e.printStackTrace();
            }
        }
        return failures;
    }

    /** Tells whether {@code path} ends with the PDF extension, ignoring case. */
    private static boolean isPdf(String path) {
        if (path == null) {
            return false;
        }
        int extensionLength = PDF_EXTENSION.length();
        // regionMatches returns false (rather than throwing) when the path is shorter than the extension.
        return path.regionMatches(true, path.length() - extensionLength, PDF_EXTENSION, 0, extensionLength);
    }

    /** Builds the sibling {@code .docx} path for a PDF using the platform's own separator. */
    private static String toDocxPath(String pdfPath) {
        File pdf = new File(pdfPath);
        String fileName = pdf.getName();
        int dot = fileName.lastIndexOf('.');
        String baseName = (dot >= 0) ? fileName.substring(0, dot) : fileName;
        return new File(pdf.getParentFile(), baseName + ".docx").getPath();
    }
}
如果没有币的直接加我微信获取就好了
版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 举报,一经查实,本站将立刻删除。
文章由极客之音整理,本文链接:https://www.bmabk.com/index.php/post/97029.html