package com.ruoyi;
import java.io.StringReader;
import java.io.StringWriter;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.lang3.ObjectUtils;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xmlunit.builder.DiffBuilder;
import org.xmlunit.diff.Comparison;
import org.xmlunit.diff.DefaultNodeMatcher;
import org.xmlunit.diff.Diff;
import org.xmlunit.diff.Difference;
import org.xmlunit.diff.ElementSelectors;
/**
 * Demo that diffs two small, well-formed HTML snippets with XMLUnit and annotates
 * the DOM trees with a {@code data-patch} attribute describing each difference
 * ({@code missing}, {@code added} or {@code modified}).
 *
 * <p>The annotated test document is printed to standard output twice: once indented
 * with an XML declaration, once as a plain string without the declaration.
 */
public class HtmlComparator {

    /** Name of the marker attribute written onto annotated elements. */
    private static final String PATCH_ATTRIBUTE = "data-patch";

    /**
     * Parses the two hard-coded snippets, compares them and prints the annotated test document.
     * Nothing is printed when the documents have no differences.
     *
     * @param args unused
     * @throws Exception if parsing, XPath evaluation or serialization fails
     */
    public static void main(String[] args) throws Exception {
        // Control (reference) document.
        String html1 = "<html><body><b>vv</b><table><tr><td>A</td></tr><tr><td>001</td></tr></table><div>我爱你</div><h1>Q2</h1><h2>Q2</h2></body></html>";
        // Test document.
        String html2 = "<html><body><div>我爱你</div><b>vv</b><h1>Q2</h1></body></html>";

        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // Harden the parser: reject DOCTYPE declarations and disable external entities.
        factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
        factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
        factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);

        Document doc1 = factory.newDocumentBuilder().parse(new InputSource(new StringReader(html1)));
        Document doc2 = factory.newDocumentBuilder().parse(new InputSource(new StringReader(html2)));

        Diff myDiff = DiffBuilder.compare(doc1)
                .withTest(doc2)
                .ignoreWhitespace()
                .checkForSimilar()
                .withNodeFilter(n -> n.getNodeType() != Node.PROCESSING_INSTRUCTION_NODE
                        && n.getNodeType() != Node.DOCUMENT_NODE)
                // Alternative selectors: ElementSelectors.byName, ElementSelectors.byNameAndText.
                .withNodeMatcher(new DefaultNodeMatcher(ElementSelectors.Default))
                .build();

        if (!myDiff.hasDifferences()) {
            return;
        }

        XPath xpath = XPathFactory.newInstance().newXPath();
        for (Difference difference : myDiff.getDifferences()) {
            Comparison comparison = difference.getComparison();
            String controlXPath = comparison.getControlDetails().getXPath();
            String testXPath = comparison.getTestDetails().getXPath();
            switch (comparison.getType()) {
                case CHILD_NODELIST_LENGTH:
                    patchChildListLength(xpath, doc1, doc2, controlXPath, testXPath);
                    break;
                case CHILD_NODELIST_SEQUENCE:
                    // Child order differs; intentionally not annotated.
                    break;
                case CHILD_LOOKUP:
                    markAddedNode(xpath, doc2, testXPath);
                    break;
                case TEXT_VALUE:
                    markModifiedText(xpath, doc1, doc2, controlXPath, testXPath);
                    break;
                default:
                    break;
            }
        }

        // Print the annotated test document, indented.
        TransformerFactory transformerFactory = TransformerFactory.newInstance();
        Transformer transformer = transformerFactory.newTransformer();
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.transform(new DOMSource(doc2), new StreamResult(System.out));

        // Print it again as a plain string without the XML declaration.
        StringWriter writer1 = new StringWriter();
        Transformer transformer1 = TransformerFactory.newInstance().newTransformer();
        transformer1.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        transformer1.transform(new DOMSource(doc2), new StreamResult(writer1));
        String modifiedHtml1 = writer1.toString();
        System.out.println(modifiedHtml1);
    }

    /**
     * Handles a child-count mismatch between two matched parent nodes.
     *
     * <p>When the test parent has fewer children, the control children at the surplus
     * indexes are imported into the test document, tagged {@code data-patch="missing"}
     * when they are elements, and appended to the test parent. When the test parent has
     * more children, the surplus test children are only reported.
     *
     * <p>NOTE(review): the surplus is taken from the tail of the child list by index, so
     * this assumes the unmatched children are the trailing ones — confirm against the
     * intended behavior for reordered documents.
     */
    private static void patchChildListLength(XPath xpath, Document controlDoc, Document testDoc,
            String controlParentXPath, String testParentXPath) throws Exception {
        if (controlParentXPath == null || testParentXPath == null) {
            return;
        }
        Node controlParent = (Node) xpath.evaluate(controlParentXPath, controlDoc, XPathConstants.NODE);
        Node testParent = (Node) xpath.evaluate(testParentXPath, testDoc, XPathConstants.NODE);
        if (controlParent == null || testParent == null) {
            return;
        }

        NodeList controlChildNodes = controlParent.getChildNodes();
        NodeList testChildNodes = testParent.getChildNodes();
        // Lengths are captured up front because appending below changes the test child list.
        int expectedLength = controlChildNodes.getLength();
        int actualLength = testChildNodes.getLength();

        if (actualLength < expectedLength) {
            System.out.println("Missing nodes at path: " + controlParentXPath);
            for (int i = actualLength; i < expectedLength; i++) {
                Node missingNode = controlChildNodes.item(i);
                Node copiedNode = testDoc.importNode(missingNode, true);
                // Only elements can carry attributes; other node kinds are copied unmarked.
                markElement(copiedNode, "missing");
                testParent.appendChild(copiedNode);
                System.out.println(" Missing node: " + missingNode.getNodeName());
            }
        } else if (actualLength > expectedLength) {
            System.out.println("Extra nodes at path: " + testParentXPath);
            for (int i = expectedLength; i < actualLength; i++) {
                Node extraNode = testChildNodes.item(i);
                System.out.println(" Extra node: " + extraNode.getNodeName());
            }
        }
    }

    /**
     * Tags a test-side node that has no counterpart in the control document with
     * {@code data-patch="added"}. A text node cannot hold attributes, so its parent is tagged.
     * Does nothing when the XPath is null/empty or resolves to no node.
     */
    private static void markAddedNode(XPath xpath, Document testDoc, String testXPath) throws Exception {
        if (testXPath == null || testXPath.isEmpty()) {
            return;
        }
        Node newNode = (Node) xpath.evaluate(testXPath, testDoc, XPathConstants.NODE);
        if (newNode == null) {
            return;
        }
        if (newNode.getNodeType() == Node.TEXT_NODE) {
            markElement(newNode.getParentNode(), "added");
        } else {
            markElement(newNode, "added");
        }
    }

    /**
     * Tags the parents of two differing text nodes with {@code data-patch="modified"},
     * in both the control and the test document. Does nothing when either XPath is
     * null or resolves to no node, or when the text contents are equal.
     */
    private static void markModifiedText(XPath xpath, Document controlDoc, Document testDoc,
            String controlXPath, String testXPath) throws Exception {
        if (controlXPath == null || testXPath == null) {
            return;
        }
        Node controlTextNode = (Node) xpath.evaluate(controlXPath, controlDoc, XPathConstants.NODE);
        Node testTextNode = (Node) xpath.evaluate(testXPath, testDoc, XPathConstants.NODE);
        if (controlTextNode == null || testTextNode == null) {
            return;
        }
        String controlTextValue = controlTextNode.getTextContent();
        String testTextValue = testTextNode.getTextContent();
        if (controlTextValue != null && controlTextValue.equals(testTextValue)) {
            return;
        }
        markElement(controlTextNode.getParentNode(), "modified");
        markElement(testTextNode.getParentNode(), "modified");
    }

    /**
     * Sets {@code data-patch=value} on {@code node} when it is an element; silently ignores
     * {@code null} and non-element nodes, for which {@link Node#getAttributes()} is {@code null}.
     */
    private static void markElement(Node node, String value) {
        if (node == null || node.getNodeType() != Node.ELEMENT_NODE) {
            return;
        }
        Attr marker = node.getOwnerDocument().createAttribute(PATCH_ATTRIBUTE);
        marker.setValue(value);
        node.getAttributes().setNamedItem(marker);
    }
}
// 版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容,请发送邮件举报,一经查实,本站将立刻删除。
// 文章由极客之音整理,本文链接:https://www.bmabk.com/index.php/post/200895.html