滑动验证是常见的反爬虫策略之一:现在很多站点都会引入滑动验证的方式,来校验访问者的真实性。譬如下面这个著名的 jQuery 滑动插件:
在模拟登录时,我们往往需要绕过这样的滑动验证,而基于 Puppeteer 的动态爬虫为此提供了便利;通常需要进行以下步骤:将鼠标移动到滑块中间,按下鼠标,拖动鼠标,释放鼠标。
const puppeteer = require("puppeteer");

/**
 * Opens the "slide to submit" demo page, fills in the form and drags the
 * slider thumb across its track with the mouse.
 *
 * @returns {Promise<void>} Resolves after the browser has been closed.
 * @throws {Error} If the slider track or thumb cannot be found or measured.
 */
async function run() {
  const browser = await puppeteer.launch({
    headless: false,
    defaultViewport: { width: 1366, height: 768 }
  });

  // try/finally guarantees the browser is closed even when a step throws.
  try {
    const page = await browser.newPage();
    await page.goto("http://kthornbloom.com/slidetosubmit/");
    await page.type('input[name="name"]', "Puppeteer Bot");
    await page.type('input[name="email"]', "js@automation.com");

    const sliderElement = await page.$(".slide-submit");
    const sliderHandle = await page.$(".slide-submit-thumb");
    if (!sliderElement || !sliderHandle) {
      throw new Error("Slider track or thumb element was not found on the page");
    }

    const slider = await sliderElement.boundingBox();
    const handle = await sliderHandle.boundingBox();
    if (!slider || !handle) {
      throw new Error("Slider track or thumb has no bounding box (not rendered?)");
    }

    // Press the mouse button on the centre of the thumb ...
    const handleCenterY = handle.y + handle.height / 2;
    await page.mouse.move(handle.x + handle.width / 2, handleCenterY);
    await page.mouse.down();

    // ... drag it one full track width to the right in 10 intermediate
    // steps, then release.
    await page.mouse.move(handle.x + slider.width, handleCenterY, { steps: 10 });
    await page.mouse.up();

    // Keep the window open for 3 s so the result is visible. A plain timer is
    // used because page.waitFor() is deprecated in newer Puppeteer releases.
    await new Promise(resolve => setTimeout(resolve, 3000));
  } finally {
    await browser.close();
  }
}

// Report failures instead of leaving an unhandled promise rejection.
run().catch(error => {
  console.error(error);
  process.exitCode = 1;
});
在实际的案例中,我们可以以淘宝的注册界面为例:
const puppeteer = require("puppeteer");

/**
 * Opens the Taobao registration page and drags the slider captcha handle
 * across its track with the mouse.
 *
 * @returns {Promise<void>} Resolves after the browser has been closed.
 * @throws {Error} If the captcha frame, slider track or handle is missing.
 */
async function run() {
  const browser = await puppeteer.launch({
    headless: false,
    defaultViewport: { width: 1366, height: 768 }
  });

  // try/finally guarantees the browser is closed even when a step throws.
  try {
    const page = await browser.newPage();

    // Installed for every new document: makes navigator.webdriver read as
    // false so the page does not see the automation flag.
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, "webdriver", { get: () => false });
    });
    await page.goto("https://world.taobao.com/markets/all/sea/register");

    // NOTE(review): assumes the captcha is rendered inside the frame at
    // index 1 of page.frames() — confirm against the live page.
    const frame = page.frames()[1];
    if (!frame) {
      throw new Error("Captcha frame (page.frames()[1]) is not available");
    }
    await frame.waitForSelector(".nc_iconfont.btn_slide");

    const sliderElement = await frame.$(".slidetounlock");
    const sliderHandle = await frame.$(".nc_iconfont.btn_slide");
    if (!sliderElement || !sliderHandle) {
      throw new Error("Slider track or handle element was not found in the frame");
    }

    const slider = await sliderElement.boundingBox();
    const handle = await sliderHandle.boundingBox();
    if (!slider || !handle) {
      throw new Error("Slider track or handle has no bounding box (not rendered?)");
    }

    // Press the mouse button on the centre of the handle ...
    const handleCenterY = handle.y + handle.height / 2;
    await page.mouse.move(handle.x + handle.width / 2, handleCenterY);
    await page.mouse.down();

    // ... drag it one full track width to the right in 50 intermediate
    // steps, then release.
    await page.mouse.move(handle.x + slider.width, handleCenterY, { steps: 50 });
    await page.mouse.up();

    // Keep the window open for 3 s so the result is visible. A plain timer is
    // used because page.waitFor() is deprecated in newer Puppeteer releases.
    await new Promise(resolve => setTimeout(resolve, 3000));
  } finally {
    await browser.close();
  }
}

// Report failures instead of leaving an unhandled promise rejection.
run().catch(error => {
  console.error(error);
  process.exitCode = 1;
});
另一种常见的滑块则是如下这种拼图性质的滑块:
const puppeteer = require("puppeteer");
const Rembrandt = require("rembrandt");

/**
 * Solves the puzzle-style slider on the vue-monoplasty-slide-verify demo.
 *
 * The handle is dragged across the track in 5 px increments. At every stop a
 * screenshot of the captcha widget is compared with the most recently
 * downloaded image, and the handle is finally returned to the position whose
 * screenshot differed the least before the mouse button is released.
 *
 * @returns {Promise<void>} Resolves after the browser has been closed.
 * @throws {Error} If no reference image was captured or the slider elements
 *   cannot be found or measured.
 */
async function run() {
  const STEP_PX = 5;

  const browser = await puppeteer.launch({
    headless: false,
    defaultViewport: { width: 1366, height: 768 }
  });

  // try/finally guarantees the browser is closed even when a step throws.
  try {
    const page = await browser.newPage();

    // Holds the body of the latest image response seen by the page.
    let originalImage = "";
    await page.setRequestInterception(true);
    page.on("request", request => request.continue());
    page.on("response", async response => {
      if (response.request().resourceType() !== "image") {
        return;
      }
      // Best effort: some responses have no readable body. Keep the previous
      // image in that case instead of overwriting it with undefined.
      const body = await response.buffer().catch(() => null);
      if (body) {
        originalImage = body;
      }
    });

    await page.goto("https://monoplasty.github.io/vue-monoplasty-slide-verify/");

    if (!originalImage) {
      throw new Error("No image response was captured to compare screenshots against");
    }

    const sliderElement = await page.$(".slide-verify-slider");
    const sliderHandle = await page.$(".slide-verify-slider-mask-item");
    if (!sliderElement || !sliderHandle) {
      throw new Error("Slider track or handle element was not found on the page");
    }

    const slider = await sliderElement.boundingBox();
    const handle = await sliderHandle.boundingBox();
    if (!slider || !handle) {
      throw new Error("Slider track or handle has no bounding box (not rendered?)");
    }

    const handleCenterY = handle.y + handle.height / 2;
    const bestSlider = { position: 0, difference: 100 };

    await page.mouse.move(handle.x + handle.width / 2, handleCenterY);
    await page.mouse.down();

    const maxOffset = slider.width - handle.width / 2;
    for (let currentPosition = 0; currentPosition < maxOffset; currentPosition += STEP_PX) {
      // Up to ±5 px of vertical jitter on every step of the drag.
      await page.mouse.move(
        handle.x + currentPosition,
        handleCenterY + Math.random() * 10 - 5
      );

      const sliderContainer = await page.$(".slide-verify");
      if (!sliderContainer) {
        throw new Error("Captcha container .slide-verify was not found on the page");
      }
      const sliderImage = await sliderContainer.screenshot();

      const rembrandt = new Rembrandt({
        imageA: originalImage,
        imageB: sliderImage,
        thresholdType: Rembrandt.THRESHOLD_PERCENT
      });
      const result = await rembrandt.compare();
      const difference = result.percentageDifference * 100;

      // Remember the offset whose screenshot is closest to the reference.
      if (difference < bestSlider.difference) {
        bestSlider.difference = difference;
        bestSlider.position = currentPosition;
      }
    }

    // Go back to the best offset found and release the mouse button there.
    await page.mouse.move(handle.x + bestSlider.position, handleCenterY, {
      steps: 10
    });
    await page.mouse.up();

    // Keep the window open for 3 s so the result is visible. A plain timer is
    // used because page.waitFor() is deprecated in newer Puppeteer releases.
    await new Promise(resolve => setTimeout(resolve, 3000));
  } finally {
    await browser.close();
  }
}

// Report failures instead of leaving an unhandled promise rejection.
run().catch(error => {
  console.error(error);
  process.exitCode = 1;
});
这里我们采用了简单的图片对比的方式:在滑动过程中,每移动一小步就对验证码区域截图,并与原始图片进行比较,记录差异最小的位置;遍历完整个滑轨后,再把滑块移动到该位置并释放鼠标,即认为滑动成功。
Cendertron
在 Cendertron 中,提供了一类特殊的 Slider Captcha Monkey,在传入的 SpiderOption 中添加如下参数即可:
/** Options passed to the spider. */
export interface SpiderOption {
  allowRedirect: boolean;
  depth: number;

  /** Page plugins. */
  monkies?: {
    /** Settings for the slider captcha monkey. */
    sliderCaptcha: {
      // Presumably the selector of the slider track element — confirm against the implementation.
      sliderElementSelector: string;
      // Presumably the selector of the draggable slider handle — confirm against the implementation.
      sliderHandleSelector: string;
    };
  };
}
版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 举报,一经查实,本站将立刻删除。
文章由极客之音整理,本文链接:https://www.bmabk.com/index.php/post/77736.html