爬虫中滑动验证的绕过

导读:本篇文章讲解 爬虫中滑动验证的绕过,希望对大家有帮助,欢迎收藏,转发!站点地址:www.bmabk.com

验证是常见的反爬虫策略之一,在现在的很多站点中我们会引入滑动验证的方式,来校验访问者的真实性。譬如下面著名的 jQuery 滑动插件:

爬虫中滑动验证的绕过

 

在模拟登陆时,我们往往需要绕过这样的滑动验证,而基于 Puppeteer 的动态爬虫也给予了便利;往往我们需要进行以下步骤:移动到滑条中间,按下鼠标,移动鼠标,释放鼠标。

const puppeteer = require("puppeteer");

async function run() {
  const browser = await puppeteer.launch({
    headless: false,
    defaultViewport: { width: 1366, height: 768 }
  });
  const page = await browser.newPage();

  await page.goto("http://kthornbloom.com/slidetosubmit/");
  await page.type('input[name="name"]', "Puppeteer Bot");
  await page.type('input[name="email"]', "js@automation.com");

  let sliderElement = await page.$(".slide-submit");
  let slider = await sliderElement.boundingBox();

  let sliderHandle = await page.$(".slide-submit-thumb");
  let handle = await sliderHandle.boundingBox();

  await page.mouse.move(
    handle.x + handle.width / 2,
    handle.y + handle.height / 2
  );
  await page.mouse.down();
  await page.mouse.move(handle.x + slider.width, handle.y + handle.height / 2, {
    steps: 10
  });
  await page.mouse.up();

  await page.waitFor(3000);

  // success!

  await browser.close();
}

run();

在实际的案例中,我们可以以淘宝的注册界面为例:

const puppeteer = require("puppeteer");

async function run() {
  const browser = await puppeteer.launch({
    headless: false,
    defaultViewport: { width: 1366, height: 768 }
  });
  const page = await browser.newPage();

  await page.evaluateOnNewDocument(() => {
    Object.defineProperty(navigator, "webdriver", {
      get: () => false
    });
  });

  await page.goto("https://world.taobao.com/markets/all/sea/register");

  let frame = page.frames()[1];
  await frame.waitForSelector(".nc_iconfont.btn_slide");

  const sliderElement = await frame.$(".slidetounlock");
  const slider = await sliderElement.boundingBox();

  const sliderHandle = await frame.$(".nc_iconfont.btn_slide");
  const handle = await sliderHandle.boundingBox();
  await page.mouse.move(
    handle.x + handle.width / 2,
    handle.y + handle.height / 2
  );
  await page.mouse.down();
  await page.mouse.move(handle.x + slider.width, handle.y + handle.height / 2, {
    steps: 50
  });
  await page.mouse.up();

  await page.waitFor(3000);

  // success!

  await browser.close();
}

run();

爬虫中滑动验证的绕过

 

另一种常见的滑块则是如下这种拼图性质的滑块:

爬虫中滑动验证的绕过

 

const puppeteer = require("puppeteer");
const Rembrandt = require("rembrandt");

async function run() {
  const browser = await puppeteer.launch({
    headless: false,
    defaultViewport: { width: 1366, height: 768 }
  });
  const page = await browser.newPage();

  let originalImage = "";

  await page.setRequestInterception(true);
  page.on("request", request => request.continue());
  page.on("response", async response => {
    if (response.request().resourceType() === "image")
      originalImage = await response.buffer().catch(() => {});
  });

  await page.goto("https://monoplasty.github.io/vue-monoplasty-slide-verify/");

  const sliderElement = await page.$(".slide-verify-slider");
  const slider = await sliderElement.boundingBox();

  const sliderHandle = await page.$(".slide-verify-slider-mask-item");
  const handle = await sliderHandle.boundingBox();

  let currentPosition = 0;
  let bestSlider = {
    position: 0,
    difference: 100
  };

  await page.mouse.move(
    handle.x + handle.width / 2,
    handle.y + handle.height / 2
  );
  await page.mouse.down();

  while (currentPosition < slider.width - handle.width / 2) {
    await page.mouse.move(
      handle.x + currentPosition,
      handle.y + handle.height / 2 + Math.random() * 10 - 5
    );

    let sliderContainer = await page.$(".slide-verify");
    let sliderImage = await sliderContainer.screenshot();

    const rembrandt = new Rembrandt({
      imageA: originalImage,
      imageB: sliderImage,
      thresholdType: Rembrandt.THRESHOLD_PERCENT
    });

    let result = await rembrandt.compare();
    let difference = result.percentageDifference * 100;

    if (difference < bestSlider.difference) {
      bestSlider.difference = difference;
      bestSlider.position = currentPosition;
    }

    currentPosition += 5;
  }

  await page.mouse.move(
    handle.x + bestSlider.position,
    handle.y + handle.height / 2,
    { steps: 10 }
  );
  await page.mouse.up();

  await page.waitFor(3000);

  // success!

  await browser.close();
}

run();

这里我们采用了简单的图片对比的方式,即在滑动过程中,如果发现了有符合阈值的差异,则认为是已经滑动成功。

Cendertron

Cendertron 中,提供了一类特殊的 Slider Captcha Monkey,在传入的 SpiderOption 中添加如下参数即可:

export interface SpiderOption {
  allowRedirect: boolean;
  depth: number;
  // 页面插件
  monkies?: {
    sliderCaptcha: {
      sliderElementSelector: string;
      sliderHandleSelector: string;
    };
  };
}

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 举报,一经查实,本站将立刻删除。

文章由极客之音整理,本文链接:https://www.bmabk.com/index.php/post/77736.html

(0)
小半的头像小半

相关推荐

极客之音——专业性很强的中文编程技术网站,欢迎收藏到浏览器,订阅我们!