使用 CasperJS 自动删除 Facebook 组 post

Automating Facebook group post deletion using CasperJS

我正在编写一个脚本,用于从 Facebook 群组中删除 post,因为 Facebook 的 Graph API 不允许开发人员这样做,除非这些 post 是由开发者自己的帐户发布的。

到目前为止,我已经能够登录 Facebook,然后导航到所需的群组页面。从那里我可以获得页面上可见的每个 post 的 XPath(使用选择器 a[data-testid='post_chevron_button'])。我的脚本尝试在每个 XPath 选择器上调用 this.click() 时失败。

我现在的脚本如下:

// CasperJS script: logs into Facebook, opens a group page and tries to click
// the chevron (drop-down) button of every visible post.

// NOTE(review): presumably set so that this.test can be used outside of the
// `casperjs test` runner — confirm against the CasperJS version in use.
phantom.casperTest = true;
// Helper that wraps an XPath expression so it can be passed where a selector is expected.
var x = require('casper').selectXPath;
var casper = require('casper').create({   
    verbose: true,
    pageSettings: {
         loadImages:  false,
         loadPlugins: false,
         userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.4'
    }
});

// Echo every console message emitted inside the headless browser context.
casper.on('remote.message', function(msg) {
    this.echo('remote message caught: ' + msg);
});

// Echo JavaScript errors raised inside the page (the trace argument is not used).
casper.on("page.error", function(msg, trace) {
    this.echo("Page Error: " + msg, "ERROR");
});

var url = 'http://www.facebook.com/';

// Step 1: load the home page, fill in the login form and capture a screenshot.
casper.start(url, function() {
    console.log("page loaded");
    this.test.assertExists('form#login_form', 'form is found');
    // '{email}' / '{password}' are placeholders; the third argument (true)
    // asks CasperJS to submit the form after filling it.
    this.fill('form#login_form', { 
        email: '{email}',
        pass: '{password}'
    }, true);
    // NOTE(review): '#u_0_q' is presumably the login button; the id looks
    // auto-generated, so it may not be stable — confirm.
    this.click('#u_0_q');
    this.wait(1000, function() {
        this.echo("Capturing image of page after login.");
        this.capture('loggedin.png');
    });
});

// Step 2: open the group page ('{group-id}' is a placeholder) and click each chevron.
casper.thenOpen('https://www.facebook.com/groups/{group-id}/', function() {
    this.echo(this.getTitle());
    this.wait(1000, function() {
        this.capture('group.png');
    });

    // NOTE(review): this lookup runs immediately, while the wait() callback above
    // is only scheduled — so it presumably executes before the 1000 ms wait.
    var elements = casper.getElementsInfo("a[data-testid='post_chevron_button']");

    var index = 1;
    // NOTE(review): forEach is called without a thisArg, so `this` inside the
    // callback is not the casper instance; this.click / this.wait are then
    // likely undefined here, which would match the reported TypeError.
    elements.forEach(function(element){
        // Build an XPath that addresses the element by its id attribute.
        var xpath = '//*[@id="' + element.attributes["id"] + '"]';
        console.log(xpath);
        this.click(x(xpath));
        this.wait(100, function() {
            this.capture('chevronlink' + index + '.png');
        });
        index++;
    });
});

casper.run();

当脚本执行到 this.click(x(xpath)); 时,我收到错误消息 TypeError: undefined is not a constructor (evaluating 'this.click(x(xpath))')。如果我把最后那段创建数组并遍历它的代码直接替换为 this.click("a[data-testid='post_chevron_button']");,脚本就没有问题。

有谁知道 CasperJS 为什么不接受用 XPath 选择器调用 click()?根据 CasperJS 的文档,XPath 似乎是有效的选择器。

更新

我更新了问题的标题以更准确地描述所需的结果。

根据 dasmelch 的建议,我对脚本做了一些修改,并把下面这段代码合并到脚本中(放在 casper.thenOpen 部分之后):

// Collects the id of every post chevron button on the current page, then
// queues three CasperJS steps per id: click it, capture a screenshot, go back.
casper.then(function() {
  // The selector must be a single-line string literal; splitting it across
  // lines is a syntax error.
  var elements = casper.getElementsAttribute("a[data-testid='post_chevron_button']", 'id');
  while (elements.length > 0) {
    // Always take the last remaining id. The IIFE gives each queued step its
    // own `element` binding (and avoids leaking an implicit global).
    (function(element) {
      var xpath = '//*[@id="' + element + '"]';
      console.log(xpath);
      // do it step by step
      casper.then(function() {
        this.click(x(xpath));
      });
      casper.then(function() {
        this.capture('chevronlink' + element + '.png');
      });
      // go back to the page with the links (if necessary)
      casper.then(function() {
        casper.back();
      });
    })(elements.pop());
  }
});

我现在收到此错误:Cannot dispatch mousedown event on nonexistent selector: xpath selector: //*[@id="u_0_47"]

昨晚,我决定换个思路。我离期望的最终结果更近了,但现在 CasperJS 和/或 PhantomJS 在单击 post_chevron_button 之后,无法找到下拉列表中存在的元素。这是我最终得到的脚本(casper.thenOpen 之前的所有内容与最初展示的脚本保持一致):

// Opens the group page ('{group-id}' is a placeholder), clicks a post chevron
// and looks for a "delete" menu link (an <a> whose ajaxify attribute contains
// the delete.php URL) to click. Loops until no delete link was clicked.
casper.thenOpen('https://www.facebook.com/groups/{group-id}/', function() {
    this.echo(this.getTitle());
    this.wait(1000, function() {
        this.capture('group.png');
    });

    // The selector must be a single-line string literal; splitting it across
    // lines is a syntax error.
    var elements = casper.getElementsInfo("a[data-testid='post_chevron_button']");
    while (elements.length > 0) {
        this.click("a[data-testid='post_chevron_button']");
        // NOTE(review): wait() only schedules its callback; the statements
        // below presumably run before the 1000 ms have elapsed, which would
        // match the out-of-order log output described in the question.
        this.wait(1000, function() {
            this.capture('chevron_click.png');
            console.log("chevron_click.png saved");
        });
        var chevronLinks = casper.getElementsInfo("a[ajaxify]");
        console.log("Found " + chevronLinks.length + " elements with ajaxify attribute.");
        // Stays at 1 when no delete link was found in this pass.
        var chevronLinksIndex = 1;
        // NOTE(review): forEach is called without a thisArg, so `this` inside
        // the callback is not the casper instance — confirm before relying on
        // this.click / this.wait here.
        chevronLinks.forEach(function(element){
            var ajaxifyValue = element.attributes["ajaxify"];
            console.log(ajaxifyValue);
            if (ajaxifyValue.indexOf("delete.php?group_id={group-id}") !== -1) {
                this.click("a[ajaxify='"+ajaxifyValue+"']");
                this.wait(100, function(){
                    this.capture('deletePost' + chevronLinksIndex);
                });
                chevronLinksIndex++;
            }
        });
        // Nothing to delete was found: stop instead of looping forever.
        if (chevronLinksIndex === 1) {
            break;
        }
        elements = casper.getElementsInfo("a[data-testid='post_chevron_button']");
    }
});

我知道应该有一个元素的 ajaxify 属性包含我正在搜索的值(因为我自己在浏览器中一步步操作时,单击 a[data-testid='post_chevron_button'] 后就会出现该元素),但 Casper 找不到它。不仅如此,我的 chevron_click.png 图像文件本应在脚本每次运行时更新,但实际上并没有。

部分代码执行不按顺序进行。例如,ajaxify 属性值的日志记录在看到 chevron_click.png saved 之前发生在控制台中。这可能是预料之中的,但不幸的是我没有很多 JS 经验。这个执行顺序问题可以解释为什么我对必要元素的搜索没有返回我期望的结果。

下面是删除某个 post 时需要点击的元素示例:

<a class="_54nc" href="#" rel="async-post" 
ajaxify="/ajax/groups/mall/delete.php?group_id={group-id}&amp;message_id=806608486110204&amp;story_dom_id=mall_post_806608486110204%3A6%3A0&amp;entstory_context=%7B%22last_view_time%22%3A1495072771%2C%22fbfeed_context%22%3Atrue%2C%22location_type%22%3A2%2C%22outer_object_element_id%22%3A%22mall_post_806608486110204%3A6%3A0%22%2C%22object_element_id%22%3A%22mall_post_806608486110204%3A6%3A0%22%2C%22is_ad_preview%22%3Afalse%2C%22is_editable%22%3Afalse%2C%22mall_how_many_post_comments%22%3A2%2C%22bump_reason%22%3A0%2C%22story_width%22%3A502%2C%22shimparams%22%3A%7B%22page_type%22%3A16%2C%22actor_id%22%3A664025626%2C%22story_id%22%3A806608486110204%2C%22ad_id%22%3A0%2C%22_ft_%22%3A%22%22%2C%22location%22%3A%22group%22%7D%2C%22story_id%22%3A%22u_0_21%22%2C%22caret_id%22%3A%22u_0_22%22%7D&amp;surface=group_post_chevron"
role="menuitem"><span><span class="_54nh"><div class="_41t5"><i
class="_41t7 img sp_gJvT8CoKHU- sx_0f12ae"></i><i class="_41t8 img
sp_s36yWP_7MD_ sx_7e9f7d"></i>Delete Post</div></span></span></a>

您的 XPath 用法是正确的,但 forEach 方法似乎不适用于这里。您可以使用 casper.getElementsAttribute 直接获取所有这些元素的 ID,再用 while 循环逐个遍历它们,这样处理起来更容易:

...
// Open the group page ('{group-id}' is a placeholder) and capture a screenshot
// once the 1000 ms wait has elapsed.
casper.thenOpen('https://www.facebook.com/groups/{group-id}/', function() {
  this.echo(this.getTitle());
  this.wait(1000, function() {
    this.capture('group.png');
  });
});
// In a separate step, read the id of every post chevron button and queue
// click / capture / back steps for each one.
casper.then(function() {
  var elements = casper.getElementsAttribute("a[data-testid='post_chevron_button']", 'id');
  while (elements.length > 0) {
    // Always take the last remaining id. The IIFE gives each queued step its
    // own `element` binding (and avoids leaking an implicit global).
    (function(element) {
      var xpath = '//*[@id="' + element + '"]';
      console.log(xpath);
      // do it step by step
      casper.then(function() {
        this.click(x(xpath));
      });
      casper.then(function() {
        this.capture('chevronlink' + element + '.png');
      });
      // go back to the page with the links (if necessary)
      casper.then(function() {
        casper.back();
      });
    })(elements.pop());
  }
});
...

我没有实际查看 FB 的页面,但我猜你必须返回(casper.back)到这些链接(元素)所在的页面。

我最终使用 Selenium 2 的 .NET API 实现了我想做的事情。

解决代码如下:

/// <summary>
/// Deletes posts from a Facebook group by driving Chrome through Selenium WebDriver:
/// logs in, opens the group page, then repeatedly opens a post's chevron menu,
/// clicks its visible "delete" entry and confirms the dialog.
/// </summary>
class Program
{
    // Group page to clean up; "{group-id}" is a placeholder for the real id.
    private const string GroupUrl = "https://www.facebook.com/groups/{group-id}/";

    // Chevron (drop-down) button of a post.
    private const string ChevronXPath = "//a[@data-testid='post_chevron_button']";

    // Menu links whose ajaxify attribute points at the group's delete.php endpoint.
    private const string DeletePostXPath = "//a[contains(@ajaxify,'delete.php?group_id={group-id}')]";

    static void Main(string[] args)
    {
        var options = new ChromeOptions();
        // NOTE(review): value 2 presumably blocks the browser notification prompt — confirm.
        options.AddUserProfilePreference("profile.default_content_setting_values.notifications", 2);

        using (IWebDriver driver = new ChromeDriver(options))
        {
            // Maximize window
            driver.Manage().Window.Maximize();

            // Log into Facebook ("username" / "password" are placeholders)
            driver.Navigate().GoToUrl("http://www.facebook.com/");
            driver.FindElement(By.Id("email")).SendKeys("username");
            driver.FindElement(By.Id("pass")).SendKeys("password");
            driver.FindElement(By.Id("pass")).SendKeys(Keys.Enter);

            driver.Navigate().GoToUrl(GroupUrl);
            var chevronPostLinks = driver.FindElements(By.XPath(ChevronXPath));
            if (chevronPostLinks.Count == 0)
            {
                // No posts on the page: nothing to delete.
                return;
            }

            // Open the first post's menu so its delete entry is rendered.
            chevronPostLinks[0].Click();
            Thread.Sleep(1000);
            var deletePostElements = driver.FindElements(By.XPath(DeletePostXPath));
            while (deletePostElements.Count > 0 && chevronPostLinks.Count > 0)
            {
                Thread.Sleep(1000);
                // Only the delete entry of the currently open menu is displayed.
                var visibleDelete = deletePostElements.FirstOrDefault(e => e.Displayed);
                if (visibleDelete == null)
                {
                    // No clickable delete entry; stop rather than dereference null.
                    break;
                }
                visibleDelete.Click();
                Thread.Sleep(1000);
                driver.FindElement(By.ClassName("layerConfirm")).Click();

                Thread.Sleep(2000);
                chevronPostLinks = driver.FindElements(By.XPath(ChevronXPath));
                if (chevronPostLinks.Count == 0)
                {
                    // Reload the group page to look for further posts.
                    driver.Navigate().GoToUrl(GroupUrl);
                    chevronPostLinks = driver.FindElements(By.XPath(ChevronXPath));
                    if (chevronPostLinks.Count == 0)
                    {
                        // Still no posts after reloading: all done.
                        break;
                    }
                }
                chevronPostLinks[0].Click();
                Thread.Sleep(1000);
                deletePostElements = driver.FindElements(By.XPath(DeletePostXPath));
            }
        }
    }
}

我想做一些改进,比如使用 Selenium 等待元素可见而不是使用 Thread.Sleep(),但它对我的目的来说工作得很好。