如何使用 unittest 类测试我的 Scrapy 方法
How to test my Scrapy methods with a unittest class
我想测试我的蜘蛛(spider)中的一些方法。
例如,我的项目有如下目录结构:
toto/
├── __init__.py
├── items.py
├── pipelines.py
├── settings.py
├── spiders
│   ├── __init__.py
│   └── mySpider.py
└── Unitest
    └── unitest.py
我的 unitest.py
看起来像这样:
# -*- coding: utf-8 -*-
import re
import weakref
import six
import unittest
from scrapy.selector import Selector
from scrapy.crawler import Crawler
from scrapy.utils.project import get_project_settings
from unittest.case import TestCase
from toto.spiders import runSpider
class SelectorTestCase(unittest.TestCase):
    """Placeholder test case used to check that the test module runs."""

    # Selector class exposed to the tests as a class attribute.
    sscls = Selector

    def test_demo(self):
        """Smoke test: prints a marker and asserts nothing."""
        # The call form prints the same text on Python 2 and Python 3.
        print("test")


if __name__ == '__main__':
    # Run every TestCase in this module when the file is executed as a script.
    unittest.main()
而我的 mySpider.py
看起来像这样:
import scrapy
class runSpider(scrapy.Spider):
    """Spider that follows selected links from the start page and yields titles."""

    name = 'blogspider'
    start_urls = ['http://blog.scrapinghub.com']

    def parse(self, response):
        """Yield a request for each matching link on the page.

        A link matches when its href ends with a slash, four digits, a
        slash, two digits and a trailing slash. Each request is handled
        by ``parse_titles``.
        """
        link_hrefs = response.css('ul li a::attr("href")')
        matching_hrefs = link_hrefs.re(r'.*/\d\d\d\d/\d\d/$')
        for href in matching_hrefs:
            target = response.urljoin(href)
            yield scrapy.Request(target, self.parse_titles)

    def parse_titles(self, response):
        """Yield one ``{'title': text}`` dict per link text under ``div.entries``."""
        title_nodes = response.css('div.entries > ul > li a::text')
        for text in title_nodes.extract():
            yield {'title': text}
在我的 unitest.py 文件中,如何调用我的蜘蛛?
我试图在我的 unitest.py 文件中添加 from toto.spiders import runSpider,
但它不起作用。
我遇到了这个错误:
Traceback (most recent call last):
  File "unitest.py", line 10, in <module>
    from toto.spiders import runSpider
ImportError: No module named toto.spiders
我该如何解决?
尝试:
import os
import sys

# Put the directory that CONTAINS the ``toto`` package on sys.path.
# Assuming this runs from toto/Unitest/unitest.py, that is two folders
# back from the current file.
current_dir = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, os.path.join(current_dir, '../..'))

# With the package's parent directory on the path, the spider has to be
# imported through its full package path. ``runSpider`` is defined in the
# ``mySpider`` module, not in ``toto/spiders/__init__.py``.
from toto.spiders.mySpider import runSpider
我想测试我的蜘蛛(spider)中的一些方法。例如,我的项目有如下目录结构:
toto/
├── __init__.py
├── items.py
├── pipelines.py
├── settings.py
├── spiders
│   ├── __init__.py
│   └── mySpider.py
└── Unitest
    └── unitest.py
我的 unitest.py
看起来像这样:
# -*- coding: utf-8 -*-
import re
import weakref
import six
import unittest
from scrapy.selector import Selector
from scrapy.crawler import Crawler
from scrapy.utils.project import get_project_settings
from unittest.case import TestCase
from toto.spiders import runSpider
class SelectorTestCase(unittest.TestCase):
    """Placeholder test case used to check that the test module runs."""

    # Selector class exposed to the tests as a class attribute.
    sscls = Selector

    def test_demo(self):
        """Smoke test: prints a marker and asserts nothing."""
        # The call form prints the same text on Python 2 and Python 3.
        print("test")


if __name__ == '__main__':
    # Run every TestCase in this module when the file is executed as a script.
    unittest.main()
而我的 mySpider.py
看起来像这样:
import scrapy
class runSpider(scrapy.Spider):
    """Spider that follows selected links from the start page and yields titles."""

    name = 'blogspider'
    start_urls = ['http://blog.scrapinghub.com']

    def parse(self, response):
        """Yield a request for each matching link on the page.

        A link matches when its href ends with a slash, four digits, a
        slash, two digits and a trailing slash. Each request is handled
        by ``parse_titles``.
        """
        link_hrefs = response.css('ul li a::attr("href")')
        matching_hrefs = link_hrefs.re(r'.*/\d\d\d\d/\d\d/$')
        for href in matching_hrefs:
            target = response.urljoin(href)
            yield scrapy.Request(target, self.parse_titles)

    def parse_titles(self, response):
        """Yield one ``{'title': text}`` dict per link text under ``div.entries``."""
        title_nodes = response.css('div.entries > ul > li a::text')
        for text in title_nodes.extract():
            yield {'title': text}
在我的 unitest.py 文件中,如何调用我的蜘蛛?
我试图在我的 unitest.py 文件中添加 from toto.spiders import runSpider,
但它不起作用。
我遇到了这个错误:
Traceback (most recent call last):
  File "unitest.py", line 10, in <module>
    from toto.spiders import runSpider
ImportError: No module named toto.spiders
我该如何解决?
尝试:
import os
import sys

# Put the directory that CONTAINS the ``toto`` package on sys.path.
# Assuming this runs from toto/Unitest/unitest.py, that is two folders
# back from the current file.
current_dir = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, os.path.join(current_dir, '../..'))

# With the package's parent directory on the path, the spider has to be
# imported through its full package path. ``runSpider`` is defined in the
# ``mySpider`` module, not in ``toto/spiders/__init__.py``.
from toto.spiders.mySpider import runSpider