Scrapy can't find items

I'm still learning Scrapy and currently experimenting with pipelines and the ItemLoader.

However, the problem I'm running into is that the spider reports items.py doesn't exist. What exactly am I doing wrong, and why is no data getting from the spider into my pipeline?

Running the spider without the item import works fine. The pipeline is also activated in settings.py.

My error log looks like this:

Traceback (most recent call last):
  File "C:\Users\Syrix\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Users\Syrix\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "C:\Users\Syrix\AppData\Local\Programs\Python\Python310\Scripts\scrapy.exe\__main__.py", line 7, in <module> 
  File "C:\Users\Syrix\AppData\Local\Programs\Python\Python310\lib\site-packages\scrapy\cmdline.py", line 144, in execute
    cmd.crawler_process = CrawlerProcess(settings)
  File "C:\Users\Syrix\AppData\Local\Programs\Python\Python310\lib\site-packages\scrapy\crawler.py", line 280, in __init__
    super().__init__(settings)
  File "C:\Users\Syrix\AppData\Local\Programs\Python\Python310\lib\site-packages\scrapy\crawler.py", line 152, in __init__
    self.spider_loader = self._get_spider_loader(settings)
  File "C:\Users\Syrix\AppData\Local\Programs\Python\Python310\lib\site-packages\scrapy\crawler.py", line 146, in _get_spider_loader
    return loader_cls.from_settings(settings.frozencopy())
  File "C:\Users\Syrix\AppData\Local\Programs\Python\Python310\lib\site-packages\scrapy\spiderloader.py", line 67, in from_settings
    return cls(settings)
  File "C:\Users\Syrix\AppData\Local\Programs\Python\Python310\lib\site-packages\scrapy\spiderloader.py", line 24, in __init__
    self._load_all_spiders()
  File "C:\Users\Syrix\AppData\Local\Programs\Python\Python310\lib\site-packages\scrapy\spiderloader.py", line 51, in _load_all_spiders
    for module in walk_modules(name):
  File "C:\Users\Syrix\AppData\Local\Programs\Python\Python310\lib\site-packages\scrapy\utils\misc.py", line 88, in 
walk_modules
    submod = import_module(fullpath)
  File "C:\Users\Syrix\AppData\Local\Programs\Python\Python310\lib\importlib\__init__.py", line 126, in import_module
    return _bootstrap._gcd_import(name[level:], package, level)
  File "<frozen importlib._bootstrap>", line 1050, in _gcd_import
  File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
  File "<frozen importlib._bootstrap>", line 1006, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 688, in _load_unlocked
  File "<frozen importlib._bootstrap_external>", line 883, in exec_module
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "C:\Users\Syrix\WebCrawler\watches\watches\spiders\watchbot.py", line 5, in <module>
    from watches.watches.items import WatchesItem
ModuleNotFoundError: No module named 'watches.watches'
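
For reference: in a project created with scrapy startproject watches, the inner watches directory is the top of the importable package, so the item class is normally imported relative to it rather than as watches.watches. A minimal sketch of the usual import, assuming that default layout:

from watches.items import WatchesItem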

My spider looks like this:

import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.loader import ItemLoader
from watches.watches.items import WatchesItem
from scrapy.exceptions import DropItem


class WatchbotSpider(scrapy.Spider):
    name = 'watchbot'
    start_urls = ['https://www.watch.de/english/rolex.html']


    def parse(self, response, **kwargs):
        for link in response.css('div.product-item-link a::attr(href)'):
            url = link.get()
            yield scrapy.Request(url, callback=self.parse_categories)


    def parse_categories(self, response):
        for product in response.xpath('//*[@id="main"]/div[2]/div[1]'):
            l = ItemLoader(item=WatchesItem(), selector=product)
            l.add_xpath('name', '//span[@itemprop="sku"]/text()')
            l.add_xpath('reference', '//span[@itemprop="sku"]/text()')
            l.add_xpath('year', '//div[@class="product-option baujahr"]/div[@class="product-option-value"]/text()')

            yield l.load_item()
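
One thing that can keep data out of the pipeline even after the import is fixed: XPath expressions starting with // search the whole document, even when called on a sub-selector, so every add_xpath above reads from the full page rather than the current product. A minimal sketch of the relative form, assuming the same markup:

def parse_categories(self, response):
    for product in response.xpath('//*[@id="main"]/div[2]/div[1]'):
        l = ItemLoader(item=WatchesItem(), selector=product)
        # './/' keeps each query relative to the product selector
        l.add_xpath('name', './/span[@itemprop="sku"]/text()')
        l.add_xpath('reference', './/span[@itemprop="sku"]/text()')
        l.add_xpath('year', './/div[@class="product-option baujahr"]/div[@class="product-option-value"]/text()')
        yield l.load_item()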

items.py:

import scrapy
from scrapy.loader import ItemLoader
from itemloaders.processors import TakeFirst, MapCompose
from w3lib.html import remove_tags


class WatchesItem(scrapy.Item):
    # define the fields for your item here like:
    name = scrapy.Field(input_processor=MapCompose(remove_tags), output_processor=TakeFirst())
    reference = scrapy.Field(input_processor=MapCompose(remove_tags), output_processor=TakeFirst())
    year = scrapy.Field(input_processor=MapCompose(remove_tags), output_processor=TakeFirst())
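
Note that the original definitions passed output_processor as a keyword argument to MapCompose instead of to Field; with the parenthesis in the right place, TakeFirst makes load_item() store each field as a single string rather than a list. That matters for the pipeline below, where item['name'][0] would then return only the first character. A quick illustration of that behavior:

l = ItemLoader(item=WatchesItem())
l.add_value('name', '<b>Rolex</b>')
item = l.load_item()
# item['name'] == 'Rolex'  (a plain string after remove_tags + TakeFirst)
# item['name'][0] == 'R'   (first character, not the first list element)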

And last but not least, my pipeline:

import mysql
import mysql.connector
from watches.watches.spiders import watchbot

class WatchesPipeline(object):

    def __init__(self):
        self.conn = mysql.connector.connect(
            host = '',
            user = '',
            passwd = '',
            database = ''
        )
        self.curr = self.conn.cursor()


    def process_item(self, item, spider):
        self.store_db(item)
        print("Pipleline = " + item['name'] + " " + item['reference'] + " " + item['year'])
        return item

    def store_db(self, item):
        self.curr.execute("""insert into test.watch values (%s, %s, %s)""", (
            item['name'][0],
            item['reference'][0],
            item['year'][0],
        ))
        self.conn.commit()
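
As an aside, connection credentials are usually pulled from settings.py rather than hard-coded; a minimal sketch using Scrapy's standard from_crawler hook (the MYSQL_* setting names are assumptions, not Scrapy built-ins):

import mysql.connector

class WatchesPipeline(object):

    @classmethod
    def from_crawler(cls, crawler):
        # read custom settings defined in settings.py (assumed names)
        return cls(
            host=crawler.settings.get('MYSQL_HOST', 'localhost'),
            user=crawler.settings.get('MYSQL_USER'),
            passwd=crawler.settings.get('MYSQL_PASSWORD'),
            database=crawler.settings.get('MYSQL_DATABASE'),
        )

    def __init__(self, host, user, passwd, database):
        self.conn = mysql.connector.connect(
            host=host, user=user, passwd=passwd, database=database
        )
        self.curr = self.conn.cursor()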

Edit:

PS E:\semester\webcrawler_watches\watches\Crawler> scrapy crawl watchbot
Traceback (most recent call last):
  File "C:\Users\Syrix\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Users\Syrix\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "E:\semester\webcrawler_watches\venv\Scripts\scrapy.exe\__main__.py", line 7, in <module>
  File "E:\semester\webcrawler_watches\venv\lib\site-packages\scrapy\cmdline.py", line 114, in execute
    settings = get_project_settings()
  File "E:\semester\webcrawler_watches\venv\lib\site-packages\scrapy\utils\project.py", line 68, in get_project_settings
    settings.setmodule(settings_module_path, priority='project')
  File "E:\semester. semester\webcrawler_watches\venv\lib\site-packages\scrapy\settings\__init__.py", line 287, in setmodule
    module = import_module(module)
  File "C:\Users\Syrix\AppData\Local\Programs\Python\Python310\lib\importlib\__init__.py", line 126, in import_module
    return _bootstrap._gcd_import(name[level:], package, level)
  File "<frozen importlib._bootstrap>", line 1050, in _gcd_import
  File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
  File "<frozen importlib._bootstrap>", line 992, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 241, in _call_with_frames_removed
  File "<frozen importlib._bootstrap>", line 1050, in _gcd_import
  File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
  File "<frozen importlib._bootstrap>", line 1004, in _find_and_load_unlocked
ModuleNotFoundError: No module named 'watches'
PS E:\semester\webcrawler_watches\watches\Crawler>
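
This second traceback looks like a layout/working-directory problem: scrapy crawl has to be run from inside the project, i.e. the directory containing scrapy.cfg, and the watches package named in that file must sit right next to it so it can be imported. A sketch of the expected layout, assuming the project is still named watches:

webcrawler_watches/
    scrapy.cfg            # run `scrapy crawl watchbot` from this directory
    watches/
        __init__.py
        items.py
        pipelines.py
        settings.py
        spiders/
            __init__.py
            watchbot.py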

Here is my working version; please focus on this one:

import mysql
import mysql.connector
# from watches.watches.spiders import watchbot

class WatchesPipeline(object):

    def __init__(self):
        self.conn = mysql.connector.connect(
            host = 'localhost',
            user = 'root',
            passwd = '',   # your password
            database = '', # your database
        )
        self.curr = self.conn.cursor()


    # def create_table(self):
    #     self.curr.execute("""DROP TABLE IF EXISTS scrapy_tb """)
    #     self.curr.execute("""create table scrapy_tb (name text, reference text, year text)""")


    def process_item(self, item, spider):
        self.store_db(item)
        # print("Pipleline = " + item['name'] + " " + item['reference'] + " " + item['year'])
        return item

    def store_db(self, item):
        self.curr.execute("""insert into scrapy_tb values (%s, %s, %s)""", 
            (
            item['name'][0],
            item['reference'][0],
            item['year'][0]
        ))
        self.conn.commit()

    def close_spider(self, spider):
        # close the database connection once the spider finishes
        self.conn.close()
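
For the pipeline to run at all it also has to be enabled in settings.py; a minimal sketch, assuming the project module is named watches:

ITEM_PIPELINES = {
    'watches.pipelines.WatchesPipeline': 300,
}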