fmartingr
/
shelfzilla
Archived
1
0
Fork 0
This repository has been archived on 2021-06-29. You can view files and clone it, but cannot push or open issues or pull requests.
shelfzilla/crawler/crawler/settings.py

39 lines
990 B
Python

# -*- coding: utf-8 -*-
# Scrapy settings for crawler project
#
# For simplicity, this file contains only the most important settings by
# default. All the other settings are documented here:
#
# http://doc.scrapy.org/en/latest/topics/settings.html
#
# Name of the bot implemented by this Scrapy project.
BOT_NAME = "crawler"

# Modules searched for spider classes, and the module that receives
# newly generated spiders (both point at the same package here).
SPIDER_MODULES = ["crawler.spiders"]
NEWSPIDER_MODULE = "crawler.spiders"
# Item pipelines enabled for this project, keyed by dotted class path.
# The integer is the ordering value: lower values run first.
ITEM_PIPELINES = {
    # Check for duplicate items.
    "crawler.pipelines.DuplicatesPipeline": 100,
    # Determine whether the item is in a language other than Spanish
    # (such items are dropped).
    "crawler.pipelines.CheckLanguagePipeline": 200,
    # Clean the item's fields.
    "crawler.pipelines.CleanFieldsPipeline": 300,
    # ...
    # CouchDB pipeline from the scrapycouchdb package; it has the highest
    # ordering value, so it runs after all of the steps above.
    "scrapycouchdb.CouchDBPipeline": 1000,
}
# Settings for the scrapycouchdb pipeline enabled in ITEM_PIPELINES.
# NOTE(review): the meanings below are inferred from the setting names;
# confirm against the scrapycouchdb documentation.

# URL of the CouchDB server (a local instance on the default port).
COUCHDB_SERVER = "http://127.0.0.1:5984/"
# Name of the CouchDB database.
COUCHDB_DB = "norma"
# Presumably the item field used as the unique document key.
COUCHDB_UNIQ_KEY = "uuid"
# Presumably item fields to leave out when storing; none are ignored.
COUCHDB_IGNORE_FIELDS = []
# Logging: send output to a file instead of the console, and record
# only messages at ERROR severity or above.
LOG_FILE = "scrapy.log"
LOG_LEVEL = "ERROR"
# Crawl responsibly by identifying yourself (and your website) on the user-agent
# USER_AGENT = 'Test Crawler (+http://www.yourdomain.com)'