Spaces:

SungBeom
/

chatwine-korean

Runtime error

App Files Files Community

chatwine-korean / winenara_scrapy /tutorial /settings.py

SungBeom

Upload folder using huggingface_hub

4a51346 over 1 year ago

raw

history blame contribute delete

4.06 kB

	# Scrapy settings for tutorial project
	#
	# For simplicity, this file contains only settings considered important or
	# commonly used. You can find more settings consulting the documentation:
	#
	# https://docs.scrapy.org/en/latest/topics/settings.html
	# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
	# https://docs.scrapy.org/en/latest/topics/spider-middleware.html

	# SPLASH_URL = 'http://localhost:8050'

	# DOWNLOADER_MIDDLEWARES = {
	# 'scrapy_splash.SplashCookiesMiddleware': 723,
	# 'scrapy_splash.SplashMiddleware': 725,
	# 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware': 810,
	# }

	# SPIDER_MIDDLEWARES = {
	# 'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,
	# }

	# DUPEFILTER_CLASS = 'scrapy.dupefilters.BaseDupeFilter'
	# HTTPCACHE_STORAGE = 'scrapy_splash.SplashAwareFSCacheStorage'

	# Project identity. The spider package path is stated once and reused so the
	# two module settings below cannot drift apart.
	BOT_NAME = "tutorial"

	# Module where `scrapy genspider` creates new spiders.
	NEWSPIDER_MODULE = "tutorial.spiders"
	# Modules Scrapy searches for spider classes.
	SPIDER_MODULES = [NEWSPIDER_MODULE]


	# Scrapy's project template recommends identifying yourself (and your website)
	# in the user-agent, for example:
	#USER_AGENT = "tutorial (+http://www.yourdomain.com)"
	# This project overrides that with a browser user-agent string instead:
	# User-agent header value sent with requests: a desktop Chrome-on-Windows
	# string, split across lines purely for readability (the pieces concatenate
	# to the same single-line value).
	USER_AGENT = (
		"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
		"AppleWebKit/537.36 (KHTML, like Gecko) "
		"Chrome/58.0.3029.110 Safari/537.3"
	)


	# robots.txt rules are NOT obeyed: with this set to False, pages are requested
	# regardless of what the target site's robots.txt allows.
	# Set to True to honor robots.txt.
	ROBOTSTXT_OBEY = False

	# Configure maximum concurrent requests performed by Scrapy (default: 16)
	#CONCURRENT_REQUESTS = 32

	# Configure a delay for requests for the same website (default: 0)
	# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
	# See also autothrottle settings and docs
	# DOWNLOAD_DELAY = 1
	# The download delay setting will honor only one of:
	#CONCURRENT_REQUESTS_PER_DOMAIN = 16
	#CONCURRENT_REQUESTS_PER_IP = 16

	# Cookies are kept enabled (this matches Scrapy's default; stated explicitly).
	# Set to False to disable cookie handling.
	COOKIES_ENABLED = True

	# Disable Telnet Console (enabled by default)
	#TELNETCONSOLE_ENABLED = False

	# Override the default request headers:
	#DEFAULT_REQUEST_HEADERS = {
	# "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
	# "Accept-Language": "en",
	#}

	# Enable or disable spider middlewares
	# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
	#SPIDER_MIDDLEWARES = {
	# "tutorial.middlewares.TutorialSpiderMiddleware": 543,
	#}

	# Enable or disable downloader middlewares
	# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
	#DOWNLOADER_MIDDLEWARES = {
	# "tutorial.middlewares.TutorialDownloaderMiddleware": 543,
	#}

	# Enable or disable extensions
	# See https://docs.scrapy.org/en/latest/topics/extensions.html
	#EXTENSIONS = {
	# "scrapy.extensions.telnet.TelnetConsole": None,
	#}

	# Configure item pipelines
	# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
	#ITEM_PIPELINES = {
	# "tutorial.pipelines.TutorialPipeline": 300,
	#}
	import os

	# Route scraped items through the project's image pipeline. The integer is
	# the pipeline's order value (lower values run first).
	ITEM_PIPELINES = {"tutorial.pipelines.CustomImagesPipeline": 1}

	# Directory where downloaded images are stored.
	# The original value is an absolute path on one developer's Windows machine,
	# which does not exist anywhere else. Allow overriding it through the
	# IMAGES_STORE environment variable; the original path remains the default
	# so existing local setups behave exactly as before.
	IMAGES_STORE = os.environ.get(
		"IMAGES_STORE",
		"C:/Users/chois/Desktop/Audrey/data/tutorial/assets/img",
	)


	# Enable and configure the AutoThrottle extension (disabled by default)
	# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
	#AUTOTHROTTLE_ENABLED = True
	# The initial download delay
	#AUTOTHROTTLE_START_DELAY = 5
	# The maximum download delay to be set in case of high latencies
	#AUTOTHROTTLE_MAX_DELAY = 60
	# The average number of requests Scrapy should be sending in parallel to
	# each remote server
	#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
	# Enable showing throttling stats for every response received:
	#AUTOTHROTTLE_DEBUG = False

	# Enable and configure HTTP caching (disabled by default)
	# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
	#HTTPCACHE_ENABLED = True
	#HTTPCACHE_EXPIRATION_SECS = 0
	#HTTPCACHE_DIR = "httpcache"
	#HTTPCACHE_IGNORE_HTTP_CODES = []
	#HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage"

	# Settings pinned explicitly because their historical defaults are deprecated.

	# Encoding used for exported feeds.
	FEED_EXPORT_ENCODING = "utf-8"

	# Run Twisted on top of the asyncio selector reactor.
	TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"

	# Request fingerprinting algorithm version.
	# NOTE(review): newer Scrapy releases may deprecate this setting; confirm
	# against the installed Scrapy version before upgrading.
	REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"