shelfzilla/utils/crawler_listadomanga/crawl.py

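"""Crawl listadomanga.es collections with datCrawl and dump the results to data.json.

Collection IDs are gathered from the genre listing pages, each collection page
is crawled in turn, and a progress bar plus a success/error summary is printed.
"""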

import sys
import json
from datCrawl import datCrawl
from datCrawl.downloaders import DefaultDownloader
from crawler import ListadoManga
from progressbar import ProgressBar

# Register the default downloader and the ListadoManga crawler with datCrawl
datcrawl = datCrawl()
datcrawl.register_downloader(DefaultDownloader)
datcrawl.register_crawler(ListadoManga)
# datcrawl.run('http://www.listadomanga.es/coleccion.php?id=60')
# datcrawl.run('http://www.listadomanga.es/coleccion.php?id=561')
# datcrawl.run('http://www.listadomanga.es/coleccion.php?id=1037')
# datcrawl.run('http://www.listadomanga.es/coleccion.php?id=1410')
# datcrawl.run('http://www.listadomanga.es/coleccion.php?id=98')
# exit

# Collect collection IDs from every genre listing page (genres 1 to 10)
ids = []
for i in range(1, 11):
    ids.extend(datcrawl.run("http://www.listadomanga.es/lista.php?genero={}".format(i)))

_list = []
errors = 0
success = 0
custom_options = {
    'end': len(ids) - 1,
    'width': 50,
    'fill': '#',
    'format': '%(progress)s%% [%(fill)s%(blank)s]'
}
f = open('data.json', 'w')
p = ProgressBar(**custom_options)
print("Crawling process in progress...")
# Crawl each collection page, keeping successful results and counting errors
for _id in ids:
    value = datcrawl.run("http://www.listadomanga.es/coleccion.php?id=%d" % _id)
    if value == "Error":  # compare by value; 'is' checks identity and is unreliable for strings
        errors += 1
    else:
        success += 1
        _list.append(value)
    sys.stdout.write("\r %s" % p)
    p += 1
    sys.stdout.flush()

json.dump(_list, f)
print(" <-- Completed!")
f.close()
print("")
print("Summary:")
print("--------")
print("Success: %d" % success)
print("Errors: %d" % errors)