From 28c26fdc770152d307eebf694659cb52653e834b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felipe=20Marti=CC=81n?= Date: Mon, 25 Aug 2014 09:26:43 +0200 Subject: [PATCH] Updated crawler with all manga sources --- utils/crawler_listadomanga/crawl.py | 4 +++- utils/crawler_listadomanga/crawler.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/utils/crawler_listadomanga/crawl.py b/utils/crawler_listadomanga/crawl.py index c377c68..a95130f 100644 --- a/utils/crawler_listadomanga/crawl.py +++ b/utils/crawler_listadomanga/crawl.py @@ -17,7 +17,9 @@ datcrawl.register_crawler(ListadoManga) # exit -ids = datcrawl.run("http://www.listadomanga.es/lista.php") +ids = [] +for i in range(1, 11): + ids.extend(datcrawl.run("http://www.listadomanga.es/lista.php?genero={}".format(i))) _list = [] errors = 0 success = 0 diff --git a/utils/crawler_listadomanga/crawler.py b/utils/crawler_listadomanga/crawler.py index 2465936..61d33cf 100644 --- a/utils/crawler_listadomanga/crawler.py +++ b/utils/crawler_listadomanga/crawler.py @@ -8,7 +8,7 @@ from pprint import pprint class ListadoManga(Crawler): urls = [ ('get_manga', '(?P<url>^http\:\/\/www\.listadomanga\.es\/coleccion\.php(.*)$)'), - ('get_links', '(?P<url>^http\:\/\/www\.listadomanga\.es\/lista\.php)'), + ('get_links', '(?P<url>^http\:\/\/www\.listadomanga\.es\/lista\.php\?genero=\d+)'), ] downloader = 'DefaultDownloader'