From 7432f212c679d72cc7ed2feb3cf4c8a4bfd836f8 Mon Sep 17 00:00:00 2001 From: Felipe Martin Date: Wed, 23 Apr 2014 00:34:17 +0200 Subject: [PATCH] Updated listadomanga crawler --- utils/__init__.py | 0 utils/crawler_listadomanga/crawl.py | 14 ++++++++++---- utils/crawler_listadomanga/crawler.py | 4 ++-- 3 files changed, 12 insertions(+), 6 deletions(-) create mode 100644 utils/__init__.py diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/crawler_listadomanga/crawl.py b/utils/crawler_listadomanga/crawl.py index 4e6b24a..c377c68 100644 --- a/utils/crawler_listadomanga/crawl.py +++ b/utils/crawler_listadomanga/crawl.py @@ -9,6 +9,14 @@ datcrawl = datCrawl() datcrawl.register_downloader(DefaultDownloader) datcrawl.register_crawler(ListadoManga) +# datcrawl.run('http://www.listadomanga.es/coleccion.php?id=60') +# datcrawl.run('http://www.listadomanga.es/coleccion.php?id=561') +# datcrawl.run('http://www.listadomanga.es/coleccion.php?id=1037') +# datcrawl.run('http://www.listadomanga.es/coleccion.php?id=1410') +# datcrawl.run('http://www.listadomanga.es/coleccion.php?id=98') + +# exit + ids = datcrawl.run("http://www.listadomanga.es/lista.php") _list = [] errors = 0 @@ -25,10 +33,8 @@ f = open('data.json', 'w') p = ProgressBar(**custom_options) -print "Crawling process in progress..." +print("Crawling process in progress...") for _id in ids: - #print("ID: %d" % _id) - value = datcrawl.run("http://www.listadomanga.es/coleccion.php?id=%d" % _id) if value is "Error": errors += 1 @@ -47,4 +53,4 @@ print "" print "Summary:" print "--------" print "Success: %d" % success -print "Errors: %d" % errors \ No newline at end of file +print "Errors: %d" % errors diff --git a/utils/crawler_listadomanga/crawler.py b/utils/crawler_listadomanga/crawler.py index bff6dd1..2465936 100644 --- a/utils/crawler_listadomanga/crawler.py +++ b/utils/crawler_listadomanga/crawler.py @@ -94,9 +94,9 @@ class ListadoManga(Crawler): # Spanish publisher URL try: esp = root.xpath("//td[contains(., 'Editorial esp')]//text()[contains(., 'Editorial esp')]/following::a")[1] - obj['japanese_publisher_url'] = esp.attrib['href'] + obj['spanish_publisher_url'] = esp.attrib['href'] except: - obj['japanese_publisher_url'] = '' + obj['spanish_publisher_url'] = '' # Un/Published volumes obj['published_volumes'] = []