diff --git a/utils/__init__.py b/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/utils/crawler_listadomanga/crawl.py b/utils/crawler_listadomanga/crawl.py index 4e6b24a..c377c68 100644 --- a/utils/crawler_listadomanga/crawl.py +++ b/utils/crawler_listadomanga/crawl.py @@ -9,6 +9,14 @@ datcrawl = datCrawl() datcrawl.register_downloader(DefaultDownloader) datcrawl.register_crawler(ListadoManga) +# datcrawl.run('http://www.listadomanga.es/coleccion.php?id=60') +# datcrawl.run('http://www.listadomanga.es/coleccion.php?id=561') +# datcrawl.run('http://www.listadomanga.es/coleccion.php?id=1037') +# datcrawl.run('http://www.listadomanga.es/coleccion.php?id=1410') +# datcrawl.run('http://www.listadomanga.es/coleccion.php?id=98') + +# exit + ids = datcrawl.run("http://www.listadomanga.es/lista.php") _list = [] errors = 0 @@ -25,10 +33,8 @@ f = open('data.json', 'w') p = ProgressBar(**custom_options) -print "Crawling process in progress..." +print("Crawling process in progress...") for _id in ids: - #print("ID: %d" % _id) - value = datcrawl.run("http://www.listadomanga.es/coleccion.php?id=%d" % _id) if value is "Error": errors += 1 @@ -47,4 +53,4 @@ print "" print "Summary:" print "--------" print "Success: %d" % success -print "Errors: %d" % errors \ No newline at end of file +print "Errors: %d" % errors diff --git a/utils/crawler_listadomanga/crawler.py b/utils/crawler_listadomanga/crawler.py index bff6dd1..2465936 100644 --- a/utils/crawler_listadomanga/crawler.py +++ b/utils/crawler_listadomanga/crawler.py @@ -94,9 +94,9 @@ class ListadoManga(Crawler): # Spanish publisher URL try: esp = root.xpath("//td[contains(., 'Editorial esp')]//text()[contains(., 'Editorial esp')]/following::a")[1] - obj['japanese_publisher_url'] = esp.attrib['href'] + obj['spanish_publisher_url'] = esp.attrib['href'] except: - obj['japanese_publisher_url'] = '' + obj['spanish_publisher_url'] = '' # Un/Published volumes obj['published_volumes'] = []