Download everything with retries on failures

This commit is contained in:
Felipe M 2021-02-09 23:38:18 +01:00
parent 3f6bf86f2a
commit cecdd22d99
Signed by: fmartingr
GPG Key ID: 716BC147715E716F
3 changed files with 139 additions and 42 deletions

2
.gitignore vendored
View File

@ -15,3 +15,5 @@
# Dependency directories (remove the comment below to include it)
# vendor/
# Test outputs
Output

View File

@ -15,6 +15,7 @@ import (
"time"
"code.fmartingr.dev/fmartingr/go-mangadex"
"code.fmartingr.dev/fmartingr/mangadex2cbr/pkg/tasks"
"github.com/sirupsen/logrus"
)
@ -23,6 +24,7 @@ type CliOPtions struct {
}
const defaultLogLevel string = "INFO"
const defaultLanguage string = "gb"
func fileExists(filename string) bool {
info, err := os.Stat(filename)
@ -40,7 +42,7 @@ func DownloadFile(url string, destinationPath string) error {
}
if response.StatusCode != 200 {
logrus.Errorf("Status code not OK: %d", response.StatusCode)
//logrus.Errorf("Status code not OK: %d", response.StatusCode)
return errors.New("Status code not 200")
}
@ -62,7 +64,10 @@ func DownloadFile(url string, destinationPath string) error {
return err
}
file.Write(body)
_, errWrite := file.Write(body)
if errWrite != nil {
logrus.Errorf("Error writting image: %s", errWrite)
}
return nil
}
@ -71,6 +76,7 @@ func Start() {
logLevelFlag := flag.String("log-level", defaultLogLevel, "Log level")
mangaIDFlag := flag.Int("manga-id", 0, "Manga ID to convert")
noCacheFlag := flag.Bool("no-cache", false, "Cache requests to mangadex")
languageFlag := flag.String("language", defaultLanguage, "Language to fetch chapters")
outputPath := "Output"
flag.Parse()
@ -108,18 +114,27 @@ func Start() {
var mangaChapters []mangadex.MangaChapterList
mangaGroups := map[int]mangadex.MangaGroup{}
chapterParams := mangadex.NewGetChaptersParams()
currentPage := 1
// TODO: Select language
selectedLanguage := "gb"
selectedLanguage := *languageFlag
var fetchChaptersTasks []tasks.Task
fetchChaptersTasks = tasks.TaskPush(fetchChaptersTasks, tasks.Task{Arguments: map[string]string{"page": "1"}})
for len(fetchChaptersTasks) > 0 {
var task tasks.Task
task, fetchChaptersTasks = tasks.TaskPop(fetchChaptersTasks)
logrus.Infof("Fetching chapters (page %s)", task.Arguments["page"])
page, errPage := strconv.Atoi(task.Arguments["page"])
if errPage != nil {
logrus.Error("Can't convert page to int: %s: %s", task.Arguments["page"], errPage)
}
chapterParams.Page = page
for currentPage != chapterParams.Page {
chapterParams.Page = currentPage
logrus.Infof("Downloading chapters page %d", currentPage)
chapters, groups, errChapters := manga.GetChapters(chapterParams)
if errChapters != nil {
logrus.Errorf("Error retrieving manga chapters: %s", errChapters)
}
for chapter := range chapters {
if chapters[chapter].Language == selectedLanguage {
mangaChapters = append(mangaChapters, chapters[chapter])
@ -135,7 +150,7 @@ func Start() {
// If we have the total number of items we try the next page
if len(chapters) == chapterParams.Limit {
currentPage++
fetchChaptersTasks = tasks.TaskPush(fetchChaptersTasks, tasks.Task{Arguments: map[string]string{"page": strconv.Itoa(page + 1)}})
}
}
@ -144,8 +159,7 @@ func Start() {
logrus.Printf(" %6d: %s", mangaGroups[group].ID, mangaGroups[group].Name)
}
// TODO: Select groups for digitalization
// TODO: Using all for testing
// TODO: Select groups for digitalization, Using all for testing
selectedGroups := make([]int, 0, len(mangaGroups))
for k := range mangaGroups {
selectedGroups = append(selectedGroups, k)
@ -157,19 +171,38 @@ func Start() {
mangaVolumeChapter := map[string]mangadex.MangaChapterList{}
var mangaVolumeChapterKeys []string
//testTest := map[string]map[string]mangadex.MangaChapter{}
// Using keys to keep track of which chapters we already have in store, and for sorting
// through chapters and volumes as well. The keys are also useful as page prefixes.
var downloadChaptersTasks []tasks.Task
for chapter := range mangaChapters {
// Sorting fix for chapters that do not contain "decimals".
// Usually chapters are just 1, 2, 3... but on some occasions the chapters are numbered like
// 1, 1.1, 1.2, 2, ... and that makes a mess when sorting. Turning the "non-decimal" chapters
// into fake "decimals" solves this issue easily for us.
// TODO Delete me
if mangaChapters[chapter].Volume != "1" {
continue
}
if !strings.Contains(mangaChapters[chapter].Chapter, ".") {
mangaChapters[chapter].Chapter += ".0"
}
volumeChapterKey := fmt.Sprintf("%04s_%08s", mangaChapters[chapter].Volume, mangaChapters[chapter].Chapter)
_, exists := mangaVolumeChapter[volumeChapterKey]
if !exists {
//testTest[fmt.Sprintf("%04s", mangaChapters[chapter].Volume)][fmt.Sprintf("%08s", mangaChapters[chapter].Chapter)] = mangaChapters[chapter]
logrus.Debugf("Collecting volume %4s chapter %4s from group %7d", mangaChapters[chapter].Volume, mangaChapters[chapter].Chapter, mangaChapters[chapter].Groups)
mangaVolumeChapter[volumeChapterKey] = mangaChapters[chapter]
mangaVolumeChapterKeys = append(mangaVolumeChapterKeys, volumeChapterKey)
downloadChaptersTasks = tasks.TaskPush(
downloadChaptersTasks,
tasks.Task{
Arguments: map[string]string{
"chapterID": strconv.Itoa(mangaChapters[chapter].ID),
"volume": mangaChapters[chapter].Volume,
"key": volumeChapterKey,
}})
}
}
@ -177,40 +210,77 @@ func Start() {
sort.Strings(mangaVolumeChapterKeys)
mangaOutputPath := filepath.Join(outputPath, manga.Title)
for i := range mangaVolumeChapterKeys {
chapter := mangaVolumeChapter[mangaVolumeChapterKeys[i]]
if chapter.Volume == "1" {
volumeOutputPath := filepath.Join(mangaOutputPath, fmt.Sprintf("%s - Volume %s", manga.Title, chapter.Volume))
logrus.Infof("Processing Volume %s Chapter %s", chapter.Volume, chapter.Chapter)
logrus.Tracef("Processing %s", mangaVolumeChapterKeys[i])
logrus.Infof("Downloading chapters and calculating pages ")
errMkdir := os.MkdirAll(volumeOutputPath, 0766)
if errMkdir != nil {
logrus.Fatalf("Error creating output directory: %s", errMkdir)
}
var downloadPagesTasks []tasks.Task
chapterDetail, errChapterDetail := manga.GetChapter(strconv.Itoa(chapter.ID))
if errChapterDetail != nil {
logrus.Errorf("Failed getting chapter detail: %s", errChapterDetail)
}
for page := range chapterDetail.Pages {
logrus.Infof("Processing page %s", chapterDetail.Pages[page])
extension := path.Ext(chapterDetail.Pages[page])
pageDestinationPath := filepath.Join(volumeOutputPath, fmt.Sprintf("%s_%03d%s", mangaVolumeChapterKeys[i], page, extension))
if !fileExists(pageDestinationPath) {
logrus.Tracef("Downloading page from %s", chapterDetail.Server+path.Join(chapterDetail.Hash, chapterDetail.Pages[page]))
logrus.Tracef("Downlading page to %s", pageDestinationPath)
errDownload := DownloadFile(chapterDetail.Server+path.Join(chapterDetail.Hash, chapterDetail.Pages[page]), pageDestinationPath)
if errDownload != nil {
logrus.Errorf("Error downloading page: %s", errDownload)
}
}
time.Sleep(100 * time.Millisecond)
}
for len(downloadChaptersTasks) > 0 {
var task tasks.Task
task, downloadChaptersTasks = tasks.TaskPop(downloadChaptersTasks)
// TODO: remove me
if task.Arguments["volume"] != "1" {
continue
}
chapter, errChapterDetail := manga.GetChapter(task.Arguments["chapterID"])
if errChapterDetail != nil {
logrus.Errorf("Failed getting chapter detail: %s", errChapterDetail)
}
volumeOutputPath := filepath.Join(mangaOutputPath, fmt.Sprintf("%s - Volume %s", manga.Title, chapter.Volume))
logrus.Infof("Processing Volume %s Chapter %s", chapter.Volume, chapter.Chapter)
logrus.Tracef("Processing %s", task.Arguments["key"])
errMkdir := os.MkdirAll(volumeOutputPath, 0766)
if errMkdir != nil {
logrus.Fatalf("Error creating output directory: %s", errMkdir)
}
for page := range chapter.Pages {
extension := path.Ext(chapter.Pages[page])
pageFilename := fmt.Sprintf("%s_%03d%s", task.Arguments["key"], page, extension)
pageDestinationPath := filepath.Join(volumeOutputPath, pageFilename)
downloadPagesTasks = tasks.TaskPush(downloadPagesTasks, tasks.Task{
Arguments: map[string]string{
"filename": pageFilename,
"destinationPath": pageDestinationPath,
"url": chapter.Server + path.Join(chapter.Hash, chapter.Pages[page]),
"urlFallback": chapter.ServerFallback + path.Join(chapter.Hash, chapter.Pages[page]),
},
})
}
}
logrus.Infof("Downloading %d pages", len(downloadPagesTasks))
for len(downloadPagesTasks) > 0 {
var task tasks.Task
task, downloadPagesTasks = tasks.TaskPop(downloadPagesTasks)
destinationPath := task.Arguments["destinationPath"]
if !fileExists(task.Arguments["destinationPath"]) {
logrus.Tracef("Downloading page from %s", task.Arguments["url"])
logrus.Tracef("Downlading page to %s", destinationPath)
errDownload := DownloadFile(task.Arguments["url"], destinationPath)
if errDownload != nil {
_, exists := task.Arguments["urlFallback"]
if exists {
logrus.Warnf("Using fallback for page %s", task.Arguments["filename"])
downloadPagesTasks = tasks.TaskPush(downloadPagesTasks, tasks.Task{
Arguments: map[string]string{
"destinationPath": task.Arguments["destinationPath"],
"url": task.Arguments["urlFallback"],
"key": task.Arguments["key"],
},
})
} else {
logrus.Errorf("Error downloading page: %s", errDownload)
}
}
}
time.Sleep(100 * time.Millisecond)
}
covers, errCovers := manga.GetCovers()

25
pkg/tasks/runner.go Normal file
View File

@ -0,0 +1,25 @@
package tasks
// Task type identifiers, numbered consecutively starting at zero.
// NOTE(review): presumably these are the values meant for Task.Type — confirm
// against callers; nothing in this file assigns them.
const (
	TaskTypeDownload = iota // 0
	TaskTypeFetch           // 1
	TaskTypeMkDir           // 2
)
// Task describes one unit of work held in a task list; lists of tasks are
// manipulated with TaskPop and TaskPush.
type Task struct {
// Type is a small integer tag for the task.
// NOTE(review): presumably one of the TaskType* constants — confirm against callers.
Type int8
// Name is a free-form label for the task.
Name string
// Failed is a boolean failure flag; nothing in this file sets or reads it.
Failed bool
// Arguments carries the task's parameters as string key/value pairs.
Arguments map[string]string
}
// TaskPop removes the first task from taskList and returns it together with
// the remaining tasks.
//
// When taskList is empty (or nil) there is nothing to pop, so the zero-value
// Task and the unchanged list are returned instead of panicking on an
// out-of-range index. Callers that need to tell the two cases apart should
// check len(taskList) before calling.
//
// The returned slice is a re-slice of taskList and shares its backing array.
func TaskPop(taskList []Task) (Task, []Task) {
	if len(taskList) == 0 {
		return Task{}, taskList
	}
	// https://github.com/golang/go/wiki/SliceTricks
	return taskList[0], taskList[1:]
}
// TaskPush adds task to the end of taskList and returns the resulting slice.
// As with the built-in append, the caller must use the returned slice.
func TaskPush(taskList []Task, task Task) []Task {
	taskList = append(taskList, task)
	return taskList
}