Remove repeated code for archiving bookmarks

This commit is contained in:
Radhi Fadlillah 2019-09-20 16:48:57 +07:00
parent 2da0c7e297
commit 64c62d6b12
11 changed files with 425 additions and 766 deletions

View File

@ -1,18 +1,10 @@
package cmd
import (
"bytes"
"fmt"
"io"
"net/http"
nurl "net/url"
fp "path/filepath"
"strings"
"time"
"github.com/go-shiori/shiori/pkg/warc"
"github.com/go-shiori/go-readability"
"github.com/go-shiori/shiori/internal/core"
"github.com/go-shiori/shiori/internal/model"
"github.com/spf13/cobra"
)
@ -45,28 +37,16 @@ func addHandler(cmd *cobra.Command, args []string) {
noArchival, _ := cmd.Flags().GetBool("no-archival")
logArchival, _ := cmd.Flags().GetBool("log-archival")
// Clean up URL by removing its fragment and UTM parameters
tmp, err := nurl.Parse(url)
if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" {
cError.Println("URL is not valid")
return
}
tmp.Fragment = ""
clearUTMParams(tmp)
// Create bookmark item
book := model.Bookmark{
URL: tmp.String(),
Title: normalizeSpace(title),
Excerpt: normalizeSpace(excerpt),
URL: url,
Title: normalizeSpace(title),
Excerpt: normalizeSpace(excerpt),
CreateArchive: !noArchival,
}
// Create bookmark ID
book.ID, err = db.CreateNewID("bookmark")
if err != nil {
cError.Printf("Failed to create ID: %v\n", err)
return
if book.Title == "" {
book.Title = book.URL
}
// Set bookmark tags
@ -75,101 +55,51 @@ func addHandler(cmd *cobra.Command, args []string) {
book.Tags[i].Name = strings.TrimSpace(tag)
}
// If it's not offline mode, fetch data from internet
var imageURLs []string
if !offline {
func() {
cInfo.Println("Downloading article...")
// Prepare download request
req, err := http.NewRequest("GET", url, nil)
if err != nil {
cError.Printf("Failed to download article: %v\n", err)
return
}
// Send download request
req.Header.Set("User-Agent", "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)")
resp, err := httpClient.Do(req)
if err != nil {
cError.Printf("Failed to download article: %v\n", err)
return
}
defer resp.Body.Close()
// Split response body so it can be processed twice
archivalInput := bytes.NewBuffer(nil)
readabilityInput := bytes.NewBuffer(nil)
readabilityCheckInput := bytes.NewBuffer(nil)
multiWriter := io.MultiWriter(archivalInput, readabilityInput, readabilityCheckInput)
_, err = io.Copy(multiWriter, resp.Body)
if err != nil {
cError.Printf("Failed to process article: %v\n", err)
return
}
// If this is HTML, parse for readable content
contentType := resp.Header.Get("Content-Type")
if strings.Contains(contentType, "text/html") {
isReadable := readability.IsReadable(readabilityCheckInput)
article, err := readability.FromReader(readabilityInput, url)
if err != nil {
cError.Printf("Failed to parse article: %v\n", err)
return
}
book.Author = article.Byline
book.Content = article.TextContent
book.HTML = article.Content
// If title and excerpt doesnt have submitted value, use from article
if book.Title == "" {
book.Title = article.Title
}
if book.Excerpt == "" {
book.Excerpt = article.Excerpt
}
if !isReadable {
book.Content = ""
}
// Get image URL
if article.Image != "" {
imageURLs = append(imageURLs, article.Image)
}
if article.Favicon != "" {
imageURLs = append(imageURLs, article.Favicon)
}
}
// If needed, create offline archive as well
if !noArchival {
archivePath := fp.Join(dataDir, "archive", fmt.Sprintf("%d", book.ID))
archivalRequest := warc.ArchivalRequest{
URL: url,
Reader: archivalInput,
ContentType: contentType,
LogEnabled: logArchival,
}
err = warc.NewArchive(archivalRequest, archivePath)
if err != nil {
cError.Printf("Failed to create archive: %v\n", err)
return
}
}
}()
// Create bookmark ID
var err error
book.ID, err = db.CreateNewID("bookmark")
if err != nil {
cError.Printf("Failed to create ID: %v\n", err)
return
}
// Make sure title is not empty
if book.Title == "" {
book.Title = book.URL
// Clean up bookmark URL
book.URL, err = core.RemoveUTMParams(book.URL)
if err != nil {
cError.Printf("Failed to clean URL: %v\n", err)
return
}
// If it's not offline mode, fetch data from internet.
if !offline {
cInfo.Println("Downloading article...")
var isFatalErr bool
content, contentType, err := core.DownloadBookmark(book.URL)
if err != nil {
cError.Printf("Failed to download: %v\n", err)
}
if err == nil && content != nil {
request := core.ProcessRequest{
DataDir: dataDir,
Bookmark: book,
Content: content,
ContentType: contentType,
LogArchival: logArchival,
}
book, isFatalErr, err = core.ProcessBookmark(request)
content.Close()
if err != nil {
cError.Printf("Failed: %v\n", err)
}
if isFatalErr {
return
}
}
}
// Save bookmark to database
@ -179,18 +109,6 @@ func addHandler(cmd *cobra.Command, args []string) {
return
}
// Save article image to local disk
imgPath := fp.Join(dataDir, "thumb", fmt.Sprintf("%d", book.ID))
for _, imageURL := range imageURLs {
err = downloadBookImage(imageURL, imgPath, time.Minute)
if err == nil {
break
} else {
cError.Printf("Failed to download image: %v\n", err)
continue
}
}
// Print added bookmark
fmt.Println()
printBookmarks(book)

View File

@ -2,11 +2,11 @@ package cmd
import (
"fmt"
nurl "net/url"
"os"
"strings"
"github.com/PuerkitoBio/goquery"
"github.com/go-shiori/shiori/internal/core"
"github.com/go-shiori/shiori/internal/model"
"github.com/spf13/cobra"
)
@ -73,17 +73,14 @@ func importHandler(cmd *cobra.Command, args []string) {
url, _ := a.Attr("href")
strTags, _ := a.Attr("tags")
// Clean up URL by removing its fragment and UTM parameters
tmp, err := nurl.Parse(url)
if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" {
// Clean up URL
var err error
url, err = core.RemoveUTMParams(url)
if err != nil {
cError.Printf("Skip %s: URL is not valid\n", url)
return
}
tmp.Fragment = ""
clearUTMParams(tmp)
url = tmp.String()
// Make sure title is valid Utf-8
title = toValidUtf8(title, url)

View File

@ -2,13 +2,13 @@ package cmd
import (
"fmt"
nurl "net/url"
"os"
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
"github.com/go-shiori/shiori/internal/core"
"github.com/go-shiori/shiori/internal/model"
"github.com/spf13/cobra"
)
@ -59,17 +59,14 @@ func pocketHandler(cmd *cobra.Command, args []string) {
intModified, _ := strconv.ParseInt(strModified, 10, 64)
modified := time.Unix(intModified, 0)
// Clean up URL by removing its fragment and UTM parameters
tmp, err := nurl.Parse(url)
if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" {
// Clean up URL
var err error
url, err = core.RemoveUTMParams(url)
if err != nil {
cError.Printf("Skip %s: URL is not valid\n", url)
return
}
tmp.Fragment = ""
clearUTMParams(tmp)
url = tmp.String()
// Make sure title is valid Utf-8
title = toValidUtf8(title, url)

View File

@ -1,22 +1,14 @@
package cmd
import (
"bytes"
"fmt"
"io"
"net/http"
nurl "net/url"
"os"
fp "path/filepath"
"sort"
"strings"
"sync"
"time"
"github.com/go-shiori/go-readability"
"github.com/go-shiori/shiori/internal/core"
"github.com/go-shiori/shiori/internal/database"
"github.com/go-shiori/shiori/internal/model"
"github.com/go-shiori/shiori/pkg/warc"
"github.com/spf13/cobra"
)
@ -83,17 +75,12 @@ func updateHandler(cmd *cobra.Command, args []string) {
excerpt = normalizeSpace(excerpt)
if cmd.Flags().Changed("url") {
// Clean up URL by removing its fragment and UTM parameters
tmp, err := nurl.Parse(url)
if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" {
cError.Println("URL is not valid")
return
// Clean up bookmark URL
url, err = core.RemoveUTMParams(url)
if err != nil {
panic(fmt.Errorf("failed to clean URL: %v", err))
}
tmp.Fragment = ""
clearUTMParams(tmp)
url = tmp.String()
// Since user uses custom URL, make sure there is only one ID to update
if len(ids) != 1 {
cError.Println("Update only accepts one index while using --url flag")
@ -149,6 +136,9 @@ func updateHandler(cmd *cobra.Command, args []string) {
for i, book := range bookmarks {
wg.Add(1)
// Mark whether book will be archived
book.CreateArchive = !noArchival
// If used, use submitted URL
if url != "" {
book.URL = url
@ -164,102 +154,32 @@ func updateHandler(cmd *cobra.Command, args []string) {
<-semaphore
}()
// Prepare download request
req, err := http.NewRequest("GET", book.URL, nil)
// Download data from internet
content, contentType, err := core.DownloadBookmark(book.URL)
if err != nil {
chProblem <- book.ID
chMessage <- fmt.Errorf("Failed to download %s: %v", book.URL, err)
return
}
// Send download request
req.Header.Set("User-Agent", "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)")
resp, err := httpClient.Do(req)
if err != nil {
chProblem <- book.ID
chMessage <- fmt.Errorf("Failed to download %s: %v", book.URL, err)
return
request := core.ProcessRequest{
DataDir: dataDir,
Bookmark: book,
Content: content,
ContentType: contentType,
KeepMetadata: keepMetadata,
LogArchival: logArchival,
}
defer resp.Body.Close()
// Split response body so it can be processed twice
archivalInput := bytes.NewBuffer(nil)
readabilityInput := bytes.NewBuffer(nil)
readabilityCheckInput := bytes.NewBuffer(nil)
multiWriter := io.MultiWriter(archivalInput, readabilityInput, readabilityCheckInput)
book, _, err = core.ProcessBookmark(request)
content.Close()
_, err = io.Copy(multiWriter, resp.Body)
if err != nil {
chProblem <- book.ID
chMessage <- fmt.Errorf("Failed to process %s: %v", book.URL, err)
return
}
// If this is HTML, parse for readable content
contentType := resp.Header.Get("Content-Type")
if strings.Contains(contentType, "text/html") {
isReadable := readability.IsReadable(readabilityCheckInput)
article, err := readability.FromReader(readabilityInput, book.URL)
if err != nil {
chProblem <- book.ID
chMessage <- fmt.Errorf("Failed to parse %s: %v", book.URL, err)
return
}
book.Author = article.Byline
book.Content = article.TextContent
book.HTML = article.Content
if !isReadable {
book.Content = ""
}
if !keepMetadata {
book.Title = article.Title
book.Excerpt = article.Excerpt
}
// Get image for thumbnail and save it to local disk
var imageURLs []string
if article.Image != "" {
imageURLs = append(imageURLs, article.Image)
}
if article.Favicon != "" {
imageURLs = append(imageURLs, article.Favicon)
}
imgPath := fp.Join(dataDir, "thumb", fmt.Sprintf("%d", book.ID))
for _, imageURL := range imageURLs {
err = downloadBookImage(imageURL, imgPath, time.Minute)
if err == nil {
break
}
}
}
// If needed, update offline archive as well.
// Make sure to delete the old one first.
if !noArchival {
archivePath := fp.Join(dataDir, "archive", fmt.Sprintf("%d", book.ID))
os.Remove(archivePath)
archivalRequest := warc.ArchivalRequest{
URL: book.URL,
Reader: archivalInput,
ContentType: contentType,
LogEnabled: logArchival,
}
err = warc.NewArchive(archivalRequest, archivePath)
if err != nil {
chProblem <- book.ID
chMessage <- fmt.Errorf("Failed to create archive %s: %v", book.URL, err)
return
}
}
// Send success message
chMessage <- fmt.Sprintf("Downloaded %s", book.URL)

View File

@ -3,29 +3,17 @@ package cmd
import (
"errors"
"fmt"
"image"
clr "image/color"
"image/draw"
"image/jpeg"
"math"
"net/http"
nurl "net/url"
"os"
"os/exec"
fp "path/filepath"
"runtime"
"strconv"
"strings"
"time"
"unicode/utf8"
"github.com/disintegration/imaging"
"github.com/fatih/color"
"github.com/go-shiori/shiori/internal/model"
"golang.org/x/crypto/ssh/terminal"
// Add supports for PNG image
_ "image/png"
)
var (
@ -54,95 +42,6 @@ func isURLValid(s string) bool {
return err == nil && tmp.Scheme != "" && tmp.Hostname() != ""
}
func clearUTMParams(url *nurl.URL) {
queries := url.Query()
for key := range queries {
if strings.HasPrefix(key, "utm_") {
queries.Del(key)
}
}
url.RawQuery = queries.Encode()
}
func downloadBookImage(url, dstPath string, timeout time.Duration) error {
// Fetch data from URL
client := &http.Client{Timeout: timeout}
resp, err := client.Get(url)
if err != nil {
return err
}
defer resp.Body.Close()
// Make sure it's JPG or PNG image
cp := resp.Header.Get("Content-Type")
if !strings.Contains(cp, "image/jpeg") && !strings.Contains(cp, "image/png") {
return fmt.Errorf("%s is not a supported image", url)
}
// At this point, the download has finished successfully.
// Prepare destination file.
err = os.MkdirAll(fp.Dir(dstPath), os.ModePerm)
if err != nil {
return fmt.Errorf("failed to create image dir: %v", err)
}
dstFile, err := os.Create(dstPath)
if err != nil {
return fmt.Errorf("failed to create image file: %v", err)
}
defer dstFile.Close()
// Parse image and process it.
// If image is smaller than 600x400 or its ratio is less than 4:3, resize.
// Else, save it as it is.
img, _, err := image.Decode(resp.Body)
if err != nil {
return fmt.Errorf("failed to parse image %s: %v", url, err)
}
imgRect := img.Bounds()
imgWidth := imgRect.Dx()
imgHeight := imgRect.Dy()
imgRatio := float64(imgWidth) / float64(imgHeight)
if imgWidth >= 600 && imgHeight >= 400 && imgRatio > 1.3 {
err = jpeg.Encode(dstFile, img, nil)
} else {
// Create background
bg := image.NewNRGBA(imgRect)
draw.Draw(bg, imgRect, image.NewUniform(clr.White), image.Point{}, draw.Src)
draw.Draw(bg, imgRect, img, image.Point{}, draw.Over)
bg = imaging.Fill(bg, 600, 400, imaging.Center, imaging.Lanczos)
bg = imaging.Blur(bg, 150)
bg = imaging.AdjustBrightness(bg, 30)
// Create foreground
fg := imaging.Fit(img, 600, 400, imaging.Lanczos)
// Merge foreground and background
bgRect := bg.Bounds()
fgRect := fg.Bounds()
fgPosition := image.Point{
X: bgRect.Min.X - int(math.Round(float64(bgRect.Dx()-fgRect.Dx())/2)),
Y: bgRect.Min.Y - int(math.Round(float64(bgRect.Dy()-fgRect.Dy())/2)),
}
draw.Draw(bg, bgRect, fg, fgPosition, draw.Over)
// Save to file
err = jpeg.Encode(dstFile, bg, nil)
}
if err != nil {
return fmt.Errorf("failed to save image %s: %v", url, err)
}
return nil
}
func printBookmarks(bookmarks ...model.Bookmark) {
for _, bookmark := range bookmarks {
// Create bookmark index

31
internal/core/download.go Normal file
View File

@ -0,0 +1,31 @@
package core
import (
"io"
"net/http"
"time"
)
// httpClient is the shared client for all bookmark downloads; the
// one-minute timeout bounds slow or stalled servers.
var httpClient = &http.Client{Timeout: time.Minute}

// DownloadBookmark fetches the page at url using the shared HTTP client.
// It returns the raw response body (the caller is responsible for closing
// it), the value of the Content-Type header, and any error from building
// or sending the request.
func DownloadBookmark(url string) (io.ReadCloser, string, error) {
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, "", err
	}

	// Identify ourselves so servers can distinguish Shiori traffic.
	req.Header.Set("User-Agent", "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)")

	resp, err := httpClient.Do(req)
	if err != nil {
		return nil, "", err
	}

	return resp.Body, resp.Header.Get("Content-Type"), nil
}

218
internal/core/processing.go Normal file
View File

@ -0,0 +1,218 @@
package core
import (
"bytes"
"fmt"
"image"
"image/color"
"image/draw"
"image/jpeg"
"io"
"math"
"os"
"path"
fp "path/filepath"
"strconv"
"strings"
"github.com/disintegration/imaging"
"github.com/go-shiori/go-readability"
"github.com/go-shiori/shiori/internal/model"
"github.com/go-shiori/shiori/pkg/warc"
// Add support for png
_ "image/png"
)
// ProcessRequest is the request for processing bookmark.
type ProcessRequest struct {
	DataDir      string         // root data directory; thumbnails and archives are written beneath it
	Bookmark     model.Bookmark // bookmark to process; its ID must already be set (see ProcessBookmark)
	Content      io.Reader      // downloaded page content; ProcessBookmark does not close it
	ContentType  string         // Content-Type of Content; readability parsing runs only for text/html
	KeepMetadata bool           // when true, prefer the existing title/excerpt over the parsed article's
	LogArchival  bool           // enable logging while building the offline archive
}
// ProcessBookmark process the bookmark and archive it if needed.
// For HTML content it extracts the readable article (author, text, excerpt),
// downloads a thumbnail image, and — when Bookmark.CreateArchive is set —
// writes an offline archive. Return three values: the updated bookmark,
// whether the error is fatal (processing could not start at all), and the
// error value.
func ProcessBookmark(req ProcessRequest) (model.Bookmark, bool, error) {
	book := req.Bookmark
	contentType := req.ContentType

	// Make sure bookmark ID is defined; thumbnail and archive paths below
	// are derived from it, so a zero ID is a fatal error.
	if book.ID == 0 {
		return book, true, fmt.Errorf("bookmark ID is not valid")
	}

	// Split bookmark content so it can be processed several times: once for
	// archival and, for HTML, twice more for the readability check and parse.
	archivalInput := bytes.NewBuffer(nil)
	readabilityInput := bytes.NewBuffer(nil)
	readabilityCheckInput := bytes.NewBuffer(nil)

	// Non-HTML content is only archived, never parsed, so skip the
	// readability buffers in that case.
	var multiWriter io.Writer
	if !strings.Contains(contentType, "text/html") {
		multiWriter = io.MultiWriter(archivalInput)
	} else {
		multiWriter = io.MultiWriter(archivalInput, readabilityInput, readabilityCheckInput)
	}

	_, err := io.Copy(multiWriter, req.Content)
	if err != nil {
		return book, false, fmt.Errorf("failed to process article: %v", err)
	}

	// If this is HTML, parse for readable content
	var imageURLs []string
	if strings.Contains(contentType, "text/html") {
		isReadable := readability.IsReadable(readabilityCheckInput)

		article, err := readability.FromReader(readabilityInput, book.URL)
		if err != nil {
			return book, false, fmt.Errorf("failed to parse article: %v", err)
		}

		book.Author = article.Byline
		book.Content = article.TextContent
		book.HTML = article.Content

		// If title and excerpt doesn't have submitted value, use from article.
		// NOTE(review): when KeepMetadata is false this also overwrites a
		// non-empty submitted title/excerpt — confirm that is intended for
		// all callers.
		if !req.KeepMetadata || book.Title == "" {
			book.Title = article.Title
		}
		if !req.KeepMetadata || book.Excerpt == "" {
			book.Excerpt = article.Excerpt
		}

		// Sometimes article doesn't have any title, so make sure it is not empty
		if book.Title == "" {
			book.Title = book.URL
		}

		// Get image URL candidates for the thumbnail; the article image is
		// preferred, with the favicon as fallback.
		if article.Image != "" {
			imageURLs = append(imageURLs, article.Image)
		}
		if article.Favicon != "" {
			imageURLs = append(imageURLs, article.Favicon)
		}

		if !isReadable {
			book.Content = ""
		}

		book.HasContent = book.Content != ""
	}

	// Save article image to local disk. The first image that downloads
	// successfully wins; failures are ignored (thumbnail is best-effort).
	strID := strconv.Itoa(book.ID)
	imgPath := fp.Join(req.DataDir, "thumb", strID)
	for _, imageURL := range imageURLs {
		err = downloadBookImage(imageURL, imgPath)
		if err == nil {
			book.ImageURL = path.Join("/", "bookmark", strID, "thumb")
			break
		}
	}

	// If needed, create offline archive as well. Remove any stale archive
	// first so warc writes a fresh file.
	if book.CreateArchive {
		archivePath := fp.Join(req.DataDir, "archive", fmt.Sprintf("%d", book.ID))
		os.Remove(archivePath)

		archivalRequest := warc.ArchivalRequest{
			URL:         book.URL,
			Reader:      archivalInput,
			ContentType: contentType,
			LogEnabled:  req.LogArchival,
		}

		err = warc.NewArchive(archivalRequest, archivePath)
		if err != nil {
			return book, false, fmt.Errorf("failed to create archive: %v", err)
		}

		book.HasArchive = true
	}

	return book, false, nil
}
// downloadBookImage fetches the JPEG or PNG image at url and stores it at
// dstPath as a JPEG thumbnail. Images at least 600x400 with a ratio wider
// than 4:3 are saved as-is; smaller or narrower images are centered over a
// blurred, brightened background and resized to 600x400.
func downloadBookImage(url, dstPath string) error {
	// Fetch data from URL
	resp, err := httpClient.Get(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	// Make sure it's JPG or PNG image
	cp := resp.Header.Get("Content-Type")
	if !strings.Contains(cp, "image/jpeg") && !strings.Contains(cp, "image/png") {
		return fmt.Errorf("%s is not a supported image", url)
	}

	// Decode the image BEFORE touching the filesystem, so a corrupt or
	// undecodable payload doesn't leave an empty destination file behind.
	img, _, err := image.Decode(resp.Body)
	if err != nil {
		return fmt.Errorf("failed to parse image %s: %v", url, err)
	}

	// At this point the image decoded successfully; prepare destination file.
	err = os.MkdirAll(fp.Dir(dstPath), os.ModePerm)
	if err != nil {
		return fmt.Errorf("failed to create image dir: %v", err)
	}

	dstFile, err := os.Create(dstPath)
	if err != nil {
		return fmt.Errorf("failed to create image file: %v", err)
	}
	defer dstFile.Close()

	// If image is smaller than 600x400 or its ratio is less than 4:3, resize.
	// Else, save it as it is.
	imgRect := img.Bounds()
	imgWidth := imgRect.Dx()
	imgHeight := imgRect.Dy()
	imgRatio := float64(imgWidth) / float64(imgHeight)

	if imgWidth >= 600 && imgHeight >= 400 && imgRatio > 1.3 {
		err = jpeg.Encode(dstFile, img, nil)
	} else {
		// Create background: the image itself, filled to 600x400, blurred
		// and brightened so the foreground stands out.
		bg := image.NewNRGBA(imgRect)
		draw.Draw(bg, imgRect, image.NewUniform(color.White), image.Point{}, draw.Src)
		draw.Draw(bg, imgRect, img, image.Point{}, draw.Over)

		bg = imaging.Fill(bg, 600, 400, imaging.Center, imaging.Lanczos)
		bg = imaging.Blur(bg, 150)
		bg = imaging.AdjustBrightness(bg, 30)

		// Create foreground scaled to fit inside 600x400.
		fg := imaging.Fit(img, 600, 400, imaging.Lanczos)

		// Merge foreground and background, centering the foreground.
		bgRect := bg.Bounds()
		fgRect := fg.Bounds()
		fgPosition := image.Point{
			X: bgRect.Min.X - int(math.Round(float64(bgRect.Dx()-fgRect.Dx())/2)),
			Y: bgRect.Min.Y - int(math.Round(float64(bgRect.Dy()-fgRect.Dy())/2)),
		}
		draw.Draw(bg, bgRect, fg, fgPosition, draw.Over)

		// Save to file
		err = jpeg.Encode(dstFile, bg, nil)
	}

	if err != nil {
		return fmt.Errorf("failed to save image %s: %v", url, err)
	}

	return nil
}

28
internal/core/url.go Normal file
View File

@ -0,0 +1,28 @@
package core
import (
"fmt"
nurl "net/url"
"strings"
)
// RemoveUTMParams strips UTM tracking parameters and the fragment from url.
// If url cannot be parsed as an absolute URL (missing scheme or host), the
// original string is returned together with an error.
func RemoveUTMParams(url string) (string, error) {
	parsed, err := nurl.Parse(url)
	if err != nil || parsed.Scheme == "" || parsed.Hostname() == "" {
		return url, fmt.Errorf("URL is not valid")
	}

	// Drop every query parameter whose key starts with "utm_".
	queries := parsed.Query()
	for key := range queries {
		if strings.HasPrefix(key, "utm_") {
			queries.Del(key)
		}
	}
	parsed.RawQuery = queries.Encode()

	// The fragment is purely client-side, so discard it as well.
	parsed.Fragment = ""

	return parsed.String(), nil
}

View File

@ -6,17 +6,12 @@ import (
"fmt"
"io"
"net/http"
nurl "net/url"
"os"
"path"
fp "path/filepath"
"strconv"
"strings"
"time"
"github.com/go-shiori/go-readability"
"github.com/go-shiori/shiori/internal/core"
"github.com/go-shiori/shiori/internal/model"
"github.com/go-shiori/shiori/pkg/warc"
"github.com/julienschmidt/httprouter"
)
@ -31,18 +26,15 @@ func (h *handler) apiInsertViaExtension(w http.ResponseWriter, r *http.Request,
err = json.NewDecoder(r.Body).Decode(&request)
checkError(err)
// Clean up URL by removing its fragment and UTM parameters
tmp, err := nurl.Parse(request.URL)
if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" {
panic(fmt.Errorf("URL is not valid"))
// Clean up bookmark URL
request.URL, err = core.RemoveUTMParams(request.URL)
if err != nil {
panic(fmt.Errorf("failed to clean URL: %v", err))
}
tmp.Fragment = ""
clearUTMParams(tmp)
request.URL = tmp.String()
// Check if bookmark already exists.
book, exist := h.DB.GetBookmark(0, request.URL)
book.CreateArchive = true
// If it already exists, we need to set ID and tags.
if exist {
@ -69,119 +61,37 @@ func (h *handler) apiInsertViaExtension(w http.ResponseWriter, r *http.Request,
// Since we are using extension, the extension might send the HTML content
// so no need to download it again here. However, if it's empty, it might be not HTML file
// so we download it here.
contentType := "text/html; charset=UTF-8"
contentBuffer := bytes.NewBufferString(book.HTML)
var contentType string
var contentBuffer io.Reader
if book.HTML == "" {
func() {
// Prepare download request
req, err := http.NewRequest("GET", book.URL, nil)
if err != nil {
return
}
// Send download request
req.Header.Set("User-Agent", "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)")
resp, err := httpClient.Do(req)
if err != nil {
return
}
defer resp.Body.Close()
// Save response for later use
contentType = resp.Header.Get("Content-Type")
contentBuffer.Reset()
_, err = io.Copy(contentBuffer, resp.Body)
if err != nil {
return
}
}()
contentBuffer, contentType, _ = core.DownloadBookmark(book.URL)
} else {
contentType = "text/html; charset=UTF-8"
contentBuffer = bytes.NewBufferString(book.HTML)
}
// At this point the web page already downloaded.
// Time to process it.
func() {
// Split response so it can be processed several times
archivalInput := bytes.NewBuffer(nil)
readabilityInput := bytes.NewBuffer(nil)
readabilityCheckInput := bytes.NewBuffer(nil)
multiWriter := io.MultiWriter(archivalInput, readabilityInput, readabilityCheckInput)
_, err = io.Copy(multiWriter, contentBuffer)
if err != nil {
return
}
// If it's HTML, parse the readable content.
if strings.Contains(contentType, "text/html") {
isReadable := readability.IsReadable(readabilityCheckInput)
article, err := readability.FromReader(readabilityInput, book.URL)
if err != nil {
return
}
book.Author = article.Byline
book.Content = article.TextContent
book.HTML = article.Content
if book.Title == "" {
if article.Title == "" {
book.Title = book.URL
} else {
book.Title = article.Title
}
}
if book.Excerpt == "" {
book.Excerpt = article.Excerpt
}
if !isReadable {
book.Content = ""
}
book.HasContent = book.Content != ""
// Get image for thumbnail and save it to local disk
var imageURLs []string
if article.Image != "" {
imageURLs = append(imageURLs, article.Image)
}
if article.Favicon != "" {
imageURLs = append(imageURLs, article.Favicon)
}
// Save article image to local disk
strID := strconv.Itoa(book.ID)
imgPath := fp.Join(h.DataDir, "thumb", strID)
for _, imageURL := range imageURLs {
err = downloadBookImage(imageURL, imgPath, time.Minute)
if err == nil {
book.ImageURL = path.Join("/", "bookmark", strID, "thumb")
break
}
}
}
// Create offline archive as well
archivePath := fp.Join(h.DataDir, "archive", fmt.Sprintf("%d", book.ID))
os.Remove(archivePath)
archivalRequest := warc.ArchivalRequest{
URL: book.URL,
Reader: archivalInput,
if contentBuffer != nil {
request := core.ProcessRequest{
DataDir: h.DataDir,
Bookmark: book,
Content: contentBuffer,
ContentType: contentType,
}
err = warc.NewArchive(archivalRequest, archivePath)
if err != nil {
return
var isFatalErr bool
book, isFatalErr, err = core.ProcessBookmark(request)
if tmp, ok := contentBuffer.(io.ReadCloser); ok {
tmp.Close()
}
book.HasArchive = true
}()
if err != nil && isFatalErr {
panic(fmt.Errorf("failed to process bookmark: %v", err))
}
}
// Save bookmark to database
results, err := h.DB.SaveBookmarks(book)

View File

@ -1,13 +1,10 @@
package webserver
import (
"bytes"
"encoding/json"
"fmt"
"io"
"math"
"net/http"
nurl "net/url"
"os"
"path"
fp "path/filepath"
@ -16,10 +13,9 @@ import (
"sync"
"time"
"github.com/go-shiori/go-readability"
"github.com/go-shiori/shiori/internal/core"
"github.com/go-shiori/shiori/internal/database"
"github.com/go-shiori/shiori/internal/model"
"github.com/go-shiori/shiori/pkg/warc"
"github.com/gofrs/uuid"
"github.com/julienschmidt/httprouter"
"golang.org/x/crypto/bcrypt"
@ -251,112 +247,35 @@ func (h *handler) apiInsertBookmark(w http.ResponseWriter, r *http.Request, ps h
err = json.NewDecoder(r.Body).Decode(&book)
checkError(err)
// Clean up URL by removing its fragment and UTM parameters
tmp, err := nurl.Parse(book.URL)
if err != nil || tmp.Scheme == "" || tmp.Hostname() == "" {
panic(fmt.Errorf("URL is not valid"))
}
tmp.Fragment = ""
clearUTMParams(tmp)
book.URL = tmp.String()
// Create bookmark ID
book.ID, err = h.DB.CreateNewID("bookmark")
if err != nil {
panic(fmt.Errorf("failed to create ID: %v", err))
}
// Clean up bookmark URL
book.URL, err = core.RemoveUTMParams(book.URL)
if err != nil {
panic(fmt.Errorf("failed to clean URL: %v", err))
}
// Fetch data from internet
var imageURLs []string
func() {
// Prepare download request
req, err := http.NewRequest("GET", book.URL, nil)
if err != nil {
return
var isFatalErr bool
content, contentType, err := core.DownloadBookmark(book.URL)
if err == nil && content != nil {
request := core.ProcessRequest{
DataDir: h.DataDir,
Bookmark: book,
Content: content,
ContentType: contentType,
}
// Send download request
req.Header.Set("User-Agent", "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)")
resp, err := httpClient.Do(req)
if err != nil {
return
book, isFatalErr, err = core.ProcessBookmark(request)
content.Close()
if err != nil && isFatalErr {
panic(fmt.Errorf("failed to process bookmark: %v", err))
}
defer resp.Body.Close()
// Split response body so it can be processed twice
archivalInput := bytes.NewBuffer(nil)
readabilityInput := bytes.NewBuffer(nil)
readabilityCheckInput := bytes.NewBuffer(nil)
multiWriter := io.MultiWriter(archivalInput, readabilityInput, readabilityCheckInput)
_, err = io.Copy(multiWriter, resp.Body)
if err != nil {
return
}
// If this is HTML, parse for readable content
contentType := resp.Header.Get("Content-Type")
if strings.Contains(contentType, "text/html") {
isReadable := readability.IsReadable(readabilityCheckInput)
article, err := readability.FromReader(readabilityInput, book.URL)
if err != nil {
return
}
book.Author = article.Byline
book.Content = article.TextContent
book.HTML = article.Content
// If title and excerpt doesnt have submitted value, use from article
if book.Title == "" {
book.Title = article.Title
}
if book.Excerpt == "" {
book.Excerpt = article.Excerpt
}
// Get image URL
if article.Image != "" {
imageURLs = append(imageURLs, article.Image)
}
if article.Favicon != "" {
imageURLs = append(imageURLs, article.Favicon)
}
if !isReadable {
book.Content = ""
}
book.HasContent = book.Content != ""
}
// If needed, create offline archive as well
if book.CreateArchive {
archivePath := fp.Join(h.DataDir, "archive", fmt.Sprintf("%d", book.ID))
os.Remove(archivePath)
archivalRequest := warc.ArchivalRequest{
URL: book.URL,
Reader: archivalInput,
ContentType: contentType,
}
err = warc.NewArchive(archivalRequest, archivePath)
if err != nil {
return
}
book.HasArchive = true
}
}()
// Make sure title is not empty
if book.Title == "" {
book.Title = book.URL
}
// Save bookmark to database
@ -366,17 +285,6 @@ func (h *handler) apiInsertBookmark(w http.ResponseWriter, r *http.Request, ps h
}
book = results[0]
// Save article image to local disk
strID := strconv.Itoa(book.ID)
imgPath := fp.Join(h.DataDir, "thumb", strID)
for _, imageURL := range imageURLs {
err = downloadBookImage(imageURL, imgPath, time.Minute)
if err == nil {
book.ImageURL = path.Join("/", "bookmark", strID, "thumb")
break
}
}
// Return the new bookmark
w.Header().Set("Content-Type", "application/json")
err = json.NewEncoder(w).Encode(&book)
@ -446,6 +354,12 @@ func (h *handler) apiUpdateBookmark(w http.ResponseWriter, r *http.Request, ps h
book.Excerpt = request.Excerpt
book.Public = request.Public
// Clean up bookmark URL
book.URL, err = core.RemoveUTMParams(book.URL)
if err != nil {
panic(fmt.Errorf("failed to clean URL: %v", err))
}
// Set new tags
for i := range book.Tags {
book.Tags[i].Deleted = true
@ -525,6 +439,9 @@ func (h *handler) apiUpdateCache(w http.ResponseWriter, r *http.Request, ps http
for i, book := range bookmarks {
wg.Add(1)
// Mark whether book will be archived
book.CreateArchive = request.CreateArchive
go func(i int, book model.Bookmark, keepMetadata bool) {
// Make sure to finish the WG
defer wg.Done()
@ -535,107 +452,28 @@ func (h *handler) apiUpdateCache(w http.ResponseWriter, r *http.Request, ps http
<-semaphore
}()
// Prepare download request
req, err := http.NewRequest("GET", book.URL, nil)
// Download data from internet
content, contentType, err := core.DownloadBookmark(book.URL)
if err != nil {
chProblem <- book.ID
return
}
// Send download request
req.Header.Set("User-Agent", "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)")
resp, err := httpClient.Do(req)
request := core.ProcessRequest{
DataDir: h.DataDir,
Bookmark: book,
Content: content,
ContentType: contentType,
KeepMetadata: keepMetadata,
}
book, _, err = core.ProcessBookmark(request)
content.Close()
if err != nil {
chProblem <- book.ID
return
}
defer resp.Body.Close()
// Split response body so it can be processed twice
archivalInput := bytes.NewBuffer(nil)
readabilityInput := bytes.NewBuffer(nil)
readabilityCheckInput := bytes.NewBuffer(nil)
multiWriter := io.MultiWriter(archivalInput, readabilityInput, readabilityCheckInput)
_, err = io.Copy(multiWriter, resp.Body)
if err != nil {
chProblem <- book.ID
return
}
// If this is HTML, parse for readable content
strID := strconv.Itoa(book.ID)
contentType := resp.Header.Get("Content-Type")
if strings.Contains(contentType, "text/html") {
isReadable := readability.IsReadable(readabilityCheckInput)
article, err := readability.FromReader(readabilityInput, book.URL)
if err != nil {
chProblem <- book.ID
return
}
book.Author = article.Byline
book.Content = article.TextContent
book.HTML = article.Content
if !isReadable {
book.Content = ""
}
if !keepMetadata {
book.Title = article.Title
book.Excerpt = article.Excerpt
}
if book.Title == "" {
book.Title = book.URL
}
book.HasContent = book.Content != ""
// Get image for thumbnail and save it to local disk
var imageURLs []string
if article.Image != "" {
imageURLs = append(imageURLs, article.Image)
}
if article.Favicon != "" {
imageURLs = append(imageURLs, article.Favicon)
}
// Save article image to local disk
imgPath := fp.Join(h.DataDir, "thumb", strID)
for _, imageURL := range imageURLs {
err = downloadBookImage(imageURL, imgPath, time.Minute)
if err == nil {
book.ImageURL = path.Join("/", "bookmark", strID, "thumb")
break
}
}
}
// If needed, update offline archive as well.
// Make sure to delete the old one first.
if request.CreateArchive {
archivePath := fp.Join(h.DataDir, "archive", strID)
os.Remove(archivePath)
archivalRequest := warc.ArchivalRequest{
URL: book.URL,
Reader: archivalInput,
ContentType: contentType,
}
err = warc.NewArchive(archivalRequest, archivePath)
if err != nil {
chProblem <- book.ID
return
}
book.HasArchive = true
}
// Update list of bookmarks
mx.Lock()

View File

@ -3,13 +3,8 @@ package webserver
import (
"fmt"
"html/template"
"image"
"image/color"
"image/draw"
"image/jpeg"
"io"
"io/ioutil"
"math"
"mime"
"net"
"net/http"
@ -19,9 +14,6 @@ import (
"regexp"
"strings"
"syscall"
"time"
"github.com/disintegration/imaging"
)
var rxRepeatedStrip = regexp.MustCompile(`(?i)-+`)
@ -89,95 +81,6 @@ func fileExists(filePath string) bool {
return !os.IsNotExist(err) && !info.IsDir()
}
func clearUTMParams(url *nurl.URL) {
queries := url.Query()
for key := range queries {
if strings.HasPrefix(key, "utm_") {
queries.Del(key)
}
}
url.RawQuery = queries.Encode()
}
func downloadBookImage(url, dstPath string, timeout time.Duration) error {
// Fetch data from URL
client := &http.Client{Timeout: timeout}
resp, err := client.Get(url)
if err != nil {
return err
}
defer resp.Body.Close()
// Make sure it's JPG or PNG image
cp := resp.Header.Get("Content-Type")
if !strings.Contains(cp, "image/jpeg") && !strings.Contains(cp, "image/png") {
return fmt.Errorf("%s is not a supported image", url)
}
// At this point, the download has finished successfully.
// Prepare destination file.
err = os.MkdirAll(fp.Dir(dstPath), os.ModePerm)
if err != nil {
return fmt.Errorf("failed to create image dir: %v", err)
}
dstFile, err := os.Create(dstPath)
if err != nil {
return fmt.Errorf("failed to create image file: %v", err)
}
defer dstFile.Close()
// Parse image and process it.
// If image is smaller than 600x400 or its ratio is less than 4:3, resize.
// Else, save it as it is.
img, _, err := image.Decode(resp.Body)
if err != nil {
return fmt.Errorf("failed to parse image %s: %v", url, err)
}
imgRect := img.Bounds()
imgWidth := imgRect.Dx()
imgHeight := imgRect.Dy()
imgRatio := float64(imgWidth) / float64(imgHeight)
if imgWidth >= 600 && imgHeight >= 400 && imgRatio > 1.3 {
err = jpeg.Encode(dstFile, img, nil)
} else {
// Create background
bg := image.NewNRGBA(imgRect)
draw.Draw(bg, imgRect, image.NewUniform(color.White), image.Point{}, draw.Src)
draw.Draw(bg, imgRect, img, image.Point{}, draw.Over)
bg = imaging.Fill(bg, 600, 400, imaging.Center, imaging.Lanczos)
bg = imaging.Blur(bg, 150)
bg = imaging.AdjustBrightness(bg, 30)
// Create foreground
fg := imaging.Fit(img, 600, 400, imaging.Lanczos)
// Merge foreground and background
bgRect := bg.Bounds()
fgRect := fg.Bounds()
fgPosition := image.Point{
X: bgRect.Min.X - int(math.Round(float64(bgRect.Dx()-fgRect.Dx())/2)),
Y: bgRect.Min.Y - int(math.Round(float64(bgRect.Dy()-fgRect.Dy())/2)),
}
draw.Draw(bg, bgRect, fg, fgPosition, draw.Over)
// Save to file
err = jpeg.Encode(dstFile, bg, nil)
}
if err != nil {
return fmt.Errorf("failed to save image %s: %v", url, err)
}
return nil
}
func createTemplate(filename string, funcMap template.FuncMap) (*template.Template, error) {
// Open file
src, err := assets.Open(filename)