Merge branch 'master' into patch-1

This commit is contained in:
Dean Jackson 2020-08-06 19:27:38 +02:00 committed by GitHub
commit 5601ac7904
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
36 changed files with 462 additions and 1816 deletions

10
.github/FUNDING.yml vendored Normal file
View File

@ -0,0 +1,10 @@
# These are supported funding model platforms
open_collective: # Replace with a single Open Collective username
ko_fi: radhifadlillah
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
issuehunt: # Replace with a single IssueHunt username
otechie: # Replace with a single Otechie username
custom: ['https://www.paypal.me/RadhiFadlillah']

View File

@ -3,8 +3,8 @@
[![Go Report Card](https://goreportcard.com/badge/github.com/go-shiori/shiori)](https://goreportcard.com/report/github.com/go-shiori/shiori)
[![Docker Image](https://img.shields.io/static/v1?label=image&message=Docker&color=1488C6&logo=docker)](https://hub.docker.com/r/radhifadlillah/shiori)
[![Deploy Heroku](https://img.shields.io/static/v1?label=deploy&message=Heroku&color=430098&logo=heroku)](https://heroku.com/deploy)
[![Donate PayPal](https://img.shields.io/static/v1?label=donate&message=PayPal&color=00457C&logo=paypal)](https://www.paypal.me/RadhiFadlillah)
[![Donate Ko-fi](https://img.shields.io/static/v1?label=donate&message=Ko-fi&color=F16061&logo=ko-fi)](https://ko-fi.com/radhifadlillah)
**This project is now maintained by Dean Jackson (@deanishe).** The awesome original author, @RadhiFadlillah, unfortunately no longer has the time (see [#256](https://github.com/go-shiori/shiori/issues/256)), and I am honoured to take over stewardship of such a fantastic application.
Shiori is a simple bookmarks manager written in the Go language. It is intended as a simple clone of [Pocket](https://getpocket.com/). You can use it as a command line application or as a web application. This application is distributed as a single binary, which means it can be installed and used easily.
@ -30,4 +30,4 @@ All documentation is available in [wiki](https://github.com/RadhiFadlillah/shior
## License
Shiori is distributed under the terms of the [MIT license](https://choosealicense.com/licenses/mit/), which means you can use it and modify it however you want. However, if you make an enhancement for it, if possible, please send a pull request. If you like this project, please consider donating to me either via [PayPal](https://www.paypal.me/RadhiFadlillah) or [Ko-Fi](https://ko-fi.com/radhifadlillah).
Shiori is distributed under the terms of the [MIT license](https://choosealicense.com/licenses/mit/), which means you can use it and modify it however you want. However, if you make an enhancement for it, if possible, please send a pull request.

5
go.mod
View File

@ -7,6 +7,7 @@ require (
github.com/disintegration/imaging v1.6.0
github.com/fatih/color v1.7.0
github.com/go-shiori/go-readability v0.0.0-20190809152430-5413e9c4ec86
github.com/go-shiori/warc v0.0.0-20191003110312-7b3c5582fd83
github.com/go-sql-driver/mysql v1.4.1
github.com/gofrs/uuid v3.2.0+incompatible
github.com/jmoiron/sqlx v1.2.0
@ -21,12 +22,8 @@ require (
github.com/shurcooL/vfsgen v0.0.0-20181202132449-6a9ea43bcacd
github.com/sirupsen/logrus v1.4.2
github.com/spf13/cobra v0.0.5
github.com/tdewolff/parse/v2 v2.3.7
go.etcd.io/bbolt v1.3.3
golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4
golang.org/x/image v0.0.0-20190802002840-cff245a6509b // indirect
golang.org/x/net v0.0.0-20190724013045-ca1201d0de80
golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa // indirect
golang.org/x/tools v0.0.0-20190809145639-6d4652c779c4 // indirect
google.golang.org/appengine v1.6.1 // indirect
)

16
go.sum
View File

@ -16,8 +16,12 @@ github.com/disintegration/imaging v1.6.0/go.mod h1:xuIt+sRxDFrHS0drzXUlCJthkJ8k7
github.com/fatih/color v1.7.0 h1:DkWD4oS2D8LGGgTQ6IvwJJXSL5Vp2ffcQg58nFV38Ys=
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/go-shiori/dom v0.0.0-20190930082056-9d974a4f8b25 h1:1ZfeL7TG+z4cjtC6XT+drfe23JxaVMwdqyGBh4O4foo=
github.com/go-shiori/dom v0.0.0-20190930082056-9d974a4f8b25/go.mod h1:360KoNl36ftFYhjLHuEty78kWUGw8i1opEicvIDLfRk=
github.com/go-shiori/go-readability v0.0.0-20190809152430-5413e9c4ec86 h1:tYq3F0DW27RniF4f5k5ACJdmOGYOmTlaXu17018YvG8=
github.com/go-shiori/go-readability v0.0.0-20190809152430-5413e9c4ec86/go.mod h1:1tFV9uTM/xnAKQw5EgPs+ip50udKhCjaP0nYdkSDXcU=
github.com/go-shiori/warc v0.0.0-20191003110312-7b3c5582fd83 h1:w3rI+ulrf6kMM+3313YDqxmG3r+7JctrU835wZifyL0=
github.com/go-shiori/warc v0.0.0-20191003110312-7b3c5582fd83/go.mod h1:uaK5DAxFig7atOzy+aqLzhs6qJacMDfs8NxHV5+shzc=
github.com/go-sql-driver/mysql v1.4.0 h1:7LxgVwFb2hIQtMm87NdgAVfXjnt4OePseqT1tKx+opk=
github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w=
github.com/go-sql-driver/mysql v1.4.1 h1:g24URVg0OFbNUTx9qqY1IRZ9D9z3iPyi5zKhQZpNwpA=
@ -85,8 +89,8 @@ github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/tdewolff/parse/v2 v2.3.7 h1:DXoTUgrUE2Eap0m7zg1ljCO5C78vhEi7HTc4YnJWrRk=
github.com/tdewolff/parse/v2 v2.3.7/go.mod h1:HansaqmN4I/U7L6/tUp0NcwT2tFO0F4EAWYGSDzkYNk=
github.com/tdewolff/parse v2.3.4+incompatible h1:x05/cnGwIMf4ceLuDMBOdQ1qGniMoxpP46ghf0Qzh38=
github.com/tdewolff/parse v2.3.4+incompatible/go.mod h1:8oBwCsVmUkgHO8M5iCzSIDtpzXOT0WXX9cWhz+bIzJQ=
github.com/tdewolff/test v1.0.0 h1:jOwzqCXr5ePXEPGJaq2ivoR6HOCi+D5TPfpoyg8yvmU=
github.com/tdewolff/test v1.0.0/go.mod h1:DiQUlutnqlEvdvhSn2LPGy4TFwRauAaYDsL+683RNX4=
github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0=
@ -110,8 +114,8 @@ golang.org/x/net v0.0.0-20190520210107-018c4d40a106 h1:EZofHp/BzEf3j39/+7CX1JvH0
golang.org/x/net v0.0.0-20190520210107-018c4d40a106/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190724013045-ca1201d0de80 h1:Ao/3l156eZf2AW5wK8a7/smtodRU+gha3+BeqJ69lRk=
golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190926025831-c00fd9afed17 h1:qPnAdmjNA41t3QBTx2mFGf/SD1IoslhYu7AmdsVzCcs=
golang.org/x/net v0.0.0-20190926025831-c00fd9afed17/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@ -122,8 +126,8 @@ golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20190520201301-c432e742b0af h1:NXfmMfXz6JqGfG3ikSxcz2N93j6DgScr19Oo2uwFu88=
golang.org/x/sys v0.0.0-20190520201301-c432e742b0af/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa h1:KIDDMLT1O0Nr7TSxp8xM5tJcdn8tgyAONntO829og1M=
golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190927073244-c990c680b611 h1:q9u40nxWT5zRClI/uU9dHCiYGottAg6Nzz4YUQyHxdA=
golang.org/x/sys v0.0.0-20190927073244-c990c680b611/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

View File

@ -10,7 +10,7 @@ import (
"time"
"github.com/go-shiori/shiori/internal/database"
"github.com/go-shiori/shiori/pkg/warc"
"github.com/go-shiori/warc"
"github.com/julienschmidt/httprouter"
"github.com/spf13/cobra"
)

View File

@ -122,7 +122,7 @@ func openMySQLDatabase() (database.DB, error) {
dbName, _ := os.LookupEnv("SHIORI_MYSQL_NAME")
dbAddress, _ := os.LookupEnv("SHIORI_MYSQL_ADDRESS")
connString := fmt.Sprintf("%s:%s@%s/%s", user, password, dbAddress, dbName)
connString := fmt.Sprintf("%s:%s@%s/%s?charset=utf8mb4", user, password, dbAddress, dbName)
return database.OpenMySQLDatabase(connString)
}

View File

@ -1,6 +1,8 @@
package cmd
import (
"strings"
"github.com/go-shiori/shiori/internal/webserver"
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
@ -18,15 +20,40 @@ func serveCmd() *cobra.Command {
cmd.Flags().IntP("port", "p", 8080, "Port used by the server")
cmd.Flags().StringP("address", "a", "", "Address the server listens to")
cmd.Flags().StringP("webroot", "r", "/", "Root path that used by server")
return cmd
}
func serveHandler(cmd *cobra.Command, args []string) {
// Get flags value
port, _ := cmd.Flags().GetInt("port")
address, _ := cmd.Flags().GetString("address")
rootPath, _ := cmd.Flags().GetString("webroot")
err := webserver.ServeApp(db, dataDir, address, port)
// Validate root path
if rootPath == "" {
rootPath = "/"
}
if !strings.HasPrefix(rootPath, "/") {
rootPath = "/" + rootPath
}
if !strings.HasSuffix(rootPath, "/") {
rootPath += "/"
}
// Start server
serverConfig := webserver.Config{
DB: db,
DataDir: dataDir,
ServerAddress: address,
ServerPort: port,
RootPath: rootPath,
}
err := webserver.ServeApp(serverConfig)
if err != nil {
logrus.Fatalf("Server error: %v\n", err)
}

3
internal/core/core.go Normal file
View File

@ -0,0 +1,3 @@
package core
// userAgent is the User-Agent string Shiori identifies itself with: it is set
// as the "User-Agent" header on bookmark download requests and passed as the
// UserAgent field of the archival request.
const userAgent = "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)"

View File

@ -18,7 +18,7 @@ func DownloadBookmark(url string) (io.ReadCloser, string, error) {
}
// Send download request
req.Header.Set("User-Agent", "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)")
req.Header.Set("User-Agent", userAgent)
resp, err := httpClient.Do(req)
if err != nil {
return nil, "", err

View File

@ -18,7 +18,7 @@ import (
"github.com/disintegration/imaging"
"github.com/go-shiori/go-readability"
"github.com/go-shiori/shiori/internal/model"
"github.com/go-shiori/shiori/pkg/warc"
"github.com/go-shiori/warc"
// Add support for png
_ "image/png"
@ -128,6 +128,7 @@ func ProcessBookmark(req ProcessRequest) (model.Bookmark, bool, error) {
URL: book.URL,
Reader: archivalInput,
ContentType: contentType,
UserAgent: userAgent,
LogEnabled: req.LogArchival,
}

View File

@ -113,7 +113,7 @@ func (db *MySQLDatabase) SaveBookmarks(bookmarks ...model.Bookmark) (result []mo
stmtInsertBook, err := tx.Preparex(`INSERT INTO bookmark
(id, url, title, excerpt, author, public, content, html, modified)
VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)
ON DUPLICATE KEY UPDATE
ON DUPLICATE KEY UPDATE
url = VALUES(url),
title = VALUES(title),
excerpt = VALUES(excerpt),
@ -241,7 +241,7 @@ func (db *MySQLDatabase) GetBookmarks(opts GetBookmarksOptions) ([]model.Bookmar
// Add where clause for search keyword
if opts.Keyword != "" {
query += ` AND (
url LIKE ? OR
url LIKE ? OR
MATCH(title, excerpt, content) AGAINST (? IN BOOLEAN MODE)
)`
@ -329,10 +329,10 @@ func (db *MySQLDatabase) GetBookmarks(opts GetBookmarksOptions) ([]model.Bookmar
}
// Fetch tags for each bookmarks
stmtGetTags, err := db.Preparex(`SELECT t.id, t.name
FROM bookmark_tag bt
stmtGetTags, err := db.Preparex(`SELECT t.id, t.name
FROM bookmark_tag bt
LEFT JOIN tag t ON bt.tag_id = t.id
WHERE bt.bookmark_id = ?
WHERE bt.bookmark_id = ?
ORDER BY t.name`)
if err != nil {
return nil, fmt.Errorf("failed to prepare tag query: %v", err)
@ -369,7 +369,7 @@ func (db *MySQLDatabase) GetBookmarksCount(opts GetBookmarksOptions) (int, error
// Add where clause for search keyword
if opts.Keyword != "" {
query += ` AND (
url LIKE ? OR
url LIKE ? OR
MATCH(title, excerpt, content) AGAINST (? IN BOOLEAN MODE)
)`
@ -497,7 +497,7 @@ func (db *MySQLDatabase) DeleteBookmarks(ids ...int) (err error) {
func (db *MySQLDatabase) GetBookmark(id int, url string) (model.Bookmark, bool) {
args := []interface{}{id}
query := `SELECT
id, url, title, excerpt, author, public,
id, url, title, excerpt, author, public,
content, html, modified, content <> '' has_content
FROM bookmark WHERE id = ?`
@ -562,7 +562,7 @@ func (db *MySQLDatabase) GetAccounts(opts GetAccountsOptions) ([]model.Account,
// Returns the account and boolean whether it's exist or not.
func (db *MySQLDatabase) GetAccount(username string) (model.Account, bool) {
account := model.Account{}
db.Get(&account, `SELECT
db.Get(&account, `SELECT
id, username, password, owner FROM account WHERE username = ?`,
username)
@ -603,8 +603,8 @@ func (db *MySQLDatabase) DeleteAccounts(usernames ...string) (err error) {
// GetTags fetch list of tags and their frequency.
func (db *MySQLDatabase) GetTags() ([]model.Tag, error) {
tags := []model.Tag{}
query := `SELECT bt.tag_id id, t.name, COUNT(bt.tag_id) n_bookmarks
FROM bookmark_tag bt
query := `SELECT bt.tag_id id, t.name, COUNT(bt.tag_id) n_bookmarks
FROM bookmark_tag bt
LEFT JOIN tag t ON bt.tag_id = t.id
GROUP BY bt.tag_id ORDER BY t.name`

View File

@ -2,56 +2,60 @@
<html lang="en">
<head>
<title>$$.Title$$ - Shiori - Bookmarks Manager</title>
<base href="$$.RootPath$$">
<title>$$.Book.Title$$ - Shiori - Bookmarks Manager</title>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="apple-touch-icon-precomposed" sizes="152x152" href="/res/apple-touch-icon-152x152.png">
<link rel="apple-touch-icon-precomposed" sizes="144x144" href="/res/apple-touch-icon-144x144.png">
<link rel="icon" type="image/png" href="/res/favicon-32x32.png" sizes="32x32">
<link rel="icon" type="image/png" href="/res/favicon-16x16.png" sizes="16x16">
<link rel="icon" type="image/x-icon" href="/res/favicon.ico">
<link rel="apple-touch-icon-precomposed" sizes="152x152" href="res/apple-touch-icon-152x152.png">
<link rel="apple-touch-icon-precomposed" sizes="144x144" href="res/apple-touch-icon-144x144.png">
<link rel="icon" type="image/png" href="res/favicon-32x32.png" sizes="32x32">
<link rel="icon" type="image/png" href="res/favicon-16x16.png" sizes="16x16">
<link rel="icon" type="image/x-icon" href="res/favicon.ico">
<link href="/css/source-sans-pro.min.css" rel="stylesheet">
<link href="/css/stylesheet.css" rel="stylesheet">
<link href="/css/custom-dialog.css" rel="stylesheet">
<link href="/css/bookmark-item.css" rel="stylesheet">
<link href="css/source-sans-pro.min.css" rel="stylesheet">
<link href="css/stylesheet.css" rel="stylesheet">
<link href="css/custom-dialog.css" rel="stylesheet">
<link href="css/bookmark-item.css" rel="stylesheet">
<script src="/js/dayjs.min.js"></script>
<script src="/js/vue.min.js"></script>
<script src="js/dayjs.min.js"></script>
<script src="js/vue.min.js"></script>
</head>
<body class="night">
<div id="content-scene" :class="{night: appOptions.nightMode}">
<div id="header">
<p id="metadata" v-cloak>Added {{localtime()}}</p>
<p id="title">$$.Title$$</p>
<p id="title">$$.Book.Title$$</p>
<div id="links">
<a href="$$.URL$$" target="_blank" rel="noopener">View Original</a>
$$if .HasArchive$$
<a href="/bookmark/$$.ID$$/archive">View Archive</a>
<a href="$$.Book.URL$$" target="_blank" rel="noopener">View Original</a>
$$if .Book.HasArchive$$
<a href="bookmark/$$.Book.ID$$/archive">View Archive</a>
$$end$$
</div>
</div>
<div id="content" v-pre>
$$html .HTML$$
$$html .Book.HTML$$
</div>
</div>
<script type="module">
// Create initial variable
import basePage from "/js/page/base.js";
import basePage from "./js/page/base.js";
new Vue({
el: '#content-scene',
mixins: [basePage],
data: {
modified: "$$.Modified$$"
modified: "$$.Book.Modified$$"
},
methods: {
localtime() {
var strTime = `${this.modified.replace(" ", "T")}+00:00`;
var strTime = this.modified.replace(" ", "T");
if (!strTime.endsWith("Z")) {
strTime += "Z";
}
return dayjs(strTime).format("D MMMM YYYY, HH:mm:ss");
},
loadSetting() {

View File

@ -1 +1 @@
:root{--dialogHeaderBg:#292929;--colorDialogHeader:#FFF}.custom-dialog-overlay{display:-webkit-box;display:flex;-webkit-box-orient:vertical;-webkit-box-direction:normal;flex-flow:column nowrap;-webkit-box-align:center;align-items:center;-webkit-box-pack:center;justify-content:center;min-width:0;min-height:0;overflow:hidden;position:fixed;top:0;left:0;width:100vw;height:100vh;z-index:10001;background-color:rgba(0,0,0,0.6);padding:32px}.custom-dialog-overlay .custom-dialog{display:-webkit-box;display:flex;-webkit-box-orient:vertical;-webkit-box-direction:normal;flex-flow:column nowrap;width:100%;max-width:400px;min-height:0;max-height:100%;overflow:auto;background-color:var(--contentBg);font-size:16px}.custom-dialog-overlay .custom-dialog .custom-dialog-header{padding:16px;color:var(--colorDialogHeader);background-color:var(--dialogHeaderBg);font-weight:600;font-size:1em;text-transform:uppercase;border-bottom:1px solid var(--border)}.custom-dialog-overlay .custom-dialog .custom-dialog-body{padding:16px 16px 0;display:grid;max-height:100%;min-height:80px;min-width:0;overflow:auto;font-size:1em;grid-template-columns:max-content 1fr;-webkit-box-align:baseline;align-items:baseline;grid-gap:16px}.custom-dialog-overlay .custom-dialog .custom-dialog-body::after{content:"";display:block;min-height:1px;grid-column-end:-1;grid-column-start:1}.custom-dialog-overlay .custom-dialog .custom-dialog-body .custom-dialog-content{grid-column-end:-1;grid-column-start:1;color:var(--color);align-self:baseline}.custom-dialog-overlay .custom-dialog .custom-dialog-body>label{color:var(--color);padding:8px 0;font-size:1em}.custom-dialog-overlay .custom-dialog .custom-dialog-body>input[type="text"],.custom-dialog-overlay .custom-dialog .custom-dialog-body>input[type="password"],.custom-dialog-overlay .custom-dialog .custom-dialog-body>textarea{color:var(--color);padding:8px;font-size:1em;border:1px solid var(--border);background-color:var(--contentBg);min-width:0}.custom-dialog-overlay 
.custom-dialog .custom-dialog-body .checkbox-field{color:var(--color);font-size:1em;display:-webkit-box;display:flex;-webkit-box-orient:horizontal;-webkit-box-direction:normal;flex-flow:row nowrap;padding:0;grid-column-start:1;grid-column-end:-1;cursor:pointer}.custom-dialog-overlay .custom-dialog .custom-dialog-body .checkbox-field:hover,.custom-dialog-overlay .custom-dialog .custom-dialog-body .checkbox-field:focus{text-decoration:underline;-webkit-text-decoration-color:var(--main);text-decoration-color:var(--main)}.custom-dialog-overlay .custom-dialog .custom-dialog-body .checkbox-field>input[type="checkbox"]{margin-right:8px}.custom-dialog-overlay .custom-dialog .custom-dialog-body>textarea{height:6em;min-height:37px;resize:vertical}.custom-dialog-overlay .custom-dialog .custom-dialog-body>.suggestion{position:absolute;display:block;padding:8px;background-color:var(--contentBg);border:1px solid var(--border);color:var(--color);font-size:.9em}.custom-dialog-overlay .custom-dialog .custom-dialog-footer{padding:16px;display:-webkit-box;display:flex;-webkit-box-orient:horizontal;-webkit-box-direction:normal;flex-flow:row wrap;-webkit-box-pack:end;justify-content:flex-end;border-top:1px solid var(--border)}.custom-dialog-overlay .custom-dialog .custom-dialog-footer>a{padding:0 8px;font-size:.9em;font-weight:600;color:var(--color);text-transform:uppercase}.custom-dialog-overlay .custom-dialog .custom-dialog-footer>a:hover,.custom-dialog-overlay .custom-dialog .custom-dialog-footer>a:focus{outline:none;color:var(--main)}.custom-dialog-overlay .custom-dialog .custom-dialog-footer>i.fa-spinner.fa-spin{width:19px;line-height:19px;text-align:center;color:var(--color)}
:root{--dialogHeaderBg:#292929;--colorDialogHeader:#FFF}.custom-dialog-overlay{display:-webkit-box;display:flex;-webkit-box-orient:vertical;-webkit-box-direction:normal;flex-flow:column nowrap;-webkit-box-align:center;align-items:center;-webkit-box-pack:center;justify-content:center;min-width:0;min-height:0;overflow:hidden;position:fixed;top:0;left:0;width:100vw;height:100vh;z-index:10001;background-color:rgba(0,0,0,0.6);padding:32px}.custom-dialog-overlay .custom-dialog{display:-webkit-box;display:flex;-webkit-box-orient:vertical;-webkit-box-direction:normal;flex-flow:column nowrap;width:100%;max-width:400px;min-height:0;max-height:100%;overflow:auto;background-color:var(--contentBg);font-size:16px}.custom-dialog-overlay .custom-dialog .custom-dialog-header{padding:16px;color:var(--colorDialogHeader);background-color:var(--dialogHeaderBg);font-weight:600;font-size:1em;text-transform:uppercase;border-bottom:1px solid var(--border)}.custom-dialog-overlay .custom-dialog .custom-dialog-body{padding:16px 16px 0;display:grid;max-height:100%;min-height:80px;min-width:0;overflow:auto;font-size:1em;grid-template-columns:max-content 1fr;-webkit-align-content:start;align-content:start;;-webkit-box-align:baseline;align-items:baseline;grid-gap:16px}.custom-dialog-overlay .custom-dialog .custom-dialog-body::after{content:"";display:block;min-height:1px;grid-column-end:-1;grid-column-start:1}.custom-dialog-overlay .custom-dialog .custom-dialog-body .custom-dialog-content{grid-column-end:-1;grid-column-start:1;color:var(--color);align-self:baseline}.custom-dialog-overlay .custom-dialog .custom-dialog-body>label{color:var(--color);padding:8px 0;font-size:1em}.custom-dialog-overlay .custom-dialog .custom-dialog-body>input[type="text"],.custom-dialog-overlay .custom-dialog .custom-dialog-body>input[type="password"],.custom-dialog-overlay .custom-dialog .custom-dialog-body>textarea{color:var(--color);padding:8px;font-size:1em;border:1px solid 
var(--border);background-color:var(--contentBg);min-width:0}.custom-dialog-overlay .custom-dialog .custom-dialog-body .checkbox-field{color:var(--color);font-size:1em;display:-webkit-box;display:flex;-webkit-box-orient:horizontal;-webkit-box-direction:normal;flex-flow:row nowrap;padding:0;grid-column-start:1;grid-column-end:-1;cursor:pointer}.custom-dialog-overlay .custom-dialog .custom-dialog-body .checkbox-field:hover,.custom-dialog-overlay .custom-dialog .custom-dialog-body .checkbox-field:focus{text-decoration:underline;-webkit-text-decoration-color:var(--main);text-decoration-color:var(--main)}.custom-dialog-overlay .custom-dialog .custom-dialog-body .checkbox-field>input[type="checkbox"]{margin-right:8px}.custom-dialog-overlay .custom-dialog .custom-dialog-body>textarea{height:6em;min-height:37px;resize:vertical}.custom-dialog-overlay .custom-dialog .custom-dialog-body>.suggestion{position:absolute;display:block;padding:8px;background-color:var(--contentBg);border:1px solid var(--border);color:var(--color);font-size:.9em}.custom-dialog-overlay .custom-dialog .custom-dialog-footer{padding:16px;display:-webkit-box;display:flex;-webkit-box-orient:horizontal;-webkit-box-direction:normal;flex-flow:row wrap;-webkit-box-pack:end;justify-content:flex-end;border-top:1px solid var(--border)}.custom-dialog-overlay .custom-dialog .custom-dialog-footer>a{padding:0 8px;font-size:.9em;font-weight:600;color:var(--color);text-transform:uppercase}.custom-dialog-overlay .custom-dialog .custom-dialog-footer>a:hover,.custom-dialog-overlay .custom-dialog .custom-dialog-footer>a:focus{outline:none;color:var(--main)}.custom-dialog-overlay .custom-dialog .custom-dialog-footer>i.fa-spinner.fa-spin{width:19px;line-height:19px;text-align:center;color:var(--color)}

View File

@ -2,25 +2,26 @@
<html lang="en">
<head>
<base href="$$.$$">
<title>Shiori - Bookmarks Manager</title>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="apple-touch-icon-precomposed" sizes="152x152" href="/res/apple-touch-icon-152x152.png">
<link rel="apple-touch-icon-precomposed" sizes="144x144" href="/res/apple-touch-icon-144x144.png">
<link rel="icon" type="image/png" href="/res/favicon-32x32.png" sizes="32x32">
<link rel="icon" type="image/png" href="/res/favicon-16x16.png" sizes="16x16">
<link rel="icon" type="image/x-icon" href="/res/favicon.ico">
<link rel="apple-touch-icon-precomposed" sizes="152x152" href="res/apple-touch-icon-152x152.png">
<link rel="apple-touch-icon-precomposed" sizes="144x144" href="res/apple-touch-icon-144x144.png">
<link rel="icon" type="image/png" href="res/favicon-32x32.png" sizes="32x32">
<link rel="icon" type="image/png" href="res/favicon-16x16.png" sizes="16x16">
<link rel="icon" type="image/x-icon" href="res/favicon.ico">
<link href="/css/source-sans-pro.min.css" rel="stylesheet">
<link href="/css/fontawesome.min.css" rel="stylesheet">
<link href="/css/stylesheet.css" rel="stylesheet">
<link href="/css/custom-dialog.css" rel="stylesheet">
<link href="/css/bookmark-item.css" rel="stylesheet">
<link href="css/source-sans-pro.min.css" rel="stylesheet">
<link href="css/fontawesome.min.css" rel="stylesheet">
<link href="css/stylesheet.css" rel="stylesheet">
<link href="css/custom-dialog.css" rel="stylesheet">
<link href="css/bookmark-item.css" rel="stylesheet">
<script src="/js/vue.min.js"></script>
<script src="/js/url.min.js"></script>
<script src="js/vue.min.js"></script>
<script src="js/url.min.js"></script>
</head>
<body class="night">
@ -84,22 +85,21 @@
secondText: "No",
mainClick: () => {
this.dialog.loading = true;
fetch("/api/logout", { method: "post" })
.then(response => {
if (!response.ok) throw response;
return response;
fetch(new URL("api/logout", document.baseURI), {
method: "post"
}).then(response => {
if (!response.ok) throw response;
return response;
}).then(() => {
localStorage.removeItem("shiori-account");
document.cookie = `session-id=; Path=${new URL(document.baseURI).pathname}; Expires=Thu, 01 Jan 1970 00:00:00 GMT;`;
location.href = new URL("login", document.baseURI);
}).catch(err => {
this.dialog.loading = false;
this.getErrorMessage(err).then(msg => {
this.showErrorDialog(msg);
})
.then(() => {
localStorage.removeItem("shiori-account");
document.cookie = "session-id=; Path=/; Expires=Thu, 01 Jan 1970 00:00:01 GMT;";
location.href = "/login";
})
.catch(err => {
this.dialog.loading = false;
this.getErrorMessage(err).then(msg => {
this.showErrorDialog(msg);
})
});
});
}
});
},

View File

@ -64,9 +64,13 @@ export default {
},
computed: {
mainURL() {
if (this.hasContent) return `/bookmark/${this.id}/content`;
else if (this.hasArchive) return `/bookmark/${this.id}/archive`;
else return this.url;
if (this.hasContent) {
return new URL(`bookmark/${this.id}/content`, document.baseURI);
} else if (this.hasArchive) {
return new URL(`bookmark/${this.id}/archive`, document.baseURI);
} else {
return this.url;
}
},
hostnameURL() {
var url = new URL(this.url);

View File

@ -152,7 +152,7 @@ export default {
var value = field.value;
if (field.type === 'number') value = parseInt(value, 10) || 0;
else if (field.type === 'float') value = parseFloat(value) || 0.0;
else if (field.type === 'check') value = value !== '';
else if (field.type === 'check') value = Boolean(value);
data[field.name] = value;
})

View File

@ -81,7 +81,7 @@ export default {
}
},
isSessionError(err) {
switch (err.replace(/\(\d+\)/g, "").trim().toLowerCase()) {
switch (err.toString().replace(/\(\d+\)/g, "").trim().toLowerCase()) {
case "session is not exist":
case "session has been expired":
return true
@ -101,10 +101,10 @@ export default {
mainClick: () => {
this.dialog.visible = false;
if (sessionError) {
var loginUrl = new Url("/login");
var loginUrl = new Url("login", document.baseURI);
loginUrl.query.dst = window.location.href;
document.cookie = "session-id=; Path=/; Expires=Thu, 01 Jan 1970 00:00:01 GMT;";
document.cookie = `session-id=; Path=${new URL(document.baseURI).pathname}; Expires=Thu, 01 Jan 1970 00:00:00 GMT;`;
location.href = loginUrl.toString();
}
}

View File

@ -197,7 +197,7 @@ export default {
keyword = keyword.trim().replace(/\s+/g, " ");
// Prepare URL for API
var url = new URL("/api/bookmarks", document.URL);
var url = new URL("api/bookmarks", document.baseURI);
url.search = new URLSearchParams({
keyword: keyword,
tags: tags.join(","),
@ -228,7 +228,7 @@ export default {
page: this.page
};
var url = new Url("/");
var url = new Url(document.baseURI);
url.hash = "home";
url.clearQuery();
if (this.page > 1) url.query.page = this.page;
@ -239,7 +239,7 @@ export default {
// Fetch tags if requested
if (fetchTags) {
return fetch("/api/tags");
return fetch(new URL("api/tags", document.baseURI));
} else {
this.loading = false;
throw skipFetchTags;
@ -408,7 +408,7 @@ export default {
};
this.dialog.loading = true;
fetch("/api/bookmarks", {
fetch(new URL("api/bookmarks", document.baseURI), {
method: "post",
body: JSON.stringify(data),
headers: { "Content-Type": "application/json" }
@ -497,7 +497,7 @@ export default {
// Send data
this.dialog.loading = true;
fetch("/api/bookmarks", {
fetch(new URL("api/bookmarks", document.baseURI), {
method: "put",
body: JSON.stringify(book),
headers: { "Content-Type": "application/json" }
@ -552,7 +552,7 @@ export default {
secondText: "No",
mainClick: () => {
this.dialog.loading = true;
fetch("/api/bookmarks", {
fetch(new URL("api/bookmarks", document.baseURI), {
method: "delete",
body: JSON.stringify(ids),
headers: { "Content-Type": "application/json" },
@ -622,7 +622,7 @@ export default {
};
this.dialog.loading = true;
fetch("/api/cache", {
fetch(new URL("api/cache", document.baseURI), {
method: "put",
body: JSON.stringify(data),
headers: { "Content-Type": "application/json" },
@ -700,7 +700,7 @@ export default {
}
this.dialog.loading = true;
fetch("/api/bookmarks/tags", {
fetch(new URL("api/bookmarks/tags", document.baseURI), {
method: "put",
body: JSON.stringify(request),
headers: { "Content-Type": "application/json" },
@ -766,7 +766,7 @@ export default {
};
this.dialog.loading = true;
fetch("/api/tag", {
fetch(new URL("api/tag", document.baseURI), {
method: "PUT",
body: JSON.stringify(newData),
headers: { "Content-Type": "application/json" },

View File

@ -98,7 +98,7 @@ export default {
if (this.loading) return;
this.loading = true;
fetch("/api/accounts")
fetch(new URL("api/accounts", document.baseURI))
.then(response => {
if (!response.ok) throw response;
return response.json();
@ -163,7 +163,7 @@ export default {
}
this.dialog.loading = true;
fetch("/api/accounts", {
fetch(new URL("api/accounts", document.baseURI), {
method: "post",
body: JSON.stringify(request),
headers: {
@ -246,7 +246,7 @@ export default {
}
this.dialog.loading = true;
fetch("/api/accounts", {
fetch(new URL("api/accounts", document.baseURI), {
method: "put",
body: JSON.stringify(request),
headers: {

View File

@ -50,6 +50,7 @@
overflow : auto;
font-size : 1em;
grid-template-columns: max-content 1fr;
align-content : start;
align-items : baseline;
grid-gap : 16px;

View File

@ -2,48 +2,51 @@
<html lang="en">
<head>
<base href="$$.$$">
<title>Login - Shiori</title>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<link rel="apple-touch-icon-precomposed" sizes="152x152" href="/res/apple-touch-icon-152x152.png">
<link rel="apple-touch-icon-precomposed" sizes="144x144" href="/res/apple-touch-icon-144x144.png">
<link rel="icon" type="image/png" href="/res/favicon-32x32.png" sizes="32x32">
<link rel="icon" type="image/png" href="/res/favicon-16x16.png" sizes="16x16">
<link rel="icon" type="image/x-icon" href="/res/favicon.ico">
<link rel="apple-touch-icon-precomposed" sizes="152x152" href="res/apple-touch-icon-152x152.png">
<link rel="apple-touch-icon-precomposed" sizes="144x144" href="res/apple-touch-icon-144x144.png">
<link rel="icon" type="image/png" href="res/favicon-32x32.png" sizes="32x32">
<link rel="icon" type="image/png" href="res/favicon-16x16.png" sizes="16x16">
<link rel="icon" type="image/x-icon" href="res/favicon.ico">
<link href="/css/source-sans-pro.min.css" rel="stylesheet">
<link href="/css/fontawesome.min.css" rel="stylesheet">
<link href="/css/stylesheet.css" rel="stylesheet">
<link href="css/source-sans-pro.min.css" rel="stylesheet">
<link href="css/fontawesome.min.css" rel="stylesheet">
<link href="css/stylesheet.css" rel="stylesheet">
<script src="/js/vue.min.js"></script>
<script src="/js/url.min.js"></script>
<script src="js/vue.min.js"></script>
<script src="js/url.min.js"></script>
</head>
<body>
<div id="login-scene" :class="{night: nightMode}">
<p class="error-message" v-if="error !== ''">{{error}}</p>
<div id="login-box">
<div id="logo-area">
<p id="logo">
<span></span>shiori
</p>
<p id="tagline">simple bookmark manager</p>
</div>
<div id="input-area">
<label for="username">Username: </label>
<input type="text" name="username" v-model.trim="username" placeholder="Username" tabindex="1">
<label for="password">Password: </label>
<input type="password" name="password" v-model.trim="password" placeholder="Password" tabindex="2" @keyup.enter="login">
<label class="checkbox-field"><input type="checkbox" name="remember" v-model="remember" tabindex="3">Remember me</label>
</div>
<div id="button-area">
<a v-if="loading">
<i class="fas fa-fw fa-spinner fa-spin"></i>
</a>
<a v-else class="button" tabindex="4" @click="login" @keyup.enter="login">Log In</a>
</div>
<form @submit.prevent="login">
<div id="logo-area">
<p id="logo">
<span></span>shiori
</p>
<p id="tagline">simple bookmark manager</p>
</div>
<div id="input-area">
<label for="username">Username: </label>
<input id="username" type="text" name="username" placeholder="Username" tabindex="1">
<label for="password">Password: </label>
<input id="password" type="password" name="password" placeholder="Password" tabindex="2" @keyup.enter="login">
<label class="checkbox-field"><input type="checkbox" name="remember" v-model="remember" tabindex="3">Remember me</label>
</div>
<div id="button-area">
<a v-if="loading">
<i class="fas fa-fw fa-spinner fa-spin"></i>
</a>
<a v-else class="button" tabindex="4" @click="login" @keyup.enter="login">Log In</a>
</div>
</form>
</div>
</div>
@ -71,6 +74,10 @@
}
},
login() {
// needed to work around autofill issue
// https://github.com/facebook/react/issues/1159#issuecomment-506584346
this.username = document.querySelector('#username').value;
this.password = document.querySelector('#password').value;
// Validate input
if (this.username === "") {
this.error = "Username must not empty";
@ -78,22 +85,28 @@
}
// Remove old cookie
document.cookie = "session-id=; Path=/; Expires=Thu, 01 Jan 1970 00:00:01 GMT;";
document.cookie = `session-id=; Path=${new URL(document.baseURI).pathname}; Expires=Thu, 01 Jan 1970 00:00:00 GMT;`;
// Send request
this.loading = true;
fetch("/api/login", {
var sessionAge = this.remember ? 12 : 1;
fetch(new URL("api/login", document.baseURI), {
method: "post",
body: JSON.stringify({
username: this.username,
password: this.password,
remember: this.remember ? 12 : 1,
remember: sessionAge,
}),
headers: { "Content-Type": "application/json" },
}).then(response => {
if (!response.ok) throw response;
return response.json();
}).then(json => {
// Save session id
var expTime = new Date(Date.now() + sessionAge * 3600 * 1000).toUTCString();
document.cookie = `session-id=${json.session}; Path=${new URL(document.baseURI).pathname}; Expires=${expTime}`;
// Save account data
localStorage.setItem("shiori-account", JSON.stringify(json.account));
@ -106,7 +119,7 @@
dstPage = "";
}
var newUrl = new Url(dstUrl || "/");
var newUrl = new Url(dstUrl || document.baseURI);
newUrl.hash = dstPage;
location.href = newUrl;
}).catch(err => {
@ -132,4 +145,4 @@
</script>
</body>
</html>
</html>

File diff suppressed because one or more lines are too long

View File

@ -42,7 +42,7 @@ func (h *handler) apiLogin(w http.ResponseWriter, r *http.Request, ps httprouter
// Save session ID to cache
strSessionID := sessionID.String()
h.SessionCache.Set(strSessionID, account.Owner, expTime)
h.SessionCache.Set(strSessionID, account, expTime)
// Save user's session IDs to cache as well
// useful for mass logout
@ -53,15 +53,7 @@ func (h *handler) apiLogin(w http.ResponseWriter, r *http.Request, ps httprouter
}
h.UserCache.Set(request.Username, sessionIDs, -1)
// Return session ID to user in cookies
http.SetCookie(w, &http.Cookie{
Name: "session-id",
Value: strSessionID,
Path: "/",
Expires: time.Now().Add(expTime),
})
// Send account data
// Send login result
account.Password = ""
loginResult := struct {
Session string `json:"session"`
@ -183,7 +175,7 @@ func (h *handler) apiGetBookmarks(w http.ResponseWriter, r *http.Request, ps htt
archivePath := fp.Join(h.DataDir, "archive", strID)
if fileExists(imgPath) {
bookmarks[i].ImageURL = path.Join("/", "bookmark", strID, "thumb")
bookmarks[i].ImageURL = path.Join(h.RootPath, "bookmark", strID, "thumb")
}
if fileExists(archivePath) {
@ -391,6 +383,7 @@ func (h *handler) apiUpdateBookmark(w http.ResponseWriter, r *http.Request, ps h
// Add thumbnail image to the saved bookmarks again
newBook := res[0]
newBook.ImageURL = request.ImageURL
newBook.HasArchive = request.HasArchive
// Return new saved result
w.Header().Set("Content-Type", "application/json")
@ -573,7 +566,7 @@ func (h *handler) apiUpdateBookmarkTags(w http.ResponseWriter, r *http.Request,
for i := range bookmarks {
strID := strconv.Itoa(bookmarks[i].ID)
imgPath := fp.Join(h.DataDir, "thumb", strID)
imgURL := path.Join("/", "bookmark", strID, "thumb")
imgURL := path.Join(h.RootPath, "bookmark", strID, "thumb")
if fileExists(imgPath) {
bookmarks[i].ImageURL = imgURL

View File

@ -4,10 +4,8 @@ import (
"bytes"
"compress/gzip"
"fmt"
"html/template"
"io"
"net/http"
nurl "net/url"
"os"
"path"
fp "path/filepath"
@ -15,32 +13,36 @@ import (
"strings"
"github.com/PuerkitoBio/goquery"
"github.com/go-shiori/shiori/pkg/warc"
"github.com/go-shiori/shiori/internal/model"
"github.com/go-shiori/warc"
"github.com/julienschmidt/httprouter"
)
// serveFile is handler for general file request
func (h *handler) serveFile(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
err := serveFile(w, r.URL.Path, true)
rootPath := strings.Trim(h.RootPath, "/")
urlPath := strings.Trim(r.URL.Path, "/")
filePath := strings.TrimPrefix(urlPath, rootPath)
err := serveFile(w, filePath, true)
checkError(err)
}
// serveJsFile is handler for GET /js/*filepath
func (h *handler) serveJsFile(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
filePath := r.URL.Path
fileName := path.Base(filePath)
fileDir := path.Dir(filePath)
jsFilePath := ps.ByName("filepath")
jsFilePath = path.Join("js", jsFilePath)
jsDir, jsName := path.Split(jsFilePath)
if developmentMode && fp.Ext(fileName) == ".js" && strings.HasSuffix(fileName, ".min.js") {
fileName = strings.TrimSuffix(fileName, ".min.js") + ".js"
filePath = path.Join(fileDir, fileName)
if assetExists(filePath) {
redirectPage(w, r, filePath)
return
if developmentMode && fp.Ext(jsName) == ".js" && strings.HasSuffix(jsName, ".min.js") {
jsName = strings.TrimSuffix(jsName, ".min.js") + ".js"
tmpPath := path.Join(jsDir, jsName)
if assetExists(tmpPath) {
jsFilePath = tmpPath
}
}
err := serveFile(w, r.URL.Path, true)
err := serveFile(w, jsFilePath, true)
checkError(err)
}
@ -49,12 +51,17 @@ func (h *handler) serveIndexPage(w http.ResponseWriter, r *http.Request, ps http
// Make sure session still valid
err := h.validateSession(r)
if err != nil {
redirectURL := createRedirectURL("/login", r.URL.String())
newPath := path.Join(h.RootPath, "/login")
redirectURL := createRedirectURL(newPath, r.URL.String())
redirectPage(w, r, redirectURL)
return
}
err = serveFile(w, "index.html", false)
if developmentMode {
h.prepareTemplates()
}
err = h.templates["index"].Execute(w, h.RootPath)
checkError(err)
}
@ -63,11 +70,16 @@ func (h *handler) serveLoginPage(w http.ResponseWriter, r *http.Request, ps http
// Make sure session is not valid
err := h.validateSession(r)
if err == nil {
redirectPage(w, r, "/")
redirectURL := path.Join(h.RootPath, "/")
redirectPage(w, r, redirectURL)
return
}
err = serveFile(w, "login.html", false)
if developmentMode {
h.prepareTemplates()
}
err = h.templates["login"].Execute(w, h.RootPath)
checkError(err)
}
@ -88,7 +100,8 @@ func (h *handler) serveBookmarkContent(w http.ResponseWriter, r *http.Request, p
if bookmark.Public != 1 {
err = h.validateSession(r)
if err != nil {
redirectURL := createRedirectURL("/login", r.URL.String())
newPath := path.Join(h.RootPath, "/login")
redirectURL := createRedirectURL(newPath, r.URL.String())
redirectPage(w, r, redirectURL)
return
}
@ -116,7 +129,7 @@ func (h *handler) serveBookmarkContent(w http.ResponseWriter, r *http.Request, p
// Find all image and convert its source to use the archive URL.
createArchivalURL := func(archivalName string) string {
archivalURL := *r.URL
archivalURL.Path = path.Join("/", "bookmark", strID, "archive", archivalName)
archivalURL.Path = path.Join(h.RootPath, "bookmark", strID, "archive", archivalName)
return archivalURL.String()
}
@ -162,18 +175,17 @@ func (h *handler) serveBookmarkContent(w http.ResponseWriter, r *http.Request, p
checkError(err)
}
// Create template
funcMap := template.FuncMap{
"html": func(s string) template.HTML {
return template.HTML(s)
},
// Execute template
if developmentMode {
h.prepareTemplates()
}
tplCache, err := createTemplate("content.html", funcMap)
checkError(err)
tplData := struct {
RootPath string
Book model.Bookmark
}{h.RootPath, bookmark}
// Execute template
err = tplCache.Execute(w, &bookmark)
err = h.templates["content"].Execute(w, &tplData)
checkError(err)
}
@ -230,13 +242,9 @@ func (h *handler) serveBookmarkArchive(w http.ResponseWriter, r *http.Request, p
if bookmark.Public != 1 {
err = h.validateSession(r)
if err != nil {
urlQueries := nurl.Values{}
urlQueries.Set("dst", r.URL.Path)
redirectURL, _ := nurl.Parse("/login")
redirectURL.RawQuery = urlQueries.Encode()
redirectPage(w, r, redirectURL.String())
newPath := path.Join(h.RootPath, "/login")
redirectURL := createRedirectURL(newPath, r.URL.String())
redirectPage(w, r, redirectURL)
return
}
}
@ -274,23 +282,17 @@ func (h *handler) serveBookmarkArchive(w http.ResponseWriter, r *http.Request, p
checkError(err)
// Add Shiori overlay
tpl, err := template.New("archive").Parse(
`<div id="shiori-archive-header">
<p id="shiori-logo"><span></span>shiori</p>
<div class="spacer"></div>
<a href="{{.URL}}" target="_blank">View Original</a>
{{if .HasContent}}
<a href="/bookmark/{{.ID}}/content">View Readable</a>
{{end}}
</div>`)
checkError(err)
tplOutput := bytes.NewBuffer(nil)
err = tpl.Execute(tplOutput, &bookmark)
err = h.templates["archive"].Execute(tplOutput, &bookmark)
checkError(err)
doc.Find("head").AppendHtml(`<link href="/css/source-sans-pro.min.css" rel="stylesheet">`)
doc.Find("head").AppendHtml(`<link href="/css/archive.css" rel="stylesheet">`)
archiveCSSPath := path.Join(h.RootPath, "/css/archive.css")
sourceSansProCSSPath := path.Join(h.RootPath, "/css/source-sans-pro.min.css")
docHead := doc.Find("head")
docHead.PrependHtml(`<meta charset="UTF-8">`)
docHead.AppendHtml(`<link href="` + archiveCSSPath + `" rel="stylesheet">`)
docHead.AppendHtml(`<link href="` + sourceSansProCSSPath + `" rel="stylesheet">`)
doc.Find("body").PrependHtml(tplOutput.String())
// Revert back to HTML

View File

@ -2,9 +2,12 @@ package webserver
import (
"fmt"
"html/template"
"net/http"
"github.com/go-shiori/shiori/internal/database"
"github.com/go-shiori/shiori/internal/model"
"github.com/go-shiori/warc"
cch "github.com/patrickmn/go-cache"
)
@ -14,16 +17,18 @@ var developmentMode = false
type handler struct {
DB database.DB
DataDir string
RootPath string
UserCache *cch.Cache
SessionCache *cch.Cache
ArchiveCache *cch.Cache
templates map[string]*template.Template
}
// prepareSessionCache prepares session cache for future use
func (h *handler) prepareLoginCache() {
func (h *handler) prepareSessionCache() {
h.SessionCache.OnEvicted(func(key string, val interface{}) {
username := val.(string)
arr, found := h.UserCache.Get(username)
account := val.(model.Account)
arr, found := h.UserCache.Get(account.Username)
if !found {
return
}
@ -36,10 +41,54 @@ func (h *handler) prepareLoginCache() {
}
}
h.UserCache.Set(username, sessionIDs, -1)
h.UserCache.Set(account.Username, sessionIDs, -1)
})
}
// prepareArchiveCache installs an eviction hook on the archive cache so
// that every archive removed from the cache is closed.
func (h *handler) prepareArchiveCache() {
	closeEvicted := func(_ string, item interface{}) {
		// Values in this cache are stored as *warc.Archive.
		item.(*warc.Archive).Close()
	}
	h.ArchiveCache.OnEvicted(closeEvicted)
}
// prepareTemplates parses every template used by the handler (login,
// index, content and the archive overlay) and stores them in h.templates.
//
// The templates are collected in a local map first, and h.templates is
// only replaced once all of them have been parsed successfully. This
// matters because the page handlers call this method again on every
// request while in development mode: the map that other requests are
// reading is never mutated in place, and a parse error leaves the
// previously working templates untouched instead of a half-filled map.
//
// It returns the first error reported while creating a template.
func (h *handler) prepareTemplates() error {
	parsed := make(map[string]*template.Template, 4)

	// "html" lets a template output a string as-is, without escaping.
	funcMap := template.FuncMap{
		"html": func(s string) template.HTML {
			return template.HTML(s)
		},
	}

	// Create template for login, index and content
	for _, name := range []string{"login", "index", "content"} {
		tpl, err := createTemplate(name+".html", funcMap)
		if err != nil {
			return err
		}
		parsed[name] = tpl
	}

	// Create template for archive overlay. The "$$" delimiters are used
	// here instead of the default "{{" and "}}".
	// NOTE(review): the "View Readable" link below is hard-coded to
	// "/bookmark/..." and does not include h.RootPath - confirm whether it
	// should when the app is served under a sub path.
	overlay, err := template.New("archive").Delims("$$", "$$").Parse(
		`<div id="shiori-archive-header">
<p id="shiori-logo"><span></span>shiori</p>
<div class="spacer"></div>
<a href="$$.URL$$" target="_blank">View Original</a>
$$if .HasContent$$
<a href="/bookmark/$$.ID$$/content">View Readable</a>
$$end$$
</div>`)
	if err != nil {
		return err
	}
	parsed["archive"] = overlay

	// Everything parsed fine, publish the new set in one assignment.
	h.templates = parsed
	return nil
}
func (h *handler) getSessionID(r *http.Request) string {
// Get session-id from header and cookie
headerSessionID := r.Header.Get("X-Session-Id")
@ -76,7 +125,7 @@ func (h *handler) validateSession(r *http.Request) error {
// If this is not get request, make sure it's owner
if r.Method != "" && r.Method != "GET" {
if isOwner := val.(bool); !isOwner {
if account := val.(model.Account); !account.Owner {
return fmt.Errorf("account level is not sufficient")
}
}

View File

@ -3,62 +3,80 @@ package webserver
import (
"fmt"
"net/http"
"path"
"time"
"github.com/go-shiori/shiori/internal/database"
"github.com/go-shiori/shiori/pkg/warc"
"github.com/julienschmidt/httprouter"
cch "github.com/patrickmn/go-cache"
"github.com/sirupsen/logrus"
)
// Config holds the parameters that are used for starting the web server.
type Config struct {
// DB is the database handed over to the request handler.
DB database.DB
// DataDir is the data directory handed over to the request handler.
DataDir string
// ServerAddress and ServerPort are combined as "address:port" to form
// the address the HTTP server listens on.
ServerAddress string
ServerPort int
// RootPath is the path prefix that is joined in front of every route
// registered by ServeApp.
RootPath string
}
// ServeApp serves web interface in specified port
func ServeApp(DB database.DB, dataDir string, address string, port int) error {
func ServeApp(cfg Config) error {
// Create handler
hdl := handler{
DB: DB,
DataDir: dataDir,
DB: cfg.DB,
DataDir: cfg.DataDir,
UserCache: cch.New(time.Hour, 10*time.Minute),
SessionCache: cch.New(time.Hour, 10*time.Minute),
ArchiveCache: cch.New(time.Minute, 5*time.Minute),
RootPath: cfg.RootPath,
}
hdl.ArchiveCache.OnEvicted(func(key string, data interface{}) {
archive := data.(*warc.Archive)
archive.Close()
})
hdl.prepareSessionCache()
hdl.prepareArchiveCache()
err := hdl.prepareTemplates()
if err != nil {
return fmt.Errorf("failed to prepare templates: %v", err)
}
// Create router
router := httprouter.New()
router.GET("/js/*filepath", hdl.serveJsFile)
router.GET("/res/*filepath", hdl.serveFile)
router.GET("/css/*filepath", hdl.serveFile)
router.GET("/fonts/*filepath", hdl.serveFile)
// jp here means "join path", as in "join route with root path"
jp := func(route string) string {
return path.Join(cfg.RootPath, route)
}
router.GET("/", hdl.serveIndexPage)
router.GET("/login", hdl.serveLoginPage)
router.GET("/bookmark/:id/thumb", hdl.serveThumbnailImage)
router.GET("/bookmark/:id/content", hdl.serveBookmarkContent)
router.GET("/bookmark/:id/archive/*filepath", hdl.serveBookmarkArchive)
router.GET(jp("/js/*filepath"), hdl.serveJsFile)
router.GET(jp("/res/*filepath"), hdl.serveFile)
router.GET(jp("/css/*filepath"), hdl.serveFile)
router.GET(jp("/fonts/*filepath"), hdl.serveFile)
router.POST("/api/login", hdl.apiLogin)
router.POST("/api/logout", hdl.apiLogout)
router.GET("/api/bookmarks", hdl.apiGetBookmarks)
router.GET("/api/tags", hdl.apiGetTags)
router.PUT("/api/tag", hdl.apiRenameTag)
router.POST("/api/bookmarks", hdl.apiInsertBookmark)
router.DELETE("/api/bookmarks", hdl.apiDeleteBookmark)
router.PUT("/api/bookmarks", hdl.apiUpdateBookmark)
router.PUT("/api/cache", hdl.apiUpdateCache)
router.PUT("/api/bookmarks/tags", hdl.apiUpdateBookmarkTags)
router.POST("/api/bookmarks/ext", hdl.apiInsertViaExtension)
router.DELETE("/api/bookmarks/ext", hdl.apiDeleteViaExtension)
router.GET(jp("/"), hdl.serveIndexPage)
router.GET(jp("/login"), hdl.serveLoginPage)
router.GET(jp("/bookmark/:id/thumb"), hdl.serveThumbnailImage)
router.GET(jp("/bookmark/:id/content"), hdl.serveBookmarkContent)
router.GET(jp("/bookmark/:id/archive/*filepath"), hdl.serveBookmarkArchive)
router.GET("/api/accounts", hdl.apiGetAccounts)
router.PUT("/api/accounts", hdl.apiUpdateAccount)
router.POST("/api/accounts", hdl.apiInsertAccount)
router.DELETE("/api/accounts", hdl.apiDeleteAccount)
router.POST(jp("/api/login"), hdl.apiLogin)
router.POST(jp("/api/logout"), hdl.apiLogout)
router.GET(jp("/api/bookmarks"), hdl.apiGetBookmarks)
router.GET(jp("/api/tags"), hdl.apiGetTags)
router.PUT(jp("/api/tag"), hdl.apiRenameTag)
router.POST(jp("/api/bookmarks"), hdl.apiInsertBookmark)
router.DELETE(jp("/api/bookmarks"), hdl.apiDeleteBookmark)
router.PUT(jp("/api/bookmarks"), hdl.apiUpdateBookmark)
router.PUT(jp("/api/cache"), hdl.apiUpdateCache)
router.PUT(jp("/api/bookmarks/tags"), hdl.apiUpdateBookmarkTags)
router.POST(jp("/api/bookmarks/ext"), hdl.apiInsertViaExtension)
router.DELETE(jp("/api/bookmarks/ext"), hdl.apiDeleteViaExtension)
router.GET(jp("/api/accounts"), hdl.apiGetAccounts)
router.PUT(jp("/api/accounts"), hdl.apiUpdateAccount)
router.POST(jp("/api/accounts"), hdl.apiInsertAccount)
router.DELETE(jp("/api/accounts"), hdl.apiDeleteAccount)
// Route for panic
router.PanicHandler = func(w http.ResponseWriter, r *http.Request, arg interface{}) {
@ -66,7 +84,7 @@ func ServeApp(DB database.DB, dataDir string, address string, port int) error {
}
// Create server
url := fmt.Sprintf("%s:%d", address, port)
url := fmt.Sprintf("%s:%d", cfg.ServerAddress, cfg.ServerPort)
svr := &http.Server{
Addr: url,
Handler: router,

View File

@ -16,7 +16,26 @@ import (
"syscall"
)
var rxRepeatedStrip = regexp.MustCompile(`(?i)-+`)
// rxRepeatedStrip matches a run of one or more hyphens.
var rxRepeatedStrip = regexp.MustCompile(`(?i)-+`)

// presetMimeTypes lists the content types that are answered directly,
// without consulting the mime package. Keys are lower-case extensions
// including the leading dot.
var presetMimeTypes = map[string]string{
	".css":  "text/css; charset=utf-8",
	".html": "text/html; charset=utf-8",
	".js":   "application/javascript",
	".png":  "image/png",
}

// guessTypeByExtension returns the MIME type for the given file extension.
// The extension is lower-cased first; preset types take precedence and
// anything else is delegated to mime.TypeByExtension.
func guessTypeByExtension(ext string) string {
	lowered := strings.ToLower(ext)

	preset, known := presetMimeTypes[lowered]
	if !known {
		return mime.TypeByExtension(lowered)
	}

	return preset
}
func serveFile(w http.ResponseWriter, filePath string, cache bool) error {
// Open file
@ -42,7 +61,7 @@ func serveFile(w http.ResponseWriter, filePath string, cache bool) error {
// Set content type
ext := fp.Ext(filePath)
mimeType := mime.TypeByExtension(ext)
mimeType := guessTypeByExtension(ext)
if mimeType != "" {
w.Header().Set("Content-Type", mimeType)
w.Header().Set("X-Content-Type-Options", "nosniff")

View File

@ -1,198 +0,0 @@
package archiver
import (
"bytes"
"compress/gzip"
"fmt"
"strings"
"sync"
"time"
"go.etcd.io/bbolt"
)
// Archiver is struct for archiving an URL and its resources.
type Archiver struct {
// The embedded RWMutex is locked around every access to ResourceMap.
sync.RWMutex
// The embedded WaitGroup counts the running archive goroutines:
// StartArchiver adds to it before starting one, and archive marks
// itself done when it returns.
sync.WaitGroup
// DB is the bbolt database that SaveToStorage writes into.
DB *bbolt.DB
// ChDone is closed by StartArchiver once the wait group is drained,
// which ends its collecting loop.
ChDone chan struct{}
// ChErrors receives the errors reported by the archive goroutines.
ChErrors chan error
// ChWarnings is drained by StartArchiver and printed as warnings.
ChWarnings chan error
// ChRequest receives the resources that should be downloaded.
ChRequest chan ResourceURL
// ResourceMap is the set of download URLs that have been processed.
ResourceMap map[string]struct{}
// LogEnabled decides whether Log and Logf print anything.
LogEnabled bool
}
// Close closes the error, warning and request channels of the Archiver,
// in that order. ChDone is not touched here.
func (arc *Archiver) Close() {
	for _, ch := range []chan error{arc.ChErrors, arc.ChWarnings} {
		close(ch)
	}
	close(arc.ChRequest)
}
// StartArchiver runs the archival process and blocks until it finished.
//
// Every resource received from ChRequest that is not yet listed in
// ResourceMap is downloaded in its own goroutine, which in turn may send
// further sub resources back to ChRequest. Errors and warnings sent by
// those goroutines are collected and, when logging is enabled, printed
// once everything is done.
//
// NOTE(review): the collected errors are only logged, the returned slice
// is always nil - confirm whether callers expect them to be returned.
func (arc *Archiver) StartArchiver() []error {
	// Watcher: close ChDone as soon as no download is running any more.
	// The wait only starts after one second, presumably so that the first
	// requests have a chance to be registered in the wait group.
	go func() {
		time.Sleep(time.Second)
		arc.Wait()
		close(arc.ChDone)
	}()

	var (
		collectedErrors   []error
		collectedWarnings []error
	)

collect:
	for {
		select {
		case <-arc.ChDone:
			break collect

		case e := <-arc.ChErrors:
			collectedErrors = append(collectedErrors, e)

		case w := <-arc.ChWarnings:
			collectedWarnings = append(collectedWarnings, w)

		case req := <-arc.ChRequest:
			arc.RLock()
			_, known := arc.ResourceMap[req.DownloadURL]
			arc.RUnlock()

			if known {
				continue
			}

			arc.Add(1)
			go arc.archive(req)
		}
	}

	// Nothing left to do when logging is turned off
	if !arc.LogEnabled {
		return nil
	}

	arc.Logf(infoLog, "Download finished with %d warnings and %d errors\n",
		len(collectedWarnings), len(collectedErrors))

	if len(collectedWarnings) > 0 {
		fmt.Println()
		for _, w := range collectedWarnings {
			arc.Log(warningLog, w)
		}
	}

	for _, e := range collectedErrors {
		arc.Log(errorLog, e)
	}

	return nil
}
// archive downloads a single sub resource, processes it according to its
// content type, stores the result and finally queues every sub resource
// that was discovered while processing. Failures are reported through
// ChErrors.
func (arc *Archiver) archive(res ResourceURL) {
	// Always release the wait group slot taken by StartArchiver
	defer arc.Done()

	resp, err := DownloadData(res.DownloadURL)
	if err != nil {
		arc.ChErrors <- fmt.Errorf("failed to download %s: %v", res.DownloadURL, err)
		return
	}
	defer resp.Body.Close()

	// Only embedded HTML and CSS are parsed for further resources,
	// everything else is stored exactly as it was downloaded.
	var (
		processed ProcessResult
		children  []ResourceURL
	)

	contentType := resp.Header.Get("Content-Type")
	isEmbeddedHTML := strings.Contains(contentType, "text/html") && res.IsEmbedded
	isCSS := strings.Contains(contentType, "text/css")

	if isEmbeddedHTML {
		processed, children, err = arc.ProcessHTMLFile(res, resp.Body)
	} else if isCSS {
		processed, children, err = arc.ProcessCSSFile(res, resp.Body)
	} else {
		processed, err = arc.ProcessOtherFile(res, resp.Body)
	}

	if err != nil {
		arc.ChErrors <- fmt.Errorf("failed to process %s: %v", res.DownloadURL, err)
		return
	}

	// Remember this URL so it is not requested again
	arc.Lock()
	arc.ResourceMap[res.DownloadURL] = struct{}{}
	arc.Unlock()

	arc.Logf(infoLog, "Downloaded %s\n"+
		"\tArchive name %s\n"+
		"\tParent %s\n"+
		"\tSize %d Bytes\n",
		res.DownloadURL,
		res.ArchivalURL,
		res.Parent,
		resp.ContentLength)

	// Persist the processed content together with its content type
	processed.ContentType = contentType
	if err = arc.SaveToStorage(processed); err != nil {
		arc.ChErrors <- fmt.Errorf("failed to save %s: %v", res.DownloadURL, err)
		return
	}

	// Hand the discovered sub resources over to the request channel
	for _, child := range children {
		arc.ChRequest <- child
	}
}
// SaveToStorage gzips the content of the processing result and stores it
// in a bucket named after the result. The compressed bytes go under the
// "content" key, the content type under the "type" key. If a bucket with
// that name already exists, nothing is written.
func (arc *Archiver) SaveToStorage(result ProcessResult) error {
	// Compress content
	var compressed bytes.Buffer
	zw := gzip.NewWriter(&compressed)

	if _, err := zw.Write(result.Content); err != nil {
		return fmt.Errorf("compress failed: %v", err)
	}

	if err := zw.Close(); err != nil {
		return fmt.Errorf("compress failed: %v", err)
	}

	// Store the compressed content
	return arc.DB.Batch(func(tx *bbolt.Tx) error {
		// An existing bucket means this result has been saved before
		if tx.Bucket([]byte(result.Name)) != nil {
			return nil
		}

		bucket, err := tx.CreateBucketIfNotExists([]byte(result.Name))
		if err != nil {
			return err
		}

		if err = bucket.Put([]byte("content"), compressed.Bytes()); err != nil {
			return err
		}

		if err = bucket.Put([]byte("type"), []byte(result.ContentType)); err != nil {
			return err
		}

		return nil
	})
}

View File

@ -1,38 +0,0 @@
package archiver
import (
"crypto/tls"
"net/http"
"net/http/cookiejar"
"time"
)
// defaultClient is the HTTP client shared by every download. It is set up
// once in init.
var defaultClient *http.Client

// init builds the default client: a one minute timeout, a cookie jar and a
// transport that does not verify TLS certificates.
func init() {
	// The error of cookiejar.New is deliberately discarded here.
	jar, _ := cookiejar.New(nil)

	transport := &http.Transport{
		TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
	}

	defaultClient = &http.Client{
		Jar:       jar,
		Timeout:   time.Minute,
		Transport: transport,
	}
}
// DownloadData sends a GET request for the given URL through the shared
// default client, identifying itself with the Shiori user agent. It
// returns the response as delivered by the client, or the error from
// building or sending the request.
func DownloadData(url string) (*http.Response, error) {
	request, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, err
	}

	request.Header.Set("User-Agent", "Shiori/2.0.0 (+https://github.com/go-shiori/shiori)")

	return defaultClient.Do(request)
}

View File

@ -1,43 +0,0 @@
package archiver
import "github.com/sirupsen/logrus"
// logType selects the severity that Log and Logf print a message with.
type logType int
const (
// infoLog is printed with the logrus Info functions. Log and Logf also
// fall back to Info for any value that is not listed below.
infoLog logType = iota
// errorLog is printed with the logrus Error functions.
errorLog
// warningLog is printed with the logrus Warn functions.
warningLog
)
// Log prints the messages, ended with a newline, using the severity given
// by tp. It does nothing when logging is disabled.
func (arc *Archiver) Log(tp logType, msgs ...interface{}) {
	if !arc.LogEnabled {
		return
	}

	// Info is the fallback for every type that is neither error nor warning
	printLine := logrus.Infoln
	if tp == errorLog {
		printLine = logrus.Errorln
	} else if tp == warningLog {
		printLine = logrus.Warnln
	}

	printLine(msgs...)
}
// Logf prints a formatted message using the severity given by tp. It does
// nothing when logging is disabled.
func (arc *Archiver) Logf(tp logType, format string, msgs ...interface{}) {
	if !arc.LogEnabled {
		return
	}

	// Info is the fallback for every type that is neither error nor warning
	printFormatted := logrus.Infof
	if tp == errorLog {
		printFormatted = logrus.Errorf
	} else if tp == warningLog {
		printFormatted = logrus.Warnf
	}

	printFormatted(format, msgs...)
}

View File

@ -1,541 +0,0 @@
package archiver
import (
"bytes"
"fmt"
"io"
"mime"
nurl "net/url"
"path"
"regexp"
"strings"
"github.com/tdewolff/parse/v2/css"
"github.com/tdewolff/parse/v2/js"
"golang.org/x/net/html"
)
// ProcessResult is the result from content processing.
type ProcessResult struct {
// Name is filled with the archival URL of the resource by the Process*
// methods, and is used by SaveToStorage as the name of the bucket.
Name string
// ContentType is filled by archive with the Content-Type header of the
// response, and is stored under the "type" key.
ContentType string
// Content is the processed content. SaveToStorage compresses it with
// gzip before storing it under the "content" key.
Content []byte
}
// Regular expressions used while processing a document. All of them are
// case-insensitive.
var (
// rxImageMeta matches any text that contains "image" or "thumbnail".
rxImageMeta = regexp.MustCompile(`(?i)image|thumbnail`)
// rxLazyImageSrcset matches a jpg/jpeg/png/webp extension that is
// followed by whitespace and a digit, as found in srcset-like values.
rxLazyImageSrcset = regexp.MustCompile(`(?i)\.(jpg|jpeg|png|webp)\s+\d`)
// rxLazyImageSrc matches a value that consists of one whitespace-free
// token containing a jpg/jpeg/png/webp extension, optionally surrounded
// by whitespace.
rxLazyImageSrc = regexp.MustCompile(`(?i)^\s*\S+\.(jpg|jpeg|png|webp)\S*\s*$`)
// rxStyleURL matches a complete "url(...)" value and captures the part
// between the parentheses.
rxStyleURL = regexp.MustCompile(`(?i)^url\((.+)\)$`)
// rxJSContentType matches text/ or application/ followed by javascript
// or ecmascript.
rxJSContentType = regexp.MustCompile(`(?i)(text|application)/(java|ecma)script`)
)
// ProcessHTMLFile processes the HTML document submitted through the
// io.Reader. Scripts are removed, lazy loaded images and relative links
// are fixed, and the resources referenced by the document are collected.
// It returns the processed document, named after the archival URL of res,
// together with those resources.
func (arc *Archiver) ProcessHTMLFile(res ResourceURL, input io.Reader) (result ProcessResult, resources []ResourceURL, err error) {
	// Parse HTML document
	doc, err := html.Parse(input)
	if err != nil {
		return ProcessResult{}, nil, fmt.Errorf("failed to parse HTML for %s: %v", res.DownloadURL, err)
	}

	// The download URL must be absolute, with both scheme and host name
	pageURL, err := nurl.ParseRequestURI(res.DownloadURL)
	urlIsValid := err == nil && pageURL.Scheme != "" && pageURL.Hostname() != ""
	if !urlIsValid {
		return ProcessResult{}, nil, fmt.Errorf("url %s is not valid", res.DownloadURL)
	}

	// TODO: I'm still not really sure, but IMHO it's safer to disable Javascript
	// Ideally, we only want to remove XHR request by using function disableXHR(doc).
	// Unfortunately, the result is not that good for now, so it's still not used.
	removeNodes(getElementsByTagName(doc, "script"), nil)

	// Convert lazy loaded images to normal ones
	fixLazyImages(doc)

	// Convert hyperlinks with relative URL
	fixRelativeURIs(doc, pageURL)

	// Collect the resources of every node
	for _, node := range getElementsByTagName(doc, "*") {
		// Inline style comes first, no matter what the tag is
		resources = append(resources, extractInlineCSS(node, pageURL)...)

		// Then the resources that are specific to the tag
		var tagResources []ResourceURL
		switch tagName(node) {
		case "style":
			tagResources = extractStyleTag(node, pageURL)
		case "script":
			tagResources = extractScriptTag(node, pageURL)
		case "meta":
			tagResources = extractMetaTag(node, pageURL)
		case "img", "picture", "figure", "video", "audio", "source":
			tagResources = extractMediaTag(node, pageURL)
		case "link":
			tagResources = extractGenericTag(node, "href", pageURL)
		case "iframe":
			tagResources = extractGenericTag(node, "src", pageURL)
		case "object":
			tagResources = extractGenericTag(node, "data", pageURL)
		default:
			continue
		}

		resources = append(resources, tagResources...)
	}

	// The result is the outer HTML of the whole document
	processed := ProcessResult{
		Name:    res.ArchivalURL,
		Content: outerHTML(doc),
	}

	return processed, resources, nil
}
// ProcessCSSFile processes the CSS file submitted through the io.Reader.
// It returns the CSS rules produced by processCSS, named after the
// archival URL of res, together with the resources found in them.
func (arc *Archiver) ProcessCSSFile(res ResourceURL, input io.Reader) (result ProcessResult, resources []ResourceURL, err error) {
	// The download URL must be absolute, with both scheme and host name
	pageURL, err := nurl.ParseRequestURI(res.DownloadURL)
	urlIsValid := err == nil && pageURL.Scheme != "" && pageURL.Hostname() != ""
	if !urlIsValid {
		return ProcessResult{}, nil, fmt.Errorf("url %s is not valid", res.DownloadURL)
	}

	// Extract CSS rules
	cssRules, cssResources := processCSS(input, pageURL)

	processed := ProcessResult{
		Name:    res.ArchivalURL,
		Content: []byte(cssRules),
	}

	return processed, cssResources, nil
}
// ProcessOtherFile processes a file that is neither HTML, JS nor CSS. The
// data from the io.Reader is copied unchanged into the result, which is
// named after the archival URL of res.
func (arc *Archiver) ProcessOtherFile(res ResourceURL, input io.Reader) (result ProcessResult, err error) {
	// Read everything into memory
	var data bytes.Buffer
	if _, err = io.Copy(&data, input); err != nil {
		return ProcessResult{}, fmt.Errorf("failed to copy data: %v", err)
	}

	return ProcessResult{
		Name:    res.ArchivalURL,
		Content: data.Bytes(),
	}, nil
}
// disableXHR prepends a <script> to the <head> of the document which
// replaces fetch and XMLHttpRequest with stubs that do nothing, so that
// scripts in the page can no longer send requests. If the document has no
// <head>, a new one is created and prepended to doc.
//
// The injected script must be valid JavaScript, otherwise the browser
// rejects it as a whole and nothing gets disabled. For that reason the
// object literal uses plain property names (the previous
// "getResponseHeaders():" entries were a syntax error), and fetch is
// replaced by a function returning a promise that never settles
// ("new Promise()" without an executor throws a TypeError).
func disableXHR(doc *html.Node) {
	// Find the <head>, or create one if the document has none
	var head *html.Node
	if heads := getElementsByTagName(doc, "head"); len(heads) > 0 {
		head = heads[0]
	} else {
		head = createElement("head")
		prependChild(doc, head)
	}

	xhrDisabler := `
fetch = function() { return new Promise(function() {}); };
XMLHttpRequest = function() {};
XMLHttpRequest.prototype = {
open: function(){},
send: function(){},
abort: function(){},
setRequestHeader: function(){},
overrideMimeType: function(){},
getResponseHeader: function(){},
getAllResponseHeaders: function(){},
};`

	// Put the script first in <head>
	script := createElement("script")
	prependChild(script, createTextNode(xhrDisabler))
	prependChild(head, script)
}
// fixRelativeURIs rewrites the href of every <a> in the given document to
// the value returned by toAbsoluteURI for the page URL, and drops the
// attribute when that value is empty. Links without href are left alone,
// and links with a javascript: URI are replaced by their text content.
func fixRelativeURIs(doc *html.Node, pageURL *nurl.URL) {
	rewriteLink := func(anchor *html.Node, _ int) {
		target := getAttribute(anchor, "href")

		switch {
		case target == "":
			return

		case strings.HasPrefix(target, "javascript:"):
			// Such links won't work once the scripts have been removed
			// from the page, so only their text is kept.
			plainText := createTextNode(textContent(anchor))
			replaceNode(anchor, plainText)

		default:
			if absolute := toAbsoluteURI(target, pageURL); absolute != "" {
				setAttribute(anchor, "href", absolute)
			} else {
				removeAttribute(anchor, "href")
			}
		}
	}

	forEachNode(getAllNodesWithTag(doc, "a"), rewriteLink)
}
// fixLazyImages turns images and figures that keep their source in other
// attributes (like data-src) into images that load without JS. An element
// is handled when it has neither src nor srcset, or when its class
// contains "lazy".
func fixLazyImages(root *html.Node) {
	candidates := getAllNodesWithTag(root, "img", "picture", "figure")

	forEachNode(candidates, func(elem *html.Node, _ int) {
		src := getAttribute(elem, "src")
		srcset := getAttribute(elem, "srcset")
		tag := tagName(elem)
		class := className(elem)

		hasSource := src != "" || srcset != ""
		looksLazy := strings.Contains(strings.ToLower(class), "lazy")
		if hasSource && !looksLazy {
			return
		}

		// The length is checked again on every round on purpose, since
		// the attribute list may change while it is walked.
		for idx := 0; idx < len(elem.Attr); idx++ {
			attr := elem.Attr[idx]
			if attr.Key == "src" || attr.Key == "srcset" {
				continue
			}

			// Decide which attribute the value belongs to, if any
			var target string
			switch {
			case rxLazyImageSrcset.MatchString(attr.Val):
				target = "srcset"
			case rxLazyImageSrc.MatchString(attr.Val):
				target = "src"
			default:
				continue
			}

			switch tag {
			case "img", "picture":
				// for an img or picture the attribute is set directly
				setAttribute(elem, target, attr.Val)

			case "figure":
				// a <figure> that does not contain an image or picture
				// gets a new <img> placed inside it, see the nytimes-3
				// testcase for an example
				if len(getAllNodesWithTag(elem, "img", "picture")) == 0 {
					img := createElement("img")
					setAttribute(img, target, attr.Val)
					appendChild(elem, img)
				}
			}
		}
	})
}
// extractInlineCSS collects the resources referenced by the CSS rules in the
// node's style attribute. The attribute is rewritten with the output of
// processCSS, so its URLs point to the archival URLs. Returns nil when the
// node has no (or an empty) style attribute.
func extractInlineCSS(node *html.Node, pageURL *nurl.URL) []ResourceURL {
	inlineStyle := getAttribute(node, "style")
	if inlineStyle == "" {
		return nil
	}

	rewritten, found := processCSS(strings.NewReader(inlineStyle), pageURL)
	setAttribute(node, "style", rewritten)
	return found
}
// extractStyleTag collects the resources referenced by the CSS rules inside a
// <style> element. The element's text is replaced with the output of
// processCSS, so its URLs point to the archival URLs. Returns nil when the
// element holds nothing but whitespace.
func extractStyleTag(node *html.Node, pageURL *nurl.URL) []ResourceURL {
	css := strings.TrimSpace(textContent(node))
	if css == "" {
		return nil
	}

	rewritten, found := processCSS(strings.NewReader(css), pageURL)
	setTextContent(node, rewritten)
	return found
}
// extractScriptTag collects the resources referenced by a <script> element:
// the one named by its src attribute plus any found by processJS in the
// inline code. Both the attribute and the inline code are rewritten to use
// the archival URLs.
func extractScriptTag(node *html.Node, pageURL *nurl.URL) []ResourceURL {
	// The external script, if any, comes from the `src` attribute.
	found := extractGenericTag(node, "src", pageURL)

	// The inline code is only processed when there is some.
	code := strings.TrimSpace(textContent(node))
	if code != "" {
		rewritten, inlineFound := processJS(strings.NewReader(code), pageURL)
		setTextContent(node, rewritten)
		found = append(found, inlineFound...)
	}

	return found
}
// extractMetaTag collects the image resource referenced by a <meta> element,
// e.g. the hero image declared through og:image or twitter:image.
//
// The element is only considered when its name and property attributes,
// joined by a space, match rxImageMeta, and when its content attribute is an
// absolute URL with a scheme and a host. On success the content attribute is
// rewritten to the archival URL and the resource is returned; in every other
// case the element is left untouched and nil is returned.
func extractMetaTag(node *html.Node, pageURL *nurl.URL) []ResourceURL {
	metaKey := getAttribute(node, "name") + " " + getAttribute(node, "property")
	if !rxImageMeta.MatchString(metaKey) {
		return nil
	}

	// Only absolute URLs are accepted here.
	content := getAttribute(node, "content")
	parsed, err := nurl.ParseRequestURI(content)
	if err != nil || parsed.Scheme == "" || parsed.Hostname() == "" {
		return nil
	}

	res := ToResourceURL(content, pageURL)
	if res.ArchivalURL == "" {
		return nil
	}

	setAttribute(node, "content", res.ArchivalURL)
	return []ResourceURL{res}
}
// extractMediaTag collects the resources referenced by a media element such
// as <img>, <video>, <audio> or <source>. It looks at the src, poster and
// srcset attributes and rewrites each URL that can be archived to its
// archival URL.
//
// The returned slice is never nil; it is empty when nothing was found.
// The srcset attribute is only touched when the node actually carries a
// non-empty one, so elements without srcset do not gain an empty attribute.
func extractMediaTag(node *html.Node, pageURL *nurl.URL) []ResourceURL {
	resources := []ResourceURL{}

	// `src` and `poster` each hold a single URL.
	for _, attrName := range []string{"src", "poster"} {
		value := getAttribute(node, attrName)
		if value == "" {
			continue
		}

		res := ToResourceURL(value, pageURL)
		if res.ArchivalURL == "" {
			continue
		}

		setAttribute(node, attrName, res.ArchivalURL)
		resources = append(resources, res)
	}

	// strings.Split returns a one-element slice even for an empty string, so
	// the attribute value itself must be checked before rewriting srcset.
	strSrcSets := getAttribute(node, "srcset")
	if strSrcSets == "" {
		return resources
	}

	// Each comma separated candidate is "<url> <descriptor>"; only the URL
	// part is replaced, the descriptor and surrounding spaces are kept.
	srcSets := strings.Split(strSrcSets, ",")
	for i, srcSet := range srcSets {
		srcSet = strings.TrimSpace(srcSet)
		parts := strings.SplitN(srcSet, " ", 2)
		if parts[0] == "" {
			continue
		}

		res := ToResourceURL(parts[0], pageURL)
		if res.ArchivalURL == "" {
			continue
		}

		srcSets[i] = strings.Replace(srcSets[i], parts[0], res.ArchivalURL, 1)
		resources = append(resources, res)
	}

	setAttribute(node, "srcset", strings.Join(srcSets, ","))
	return resources
}
// extractGenericTag collects the resource referenced by a single attribute of
// the node, for tags where the URL sits directly in that attribute (for
// example <link> with href or <object> with data). The attribute is rewritten
// to the archival URL. Resources found on an <iframe> are flagged as
// embedded. Returns nil when the attribute is empty or cannot be archived.
func extractGenericTag(node *html.Node, attrName string, pageURL *nurl.URL) []ResourceURL {
	rawURL := getAttribute(node, attrName)
	if rawURL == "" {
		return nil
	}

	res := ToResourceURL(rawURL, pageURL)
	if res.ArchivalURL == "" {
		return nil
	}

	isIframe := tagName(node) == "iframe"
	if isIframe {
		res.IsEmbedded = true
	}

	setAttribute(node, attrName, res.ArchivalURL)
	return []ResourceURL{res}
}
// processCSS scans the CSS read from input and collects every resource
// referenced through a URL token. It returns the CSS with each archivable
// URL rewritten as url("<archival URL>"), together with the resources found.
// Tokens that are not URLs, and URLs that cannot be archived, are copied to
// the output unchanged. Scanning stops at the first error token (which
// includes the end of input). The returned slice is never nil.
func processCSS(input io.Reader, baseURL *nurl.URL) (string, []ResourceURL) {
	var out bytes.Buffer
	resources := []ResourceURL{}

	lexer := css.NewLexer(input)
	for token, raw := lexer.Next(); token != css.ErrorToken; token, raw = lexer.Next() {
		// Anything but a URL is passed through untouched.
		if token != css.URLToken {
			out.Write(raw)
			continue
		}

		// Reduce the token to the bare URL: apply rxStyleURL (presumably
		// stripping the `url(...)` wrapper), then drop surrounding spaces
		// and quotation marks.
		target := rxStyleURL.ReplaceAllString(string(raw), "$1")
		target = strings.TrimSpace(target)
		target = strings.Trim(target, `'`)
		target = strings.Trim(target, `"`)

		res := ToResourceURL(target, baseURL)
		if res.ArchivalURL == "" {
			out.Write(raw)
			continue
		}

		out.WriteString(`url("` + res.ArchivalURL + `")`)
		resources = append(resources, res)
	}

	return out.String(), resources
}
// processJS scans the JavaScript read from input and collects the resources
// referenced from its string literals. It returns the script with each
// recognised URL rewritten to its archival URL, together with the resources
// found. The returned slice is never nil.
//
// JavaScript has no dedicated URL token, so a string literal is treated as a
// URL only when, after trimming spaces and quotes, it either
//   - starts with `url(`, like a CSS URL, or
//   - starts with a slash or with http(s):// and its file extension maps to
//     a MIME type that matches rxJSContentType or contains text/css, image/,
//     audio/ or video/.
//
// Every other token, and every string that cannot be archived, is copied to
// the output unchanged. Scanning stops at the first error token (which
// includes the end of input).
func processJS(input io.Reader, baseURL *nurl.URL) (string, []ResourceURL) {
	buffer := bytes.NewBuffer(nil)
	lexer := js.NewLexer(input)
	resources := []ResourceURL{}

	for {
		token, bt := lexer.Next()
		if token == js.ErrorToken {
			break
		}

		// Only string literals may contain URLs.
		if token != js.StringToken {
			buffer.Write(bt)
			continue
		}

		var res ResourceURL
		var newURL string

		text := string(bt)
		text = strings.TrimSpace(text)
		text = strings.Trim(text, `'`)
		text = strings.Trim(text, `"`)

		switch {
		case strings.HasPrefix(text, "url("):
			cssURL := rxStyleURL.ReplaceAllString(text, "$1")
			cssURL = strings.TrimSpace(cssURL)
			cssURL = strings.Trim(cssURL, `'`)
			cssURL = strings.Trim(cssURL, `"`)

			res = ToResourceURL(cssURL, baseURL)
			newURL = fmt.Sprintf("\"url('%s')\"", res.ArchivalURL)

		case strings.HasPrefix(text, "/") || rxHTTPScheme.MatchString(text):
			res = ToResourceURL(text, baseURL)

			tmp, err := nurl.Parse(res.DownloadURL)
			if err != nil {
				buffer.Write(bt)
				continue
			}

			// Guess the kind of resource from the file extension and only
			// rewrite the kinds that are worth archiving.
			ext := path.Ext(tmp.Path)
			cType := mime.TypeByExtension(ext)

			switch {
			case rxJSContentType.MatchString(cType),
				strings.Contains(cType, "text/css"),
				strings.Contains(cType, "image/"),
				strings.Contains(cType, "audio/"),
				strings.Contains(cType, "video/"):
			default:
				buffer.Write(bt)
				continue
			}

			newURL = fmt.Sprintf("\"%s\"", res.ArchivalURL)

		default:
			buffer.Write(bt)
			continue
		}

		// The string looked like a URL but cannot be archived (for example
		// `url(data:...)`). Keep the original token; dropping it would
		// remove the literal from the script and break its syntax.
		if res.ArchivalURL == "" {
			buffer.Write(bt)
			continue
		}

		buffer.WriteString(newURL)
		resources = append(resources, res)
	}

	return buffer.String(), resources
}

View File

@ -1,71 +0,0 @@
package archiver
import (
nurl "net/url"
"regexp"
"strings"
)
var (
// rxHTTPScheme matches a string that starts with "http://" or "https://",
// ignoring case.
rxHTTPScheme = regexp.MustCompile(`(?i)^https?:\/{2}`)
// rxTrailingSlash matches one or more slashes at the end of a string.
rxTrailingSlash = regexp.MustCompile(`(?i)/+$`)
// rxRepeatedStrip matches a run of one or more hyphens; it is used to
// collapse such runs into a single hyphen.
rxRepeatedStrip = regexp.MustCompile(`(?i)-+`)
)
// ResourceURL is a struct that contains the URLs used for downloading
// and archiving a resource.
type ResourceURL struct {
// DownloadURL is the absolute URL the resource is fetched from.
DownloadURL string
// ArchivalURL is the flattened name derived from DownloadURL under which
// the resource is referenced inside the archive.
ArchivalURL string
// Parent is the string form of the base URL the resource was resolved
// against.
Parent string
// IsEmbedded is set for resources that come from an <iframe>.
IsEmbedded bool
}
// ToResourceURL converts uri, resolved against base, into a ResourceURL.
//
// The zero ResourceURL is returned when uri is empty, when base is nil, or
// when uri contains a colon without starting with http:// or https:// (for
// example data:, mailto: or javascript: URIs).
//
// DownloadURL is the absolute form of uri with trailing slashes removed and
// spaces replaced by "+". ArchivalURL is derived from it by unescaping the
// query and path and then replacing "://", "?", "#", "/" and spaces with
// hyphens, with runs of hyphens collapsed into one. Parent is base.String().
func ToResourceURL(uri string, base *nurl.URL) ResourceURL {
	uri = strings.TrimSpace(uri)

	// Without a base nothing can be resolved; bail out instead of
	// dereferencing a nil URL further down.
	if uri == "" || base == nil {
		return ResourceURL{}
	}

	// Make sure the URL has a scheme we can download from.
	if strings.Contains(uri, ":") && !rxHTTPScheme.MatchString(uri) {
		return ResourceURL{}
	}

	// Create download URL
	downloadURL := toAbsoluteURI(uri, base)
	downloadURL = rxTrailingSlash.ReplaceAllString(downloadURL, "")
	downloadURL = strings.ReplaceAll(downloadURL, " ", "+")

	// Create archival URL
	archivalURL := downloadURL

	// Some URL have its query or path escaped, e.g. Wikipedia and Dev.to.
	// For example, Wikipedia's stylesheet looks like this :
	//   load.php?lang=en&modules=ext.3d.styles%7Cext.cite.styles%7Cext.uls.interlanguage
	// However, when browser download it, it will be registered as unescaped query :
	//   load.php?lang=en&modules=ext.3d.styles|ext.cite.styles|ext.uls.interlanguage
	// So, for archival URL, we need to unescape the query and path first.
	tmp, err := nurl.Parse(downloadURL)
	if err == nil {
		// A failed unescape yields an empty string, in which case the raw
		// query is kept as it is; the error itself carries no extra value.
		unescapedQuery, _ := nurl.QueryUnescape(tmp.RawQuery)
		if unescapedQuery != "" {
			tmp.RawQuery = unescapedQuery
		}

		archivalURL = tmp.String()
		archivalURL = strings.Replace(archivalURL, tmp.EscapedPath(), tmp.Path, 1)
	}

	archivalURL = strings.ReplaceAll(archivalURL, "://", "/")
	archivalURL = strings.ReplaceAll(archivalURL, "?", "-")
	archivalURL = strings.ReplaceAll(archivalURL, "#", "-")
	archivalURL = strings.ReplaceAll(archivalURL, "/", "-")
	archivalURL = strings.ReplaceAll(archivalURL, " ", "-")
	archivalURL = rxRepeatedStrip.ReplaceAllString(archivalURL, "-")

	return ResourceURL{
		DownloadURL: downloadURL,
		ArchivalURL: archivalURL,
		Parent:      base.String(),
	}
}

View File

@ -1,350 +0,0 @@
package archiver
import (
"bytes"
"strings"
"golang.org/x/net/html"
)
// getElementsByTagName returns every element below doc whose tag equals
// tagName, in document order. The special tag "*" matches all elements.
// doc itself is never part of the result.
func getElementsByTagName(doc *html.Node, tagName string) []*html.Node {
	var matches []*html.Node

	var walk func(parent *html.Node)
	walk = func(parent *html.Node) {
		for current := parent.FirstChild; current != nil; current = current.NextSibling {
			isElement := current.Type == html.ElementNode
			if isElement && (tagName == "*" || current.Data == tagName) {
				matches = append(matches, current)
			}
			walk(current)
		}
	}

	walk(doc)
	return matches
}
// createElement returns a new, detached element node with the given tag.
func createElement(tagName string) *html.Node {
	element := new(html.Node)
	element.Type = html.ElementNode
	element.Data = tagName
	return element
}
// createTextNode returns a new, detached text node holding data.
func createTextNode(data string) *html.Node {
	text := new(html.Node)
	text.Type = html.TextNode
	text.Data = data
	return text
}
// tagName returns the tag name of an element node, or an empty string for
// any other kind of node.
func tagName(node *html.Node) string {
	if node.Type == html.ElementNode {
		return node.Data
	}
	return ""
}
// getAttribute returns the value of the first attribute named attrName on
// the node, or an empty string when the node has no such attribute.
func getAttribute(node *html.Node, attrName string) string {
	for _, attr := range node.Attr {
		if attr.Key == attrName {
			return attr.Val
		}
	}
	return ""
}
// setAttribute sets the attribute attrName of the node to attrValue. The
// first existing attribute with that name is overwritten; when there is
// none, a new attribute is appended.
func setAttribute(node *html.Node, attrName string, attrValue string) {
	for i := range node.Attr {
		if node.Attr[i].Key == attrName {
			node.Attr[i].Val = attrValue
			return
		}
	}

	node.Attr = append(node.Attr, html.Attribute{
		Key: attrName,
		Val: attrValue,
	})
}
// removeAttribute removes the first attribute named attrName from the node.
// It does nothing when the node has no such attribute.
func removeAttribute(node *html.Node, attrName string) {
	for i := range node.Attr {
		if node.Attr[i].Key != attrName {
			continue
		}

		// Shift the tail one slot to the left, then drop the last slot.
		copy(node.Attr[i:], node.Attr[i+1:])
		node.Attr = node.Attr[:len(node.Attr)-1]
		return
	}
}
// hasAttribute reports whether the node carries an attribute named attrName.
func hasAttribute(node *html.Node, attrName string) bool {
	for _, attr := range node.Attr {
		if attr.Key == attrName {
			return true
		}
	}
	return false
}
// textContent returns the concatenated data of every text node found in the
// node and its descendants, in document order.
func textContent(node *html.Node) string {
	var text bytes.Buffer

	var collect func(current *html.Node)
	collect = func(current *html.Node) {
		if current.Type == html.TextNode {
			text.WriteString(current.Data)
		}

		child := current.FirstChild
		for child != nil {
			collect(child)
			child = child.NextSibling
		}
	}

	collect(node)
	return text.String()
}
// outerHTML returns the HTML serialization of the node including its
// descendants. An empty, non-nil slice is returned when rendering fails.
func outerHTML(node *html.Node) []byte {
	rendered := new(bytes.Buffer)
	if err := html.Render(rendered, node); err != nil {
		return []byte{}
	}
	return rendered.Bytes()
}
// innerHTML returns the HTML serialization of the node's children with
// surrounding whitespace trimmed. An empty string is returned when any child
// fails to render.
func innerHTML(node *html.Node) string {
	var rendered bytes.Buffer

	for child := node.FirstChild; child != nil; child = child.NextSibling {
		if err := html.Render(&rendered, child); err != nil {
			return ""
		}
	}

	return strings.TrimSpace(rendered.String())
}
// documentElement returns the first <html> element found below doc, which
// for an HTML document is its root element. Returns nil when there is none.
func documentElement(doc *html.Node) *html.Node {
	roots := getElementsByTagName(doc, "html")
	if len(roots) == 0 {
		return nil
	}
	return roots[0]
}
// id returns the node's id attribute with surrounding whitespace removed.
func id(node *html.Node) string {
	return strings.TrimSpace(getAttribute(node, "id"))
}
// className returns the node's class attribute in normalized form: leading
// and trailing whitespace is dropped and every run of whitespace between
// class names is reduced to a single space.
func className(node *html.Node) string {
	// strings.Fields already ignores leading and trailing whitespace.
	classes := strings.Fields(getAttribute(node, "class"))
	return strings.Join(classes, " ")
}
// children returns the direct children of node that are elements, in order.
// It returns nil when node is nil or has no element children.
func children(node *html.Node) []*html.Node {
	if node == nil {
		return nil
	}

	var elements []*html.Node
	for current := node.FirstChild; current != nil; current = current.NextSibling {
		if current.Type != html.ElementNode {
			continue
		}
		elements = append(elements, current)
	}
	return elements
}
// childNodes returns all direct children of node, whatever their type, in
// order. It returns nil when node has no children.
func childNodes(node *html.Node) []*html.Node {
	var nodes []*html.Node

	current := node.FirstChild
	for current != nil {
		nodes = append(nodes, current)
		current = current.NextSibling
	}

	return nodes
}
// firstElementChild returns the first direct child of node that is an
// element, or nil when node has no element children.
func firstElementChild(node *html.Node) *html.Node {
	candidate := node.FirstChild
	for candidate != nil && candidate.Type != html.ElementNode {
		candidate = candidate.NextSibling
	}
	return candidate
}
// nextElementSibling returns the closest following sibling of node that is
// an element, or nil when no element follows it.
func nextElementSibling(node *html.Node) *html.Node {
	candidate := node.NextSibling
	for candidate != nil && candidate.Type != html.ElementNode {
		candidate = candidate.NextSibling
	}
	return candidate
}
// appendChild adds child as the last child of node.
//
// A detached child is appended as it is. A child that already has a parent
// is not moved itself: a deep clone of it is appended to node and the
// original is then removed from its former parent.
func appendChild(node *html.Node, child *html.Node) {
	formerParent := child.Parent
	if formerParent == nil {
		node.AppendChild(child)
		return
	}

	node.AppendChild(cloneNode(child))
	formerParent.RemoveChild(child)
}
// prependChild adds child as the first child of node.
//
// A detached child is inserted as it is. A child that already has a parent
// is removed from that parent and a deep clone of it is inserted instead.
func prependChild(node *html.Node, child *html.Node) {
	if formerParent := child.Parent; formerParent != nil {
		detached := cloneNode(child)
		formerParent.RemoveChild(child)
		child = detached
	}

	if first := node.FirstChild; first != nil {
		node.InsertBefore(child, first)
		return
	}
	node.AppendChild(child)
}
// replaceNode puts newNode at the position of oldNode and removes oldNode
// from its parent. It does nothing when oldNode has no parent.
//
// NOTE(review): the parent and sibling links of newNode are cleared without
// removing it from any tree it may currently belong to, so newNode is
// presumably expected to be detached already - confirm against callers.
func replaceNode(oldNode *html.Node, newNode *html.Node) {
if oldNode.Parent == nil {
return
}
// Reset the links so that InsertBefore sees newNode as a detached node.
newNode.Parent = nil
newNode.PrevSibling = nil
newNode.NextSibling = nil
// Insert first, then remove: oldNode serves as the position marker.
oldNode.Parent.InsertBefore(newNode, oldNode)
oldNode.Parent.RemoveChild(oldNode)
}
// includeNode reports whether node (compared by pointer) is part of nodeList.
func includeNode(nodeList []*html.Node, node *html.Node) bool {
	for _, candidate := range nodeList {
		if candidate == node {
			return true
		}
	}
	return false
}
// cloneNode returns a deep clone of src and all of its descendants. The
// clone is detached: it has no parent and no siblings. Type, DataAtom, Data,
// Namespace and a copy of the attributes are carried over, so elements from
// foreign namespaces (e.g. SVG or MathML) keep their namespace.
func cloneNode(src *html.Node) *html.Node {
	clone := &html.Node{
		Type:      src.Type,
		DataAtom:  src.DataAtom,
		Data:      src.Data,
		Namespace: src.Namespace,
		Attr:      make([]html.Attribute, len(src.Attr)),
	}

	// The attributes get their own backing array so that later changes to
	// the clone never leak into the original.
	copy(clone.Attr, src.Attr)

	for child := src.FirstChild; child != nil; child = child.NextSibling {
		clone.AppendChild(cloneNode(child))
	}

	return clone
}
// getAllNodesWithTag returns every element below node whose tag is one of
// tagNames. The result is grouped by tag, following the order of tagNames;
// within a group the elements are in document order.
func getAllNodesWithTag(node *html.Node, tagNames ...string) []*html.Node {
	var collected []*html.Node
	for _, name := range tagNames {
		collected = append(collected, getElementsByTagName(node, name)...)
	}
	return collected
}
// forEachNode calls fn for every node of nodeList, in order, passing the
// node and its index in the list.
func forEachNode(nodeList []*html.Node, fn func(*html.Node, int)) {
	for index, node := range nodeList {
		fn(node, index)
	}
}
// removeNodes walks nodeList from its last to its first entry and removes
// from its parent every node for which filterFn returns true. When filterFn
// is nil every node is removed. Nodes without a parent are skipped, and
// filterFn is not called for them.
func removeNodes(nodeList []*html.Node, filterFn func(*html.Node) bool) {
	for i := len(nodeList) - 1; i >= 0; i-- {
		candidate := nodeList[i]

		parent := candidate.Parent
		if parent == nil {
			continue
		}
		if filterFn != nil && !filterFn(candidate) {
			continue
		}

		parent.RemoveChild(candidate)
	}
}
// setTextContent replaces all children of node with a single text node
// holding text.
func setTextContent(node *html.Node, text string) {
	// RemoveChild clears the sibling links of the removed node, so walking
	// the list through NextSibling would stop after the first child. Always
	// take the current first child instead until none is left.
	for node.FirstChild != nil {
		node.RemoveChild(node.FirstChild)
	}

	node.AppendChild(&html.Node{
		Type: html.TextNode,
		Data: text,
	})
}

View File

@ -1,46 +0,0 @@
package archiver
import (
nurl "net/url"
"strings"
)
// clearUTMParams removes every query parameter whose key starts with "utm_"
// from url, modifying it in place.
//
// The query is only rewritten when at least one parameter was removed, so a
// URL without tracking parameters keeps its query string byte for byte.
// When parameters are removed, the remaining ones are re-encoded with
// Values.Encode, i.e. sorted by key.
func clearUTMParams(url *nurl.URL) {
	queries := url.Query()

	removed := false
	for key := range queries {
		if strings.HasPrefix(key, "utm_") {
			queries.Del(key)
			removed = true
		}
	}

	if removed {
		url.RawQuery = queries.Encode()
	}
}
// toAbsoluteURI resolves uri against base and returns the absolute form.
//
// An empty uri or a nil base yields an empty string. A uri that starts with
// a hash (#), one that already is an absolute URL with scheme and host, and
// one that cannot be parsed are all returned unchanged. Any other uri has
// clearUTMParams applied to it before it is resolved against base.
func toAbsoluteURI(uri string, base *nurl.URL) string {
	switch {
	case uri == "", base == nil:
		return ""
	case uri[0] == '#':
		// Fragment-only references stay as they are.
		return uri
	}

	// Already absolute?
	if abs, err := nurl.ParseRequestURI(uri); err == nil && abs.Scheme != "" && abs.Hostname() != "" {
		return uri
	}

	// Relative reference: resolve it against the base.
	ref, err := nurl.Parse(uri)
	if err != nil {
		return uri
	}

	clearUTMParams(ref)
	return base.ResolveReference(ref).String()
}

View File

@ -1,93 +0,0 @@
package warc
import (
"fmt"
"os"
"go.etcd.io/bbolt"
)
// Archive is the storage for archiving the web page.
type Archive struct {
// db is the underlying bbolt database that holds the archive's buckets.
db *bbolt.DB
}
// Open opens the archive stored at path in read-only mode.
//
// An error is returned when path does not exist, is a directory, cannot be
// inspected, or cannot be opened as a bbolt database. The caller is
// responsible for calling Close on the returned archive.
func Open(path string) (*Archive, error) {
	// Make sure archive exists. Every Stat failure has to be handled before
	// info is used: info is nil whenever err is not.
	info, err := os.Stat(path)
	switch {
	case os.IsNotExist(err):
		return nil, fmt.Errorf("archive doesn't exist")
	case err != nil:
		return nil, fmt.Errorf("failed to access archive: %v", err)
	case info.IsDir():
		return nil, fmt.Errorf("archive doesn't exist")
	}

	// Open database
	options := &bbolt.Options{
		ReadOnly: true,
	}

	db, err := bbolt.Open(path, os.ModePerm, options)
	if err != nil {
		return nil, err
	}

	return &Archive{db: db}, nil
}
// Close closes the underlying database of the archive.
func (arc *Archive) Close() {
	// The method has no return value, so the error is dropped on purpose.
	_ = arc.db.Close()
}
// Read fetches the resource stored under name from the archive and returns
// its content together with its content type. An empty name refers to the
// root resource, "archive-root".
//
// An error is returned when the resource's bucket, its "type" entry or its
// "content" entry does not exist. The returned content is a private copy
// that stays valid after the call.
func (arc *Archive) Read(name string) ([]byte, string, error) {
	// Make sure name exists
	if name == "" {
		name = "archive-root"
	}

	var content []byte
	var strContentType string

	err := arc.db.View(func(tx *bbolt.Tx) error {
		bucket := tx.Bucket([]byte(name))
		if bucket == nil {
			return fmt.Errorf("%s doesn't exist", name)
		}

		contentType := bucket.Get([]byte("type"))
		if contentType == nil {
			return fmt.Errorf("%s doesn't exist", name)
		}
		strContentType = string(contentType)

		raw := bucket.Get([]byte("content"))
		if raw == nil {
			return fmt.Errorf("%s doesn't exist", name)
		}

		// Values returned by Get are only valid while the transaction is
		// open, so the bytes must be copied before they leave it.
		content = make([]byte, len(raw))
		copy(content, raw)

		return nil
	})

	if err != nil {
		return nil, "", err
	}

	return content, strContentType, nil
}
// HasResource reports whether the archive holds a bucket for the resource
// stored under name. An empty name refers to the root resource,
// "archive-root".
func (arc *Archive) HasResource(name string) bool {
	bucketName := name
	if bucketName == "" {
		bucketName = "archive-root"
	}

	found := false
	// The error returned by View is not inspected; found simply stays false
	// if the lookup never runs.
	_ = arc.db.View(func(tx *bbolt.Tx) error {
		found = tx.Bucket([]byte(bucketName)) != nil
		return nil
	})

	return found
}

View File

@ -1,119 +0,0 @@
package warc
import (
"fmt"
"io"
nurl "net/url"
"os"
fp "path/filepath"
"strings"
"time"
"github.com/go-shiori/shiori/pkg/warc/internal/archiver"
"go.etcd.io/bbolt"
)
// ArchivalRequest is request for archiving a web page,
// either from URL or from an io.Reader.
type ArchivalRequest struct {
// URL is the address of the page to archive. It must be an absolute URL
// with a scheme and a host.
URL string
// Reader optionally supplies the page content. When it is nil, or when
// ContentType is empty, the content is downloaded from URL instead.
Reader io.Reader
// ContentType is the content type of the data provided by Reader.
ContentType string
// LogEnabled is handed to the archiver's LogEnabled field.
LogEnabled bool
}
// NewArchive creates a new archive for the submitted request and saves it to
// dstPath, creating the parent directory when needed.
//
// The page content is taken from req.Reader; when req.Reader is nil or
// req.ContentType is empty it is downloaded from req.URL instead. HTML
// content is scanned for sub resources, which are then archived as well;
// any other content is stored as it is.
//
// An error is returned when req.URL is not an absolute URL, when the
// download fails, when the archive cannot be created at dstPath, or when
// processing or saving the root resource fails.
func NewArchive(req ArchivalRequest, dstPath string) error {
	// Make sure URL is valid
	parsedURL, err := nurl.ParseRequestURI(req.URL)
	if err != nil || parsedURL.Scheme == "" || parsedURL.Hostname() == "" {
		return fmt.Errorf("url %s is not valid", req.URL)
	}

	// Generate resource URL
	res := archiver.ToResourceURL(req.URL, parsedURL)
	res.ArchivalURL = "archive-root"

	// Download URL if needed
	if req.Reader == nil || req.ContentType == "" {
		resp, err := archiver.DownloadData(res.DownloadURL)
		if err != nil {
			return fmt.Errorf("failed to download %s: %v", req.URL, err)
		}
		defer resp.Body.Close()

		req.Reader = resp.Body
		req.ContentType = resp.Header.Get("Content-Type")
	}

	// Create database for archive. A failure to create the directory is
	// reported right away instead of surfacing later as an open error.
	if err := os.MkdirAll(fp.Dir(dstPath), os.ModePerm); err != nil {
		return fmt.Errorf("failed to create archive: %v", err)
	}

	db, err := bbolt.Open(dstPath, os.ModePerm, nil)
	if err != nil {
		return fmt.Errorf("failed to create archive: %v", err)
	}
	defer db.Close()

	// Create archiver
	arc := &archiver.Archiver{
		DB:          db,
		ChDone:      make(chan struct{}),
		ChErrors:    make(chan error),
		ChWarnings:  make(chan error),
		ChRequest:   make(chan archiver.ResourceURL, 10),
		ResourceMap: make(map[string]struct{}),
		LogEnabled:  req.LogEnabled,
	}

	// TODO: investigate whether the channel must be closed or not.
	// At first, I thought the channels must be closed. Unfortunately, it leads to
	// a panic when error message is accidentally sent after error channels closed.
	// defer arc.Close()

	// Process input depending on its type.
	// If it's HTML, we need to extract the sub resources that used by it, e.g some CSS or JS files.
	// If it's not HTML, we can just save it to archive.
	var result archiver.ProcessResult
	var subResources []archiver.ResourceURL

	if strings.Contains(req.ContentType, "text/html") {
		result, subResources, err = arc.ProcessHTMLFile(res, req.Reader)
	} else {
		result, err = arc.ProcessOtherFile(res, req.Reader)
	}

	if err != nil {
		return fmt.Errorf("archival failed: %v", err)
	}

	// Add this url to resource map to mark it as processed
	arc.ResourceMap[res.DownloadURL] = struct{}{}

	// Save content to storage
	arc.Logf(0, "Downloaded %s", res.DownloadURL)

	result.ContentType = req.ContentType
	err = arc.SaveToStorage(result)
	if err != nil {
		return fmt.Errorf("failed to save %s: %v", res.DownloadURL, err)
	}

	// If there are no sub resources found, our job is finished.
	if len(subResources) == 0 {
		return nil
	}

	// However, if there are, we need to run the archiver in background to
	// process the sub resources concurrently.
	go func() {
		for _, subRes := range subResources {
			arc.ChRequest <- subRes
		}
	}()

	// NOTE(review): the pause presumably gives the goroutine above time to
	// queue the first requests before the archiver starts - confirm against
	// StartArchiver.
	time.Sleep(time.Second)
	arc.StartArchiver()
	return nil
}