From 8c5ef033be25ff0882cfaf40973d59ed68d123ae Mon Sep 17 00:00:00 2001 From: Nico Xiang <11958087+nicoxiang@users.noreply.github.com> Date: Wed, 1 Jun 2022 00:10:17 +0800 Subject: [PATCH] feat: add download markdown support (#38) --- README.md | 23 ++- cmd/root.go | 182 ++++++++++++------ go.mod | 3 + go.sum | 30 +++ .../{pkg/file/file.go => config/config.go} | 57 +----- internal/geektime/client.go | 154 ++++++++++----- internal/geektime/login.go | 4 +- internal/markdown/markdown.go | 158 +++++++++++++++ internal/pdf/pdf.go | 11 +- .../pkg/{file => filenamify}/filenamify.go | 2 +- .../{file => filenamify}/filenamify_test.go | 4 +- internal/pkg/geektime/header.go | 10 + internal/pkg/logger/discard.go | 21 ++ internal/video/video.go | 56 ++++-- 14 files changed, 514 insertions(+), 201 deletions(-) rename internal/{pkg/file/file.go => config/config.go} (64%) create mode 100644 internal/markdown/markdown.go rename internal/pkg/{file => filenamify}/filenamify.go (99%) rename internal/pkg/{file => filenamify}/filenamify_test.go (98%) create mode 100644 internal/pkg/geektime/header.go create mode 100644 internal/pkg/logger/discard.go diff --git a/README.md b/README.md index c6bcbe2..4eeed73 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # geektime-downloader -geektime-downloader 支持下载专栏为 PDF 文档和下载视频课。 +geektime-downloader 支持下载专栏为 PDF/Markdown 文档和下载视频课。 [![go report card](https://goreportcard.com/badge/github.com/nicoxiang/geektime-downloader "go report card")](https://goreportcard.com/report/github.com/nicoxiang/geektime-downloader) [![MIT license](https://img.shields.io/badge/license-MIT-brightgreen.svg)](https://opensource.org/licenses/MIT) @@ -49,13 +49,14 @@ Usage: geektime-downloader [flags] Flags: - --comments 是否需要专栏的第一页评论 (default true) - -f, --folder string 专栏和视频课的下载目标位置 (default "") - --gcess string 极客时间 cookie 值 gcess - --gcid string 极客时间 cookie 值 gcid - -h, --help help for geektime-downloader - -u, --phone string 你的极客时间账号(手机号) - -q, --quality string 下载视频清晰度(ld标清,sd高清,hd超清) (default "sd") + --columnOutputType int8 下载专栏的输出格式(1pdf,2markdown,3all) (default 1) + --comments 是否需要专栏的第一页评论 (default true) + -f, --folder string 专栏和视频课的下载目标位置 (default "") + --gcess string 极客时间 cookie 值 gcess + --gcid string 极客时间 cookie 值 gcid + -h, --help help for geektime-downloader + -u, --phone string 你的极客时间账号(手机号) + -q, --quality string 下载视频清晰度(ld标清,sd高清,hd超清) (default "sd") ``` ## Note @@ -64,9 +65,11 @@ Flags: 2. Ctrl + C 退出程序 -3. 如果选择下载所有后中断程序,可重新进入程序继续下载 +3. 默认情况下载专栏的输出格式只有 PDF,可以按需选择是否需要下载 Markdown 格式(--columnOutputType 参数),Markdown 格式虽然显示效果上不及 PDF,但优势为可以显示完整的代码块(PDF 代码块在水平方向太长时会有缺失)并保留了原文中的超链接。 -4. 通过密码登录的情况下,为了避免多次登录账户,会在目录 [UserConfigDir](https://pkg.go.dev/os#UserConfigDir)/geektime-downloader 下存放用户的登录 cookie,如果不是在自己的电脑上执行,请在使用完毕程序后手动删除 +4. 如果选择下载所有后中断程序,可重新进入程序继续下载 + +5. 通过密码登录的情况下,为了避免多次登录账户,会在目录 [UserConfigDir](https://pkg.go.dev/os#UserConfigDir)/geektime-downloader 下存放用户的登录 cookie,如果不是在自己的电脑上执行,请在使用完毕程序后手动删除 ## Inspired by diff --git a/cmd/root.go b/cmd/root.go index 3d46c25..c970277 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "io/ioutil" "math" "math/rand" "net/http" @@ -18,20 +19,16 @@ import ( "github.com/briandowns/spinner" "github.com/chromedp/chromedp" "github.com/manifoldco/promptui" + "github.com/nicoxiang/geektime-downloader/internal/config" "github.com/nicoxiang/geektime-downloader/internal/geektime" + "github.com/nicoxiang/geektime-downloader/internal/markdown" "github.com/nicoxiang/geektime-downloader/internal/pdf" - "github.com/nicoxiang/geektime-downloader/internal/pkg/file" + "github.com/nicoxiang/geektime-downloader/internal/pkg/filenamify" pgt "github.com/nicoxiang/geektime-downloader/internal/pkg/geektime" "github.com/nicoxiang/geektime-downloader/internal/video" "github.com/spf13/cobra" ) -// File extension -const ( - PDFExtension = ".pdf" - TSExtension = ".ts" -) - var ( phone string gcid string @@ -43,14 +40,13 @@ var ( currentProductIndex int quality string downloadComments bool + columnOutputType int8 ) func init() { - rand.Seed(time.Now().UnixNano()) - userHomeDir, _ := os.UserHomeDir() concurrency = int(math.Ceil(float64(runtime.NumCPU()) / 2.0)) - defaultDownloadFolder := filepath.Join(userHomeDir, file.GeektimeDownloaderFolder) + defaultDownloadFolder := filepath.Join(userHomeDir, config.GeektimeDownloaderFolder) rootCmd.Flags().StringVarP(&phone, "phone", "u", "", "你的极客时间账号(手机号)") rootCmd.Flags().StringVar(&gcid, "gcid", "", "极客时间 cookie 值 gcid") @@ -58,6 +54,7 @@ func init() { rootCmd.Flags().StringVarP(&downloadFolder, "folder", "f", defaultDownloadFolder, "专栏和视频课的下载目标位置") rootCmd.Flags().StringVarP(&quality, "quality", "q", "sd", "下载视频清晰度(ld标清,sd高清,hd超清)") rootCmd.Flags().BoolVar(&downloadComments, "comments", true, "是否需要专栏的第一页评论") + rootCmd.Flags().Int8Var(&columnOutputType, "columnOutputType", 1, "下载专栏的输出格式(1pdf,2markdown,3all)") sp = spinner.New(spinner.CharSets[4], 100*time.Millisecond) } @@ -69,9 +66,12 @@ var rootCmd = &cobra.Command{ if quality != "ld" && quality != "sd" && quality != "hd" { exitWithMsg("argument 'quality' is not valid") } + if columnOutputType <= 0 || columnOutputType >= 4 { + exitWithMsg("argument 'columnOutputType' is not valid") + } var readCookies []*http.Cookie if phone != "" { - rc, err := file.ReadCookieFromConfigFile(phone) + rc, err := config.ReadCookieFromConfigFile(phone) if err != nil { exitWithError(err) } @@ -101,7 +101,7 @@ var rootCmd = &cobra.Command{ sp.Stop() checkGeekTimeError(err) } - if err := file.WriteCookieToConfigFile(phone, readCookies); err != nil { + if err := config.WriteCookieToConfigFile(phone, readCookies); err != nil { exitWithError(err) } sp.Stop() @@ -208,7 +208,7 @@ func handleSelectArticle(ctx context.Context, articles []geektime.Article, index } a := articles[index-1] - projectDir, err := file.MkDownloadProjectFolder(downloadFolder, phone, gcid, products[currentProductIndex].Title) + projectDir, err := mkDownloadProjectDir(downloadFolder, phone, gcid, products[currentProductIndex].Title) if err != nil { exitWithError(err) } @@ -222,52 +222,83 @@ func handleDownloadAll(ctx context.Context) { cTitle := products[currentProductIndex].Title articles := loadArticles() - folder, err := file.MkDownloadProjectFolder(downloadFolder, phone, gcid, cTitle) + projectDir, err := mkDownloadProjectDir(downloadFolder, phone, gcid, cTitle) if err != nil { exitWithError(err) } - downloaded, err := file.FindDownloadedArticleFileNames(folder) + downloaded, err := findDownloadedArticleFileNames(projectDir) if err != nil { exitWithError(err) } if isColumn() { + rand.Seed(time.Now().UnixNano()) fmt.Printf("正在下载专栏 《%s》 中的所有文章\n", cTitle) total := len(articles) var i int - chromedpCtx, cancel := chromedp.NewContext(ctx) - // start the browser - err := chromedp.Run(chromedpCtx) - if err != nil { - exitWithError(err) + var chromedpCtx context.Context + var cancel context.CancelFunc + + if columnOutputType == 3 || columnOutputType == 1 { + chromedpCtx, cancel = chromedp.NewContext(ctx) + // start the browser + err := chromedp.Run(chromedpCtx) + if err != nil { + exitWithError(err) + } + defer cancel() } - defer cancel() for _, a := range articles { - fileName := getDownloadFileName(a) - if _, ok := downloaded[fileName]; ok { + fileName := filenamify.Filenamify(a.Title) + var b int8 + _, pdfExists := downloaded[fileName+pdf.PDFExtension] + if pdfExists { + b = 1 + } + _, mdExists := downloaded[fileName+markdown.MDExtension] + if mdExists { + b |= (1 << 1) + } + + if b == columnOutputType { increasePDFCount(total, &i) continue } - fileFullPath := filepath.Join(folder, fileName) - if err := pdf.PrintArticlePageToPDF(chromedpCtx, a.AID, fileFullPath, geektime.SiteCookies, downloadComments); err != nil { - // ensure chrome killed before os exit - cancel() + + if (columnOutputType&1 == 1) && !pdfExists { + if err := pdf.PrintArticlePageToPDF(chromedpCtx, + a.AID, + projectDir, + a.Title, + geektime.SiteCookies, + downloadComments, + ); err != nil { + // ensure chrome killed before os exit + cancel() + checkGeekTimeError(err) + } + } + if ((columnOutputType>>1)&1 == 1) && !mdExists { + html, err := geektime.GetColumnContent(a.AID) + checkGeekTimeError(err) + err = markdown.Download(ctx, html, a.Title, projectDir, a.AID, concurrency) checkGeekTimeError(err) } + increasePDFCount(total, &i) r := rand.Intn(2000) time.Sleep(time.Duration(r) * time.Millisecond) } } else if isVideo() { for _, a := range articles { - fileName := getDownloadFileName(a) + fileName := filenamify.Filenamify(a.Title) + video.TSExtension if _, ok := downloaded[fileName]; ok { continue } videoInfo, err := geektime.GetVideoInfo(a.AID, quality) checkGeekTimeError(err) - err = video.DownloadVideo(ctx, videoInfo.M3U8URL, fileName, folder, int64(videoInfo.Size), concurrency) + err = video.DownloadVideo(ctx, videoInfo.M3U8URL, a.Title, projectDir, int64(videoInfo.Size), concurrency) checkGeekTimeError(err) } } @@ -313,49 +344,48 @@ func loadArticles() []geektime.Article { } func downloadArticle(ctx context.Context, article geektime.Article, projectDir string) { - fileName := getDownloadFileName(article) - fileFullPath := filepath.Join(projectDir, fileName) - if isColumn() { sp.Prefix = fmt.Sprintf("[ 正在下载 《%s》... ]", article.Title) sp.Start() - chromedpCtx, cancel := chromedp.NewContext(ctx) - // start the browser - err := chromedp.Run(chromedpCtx) - if err != nil { - exitWithError(err) + + if columnOutputType&1 == 1 { + chromedpCtx, cancel := chromedp.NewContext(ctx) + // start the browser + err := chromedp.Run(chromedpCtx) + if err != nil { + exitWithError(err) + } + defer cancel() + err = pdf.PrintArticlePageToPDF(chromedpCtx, + article.AID, + projectDir, + article.Title, + geektime.SiteCookies, + downloadComments, + ) + if err != nil { + sp.Stop() + // ensure chrome killed before os exit + cancel() + checkGeekTimeError(err) + } } - defer cancel() - err = pdf.PrintArticlePageToPDF(chromedpCtx, - article.AID, - fileFullPath, - geektime.SiteCookies, - downloadComments, - ) - sp.Stop() - if err != nil { - // ensure chrome killed before os exit - cancel() + + if (columnOutputType>>1)&1 == 1 { + html, err := geektime.GetColumnContent(article.AID) + checkGeekTimeError(err) + err = markdown.Download(ctx, html, article.Title, projectDir, article.AID, concurrency) checkGeekTimeError(err) } + sp.Stop() } else if isVideo() { videoInfo, err := geektime.GetVideoInfo(article.AID, quality) checkGeekTimeError(err) - err = video.DownloadVideo(ctx, videoInfo.M3U8URL, fileName, projectDir, int64(videoInfo.Size), concurrency) + err = video.DownloadVideo(ctx, videoInfo.M3U8URL, article.Title, projectDir, int64(videoInfo.Size), concurrency) checkGeekTimeError(err) } } -func getDownloadFileName(article geektime.Article) string { - var ext string - if isColumn() { - ext = PDFExtension - } else if isVideo() { - ext = TSExtension - } - return file.Filenamify(article.Title) + ext -} - func isColumn() bool { return products[currentProductIndex].Type == "c1" } @@ -384,6 +414,34 @@ func readCookiesFromInput() []*http.Cookie { return cookies } +func findDownloadedArticleFileNames(projectDir string) (map[string]struct{}, error) { + files, err := ioutil.ReadDir(projectDir) + res := make(map[string]struct{}, len(files)) + if err != nil { + return res, err + } + if len(files) == 0 { + return res, nil + } + for _, f := range files { + res[f.Name()] = struct{}{} + } + return res, nil +} + +func mkDownloadProjectDir(downloadFolder, phone, gcid, projectName string) (string, error) { + userName := phone + if gcid != "" { + userName = gcid + } + path := filepath.Join(downloadFolder, userName, filenamify.Filenamify(projectName)) + err := os.MkdirAll(path, os.ModePerm) + if err != nil { + return "", err + } + return path, nil +} + func checkGeekTimeError(err error) { if err != nil { if errors.Is(err, context.Canceled) { @@ -400,7 +458,7 @@ func checkGeekTimeError(err error) { } fmt.Fprintln(os.Stderr, err.Error()) - if err := file.RemoveConfig(phone); err != nil { + if err := config.RemoveConfig(phone); err != nil { fmt.Fprintln(os.Stderr, err.Error()) } os.Exit(1) @@ -409,7 +467,7 @@ func checkGeekTimeError(err error) { } else if _, ok := err.(*geektime.ErrGeekTimeAPIBadCode); ok { exitWithMsg(err.Error()) } else { - // Client error, others + // others exitWithError(err) } } @@ -425,7 +483,7 @@ func checkPromptError(err error) { } func exitWhenClientTimeout() { - exitWithMsg("Request Timeout") + exitWithMsg("\n请求超时") } // Unexpected error diff --git a/go.mod b/go.mod index 39d0a26..7fec021 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,10 @@ require ( ) require ( + github.com/JohannesKaufmann/html-to-markdown v1.3.4 // indirect + github.com/PuerkitoBio/goquery v1.5.1 // indirect github.com/VividCortex/ewma v1.1.1 // indirect + github.com/andybalholm/cascadia v1.1.0 // indirect github.com/chromedp/sysutil v1.0.0 // indirect github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e // indirect github.com/fatih/color v1.13.0 // indirect diff --git a/go.sum b/go.sum index f4ddee1..a0be395 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,11 @@ +github.com/JohannesKaufmann/html-to-markdown v1.3.4 h1:0ooS4xfe4SY/fPPswAySee1cvqXZXfHKZ/4Pv+mF3ko= +github.com/JohannesKaufmann/html-to-markdown v1.3.4/go.mod h1:JNSClIRYICFDiFhw6RBhBeWGnMSSKVZ6sPQA+TK4tyM= +github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE= +github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= github.com/VividCortex/ewma v1.1.1 h1:MnEK4VOv6n0RSY4vtRe3h11qjxL3+t0B8yOL8iMXdcM= github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA= +github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo= +github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= github.com/briandowns/spinner v1.18.1 h1:yhQmQtM1zsqFsouh09Bk/jCjd50pC3EOGsh28gLVvwY= github.com/briandowns/spinner v1.18.1/go.mod h1:mQak9GHqbspjC/5iUx3qMlIho8xBS/ppAL/hX5SmPJU= github.com/cheggaaa/pb/v3 v3.0.8 h1:bC8oemdChbke2FHIIGy9mn4DPJ2caZYQnfbRqwmdCoA= @@ -17,6 +23,8 @@ github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5P github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1 h1:q763qf9huN11kDQavWsoZXJNW3xEE4JJyHa5Q25/sd8= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/fatih/color v1.10.0/go.mod h1:ELkj/draVOlAH/xkhN6mQ50Qd0MPOk5AAr3maGEBuJM= github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w= @@ -33,6 +41,9 @@ github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NH github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= github.com/manifoldco/promptui v0.9.0 h1:3V4HzJk1TtXW1MTZMP7mdlwbBpIinw3HztaIlYthEiA= @@ -50,20 +61,34 @@ github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRC github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU= github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/orisano/pixelmatch v0.0.0-20210112091706-4fa4c7ba91d5 h1:1SoBaSPudixRecmlHXb/GxmaD3fLMtHIDN13QujwQuc= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/sebdah/goldie/v2 v2.5.1/go.mod h1:oZ9fp0+se1eapSRjfYbsV/0Hqhbuu3bJVvKI/NNtssI= +github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= +github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/spf13/cobra v1.4.0 h1:y+wJpx64xcgO1V+RcnwW0LEHxTKRi2ZDPSBjWnrg88Q= github.com/spf13/cobra v1.4.0/go.mod h1:Wo4iy3BUC+X2Fybo0PDqwJIv3dNRiZLHQymsfxlB84g= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/yuin/goldmark v1.2.0/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200320220750-118fecf932d8/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20211029224645-99673261e6eb/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220225172249-27dd8689420f h1:oA4XRj0qtSt8Yo1Zms0CUlsT3KG69V2UGQWPBxujDmc= golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c h1:5KslGYwFpkhGh+Q16bwMP3cOontH8FOep7tGV86Y7SQ= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -76,7 +101,12 @@ golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a h1:dGzPydgVsqGcTRVwiLJ1jVbufYwmzD3LfVPLKsKg+0k= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= diff --git a/internal/pkg/file/file.go b/internal/config/config.go similarity index 64% rename from internal/pkg/file/file.go rename to internal/config/config.go index 6c7a4a8..0d90c23 100644 --- a/internal/pkg/file/file.go +++ b/internal/config/config.go @@ -1,4 +1,4 @@ -package file +package config import ( "errors" @@ -6,7 +6,6 @@ import ( "net/http" "os" "path/filepath" - "strconv" "strings" "time" @@ -24,30 +23,6 @@ func init() { userConfigDir, _ = os.UserConfigDir() } -// ByNumericalFilename implement sort interface, order by file name suffix number -type ByNumericalFilename []os.FileInfo - -func (nf ByNumericalFilename) Len() int { return len(nf) } -func (nf ByNumericalFilename) Swap(i, j int) { nf[i], nf[j] = nf[j], nf[i] } -func (nf ByNumericalFilename) Less(i, j int) bool { - // Use path names - pathA := nf[i].Name() - pathB := nf[j].Name() - - // Grab integer value of each filename by parsing the string and slicing off - // the extension - a, err1 := strconv.ParseInt(pathA[0:strings.LastIndex(pathA, ".")], 10, 64) - b, err2 := strconv.ParseInt(pathB[0:strings.LastIndex(pathB, ".")], 10, 64) - - // If any were not numbers sort lexographically - if err1 != nil || err2 != nil { - return pathA < pathB - } - - // Which integer is smaller? - return a < b -} - // ReadCookieFromConfigFile read cookies from app config file. func ReadCookieFromConfigFile(phone string) ([]*http.Cookie, error) { dir := filepath.Join(userConfigDir, GeektimeDownloaderFolder) @@ -149,36 +124,6 @@ func RemoveConfig(phone string) error { return nil } -// MkDownloadProjectFolder creates download project directory if not exist -func MkDownloadProjectFolder(downloadFolder, phone, gcid, projectName string) (string, error) { - userName := phone - if gcid != "" { - userName = gcid - } - path := filepath.Join(downloadFolder, userName, Filenamify(projectName)) - err := os.MkdirAll(path, os.ModePerm) - if err != nil { - return "", err - } - return path, nil -} - -// FindDownloadedArticleFileNames find all downloaded articles file name in specified account and column -func FindDownloadedArticleFileNames(projectDir string) (map[string]struct{}, error) { - files, err := ioutil.ReadDir(projectDir) - res := make(map[string]struct{}, len(files)) - if err != nil { - return res, err - } - if len(files) == 0 { - return res, nil - } - for _, f := range files { - res[f.Name()] = struct{}{} - } - return res, nil -} - func writeOnelineConfig(sb strings.Builder, key string, value string) strings.Builder { sb.WriteString(key) sb.WriteString(" ") diff --git a/internal/geektime/client.go b/internal/geektime/client.go index 6fc40cc..0696b62 100644 --- a/internal/geektime/client.go +++ b/internal/geektime/client.go @@ -4,25 +4,21 @@ import ( "errors" "fmt" "net/http" + "strconv" "time" "github.com/go-resty/resty/v2" pgt "github.com/nicoxiang/geektime-downloader/internal/pkg/geektime" + "github.com/nicoxiang/geektime-downloader/internal/pkg/logger" ) const ( - // UserAgentHeaderName ... - UserAgentHeaderName = "User-Agent" - // OriginHeaderName ... - OriginHeaderName = "Origin" - // UserAgent is Web browser User Agent - UserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36" // ProductPath ... ProductPath = "/serv/v3/learn/product" // ArticlesPath ... ArticlesPath = "/serv/v1/column/articles" - // ArticleInfoPath ... - ArticleInfoPath = "/serv/v3/article/info" + // ArticleV1Path ... + ArticleV1Path = "/serv/v1/article" ) var ( @@ -43,7 +39,7 @@ type ErrGeekTimeAPIBadCode struct { // Error implements error interface func (e ErrGeekTimeAPIBadCode) Error() string { - return fmt.Sprintf("make geektime api call %s failed, code %d, msg %s", e.Path, e.Code, e.Msg) + return fmt.Sprintf("请求极客时间接口 %s 失败, code %d, msg %s", e.Path, e.Code, e.Msg) } // Product ... @@ -55,7 +51,7 @@ type Product struct { Articles []Article } -// ArticleSummary ... +// Article ... type Article struct { AID int Title string @@ -67,21 +63,61 @@ type VideoInfo struct { Size int } +// ColumnResponse ... +type ColumnResponse struct { + Code int `json:"code"` + Data struct { + ArticleTitle string `json:"article_title"` + ArticleContent string `json:"article_content"` + } `json:"data"` +} + +// VideoResponse ... +type VideoResponse struct { + Code int `json:"code"` + Data struct { + ArticleTitle string `json:"article_title"` + HLSVideos struct { + SD struct { + Size int `json:"size"` + URL string `json:"url"` + } `json:"sd"` + HD struct { + Size int `json:"size"` + URL string `json:"url"` + } `json:"hd"` + LD struct { + Size int `json:"size"` + URL string `json:"url"` + } `json:"ld"` + } `json:"hls_videos"` + } `json:"data"` +} + +// ArticleResponse type constraint, column and video response are different, +// hls_videos field in video response is struct, but in column response its slice +type ArticleResponse interface { + ColumnResponse | VideoResponse +} + +// InitClient init golbal clients with cookies func InitClient(cookies []*http.Cookie) { geekTimeClient = resty.New(). SetBaseURL(pgt.GeekBang). SetCookies(cookies). SetRetryCount(1). SetTimeout(10*time.Second). - SetHeader(UserAgentHeaderName, UserAgent). - SetHeader(OriginHeaderName, pgt.GeekBang) + SetHeader(pgt.UserAgentHeaderName, pgt.UserAgentHeaderValue). + SetHeader(pgt.OriginHeaderName, pgt.GeekBang). + SetLogger(logger.DiscardLogger{}) accountClient = resty.New(). SetBaseURL(pgt.GeekBangAccount). SetCookies(cookies). SetTimeout(10*time.Second). - SetHeader(UserAgentHeaderName, UserAgent). - SetHeader(OriginHeaderName, pgt.GeekBang) + SetHeader(pgt.UserAgentHeaderName, pgt.UserAgentHeaderValue). + SetHeader(pgt.OriginHeaderName, pgt.GeekBang). + SetLogger(logger.DiscardLogger{}) SiteCookies = cookies } @@ -152,57 +188,75 @@ func GetArticles(cid string) ([]Article, error) { return nil, ErrGeekTimeAPIBadCode{ArticlesPath, result.Code, ""} } -// GetVideoInfo call geektime api to get video info +// GetColumnContent ... +func GetColumnContent(articleID int) (string, error) { + a, err := GetArticleInfo[ColumnResponse](articleID) + if err != nil { + return "", err + } + if a.Code != 0 { + return "", ErrGeekTimeAPIBadCode{ArticleV1Path, a.Code, ""} + } + + return a.Data.ArticleContent, err +} + +// GetVideoInfo ... func GetVideoInfo(articleID int, quality string) (VideoInfo, error) { - var videoInfo VideoInfo + var v VideoInfo + a, err := GetArticleInfo[VideoResponse](articleID) + if err != nil { + return v, err + } + if a.Code != 0 { + return v, ErrGeekTimeAPIBadCode{ArticleV1Path, a.Code, ""} + } + if quality == "sd" { + v = VideoInfo{ + M3U8URL: a.Data.HLSVideos.SD.URL, + Size: a.Data.HLSVideos.SD.Size, + } + } else if quality == "hd" { + v = VideoInfo{ + M3U8URL: a.Data.HLSVideos.HD.URL, + Size: a.Data.HLSVideos.HD.Size, + } + } else if quality == "ld" { + v = VideoInfo{ + M3U8URL: a.Data.HLSVideos.LD.URL, + Size: a.Data.HLSVideos.LD.Size, + } + } + return v, nil +} + +// GetArticleInfo ... +func GetArticleInfo[R ArticleResponse](articleID int) (R, error) { + var response R ok, err := auth() if err != nil { - return videoInfo, err + return response, err } if !ok { - return videoInfo, ErrAuthFailed + return response, ErrAuthFailed } - var result struct { - Code int `json:"code"` - Data struct { - Info struct { - ID int `json:"id"` - Title string `json:"title"` - Video struct { - HLSVideos []struct { - Size int `json:"size"` - Quality string `json:"quality"` - URL string `json:"url"` - } `json:"hls_medias"` - } `json:"video"` - } `json:"info"` - } `json:"data"` - } _, err = geekTimeClient.R(). SetBody( map[string]interface{}{ - "id": articleID, + "id": strconv.Itoa(articleID), + "include_neighbors": true, + "is_freelyread": true, + "reverse": false, }). - SetResult(&result). - Post(ArticleInfoPath) + SetResult(&response). + Post(ArticleV1Path) if err != nil { - return videoInfo, err - } - - if result.Code == 0 { - for _, v := range result.Data.Info.Video.HLSVideos { - if v.Quality == quality { - return VideoInfo{ - v.URL, - v.Size, - }, nil - } - } + return response, err } - return videoInfo, ErrGeekTimeAPIBadCode{ArticleInfoPath, result.Code, ""} + return response, nil } // auth check if current user login is expired or login in another device diff --git a/internal/geektime/login.go b/internal/geektime/login.go index 8d6db7c..072e1dd 100644 --- a/internal/geektime/login.go +++ b/internal/geektime/login.go @@ -36,8 +36,8 @@ func Login(phone, password string) ([]*http.Cookie, error) { loginResponse, err := resty.New(). SetBaseURL(pgt.GeekBangAccount). SetTimeout(10*time.Second). - SetHeader(UserAgentHeaderName, UserAgent). - SetHeader(OriginHeaderName, pgt.GeekBang). + SetHeader(pgt.UserAgentHeaderName, pgt.UserAgentHeaderValue). + SetHeader(pgt.OriginHeaderName, pgt.GeekBang). SetHeader("Referer", pgt.GeekBangAccount+"/signin?redirect=https%3A%2F%2Ftime.geekbang.org%2F"). R(). SetBody( diff --git a/internal/markdown/markdown.go b/internal/markdown/markdown.go new file mode 100644 index 0000000..6886cf4 --- /dev/null +++ b/internal/markdown/markdown.go @@ -0,0 +1,158 @@ +package markdown + +import ( + "context" + "os" + "path" + "path/filepath" + "regexp" + "strconv" + "strings" + "sync" + "time" + + md "github.com/JohannesKaufmann/html-to-markdown" + "github.com/go-resty/resty/v2" + "github.com/nicoxiang/geektime-downloader/internal/pkg/filenamify" + pgt "github.com/nicoxiang/geektime-downloader/internal/pkg/geektime" + "github.com/nicoxiang/geektime-downloader/internal/pkg/logger" + "golang.org/x/sync/errgroup" +) + +var ( + converter *md.Converter + imgRegexp = regexp.MustCompile(`!\[(.*?)]\((.*?)\)`) +) + +// MDExtension ... +const MDExtension = ".md" + +type markdownString struct { + sync.Mutex + s string +} + +func (ms *markdownString) ReplaceAll(o, n string) { + ms.Lock() + defer ms.Unlock() + ms.s = strings.ReplaceAll(ms.s, o, n) +} + +// Download ... +func Download(ctx context.Context, html, title, dir string, aid, concurrency int) error { + select { + case <-ctx.Done(): + return context.Canceled + default: + } + // step1: convert to md string + markdown, err := getDefaultConverter().ConvertString(html) + if err != nil { + return err + } + // step2: download images + var ss = &markdownString{s: markdown} + imageURLs := findAllImages(markdown) + + // images/aid/imageName.png + imagesFolder := filepath.Join(dir, "images", strconv.Itoa(aid)) + + c := resty.New() + c.SetOutputDirectory(imagesFolder). + SetRetryCount(1). + SetTimeout(5*time.Second). + SetHeader(pgt.UserAgentHeaderName, pgt.UserAgentHeaderValue). + SetHeader(pgt.OriginHeaderName, pgt.GeekBang). + SetLogger(logger.DiscardLogger{}) + + g := new(errgroup.Group) + ch := make(chan string, concurrency) + + for i := 0; i < concurrency; i++ { + g.Go(func() error { + return writeImageFile(ctx, ch, dir, imagesFolder, c, ss) + }) + } + + for _, imageURL := range imageURLs { + ch <- imageURL + } + close(ch) + err = g.Wait() + if err != nil { + return err + } + + fullName := path.Join(dir, filenamify.Filenamify(title)+MDExtension) + f, err := os.Create(fullName) + if err != nil { + return err + } + // step3: write md file + _, err = f.WriteString("# " + title + "\n" + ss.s) + if err != nil { + return err + } + return nil +} + +func findAllImages(md string) (images []string) { + for _, matches := range imgRegexp.FindAllStringSubmatch(md, -1) { + if len(matches) == 3 { + images = append(images, matches[2]) + } + } + return +} + +func getDefaultConverter() *md.Converter { + if converter == nil { + converter = md.NewConverter("", true, nil) + } + return converter +} + +func writeImageFile(ctx context.Context, imageURLs chan string, dir, imagesFolder string, c *resty.Client, ms *markdownString) (err error) { + var es []error +loop: + for { + select { + case <-ctx.Done(): + for range imageURLs { + } + case imageURL, ok := <-imageURLs: + if !ok { + break loop + } + if imageURL == "" { + return + } + segments := strings.Split(imageURL, "/") + f := segments[len(segments)-1] + if i := strings.Index(f, "?"); i > 0 { + f = f[:i] + } + imageLocalFullPath := filepath.Join(imagesFolder, f) + rel, err := filepath.Rel(dir, imageLocalFullPath) + if err != nil { + es = append(es, err) + break loop + } + + _, err = c.R(). + SetContext(ctx). + SetOutput(f). + Get(imageURL) + if err != nil { + es = append(es, err) + continue + } + + ms.ReplaceAll(imageURL, rel) + } + } + if len(es) > 0 { + return es[0] + } + return nil +} diff --git a/internal/pdf/pdf.go b/internal/pdf/pdf.go index cb24205..0bb711e 100644 --- a/internal/pdf/pdf.go +++ b/internal/pdf/pdf.go @@ -3,9 +3,9 @@ package pdf import ( "context" "errors" - "io/ioutil" "net/http" "os" + "path/filepath" "strconv" "time" @@ -15,14 +15,18 @@ import ( "github.com/chromedp/cdproto/runtime" "github.com/chromedp/chromedp" "github.com/chromedp/chromedp/device" + "github.com/nicoxiang/geektime-downloader/internal/pkg/filenamify" pgt "github.com/nicoxiang/geektime-downloader/internal/pkg/geektime" ) +// PDFExtension ... +const PDFExtension = ".pdf" + // ErrGeekTimeRateLimit ... var ErrGeekTimeRateLimit = errors.New("已触发限流, 你可以选择重新登录/重新获取 cookie, 或者稍后再试, 然后生成剩余的文章") // PrintArticlePageToPDF use chromedp to print article page and save -func PrintArticlePageToPDF(ctx context.Context, aid int, filename string, cookies []*http.Cookie, downloadComments bool) error { +func PrintArticlePageToPDF(ctx context.Context, aid int, dir, title string, cookies []*http.Cookie, downloadComments bool) error { rateLimit := false // new tab ctx, cancel := chromedp.NewContext(ctx) @@ -65,7 +69,8 @@ func PrintArticlePageToPDF(ctx context.Context, aid int, filename string, cookie return err } - if err := ioutil.WriteFile(filename, buf, os.ModePerm); err != nil { + fileName := filepath.Join(dir, filenamify.Filenamify(title)+PDFExtension) + if err := os.WriteFile(fileName, buf, 0666); err != nil { return err } return nil diff --git a/internal/pkg/file/filenamify.go b/internal/pkg/filenamify/filenamify.go similarity index 99% rename from internal/pkg/file/filenamify.go rename to internal/pkg/filenamify/filenamify.go index bfda936..9e602e2 100644 --- a/internal/pkg/file/filenamify.go +++ b/internal/pkg/filenamify/filenamify.go @@ -1,4 +1,4 @@ -package file +package filenamify import ( "math" diff --git a/internal/pkg/file/filenamify_test.go b/internal/pkg/filenamify/filenamify_test.go similarity index 98% rename from internal/pkg/file/filenamify_test.go rename to internal/pkg/filenamify/filenamify_test.go index 398d5d3..e545041 100644 --- a/internal/pkg/file/filenamify_test.go +++ b/internal/pkg/filenamify/filenamify_test.go @@ -1,4 +1,4 @@ -package file +package filenamify import ( "testing" @@ -34,4 +34,4 @@ func TestFilenamify_TooLongEnglish(t *testing.T) { if fileName != want { t.Fatalf(`want %s, but got %s`, want, fileName) } -} \ No newline at end of file +} diff --git a/internal/pkg/geektime/header.go b/internal/pkg/geektime/header.go new file mode 100644 index 0000000..2d146b3 --- /dev/null +++ b/internal/pkg/geektime/header.go @@ -0,0 +1,10 @@ +package geektime + +const ( + // UserAgentHeaderName ... + UserAgentHeaderName = "User-Agent" + // OriginHeaderName ... + OriginHeaderName = "Origin" + // UserAgentHeaderValue ... + UserAgentHeaderValue = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36" +) diff --git a/internal/pkg/logger/discard.go b/internal/pkg/logger/discard.go new file mode 100644 index 0000000..2d26135 --- /dev/null +++ b/internal/pkg/logger/discard.go @@ -0,0 +1,21 @@ +package logger + +// DiscardLogger ... +type DiscardLogger struct{ + +} + +// Errorf do nothing, just discard resty log +func (DiscardLogger) Errorf(format string, v ...interface{}) { + +} + +// Warnf do nothing, just discard resty log +func (DiscardLogger) Warnf(format string, v ...interface{}) { + +} + +// Debugf do nothing, just discard resty log +func (DiscardLogger) Debugf(format string, v ...interface{}) { + +} \ No newline at end of file diff --git a/internal/video/video.go b/internal/video/video.go index 742c5bf..936a360 100644 --- a/internal/video/video.go +++ b/internal/video/video.go @@ -10,22 +10,23 @@ import ( "os" "path/filepath" "sort" + "strconv" "strings" "sync" "time" "github.com/cheggaaa/pb/v3" "github.com/go-resty/resty/v2" - pf "github.com/nicoxiang/geektime-downloader/internal/pkg/file" + "github.com/nicoxiang/geektime-downloader/internal/pkg/filenamify" pgt "github.com/nicoxiang/geektime-downloader/internal/pkg/geektime" + "github.com/nicoxiang/geektime-downloader/internal/pkg/logger" "golang.org/x/sync/errgroup" ) const ( syncByte = uint8(71) //0x47 - userAgentHeaderName = "User-Agent" - originHeaderName = "Origin" - userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36" + // TSExtension ... + TSExtension = ".ts" ) var clientOnce struct { @@ -40,22 +41,47 @@ var ( ErrUnexpectedDecryptKeyResponse = errors.New("unexpected decrypt key response") ) +// ByNumericalFilename implement sort interface, order by file name suffix number +type ByNumericalFilename []os.FileInfo + +func (nf ByNumericalFilename) Len() int { return len(nf) } +func (nf ByNumericalFilename) Swap(i, j int) { nf[i], nf[j] = nf[j], nf[i] } +func (nf ByNumericalFilename) Less(i, j int) bool { + // Use path names + pathA := nf[i].Name() + pathB := nf[j].Name() + + // Grab integer value of each filename by parsing the string and slicing off + // the extension + a, err1 := strconv.ParseInt(pathA[0:strings.LastIndex(pathA, ".")], 10, 64) + b, err2 := strconv.ParseInt(pathB[0:strings.LastIndex(pathB, ".")], 10, 64) + + // If any were not numbers sort lexographically + if err1 != nil || err2 != nil { + return pathA < pathB + } + + // Which integer is smaller? + return a < b +} + func getClient() *resty.Client { clientOnce.Do(func() { clientOnce.c = resty.New(). SetRetryCount(1). SetTimeout(10*time.Second). - SetHeader(userAgentHeaderName, userAgent). - SetHeader(originHeaderName, pgt.GeekBang) + SetHeader(pgt.UserAgentHeaderName, pgt.UserAgentHeaderValue). + SetHeader(pgt.OriginHeaderName, pgt.GeekBang). + SetLogger(logger.DiscardLogger{}) }) return clientOnce.c } // DownloadVideo ... -func DownloadVideo(ctx context.Context, m3u8url, fileName, downloadProjectFolder string, size int64, concurrency int) (err error) { +func DownloadVideo(ctx context.Context, m3u8url, title, projectDir string, size int64, concurrency int) (err error) { i := strings.LastIndex(m3u8url, "/") tsURLPrefix := m3u8url[:i+1] - filenamifyTitle := strings.TrimSuffix(fileName, filepath.Ext(fileName)) + filenamifyTitle := filenamify.Filenamify(title) // Stage1: Make m3u8 URL call and resolve decryptkmsURL, tsFileNames, err := readM3U8File(ctx, m3u8url) @@ -76,7 +102,7 @@ func DownloadVideo(ctx context.Context, m3u8url, fileName, downloadProjectFolder } // Stage3: Make temp ts folder and download temp ts files - tempVideoDir := filepath.Join(downloadProjectFolder, filenamifyTitle) + tempVideoDir := filepath.Join(projectDir, filenamifyTitle) if err = os.MkdirAll(tempVideoDir, os.ModePerm); err != nil { return } @@ -109,7 +135,7 @@ func DownloadVideo(ctx context.Context, m3u8url, fileName, downloadProjectFolder } // Stage4: Read temp ts files, decrypt and merge into the one final video file - err = mergeTSFiles(tempVideoDir, fileName, downloadProjectFolder, key) + err = mergeTSFiles(tempVideoDir, filenamifyTitle, projectDir, key) return } @@ -130,8 +156,8 @@ loop: c := resty.New() c.SetOutputDirectory(tempVideoDir). SetTimeout(time.Minute). - SetHeader(userAgentHeaderName, userAgent). - SetHeader(originHeaderName, pgt.GeekBang) + SetHeader(pgt.UserAgentHeaderName, pgt.UserAgentHeaderValue). + SetHeader(pgt.OriginHeaderName, pgt.GeekBang) resp, err := c.R(). SetContext(ctx). @@ -169,13 +195,13 @@ func readM3U8File(ctx context.Context, url string) (decryptkmsURL string, tsFile return } -func mergeTSFiles(tempVideoDir, fileName, downloadProjectFolder string, key []byte) error { +func mergeTSFiles(tempVideoDir, filenamifyTitle, projectDir string, key []byte) error { tempTSFiles, err := ioutil.ReadDir(tempVideoDir) if err != nil { return err } - sort.Sort(pf.ByNumericalFilename(tempTSFiles)) - fullPath := filepath.Join(downloadProjectFolder, fileName) + sort.Sort(ByNumericalFilename(tempTSFiles)) + fullPath := filepath.Join(projectDir, filenamifyTitle+TSExtension) finalVideoFile, err := os.OpenFile(fullPath, os.O_APPEND|os.O_WRONLY|os.O_CREATE, os.ModePerm) if err != nil { return err