From fc48947a918011580d4e866150b206630287d043 Mon Sep 17 00:00:00 2001 From: yinyicao <71380489+yinyicao@users.noreply.github.com> Date: Sun, 14 Apr 2024 11:04:57 +0800 Subject: [PATCH 1/8] feat(Enterprise version supporting training camps) (#182) * feat(Enterprise version supporting training camps): support Enterprise version supporting training camps at https://b.geekbang.org/ * fix(Enterprise training camps): fix no sections folder * fix(Enterprise): fix error 'cannot unmarshal number into Go struct field .data.teach_type_list of type string' * style(Enterprise): modify code style * style(Enterprise): modify code style --- cmd/root.go | 98 ++++++--- internal/geektime/geektime.go | 131 ++++++++++- .../struct_v1_enterprise_article_info.go | 195 ++++++++++++++++ .../response/struct_v1_enterprise_articles.go | 208 ++++++++++++++++++ .../struct_v1_enterprise_product_info.go | 166 ++++++++++++++ internal/video/video.go | 35 ++- 6 files changed, 798 insertions(+), 35 deletions(-) create mode 100644 internal/geektime/response/struct_v1_enterprise_article_info.go create mode 100644 internal/geektime/response/struct_v1_enterprise_articles.go create mode 100644 internal/geektime/response/struct_v1_enterprise_product_info.go diff --git a/cmd/root.go b/cmd/root.go index 37c9337..01d54f0 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -4,7 +4,6 @@ import ( "context" "errors" "fmt" - "io/ioutil" "math" "math/rand" "net/http" @@ -30,22 +29,23 @@ import ( ) var ( - phone string - gcid string - gcess string - concurrency int - downloadFolder string - sp *spinner.Spinner - selectedProduct geektime.Product - quality string - downloadComments bool - selectedProductType productTypeSelectOption - columnOutputType int - waitSeconds int - productTypeOptions = make([]productTypeSelectOption, 6) - geektimeClient *geektime.Client - accountClient *geektime.Client - universityClient *geektime.Client + phone string + gcid string + gcess string + concurrency int + downloadFolder string + sp *spinner.Spinner + selectedProduct geektime.Product + quality string + downloadComments bool + selectedProductType productTypeSelectOption + columnOutputType int + waitSeconds int + productTypeOptions = make([]productTypeSelectOption, 7) + geektimeClient *geektime.Client + geekEnterpriseClient *geektime.Client + accountClient *geektime.Client + universityClient *geektime.Client ) type productTypeSelectOption struct { @@ -90,6 +90,7 @@ func setProductTypeOptions() { productTypeOptions[3] = productTypeSelectOption{3, "大厂案例", 4, []string{"q"}, false} productTypeOptions[4] = productTypeSelectOption{4, "训练营", 5, []string{""}, true} //custom source type, not use productTypeOptions[5] = productTypeSelectOption{5, "其他", 1, []string{"x", "c6"}, true} + productTypeOptions[6] = productTypeSelectOption{6, "企业版训练营", 6, []string{"c44"}, true} } var rootCmd = &cobra.Command{ @@ -145,6 +146,7 @@ var rootCmd = &cobra.Command{ } geektimeClient = geektime.NewClient(readCookies) universityClient = geektime.NewUniversityClient(readCookies) + geekEnterpriseClient = geektime.NewEnterpriseClient(readCookies) selectProductType(cmd.Context()) }, } @@ -232,6 +234,8 @@ func loadProduct(ctx context.Context, productID int) { p, err = universityClient.MyClassProduct(productID) // university don't need check product type // if input invalid id, access mark is 0 + } else if isEnterpriseUniversity() { + p, err = geekEnterpriseClient.EnterpriseArticlesInfo(productID) } else { p, err = geektimeClient.ColumnInfo(productID) if err == nil { @@ -419,15 +423,24 @@ func handleDownloadAll(ctx context.Context) { } } else { for _, a := range selectedProduct.Articles { + sectionDir := projectDir fileName := filenamify.Filenamify(a.Title) + video.TSExtension if _, ok := downloaded[fileName]; ok { continue } + // add sub dir + if a.SectionTitle != "" { + sectionDir, err = mkDownloadProjectSectionDir(projectDir, a.SectionTitle) + checkError(err) + } if isUniversity() { - err := video.DownloadUniversityVideo(ctx, universityClient, a.AID, selectedProduct, projectDir, quality, concurrency) + err := video.DownloadUniversityVideo(ctx, universityClient, a.AID, selectedProduct, sectionDir, quality, concurrency) + checkError(err) + } else if isEnterpriseUniversity() { + err := video.DownloadEnterpriseArticleVideo(ctx, geekEnterpriseClient, a.AID, selectedProductType.SourceType, sectionDir, quality, concurrency) checkError(err) } else { - err := video.DownloadArticleVideo(ctx, geektimeClient, a.AID, selectedProductType.SourceType, projectDir, quality, concurrency) + err := video.DownloadArticleVideo(ctx, geektimeClient, a.AID, selectedProductType.SourceType, sectionDir, quality, concurrency) checkError(err) } } @@ -441,7 +454,7 @@ func increasePDFCount(total int, i *int) { } func loadArticles() { - if !isUniversity() && len(selectedProduct.Articles) <= 0 { + if !isUniversity() && !isEnterpriseUniversity() && len(selectedProduct.Articles) <= 0 { sp.Prefix = "[ 正在加载文章列表... ]" sp.Start() articles, err := geektimeClient.ColumnArticles(strconv.Itoa(selectedProduct.ID)) @@ -513,6 +526,9 @@ func downloadArticle(ctx context.Context, article geektime.Article, projectDir s if isUniversity() { err := video.DownloadUniversityVideo(ctx, universityClient, article.AID, selectedProduct, projectDir, quality, concurrency) checkError(err) + } else if isEnterpriseUniversity() { + err := video.DownloadEnterpriseArticleVideo(ctx, geekEnterpriseClient, article.AID, selectedProductType.SourceType, projectDir, quality, concurrency) + checkError(err) } else { err := video.DownloadArticleVideo(ctx, geektimeClient, article.AID, selectedProductType.SourceType, projectDir, quality, concurrency) checkError(err) @@ -528,6 +544,10 @@ func isUniversity() bool { return selectedProductType.Index == 4 } +func isEnterpriseUniversity() bool { + return selectedProductType.Index == 6 +} + // Sets the bit at pos in the integer n. func setBit(n int, pos uint) int { n |= (1 << pos) @@ -555,17 +575,24 @@ func readCookiesFromInput() []*http.Cookie { } func findDownloadedArticleFileNames(projectDir string) (map[string]struct{}, error) { - files, err := ioutil.ReadDir(projectDir) - res := make(map[string]struct{}, len(files)) - if err != nil { - return res, err - } - if len(files) == 0 { - return res, nil - } - for _, f := range files { - res[f.Name()] = struct{}{} - } + res := make(map[string]struct{}) + limit := 2 + err := filepath.Walk(projectDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + fmt.Printf("访问路径时出错:%v\n", err) + return err + } + // 计算当前路径的深度 + depth := len(filepath.SplitList(path)) - len(filepath.SplitList(projectDir)) + if depth >= limit { + return filepath.SkipDir // 如果达到限制深度,则跳过该文件夹及其子文件夹 + } + if !info.IsDir() { + res[info.Name()] = struct{}{} + } + return nil + }) + checkError(err) return res, nil } @@ -582,6 +609,15 @@ func mkDownloadProjectDir(downloadFolder, phone, gcid, projectName string) (stri return path, nil } +func mkDownloadProjectSectionDir(downloadFolder, sectionName string) (string, error) { + path := filepath.Join(downloadFolder, filenamify.Filenamify(sectionName)) + err := os.MkdirAll(path, os.ModePerm) + if err != nil { + return "", err + } + return path, nil +} + func checkProductType(productType string) bool { for _, pt := range selectedProductType.AcceptProductTypes { if pt == productType { diff --git a/internal/geektime/geektime.go b/internal/geektime/geektime.go index 4f2e55f..24f72e2 100644 --- a/internal/geektime/geektime.go +++ b/internal/geektime/geektime.go @@ -24,6 +24,7 @@ const ( UserAgent = "User-Agent" // GeekBangUniversityBaseURL ... GeekBangUniversityBaseURL = "https://u.geekbang.org" + GeekBangEnterpriseBaseURL = "https://b.geekbang.org" // GeekBangAccountBaseURL ... GeekBangAccountBaseURL = "https://account.geekbang.org" // LoginPath ... @@ -49,6 +50,15 @@ const ( // UniversityV1MyClassInfoPath get university class info and all articles info in it UniversityV1MyClassInfoPath = "/serv/v1/myclass/info" + // V1EnterpriseProductInfoPath used in enterprise course product info + V1EnterpriseProductInfoPath = "/app/v1/course/info" + // V1EnterpriseArticlesInfoPath used in enterprise course articles info + V1EnterpriseArticlesInfoPath = "/app/v1/course/articles" + // V1EnterpriseArticleDetailInfoPath used in enterprise course article detail info + V1EnterpriseArticleDetailInfoPath = "/app/v1/article/detail" + // V1EnterpriseVideoPlayAuthPath used in enterprise course video play auth + V1EnterpriseVideoPlayAuthPath = "/app/v1/source_auth/video_play_auth" + // GeekBangCookieDomain ... GeekBangCookieDomain = ".geekbang.org" @@ -81,8 +91,9 @@ type Product struct { // Article ... type Article struct { - AID int - Title string + AID int + SectionTitle string + Title string } // ErrGeekTimeAPIBadCode ... @@ -141,6 +152,19 @@ func NewUniversityClient(cs []*http.Cookie) *Client { return c } +// NewEnterpriseClient +func NewEnterpriseClient(cs []*http.Cookie) *Client { + httpClient := resty.New(). + SetCookies(cs). + SetRetryCount(1). + SetTimeout(10*time.Second). + SetHeader("User-Agent", DefaultUserAgent). + SetLogger(logger.DiscardLogger{}) + + c := &Client{HTTPClient: httpClient, BaseURL: GeekBangEnterpriseBaseURL, Cookies: cs} + return c +} + // Login call geektime login api and return auth cookies func (c *Client) Login(phone, password string) ([]*http.Cookie, error) { var res struct { @@ -328,6 +352,109 @@ func (c *Client) ProductInfo(productID int) (response.V3ProductInfoResponse, err return res, nil } +func (c *Client) enterpriseProductInfo(productID int) (response.V1EnterpriseProductInfoResponse, error) { + var res response.V1EnterpriseProductInfoResponse + r := c.newRequest(resty.MethodPost, + V1EnterpriseProductInfoPath, + nil, + map[string]interface{}{ + "id": productID, + }, + &res, + ) + if _, err := do(r); err != nil { + return response.V1EnterpriseProductInfoResponse{}, err + } + return res, nil +} + +func (c *Client) EnterpriseArticlesInfo(id int) (Product, error) { + var p Product + productInfo, err := c.enterpriseProductInfo(id) + if err != nil { + return p, err + } + + var res response.V1EnterpriseArticlesResponse + r := c.newRequest(resty.MethodPost, + V1EnterpriseArticlesInfoPath, + nil, + map[string]interface{}{ + "id": id, + }, + &res, + ) + + resp, err := do(r) + if err != nil { + return p, err + } + + if res.Code != 0 { + if !res.Data.IsShow && !productInfo.Data.Extra.IsMyCourse { + p.Access = false + return p, nil + } + return p, ErrGeekTimeAPIBadCode{V1EnterpriseArticlesInfoPath, resp.String()} + } + + p = Product{ + Access: true, + ID: id, + Title: productInfo.Data.Title, + Type: "", + IsVideo: true, + } + var articles []Article + + for _, sections := range res.Data.List { + for _, a := range sections.ArticleList { + articleID, _ := strconv.Atoi(a.Article.ID) + articles = append(articles, Article{ + AID: articleID, + SectionTitle: sections.Title, + Title: a.Article.Title, + }) + } + } + p.Articles = articles + + return p, nil +} + +func (c *Client) V1EnterpriseArticleDetailInfo(articleID string) (response.V1EnterpriseArticlesDetailResponse, error) { + var res response.V1EnterpriseArticlesDetailResponse + r := c.newRequest(resty.MethodPost, + V1EnterpriseArticleDetailInfoPath, + nil, + map[string]interface{}{ + "article_id": articleID, + }, + &res, + ) + if _, err := do(r); err != nil { + return response.V1EnterpriseArticlesDetailResponse{}, err + } + return res, nil +} + +func (c *Client) EnterpriseVideoPlayAuth(articleID, videoID string) (string, error) { + var res response.V3VideoPlayAuthResponse + r := c.newRequest(resty.MethodPost, + V1EnterpriseVideoPlayAuthPath, + nil, + map[string]interface{}{ + "aid": articleID, + "video_id": videoID, + }, + &res, + ) + if _, err := do(r); err != nil { + return "", err + } + return res.Data.PlayAuth, nil +} + // V3ArticleInfo used to get daily lesson or qconplus article info func (c *Client) V3ArticleInfo(articleID int) (response.V3ArticleInfoResponse, error) { var res response.V3ArticleInfoResponse diff --git a/internal/geektime/response/struct_v1_enterprise_article_info.go b/internal/geektime/response/struct_v1_enterprise_article_info.go new file mode 100644 index 0000000..b78b3a5 --- /dev/null +++ b/internal/geektime/response/struct_v1_enterprise_article_info.go @@ -0,0 +1,195 @@ +package response + +type V1EnterpriseArticlesDetailResponse struct { + Code int `json:"code"` + Data struct { + ID string `json:"id"` + Time string `json:"time"` + Type string `json:"type"` + FavoriteID int `json:"favorite_id"` + DiscussionNumber int `json:"discussion_number"` + ColumnTitle string `json:"column_title"` + Rights bool `json:"rights"` + Show bool `json:"show"` + RichType int `json:"rich_type"` + PID int `json:"pid"` + SKU int `json:"sku"` + Action string `json:"action"` + Score int `json:"score"` + IsRequired bool `json:"is_required"` + URI string `json:"uri"` + ColumnType int `json:"column_type"` + EnterpriseID string `json:"enterprise_id"` + NodeType int `json:"node_type"` + Published int `json:"published"` + ArtStatus int `json:"art_status"` + SKUStatus int `json:"sku_status"` + IsSell int `json:"is_sell"` + Name string `json:"name"` + ProductType string `json:"product_type"` + ArticleSource int `json:"article_source"` + ArticleVendorID int `json:"article_vendor_id"` + Author struct { + Name string `json:"name"` + Avatar string `json:"avatar"` + Info string `json:"info"` + Intro string `json:"intro"` + } `json:"author"` + Article struct { + ID string `json:"id"` + Title string `json:"title"` + Content string `json:"content"` + ContentMD string `json:"content_md"` + CTime int `json:"ctime"` + PosterWxlite string `json:"poster_wxlite"` + CoverHidden int `json:"cover_hidden"` + Subtitle string `json:"subtitle"` + Summary string `json:"summary"` + CouldPreview bool `json:"could_preview"` + BCouldPreview bool `json:"b_could_preview"` + ContentJSON string `json:"content_json"` + ContentJSONShort string `json:"content_json_short"` + InlineVideo struct { + Rights []interface{} `json:"rights"` + Preview []interface{} `json:"preview"` + } `json:"inline_video"` + Cover struct { + ColumnCover string `json:"column_cover"` + Default string `json:"default"` + CoverID int `json:"cover_id"` + CoverStatus int `json:"cover_status"` + SKUCover struct { + Ratio16 string `json:"ratio_16"` + Ratio16URL string `json:"ratio_16_url"` + Ratio4 string `json:"ratio_4"` + Ratio4URL string `json:"ratio_4_url"` + Ratio1 string `json:"ratio_1"` + Ratio1URL string `json:"ratio_1_url"` + ShowCover int `json:"show_cover"` + } `json:"sku_cover"` + } `json:"cover"` + Share struct { + Title string `json:"title"` + Content string `json:"content"` + Cover string `json:"cover"` + Poster string `json:"poster"` + } `json:"share"` + Relation struct { + PrevID string `json:"prev_id"` + PrevChapterTitle string `json:"prev_chapter_title"` + PrevArticleTitle string `json:"prev_article_title"` + NextID string `json:"next_id"` + NextChapterTitle string `json:"next_chapter_title"` + NextArticleTitle string `json:"next_article_title"` + } `json:"relation"` + } `json:"article"` + Chapter struct { + SourceID int `json:"source_id"` + Title string `json:"title"` + SKU string `json:"sku"` + Score string `json:"score"` + PChapterSourceID string `json:"pchapter_source_id"` + PChapterTitle string `json:"p_chapter_title"` + ChapterStatus int `json:"chapter_status"` + } `json:"chapter"` + Audio struct { + URL string `json:"url"` + DownloadURL string `json:"download_url"` + Size int `json:"size"` + Title string `json:"title"` + Time string `json:"time"` + MD5 string `json:"md5"` + Dubber string `json:"dubber"` + ID string `json:"id"` + Status int `json:"status"` + } `json:"audio"` + Video struct { + ID string `json:"id"` + MD5 string `json:"md5"` + URL string `json:"url"` + Cover struct { + Type int `json:"type"` + ID int `json:"id"` + URL string `json:"url"` + } `json:"cover"` + Width int `json:"width"` + Height int `json:"height"` + Size int `json:"size"` + Time string `json:"time"` + HLSMedias []struct { + Quality string `json:"quality"` + Size int `json:"size"` + URL string `json:"url"` + } `json:"hls_medias"` + HLSVid string `json:"hls_vid"` + Version int `json:"version"` + Medias interface{} `json:"medias"` + MediaOpen string `json:"media_open"` + CouldPreview int `json:"could_preview"` + Preview struct { + Duration int `json:"duration"` + Medias []struct { + Quality string `json:"quality"` + Size int `json:"size"` + URL string `json:"url"` + } `json:"medias"` + } `json:"preview"` + Subtitles struct { + Rights interface{} `json:"rights"` + Preview interface{} `json:"preview"` + } `json:"subtitles"` + Status int `json:"status"` + } `json:"video"` + Files []interface{} `json:"files"` + Extra struct { + Process struct { + ArticleID string `json:"article_id"` + LearnPercent int `json:"learn_percent"` + ArticleOffset struct { + CurOffset int `json:"cur_offset"` + MaxOffset int `json:"max_offset"` + Length int `json:"length"` + Version int `json:"version"` + Process int `json:"process"` + LearnTime int `json:"learn_time"` + LearnStatus int `json:"learn_status"` + } `json:"article_offset"` + AudioOffset struct { + CurOffset int `json:"cur_offset"` + MaxOffset int `json:"max_offset"` + Length int `json:"length"` + Version int `json:"version"` + Process int `json:"process"` + LearnTime int `json:"learn_time"` + LearnStatus int `json:"learn_status"` + } `json:"audio_offset"` + VideoOffset struct { + CurOffset int `json:"cur_offset"` + MaxOffset int `json:"max_offset"` + Length int `json:"length"` + Version int `json:"version"` + Process int `json:"process"` + LearnTime int `json:"learn_time"` + LearnStatus int `json:"learn_status"` + } `json:"video_offset"` + } `json:"process"` + IsLast bool `json:"is_last"` + Fav struct { + HasDone bool `json:"has_done"` + TotalCount int `json:"total_count"` + FavID int `json:"fav_id"` + FavType int `json:"fav_type"` + } `json:"fav"` + IsShow bool `json:"IsShow"` + Attachments []interface{} `json:"attachments"` + } `json:"extra"` + AnyreadTotal int `json:"anyread_total"` + AnyreadUsed int `json:"anyread_used"` + AnyreadHit bool `json:"anyread_hit"` + } `json:"data"` + Error interface{} `json:"error"` + Extra struct { + Cost float64 `json:"cost"` + RequestID string `json:"request-id"` + } `json:"extra"` +} diff --git a/internal/geektime/response/struct_v1_enterprise_articles.go b/internal/geektime/response/struct_v1_enterprise_articles.go new file mode 100644 index 0000000..3386f32 --- /dev/null +++ b/internal/geektime/response/struct_v1_enterprise_articles.go @@ -0,0 +1,208 @@ +package response + +type V1EnterpriseArticlesResponse struct { + Code int `json:"code"` + Data struct { + List []struct { + ID int `json:"id"` + Title string `json:"title"` + Count int `json:"count"` + Score int `json:"score"` + IsLast bool `json:"is_last"` + ArticleList []struct { + ID string `json:"id"` + Time string `json:"time"` + Type string `json:"type"` + FavoriteID int `json:"favorite_id"` + DiscussionNumber int `json:"discussion_number"` + ColumnTitle string `json:"column_title"` + Rights bool `json:"rights"` + Show bool `json:"show"` + RichType int `json:"rich_type"` + PID int `json:"pid"` + SKU int `json:"sku"` + Action string `json:"action"` + Score int `json:"score"` + IsRequired bool `json:"is_required"` + URI string `json:"uri"` + ColumnType int `json:"column_type"` + EnterpriseID string `json:"enterprise_id"` + NodeType int `json:"node_type"` + Published int `json:"published"` + ArtStatus int `json:"art_status"` + SKUStatus int `json:"sku_status"` + IsSell int `json:"is_sell"` + Name string `json:"name"` + ProductType string `json:"product_type"` + ArticleSource int `json:"article_source"` + ArticleVendorID int `json:"article_vendor_id"` + Author struct { + Name string `json:"name"` + Avatar string `json:"avatar"` + Info string `json:"info"` + Intro string `json:"intro"` + } `json:"author"` + Article struct { + ID string `json:"id"` + Title string `json:"title"` + Content string `json:"content"` + ContentMD string `json:"content_md"` + CTime int `json:"ctime"` + PosterWxlite string `json:"poster_wxlite"` + CoverHidden int `json:"cover_hidden"` + Subtitle string `json:"subtitle"` + Summary string `json:"summary"` + CouldPreview bool `json:"could_preview"` + BCouldPreview bool `json:"b_could_preview"` + ContentJSON string `json:"content_json"` + ContentJSONShort string `json:"content_json_short"` + InlineVideo struct { + Rights []interface{} `json:"rights"` + Preview []interface{} `json:"preview"` + } `json:"inline_video"` + Cover struct { + ColumnCover string `json:"column_cover"` + Default string `json:"default"` + CoverID int `json:"cover_id"` + CoverStatus int `json:"cover_status"` + SKUCover struct { + Ratio16 string `json:"ratio_16"` + Ratio16URL string `json:"ratio_16_url"` + Ratio4 string `json:"ratio_4"` + Ratio4URL string `json:"ratio_4_url"` + Ratio1 string `json:"ratio_1"` + Ratio1URL string `json:"ratio_1_url"` + ShowCover int `json:"show_cover"` + } `json:"sku_cover"` + } `json:"cover"` + Share struct { + Title string `json:"title"` + Content string `json:"content"` + Cover string `json:"cover"` + Poster string `json:"poster"` + } `json:"share"` + Relation struct { + PrevID string `json:"prev_id"` + PrevChapterTitle string `json:"prev_chapter_title"` + PrevArticleTitle string `json:"prev_article_title"` + NextID string `json:"next_id"` + NextChapterTitle string `json:"next_chapter_title"` + NextArticleTitle string `json:"next_article_title"` + } `json:"relation"` + } `json:"article"` + Chapter struct { + SourceID int `json:"source_id"` + Title string `json:"title"` + SKU string `json:"sku"` + Score string `json:"score"` + PChapterSourceID string `json:"pchapter_source_id"` + PChapterTitle string `json:"p_chapter_title"` + ChapterStatus int `json:"chapter_status"` + } `json:"chapter"` + Audio struct { + URL string `json:"url"` + DownloadURL string `json:"download_url"` + Size int `json:"size"` + Title string `json:"title"` + Time string `json:"time"` + MD5 string `json:"md5"` + Dubber string `json:"dubber"` + ID string `json:"id"` + Status int `json:"status"` + } `json:"audio"` + Video struct { + ID string `json:"id"` + MD5 string `json:"md5"` + URL string `json:"url"` + Cover struct { + Type int `json:"type"` + ID int `json:"id"` + URL string `json:"url"` + } `json:"cover"` + Width int `json:"width"` + Height int `json:"height"` + Size int `json:"size"` + Time string `json:"time"` + HlsMedias []struct { + Quality string `json:"quality"` + Size int `json:"size"` + URL string `json:"url"` + } `json:"hls_medias"` + HlsVid string `json:"hls_vid"` + Version int `json:"version"` + Medias interface{} `json:"medias"` + MediaOpen string `json:"media_open"` + CouldPreview int `json:"could_preview"` + Preview struct { + Duration int `json:"duration"` + Medias []struct { + Quality string `json:"quality"` + Size int `json:"size"` + URL string `json:"url"` + } `json:"medias"` + } `json:"preview"` + Subtitles struct { + Rights interface{} `json:"rights"` + Preview []interface{} `json:"preview"` + } `json:"subtitles"` + Status int `json:"status"` + } `json:"video"` + Files []interface{} `json:"files"` + Extra struct { + Process struct { + ArticleID string `json:"article_id"` + LearnPercent int `json:"learn_percent"` + ArticleOffset struct { + CurOffset int `json:"cur_offset"` + MaxOffset int `json:"max_offset"` + Length int `json:"length"` + Version int `json:"version"` + Process int `json:"process"` + LearnTime int `json:"learn_time"` + LearnStatus int `json:"learn_status"` + } `json:"article_offset"` + AudioOffset struct { + CurOffset int `json:"cur_offset"` + MaxOffset int `json:"max_offset"` + Length int `json:"length"` + Version int `json:"version"` + Process int `json:"process"` + LearnTime int `json:"learn_time"` + LearnStatus int `json:"learn_status"` + } `json:"audio_offset"` + VideoOffset struct { + CurOffset int `json:"cur_offset"` + MaxOffset int `json:"max_offset"` + Length int `json:"length"` + Version int `json:"version"` + Process int `json:"process"` + LearnTime int `json:"learn_time"` + LearnStatus int `json:"learn_status"` + } `json:"video_offset"` + } `json:"process"` + IsLast bool `json:"is_last"` + Fav struct { + HasDone bool `json:"has_done"` + TotalCount int `json:"total_count"` + FavID int `json:"fav_id"` + FavType int `json:"fav_type"` + } `json:"fav"` + IsShow bool `json:"IsShow"` + Attachments []interface{} `json:"attachments"` + } `json:"extra"` + AnyreadTotal int `json:"anyread_total"` + AnyreadUsed int `json:"anyread_used"` + AnyreadHit bool `json:"anyread_hit"` + } `json:"article_list"` + } `json:"list"` + HasChapter bool `json:"has_chapter"` + IsShow bool `json:"is_show"` + AnyreadTotal int `json:"anyread_total"` + AnyreadUsed int `json:"anyread_used"` + + Extra struct { + Cost float64 `json:"cost"` + RequestID string `json:"request-id"` + } `json:"extra"` + } `json:"data"` +} diff --git a/internal/geektime/response/struct_v1_enterprise_product_info.go b/internal/geektime/response/struct_v1_enterprise_product_info.go new file mode 100644 index 0000000..3096a24 --- /dev/null +++ b/internal/geektime/response/struct_v1_enterprise_product_info.go @@ -0,0 +1,166 @@ +package response + +type V1EnterpriseProductInfoResponse struct { + Code int `json:"code"` + Data struct { + ID int `json:"id"` + SKU int `json:"sku"` + Title string `json:"title"` + SubTitle string `json:"sub_title"` + ProductType string `json:"product_type"` + ColumnType int `json:"column_type"` + CourseType int `json:"course_type"` + UpdateFreq string `json:"update_frequency"` + Author struct { + Name string `json:"name"` + Intro string `json:"intro"` + Info string `json:"info"` + Avatar string `json:"avatar"` + BriefHTML string `json:"brief_html"` + Brief string `json:"brief"` + } `json:"author"` + Cover struct { + Square string `json:"square"` + Rectangle string `json:"rectangle"` + Horizontal string `json:"horizontal"` + LectureHorizontal string `json:"lecture_horizontal"` + LearnHorizontal string `json:"learn_horizontal"` + Transparent string `json:"transparent"` + Color string `json:"color"` + Cover string `json:"cover"` + RectCover string `json:"rect_cover"` + Ratio1 string `json:"ratio_1"` + Ratio4 string `json:"ratio_4"` + Ratio16 string `json:"ratio_16"` + CoverID int `json:"cover_id"` + CoverStatus int `json:"cover_status"` + } `json:"cover"` + TeachTypeList []int `json:"teach_type_list"` + TeachTypeNameList []string `json:"teach_type_name_list"` + Article struct { + Count int `json:"count"` + CountReq int `json:"count_req"` + CountPub int `json:"count_pub"` + FirstArticleID string `json:"first_article_id"` + TotalLength int `json:"total_length"` + TotalTimeStr string `json:"total_time_str"` + TotalTimeHourStr string `json:"total_time_hour_str"` + } `json:"article"` + SEO struct { + Keywords []string `json:"keywords"` + } `json:"seo"` + Category struct { + CategoryID int `json:"category_id"` + Name string `json:"name"` + PID int `json:"pid"` + } `json:"category"` + Path struct { + Desc string `json:"desc"` + DescHTML string `json:"desc_html"` + } `json:"path"` + DL struct { + Article struct { + ArticleID string `json:"article_id"` + Duration string `json:"duration"` + Hot int `json:"hot"` + CouldPreview bool `json:"could_preview"` + DurationSeconds int `json:"duration_seconds"` + } `json:"article"` + CollectionIDs interface{} `json:"collection_ids"` + } `json:"dl"` + Share struct { + PicURL string `json:"pic_url"` + Title string `json:"title"` + PicName string `json:"pic_name"` + Content string `json:"content"` + } `json:"share"` + IsFinish bool `json:"is_finish"` + Unit string `json:"unit"` + BannerCover string `json:"banner_cover"` + CatalogPicURL string `json:"catalog_pic_url"` + Extra struct { + Fav struct { + HasDone bool `json:"has_done"` + TotalCount int `json:"total_count"` + FavID int `json:"fav_id"` + FavType int `json:"fav_type"` + } `json:"fav"` + IsSVIP bool `json:"is_svip"` + IsMyCourse bool `json:"is_my_course"` + Rate struct { + ArticleCount int `json:"article_count"` + ArticleCountReq int `json:"article_count_req"` + IsFinished bool `json:"is_finished"` + RatePercent int `json:"rate_percent"` + VideoSeconds int `json:"video_seconds"` + LastArticleID string `json:"last_article_id"` + LastChapterID int `json:"last_chapter_id"` + HasLearn bool `json:"has_learn"` + } `json:"rate"` + StudyCount int `json:"study_count"` + Modules []struct { + Name string `json:"name"` + IsTop bool `json:"is_top"` + Title string `json:"title"` + Type string `json:"type"` + Content string `json:"content"` + } `json:"modules"` + TplType int `json:"tpl_type"` + CollectionType int `json:"collection_type"` + WithVideo bool `json:"with_video"` + PIDs []interface{} `json:"pids"` + Labels []interface{} `json:"labels"` + CategoryIDs []interface{} `json:"category_ids"` + Group struct { + Title string `json:"title"` + Description string `json:"description"` + StartTime int `json:"start_time"` + EndTime int `json:"end_time"` + QRCodeShow bool `json:"qrcode_show"` + QRCodeURL string `json:"qrcode_url"` + } `json:"group"` + VIP struct { + Show bool `json:"show"` + EndTime int `json:"end_time"` + } `json:"vip"` + CourseStatus int `json:"course_status"` + CID int `json:"cid"` + RelatedVIPSkus []struct { + ColumnTitle string `json:"column_title"` + DisplayType int `json:"display_type"` + EsPrice int `json:"es_price"` + EsSaleMaxLimit int `json:"es_sale_max_limit"` + EsSaleMinLimit int `json:"es_sale_min_limit"` + SKU int `json:"sku"` + Status int `json:"status"` + VIPDays int `json:"vip_days"` + VIPTitle string `json:"vip_title"` + } `json:"related_vip_skus"` + } `json:"extra"` + Intro string `json:"intro"` + IntroHTML string `json:"intro_html"` + BgColor string `json:"bgcolor"` + IsIncludePreview bool `json:"is_include_preview"` + ShowChapter bool `json:"show_chapter"` + DisplayType int `json:"display_type"` + IntroBGStyle int `json:"intro_bg_style"` + Sort int `json:"sort"` + CTime int `json:"ctime"` + SalePrice int `json:"sale_price"` + SaleLimit int `json:"sale_limit"` + Status int `json:"status"` + IsJoinSVIP int `json:"is_join_svip"` + IsJoinColumnVIP int `json:"is_join_column_vip"` + IsJoinCVIP int `json:"is_join_cvip"` + NeedGraduate int `json:"need_graduate"` + AuthorSignatureURL string `json:"author_signature_url"` + IsFreebie int `json:"is_freebie"` + IsDtai int `json:"is_dtai"` + } `json:"data"` + Error struct { + } `json:"error"` + Extra struct { + Cost float64 `json:"cost"` + RequestID string `json:"request-id"` + } `json:"extra"` +} diff --git a/internal/video/video.go b/internal/video/video.go index 45f3b19..408ae02 100644 --- a/internal/video/video.go +++ b/internal/video/video.go @@ -8,6 +8,7 @@ import ( "os" "path" "path/filepath" + "strconv" "strings" "time" @@ -76,6 +77,36 @@ func DownloadArticleVideo(ctx context.Context, concurrency) } +func DownloadEnterpriseArticleVideo(ctx context.Context, + client *geektime.Client, + articleID int, + sourceType int, + projectDir string, + quality string, + concurrency int, +) error { + + articleInfo, err := client.V1EnterpriseArticleDetailInfo(strconv.Itoa(articleID)) + if err != nil { + return err + } + if articleInfo.Data.Video.ID == "" { + return nil + } + playAuth, err := client.EnterpriseVideoPlayAuth(strconv.Itoa(articleID), articleInfo.Data.Video.ID) + if err != nil { + return err + } + return downloadAliyunVodEncryptVideo(ctx, + client, + playAuth, + articleInfo.Data.Article.Title, + projectDir, + quality, + articleInfo.Data.Video.ID, + concurrency) +} + // DownloadUniversityVideo ... func DownloadUniversityVideo(ctx context.Context, client *geektime.Client, @@ -185,11 +216,11 @@ func download(ctx context.Context, for _, tsFileName := range tsFileNames { u := tsURLPrefix + tsFileName dst := filepath.Join(tempVideoDir, tsFileName) - + headers := make(map[string]string, 2) headers[geektime.Origin] = geektime.DefaultBaseURL headers[geektime.UserAgent] = geektime.DefaultUserAgent - + fileSize, err := downloader.DownloadFileConcurrently(ctx, dst, u, headers, 5) if err != nil { return err From 3c98c9bdecce2f0b182d9b4b7b39c92e30557dc6 Mon Sep 17 00:00:00 2001 From: nicoxiang <916592561@qq.com> Date: Sun, 14 Apr 2024 20:52:39 +0800 Subject: [PATCH 2/8] update geektime client --- cmd/root.go | 27 +-- internal/geektime/enterprise_geekbang.go | 144 ++++++++++++ internal/geektime/geektime.go | 288 ++++------------------- internal/geektime/university_geekbang.go | 102 ++++++++ internal/video/video.go | 4 +- 5 files changed, 303 insertions(+), 262 deletions(-) create mode 100644 internal/geektime/enterprise_geekbang.go create mode 100644 internal/geektime/university_geekbang.go diff --git a/cmd/root.go b/cmd/root.go index 01d54f0..ca43524 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -231,13 +231,13 @@ func loadProduct(ctx context.Context, productID int) { var p geektime.Product var err error if isUniversity() { - p, err = universityClient.MyClassProduct(productID) + p, err = universityClient.GetUniversityProductInfo(productID) // university don't need check product type // if input invalid id, access mark is 0 - } else if isEnterpriseUniversity() { - p, err = geekEnterpriseClient.EnterpriseArticlesInfo(productID) + } else if isEnterprise() { + p, err = geekEnterpriseClient.GetEnterpriseProductInfo(productID) } else { - p, err = geektimeClient.ColumnInfo(productID) + p, err = geektimeClient.GetNormalProductInfo(productID) if err == nil { c := checkProductType(p.Type) // if check product type fail, re-input product @@ -297,7 +297,6 @@ func productOps(ctx context.Context) { } func selectArticle(ctx context.Context) { - loadArticles() items := []geektime.Article{ { AID: -1, @@ -339,7 +338,6 @@ func handleSelectArticle(ctx context.Context, index int) { } func handleDownloadAll(ctx context.Context) { - loadArticles() projectDir, err := mkDownloadProjectDir(downloadFolder, phone, gcid, selectedProduct.Title) checkError(err) downloaded, err := findDownloadedArticleFileNames(projectDir) @@ -436,7 +434,7 @@ func handleDownloadAll(ctx context.Context) { if isUniversity() { err := video.DownloadUniversityVideo(ctx, universityClient, a.AID, selectedProduct, sectionDir, quality, concurrency) checkError(err) - } else if isEnterpriseUniversity() { + } else if isEnterprise() { err := video.DownloadEnterpriseArticleVideo(ctx, geekEnterpriseClient, a.AID, selectedProductType.SourceType, sectionDir, quality, concurrency) checkError(err) } else { @@ -453,17 +451,6 @@ func increasePDFCount(total int, i *int) { fmt.Printf("\r已完成下载%d/%d", *i, total) } -func loadArticles() { - if !isUniversity() && !isEnterpriseUniversity() && len(selectedProduct.Articles) <= 0 { - sp.Prefix = "[ 正在加载文章列表... ]" - sp.Start() - articles, err := geektimeClient.ColumnArticles(strconv.Itoa(selectedProduct.ID)) - checkError(err) - selectedProduct.Articles = articles - sp.Stop() - } -} - func downloadArticle(ctx context.Context, article geektime.Article, projectDir string) { if isText() { needDownloadPDF := columnOutputType&1 == 1 @@ -526,7 +513,7 @@ func downloadArticle(ctx context.Context, article geektime.Article, projectDir s if isUniversity() { err := video.DownloadUniversityVideo(ctx, universityClient, article.AID, selectedProduct, projectDir, quality, concurrency) checkError(err) - } else if isEnterpriseUniversity() { + } else if isEnterprise() { err := video.DownloadEnterpriseArticleVideo(ctx, geekEnterpriseClient, article.AID, selectedProductType.SourceType, projectDir, quality, concurrency) checkError(err) } else { @@ -544,7 +531,7 @@ func isUniversity() bool { return selectedProductType.Index == 4 } -func isEnterpriseUniversity() bool { +func isEnterprise() bool { return selectedProductType.Index == 6 } diff --git a/internal/geektime/enterprise_geekbang.go b/internal/geektime/enterprise_geekbang.go new file mode 100644 index 0000000..3da46ba --- /dev/null +++ b/internal/geektime/enterprise_geekbang.go @@ -0,0 +1,144 @@ +package geektime + +import ( + "net/http" + "strconv" + "time" + + "github.com/go-resty/resty/v2" + "github.com/nicoxiang/geektime-downloader/internal/geektime/response" + "github.com/nicoxiang/geektime-downloader/internal/pkg/logger" +) + +const ( + // GeekBangEnterpriseBaseURL is geekbang enterprise base URL + GeekBangEnterpriseBaseURL = "https://b.geekbang.org" + // V1EnterpriseCourseInfoPath used in enterprise course product info + V1EnterpriseCourseInfoPath = "/app/v1/course/info" + // V1EnterpriseArticlesInfoPath used in enterprise course articles info + V1EnterpriseArticlesInfoPath = "/app/v1/course/articles" + // V1EnterpriseArticleDetailInfoPath used in enterprise course article detail info + V1EnterpriseArticleDetailInfoPath = "/app/v1/article/detail" + // V1EnterpriseVideoPlayAuthPath used in enterprise course video play auth + V1EnterpriseVideoPlayAuthPath = "/app/v1/source_auth/video_play_auth" +) + +// NewEnterpriseClient init enterprise http client +func NewEnterpriseClient(cs []*http.Cookie) *Client { + httpClient := resty.New(). + SetCookies(cs). + SetRetryCount(1). + SetTimeout(10*time.Second). + SetHeader("User-Agent", DefaultUserAgent). + SetLogger(logger.DiscardLogger{}) + + c := &Client{HTTPClient: httpClient, BaseURL: GeekBangEnterpriseBaseURL, Cookies: cs} + return c +} + +// GetEnterpriseProductInfo get enterprise course info +func (c *Client) GetEnterpriseProductInfo(id int) (Product, error) { + var p Product + var err error + p, err = c.enterpriseCourseInfo(id) + if err != nil { + return p, err + } + + var articles []Article + articles, err = c.enterpriseCourseArticles(id) + if err != nil { + return p, err + } + p.Articles = articles + + return p, nil +} + +// V1EnterpriseArticleDetailInfo get enterprise article detail +func (c *Client) V1EnterpriseArticleDetailInfo(articleID string) (response.V1EnterpriseArticlesDetailResponse, error) { + var res response.V1EnterpriseArticlesDetailResponse + r := c.newRequest(resty.MethodPost, + V1EnterpriseArticleDetailInfoPath, + nil, + map[string]interface{}{ + "article_id": articleID, + }, + &res, + ) + if _, err := do(r); err != nil { + return response.V1EnterpriseArticlesDetailResponse{}, err + } + return res, nil +} + +// EnterpriseVideoPlayAuth get enterprise play auth string +func (c *Client) EnterpriseVideoPlayAuth(articleID, videoID string) (string, error) { + var res response.V3VideoPlayAuthResponse + r := c.newRequest(resty.MethodPost, + V1EnterpriseVideoPlayAuthPath, + nil, + map[string]interface{}{ + "aid": articleID, + "video_id": videoID, + }, + &res, + ) + if _, err := do(r); err != nil { + return "", err + } + return res.Data.PlayAuth, nil +} + +func (c *Client) enterpriseCourseInfo(productID int) (Product, error) { + var res response.V1EnterpriseProductInfoResponse + r := c.newRequest(resty.MethodPost, + V1EnterpriseCourseInfoPath, + nil, + map[string]interface{}{ + "id": productID, + }, + &res, + ) + if _, err := do(r); err != nil { + return Product{}, err + } + + return Product{ + Access: res.Data.Extra.IsMyCourse, + ID: productID, + Title: res.Data.Title, + Type: "", + IsVideo: true, + }, nil +} + +func (c *Client) enterpriseCourseArticles(productID int) ([]Article, error) { + var res response.V1EnterpriseArticlesResponse + r := c.newRequest(resty.MethodPost, + V1EnterpriseArticlesInfoPath, + nil, + map[string]interface{}{ + "id": productID, + }, + &res, + ) + + if _, err := do(r); err != nil { + return nil, err + } + + var articles []Article + + for _, sections := range res.Data.List { + for _, a := range sections.ArticleList { + articleID, _ := strconv.Atoi(a.Article.ID) + articles = append(articles, Article{ + AID: articleID, + SectionTitle: sections.Title, + Title: a.Article.Title, + }) + } + } + return articles, nil +} diff --git a/internal/geektime/geektime.go b/internal/geektime/geektime.go index 24f72e2..ed37b76 100644 --- a/internal/geektime/geektime.go +++ b/internal/geektime/geektime.go @@ -22,9 +22,7 @@ const ( Origin = "Origin" // UserAgent ... UserAgent = "User-Agent" - // GeekBangUniversityBaseURL ... - GeekBangUniversityBaseURL = "https://u.geekbang.org" - GeekBangEnterpriseBaseURL = "https://b.geekbang.org" + // GeekBangAccountBaseURL ... GeekBangAccountBaseURL = "https://account.geekbang.org" // LoginPath ... @@ -35,7 +33,6 @@ const ( V1ColumnArticlesPath = "/serv/v1/column/articles" // V1ArticlePath used in normal column V1ArticlePath = "/serv/v1/article" - // V3ColumnInfoPath used in get normal column/video info V3ColumnInfoPath = "/serv/v3/column/info" // V3ProductInfoPath used in get daily lesson, qconplus product info @@ -45,19 +42,6 @@ const ( // V3VideoPlayAuthPath used in normal video, daily lesson, qconplus video play auth V3VideoPlayAuthPath = "/serv/v3/source_auth/video_play_auth" - // UniversityV1VideoPlayAuthPath used in university video play auth - UniversityV1VideoPlayAuthPath = "/serv/v1/video/play-auth" - // UniversityV1MyClassInfoPath get university class info and all articles info in it - UniversityV1MyClassInfoPath = "/serv/v1/myclass/info" - - // V1EnterpriseProductInfoPath used in enterprise course product info - V1EnterpriseProductInfoPath = "/app/v1/course/info" - // V1EnterpriseArticlesInfoPath used in enterprise course articles info - V1EnterpriseArticlesInfoPath = "/app/v1/course/articles" - // V1EnterpriseArticleDetailInfoPath used in enterprise course article detail info - V1EnterpriseArticleDetailInfoPath = "/app/v1/article/detail" - // V1EnterpriseVideoPlayAuthPath used in enterprise course video play auth - V1EnterpriseVideoPlayAuthPath = "/app/v1/source_auth/video_play_auth" // GeekBangCookieDomain ... GeekBangCookieDomain = ".geekbang.org" @@ -139,32 +123,6 @@ func NewAccountClient() *Client { return c } -// NewUniversityClient ... -func NewUniversityClient(cs []*http.Cookie) *Client { - httpClient := resty.New(). - SetCookies(cs). - SetRetryCount(1). - SetTimeout(10*time.Second). - SetHeader("User-Agent", DefaultUserAgent). - SetLogger(logger.DiscardLogger{}) - - c := &Client{HTTPClient: httpClient, BaseURL: GeekBangUniversityBaseURL, Cookies: cs} - return c -} - -// NewEnterpriseClient -func NewEnterpriseClient(cs []*http.Cookie) *Client { - httpClient := resty.New(). - SetCookies(cs). - SetRetryCount(1). - SetTimeout(10*time.Second). - SetHeader("User-Agent", DefaultUserAgent). - SetLogger(logger.DiscardLogger{}) - - c := &Client{HTTPClient: httpClient, BaseURL: GeekBangEnterpriseBaseURL, Cookies: cs} - return c -} - // Login call geektime login api and return auth cookies func (c *Client) Login(phone, password string) ([]*http.Cookie, error) { var res struct { @@ -261,58 +219,23 @@ func (c *Client) Auth(cs []*http.Cookie) error { return nil } -// ColumnInfo get normal column info, like v3 product info -func (c *Client) ColumnInfo(productID int) (Product, error) { - var res response.V3ColumnInfoResponse - r := c.newRequest(resty.MethodPost, - V3ColumnInfoPath, - nil, - map[string]interface{}{ - "product_id": productID, - "with_recommend_article": true, - }, - &res, - ) - if _, err := do(r); err != nil { - return Product{}, err - } - - return Product{ - Access: res.Data.Extra.Sub.AccessMask > 0, - ID: res.Data.ID, - Type: res.Data.Type, - Title: res.Data.Title, - IsVideo: res.Data.IsVideo, - }, nil -} - -// ColumnArticles call geektime api to get article list -func (c *Client) ColumnArticles(cid string) ([]Article, error) { - res := &response.V1ColumnArticlesResponse{} - r := c.newRequest(resty.MethodPost, - V1ColumnArticlesPath, - nil, - map[string]interface{}{ - "cid": cid, - "order": "earliest", - "prev": 0, - "sample": false, - "size": 500, //get all articles - }, - res, - ) - if _, err := do(r); err != nil { - return nil, err +// GetNormalProductInfo get narmal geektime product info +func (c *Client) GetNormalProductInfo(productID int) (Product, error) { + var p Product + var err error + p, err = c.columnInfo(productID) + if err != nil { + return p, err } var articles []Article - for _, v := range res.Data.List { - articles = append(articles, Article{ - AID: v.ID, - Title: v.ArticleTitle, - }) + articles, err = c.columnArticles(productID) + if err != nil { + return p, err } - return articles, nil + p.Articles = articles + + return p, nil } // V1ArticleInfo ... @@ -352,108 +275,6 @@ func (c *Client) ProductInfo(productID int) (response.V3ProductInfoResponse, err return res, nil } -func (c *Client) enterpriseProductInfo(productID int) (response.V1EnterpriseProductInfoResponse, error) { - var res response.V1EnterpriseProductInfoResponse - r := c.newRequest(resty.MethodPost, - V1EnterpriseProductInfoPath, - nil, - map[string]interface{}{ - "id": productID, - }, - &res, - ) - if _, err := do(r); err != nil { - return response.V1EnterpriseProductInfoResponse{}, err - } - return res, nil -} - -func (c *Client) EnterpriseArticlesInfo(id int) (Product, error) { - var p Product - productInfo, err := c.enterpriseProductInfo(id) - if err != nil { - return p, err - } - - var res response.V1EnterpriseArticlesResponse - r := c.newRequest(resty.MethodPost, - V1EnterpriseArticlesInfoPath, - nil, - map[string]interface{}{ - "id": id, - }, - &res, - ) - - resp, err := do(r) - if err != nil { - return p, err - } - - if res.Code != 0 { - if !res.Data.IsShow && !productInfo.Data.Extra.IsMyCourse { - p.Access = false - return p, nil - } - return p, ErrGeekTimeAPIBadCode{V1EnterpriseArticlesInfoPath, resp.String()} - } - - p = Product{ - Access: true, - ID: id, - Title: productInfo.Data.Title, - Type: "", - IsVideo: true, - } - var articles []Article - - for _, sections := range res.Data.List { - for _, a := range sections.ArticleList { - articleID, _ := strconv.Atoi(a.Article.ID) - articles = append(articles, Article{ - AID: articleID, - SectionTitle: sections.Title, - Title: a.Article.Title, - }) - } - } - p.Articles = articles - - return p, nil -} - -func (c *Client) V1EnterpriseArticleDetailInfo(articleID string) (response.V1EnterpriseArticlesDetailResponse, error) { - var res response.V1EnterpriseArticlesDetailResponse - r := c.newRequest(resty.MethodPost, - V1EnterpriseArticleDetailInfoPath, - nil, - map[string]interface{}{ - "article_id": articleID, - }, - &res, - ) - if _, err := do(r); err != nil { - return response.V1EnterpriseArticlesDetailResponse{}, err - } - return res, nil -} - -func (c *Client) EnterpriseVideoPlayAuth(articleID, videoID string) (string, error) { - var res response.V3VideoPlayAuthResponse - r := c.newRequest(resty.MethodPost, - V1EnterpriseVideoPlayAuthPath, - nil, - map[string]interface{}{ - "aid": articleID, - "video_id": videoID, - }, - &res, - ) - if _, err := do(r); err != nil { - return "", err - } - return res.Data.PlayAuth, nil -} // V3ArticleInfo used to get daily lesson or qconplus article info func (c *Client) V3ArticleInfo(articleID int) (response.V3ArticleInfoResponse, error) { @@ -491,73 +312,58 @@ func (c *Client) VideoPlayAuth(articleID, sourceType int, videoID string) (strin return res.Data.PlayAuth, nil } -// UniversityVideoPlayAuth ... -func (c *Client) UniversityVideoPlayAuth(articleID, classID int) (response.V1VideoPlayAuthResponse, error) { - var res response.V1VideoPlayAuthResponse +// columnInfo get normal column info, like v3 product info +func (c *Client) columnInfo(productID int) (Product, error) { + var res response.V3ColumnInfoResponse r := c.newRequest(resty.MethodPost, - UniversityV1VideoPlayAuthPath, + V3ColumnInfoPath, nil, map[string]interface{}{ - "article_id": articleID, - "class_id": classID, + "product_id": productID, + "with_recommend_article": true, }, &res, ) if _, err := do(r); err != nil { - return response.V1VideoPlayAuthResponse{}, err + return Product{}, err } - return res, nil -} -// MyClassProduct ... -func (c *Client) MyClassProduct(classID int) (Product, error) { - var p Product + return Product{ + Access: res.Data.Extra.Sub.AccessMask > 0, + ID: res.Data.ID, + Type: res.Data.Type, + Title: res.Data.Title, + IsVideo: res.Data.IsVideo, + }, nil +} - var res response.V1MyClassInfoResponse +// columnArticles call geektime api to get article list +func (c *Client) columnArticles(cid int) ([]Article, error) { + res := &response.V1ColumnArticlesResponse{} r := c.newRequest(resty.MethodPost, - UniversityV1MyClassInfoPath, + V1ColumnArticlesPath, nil, map[string]interface{}{ - "class_id": classID, + "cid": strconv.Itoa(cid), + "order": "earliest", + "prev": 0, + "sample": false, + "size": 500, //get all articles }, - &res, + res, ) - - resp, err := do(r) - if err != nil { - return p, err - } - - if res.Code != 0 { - if res.Error.Code == -5001 { - p.Access = false - return p, nil - } - return p, ErrGeekTimeAPIBadCode{UniversityV1MyClassInfoPath, resp.String()} + if _, err := do(r); err != nil { + return nil, err } - p = Product{ - Access: true, - ID: classID, - Title: res.Data.Title, - Type: "", - IsVideo: true, - } var articles []Article - for _, lesson := range res.Data.Lessons { - for _, article := range lesson.Articles { - // ONLY download university video lessons - if article.VideoTime > 0 { - articles = append(articles, Article{ - AID: article.ArticleID, - Title: article.ArticleTitle, - }) - } - } + for _, v := range res.Data.List { + articles = append(articles, Article{ + AID: v.ID, + Title: v.ArticleTitle, + }) } - p.Articles = articles - - return p, nil + return articles, nil } func (c *Client) newRequest(method, url string, params map[string]string, body interface{}, res interface{}) *resty.Request { diff --git a/internal/geektime/university_geekbang.go b/internal/geektime/university_geekbang.go new file mode 100644 index 0000000..b1c8dd8 --- /dev/null +++ b/internal/geektime/university_geekbang.go @@ -0,0 +1,102 @@ +package geektime + +import ( + "net/http" + "time" + + "github.com/go-resty/resty/v2" + "github.com/nicoxiang/geektime-downloader/internal/geektime/response" + "github.com/nicoxiang/geektime-downloader/internal/pkg/logger" +) + +const ( + // GeekBangUniversityBaseURL ... + GeekBangUniversityBaseURL = "https://u.geekbang.org" + + // UniversityV1VideoPlayAuthPath used in university video play auth + UniversityV1VideoPlayAuthPath = "/serv/v1/video/play-auth" + // UniversityV1MyClassInfoPath get university class info and all articles info in it + UniversityV1MyClassInfoPath = "/serv/v1/myclass/info" +) + +// NewUniversityClient init university http client +func NewUniversityClient(cs []*http.Cookie) *Client { + httpClient := resty.New(). + SetCookies(cs). + SetRetryCount(1). + SetTimeout(10*time.Second). + SetHeader("User-Agent", DefaultUserAgent). + SetLogger(logger.DiscardLogger{}) + + c := &Client{HTTPClient: httpClient, BaseURL: GeekBangUniversityBaseURL, Cookies: cs} + return c +} + +// GetUniversityProductInfo get university class info +func (c *Client) GetUniversityProductInfo(classID int) (Product, error) { + var p Product + + var res response.V1MyClassInfoResponse + r := c.newRequest(resty.MethodPost, + UniversityV1MyClassInfoPath, + nil, + map[string]interface{}{ + "class_id": classID, + }, + &res, + ) + + resp, err := do(r) + if err != nil { + return p, err + } + + if res.Code != 0 { + if res.Error.Code == -5001 { + p.Access = false + return p, nil + } + return p, ErrGeekTimeAPIBadCode{UniversityV1MyClassInfoPath, resp.String()} + } + + p = Product{ + Access: true, + ID: classID, + Title: res.Data.Title, + Type: "", + IsVideo: true, + } + var articles []Article + for _, lesson := range res.Data.Lessons { + for _, article := range lesson.Articles { + // ONLY download university video lessons + if article.VideoTime > 0 { + articles = append(articles, Article{ + AID: article.ArticleID, + Title: article.ArticleTitle, + }) + } + } + } + p.Articles = articles + + return p, nil +} + +// UniversityVideoPlayAuth get university play auth string +func (c *Client) UniversityVideoPlayAuth(articleID, classID int) (response.V1VideoPlayAuthResponse, error) { + var res response.V1VideoPlayAuthResponse + r := c.newRequest(resty.MethodPost, + UniversityV1VideoPlayAuthPath, + nil, + map[string]interface{}{ + "article_id": articleID, + "class_id": classID, + }, + &res, + ) + if _, err := do(r); err != nil { + return response.V1VideoPlayAuthResponse{}, err + } + return res, nil +} diff --git a/internal/video/video.go b/internal/video/video.go index 408ae02..44db710 100644 --- a/internal/video/video.go +++ b/internal/video/video.go @@ -24,6 +24,7 @@ import ( const ( // syncByte = uint8(71) //0x47 + // TSExtension ... TSExtension = ".ts" ) @@ -77,6 +78,7 @@ func DownloadArticleVideo(ctx context.Context, concurrency) } +// DownloadEnterpriseArticleVideo download enterprise video func DownloadEnterpriseArticleVideo(ctx context.Context, client *geektime.Client, articleID int, @@ -221,7 +223,7 @@ func download(ctx context.Context, headers[geektime.Origin] = geektime.DefaultBaseURL headers[geektime.UserAgent] = geektime.DefaultUserAgent - fileSize, err := downloader.DownloadFileConcurrently(ctx, dst, u, headers, 5) + fileSize, err := downloader.DownloadFileConcurrently(ctx, dst, u, headers, concurrency) if err != nil { return err } From d34ead878d56f3583d4ddc7b6a616f610f986bff Mon Sep 17 00:00:00 2001 From: nicoxiang <916592561@qq.com> Date: Thu, 25 Apr 2024 23:45:21 +0800 Subject: [PATCH 3/8] refactor: split geektime client --- cmd/root.go | 55 ++-- internal/config/config.go | 2 +- internal/geektime/account.go | 121 ++++++++ internal/geektime/client.go | 133 +++++++++ .../{enterprise_geekbang.go => enterprise.go} | 56 ++-- internal/geektime/geektime.go | 274 ++---------------- .../{university_geekbang.go => university.go} | 33 +-- internal/pkg/m3u8/m3u8.go | 2 +- internal/video/video.go | 10 +- 9 files changed, 347 insertions(+), 339 deletions(-) create mode 100644 internal/geektime/account.go create mode 100644 internal/geektime/client.go rename internal/geektime/{enterprise_geekbang.go => enterprise.go} (66%) rename internal/geektime/{university_geekbang.go => university.go} (71%) diff --git a/cmd/root.go b/cmd/root.go index ca43524..cc8b49b 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -29,23 +29,20 @@ import ( ) var ( - phone string - gcid string - gcess string - concurrency int - downloadFolder string - sp *spinner.Spinner - selectedProduct geektime.Product - quality string - downloadComments bool - selectedProductType productTypeSelectOption - columnOutputType int - waitSeconds int - productTypeOptions = make([]productTypeSelectOption, 7) - geektimeClient *geektime.Client - geekEnterpriseClient *geektime.Client - accountClient *geektime.Client - universityClient *geektime.Client + phone string + gcid string + gcess string + concurrency int + downloadFolder string + sp *spinner.Spinner + selectedProduct geektime.Course + quality string + downloadComments bool + selectedProductType productTypeSelectOption + columnOutputType int + waitSeconds int + productTypeOptions = make([]productTypeSelectOption, 7) + geektimeClient *geektime.Client ) type productTypeSelectOption struct { @@ -80,7 +77,6 @@ func init() { rootCmd.MarkFlagsRequiredTogether("gcid", "gcess") sp = spinner.New(spinner.CharSets[4], 100*time.Millisecond) - accountClient = geektime.NewAccountClient() } func setProductTypeOptions() { @@ -129,7 +125,7 @@ var rootCmd = &cobra.Command{ checkError(err) sp.Prefix = "[ 正在登录... ]" sp.Start() - readCookies, err = accountClient.Login(phone, pwd) + readCookies, err = geektime.Login(phone, pwd) if err != nil { sp.Stop() checkError(err) @@ -141,12 +137,11 @@ var rootCmd = &cobra.Command{ } // first time auth check - if err := accountClient.Auth(readCookies); err != nil { + if err := geektime.Auth(readCookies); err != nil { checkError(err) } + geektimeClient = geektime.NewClient(readCookies) - universityClient = geektime.NewUniversityClient(readCookies) - geekEnterpriseClient = geektime.NewEnterpriseClient(readCookies) selectProductType(cmd.Context()) }, } @@ -228,16 +223,16 @@ func letInputProductID(ctx context.Context) { func loadProduct(ctx context.Context, productID int) { sp.Prefix = "[ 正在加载课程信息... ]" sp.Start() - var p geektime.Product + var p geektime.Course var err error if isUniversity() { - p, err = universityClient.GetUniversityProductInfo(productID) + p, err = geektimeClient.UniversityCourseInfo(productID) // university don't need check product type // if input invalid id, access mark is 0 } else if isEnterprise() { - p, err = geekEnterpriseClient.GetEnterpriseProductInfo(productID) + p, err = geektimeClient.EnterpriseCourseInfo(productID) } else { - p, err = geektimeClient.GetNormalProductInfo(productID) + p, err = geektimeClient.CourseInfo(productID) if err == nil { c := checkProductType(p.Type) // if check product type fail, re-input product @@ -432,10 +427,10 @@ func handleDownloadAll(ctx context.Context) { checkError(err) } if isUniversity() { - err := video.DownloadUniversityVideo(ctx, universityClient, a.AID, selectedProduct, sectionDir, quality, concurrency) + err := video.DownloadUniversityVideo(ctx, geektimeClient, a.AID, selectedProduct, sectionDir, quality, concurrency) checkError(err) } else if isEnterprise() { - err := video.DownloadEnterpriseArticleVideo(ctx, geekEnterpriseClient, a.AID, selectedProductType.SourceType, sectionDir, quality, concurrency) + err := video.DownloadEnterpriseArticleVideo(ctx, geektimeClient, a.AID, selectedProductType.SourceType, sectionDir, quality, concurrency) checkError(err) } else { err := video.DownloadArticleVideo(ctx, geektimeClient, a.AID, selectedProductType.SourceType, sectionDir, quality, concurrency) @@ -511,10 +506,10 @@ func downloadArticle(ctx context.Context, article geektime.Article, projectDir s checkError(err) } else { if isUniversity() { - err := video.DownloadUniversityVideo(ctx, universityClient, article.AID, selectedProduct, projectDir, quality, concurrency) + err := video.DownloadUniversityVideo(ctx, geektimeClient, article.AID, selectedProduct, projectDir, quality, concurrency) checkError(err) } else if isEnterprise() { - err := video.DownloadEnterpriseArticleVideo(ctx, geekEnterpriseClient, article.AID, selectedProductType.SourceType, projectDir, quality, concurrency) + err := video.DownloadEnterpriseArticleVideo(ctx, geektimeClient, article.AID, selectedProductType.SourceType, projectDir, quality, concurrency) checkError(err) } else { err := video.DownloadArticleVideo(ctx, geektimeClient, article.AID, selectedProductType.SourceType, projectDir, quality, concurrency) diff --git a/internal/config/config.go b/internal/config/config.go index 78f01d9..f0b0f31 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -121,7 +121,7 @@ func removeConfig(dir, phone string) error { } } } - return nil; + return nil } func writeOnelineConfig(sb strings.Builder, cookie *http.Cookie) strings.Builder { diff --git a/internal/geektime/account.go b/internal/geektime/account.go new file mode 100644 index 0000000..3272f3c --- /dev/null +++ b/internal/geektime/account.go @@ -0,0 +1,121 @@ +package geektime + +import ( + "fmt" + "net/http" + "time" + + "github.com/go-resty/resty/v2" + "github.com/nicoxiang/geektime-downloader/internal/pkg/logger" +) + +const ( + // GeekBangAccountBaseURL ... + GeekBangAccountBaseURL = "https://account.geekbang.org" + // LoginPath ... + LoginPath = "/account/ticket/login" + // V1AuthPath ... + V1AuthPath = "/serv/v1/user/auth" +) + +// Login call geektime login api and return auth cookies +func Login(phone, password string) ([]*http.Cookie, error) { + var res struct { + Code int `json:"code"` + Data struct { + UID int `json:"uid"` + Name string `json:"nickname"` + } `json:"data"` + Error struct { + Code int `json:"code"` + Msg string `json:"msg"` + } `json:"error"` + } + + client := resty.New(). + SetTimeout(DefaultTimeout). + SetHeader(UserAgent, DefaultUserAgent). + SetLogger(logger.DiscardLogger{}) + + logger.Infof("Login request start") + + resp, err := client.R(). + SetHeader(Origin, DefaultBaseURL). + SetBody(map[string]interface{}{ + "country": 86, + "appid": 1, + "platform": 3, + "cellphone": phone, + "password": password, + }). + SetResult(&res). + Post(GeekBangAccountBaseURL + LoginPath) + + if err != nil { + return nil, err + } + + if resp.RawResponse.StatusCode != 200 || res.Code != 0 { + logger.Warnf("Login request end, status code: %d, response body: %s", + resp.RawResponse.StatusCode, + resp.String(), + ) + } + + if res.Code == 0 { + var cookies []*http.Cookie + for _, c := range resp.Cookies() { + if c.Name == GCID || c.Name == GCESS { + cookies = append(cookies, c) + } + } + return cookies, nil + } else if res.Error.Code == -3031 { + return nil, ErrWrongPassword + } else if res.Error.Code == -3005 { + return nil, ErrTooManyLoginAttemptTimes + } + return nil, ErrGeekTimeAPIBadCode{LoginPath, resp.String()} +} + +// Auth check if current user login is expired or login in another device +func Auth(cs []*http.Cookie) error { + var res struct { + Code int `json:"code"` + } + t := fmt.Sprintf("%v", time.Now().Round(time.Millisecond).UnixNano()/(int64(time.Millisecond)/int64(time.Nanosecond))) + params := make(map[string]string, 2) + params["t"] = t + params["v_t"] = t + + client := resty.New(). + SetTimeout(DefaultTimeout). + SetHeader(UserAgent, DefaultUserAgent). + SetLogger(logger.DiscardLogger{}) + + logger.Infof("Auth request start") + + resp, err := client.R(). + SetQueryParams(params). + SetCookies(cs). + SetHeader(Origin, DefaultBaseURL). + SetResult(&res). + Get(GeekBangAccountBaseURL + V1AuthPath) + + if err != nil { + return err + } + + if resp.RawResponse.StatusCode != 200 || res.Code != 0 { + logger.Warnf("Auth request end, status code: %d, response body: %s", + resp.RawResponse.StatusCode, + resp.String(), + ) + + // result Code -1 + // {\"error\":{\"msg\":\"未登录\",\"code\":-2000} + return ErrAuthFailed + } + + return nil +} diff --git a/internal/geektime/client.go b/internal/geektime/client.go new file mode 100644 index 0000000..36de9f5 --- /dev/null +++ b/internal/geektime/client.go @@ -0,0 +1,133 @@ +package geektime + +import ( + "errors" + "fmt" + "net/http" + "reflect" + "time" + + "github.com/go-resty/resty/v2" + "github.com/nicoxiang/geektime-downloader/internal/pkg/logger" +) + +const ( + DefaultTimeout = 10 * time.Second + // Origin ... + Origin = "Origin" + // UserAgent ... + UserAgent = "User-Agent" + // DefaultUserAgent ... + DefaultUserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36" +) + +// A Client manages communication with the Geektime API. +type Client struct { + RestyClient *resty.Client + Cookies []*http.Cookie +} + +// ErrGeekTimeAPIBadCode ... +type ErrGeekTimeAPIBadCode struct { + Path string + ResponseString string +} + +// Error implements error interface +func (e ErrGeekTimeAPIBadCode) Error() string { + return fmt.Sprintf("请求极客时间接口 %s 失败, ResponseBody: %s", e.Path, e.ResponseString) +} + +var ( + // ErrWrongPassword ... + ErrWrongPassword = errors.New("密码错误, 请尝试重新登录") + // ErrTooManyLoginAttemptTimes ... + ErrTooManyLoginAttemptTimes = errors.New("密码输入错误次数过多,已触发验证码校验,请稍后再试") + // ErrGeekTimeRateLimit ... + ErrGeekTimeRateLimit = errors.New("已触发限流, 你可以选择重新登录/重新获取 cookie, 或者稍后再试, 然后生成剩余的文章") + // ErrAuthFailed ... + ErrAuthFailed = errors.New("当前账户在其他设备登录或者登录已经过期, 请尝试重新登录") +) + +// NewClient returns a new Geektime API client. +func NewClient(cs []*http.Cookie) *Client { + restyClient := resty.New(). + SetCookies(cs). + SetRetryCount(1). + SetTimeout(DefaultTimeout). + SetHeader(UserAgent, DefaultUserAgent). + SetLogger(logger.DiscardLogger{}) + + c := &Client{RestyClient: restyClient, Cookies: cs} + return c +} + +// newRequest new http request +func (c *Client) newRequest( + method string, + baseURL string, + path string, + params map[string]string, + body interface{}, + result interface{}) *resty.Request { + r := c.RestyClient.R() + r.Method = method + r.URL = baseURL + path + r.SetHeader(Origin, baseURL) + if len(params) > 0 { + r.SetQueryParams(params) + } + if body != nil { + r.SetBody(body) + } + r.SetResult(result) + return r +} + +// do perform http request +func do(r *resty.Request) (*resty.Response, error) { + logger.Infof("Http request start, method: %s, url: %s", + r.Method, + r.URL, + ) + resp, err := r.Execute(r.Method, r.URL) + + if err != nil { + return nil, err + } + + statusCode := resp.RawResponse.StatusCode + if statusCode != 200 { + logNotOkResponse(resp) + if statusCode == 451 { + return nil, ErrGeekTimeRateLimit + } else if statusCode == 452 { + return nil, ErrAuthFailed + } + } + + rv := reflect.ValueOf(r.Result) + f := reflect.Indirect(rv).FieldByName("Code") + code := int(f.Int()) + + if code == 0 { + return resp, nil + } + + logNotOkResponse(resp) + //未登录或者已失效 + if code == -3050 || code == -2000 { + return nil, ErrAuthFailed + } + + return nil, ErrGeekTimeAPIBadCode{r.URL, resp.String()} +} + +func logNotOkResponse(resp *resty.Response) { + logger.Warnf("Http request end, method: %s, url: %s, status code: %d, response body: %s", + resp.RawResponse.Request.Method, + resp.RawResponse.Request.URL, + resp.RawResponse.StatusCode, + resp.String(), + ) +} diff --git a/internal/geektime/enterprise_geekbang.go b/internal/geektime/enterprise.go similarity index 66% rename from internal/geektime/enterprise_geekbang.go rename to internal/geektime/enterprise.go index 3da46ba..edaf157 100644 --- a/internal/geektime/enterprise_geekbang.go +++ b/internal/geektime/enterprise.go @@ -1,13 +1,10 @@ package geektime import ( - "net/http" "strconv" - "time" "github.com/go-resty/resty/v2" "github.com/nicoxiang/geektime-downloader/internal/geektime/response" - "github.com/nicoxiang/geektime-downloader/internal/pkg/logger" ) const ( @@ -17,28 +14,15 @@ const ( V1EnterpriseCourseInfoPath = "/app/v1/course/info" // V1EnterpriseArticlesInfoPath used in enterprise course articles info V1EnterpriseArticlesInfoPath = "/app/v1/course/articles" - // V1EnterpriseArticleDetailInfoPath used in enterprise course article detail info - V1EnterpriseArticleDetailInfoPath = "/app/v1/article/detail" + // V1EnterpriseArticleDetailPath used in enterprise course article detail info + V1EnterpriseArticleDetailPath = "/app/v1/article/detail" // V1EnterpriseVideoPlayAuthPath used in enterprise course video play auth V1EnterpriseVideoPlayAuthPath = "/app/v1/source_auth/video_play_auth" ) -// NewEnterpriseClient init enterprise http client -func NewEnterpriseClient(cs []*http.Cookie) *Client { - httpClient := resty.New(). - SetCookies(cs). - SetRetryCount(1). - SetTimeout(10*time.Second). - SetHeader("User-Agent", DefaultUserAgent). - SetLogger(logger.DiscardLogger{}) - - c := &Client{HTTPClient: httpClient, BaseURL: GeekBangEnterpriseBaseURL, Cookies: cs} - return c -} - -// GetEnterpriseProductInfo get enterprise course info -func (c *Client) GetEnterpriseProductInfo(id int) (Product, error) { - var p Product +// EnterpriseCourseInfo get enterprise course info +func (c *Client) EnterpriseCourseInfo(id int) (Course, error) { + var p Course var err error p, err = c.enterpriseCourseInfo(id) if err != nil { @@ -55,11 +39,13 @@ func (c *Client) GetEnterpriseProductInfo(id int) (Product, error) { return p, nil } -// V1EnterpriseArticleDetailInfo get enterprise article detail -func (c *Client) V1EnterpriseArticleDetailInfo(articleID string) (response.V1EnterpriseArticlesDetailResponse, error) { +// V1EnterpriseArticleDetail get enterprise article detail +func (c *Client) V1EnterpriseArticleDetail(articleID string) (response.V1EnterpriseArticlesDetailResponse, error) { var res response.V1EnterpriseArticlesDetailResponse - r := c.newRequest(resty.MethodPost, - V1EnterpriseArticleDetailInfoPath, + r := c.newRequest( + resty.MethodPost, + GeekBangEnterpriseBaseURL, + V1EnterpriseArticleDetailPath, nil, map[string]interface{}{ "article_id": articleID, @@ -75,7 +61,9 @@ func (c *Client) V1EnterpriseArticleDetailInfo(articleID string) (response.V1Ent // EnterpriseVideoPlayAuth get enterprise play auth string func (c *Client) EnterpriseVideoPlayAuth(articleID, videoID string) (string, error) { var res response.V3VideoPlayAuthResponse - r := c.newRequest(resty.MethodPost, + r := c.newRequest( + resty.MethodPost, + GeekBangEnterpriseBaseURL, V1EnterpriseVideoPlayAuthPath, nil, map[string]interface{}{ @@ -90,9 +78,12 @@ func (c *Client) EnterpriseVideoPlayAuth(articleID, videoID string) (string, err return res.Data.PlayAuth, nil } -func (c *Client) enterpriseCourseInfo(productID int) (Product, error) { +func (c *Client) enterpriseCourseInfo(productID int) (Course, error) { var res response.V1EnterpriseProductInfoResponse - r := c.newRequest(resty.MethodPost, + + r := c.newRequest( + resty.MethodPost, + GeekBangEnterpriseBaseURL, V1EnterpriseCourseInfoPath, nil, map[string]interface{}{ @@ -100,11 +91,12 @@ func (c *Client) enterpriseCourseInfo(productID int) (Product, error) { }, &res, ) + if _, err := do(r); err != nil { - return Product{}, err + return Course{}, err } - return Product{ + return Course{ Access: res.Data.Extra.IsMyCourse, ID: productID, Title: res.Data.Title, @@ -115,7 +107,9 @@ func (c *Client) enterpriseCourseInfo(productID int) (Product, error) { func (c *Client) enterpriseCourseArticles(productID int) ([]Article, error) { var res response.V1EnterpriseArticlesResponse - r := c.newRequest(resty.MethodPost, + r := c.newRequest( + resty.MethodPost, + GeekBangEnterpriseBaseURL, V1EnterpriseArticlesInfoPath, nil, map[string]interface{}{ diff --git a/internal/geektime/geektime.go b/internal/geektime/geektime.go index ed37b76..9053e08 100644 --- a/internal/geektime/geektime.go +++ b/internal/geektime/geektime.go @@ -1,34 +1,16 @@ package geektime import ( - "errors" - "fmt" - "net/http" - "reflect" "strconv" - "time" "github.com/go-resty/resty/v2" "github.com/nicoxiang/geektime-downloader/internal/geektime/response" - "github.com/nicoxiang/geektime-downloader/internal/pkg/logger" ) const ( // DefaultBaseURL ... DefaultBaseURL = "https://time.geekbang.org" - // DefaultUserAgent ... - DefaultUserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36" - // Origin ... - Origin = "Origin" - // UserAgent ... - UserAgent = "User-Agent" - - // GeekBangAccountBaseURL ... - GeekBangAccountBaseURL = "https://account.geekbang.org" - // LoginPath ... - LoginPath = "/account/ticket/login" - // V1AuthPath ... - V1AuthPath = "/serv/v1/user/auth" + // V1ColumnArticlesPath get all articles summary info in one column V1ColumnArticlesPath = "/serv/v1/column/articles" // V1ArticlePath used in normal column @@ -42,7 +24,6 @@ const ( // V3VideoPlayAuthPath used in normal video, daily lesson, qconplus video play auth V3VideoPlayAuthPath = "/serv/v3/source_auth/video_play_auth" - // GeekBangCookieDomain ... GeekBangCookieDomain = ".geekbang.org" @@ -52,19 +33,8 @@ const ( GCESS = "GCESS" ) -var ( - // ErrWrongPassword ... - ErrWrongPassword = errors.New("密码错误, 请尝试重新登录") - // ErrTooManyLoginAttemptTimes ... - ErrTooManyLoginAttemptTimes = errors.New("密码输入错误次数过多,已触发验证码校验,请稍后再试") - // ErrGeekTimeRateLimit ... - ErrGeekTimeRateLimit = errors.New("已触发限流, 你可以选择重新登录/重新获取 cookie, 或者稍后再试, 然后生成剩余的文章") - // ErrAuthFailed ... - ErrAuthFailed = errors.New("当前账户在其他设备登录或者登录已经过期, 请尝试重新登录") -) - -// Product ... -type Product struct { +// Course ... +type Course struct { Access bool ID int Title string @@ -80,148 +50,9 @@ type Article struct { Title string } -// ErrGeekTimeAPIBadCode ... -type ErrGeekTimeAPIBadCode struct { - Path string - ResponseString string -} - -// Error implements error interface -func (e ErrGeekTimeAPIBadCode) Error() string { - return fmt.Sprintf("请求极客时间接口 %s 失败, ResponseBody: %s", e.Path, e.ResponseString) -} - -// A Client manages communication with the Geektime API. -type Client struct { - HTTPClient *resty.Client - BaseURL string - Cookies []*http.Cookie -} - -// NewClient returns a new Geektime API client. -func NewClient(cs []*http.Cookie) *Client { - httpClient := resty.New(). - SetCookies(cs). - SetRetryCount(1). - SetTimeout(10*time.Second). - SetHeader("User-Agent", DefaultUserAgent). - SetLogger(logger.DiscardLogger{}) - - c := &Client{HTTPClient: httpClient, BaseURL: DefaultBaseURL, Cookies: cs} - return c -} - -// NewAccountClient ... -func NewAccountClient() *Client { - httpClient := resty.New(). - SetRetryCount(1). - SetTimeout(10*time.Second). - SetHeader("User-Agent", DefaultUserAgent). - SetLogger(logger.DiscardLogger{}) - - c := &Client{HTTPClient: httpClient, BaseURL: GeekBangAccountBaseURL} - return c -} - -// Login call geektime login api and return auth cookies -func (c *Client) Login(phone, password string) ([]*http.Cookie, error) { - var res struct { - Code int `json:"code"` - Data struct { - UID int `json:"uid"` - Name string `json:"nickname"` - } `json:"data"` - Error struct { - Code int `json:"code"` - Msg string `json:"msg"` - } `json:"error"` - } - - r := c.newRequest(resty.MethodPost, - LoginPath, - nil, - map[string]interface{}{ - "country": 86, - "appid": 1, - "platform": 3, - "cellphone": phone, - "password": password, - }, - &res, - ) - - logger.Infof("Login request start") - resp, err := r.Execute(r.Method, r.URL) - - if err != nil { - return nil, err - } - - if resp.RawResponse.StatusCode != 200 || res.Code != 0 { - logger.Warnf("Login request end, status code: %d, response body: %s", - resp.RawResponse.StatusCode, - resp.String(), - ) - } - - if res.Code == 0 { - var cookies []*http.Cookie - for _, c := range resp.Cookies() { - if c.Name == GCID || c.Name == GCESS { - cookies = append(cookies, c) - } - } - return cookies, nil - } else if res.Error.Code == -3031 { - return nil, ErrWrongPassword - } else if res.Error.Code == -3005 { - return nil, ErrTooManyLoginAttemptTimes - } - return nil, ErrGeekTimeAPIBadCode{LoginPath, resp.String()} -} - -// Auth check if current user login is expired or login in another device -func (c *Client) Auth(cs []*http.Cookie) error { - var res struct { - Code int `json:"code"` - } - t := fmt.Sprintf("%v", time.Now().Round(time.Millisecond).UnixNano()/(int64(time.Millisecond)/int64(time.Nanosecond))) - c.HTTPClient.SetCookies(cs) - params := make(map[string]string, 2) - params["t"] = t - params["v_t"] = t - r := c.newRequest(resty.MethodGet, - V1AuthPath, - params, - nil, - res, - ) - r.SetHeader(Origin, DefaultBaseURL) - - logger.Infof("Auth request start") - resp, err := r.Execute(r.Method, r.URL) - - if err != nil { - return err - } - - if resp.RawResponse.StatusCode != 200 || res.Code != 0 { - logger.Warnf("Auth request end, status code: %d, response body: %s", - resp.RawResponse.StatusCode, - resp.String(), - ) - - // result Code -1 - // {\"error\":{\"msg\":\"未登录\",\"code\":-2000} - return ErrAuthFailed - } - - return nil -} - -// GetNormalProductInfo get narmal geektime product info -func (c *Client) GetNormalProductInfo(productID int) (Product, error) { - var p Product +// CourseInfo get narmal geektime course info +func (c *Client) CourseInfo(productID int) (Course, error) { + var p Course var err error p, err = c.columnInfo(productID) if err != nil { @@ -241,7 +72,9 @@ func (c *Client) GetNormalProductInfo(productID int) (Product, error) { // V1ArticleInfo ... func (c *Client) V1ArticleInfo(articleID int) (response.V1ArticleResponse, error) { var res response.V1ArticleResponse - r := c.newRequest(resty.MethodPost, + r := c.newRequest( + resty.MethodPost, + DefaultBaseURL, V1ArticlePath, nil, map[string]interface{}{ @@ -261,7 +94,9 @@ func (c *Client) V1ArticleInfo(articleID int) (response.V1ArticleResponse, error // ProductInfo used to get daily lesson or qconplus product info func (c *Client) ProductInfo(productID int) (response.V3ProductInfoResponse, error) { var res response.V3ProductInfoResponse - r := c.newRequest(resty.MethodPost, + r := c.newRequest( + resty.MethodPost, + DefaultBaseURL, V3ProductInfoPath, nil, map[string]interface{}{ @@ -275,11 +110,12 @@ func (c *Client) ProductInfo(productID int) (response.V3ProductInfoResponse, err return res, nil } - // V3ArticleInfo used to get daily lesson or qconplus article info func (c *Client) V3ArticleInfo(articleID int) (response.V3ArticleInfoResponse, error) { var res response.V3ArticleInfoResponse - r := c.newRequest(resty.MethodPost, + r := c.newRequest( + resty.MethodPost, + DefaultBaseURL, V3ArticleInfoPath, nil, map[string]interface{}{ @@ -296,7 +132,9 @@ func (c *Client) V3ArticleInfo(articleID int) (response.V3ArticleInfoResponse, e // VideoPlayAuth get play auth string func (c *Client) VideoPlayAuth(articleID, sourceType int, videoID string) (string, error) { var res response.V3VideoPlayAuthResponse - r := c.newRequest(resty.MethodPost, + r := c.newRequest( + resty.MethodPost, + DefaultBaseURL, V3VideoPlayAuthPath, nil, map[string]interface{}{ @@ -313,9 +151,11 @@ func (c *Client) VideoPlayAuth(articleID, sourceType int, videoID string) (strin } // columnInfo get normal column info, like v3 product info -func (c *Client) columnInfo(productID int) (Product, error) { +func (c *Client) columnInfo(productID int) (Course, error) { var res response.V3ColumnInfoResponse - r := c.newRequest(resty.MethodPost, + r := c.newRequest( + resty.MethodPost, + DefaultBaseURL, V3ColumnInfoPath, nil, map[string]interface{}{ @@ -325,10 +165,10 @@ func (c *Client) columnInfo(productID int) (Product, error) { &res, ) if _, err := do(r); err != nil { - return Product{}, err + return Course{}, err } - return Product{ + return Course{ Access: res.Data.Extra.Sub.AccessMask > 0, ID: res.Data.ID, Type: res.Data.Type, @@ -340,7 +180,9 @@ func (c *Client) columnInfo(productID int) (Product, error) { // columnArticles call geektime api to get article list func (c *Client) columnArticles(cid int) ([]Article, error) { res := &response.V1ColumnArticlesResponse{} - r := c.newRequest(resty.MethodPost, + r := c.newRequest( + resty.MethodPost, + DefaultBaseURL, V1ColumnArticlesPath, nil, map[string]interface{}{ @@ -365,65 +207,3 @@ func (c *Client) columnArticles(cid int) ([]Article, error) { } return articles, nil } - -func (c *Client) newRequest(method, url string, params map[string]string, body interface{}, res interface{}) *resty.Request { - r := c.HTTPClient.R() - r.Method = method - r.URL = c.BaseURL + url - r.SetHeader(Origin, c.BaseURL) - if len(params) > 0 { - r.SetQueryParams(params) - } - if body != nil { - r.SetBody(body) - } - r.SetResult(res) - return r -} - -func do(r *resty.Request) (*resty.Response, error) { - logger.Infof("Http request start, method: %s, url: %s", - r.Method, - r.URL, - ) - resp, err := r.Execute(r.Method, r.URL) - - if err != nil { - return nil, err - } - - statusCode := resp.RawResponse.StatusCode - if statusCode != 200 { - logNotOkResponse(resp) - if statusCode == 451 { - return nil, ErrGeekTimeRateLimit - } else if statusCode == 452 { - return nil, ErrAuthFailed - } - } - - rv := reflect.ValueOf(r.Result) - f := reflect.Indirect(rv).FieldByName("Code") - code := int(f.Int()) - - if code == 0 { - return resp, nil - } - - logNotOkResponse(resp) - //未登录或者已失效 - if code == -3050 || code == -2000 { - return nil, ErrAuthFailed - } - - return nil, ErrGeekTimeAPIBadCode{r.URL, resp.String()} -} - -func logNotOkResponse(resp *resty.Response) { - logger.Warnf("Http request end, method: %s, url: %s, status code: %d, response body: %s", - resp.RawResponse.Request.Method, - resp.RawResponse.Request.URL, - resp.RawResponse.StatusCode, - resp.String(), - ) -} diff --git a/internal/geektime/university_geekbang.go b/internal/geektime/university.go similarity index 71% rename from internal/geektime/university_geekbang.go rename to internal/geektime/university.go index b1c8dd8..b4aa55b 100644 --- a/internal/geektime/university_geekbang.go +++ b/internal/geektime/university.go @@ -1,12 +1,8 @@ package geektime import ( - "net/http" - "time" - "github.com/go-resty/resty/v2" "github.com/nicoxiang/geektime-downloader/internal/geektime/response" - "github.com/nicoxiang/geektime-downloader/internal/pkg/logger" ) const ( @@ -19,25 +15,14 @@ const ( UniversityV1MyClassInfoPath = "/serv/v1/myclass/info" ) -// NewUniversityClient init university http client -func NewUniversityClient(cs []*http.Cookie) *Client { - httpClient := resty.New(). - SetCookies(cs). - SetRetryCount(1). - SetTimeout(10*time.Second). - SetHeader("User-Agent", DefaultUserAgent). - SetLogger(logger.DiscardLogger{}) - - c := &Client{HTTPClient: httpClient, BaseURL: GeekBangUniversityBaseURL, Cookies: cs} - return c -} - -// GetUniversityProductInfo get university class info -func (c *Client) GetUniversityProductInfo(classID int) (Product, error) { - var p Product +// UniversityCourseInfo get university class info +func (c *Client) UniversityCourseInfo(classID int) (Course, error) { + var p Course var res response.V1MyClassInfoResponse - r := c.newRequest(resty.MethodPost, + r := c.newRequest( + resty.MethodPost, + GeekBangUniversityBaseURL, UniversityV1MyClassInfoPath, nil, map[string]interface{}{ @@ -59,7 +44,7 @@ func (c *Client) GetUniversityProductInfo(classID int) (Product, error) { return p, ErrGeekTimeAPIBadCode{UniversityV1MyClassInfoPath, resp.String()} } - p = Product{ + p = Course{ Access: true, ID: classID, Title: res.Data.Title, @@ -86,7 +71,9 @@ func (c *Client) GetUniversityProductInfo(classID int) (Product, error) { // UniversityVideoPlayAuth get university play auth string func (c *Client) UniversityVideoPlayAuth(articleID, classID int) (response.V1VideoPlayAuthResponse, error) { var res response.V1VideoPlayAuthResponse - r := c.newRequest(resty.MethodPost, + r := c.newRequest( + resty.MethodPost, + GeekBangUniversityBaseURL, UniversityV1VideoPlayAuthPath, nil, map[string]interface{}{ diff --git a/internal/pkg/m3u8/m3u8.go b/internal/pkg/m3u8/m3u8.go index bd1b2a1..7538484 100644 --- a/internal/pkg/m3u8/m3u8.go +++ b/internal/pkg/m3u8/m3u8.go @@ -15,7 +15,7 @@ var ( // Parse do m3u8 url GET request, and extract ts file names and check if it's encrypt video func Parse(client *geektime.Client, m3u8url string) (tsFileNames []string, isVodEncryptVideo bool, err error) { - m3u8Resp, err := client.HTTPClient.R().SetDoNotParseResponse(true).Get(m3u8url) + m3u8Resp, err := client.RestyClient.R().SetDoNotParseResponse(true).Get(m3u8url) if err != nil { return nil, false, err } diff --git a/internal/video/video.go b/internal/video/video.go index 44db710..2e99f67 100644 --- a/internal/video/video.go +++ b/internal/video/video.go @@ -87,8 +87,7 @@ func DownloadEnterpriseArticleVideo(ctx context.Context, quality string, concurrency int, ) error { - - articleInfo, err := client.V1EnterpriseArticleDetailInfo(strconv.Itoa(articleID)) + articleInfo, err := client.V1EnterpriseArticleDetail(strconv.Itoa(articleID)) if err != nil { return err } @@ -113,11 +112,10 @@ func DownloadEnterpriseArticleVideo(ctx context.Context, func DownloadUniversityVideo(ctx context.Context, client *geektime.Client, articleID int, - currentProduct geektime.Product, + currentProduct geektime.Course, projectDir string, quality string, concurrency int) error { - playAuthInfo, err := client.UniversityVideoPlayAuth(articleID, currentProduct.ID) if err != nil { return err @@ -300,7 +298,7 @@ func addBarValue(bar *pb.ProgressBar, written int64) { } } -func getUniversityVideoTitle(articleID int, currentProduct geektime.Product) string { +func getUniversityVideoTitle(articleID int, currentProduct geektime.Course) string { for _, v := range currentProduct.Articles { if v.AID == articleID { return v.Title @@ -317,7 +315,7 @@ func extractTSURLPrefix(m3u8url string) string { func getPlayInfo(client *geektime.Client, playInfoURL, quality string) (vod.PlayInfo, error) { var getPlayInfoResp GetPlayInfoResponse var playInfo vod.PlayInfo - _, err := client.HTTPClient.R(). + _, err := client.RestyClient.R(). SetResult(&getPlayInfoResp). Get(playInfoURL) From f472a02cefdf1c73f2c2eb189d30f0d1bcbf7c6f Mon Sep 17 00:00:00 2001 From: nicoxiang <916592561@qq.com> Date: Sat, 4 May 2024 17:04:55 +0800 Subject: [PATCH 4/8] feat:new arg enterprise --- .github/workflows/golangci-lint.yml | 2 +- .github/workflows/release.yml | 2 +- .gitignore | 2 + cmd/root.go | 312 +++++++++++----------------- go.mod | 2 +- internal/audio/audio.go | 11 +- internal/config/config.go | 11 +- internal/markdown/markdown.go | 24 ++- internal/markdown/markdown_test.go | 2 +- internal/pdf/pdf.go | 20 +- internal/pkg/files/files.go | 12 ++ internal/video/video.go | 15 +- 12 files changed, 191 insertions(+), 224 deletions(-) create mode 100644 internal/pkg/files/files.go diff --git a/.github/workflows/golangci-lint.yml b/.github/workflows/golangci-lint.yml index 7395f85..bdbdfb7 100644 --- a/.github/workflows/golangci-lint.yml +++ b/.github/workflows/golangci-lint.yml @@ -18,7 +18,7 @@ jobs: steps: - uses: actions/setup-go@v5 with: - go-version: '1.18.0' + go-version: '1.22.0' - uses: actions/checkout@v4 - name: golangci-lint uses: golangci/golangci-lint-action@v4.0.0 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a7ed76d..38e2de7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -19,7 +19,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v5 with: - go-version: 1.18 + go-version: 1.22 - name: Run GoReleaser uses: goreleaser/goreleaser-action@v4 with: diff --git a/.gitignore b/.gitignore index a7f0cbc..bde7941 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,5 @@ .vscode dist/ + +.idea diff --git a/cmd/root.go b/cmd/root.go index cc8b49b..3d1e0f7 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -23,6 +23,7 @@ import ( "github.com/nicoxiang/geektime-downloader/internal/markdown" "github.com/nicoxiang/geektime-downloader/internal/pdf" "github.com/nicoxiang/geektime-downloader/internal/pkg/filenamify" + "github.com/nicoxiang/geektime-downloader/internal/pkg/files" "github.com/nicoxiang/geektime-downloader/internal/video" "github.com/spf13/cobra" "golang.org/x/net/html" @@ -41,8 +42,11 @@ var ( selectedProductType productTypeSelectOption columnOutputType int waitSeconds int - productTypeOptions = make([]productTypeSelectOption, 7) + interval int + productTypeOptions []productTypeSelectOption geektimeClient *geektime.Client + isEnterprise bool + waitRand = rand.New(rand.NewSource(time.Now().UnixNano())) ) type productTypeSelectOption struct { @@ -62,7 +66,7 @@ func init() { userHomeDir, _ := os.UserHomeDir() concurrency = int(math.Ceil(float64(runtime.NumCPU()) / 2.0)) defaultDownloadFolder := filepath.Join(userHomeDir, config.GeektimeDownloaderFolder) - setProductTypeOptions() + rootCmd.Flags().StringVarP(&phone, "phone", "u", "", "你的极客时间账号(手机号)") rootCmd.Flags().StringVar(&gcid, "gcid", "", "极客时间 cookie 值 gcid") rootCmd.Flags().StringVar(&gcess, "gcess", "", "极客时间 cookie 值 gcess") @@ -71,6 +75,8 @@ func init() { rootCmd.Flags().BoolVar(&downloadComments, "comments", true, "是否需要专栏的第一页评论") rootCmd.Flags().IntVar(&columnOutputType, "output", 1, "专栏的输出内容(1pdf,2markdown,4audio)可自由组合") rootCmd.Flags().IntVar(&waitSeconds, "wait-seconds", 8, "Chrome生成PDF前的等待页面加载时间, 单位为秒, 默认8秒") + rootCmd.Flags().IntVar(&interval, "interval", 1, "下载资源的间隔时间, 单位为秒, 默认1秒") + rootCmd.Flags().BoolVar(&isEnterprise, "enterprise", false, "是否下载企业版极客时间资源") rootCmd.MarkFlagsMutuallyExclusive("phone", "gcid") rootCmd.MarkFlagsMutuallyExclusive("phone", "gcess") @@ -80,13 +86,16 @@ func init() { } func setProductTypeOptions() { - productTypeOptions[0] = productTypeSelectOption{0, "普通课程", 1, []string{"c1", "c3"}, true} - productTypeOptions[1] = productTypeSelectOption{1, "每日一课", 2, []string{"d"}, false} - productTypeOptions[2] = productTypeSelectOption{2, "公开课", 1, []string{"p35", "p29", "p30"}, true} - productTypeOptions[3] = productTypeSelectOption{3, "大厂案例", 4, []string{"q"}, false} - productTypeOptions[4] = productTypeSelectOption{4, "训练营", 5, []string{""}, true} //custom source type, not use - productTypeOptions[5] = productTypeSelectOption{5, "其他", 1, []string{"x", "c6"}, true} - productTypeOptions[6] = productTypeSelectOption{6, "企业版训练营", 6, []string{"c44"}, true} + if isEnterprise { + productTypeOptions = append(productTypeOptions, productTypeSelectOption{0, "训练营", 5, []string{"c44"}, true}) //custom source type, not use + } else { + productTypeOptions = append(productTypeOptions, productTypeSelectOption{0, "普通课程", 1, []string{"c1", "c3"}, true}) + productTypeOptions = append(productTypeOptions, productTypeSelectOption{1, "每日一课", 2, []string{"d"}, false}) + productTypeOptions = append(productTypeOptions, productTypeSelectOption{2, "公开课", 1, []string{"p35", "p29", "p30"}, true}) + productTypeOptions = append(productTypeOptions, productTypeSelectOption{3, "大厂案例", 4, []string{"q"}, false}) + productTypeOptions = append(productTypeOptions, productTypeSelectOption{4, "训练营", 5, []string{""}, true}) //custom source type, not use + productTypeOptions = append(productTypeOptions, productTypeSelectOption{5, "其他", 1, []string{"x", "c6"}, true}) + } } var rootCmd = &cobra.Command{ @@ -142,6 +151,7 @@ var rootCmd = &cobra.Command{ } geektimeClient = geektime.NewClient(readCookies) + setProductTypeOptions() selectProductType(cmd.Context()) }, } @@ -226,10 +236,11 @@ func loadProduct(ctx context.Context, productID int) { var p geektime.Course var err error if isUniversity() { - p, err = geektimeClient.UniversityCourseInfo(productID) // university don't need check product type // if input invalid id, access mark is 0 - } else if isEnterprise() { + p, err = geektimeClient.UniversityCourseInfo(productID) + } else if isEnterprise { + // TODO: check enterprise course type p, err = geektimeClient.EnterpriseCourseInfo(productID) } else { p, err = geektimeClient.CourseInfo(productID) @@ -335,187 +346,134 @@ func handleSelectArticle(ctx context.Context, index int) { func handleDownloadAll(ctx context.Context) { projectDir, err := mkDownloadProjectDir(downloadFolder, phone, gcid, selectedProduct.Title) checkError(err) - downloaded, err := findDownloadedArticleFileNames(projectDir) - checkError(err) if isText() { - rand.Seed(time.Now().UnixNano()) fmt.Printf("正在下载专栏 《%s》 中的所有文章\n", selectedProduct.Title) total := len(selectedProduct.Articles) var i int - needDownloadPDF := columnOutputType&1 == 1 - needDownloadMD := (columnOutputType>>1)&1 == 1 - needDownloadAudio := (columnOutputType>>2)&1 == 1 - - for _, a := range selectedProduct.Articles { - fileName := filenamify.Filenamify(a.Title) - var b int - if _, exists := downloaded[fileName+pdf.PDFExtension]; exists { - b = setBit(b, 0) - } - if _, exists := downloaded[fileName+markdown.MDExtension]; exists { - b = setBit(b, 1) - } - if _, exists := downloaded[fileName+audio.MP3Extension]; exists { - b = setBit(b, 2) - } - - if b == columnOutputType { - increasePDFCount(total, &i) - continue - } - - articleInfo, err := geektimeClient.V1ArticleInfo(a.AID) - checkError(err) - - hasVideo, videoURL := getVideoURLFromArticleContent(articleInfo.Data.ArticleContent) - - if hasVideo && videoURL != "" { - err = video.DownloadMP4(ctx, a.Title, projectDir, []string{videoURL}) - } - - if len(articleInfo.Data.InlineVideoSubtitles) > 0 { - videoURLs := make([]string, len(articleInfo.Data.InlineVideoSubtitles)) - for i, v := range articleInfo.Data.InlineVideoSubtitles { - videoURLs[i] = v.VideoURL - } - err = video.DownloadMP4(ctx, a.Title, projectDir, videoURLs) - } - - if needDownloadPDF { - err = pdf.PrintArticlePageToPDF(ctx, - a.AID, - projectDir, - a.Title, - geektimeClient.Cookies, - downloadComments, - waitSeconds, - ) - if err != nil { - checkError(err) - } - } - - if needDownloadMD { - err = markdown.Download(ctx, - articleInfo.Data.ArticleContent, - a.Title, - projectDir, - a.AID) - } - - if needDownloadAudio { - err = audio.DownloadAudio(ctx, articleInfo.Data.AudioDownloadURL, projectDir, a.Title) + for _, article := range selectedProduct.Articles { + skipped := downloadTextArticle(ctx, article, projectDir, false) + increaseDownloadedTextArticleCount(total, &i) + if !skipped { + waitRandomTime() } - - checkError(err) - - increasePDFCount(total, &i) - r := rand.Intn(2000) - time.Sleep(time.Duration(r) * time.Millisecond) } } else { - for _, a := range selectedProduct.Articles { - sectionDir := projectDir - fileName := filenamify.Filenamify(a.Title) + video.TSExtension - if _, ok := downloaded[fileName]; ok { - continue - } - // add sub dir - if a.SectionTitle != "" { - sectionDir, err = mkDownloadProjectSectionDir(projectDir, a.SectionTitle) - checkError(err) - } - if isUniversity() { - err := video.DownloadUniversityVideo(ctx, geektimeClient, a.AID, selectedProduct, sectionDir, quality, concurrency) - checkError(err) - } else if isEnterprise() { - err := video.DownloadEnterpriseArticleVideo(ctx, geektimeClient, a.AID, selectedProductType.SourceType, sectionDir, quality, concurrency) - checkError(err) - } else { - err := video.DownloadArticleVideo(ctx, geektimeClient, a.AID, selectedProductType.SourceType, sectionDir, quality, concurrency) - checkError(err) + for _, article := range selectedProduct.Articles { + skipped := downloadVideoArticle(ctx, article, projectDir, false) + if !skipped { + waitRandomTime() } } } selectProductType(ctx) } -func increasePDFCount(total int, i *int) { - (*i)++ +func increaseDownloadedTextArticleCount(total int, i *int) { + *i++ fmt.Printf("\r已完成下载%d/%d", *i, total) } func downloadArticle(ctx context.Context, article geektime.Article, projectDir string) { if isText() { - needDownloadPDF := columnOutputType&1 == 1 - needDownloadMD := (columnOutputType>>1)&1 == 1 - needDownloadAudio := (columnOutputType>>2)&1 == 1 + sp.Prefix = fmt.Sprintf("[ 正在下载 《%s》... ]", article.Title) + sp.Start() + defer sp.Stop() + downloadTextArticle(ctx, article, projectDir, true) + } else { + downloadVideoArticle(ctx, article, projectDir, true) + } +} + +func downloadTextArticle(ctx context.Context, article geektime.Article, projectDir string, overwrite bool) bool { + needDownloadPDF := columnOutputType&1 == 1 + needDownloadMD := (columnOutputType>>1)&1 == 1 + needDownloadAudio := (columnOutputType>>2)&1 == 1 + skipped := true - articleInfo, err := geektimeClient.V1ArticleInfo(article.AID) + articleInfo, err := geektimeClient.V1ArticleInfo(article.AID) + checkError(err) + + hasVideo, videoURL := getVideoURLFromArticleContent(articleInfo.Data.ArticleContent) + if hasVideo && videoURL != "" { + err = video.DownloadMP4(ctx, article.Title, projectDir, []string{videoURL}, overwrite) checkError(err) + } - sp.Prefix = fmt.Sprintf("[ 正在下载 《%s》... ]", article.Title) - hasVideo, videoURL := getVideoURLFromArticleContent(articleInfo.Data.ArticleContent) - if len(articleInfo.Data.InlineVideoSubtitles) > 0 || hasVideo && videoURL != "" { - sp.Prefix = fmt.Sprintf("[ 正在下载 《%s》, 该文章中包含视频, 请耐心等待... ]", article.Title) + if len(articleInfo.Data.InlineVideoSubtitles) > 0 { + videoURLs := make([]string, len(articleInfo.Data.InlineVideoSubtitles)) + for i, v := range articleInfo.Data.InlineVideoSubtitles { + videoURLs[i] = v.VideoURL } - sp.Start() + err = video.DownloadMP4(ctx, article.Title, projectDir, videoURLs, overwrite) + checkError(err) + } - if hasVideo && videoURL != "" { - err = video.DownloadMP4(ctx, article.Title, projectDir, []string{videoURL}) + if needDownloadPDF { + innerSkipped, err := pdf.PrintArticlePageToPDF(ctx, + article.AID, + projectDir, + article.Title, + geektimeClient.Cookies, + downloadComments, + waitSeconds, + overwrite, + ) + if !innerSkipped { + skipped = false } + checkError(err) + } - if len(articleInfo.Data.InlineVideoSubtitles) > 0 { - videoURLs := make([]string, len(articleInfo.Data.InlineVideoSubtitles)) - for i, v := range articleInfo.Data.InlineVideoSubtitles { - videoURLs[i] = v.VideoURL - } - err = video.DownloadMP4(ctx, article.Title, projectDir, videoURLs) + if needDownloadMD { + innerSkipped, err := markdown.Download(ctx, + articleInfo.Data.ArticleContent, + article.Title, + projectDir, + article.AID, + overwrite) + if !innerSkipped { + skipped = false } + checkError(err) + } - if needDownloadPDF { - checkError(err) - err = pdf.PrintArticlePageToPDF(ctx, - article.AID, - projectDir, - article.Title, - geektimeClient.Cookies, - downloadComments, - waitSeconds, - ) - if err != nil { - sp.Stop() - checkError(err) - } + if needDownloadAudio { + innerSkipped, err := audio.DownloadAudio(ctx, articleInfo.Data.AudioDownloadURL, projectDir, article.Title, overwrite) + if !innerSkipped { + skipped = false } + checkError(err) + } + return skipped +} - if needDownloadMD { - err = markdown.Download(ctx, - articleInfo.Data.ArticleContent, - article.Title, - projectDir, - article.AID) - } +func downloadVideoArticle(ctx context.Context, article geektime.Article, projectDir string, overwrite bool) bool { + dir := projectDir + var err error + // add sub dir + if article.SectionTitle != "" { + dir, err = mkDownloadProjectSectionDir(projectDir, article.SectionTitle) + checkError(err) + } - if needDownloadAudio { - err = audio.DownloadAudio(ctx, articleInfo.Data.AudioDownloadURL, projectDir, article.Title) - } + fileName := filenamify.Filenamify(article.Title) + video.TSExtension + fullPath := filepath.Join(dir, fileName) + if files.CheckFileExists(fullPath) && !overwrite { + return true + } - sp.Stop() + if isUniversity() { + err = video.DownloadUniversityVideo(ctx, geektimeClient, article.AID, selectedProduct, dir, quality, concurrency) + checkError(err) + } else if isEnterprise { + err = video.DownloadEnterpriseArticleVideo(ctx, geektimeClient, article.AID, dir, quality, concurrency) checkError(err) } else { - if isUniversity() { - err := video.DownloadUniversityVideo(ctx, geektimeClient, article.AID, selectedProduct, projectDir, quality, concurrency) - checkError(err) - } else if isEnterprise() { - err := video.DownloadEnterpriseArticleVideo(ctx, geektimeClient, article.AID, selectedProductType.SourceType, projectDir, quality, concurrency) - checkError(err) - } else { - err := video.DownloadArticleVideo(ctx, geektimeClient, article.AID, selectedProductType.SourceType, projectDir, quality, concurrency) - checkError(err) - } + err = video.DownloadArticleVideo(ctx, geektimeClient, article.AID, selectedProductType.SourceType, dir, quality, concurrency) + checkError(err) } + return false } func isText() bool { @@ -523,17 +481,7 @@ func isText() bool { } func isUniversity() bool { - return selectedProductType.Index == 4 -} - -func isEnterprise() bool { - return selectedProductType.Index == 6 -} - -// Sets the bit at pos in the integer n. -func setBit(n int, pos uint) int { - n |= (1 << pos) - return n + return selectedProductType.Index == 4 && !isEnterprise } func readCookiesFromInput() []*http.Cookie { @@ -556,28 +504,6 @@ func readCookiesFromInput() []*http.Cookie { return cookies } -func findDownloadedArticleFileNames(projectDir string) (map[string]struct{}, error) { - res := make(map[string]struct{}) - limit := 2 - err := filepath.Walk(projectDir, func(path string, info os.FileInfo, err error) error { - if err != nil { - fmt.Printf("访问路径时出错:%v\n", err) - return err - } - // 计算当前路径的深度 - depth := len(filepath.SplitList(path)) - len(filepath.SplitList(projectDir)) - if depth >= limit { - return filepath.SkipDir // 如果达到限制深度,则跳过该文件夹及其子文件夹 - } - if !info.IsDir() { - res[info.Name()] = struct{}{} - } - return nil - }) - checkError(err) - return res, nil -} - func mkDownloadProjectDir(downloadFolder, phone, gcid, projectName string) (string, error) { userName := phone if gcid != "" { @@ -647,6 +573,12 @@ func getVideoURLFromArticleContent(content string) (hasVideo bool, videoURL stri return hasVideo, videoURL } +// waitRandomTime wait interval seconds of time plus a 2000ms max jitter +func waitRandomTime() { + randomMillis := interval*1000 + waitRand.Intn(2000) + time.Sleep(time.Duration(randomMillis) * time.Millisecond) +} + // Execute ... func Execute() { ctx := context.Background() diff --git a/go.mod b/go.mod index 3b0c70d..cd767c9 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/nicoxiang/geektime-downloader -go 1.18 +go 1.22 require ( github.com/JohannesKaufmann/html-to-markdown v1.5.0 diff --git a/internal/audio/audio.go b/internal/audio/audio.go index 3cbd426..b9d3bb5 100644 --- a/internal/audio/audio.go +++ b/internal/audio/audio.go @@ -8,6 +8,7 @@ import ( "github.com/nicoxiang/geektime-downloader/internal/geektime" "github.com/nicoxiang/geektime-downloader/internal/pkg/downloader" "github.com/nicoxiang/geektime-downloader/internal/pkg/filenamify" + "github.com/nicoxiang/geektime-downloader/internal/pkg/files" ) const ( @@ -16,14 +17,18 @@ const ( ) // DownloadAudio ... -func DownloadAudio(ctx context.Context, downloadAudioURL, dir, title string) error { +func DownloadAudio(ctx context.Context, downloadAudioURL, dir, title string, overwrite bool) (bool, error) { if downloadAudioURL == "" { - return nil + return false, nil } filenamifyTitle := filenamify.Filenamify(title) dst := filepath.Join(dir, filenamifyTitle+MP3Extension) + if files.CheckFileExists(dst) && !overwrite { + return true, nil + } + headers := make(map[string]string, 2) headers[geektime.Origin] = geektime.DefaultBaseURL headers[geektime.UserAgent] = geektime.DefaultUserAgent @@ -34,5 +39,5 @@ func DownloadAudio(ctx context.Context, downloadAudioURL, dir, title string) err _ = os.Remove(dst) } - return err + return false, err } diff --git a/internal/config/config.go b/internal/config/config.go index f0b0f31..3878fff 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -2,7 +2,6 @@ package config import ( "errors" - "io/ioutil" "net/http" "os" "path/filepath" @@ -26,7 +25,7 @@ func init() { // ReadCookieFromConfigFile read cookies from app config file. func ReadCookieFromConfigFile(phone string) ([]*http.Cookie, error) { dir := filepath.Join(userConfigDir, GeektimeDownloaderFolder) - files, err := ioutil.ReadDir(dir) + files, err := os.ReadDir(dir) if err != nil { if errors.Is(err, os.ErrNotExist) { return nil, nil @@ -45,7 +44,7 @@ func ReadCookieFromConfigFile(phone string) ([]*http.Cookie, error) { fullName := filepath.Join(userConfigDir, GeektimeDownloaderFolder, fi.Name()) var cookies []*http.Cookie - data, err := ioutil.ReadFile(fullName) + data, err := os.ReadFile(fullName) if err != nil { return nil, err } @@ -79,9 +78,9 @@ func WriteCookieToConfigFile(phone string, cookies []*http.Cookie) error { if err := os.MkdirAll(dir, os.ModePerm); err != nil { return err } - removeConfig(dir, phone) + _ = removeConfig(dir, phone) - file, err := ioutil.TempFile(dir, phone) + file, err := os.CreateTemp(dir, phone) if err != nil { return err } @@ -103,7 +102,7 @@ func RemoveConfig(phone string) error { } func removeConfig(dir, phone string) error { - files, err := ioutil.ReadDir(dir) + files, err := os.ReadDir(dir) if err != nil { return err } diff --git a/internal/markdown/markdown.go b/internal/markdown/markdown.go index e8217cf..77fec91 100644 --- a/internal/markdown/markdown.go +++ b/internal/markdown/markdown.go @@ -16,6 +16,7 @@ import ( "github.com/nicoxiang/geektime-downloader/internal/geektime" "github.com/nicoxiang/geektime-downloader/internal/pkg/downloader" "github.com/nicoxiang/geektime-downloader/internal/pkg/filenamify" + "github.com/nicoxiang/geektime-downloader/internal/pkg/files" ) var ( @@ -37,17 +38,23 @@ func (ms *markdownString) ReplaceAll(o, n string) { ms.s = strings.ReplaceAll(ms.s, o, n) } -// Download ... -func Download(ctx context.Context, html, title, dir string, aid int) error { +// Download article as markdown +func Download(ctx context.Context, html, title, dir string, aid int, overwrite bool) (bool, error) { select { case <-ctx.Done(): - return context.Canceled + return false, context.Canceled default: } + + fullName := path.Join(dir, filenamify.Filenamify(title)+MDExtension) + if files.CheckFileExists(fullName) && !overwrite { + return true, nil + } + // step1: convert to md string markdown, err := getDefaultConverter().ConvertString(html) if err != nil { - return err + return false, err } // step2: download images var ss = &markdownString{s: markdown} @@ -63,23 +70,22 @@ func Download(ctx context.Context, html, title, dir string, aid int) error { err = writeImageFile(ctx, imageURLs, dir, imagesFolder, ss) if err != nil { - return err + return false, err } - fullName := path.Join(dir, filenamify.Filenamify(title)+MDExtension) f, err := os.Create(fullName) defer func() { _ = f.Close() }() if err != nil { - return err + return false, err } // step3: write md file _, err = f.WriteString("# " + title + "\n" + ss.s) if err != nil { - return err + return false, err } - return nil + return false, nil } func findAllImages(md string) (images []string) { diff --git a/internal/markdown/markdown_test.go b/internal/markdown/markdown_test.go index 58c7d28..e6daeff 100644 --- a/internal/markdown/markdown_test.go +++ b/internal/markdown/markdown_test.go @@ -14,7 +14,7 @@ func TestDownLoad_SpecialHtml(t *testing.T) { content := "这里,让我们一起看一个XSStrike的使用示例,来加深对它的理解。
首先,我们来看看它的用法。
其中比较重要的配置项,我将它们列举如下:
-h #提示信息\n-u #目标地址\n-data #通过post方式上传数据\n--headers #配置请求头信息,包括cookie等\n
sudo python3 xsstrike.py -u 'http://b6b7183d85ac4d36bb9449cb938ef977.app.mituan.zone/level1.php?name=test' \n
这段代码就是用参数u配置了一个目标地址,其中在请求中通过get方式上传了参数name,这样XSStrike可以识别到这个通过get方式上传的参数,可以看到应用有如下输出:
从输出中,我们可以知道它会首先判断是否有WAF存在,然后对参数进行测试,获取到页面的响应,并据此生成payload。这和我们之前学习的sqlmap非常类似,因为它们本质上其实都是注入检测工具。
生成payload之后,XSStrike会将它们按照Confidence的值从大到小进行排序,之后按照顺序逐一对它们进行检测。这里你可能会好奇Confidence是什么,事实上,它代表的是XSStrike开发人员对于这个payload成功的信心,它的取值范围为0-10,值越高代表注入成功的可能性就越大。
之后XSStrike根据注入的payload以及它们响应的内容,会给这个payload生成一个评分即Efficiency,这个评分越高,代表这个payload实现XSS攻击的成功率越大。如果评分高于90,就会将这个payload标记为成功,并将它输出在命令行中,否则就会认为这个payload无效。
到这里,你已经学会了XSS攻击的检测方法,接下来让我们进入到XSS攻击防御方案的学习之中。
# 原始代码\n<script>alert(1)</script>\n# 混淆后的代码\n[][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]][([][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]]+[])[!+[]+!+[]+!+[]]+(!![]+[][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]])[+!+[]+[+[]]]+([][[]]+[])[+!+[]]+(![]+[])[!+[]+!+[]+!+[]]+(!![]+[])[+[]]+(!![]+[])[+!+[]]+([][[]]+[])[+[]]+([][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]]+[])[!+[]+!+[]+!+[]]+(!![]+[])[+[]]+(!![]+[][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]])[+!+[]+[+[]]]+(!![]+[])[+!+[]]]((!![]+[])[+!+[]]+(!![]+[])[!+[]+!+[]+!+[]]+(!![]+[])[+[]]+([][[]]+[])[+[]]+(!![]+[])[+!+[]]+([][[]]+[])[+!+[]]+(+[![]]+[][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]])[+!+[]+[+!+[]]]+(!![]+[])[!+[]+!+[]+!+[]]+(+(!+[]+!+[]+!+[]+[+!+[]]))[(!![]+[])[+[]]+(!![]+[][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]])[+!+[]+[+[]]]+([]+[])[([][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]]+[])[!+[]+!+[]+!+[]]+(!![]+[][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]])[+!+[]+[+[]]]+([][[]]+[])[+!+[]]+(![]+[])[!+[]+!+[]+!+[]]+(!![]+[])[+[]]+(!![]+[])[+!+[]]+([][[]]+[])[+[]]+([][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]]+[])[!+[]+!+[]+!+[]]+(!![]+[])[+[]]+(!![]+[][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]])[+!+[]+[+[]]]+(!![]+[])[+!+[]]][([][[]]+[])[+!+[]]+(![]+[])[+!+[]]+((+[])[([][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]]+[])[!+[]+!+[]+!+[]]+(!![]+[][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]])[+!+[]+[+[]]]+([][[]]+[])[+!+[]]+(![]+[])[!+[]+!+[]+!+[]]+(!![]+[])[+[]]+(!![]+[])[+!+[]]+([][[]]+[])[+[]]+([][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]]+[])[!+[]+!+[]+!+[]]+(!![]+[])[+[]]+(!![]+[][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]])[+!+[]+[+[]]]+(!![]+[])[+!+[]]]+[])[+!+[]+[+!+[]]]+(!![]+[])[!+[]+!+[]+!+[]]]](!+[]+!+[]+!+[]+[!+[]+!+[]])+(![]+[])[+!+[]]+(![]+[])[!+[]+!+[]])()(([]+[])[([![]]+[][[]])[+!+[]+[+[]]]+(!![]+[])[+[]]+(![]+[])[+!+[]]+(![]+[])[!+[]+!+[]]+([![]]+[][[]])[+!+[]+[+[]]]+([][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]]+[])[!+[]+!+[]+!+[]]+(![]+[])[!+[]+!+[]+!+[]]]()[+[]]+(![]+[])[!+[]+!+[]+!+[]]+([][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]]+[])[!+[]+!+[]+!+[]]+(!![]+[])[+!+[]]+([![]]+[][[]])[+!+[]+[+[]]]+(+(!+[]+!+[]+[+!+[]]+[+!+[]]))[(!![]+[])[+[]]+(!![]+[][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]])[+!+[]+[+[]]]+([]+[])[([][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]]+[])[!+[]+!+[]+!+[]]+(!![]+[][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]])[+!+[]+[+[]]]+([][[]]+[])[+!+[]]+(![]+[])[!+[]+!+[]+!+[]]+(!![]+[])[+[]]+(!![]+[])[+!+[]]+([][[]]+[])[+[]]+([][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]]+[])[!+[]+!+[]+!+[]]+(!![]+[])[+[]]+(!![]+[][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]])[+!+[]+[+[]]]+(!![]+[])[+!+[]]][([][[]]+[])[+!+[]]+(![]+[])[+!+[]]+((+[])[([][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]]+[])[!+[]+!+[]+!+[]]+(!![]+[][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]])[+!+[]+[+[]]]+([][[]]+[])[+!+[]]+(![]+[])[!+[]+!+[]+!+[]]+(!![]+[])[+[]]+(!![]+[])[+!+[]]+([][[]]+[])[+[]]+([][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]]+[])[!+[]+!+[]+!+[]]+(!![]+[])[+[]]+(!![]+[][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]])[+!+[]+[+[]]]+(!![]+[])[+!+[]]]+[])[+!+[]+[+!+[]]]+(!![]+[])[!+[]+!+[]+!+[]]]](!+[]+!+[]+!+[]+[+!+[]])[+!+[]]+(!![]+[])[+[]]+([]+[])[([![]]+[][[]])[+!+[]+[+[]]]+(!![]+[])[+[]]+(![]+[])[+!+[]]+(![]+[])[!+[]+!+[]]+([![]]+[][[]])[+!+[]+[+[]]]+([][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]]+[])[!+[]+!+[]+!+[]]+(![]+[])[!+[]+!+[]+!+[]]]()[!+[]+!+[]]+(![]+[])[+!+[]]+(![]+[])[!+[]+!+[]]+(!![]+[])[!+[]+!+[]+!+[]]+(!![]+[])[+!+[]]+(!![]+[])[+[]]+([][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]]+[])[+!+[]+[!+[]+!+[]+!+[]]]+[+!+[]]+([+[]]+![]+[][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]])[!+[]+!+[]+[+[]]]+([]+[])[([![]]+[][[]])[+!+[]+[+[]]]+(!![]+[])[+[]]+(![]+[])[+!+[]]+(![]+[])[!+[]+!+[]]+([![]]+[][[]])[+!+[]+[+[]]]+([][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]]+[])[!+[]+!+[]+!+[]]+(![]+[])[!+[]+!+[]+!+[]]]()[+[]]+(![]+[+[]])[([![]]+[][[]])[+!+[]+[+[]]]+(!![]+[])[+[]]+(![]+[])[+!+[]]+(![]+[])[!+[]+!+[]]+([![]]+[][[]])[+!+[]+[+[]]]+([][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]]+[])[!+[]+!+[]+!+[]]+(![]+[])[!+[]+!+[]+!+[]]]()[+!+[]+[+[]]]+(![]+[])[!+[]+!+[]+!+[]]+([][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]]+[])[!+[]+!+[]+!+[]]+(!![]+[])[+!+[]]+([![]]+[][[]])[+!+[]+[+[]]]+(+(!+[]+!+[]+[+!+[]]+[+!+[]]))[(!![]+[])[+[]]+(!![]+[][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]])[+!+[]+[+[]]]+([]+[])[([][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]]+[])[!+[]+!+[]+!+[]]+(!![]+[][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]])[+!+[]+[+[]]]+([][[]]+[])[+!+[]]+(![]+[])[!+[]+!+[]+!+[]]+(!![]+[])[+[]]+(!![]+[])[+!+[]]+([][[]]+[])[+[]]+([][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]]+[])[!+[]+!+[]+!+[]]+(!![]+[])[+[]]+(!![]+[][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]])[+!+[]+[+[]]]+(!![]+[])[+!+[]]][([][[]]+[])[+!+[]]+(![]+[])[+!+[]]+((+[])[([][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]]+[])[!+[]+!+[]+!+[]]+(!![]+[][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]])[+!+[]+[+[]]]+([][[]]+[])[+!+[]]+(![]+[])[!+[]+!+[]+!+[]]+(!![]+[])[+[]]+(!![]+[])[+!+[]]+([][[]]+[])[+[]]+([][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]]+[])[!+[]+!+[]+!+[]]+(!![]+[])[+[]]+(!![]+[][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]])[+!+[]+[+[]]]+(!![]+[])[+!+[]]]+[])[+!+[]+[+!+[]]]+(!![]+[])[!+[]+!+[]+!+[]]]](!+[]+!+[]+!+[]+[+!+[]])[+!+[]]+(!![]+[])[+[]]+([]+[])[([![]]+[][[]])[+!+[]+[+[]]]+(!![]+[])[+[]]+(![]+[])[+!+[]]+(![]+[])[!+[]+!+[]]+([![]]+[][[]])[+!+[]+[+[]]]+([][(![]+[])[+[]]+(![]+[])[!+[]+!+[]]+(![]+[])[+!+[]]+(!![]+[])[+[]]]+[])[!+[]+!+[]+!+[]]+(![]+[])[!+[]+!+[]+!+[]]]()[!+[]+!+[]])\n
这个例子是一个JavaScript代码混淆示例,我们可以将一个非常明显的JavaScript转化为一堆乱码,神奇的是这串乱码和特征明显的JavaScript语句具有一样的功能。这样攻击者就可以将一个很容易被黑名单、白名单以及WAF检测出来的负载改为了难以被检测出来的负载,从而成功发起XSS攻击,实现自己想要的恶意行为。" - err := Download(ctx, content, "失效的输入检测(上):攻击者有哪些绕过方案?", p, 100101501) + _, err := Download(ctx, content, "失效的输入检测(上):攻击者有哪些绕过方案?", p, 100101501, true) if err != nil { t.Error(err) } diff --git a/internal/pdf/pdf.go b/internal/pdf/pdf.go index 872c4c0..fc429b1 100644 --- a/internal/pdf/pdf.go +++ b/internal/pdf/pdf.go @@ -16,6 +16,7 @@ import ( "github.com/chromedp/chromedp/device" "github.com/nicoxiang/geektime-downloader/internal/geektime" "github.com/nicoxiang/geektime-downloader/internal/pkg/filenamify" + "github.com/nicoxiang/geektime-downloader/internal/pkg/files" ) // PDFExtension ... @@ -29,8 +30,16 @@ func PrintArticlePageToPDF(ctx context.Context, cookies []*http.Cookie, downloadComments bool, waitSeconds int, -) error { + overwrite bool, +) (bool, error) { rateLimit := false + + fileName := filepath.Join(dir, filenamify.Filenamify(title)+PDFExtension) + + if files.CheckFileExists(fileName) && !overwrite { + return true, nil + } + // new tab ctx, cancel := chromedp.NewContext(ctx) defer cancel() @@ -63,16 +72,15 @@ func PrintArticlePageToPDF(ctx context.Context, if err != nil { if rateLimit { - return geektime.ErrGeekTimeRateLimit + return false, geektime.ErrGeekTimeRateLimit } - return err + return false, err } - fileName := filepath.Join(dir, filenamify.Filenamify(title)+PDFExtension) if err := os.WriteFile(fileName, buf, 0666); err != nil { - return err + return false, err } - return nil + return false, nil } func setCookies(cookies []*http.Cookie) chromedp.ActionFunc { diff --git a/internal/pkg/files/files.go b/internal/pkg/files/files.go new file mode 100644 index 0000000..b863be5 --- /dev/null +++ b/internal/pkg/files/files.go @@ -0,0 +1,12 @@ +package files + +import ( + "errors" + "os" +) + +// CheckFileExists check if file exists +func CheckFileExists(filePath string) bool { + _, error := os.Stat(filePath) + return !errors.Is(error, os.ErrNotExist) +} diff --git a/internal/video/video.go b/internal/video/video.go index 2e99f67..0be0525 100644 --- a/internal/video/video.go +++ b/internal/video/video.go @@ -3,7 +3,6 @@ package video import ( "context" "fmt" - "io/ioutil" "net/url" "os" "path" @@ -18,6 +17,7 @@ import ( "github.com/nicoxiang/geektime-downloader/internal/pkg/crypto" "github.com/nicoxiang/geektime-downloader/internal/pkg/downloader" "github.com/nicoxiang/geektime-downloader/internal/pkg/filenamify" + "github.com/nicoxiang/geektime-downloader/internal/pkg/files" "github.com/nicoxiang/geektime-downloader/internal/pkg/m3u8" "github.com/nicoxiang/geektime-downloader/internal/video/vod" ) @@ -82,7 +82,6 @@ func DownloadArticleVideo(ctx context.Context, func DownloadEnterpriseArticleVideo(ctx context.Context, client *geektime.Client, articleID int, - sourceType int, projectDir string, quality string, concurrency int, @@ -164,8 +163,8 @@ func downloadAliyunVodEncryptVideo(ctx context.Context, return download(ctx, tsURLPrefix, videoTitle, projectDir, tsFileNames, []byte(decryptKey), playInfo.Size, isVodEncryptVideo, concurrency) } -// DownloadMP4 ... -func DownloadMP4(ctx context.Context, title, projectDir string, mp4URLs []string) (err error) { +// DownloadMP4 download MP4 resources in article +func DownloadMP4(ctx context.Context, title, projectDir string, mp4URLs []string, overwrite bool) (err error) { filenamifyTitle := filenamify.Filenamify(title) videoDir := filepath.Join(projectDir, "videos", filenamifyTitle) if err = os.MkdirAll(videoDir, os.ModePerm); err != nil { @@ -176,6 +175,10 @@ func DownloadMP4(ctx context.Context, title, projectDir string, mp4URLs []string u, _ := url.Parse(mp4URL) dst := filepath.Join(videoDir, path.Base(u.Path)) + if files.CheckFileExists(dst) && !overwrite { + continue + } + headers := make(map[string]string, 2) headers[geektime.Origin] = geektime.DefaultBaseURL headers[geektime.UserAgent] = geektime.DefaultUserAgent @@ -238,7 +241,7 @@ func download(ctx context.Context, } func mergeTSFiles(tempVideoDir, filenamifyTitle, projectDir string, key []byte, isVodEncryptVideo bool) error { - tempTSFiles, err := ioutil.ReadDir(tempVideoDir) + tempTSFiles, err := os.ReadDir(tempVideoDir) if err != nil { return err } @@ -251,7 +254,7 @@ func mergeTSFiles(tempVideoDir, filenamifyTitle, projectDir string, key []byte, return err } for _, tempTSFile := range tempTSFiles { - f, err := ioutil.ReadFile(filepath.Join(tempVideoDir, tempTSFile.Name())) + f, err := os.ReadFile(filepath.Join(tempVideoDir, tempTSFile.Name())) if err != nil { return err } From f2b8a8ffe225c26a54012a041dbb9c165c043114 Mon Sep 17 00:00:00 2001 From: nicoxiang <916592561@qq.com> Date: Sat, 4 May 2024 18:23:22 +0800 Subject: [PATCH 5/8] Update README --- README.md | 48 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index baa4f08..0e24f10 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,24 @@ # geektime-downloader -geektime-downloader 支持下载极客时间专栏(PDF/Markdown/音频)/视频课/每日一课/大厂实践/训练营视频。 +geektime-downloader 支持下载以下极客时间网站资源. + +**极客时间** +- [x] 专栏(PDF/Markdown/音频) +- [x] 视频课 +- [x] 每日一课 +- [x] 大厂案例 +- [x] 训练营视频 +- [ ] 线下大会 + +**企业版极客时间** +- [ ] 体系课 +- [ ] 每日一课 +- [ ] 大厂案例 +- [ ] 生态课 +- [x] 训练营视频 + +部分资源暂未支持下载,欢迎PR. + [![go report card](https://goreportcard.com/badge/github.com/nicoxiang/geektime-downloader "go report card")](https://goreportcard.com/report/github.com/nicoxiang/geektime-downloader) [![MIT license](https://img.shields.io/badge/license-MIT-brightgreen.svg)](https://opensource.org/licenses/MIT) @@ -49,14 +67,16 @@ Usage: geektime-downloader [flags] Flags: - --comments 是否需要专栏的第一页评论 (default true) - -f, --folder string 专栏和视频课的下载目标位置 (default "") - --gcess string 极客时间 cookie 值 gcess - --gcid string 极客时间 cookie 值 gcid - -h, --help help for geektime-downloader - --output int 专栏的输出内容(1pdf,2markdown,4audio)可自由组合 (default 1) - -u, --phone string 你的极客时间账号(手机号) - -q, --quality string 下载视频清晰度(ld标清,sd高清,hd超清) (default "sd") + --comments 是否需要专栏的第一页评论 (default true) + --enterprise 是否下载企业版极客时间资源 + -f, --folder string 专栏和视频课的下载目标位置 (default "C:\\Users\\nico\\geektime-downloader") + --gcess string 极客时间 cookie 值 gcess + --gcid string 极客时间 cookie 值 gcid + -h, --help help for geektime-downloader + --interval int 下载资源的间隔时间, 单位为秒, 默认1秒 (default 1) + --output int 专栏的输出内容(1pdf,2markdown,4audio)可自由组合 (default 1) + -u, --phone string 你的极客时间账号(手机号) + -q, --quality string 下载视频清晰度(ld标清,sd高清,hd超清) (default "sd") --wait-seconds int Chrome生成PDF前的等待页面加载时间, 单位为秒, 默认8秒 (default 8) ``` @@ -110,12 +130,20 @@ https://time.geekbang.org/opencourse/videointro/100546701 **其他:** -打开极客时间[我的课程-其他](https://time.geekbang.org/dashboard/course),选择你想要查看的会议,在新打开的会议详情 Tab 页,查看 URL ```course/intro/``` 最后的数字,例如下面的链接中 100551201 就是课程 ID: +打开极客时间[我的课程-其他](https://time.geekbang.org/dashboard/course),选择你想要查看的课程,在新打开的课程详情 Tab 页,查看 URL ```course/intro/``` 最后的数字,例如下面的链接中 100551201 就是课程 ID: ``` https://time.geekbang.org/course/intro/100551201 ``` +**企业版训练营:** + +选择你想要查看的课程,查看 URL ```mall/product/```后的数字,例如下面的链接中 100618109 就是课程 ID: + +``` +https://b.geekbang.org/mall/product/100618109 +``` + ### 为什么我下载的PDF是空白页? 首先下载课程请保证VPN已关闭。在此前提下如果仍然出现空白页情况,说明后台Chrome网页加载速度较慢,可以尝试加大--wait-seconds参数,保证页面完全加载完成后再开始生成PDF。 From 2cab3c50f3798390e43ae5e1e0692e51f8fba267 Mon Sep 17 00:00:00 2001 From: nicoxiang <916592561@qq.com> Date: Sat, 4 May 2024 20:55:03 +0800 Subject: [PATCH 6/8] fix #188 --- internal/pkg/logger/logger.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/internal/pkg/logger/logger.go b/internal/pkg/logger/logger.go index 18a38e5..1689254 100644 --- a/internal/pkg/logger/logger.go +++ b/internal/pkg/logger/logger.go @@ -41,14 +41,14 @@ func (f *customFormatter) Format(entry *logrus.Entry) ([]byte, error) { return []byte(message), nil } -func init(){ +func init() { userConfigDir, _ := os.UserConfigDir() - logFilePath := filepath.Join(userConfigDir, GeektimeLogFolder, GeektimeLogFolder + ".log") + logFilePath := filepath.Join(userConfigDir, GeektimeLogFolder, GeektimeLogFolder+".log") logger.SetReportCaller(true) logger.SetLevel(logrus.InfoLevel) logger.SetFormatter(&customFormatter{}) - logFile, err := os.OpenFile(logFilePath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) + logFile, err := os.OpenFile(logFilePath, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) if err == nil { logger.Out = logFile } else { @@ -68,10 +68,10 @@ func Warnf(format string, args ...interface{}) { } // Error wrapper logrus log.Error -func Error(err error, args ...interface{}){ - if err != nil{ +func Error(err error, args ...interface{}) { + if err != nil { logger.WithError(err).Error(args...) - }else{ + } else { logger.Error(args...) } -} \ No newline at end of file +} From 936c72f46f21c0353d6ea915b2a3abf5b5dcbd45 Mon Sep 17 00:00:00 2001 From: nicoxiang <916592561@qq.com> Date: Sat, 4 May 2024 23:59:27 +0800 Subject: [PATCH 7/8] fix #160 - add print-pdf-timeout args - add print-pdf-wait args - remove wait-seconds args --- README.md | 32 ++++++----- cmd/root.go | 41 ++++++++------- internal/pdf/pdf.go | 48 +++++++++++++---- internal/pdf/stream.go | 117 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 194 insertions(+), 44 deletions(-) create mode 100644 internal/pdf/stream.go diff --git a/README.md b/README.md index 0e24f10..fc4161f 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # geektime-downloader -geektime-downloader 支持下载以下极客时间网站资源. +geektime-downloader 支持下载以下极客时间网站资源。 **极客时间** - [x] 专栏(PDF/Markdown/音频) @@ -17,7 +17,7 @@ geektime-downloader 支持下载以下极客时间网站资源. - [ ] 生态课 - [x] 训练营视频 -部分资源暂未支持下载,欢迎PR. +部分资源暂未支持下载,欢迎PR。 [![go report card](https://goreportcard.com/badge/github.com/nicoxiang/geektime-downloader "go report card")](https://goreportcard.com/report/github.com/nicoxiang/geektime-downloader) @@ -67,17 +67,18 @@ Usage: geektime-downloader [flags] Flags: - --comments 是否需要专栏的第一页评论 (default true) - --enterprise 是否下载企业版极客时间资源 - -f, --folder string 专栏和视频课的下载目标位置 (default "C:\\Users\\nico\\geektime-downloader") - --gcess string 极客时间 cookie 值 gcess - --gcid string 极客时间 cookie 值 gcid - -h, --help help for geektime-downloader - --interval int 下载资源的间隔时间, 单位为秒, 默认1秒 (default 1) - --output int 专栏的输出内容(1pdf,2markdown,4audio)可自由组合 (default 1) - -u, --phone string 你的极客时间账号(手机号) - -q, --quality string 下载视频清晰度(ld标清,sd高清,hd超清) (default "sd") - --wait-seconds int Chrome生成PDF前的等待页面加载时间, 单位为秒, 默认8秒 (default 8) + --comments 是否需要专栏的第一页评论 (default true) + --enterprise 是否下载企业版极客时间资源 + -f, --folder string 专栏和视频课的下载目标位置 (default "C:\\Users\\nico\\geektime-downloader") + --gcess string 极客时间 cookie 值 gcess + --gcid string 极客时间 cookie 值 gcid + -h, --help help for geektime-downloader + --interval int 下载资源的间隔时间, 单位为秒, 默认1秒 (default 1) + --output int 专栏的输出内容(1pdf,2markdown,4audio)可自由组合 (default 1) + -u, --phone string 你的极客时间账号(手机号) + --print-pdf-timeout int Chrome生成PDF的超时时间, 单位为秒, 默认60秒 (default 60) + --print-pdf-wait int Chrome生成PDF前的等待页面加载时间, 单位为秒, 默认8秒 (default 8) + -q, --quality string 下载视频清晰度(ld标清,sd高清,hd超清) (default "sd") ``` ## Note @@ -145,7 +146,10 @@ https://b.geekbang.org/mall/product/100618109 ``` ### 为什么我下载的PDF是空白页? -首先下载课程请保证VPN已关闭。在此前提下如果仍然出现空白页情况,说明后台Chrome网页加载速度较慢,可以尝试加大--wait-seconds参数,保证页面完全加载完成后再开始生成PDF。 +首先下载课程请保证VPN已关闭。在此前提下如果仍然出现空白页情况,说明后台Chrome网页加载速度较慢,可以尝试加大--print-pdf-wait参数,保证页面完全加载完成后再开始生成PDF。 + +### 为什么我下载PDF一直提示超时? +首先下载课程请保证VPN已关闭。在此前提下如果下载持续出现超时,有可能是因为课程章节图片等内容较多,生成速度慢,比如课程《AI 绘画核心技术与实战》中的部分章节,可以尝试加大--print-pdf-timeout参数,并耐心等待。 ### 如何下载专栏的 Markdown 格式和文章音频? diff --git a/cmd/root.go b/cmd/root.go index 3d1e0f7..ad39eb4 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -30,23 +30,24 @@ import ( ) var ( - phone string - gcid string - gcess string - concurrency int - downloadFolder string - sp *spinner.Spinner - selectedProduct geektime.Course - quality string - downloadComments bool - selectedProductType productTypeSelectOption - columnOutputType int - waitSeconds int - interval int - productTypeOptions []productTypeSelectOption - geektimeClient *geektime.Client - isEnterprise bool - waitRand = rand.New(rand.NewSource(time.Now().UnixNano())) + phone string + gcid string + gcess string + concurrency int + downloadFolder string + sp *spinner.Spinner + selectedProduct geektime.Course + quality string + downloadComments bool + selectedProductType productTypeSelectOption + columnOutputType int + printPDFWaitSeconds int + printPDFTimeoutSeconds int + interval int + productTypeOptions []productTypeSelectOption + geektimeClient *geektime.Client + isEnterprise bool + waitRand = rand.New(rand.NewSource(time.Now().UnixNano())) ) type productTypeSelectOption struct { @@ -74,7 +75,8 @@ func init() { rootCmd.Flags().StringVarP(&quality, "quality", "q", "sd", "下载视频清晰度(ld标清,sd高清,hd超清)") rootCmd.Flags().BoolVar(&downloadComments, "comments", true, "是否需要专栏的第一页评论") rootCmd.Flags().IntVar(&columnOutputType, "output", 1, "专栏的输出内容(1pdf,2markdown,4audio)可自由组合") - rootCmd.Flags().IntVar(&waitSeconds, "wait-seconds", 8, "Chrome生成PDF前的等待页面加载时间, 单位为秒, 默认8秒") + rootCmd.Flags().IntVar(&printPDFWaitSeconds, "print-pdf-wait", 8, "Chrome生成PDF前的等待页面加载时间, 单位为秒, 默认8秒") + rootCmd.Flags().IntVar(&printPDFTimeoutSeconds, "print-pdf-timeout", 60, "Chrome生成PDF的超时时间, 单位为秒, 默认60秒") rootCmd.Flags().IntVar(&interval, "interval", 1, "下载资源的间隔时间, 单位为秒, 默认1秒") rootCmd.Flags().BoolVar(&isEnterprise, "enterprise", false, "是否下载企业版极客时间资源") @@ -416,7 +418,8 @@ func downloadTextArticle(ctx context.Context, article geektime.Article, projectD article.Title, geektimeClient.Cookies, downloadComments, - waitSeconds, + printPDFWaitSeconds, + printPDFTimeoutSeconds, overwrite, ) if !innerSkipped { diff --git a/internal/pdf/pdf.go b/internal/pdf/pdf.go index fc429b1..b63d5e2 100644 --- a/internal/pdf/pdf.go +++ b/internal/pdf/pdf.go @@ -1,7 +1,9 @@ package pdf import ( + "bufio" "context" + "github.com/nicoxiang/geektime-downloader/internal/pkg/logger" "net/http" "os" "path/filepath" @@ -29,7 +31,8 @@ func PrintArticlePageToPDF(ctx context.Context, title string, cookies []*http.Cookie, downloadComments bool, - waitSeconds int, + printPDFWaitSeconds int, + printPDFTimeoutSeconds int, overwrite bool, ) (bool, error) { rateLimit := false @@ -44,7 +47,7 @@ func PrintArticlePageToPDF(ctx context.Context, ctx, cancel := chromedp.NewContext(ctx) defer cancel() - ctx, cancel = context.WithTimeout(ctx, time.Minute) + ctx, cancel = context.WithTimeout(ctx, time.Duration(printPDFTimeoutSeconds)*time.Second) defer cancel() chromedp.ListenTarget(ctx, func(ev interface{}) { @@ -58,15 +61,14 @@ func PrintArticlePageToPDF(ctx context.Context, } }) - var buf []byte err := chromedp.Run(ctx, chromedp.Tasks{ chromedp.Emulate(device.IPadPro11), setCookies(cookies), chromedp.Navigate(geektime.DefaultBaseURL + `/column/article/` + strconv.Itoa(aid)), - chromedp.Sleep(time.Duration(waitSeconds) * time.Second), + chromedp.Sleep(time.Duration(printPDFWaitSeconds) * time.Second), hideRedundantElements(downloadComments), - printToPDF(&buf), + printToPDF(fileName), }, ) @@ -77,9 +79,6 @@ func PrintArticlePageToPDF(ctx context.Context, return false, err } - if err := os.WriteFile(fileName, buf, 0666); err != nil { - return false, err - } return false, nil } @@ -179,18 +178,45 @@ func hideRedundantElements(downloadComments bool) chromedp.ActionFunc { }) } -func printToPDF(res *[]byte) chromedp.ActionFunc { +func printToPDF(fileName string) chromedp.ActionFunc { return chromedp.ActionFunc(func(ctx context.Context) error { - data, _, err := page.PrintToPDF(). + _, stream, err := page.PrintToPDF(). WithMarginTop(0.4). WithMarginBottom(0.4). WithMarginLeft(0.4). WithMarginRight(0.4). + WithTransferMode(page.PrintToPDFTransferModeReturnAsStream). Do(ctx) if err != nil { return err } - *res = data + + reader := &streamReader{ + ctx: ctx, + handle: stream, + r: nil, + pos: 0, + eof: false, + } + + defer func() { + _ = reader.Close() + }() + + file, _ := os.OpenFile(fileName, os.O_CREATE|os.O_RDWR, 0666) + + defer func() { + _ = file.Close() + }() + + buffer := bufio.NewReader(reader) + + _, err = buffer.WriteTo(file) + if err != nil { + logger.Error(err, "write result to output path") + return err + } + return nil }) } diff --git a/internal/pdf/stream.go b/internal/pdf/stream.go new file mode 100644 index 0000000..f4681ba --- /dev/null +++ b/internal/pdf/stream.go @@ -0,0 +1,117 @@ +package pdf + +import ( + "bytes" + "context" + "encoding/base64" + "fmt" + "github.com/chromedp/cdproto/cdp" + "io" + "strings" + + cdprotoio "github.com/chromedp/cdproto/io" +) + +// Credits: https://raw.githubusercontent.com/mafredri/cdp/3c5eab7ffc5cbee667b0a813ce470ac423792811/protocol/io/stream_reader.go. +type streamReader struct { + ctx context.Context + handle cdprotoio.StreamHandle + r io.Reader + pos int + eof bool +} + +// Read a chunk of the stream. +func (reader *streamReader) Read(p []byte) (n int, err error) { + if reader.r != nil { + // Continue reading from buffer. + return reader.read(p) + } + + if reader.eof { + return 0, io.EOF + } + + if len(p) == 0 { + return 0, nil + } + + // Chromium might have an off-by-one when deciding the maximum size (at + // least for base64 encoded data), usually it will overflow. We subtract + // one to make sure it fits into p. + size := len(p) - 1 + if size < 1 { + // Safety-check to avoid crashing Chrome (e.g. via SetSize(-1)). + size = 1 + } + + reply, err := reader.next(reader.pos, size) + if err != nil { + return 0, err + } + + reader.eof = reply.EOF + + switch { + case reply.Base64encoded: + b := []byte(reply.Data) + size := base64.StdEncoding.DecodedLen(len(b)) + + // Safety-check for fast-path to avoid panics. + if len(p) >= size { + n, err = base64.StdEncoding.Decode(p, b) + reader.pos += n + + return n, err + } + + reader.r = base64.NewDecoder(base64.StdEncoding, bytes.NewReader(b)) + default: + reader.r = strings.NewReader(reply.Data) + } + + return reader.read(p) +} + +// Close closes the stream, discard any temporary backing storage. +func (reader *streamReader) Close() error { + err := cdprotoio.Close(reader.handle).Do(reader.ctx) + if err == nil { + return nil + } + + return fmt.Errorf("close Chromium stream: %w", err) +} + +func (reader *streamReader) next(pos, size int) (cdprotoio.ReadReturns, error) { + params := cdprotoio. + Read(reader.handle). + WithOffset(int64(pos)). + WithSize(int64(size)) + + var res cdprotoio.ReadReturns + err := cdp.Execute(reader.ctx, cdprotoio.CommandRead, params, &res) + + if err == nil { + return res, nil + } + + return res, fmt.Errorf("execute IO.read command: %w", err) +} + +func (reader *streamReader) read(p []byte) (n int, err error) { + n, err = reader.r.Read(p) + reader.pos += n + + if !reader.eof && err == io.EOF { + reader.r = nil + err = nil + } + + return n, err +} + +// Interface guards. +var ( + _ io.Reader = (*streamReader)(nil) +) From 761af1117fea3e06da4908c5388b0a873e461c3a Mon Sep 17 00:00:00 2001 From: nicoxiang <916592561@qq.com> Date: Sun, 5 May 2024 15:32:10 +0800 Subject: [PATCH 8/8] fix: update http request log --- internal/geektime/client.go | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/internal/geektime/client.go b/internal/geektime/client.go index 36de9f5..65923c6 100644 --- a/internal/geektime/client.go +++ b/internal/geektime/client.go @@ -85,18 +85,26 @@ func (c *Client) newRequest( } // do perform http request -func do(r *resty.Request) (*resty.Response, error) { - logger.Infof("Http request start, method: %s, url: %s", - r.Method, - r.URL, +func do(request *resty.Request) (*resty.Response, error) { + logger.Infof("Http request start, method: %s, url: %s, request body: %v", + request.Method, + request.URL, + request.Body, ) - resp, err := r.Execute(r.Method, r.URL) + resp, err := request.Execute(request.Method, request.URL) if err != nil { return nil, err } statusCode := resp.RawResponse.StatusCode + + logger.Infof("Http request end, method: %s, url: %s, status code: %d", + resp.RawResponse.Request.Method, + resp.RawResponse.Request.URL, + resp.RawResponse.StatusCode, + ) + if statusCode != 200 { logNotOkResponse(resp) if statusCode == 451 { @@ -106,7 +114,7 @@ func do(r *resty.Request) (*resty.Response, error) { } } - rv := reflect.ValueOf(r.Result) + rv := reflect.ValueOf(request.Result) f := reflect.Indirect(rv).FieldByName("Code") code := int(f.Int()) @@ -120,14 +128,9 @@ func do(r *resty.Request) (*resty.Response, error) { return nil, ErrAuthFailed } - return nil, ErrGeekTimeAPIBadCode{r.URL, resp.String()} + return nil, ErrGeekTimeAPIBadCode{request.URL, resp.String()} } func logNotOkResponse(resp *resty.Response) { - logger.Warnf("Http request end, method: %s, url: %s, status code: %d, response body: %s", - resp.RawResponse.Request.Method, - resp.RawResponse.Request.URL, - resp.RawResponse.StatusCode, - resp.String(), - ) + logger.Warnf("Http request not ok, response body: %s", resp.String()) }