Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[YouTube] Fix extraction of Shorts in channels and remove visitor data usage #1221

Merged
merged 3 commits into from
Sep 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -42,30 +42,14 @@
*/
public class YoutubeChannelTabExtractor extends ChannelTabExtractor {

/**
* Whether the visitor data extracted from the initial channel response is required to be used
* for continuations.
*
* <p>
* A valid {@code visitorData} is required to get continuations of shorts in channels.
* </p>
*
* <p>
* It should not be used when it is not needed, in order to reduce YouTube's tracking.
* </p>
*/
private final boolean useVisitorData;
private JsonObject jsonResponse;
private String channelId;
@Nullable
private String visitorData;
@SuppressWarnings("OptionalUsedAsFieldOrParameterType")
protected Optional<YoutubeChannelHelper.ChannelHeader> channelHeader;

public YoutubeChannelTabExtractor(final StreamingService service,
final ListLinkHandler linkHandler) {
super(service, linkHandler);
useVisitorData = getName().equals(ChannelTabs.SHORTS);
}

@Nonnull
Expand Down Expand Up @@ -100,9 +84,6 @@ public void onFetchPage(@Nonnull final Downloader downloader) throws IOException
jsonResponse = data.jsonResponse;
channelHeader = YoutubeChannelHelper.getChannelHeader(jsonResponse);
channelId = data.channelId;
if (useVisitorData) {
visitorData = jsonResponse.getObject("responseContext").getString("visitorData");
}
}

@Nonnull
Expand Down Expand Up @@ -176,10 +157,8 @@ public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionEx
channelName, channelUrl)
.orElse(null);

final Page nextPage = getNextPageFrom(continuation,
useVisitorData && !isNullOrEmpty(visitorData)
? List.of(channelName, channelUrl, verifiedStatus.toString(), visitorData)
: List.of(channelName, channelUrl, verifiedStatus.toString()));
final Page nextPage = getNextPageFrom(
continuation, List.of(channelName, channelUrl, verifiedStatus.toString()));

return new InfoItemsPage<>(collector, nextPage);
}
Expand Down Expand Up @@ -299,6 +278,9 @@ private Optional<JsonObject> collectItem(@Nonnull final MultiInfoItemsCollector
} else if (richItem.has("reelItemRenderer")) {
commitReel(collector, richItem.getObject("reelItemRenderer"),
channelVerifiedStatus, channelName, channelUrl);
} else if (richItem.has("shortsLockupViewModel")) {
commitShortsLockup(collector, richItem.getObject("shortsLockupViewModel"),
channelVerifiedStatus, channelName, channelUrl);
} else if (richItem.has("playlistRenderer")) {
commitPlaylist(collector, richItem.getObject("playlistRenderer"),
channelVerifiedStatus, channelName, channelUrl);
Expand Down Expand Up @@ -356,6 +338,30 @@ public boolean isUploaderVerified() {
});
}

/**
 * Commits a {@code shortsLockupViewModel} to the collector as a stream info item.
 *
 * <p>
 * The committed extractor reports the channel name and URL given here when they are non-empty,
 * and otherwise falls back to what {@link YoutubeShortsLockupInfoItemExtractor} returns. The
 * uploader is reported as verified only when {@code channelVerifiedStatus} is
 * {@link VerifiedStatus#VERIFIED}.
 * </p>
 *
 * @param collector             the collector the item is committed to
 * @param shortsLockupViewModel the JSON object of the {@code shortsLockupViewModel}
 * @param channelVerifiedStatus the verified status of the channel
 * @param channelName           the channel name to use as uploader name, may be {@code null}
 * @param channelUrl            the channel URL to use as uploader URL, may be {@code null}
 */
private static void commitShortsLockup(@Nonnull final MultiInfoItemsCollector collector,
        @Nonnull final JsonObject shortsLockupViewModel,
        @Nonnull final VerifiedStatus channelVerifiedStatus,
        @Nullable final String channelName,
        @Nullable final String channelUrl) {
    collector.commit(
            new YoutubeShortsLockupInfoItemExtractor(shortsLockupViewModel) {
                @Override
                public String getUploaderName() throws ParsingException {
                    return isNullOrEmpty(channelName) ? super.getUploaderName() : channelName;
                }

                @Override
                public String getUploaderUrl() throws ParsingException {
                    // Fall back to the parent's URL getter, not its name getter, so that a
                    // name is never returned where a URL is expected
                    return isNullOrEmpty(channelUrl) ? super.getUploaderUrl() : channelUrl;
                }

                @Override
                public boolean isUploaderVerified() {
                    return channelVerifiedStatus == VerifiedStatus.VERIFIED;
                }
            });
}

private void commitVideo(@Nonnull final MultiInfoItemsCollector collector,
@Nonnull final TimeAgoParser timeAgoParser,
@Nonnull final JsonObject jsonObject,
Expand Down Expand Up @@ -434,8 +440,7 @@ private Page getNextPageFrom(final JsonObject continuations,
.getString("token");

final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(getExtractorLocalization(),
getExtractorContentCountry(),
useVisitorData && channelIds.size() >= 3 ? channelIds.get(2) : null)
getExtractorContentCountry())
.value("continuation", continuation)
.done())
.getBytes(StandardCharsets.UTF_8);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,19 @@
import javax.annotation.Nullable;

/**
* A {@link StreamInfoItemExtractor} for YouTube's {@code reelItemRenderers}.
* A {@link StreamInfoItemExtractor} for YouTube's {@code reelItemRenderer}s.
*
* <p>
* {@code reelItemRenderers} are returned on YouTube for their short-form contents on almost every
* {@code reelItemRenderer}s were returned on YouTube for their short-form contents on almost every
* place and every major client. They provide a limited amount of information and do not provide
* the exact view count, any uploader info (name, URL, avatar, verified status) and the upload date.
* </p>
*
* <p>
* At the time this documentation was updated, they were being replaced by
* {@code shortsLockupViewModel}s. See {@link YoutubeShortsLockupInfoItemExtractor} for an
* extractor for this new UI data type.
* </p>
*/
public class YoutubeReelInfoItemExtractor implements StreamInfoItemExtractor {

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
package org.schabi.newpipe.extractor.services.youtube.extractors;

import com.grack.nanojson.JsonObject;
import org.schabi.newpipe.extractor.Image;
import org.schabi.newpipe.extractor.exceptions.ParsingException;
import org.schabi.newpipe.extractor.localization.DateWrapper;
import org.schabi.newpipe.extractor.services.youtube.linkHandler.YoutubeStreamLinkHandlerFactory;
import org.schabi.newpipe.extractor.stream.StreamInfoItemExtractor;
import org.schabi.newpipe.extractor.stream.StreamType;
import org.schabi.newpipe.extractor.utils.Utils;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import java.util.List;
import java.util.Locale;

import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getThumbnailsFromInfoItem;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;

/**
 * A {@link StreamInfoItemExtractor} for YouTube's {@code shortsLockupViewModel}s.
 *
 * <p>
 * {@code shortsLockupViewModel}s are returned on YouTube for their short-form contents on almost
 * every place and every major client. They provide a limited amount of information and do not
 * provide the exact view count, any uploader info (name, URL, avatar, verified status) and the
 * upload date.
 * </p>
 *
 * <p>
 * At the time this documentation was written, this UI data type was not fully rolled out, so
 * {@code reelItemRenderer}s were also returned. See {@link YoutubeReelInfoItemExtractor} for an
 * extractor for that UI data type.
 * </p>
 */
public class YoutubeShortsLockupInfoItemExtractor implements StreamInfoItemExtractor {

    @Nonnull
    private final JsonObject shortsLockupViewModel;

    /**
     * @param shortsLockupViewModel the JSON object of the {@code shortsLockupViewModel} from
     *                              which the item properties are read
     */
    public YoutubeShortsLockupInfoItemExtractor(@Nonnull final JsonObject shortsLockupViewModel) {
        this.shortsLockupViewModel = shortsLockupViewModel;
    }

    /**
     * @return the string at {@code overlayMetadata.primaryText.content}
     */
    @Override
    public String getName() throws ParsingException {
        return shortsLockupViewModel.getObject("overlayMetadata")
                .getObject("primaryText")
                .getString("content");
    }

    /**
     * Builds the stream URL from the video ID of the short.
     *
     * <p>
     * The video ID is read from the {@code reelWatchEndpoint} of the {@code onTap} command, and
     * if it is missing or empty there, from the {@code watchEndpoint} of the
     * {@code inlinePlayerData.onVisible} command.
     * </p>
     *
     * @throws ParsingException if no video ID could be found or if the URL could not be built
     */
    @Override
    public String getUrl() throws ParsingException {
        String videoId = shortsLockupViewModel.getObject("onTap")
                .getObject("innertubeCommand")
                .getObject("reelWatchEndpoint")
                .getString("videoId");

        if (isNullOrEmpty(videoId)) {
            videoId = shortsLockupViewModel.getObject("inlinePlayerData")
                    .getObject("onVisible")
                    .getObject("innertubeCommand")
                    .getObject("watchEndpoint")
                    .getString("videoId");
        }

        if (isNullOrEmpty(videoId)) {
            throw new ParsingException("Could not get video ID");
        }

        try {
            return YoutubeStreamLinkHandlerFactory.getInstance().getUrl(videoId);
        } catch (final Exception e) {
            throw new ParsingException("Could not get URL", e);
        }
    }

    @Nonnull
    @Override
    public List<Image> getThumbnails() throws ParsingException {
        return getThumbnailsFromInfoItem(shortsLockupViewModel.getObject("thumbnail")
                .getObject("sources"));
    }

    @Override
    public StreamType getStreamType() throws ParsingException {
        return StreamType.VIDEO_STREAM;
    }

    /**
     * Parses the view count from the text at {@code overlayMetadata.secondaryText.content}.
     *
     * @return {@code 0} if the text contains {@code "no views"} (case-insensitively), otherwise
     *         the value parsed by {@link Utils#mixedNumberWordToLong(String)}
     * @throws ParsingException if the view count text is missing or empty
     */
    @Override
    public long getViewCount() throws ParsingException {
        final String viewCountText = shortsLockupViewModel.getObject("overlayMetadata")
                .getObject("secondaryText")
                .getString("content");
        if (isNullOrEmpty(viewCountText)) {
            throw new ParsingException("Could not get short view count");
        }

        // This approach is language dependent
        // Locale.ROOT keeps the lowercasing independent of the JVM default locale, so matching
        // the ASCII marker below does not break on locales with special casing rules
        if (viewCountText.toLowerCase(Locale.ROOT).contains("no views")) {
            return 0;
        }

        return Utils.mixedNumberWordToLong(viewCountText);
    }

    @Override
    public boolean isShortFormContent() {
        return true;
    }

    // All the following properties cannot be obtained from shortsLockupViewModels

    @Override
    public boolean isAd() throws ParsingException {
        return false;
    }

    @Override
    public long getDuration() throws ParsingException {
        return -1;
    }

    @Override
    public String getUploaderName() throws ParsingException {
        return null;
    }

    @Override
    public String getUploaderUrl() throws ParsingException {
        return null;
    }

    @Override
    public boolean isUploaderVerified() throws ParsingException {
        return false;
    }

    @Nullable
    @Override
    public String getTextualUploadDate() throws ParsingException {
        return null;
    }

    @Nullable
    @Override
    public DateWrapper getUploadDate() throws ParsingException {
        return null;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
"httpMethod": "GET",
"url": "https://www.youtube.com/sw.js",
"headers": {
"Origin": [
"Referer": [
"https://www.youtube.com"
],
"Referer": [
"Origin": [
"https://www.youtube.com"
],
"Accept-Language": [
Expand Down Expand Up @@ -34,17 +34,20 @@
"cache-control": [
"private, max-age\u003d0"
],
"content-security-policy": [
"require-trusted-types-for \u0027script\u0027"
],
"content-type": [
"text/javascript; charset\u003dutf-8"
],
"cross-origin-opener-policy": [
"same-origin; report-to\u003d\"youtube_main\""
],
"date": [
"Wed, 24 Jul 2024 17:37:25 GMT"
"Sun, 08 Sep 2024 15:45:44 GMT"
],
"expires": [
"Wed, 24 Jul 2024 17:37:25 GMT"
"Sun, 08 Sep 2024 15:45:44 GMT"
],
"origin-trial": [
"AmhMBR6zCLzDDxpW+HfpP67BqwIknWnyMOXOQGfzYswFmJe+fgaI6XZgAzcxOrzNtP7hEDsOo1jdjFnVr2IdxQ4AAAB4eyJvcmlnaW4iOiJodHRwczovL3lvdXR1YmUuY29tOjQ0MyIsImZlYXR1cmUiOiJXZWJWaWV3WFJlcXVlc3RlZFdpdGhEZXByZWNhdGlvbiIsImV4cGlyeSI6MTc1ODA2NzE5OSwiaXNTdWJkb21haW4iOnRydWV9"
Expand All @@ -62,8 +65,8 @@
"ESF"
],
"set-cookie": [
"YSC\u003dQqImeZ_ECz4; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dThu, 28-Oct-2021 17:37:25 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone"
"YSC\u003dbsHskp20CKw; Domain\u003d.youtube.com; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone",
"VISITOR_INFO1_LIVE\u003d; Domain\u003d.youtube.com; Expires\u003dMon, 13-Dec-2021 15:45:44 GMT; Path\u003d/; Secure; HttpOnly; SameSite\u003dnone"
],
"strict-transport-security": [
"max-age\u003d31536000"
Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Loading
Loading