Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactored YoutubeCommentsExtractor #799

Merged
merged 2 commits into from
Feb 23, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonPostResponse;
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.prepareDesktopJsonBuilder;
import static org.schabi.newpipe.extractor.utils.Utils.UTF_8;
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
Expand All @@ -17,7 +17,6 @@
import org.schabi.newpipe.extractor.StreamingService;
import org.schabi.newpipe.extractor.comments.CommentsExtractor;
import org.schabi.newpipe.extractor.comments.CommentsInfoItem;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemExtractor;
import org.schabi.newpipe.extractor.comments.CommentsInfoItemsCollector;
import org.schabi.newpipe.extractor.downloader.Downloader;
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
Expand All @@ -38,7 +37,7 @@ public class YoutubeCommentsExtractor extends CommentsExtractor {
* Caching mechanism and holder of the commentsDisabled value.
* <br/>
* Initial value = empty -> unknown if comments are disabled or not<br/>
* Some method calls {@link #findInitialCommentsToken()}
* -> value is set<br/>
* If the method or another one that is depending on disabled comments
* is now called again, the method execution can avoid unnecessary calls
Expand Down Expand Up @@ -74,45 +73,45 @@ public InfoItemsPage<CommentsInfoItem> getInitialPage()

/**
 * Finds the continuation token of the initial comments page.
 * <br/>
 * As a side effect this always sets {@link #optCommentsDisabled}: it becomes
 * {@code Optional.of(true)} when no token could be found and
 * {@code Optional.of(false)} otherwise.
 *
 * @return the continuation token or null if none was found
 * @throws ExtractionException propagated from reading the contents array out of the
 *                             cached next response
 */
@Nullable
private String findInitialCommentsToken() throws ExtractionException {
    final String token = JsonUtils.getArray(nextResponse,
                    "contents.twoColumnWatchNextResults.results.results.contents")
            .stream()
            // Only use JsonObjects
            .filter(JsonObject.class::isInstance)
            .map(JsonObject.class::cast)
            // Check if the comment-section is present
            .filter(jObj -> {
                try {
                    // Constant-first equals: safe even if the looked-up value is null
                    return "comments-section".equals(
                            JsonUtils.getString(jObj, "itemSectionRenderer.targetId"));
                } catch (final ParsingException ignored) {
                    // Entry has no readable targetId, so it is not the comments section
                    return false;
                }
            })
            .findFirst()
            // Extract the token; a null result turns the Optional empty
            .map(itemSectionRenderer -> {
                try {
                    return JsonUtils.getString(
                            itemSectionRenderer
                                    .getObject("itemSectionRenderer")
                                    .getArray("contents").getObject(0),
                            "continuationItemRenderer.continuationEndpoint.continuationCommand.token");
                } catch (final ParsingException ignored) {
                    // Section is present but carries no usable token
                    return null;
                }
            })
            .orElse(null);

    // The comments are disabled if we couldn't get a token
    optCommentsDisabled = Optional.of(token == null);

    return token;
}
Expand All @@ -124,25 +123,37 @@ private InfoItemsPage<CommentsInfoItem> getInfoItemsPageForDisabledComments() {

@Nullable
private Page getNextPage(@Nonnull final JsonObject ajaxJson) throws ExtractionException {
final JsonArray jsonArray;
final JsonArray onResponseReceivedEndpoints = ajaxJson.getArray(
"onResponseReceivedEndpoints");
final JsonObject endpoint = onResponseReceivedEndpoints.getObject(
onResponseReceivedEndpoints.size() - 1);
final JsonArray onResponseReceivedEndpoints =
ajaxJson.getArray("onResponseReceivedEndpoints");

// Prevent ArrayIndexOutOfBoundsException
if (onResponseReceivedEndpoints.isEmpty()) {
return null;
}

final JsonArray continuationItemsArray;
try {
jsonArray = endpoint.getObject("reloadContinuationItemsCommand", endpoint.getObject(
"appendContinuationItemsAction")).getArray("continuationItems");
final JsonObject endpoint = onResponseReceivedEndpoints
.getObject(onResponseReceivedEndpoints.size() - 1);
continuationItemsArray = endpoint
.getObject("reloadContinuationItemsCommand",
endpoint.getObject("appendContinuationItemsAction"))
.getArray("continuationItems");
} catch (final Exception e) {
return null;
}
if (jsonArray.isEmpty()) {
// Prevent ArrayIndexOutOfBoundsException
if (continuationItemsArray.isEmpty()) {
return null;
}

final JsonObject continuationItemRenderer = jsonArray.getObject(jsonArray.size() - 1).getObject("continuationItemRenderer");
final JsonObject continuationItemRenderer = continuationItemsArray
.getObject(continuationItemsArray.size() - 1)
.getObject("continuationItemRenderer");

final String jsonPath = continuationItemRenderer.has("button") ? "button.buttonRenderer.command.continuationCommand.token" : "continuationEndpoint.continuationCommand.token";
final String jsonPath = continuationItemRenderer.has("button")
? "button.buttonRenderer.command.continuationCommand.token"
: "continuationEndpoint.continuationCommand.token";

final String continuation;
try {
Expand All @@ -169,11 +180,11 @@ public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
}

final Localization localization = getExtractorLocalization();
final byte[] body = JsonWriter.string(prepareDesktopJsonBuilder(localization,
getExtractorContentCountry())
.value("continuation", page.getId())
.done())
.getBytes(UTF_8);
final byte[] body = JsonWriter.string(
prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
.value("continuation", page.getId())
.done())
.getBytes(StandardCharsets.UTF_8);

final JsonObject ajaxJson = getJsonPostResponse("next", body, localization);

Expand All @@ -186,10 +197,14 @@ public InfoItemsPage<CommentsInfoItem> getPage(final Page page)
private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
@Nonnull final JsonObject ajaxJson) throws ParsingException {

final JsonArray onResponseReceivedEndpoints = ajaxJson.getArray(
"onResponseReceivedEndpoints");
final JsonObject commentsEndpoint = onResponseReceivedEndpoints.getObject(
onResponseReceivedEndpoints.size() - 1);
final JsonArray onResponseReceivedEndpoints =
ajaxJson.getArray("onResponseReceivedEndpoints");
// Prevent ArrayIndexOutOfBoundsException
if (onResponseReceivedEndpoints.isEmpty()) {
return;
}
final JsonObject commentsEndpoint =
onResponseReceivedEndpoints.getObject(onResponseReceivedEndpoints.size() - 1);

final String path;

Expand All @@ -204,18 +219,20 @@ private void collectCommentsFrom(final CommentsInfoItemsCollector collector,

final JsonArray contents;
try {
contents = (JsonArray) JsonUtils.getArray(commentsEndpoint, path).clone();
contents = new JsonArray(JsonUtils.getArray(commentsEndpoint, path));
} catch (final Exception e) {
// No comments
return;
}

final int index = contents.size() - 1;
if (contents.getObject(index).has("continuationItemRenderer")) {
if (!contents.isEmpty() && contents.getObject(index).has("continuationItemRenderer")) {
contents.remove(index);
}

final String jsonKey = contents.getObject(0).has("commentThreadRenderer") ? "commentThreadRenderer" : "commentRenderer";
final String jsonKey = contents.getObject(0).has("commentThreadRenderer")
? "commentThreadRenderer"
: "commentRenderer";

final List<Object> comments;
try {
Expand All @@ -224,24 +241,23 @@ private void collectCommentsFrom(final CommentsInfoItemsCollector collector,
throw new ParsingException("Unable to get parse youtube comments", e);
}

for (final Object c : comments) {
if (c instanceof JsonObject) {
final CommentsInfoItemExtractor extractor = new YoutubeCommentsInfoItemExtractor(
(JsonObject) c, getUrl(), getTimeAgoParser());
collector.commit(extractor);
}
}
final String url = getUrl();
comments.stream()
.filter(JsonObject.class::isInstance)
.map(JsonObject.class::cast)
.map(jObj -> new YoutubeCommentsInfoItemExtractor(jObj, url, getTimeAgoParser()))
litetex marked this conversation as resolved.
Show resolved Hide resolved
.forEach(collector::commit);
}

/**
 * Posts a request for the current video to the "next" endpoint and stores the
 * parsed JSON response in {@code nextResponse}.
 * <br/>
 * The request body is the desktop JSON builder output for the extractor's
 * localization and content country, with the video id added under "videoId".
 *
 * @param downloader not used directly by this method
 * @throws IOException         propagated from the request
 * @throws ExtractionException propagated from building the request or reading the response
 */
@Override
public void onFetchPage(@Nonnull final Downloader downloader)
        throws IOException, ExtractionException {
    final Localization localization = getExtractorLocalization();
    // Encode explicitly as UTF-8 so the payload does not depend on the platform charset
    final byte[] body = JsonWriter.string(
                    prepareDesktopJsonBuilder(localization, getExtractorContentCountry())
                            .value("videoId", getId())
                            .done())
            .getBytes(StandardCharsets.UTF_8);

    nextResponse = getJsonPostResponse("next", body, localization);
}
Expand Down