diff --git a/src/helpers/getTitle.js b/src/helpers/getTitle.js
new file mode 100644
index 0000000..e7b6914
--- /dev/null
+++ b/src/helpers/getTitle.js
@@ -0,0 +1,120 @@
+import apiRequestRawHtml from "./apiRequestRawHtml";
+import DomParser from "dom-parser";
+import seriesFetcher, { toIsoDate } from "./seriesFetcher";
+
+/**
+ * Fetches an IMDb title page and maps the JSON embedded in its
+ * `__NEXT_DATA__` element to the API's title response.
+ *
+ * Sections the page may omit (certificate, image, plot, ratings, awards,
+ * release date, ...) are read defensively, so a missing one falls back to a
+ * default instead of failing the whole request. Where the previous parser
+ * defined a default (-1, 0, "", [] or null), the same value is used.
+ *
+ * @param {string} id IMDb title id used to build the page URL.
+ * @returns {Promise<object>} Title details; for series the result of
+ *   `seriesFetcher(id)` is merged in.
+ * @throws {Error} If the page has no `__NEXT_DATA__` element.
+ */
+export default async function getTitle(id) {
+  const parser = new DomParser();
+  const html = await apiRequestRawHtml(`https://www.imdb.com/title/${id}`);
+  const dom = parser.parseFromString(html);
+  const nextData = dom.getElementsByAttribute("id", "__NEXT_DATA__");
+  if (!nextData?.length) {
+    throw new Error(`__NEXT_DATA__ not found on title page for ${id}`);
+  }
+  const json = JSON.parse(nextData[0].textContent);
+
+  const props = json.props.pageProps;
+  const above = props.aboveTheFoldData;
+  const main = props.mainColumnData;
+  const principalCredits = above.principalCredits ?? [];
+  const releaseDate = above.releaseDate;
+  const productionStage =
+    above.productionStatus?.currentProductionStage?.id ?? null;
+
+  // `v === "2"` returns { id, name } objects; any other value returns names.
+  const getCredits = (lookFor, v) => {
+    const result = principalCredits.find((e) => e?.category?.id === lookFor);
+
+    return result
+      ? result.credits.map((e) => {
+          if (v === "2")
+            return {
+              id: e.name.id,
+              name: e.name.nameText.text,
+            };
+
+          return e.name.nameText.text;
+        })
+      : [];
+  };
+
+  return {
+    id: id,
+    review_api_path: `/reviews/${id}`,
+    imdb: `https://www.imdb.com/title/${id}`,
+    contentType: above.titleType.id,
+    contentRating: above.certificate?.rating ?? null,
+    isSeries: above.titleType.isSeries,
+    productionStatus: productionStage,
+    isReleased: productionStage === "released",
+    title: above.titleText.text,
+    image: above.primaryImage?.url ?? null,
+    images: (main.titleMainImages?.edges ?? [])
+      .filter((e) => e.__typename === "ImageEdge")
+      .map((e) => e.node.url),
+    plot: above.plot?.plotText?.plainText ?? null,
+    runtime: above.runtime?.displayableProperty?.value?.plainText ?? "",
+    runtimeSeconds: above.runtime?.seconds ?? 0,
+    rating: {
+      count: above.ratingsSummary?.voteCount ?? 0,
+      star: above.ratingsSummary?.aggregateRating ?? 0,
+    },
+    award: {
+      wins: main.wins?.total ?? -1,
+      nominations: main.nominations?.total ?? -1,
+    },
+    genre: (above.genres?.genres ?? []).map((e) => e.id),
+    releaseDetailed: {
+      // Built in UTC; null when the page carries no usable release year.
+      date: toIsoDate(releaseDate),
+      day: releaseDate?.day ?? -1,
+      month: releaseDate?.month ?? -1,
+      year: releaseDate?.year ?? -1,
+      releaseLocation: {
+        country: main.releaseDate?.country?.text ?? null,
+        cca2: main.releaseDate?.country?.id ?? null,
+      },
+      originLocations: (main.countriesOfOrigin?.countries ?? []).map((e) => ({
+        country: e.text,
+        cca2: e.id,
+      })),
+    },
+    year: releaseDate?.year ?? -1,
+    spokenLanguages: (main.spokenLanguages?.spokenLanguages ?? []).map(
+      (e) => ({
+        language: e.text,
+        id: e.id,
+      })
+    ),
+    filmingLocations: (main.filmingLocations?.edges ?? []).map(
+      (e) => e.node.text
+    ),
+    actors: getCredits("cast"),
+    actors_v2: getCredits("cast", "2"),
+    creators: getCredits("creator"),
+    creators_v2: getCredits("creator", "2"),
+    directors: getCredits("director"),
+    directors_v2: getCredits("director", "2"),
+    writers: getCredits("writer"),
+    writers_v2: getCredits("writer", "2"),
+    top_credits: principalCredits.map((group) => ({
+      id: group.category.id,
+      name: group.category.text,
+      credits: group.credits.map((credit) => credit.name.nameText.text),
+    })),
+    ...(above.titleType.isSeries ? await seriesFetcher(id) : {}),
+  };
+}
diff --git a/src/helpers/parseMoreInfo.js b/src/helpers/parseMoreInfo.js
deleted file mode 100644
index 01baee5..0000000
--- a/src/helpers/parseMoreInfo.js
+++ /dev/null
@@ -1,98 +0,0 @@
-export default function parseMoreInfo(dom) {
-  let response = {
-    award: {
-      wins: -1,
-      nominations: -1,
-    },
-    images: [],
-    productionStatus: null,
-    releaseDetailed: {
-      day: -1,
-      month: -1,
-      year: -1,
-      releaseLocation: {
-        country: null,
-        cca2: null,
-      },
-      originLocations: [], // { country: null, cca2: null }
-    },
-    spokenLanguages: [], // { language: null, id: null }
-    filmingLocations: [],
-    runtime: "",
-    runtimeSeconds: 0,
-  }; // ensure type
-
-  try {
-    let nextData = JSON.parse(dom.getElementById("__NEXT_DATA__").innerHTML);
-    let allData = nextData.props.pageProps.mainColumnData;
-
-    try {
-      response.award.nominations = allData.nominations.total;
-      response.award.wins = allData.wins.total;
-    } catch (_) {}
-
-    try {
-      response.images = allData.titleMainImages.edges
-        .filter((e) => e.__typename === "ImageEdge")
-        .map((e) => e.node.url);
-    } catch (_) {}
-
-    try {
-      response.productionStatus =
-        allData.productionStatus.currentProductionStage.text;
-    } catch (_) {}
-
-    try {
-      response.releaseDetailed.day = allData.releaseDate.day;
-      response.releaseDetailed.month = allData.releaseDate.month;
-      response.releaseDetailed.year = allData.releaseDate.year;
-      response.releaseDetailed.releaseLocation = {
-        country: allData.releaseDate.country.text,
-        cca2: allData.releaseDate.country.id,
-      };
-      response.releaseDetailed.originLocations =
-        allData.countriesOfOrigin.countries.map((e) => ({
-          country: e.text,
-          cca2: e.id,
-        }));
-    } catch (_) {}
-
-    try {
-      response.spokenLanguages = allData.spokenLanguages.spokenLanguages.map(
-        (e) => ({
-          language: e.text,
-          id: e.id,
-        })
-      );
-    } catch (_) {}
-
-    try {
-      response.filmingLocations = allData.filmingLocations.edges.map(
-        (e) => e.node.text
-      );
-    } catch (_) {}
-
-    try {
-      response.runtime = parseSecondToTime(allData.runtime.seconds);
-      response.runtimeSeconds = allData.runtime.seconds;
-    } catch (_) {}
-  } catch (error) {
-    console.log(`ParseMoreInfo error:`, error);
-  }
-
-  return response;
-}
-
-function parseSecondToTime(seconds) {
-  let hours = Math.floor(seconds / 3600);
-  let minutes = Math.floor((seconds - hours * 3600) / 60);
-  let second = seconds - hours * 3600 - minutes * 60;
-
-  let result = "";
-  if (hours > 0) result += hours + "h ";
-
-  if (minutes > 0) result += minutes + "m ";
-  if (second > 0) result += second + "s";
-
-  return result.trim();
-}
diff --git a/src/helpers/seriesFetcher.js b/src/helpers/seriesFetcher.js
index 21516b1..d36d32f 100644
--- a/src/helpers/seriesFetcher.js
+++ b/src/helpers/seriesFetcher.js
@@ -1,171 +1,110 @@
 import DomParser from "dom-parser";
-import { decode as entityDecoder } from "html-entities";
 import apiRequestRawHtml from "./apiRequestRawHtml";
 
-const MAX_SEASONS = 2;
-
+/**
+ * Converts IMDb `{ year, month, day }` date parts to an ISO-8601 string.
+ *
+ * The timestamp is built with `Date.UTC` so it does not shift with the
+ * server's time zone. A missing month or day falls back to 1. When the
+ * year is missing or the parts are not valid numbers, `null` is returned
+ * instead of calling `toISOString()` on an invalid date, which throws.
+ *
+ * @param {{ year?: number, month?: number, day?: number } | null} [parts]
+ * @returns {string | null} ISO-8601 timestamp, or `null` if unavailable.
+ */
+export function toIsoDate(parts) {
+  if (parts?.year == null) return null;
+
+  const timestamp = Date.UTC(
+    parts.year,
+    (parts.month ?? 1) - 1,
+    parts.day ?? 1
+  );
+
+  return Number.isNaN(timestamp) ? null : new Date(timestamp).toISOString();
+}
+
+/**
+ * Loads the season list and the first season's episodes for a series.
+ *
+ * Best effort: if the season page cannot be fetched or parsed, the error is
+ * logged and empty lists are returned so the caller still gets a result.
+ *
+ * @param {string} id IMDb title id.
+ * @returns {Promise<{ all_seasons: object[], seasons: object[] }>}
+ */
 export default async function seriesFetcher(id) {
-  let allSeasons = [];
-  let seasons = [];
-
   try {
-    let parser = new DomParser();
-    let rawHtml = await apiRequestRawHtml(
-      `https://www.imdb.com/title/${id}/episodes/_ajax`
-    );
-    let dom = parser.parseFromString(rawHtml);
-
-    let seasonOption = dom.getElementById("bySeason");
-    let seasonOptions = seasonOption.getElementsByTagName("option");
-    for (let i = 0; i < seasonOptions.length; i++) {
-      try {
-        const seasonId = seasonOptions[i].getAttribute("value");
-        let season = {
-          id: seasonId,
-          api_path: `/title/${id}/season/${seasonId}`,
-          isSelected: seasonOptions[i].getAttribute("selected") === "selected",
-          name: "",
-          episodes: [],
-        };
-        seasons.push(season);
-      } catch (_) {}
-    }
-
-    allSeasons = [...seasons];
-    seasons = seasons.reverse();
-    seasons = seasons.slice(0, MAX_SEASONS);
-
-    await Promise.all(
-      seasons.map(async (season) => {
-        try {
-          let html = "";
-          if (season.isSelected) {
-            html = rawHtml;
-          } else {
-            html = await apiRequestRawHtml(
-              `https://www.imdb.com/title/${id}/episodes/_ajax?season=${season.id}`
-            );
-          }
-
-          let parsed = parseEpisodes(html, season.id);
-          season.name = parsed.name;
-          season.episodes = parsed.episodes;
-        } catch (sfe) {
-          season.error = sfe.toString();
-        }
-      })
-    );
-
-    seasons = seasons.filter((s) => s.episodes.length);
-    seasons = seasons.map((s) => {
-      delete s.isSelected;
-      return s;
-    });
-  } catch (error) {}
-
-  return {
-    all_seasons: allSeasons.map((s) => ({
-      id: s.id,
-      name: `Season ${s.id}`,
-      api_path: `/title/${id}/season/${s.id}`,
-    })),
-    seasons,
-  };
+    const { all_seasons, ...firstSeason } = await getSeason({
+      id,
+      seasonId: 1,
+    });
+
+    return {
+      all_seasons,
+      seasons: [firstSeason],
+    };
+  } catch (error) {
+    console.error(`seriesFetcher failed for ${id}:`, error);
+
+    return {
+      all_seasons: [],
+      seasons: [],
+    };
+  }
 }
 
-export function parseEpisodes(raw, seasonId) {
-  let parser = new DomParser();
-  let dom = parser.parseFromString(raw);
-
-  let name = dom.getElementById("episode_top").textContent.trim();
-  name = entityDecoder(name, { level: "html5" });
-
-  let episodes = [];
-
-  let item = dom.getElementsByClassName("list_item");
-
-  item.forEach((node, index) => {
-    try {
-      let image = null;
-      let image_large = null;
-      try {
-        image = node.getElementsByTagName("img")[0];
-        image = image.getAttribute("src");
-        image_large = image.replace(/[.]_.*_[.]/, ".");
-      } catch (_) {}
-
-      let noStr = null;
-      try {
-        // noStr = node.getElementsByClassName("image")[0].textContent.trim();
-        noStr = `S${seasonId}, Ep${index + 1}`;
-      } catch (_) {}
-
-      let publishedDate = null;
-      try {
-        publishedDate = node
-          .getElementsByClassName("airdate")[0]
-          .textContent.trim();
-      } catch (_) {}
-
-      let title = null;
-      try {
-        title = node.getElementsByTagName("a");
-        title = title.find((t) => t.getAttribute("itemprop") === "name");
-        title = title.textContent.trim();
-        title = entityDecoder(title, { level: "html5" });
-      } catch (_) {}
+/**
+ * Fetches one season's episodes page and maps its `__NEXT_DATA__` JSON.
+ *
+ * @param {{ id: string, seasonId: string | number }} params Title id and
+ *   the season to load.
+ * @returns {Promise<object>} Season `name`, its `episodes`, and the
+ *   series' `all_seasons` list.
+ * @throws {Error} If the page has no `__NEXT_DATA__` element.
+ */
+export async function getSeason({ id, seasonId }) {
+  const html = await apiRequestRawHtml(
+    `https://www.imdb.com/title/${id}/episodes?season=${seasonId}`
+  );
 
-      let plot = null;
-      try {
-        plot = node.getElementsByTagName("div");
-        plot = plot.find((t) => t.getAttribute("itemprop") === "description");
-        plot = plot.textContent.trim();
-        plot = entityDecoder(plot, { level: "html5" });
-      } catch (_) {}
-
-      let star = 0;
-      try {
-        star = node
-          .getElementsByClassName("ipl-rating-star__rating")[0]
-          .textContent.trim();
-        star = parseFloat(star);
-      } catch (_) {}
+  const parser = new DomParser();
+  const dom = parser.parseFromString(html);
 
-      let count = 0;
-      try {
-        count = node
-          .getElementsByClassName("ipl-rating-star__total-votes")[0]
-          .textContent.trim();
-        count = count.replace(/[(]|[)]|,|[.]/g, "");
-        count = parseInt(count);
-      } catch (_) {}
+  const nextData = dom.getElementsByAttribute("id", "__NEXT_DATA__");
+  if (!nextData?.length) {
+    throw new Error(`__NEXT_DATA__ not found on episodes page for ${id}`);
+  }
+  const json = JSON.parse(nextData[0].textContent);
 
-      if (
-        image.includes(`spinning-progress.gif`) &&
-        plot.includes("Know what this is about")
-      )
-        return null;
+  const { section, entityMetadata } = json.props.pageProps.contentData;
+  const episodes = section.episodes.items;
+  const seasons = section.seasons;
 
-      episodes.push({
-        idx: index + 1,
-        no: noStr,
-        title,
-        image,
-        image_large,
-        plot,
-        publishedDate,
+  return {
+    name: entityMetadata.titleText.text,
+    episodes: Object.values(episodes).map((e, i) => {
+      return {
+        idx: i + 1,
+        no: e.episode,
+        title: e.titleText,
+        // Read defensively: an episode lacking artwork or an air date would
+        // otherwise throw and fail the whole season.
+        image: e.image?.url ?? null,
+        image_large: e.image?.url ?? null,
+        image_caption: e.image?.caption ?? null,
+        plot: e.plot,
+        publishedDate: toIsoDate(e.releaseDate),
         rating: {
-          count,
-          star,
+          count: e.voteCount,
+          star: e.aggregateRating,
         },
-      });
-    } catch (ss) {
-      console.log(ss.message);
-    }
-  });
-
-  return {
-    name: name,
-    episodes: episodes,
+      };
+    }),
+    all_seasons: seasons.map((s) => ({
+      id: s.value,
+      name: `Season ${s.value}`,
+      api_path: `/title/${id}/season/${s.value}`,
+    })),
   };
 }
diff --git a/src/routes/title.js b/src/routes/title.js
index fa936d6..4c7aa7b 100644
--- a/src/routes/title.js
+++ b/src/routes/title.js
@@ -1,124 +1,16 @@
 import { Hono } from "hono";
-import DomParser from "dom-parser";
-import { decode as entityDecoder } from "html-entities";
-import seriesFetcher, { parseEpisodes } from "../helpers/seriesFetcher";
-import apiRequestRawHtml from "../helpers/apiRequestRawHtml";
-import parseMoreInfo from "../helpers/parseMoreInfo";
+import { getSeason } from "../helpers/seriesFetcher";
+import getTitle from "../helpers/getTitle";
+
 const title = new Hono();
 
 title.get("/:id", async (c) => {
   const id = c.req.param("id");
 
   try {
-    let parser = new DomParser();
-    let rawHtml = await apiRequestRawHtml(`https://www.imdb.com/title/${id}`);
-
-    let dom = parser.parseFromString(rawHtml);
-
-    let moreDetails = parseMoreInfo(dom);
-    let response = {};
-
-    // schema parse
-    let schema = getNode(dom, "script", "application/ld+json");
-    schema = JSON.parse(schema.innerHTML);
-
-    // id
-    response.id = id;
-
-    // review
-    response.review_api_path = `/reviews/${id}`;
-
-    // imdb link
-    response.imdb = `https://www.imdb.com/title/${id}`;
-
-    // content type
-    response.contentType = schema["@type"];
-
-    // production status
-    response.productionStatus = moreDetails.productionStatus;
-
-    // title
-    // response.title = getNode(dom, "h1", "hero-title-block__title").innerHTML;
-    response.title = entityDecoder(schema.name, { level: "html5" });
-
-    // image
-    response.image = schema.image;
-    response.images = moreDetails.images;
-
-    // plot
-    // response.plot = getNode(dom, "span", "plot-l").innerHTML;
-    response.plot = entityDecoder(schema.description, { level: "html5" });
+    const result = await getTitle(id);
 
-    // rating
-    response.rating = {
-      count: schema.aggregateRating?.ratingCount ?? 0,
-      star: schema.aggregateRating?.ratingValue ?? 0,
-    };
-
-    // award
-    response.award = moreDetails.award;
-
-    // content rating
-    response.contentRating = schema.contentRating;
-
-    // genre
-    response.genre =
-      schema.genre?.map((e) => entityDecoder(e, { level: "html5" })) ?? [];
-
-    // Relesde detail, laguages, fliming locations
-    response.releaseDetailed = moreDetails.releaseDetailed;
-    if (!response.year && response.releaseDetailed.year !== -1)
-      response.year = response.releaseDetailed.year;
-
-    response.year = response.releaseDetailed.year;
-    response.spokenLanguages = moreDetails.spokenLanguages;
-    response.filmingLocations = moreDetails.filmingLocations;
-    response.runtime = moreDetails.runtime;
-    response.runtimeSeconds = moreDetails.runtimeSeconds;
-
-    // actors
-    try {
-      response.actors = schema.actor.map((e) =>
-        entityDecoder(e.name, { level: "html5" })
-      );
-    } catch (_) {
-      response.actors = [];
-    }
-    // director
-    try {
-      response.directors = schema.director.map((e) =>
-        entityDecoder(e.name, { level: "html5" })
-      );
-    } catch (_) {
-      response.directors = [];
-    }
-
-    // top credits
-    try {
-      let top_credits = getNode(dom, "div", "title-pc-expanded-section")
-        .firstChild.firstChild;
-
-      response.top_credits = top_credits.childNodes.map((e) => {
-        return {
-          name: e.firstChild.textContent,
-          value: e.childNodes[1].firstChild.childNodes.map((e) =>
-            entityDecoder(e.textContent, { level: "html5" })
-          ),
-        };
-      });
-    } catch (_) {
-      response.top_credits = [];
-    }
-
-    try {
-      if (["TVSeries"].includes(response.contentType)) {
-        let seasons = await seriesFetcher(id);
-        response.seasons = seasons.seasons;
-        response.all_seasons = seasons.all_seasons;
-      }
-    } catch (error) {}
-
-    return c.json(response);
+    return c.json(result);
   } catch (error) {
     c.status(500);
     return c.json({
@@ -132,11 +24,8 @@ title.get("/:id/season/:seasonId", async (c) => {
   const seasonId = c.req.param("seasonId");
 
   try {
-    const html = await apiRequestRawHtml(
-      `https://www.imdb.com/title/${id}/episodes/_ajax?season=${seasonId}`
-    );
+    const result = await getSeason({ id, seasonId });
 
-    const parsed = parseEpisodes(html, seasonId);
     const response = Object.assign(
       {
         id,
@@ -144,7 +33,7 @@ title.get("/:id/season/:seasonId", async (c) => {
         imdb: `https://www.imdb.com/title/${id}/episodes?season=${seasonId}`,
         season_id: seasonId,
       },
-      parsed
+      result
     );
 
     return c.json(response);