Skip to content

Commit

Permalink
fix linter issues
Browse files Browse the repository at this point in the history
  • Loading branch information
inhumantsar committed Dec 10, 2024
1 parent adff379 commit 94ac3c5
Show file tree
Hide file tree
Showing 7 changed files with 283 additions and 39 deletions.
216 changes: 216 additions & 0 deletions .eslintrc.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
/* eslint-env node */
"use strict";

module.exports = {
"parserOptions": {
"ecmaVersion": 2017,
},
"env": {
"es6": true,
"shared-node-browser": true,
},
"rules": {
// Braces only needed for multi-line arrow function blocks
// "arrow-body-style": [2, "as-needed"],

// Require spacing around =>
// "arrow-spacing": 2,

// Always require spacing around a single line block
// "block-spacing": 1,

// No newline before open brace for a block
"brace-style": 2,

// No space before always a space after a comma
"comma-spacing": [2, {"before": false, "after": true}],

// Commas at the end of the line not the start
// "comma-style": 2,

// Don't require spaces around computed properties
// "computed-property-spacing": [2, "never"],

// Functions must always return something or nothing
"consistent-return": 2,

// Require braces around blocks that start a new line
// Note that this rule is likely to be overridden on a per-directory basis
// very frequently.
// "curly": [2, "multi-line"],

// Always require a trailing EOL
"eol-last": 2,

// Require function* name()
// "generator-star-spacing": [2, {"before": false, "after": true}],

// Two space indent
"indent": [2, 2, { "SwitchCase": 1 }],

// Space after colon not before in property declarations
"key-spacing": [2, { "beforeColon": false, "afterColon": true, "mode": "minimum" }],

// Unix linebreaks
"linebreak-style": [2, "unix"],

// Always require parenthesis for new calls
"new-parens": 2,

// Use [] instead of Array()
// "no-array-constructor": 2,

// No duplicate arguments in function declarations
"no-dupe-args": 2,

// No duplicate keys in object declarations
"no-dupe-keys": 2,

// No duplicate cases in switch statements
"no-duplicate-case": 2,

// No labels
"no-labels": 2,

// If an if block ends with a return no need for an else block
"no-else-return": 2,

// No empty statements
"no-empty": 2,

// No empty character classes in regex
"no-empty-character-class": 2,

// Disallow empty destructuring
"no-empty-pattern": 2,

// No assiging to exception variable
// "no-ex-assign": 2,

// No using !! where casting to boolean is already happening
// "no-extra-boolean-cast": 2,

// No double semicolon
"no-extra-semi": 2,

// No overwriting defined functions
"no-func-assign": 2,

// Declarations in Program or Function Body
"no-inner-declarations": 2,

// No invalid regular expresions
"no-invalid-regexp": 2,

// No odd whitespace characters
"no-irregular-whitespace": 2,

// No single if block inside an else block
"no-lonely-if": 2,

// No mixing spaces and tabs in indent
"no-mixed-spaces-and-tabs": [2, "smart-tabs"],

// No unnecessary spacing
"no-multi-spaces": [2, { exceptions: { "AssignmentExpression": true, "VariableDeclarator": true, "ArrayExpression": true, "ObjectExpression": true } }],

// No reassigning native JS objects
"no-native-reassign": 2,

// No (!foo in bar)
"no-negated-in-lhs": 2,

// Nested ternary statements are confusing
"no-nested-ternary": 2,

// Use {} instead of new Object()
// "no-new-object": 2,

// No Math() or JSON()
"no-obj-calls": 2,

// No octal literals
"no-octal": 2,

// No redeclaring variables
"no-redeclare": 2,

// No unnecessary comparisons
"no-self-compare": 2,

// No declaring variables from an outer scope
"no-shadow": 2,

// No declaring variables that hide things like arguments
"no-shadow-restricted-names": 2,

// No spaces between function name and parentheses
"no-spaced-func": 2,

// No trailing whitespace
"no-trailing-spaces": 2,

// No using undeclared variables
"no-undef": 2,

// Error on newline where a semicolon is needed
"no-unexpected-multiline": 2,

// No unreachable statements
"no-unreachable": 2,

// No expressions where a statement is expected
// "no-unused-expressions": 2,

// No declaring variables that are never used
"no-unused-vars": [2, {"vars": "all", "args": "none"}],

// No using variables before defined
// "no-use-before-define": [2, "nofunc"],

// No using with
"no-with": 2,

// No if/while/for blocks on the same line as the if/while/for statement:
"nonblock-statement-body-position": [2, "below"],

// Always require semicolon at end of statement
"semi": [2, "always"],

// Require space after keywords
"keyword-spacing": 2,

// Always use double quotes
"quotes": [2, "double", {"avoidEscape": true}],

// Require space before blocks
"space-before-blocks": 2,

// Never use spaces before function parentheses
// "space-before-function-paren": [2, { "anonymous": "always", "named": "never" }],

// Require spaces before finally, catch, etc.
// "space-before-keywords": [2, "always"],

// No space padding in parentheses
// "space-in-parens": [2, "never"],

// Require spaces around operators
// "space-infix-ops": 2,

// Require spaces after return, throw and case
// "space-return-throw-case": 2,

// ++ and -- should not need spacing
// "space-unary-ops": [2, { "words": true, "nonwords": false }],

// No comparisons to NaN
"use-isnan": 2,

// Only check typeof against valid results
"valid-typeof": 2,

// enforce trailing commas
"comma-dangle": ["error", "always-multiline"],
},
};
82 changes: 54 additions & 28 deletions Readability.js
Original file line number Diff line number Diff line change
Expand Up @@ -1645,8 +1645,10 @@ Readability.prototype = {
}

if (!parsed["@type"] && Array.isArray(parsed["@graph"])) {
parsed = parsed["@graph"].find(it => {
return (it["@type"] || "").match(this.REGEXPS.jsonLdArticleTypes);
parsed = parsed["@graph"].find(function(it) {
return (it["@type"] || "").match(
this.REGEXPS.jsonLdArticleTypes,
);
});
}

Expand Down Expand Up @@ -1719,6 +1721,36 @@ Readability.prototype = {
return metadata ? metadata : {};
},

/**
* Swaps the "Surname, GivenName" formatted bylines to "GivenName Surname".
*
* @param {string|string[]} name
* @returns Name or names in "GivenName Surname" format
*/
_normalizeByline: function(name) {
var result = name;

if (Array.isArray(name)) {
return name.map((n) => this._normalizeByline(n));
}

// handle Surname, GivenName formatting
if (name.includes(",")) {
const parts = name.split(",").map(part => part.trim());
if (parts.length == 2) {
result = `${parts[1]} ${parts[0]}`;
}
if (parts.length > 2) {
result = `${parts[1]} ${parts[0]} ${parts.slice(2).join(" ")}`;
}
}

// remove things like "By:"
result = result.replace(/\w+:/, "");

return this._unescapeHtmlEntities(result);
},

/**
* Attempts to get excerpt and byline metadata for the article.
*
Expand All @@ -1737,12 +1769,7 @@ Readability.prototype = {
/\s*(article|dc|dcterm|og|twitter)\s*:\s*(author|creator|description|published_time|title|site_name)\s*/gi;

// name is a single value
<<<<<<< HEAD
var namePattern =
/^\s*(?:(dc|dcterm|og|twitter|parsely|weibo:(article|webpage))\s*[-\.:]\s*)?(author|creator|pub-date|description|title|site_name)\s*$/i;
=======
var namePattern = /^\s*(?:(prism|citation|dc|dcterm|og|twitter|parsely|weibo:(article|webpage))\s*[-_\.:]\s*)?(author|creator|pub-date|publicationDate|publication|description|title|site_name)\s*$/i;
>>>>>>> 740ddd3 (WIP: add citation, prism, and dc metadata)

// Find description tags.
this._forEachNode(metaElements, function (element) {
Expand All @@ -1754,6 +1781,7 @@ Readability.prototype = {
}
var matches = null;
var name = null;
var result = null;

if (elementProperty) {
matches = elementProperty.match(propertyPattern);
Expand All @@ -1762,7 +1790,7 @@ Readability.prototype = {
// so we can match below.
name = matches[0].toLowerCase().replace(/\s/g, "");
// multiple authors
values[name] = content.trim();
result = content.trim();
}
}
if (!matches && elementName && namePattern.test(elementName)) {
Expand All @@ -1771,8 +1799,23 @@ Readability.prototype = {
// Convert to lowercase, remove any whitespace, and convert dots
// to colons so we can match below.
name = name.toLowerCase().replace(/\s/g, "").replace(/\./g, ":");
values[name] = content.trim();
result = content.trim();
}
}

// handle properties which might have multiple distinct values, eg: citation_author
if (result) {
if (values[name]) {
if (Array.isArray(values[name]) && typeof result == "string") {
values[name].push(result);
}
if (typeof values[name] == "string") {
values[name] = [values[name], result];
}
} else {
values[name] = result;
}
this.log(`found metadata: ${name}=${values[name]}`);
}
});

Expand All @@ -1793,21 +1836,12 @@ Readability.prototype = {
}

// get author
<<<<<<< HEAD
metadata.byline =
jsonld.byline ||
values["dc:creator"] ||
values["dcterm:creator"] ||
values.author ||
values["parsely-author"];
=======
metadata.byline = jsonld.byline ||
values["dc:creator"] ||
values["dcterm:creator"] ||
values["author"] ||
values.author ||
values["parsely-author"] ||
values["citation_author"];
>>>>>>> 740ddd3 (WIP: add citation, prism, and dc metadata)

// get description
metadata.excerpt =
Expand All @@ -1824,25 +1858,17 @@ Readability.prototype = {
metadata.siteName = jsonld.siteName || values["og:site_name"];

// get article published time
<<<<<<< HEAD
metadata.publishedTime =
jsonld.datePublished ||
values["article:published_time"] ||
values["parsely-pub-date"] ||
null;
=======
metadata.publishedTime = jsonld.datePublished ||
values["article:published_time"] ||
values["parsely-pub-date"] ||
values["citation_publication_date"] ||
values["prism:publicationDate"] ||
null;
>>>>>>> 740ddd3 (WIP: add citation, prism, and dc metadata)

// in many sites the meta value is escaped with HTML entities,
// so here we need to unescape it
metadata.title = this._unescapeHtmlEntities(metadata.title);
metadata.byline = this._unescapeHtmlEntities(metadata.byline);
metadata.byline = this._normalizeByline(metadata.byline);
metadata.excerpt = this._unescapeHtmlEntities(metadata.excerpt);
metadata.siteName = this._unescapeHtmlEntities(metadata.siteName);
metadata.publishedTime = this._unescapeHtmlEntities(metadata.publishedTime);
Expand Down
12 changes: 7 additions & 5 deletions test/generate-testcase.js
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,9 @@ function getWithRedirects(url, cb) {
console.log("HEADERS:", JSON.stringify(response.headers));
}

if(response.statusCode > 300 && response.statusCode <= 303) {
if (debug) console.log("following redirect", response.headers.location);
if (response.statusCode > 300 && response.statusCode <= 303) {
if (debug)
console.log("following redirect", response.headers.location);
await getWithRedirects(response.headers.location, cb);
}

Expand All @@ -78,9 +79,10 @@ function getWithRedirects(url, cb) {
rv += chunk;
});

response.on("end", function () => {
if (debug) console.log("End received");
sanitizeSource(rv, cb);
response.on("end", () => {
if (debug)
console.log("End received");
cb(rv);
});
});
}
Expand Down
Loading

0 comments on commit 94ac3c5

Please sign in to comment.