import { aggregatePagination, pagination, api, parseContent } from './util';
import infoboxParser from 'infobox-parser';
import { tokenize, constructTree } from 'hyntax';
import { parseCoordinates } from './coordinates';
import QueryChain from './chain';
/**
 * Safely walk a path through a nested value.
 * Each path element is either a property key or a function that receives the
 * current value and returns the next one.
 * @param {*} obj - value to start from
 * @param {String|Number|Function} [first] - next path element
 * @param {...(String|Number|Function)} rest - remaining path elements
 * @return {*} the value at the end of the path; the walk stops early and
 * returns the current value as soon as it is null/undefined or the next path
 * element is undefined
 */
const get = (obj, first, ...rest) => {
  // `== null` covers both null and undefined, so a null link in the chain
  // ends the walk instead of throwing on property access
  if (obj == null || first === undefined) return obj;
  const next = typeof first === 'function' ? first(obj) : obj[first];
  return get(next, ...rest);
};
/**
 * First value held by a container.
 * For objects (including arrays) this is the value under the first own
 * enumerable key; for anything else it is index 0.
 * @param {*} obj - container to read from
 * @return {*} the first value, or undefined for null/undefined/empty input
 */
const firstValue = obj => {
  // typeof null is 'object', so null has to be rejected before Object.keys
  if (obj == null) return undefined;
  if (typeof obj === 'object') return obj[Object.keys(obj)[0]];
  return obj[0];
};
/**
 * Strip a leading `Prefix:` (e.g. `File:` or `Image:`) from a file reference.
 * @param {String|String[]} text - file reference; for an array only the first
 * entry is used
 * @return {String|undefined} everything after the first colon, the text
 * itself when it has no colon, or undefined for empty input
 */
const getFileName = text => {
  const value = Array.isArray(text) ? text[0] : text;
  if (!value) return undefined;
  const separator = value.indexOf(':');
  if (separator === -1) return value;
  // Cut only at the first colon so later colons stay part of the name
  return value.slice(separator + 1);
};
/**
* WikiPage
* @namespace WikiPage
*/
export default function wikiPage(rawPageInfo, apiOptions) {
const raw = rawPageInfo;
/**
 * HTML from page.
 * Requests one revision of the page with `rvparse` set and resolves with the
 * `*` field of the first revision returned.
 * @example
 * wiki.page('batman').then(page => page.html()).then(console.log);
 * @method WikiPage#html
 * @return {Promise}
 */
function html() {
  const params = {
    prop: 'revisions',
    rvprop: 'content',
    rvlimit: 1,
    rvparse: '',
    titles: raw.title
  };
  const pickRevisionBody = res => {
    const { revisions } = res.query.pages[raw.pageid];
    return revisions[0]['*'];
  };
  return api(apiOptions, params).then(pickRevisionBody);
}
/**
 * @summary Page content run through `parseContent`; useful for extracting
 * structured section content from the page
 * @alias sections
 * @example
 * wiki.page('batman').then(page => page.content()).then(console.log);
 * @method WikiPage#content
 * @return {Promise}
 */
function content() {
  const pendingText = rawContent();
  return pendingText.then(text => parseContent(text));
}
/**
 * Raw content from page.
 * Resolves with the `extract` field of the chained content request.
 * @example
 * wiki.page('batman').then(page => page.rawContent()).then(console.log);
 * @method WikiPage#rawContent
 * @return {Promise}
 */
function rawContent() {
  const contentQuery = chain().content();
  return contentQuery.request().then(res => {
    const text = res.extract;
    return text;
  });
}
/**
 * Text summary from page.
 * Resolves with the `extract` field of the chained summary request.
 * @example
 * wiki.page('batman').then(page => page.summary()).then(console.log);
 * @method WikiPage#summary
 * @return {Promise}
 */
function summary() {
  const summaryQuery = chain().summary();
  return summaryQuery.request().then(res => {
    const text = res.extract;
    return text;
  });
}
/**
 * Main page image directly from API.
 * Resolves with `image.original.source` of the chained image request, or
 * undefined when that path is missing from the response.
 * @method WikiPage#pageImage
 * @returns URL
 */
function pageImage() {
  const imageQuery = chain().image({ original: true, name: true });
  const pickSource = res => get(res, 'image', 'original', 'source');
  return imageQuery.request().then(pickSource);
}
/**
 * Raw data from images from page.
 * Resolves with the page objects of the image generator query, or an empty
 * array when the response has no `query` section.
 * @example
 * wiki.page('batman').then(page => page.rawImages()).then(console.log);
 * @method WikiPage#rawImages
 * @return {Promise}
 */
function rawImages() {
  const params = {
    generator: 'images',
    gimlimit: 'max',
    prop: 'imageinfo',
    iiprop: 'url',
    titles: raw.title
  };
  return api(apiOptions, params).then(res => {
    if (!res.query) {
      return [];
    }
    const { pages } = res.query;
    return Object.keys(pages).map(id => pages[id]);
  });
}
/**
 * Main image URL for the page.
 * The page image API result is preferred. When it yields nothing, the image
 * named by the infobox is used, or - if the infobox names no image - the
 * page image whose title appears earliest in the page's intro wikitext.
 * @example
 * wiki.page('batman').then(page => page.mainImage()).then(console.log);
 * @method WikiPage#mainImage
 * @return {Promise} - resolves with a URL, or undefined when none is found
 */
function mainImage() {
  // URL of the first imageinfo entry; undefined when the image entry or its
  // imageinfo list is absent or empty
  const firstUrl = image =>
    image && image.imageinfo && image.imageinfo.length > 0
      ? image.imageinfo[0].url
      : undefined;
  // Some wikis use underscores for spaces, some don't; letter case varies too
  const normalize = name => name.replace(/\s/g, '_').toUpperCase();

  return Promise.all([rawImages(), info()]).then(([pageImages, infobox]) => {
    // Handle different translations of "image" here
    const mainImageName = getFileName(
      infobox.image ||
        infobox.bildname ||
        infobox.imagen ||
        infobox.Immagine ||
        infobox.badge ||
        infobox.logo
    );
    // Handle case where no info box exists
    if (!mainImageName) {
      return rawInfo().then(text => {
        if (!pageImages.length) return undefined;
        const wikitext = text || '';
        // Pick the image mentioned earliest in the intro wikitext; images
        // that are never mentioned only win when nothing is mentioned
        let earliest = pageImages[0];
        let earliestIndex = Infinity;
        for (const candidate of pageImages) {
          const index = wikitext.indexOf(candidate.title);
          if (index !== -1 && index < earliestIndex) {
            earliest = candidate;
            earliestIndex = index;
          }
        }
        const fallback = firstUrl(earliest);
        // Prefer the page image API result; use the fallback otherwise
        return pageImage().then(url => url || fallback);
      });
    }
    const wanted = normalize(mainImageName);
    const image = pageImages.find(({ title }) => {
      const filename = getFileName(title);
      return Boolean(filename) && normalize(filename) === wanted;
    });
    const fallback = firstUrl(image);
    // Prefer the page image API result; use the infobox image otherwise
    return pageImage().then(url => url || fallback);
  });
}
/**
 * Image URLs from page.
 * Flattens the `imageinfo` entries of every image on the page into one list
 * of URLs.
 * @example
 * wiki.page('batman').then(page => page.images()).then(console.log);
 * @method WikiPage#images
 * @return {Promise} - resolves with an array of URLs
 */
function images() {
  return rawImages().then(pageImages =>
    pageImages.reduce((urls, { imageinfo }) => {
      // An entry may come without imageinfo (presumably files the API
      // reports as missing); skip it instead of failing the whole list
      (imageinfo || []).forEach(entry => urls.push(entry.url));
      return urls;
    }, [])
  );
}
/**
 * External links from page.
 * Delegates to the query chain's `direct('extlinks')`.
 * @example
 * wiki().page('batman').then(page => page.externalLinks()).then(console.log);
 * // or
 * wiki().chain().search('batman').extlinks().request()
 * @method WikiPage#externalLinks
 * @return {Promise}
 */
function externalLinks() {
  const query = chain();
  return query.direct('extlinks');
}
/**
 * Whether a parsed HTML node carries the given CSS class.
 * The `class` attribute is split on whitespace and compared token by token,
 * so `external` matches `external text` but not `nonexternal`.
 * @param {Object} node - AST node with `content.attributes`
 * @param {String} className - class token to look for
 * @return {Boolean}
 */
function hasClass(node, className) {
  const { attributes } = node.content;
  if (!attributes) return false;
  return attributes.some(attr => {
    // Attributes may lack a key or a value (e.g. a bare `class`)
    if (!attr.key || attr.key.content !== 'class' || !attr.value) {
      return false;
    }
    const tokens = String(attr.value.content || '').split(/\s+/);
    return tokens.indexOf(className) !== -1;
  });
}
/**
 * Whether a parsed HTML node is an element (its `nodeType` is 'tag').
 * @param {Object} node - AST node
 * @return {Boolean}
 */
function isTag(node) {
  const { nodeType } = node;
  return nodeType === 'tag';
}
/**
 * Whether a parsed HTML node's `content.name` equals the given name.
 * @param {Object} node - AST node
 * @param {String} name - expected name
 * @return {Boolean}
 */
function hasName(node, name) {
  const { content } = node;
  return content.name === name;
}
/**
 * Depth-first search for the first node accepted by the predicate.
 * The node itself is tested before its `content.children`.
 * @param {Object} node - AST node to start from
 * @param {Function} predicate - called with a node, returns truthy on match
 * @return {Object|null} the first matching node, or null when none matches
 */
function findNode(node, predicate) {
  if (predicate(node)) return node;
  const { children } = node.content;
  if (!children) return null;
  // Descend into each subtree in order and stop at the first hit
  for (const child of children) {
    const match = findNode(child, predicate);
    if (match) return match;
  }
  return null;
}
/**
 * Depth-first collection of every node accepted by the predicate.
 * Matches are appended to `nodes` in visiting order (a node before its
 * `content.children`); nothing is returned.
 * @param {Object} node - AST node to start from
 * @param {Function} predicate - called with a node, returns truthy on match
 * @param {Object[]} nodes - array that receives the matches
 */
function findNodes(node, predicate, nodes) {
  if (predicate(node)) nodes.push(node);
  const { children } = node.content;
  if (!children) return;
  for (const child of children) {
    findNodes(child, predicate, nodes);
  }
}
/**
 * References from page.
 * Parses the page HTML, finds every `ol` with class `references` and collects
 * the `href` of each external link found directly inside a `cite` element of
 * a reference item.
 * @example
 * wiki().page('batman').then(page => page.references()).then(console.log);
 * @method WikiPage#references
 * @return {Promise} - resolves with an array of URLs
 */
function references() {
  const isReferenceList = node =>
    isTag(node) && hasName(node, 'ol') && hasClass(node, 'references');
  const isListItem = el =>
    isTag(el) && hasName(el, 'li') && el.content.children;
  const isCite = node => isTag(node) && hasName(node, 'cite');
  const isExternalLink = el =>
    isTag(el) && hasName(el, 'a') && hasClass(el, 'external');

  return html().then(inputHTML => {
    const { tokens } = tokenize(inputHTML);
    const { ast } = constructTree(tokens);
    const links = [];
    const refs = [];
    // There can be multiple reference sections
    findNodes(ast, isReferenceList, refs);
    for (const ref of refs) {
      const items = (ref.content.children || []).filter(isListItem);
      for (const item of items) {
        // The reference was moved under a span under li
        const span = item.content.children[2];
        // Items with fewer children have no such span to search
        if (!span) continue;
        const cite = findNode(span, isCite);
        if (!cite || !cite.content.children) continue;
        for (const el of cite.content.children) {
          if (!isExternalLink(el)) continue;
          const linkAttr = el.content.attributes.find(
            attr => attr.key && attr.key.content === 'href'
          );
          // Skip anchors that carry the class but no usable href
          if (linkAttr && linkAttr.value) {
            links.push(linkAttr.value.content);
          }
        }
      }
    }
    return links;
  });
}
/**
 * Paginated links from page
 * @example
 * wiki().page('batman').then(page => page.links()).then(console.log);
 * @method WikiPage#links
 * @param {Boolean} [aggregated] - return all links (default is true)
 * @param {Number} [limit] - number of links per page (default is 100)
 * @return {Promise} - returns results if aggregated [and next function for more results if not aggregated]
 */
function links(aggregated = true, limit = 100) {
  const params = {
    prop: 'links',
    plnamespace: 0,
    pllimit: limit,
    titles: raw.title
  };
  // A response without a `links` list yields no titles
  const toTitles = res => {
    const pageLinks = res.query.pages[raw.pageid].links || [];
    return pageLinks.map(link => link.title);
  };
  const pages = pagination(apiOptions, params, toTitles);
  return aggregated ? aggregatePagination(pages) : pages;
}
/**
 * Paginated categories from page
 * @example
 * wiki().page('batman').then(page => page.categories()).then(console.log);
 * @method WikiPage#categories
 * @param {Boolean} [aggregated] - return all categories (default is true)
 * @param {Number} [limit] - number of categories per page (default is 100)
 * @return {Promise} - returns results if aggregated [and next function for more results if not aggregated]
 */
function categories(aggregated = true, limit = 100) {
  // Request parameters are taken from the query chain
  const params = chain()
    .categories(limit)
    .params();
  // A response without a `categories` list yields no titles
  const toTitles = res => {
    const pageCategories = res.query.pages[raw.pageid].categories || [];
    return pageCategories.map(category => category.title);
  };
  const pages = pagination(apiOptions, params, toTitles);
  return aggregated ? aggregatePagination(pages) : pages;
}
/**
 * Geographical coordinates from page.
 * Uses the chain's `direct('coordinates')` result when it is truthy;
 * otherwise parses coordinates out of the page's infobox data.
 * @example
 * wiki().page('Texas').then(texas => texas.coordinates())
 * @method WikiPage#coordinates
 * @return {Promise}
 */
function coordinates() {
  // No coordinates for this page, check infobox for deprecated version
  const fromInfobox = () => info().then(data => parseCoordinates(data));
  const query = chain();
  return query.direct('coordinates').then(coords => coords || fromInfobox());
}
/**
 * Raw wikitext of section 0 (`rvsection: 0`) of a page.
 * Resolves with the `*` field of the first revision of the first page in the
 * response, or undefined when that path is missing.
 * @method WikiPage#rawInfo
 * @param {String} [title] - page title; defaults to this page's title
 * @return {Promise}
 */
function rawInfo(title) {
  const params = {
    prop: 'revisions',
    rvprop: 'content',
    rvsection: 0,
    titles: title || raw.title
  };
  const wikitextPath = ['query', 'pages', firstValue, 'revisions', 0, '*'];
  return api(apiOptions, params).then(res => get(res, ...wikitextPath));
}
/**
 * Fetch and parse tables within page.
 * Loads the page's revision wikitext and returns the `tables` property of
 * the infobox parser's result.
 * @method WikiPage#tables
 * @return {Promise} Resolves to a collection of tables
 */
function tables() {
  const params = {
    prop: 'revisions',
    rvprop: 'content',
    titles: raw.title
  };
  const pickWikitext = res =>
    get(res, 'query', 'pages', firstValue, 'revisions', 0, '*');
  const parseTables = wikitext => {
    const parsed = infoboxParser(wikitext, apiOptions.parser);
    return parsed.tables;
  };
  return api(apiOptions, params)
    .then(pickWikitext)
    .then(parseTables);
}
/**
 * Get general information from page, with optional specific property.
 * When the page's own infobox parses to nothing, the template page
 * `Template:Infobox <lower-cased title>` is tried instead.
 * @deprecated This method will be dropped and replaced with the `fullInfo` implementation in v5
 * @example
 * wiki().page('Batman').then(page => page.info('alter_ego'));
 * @method WikiPage#info
 * @param {String} [key] - Information key. Falsy keys are ignored
 * @return {Promise} - info Object contains key/value pairs of infobox data, or specific value if key given (undefined when the key is absent)
 */
function info(key) {
  return rawInfo()
    .then(wikitext => {
      // Use general data for now...
      const general = infoboxParser(wikitext, apiOptions.parser).general;
      if (Object.keys(general).length > 0) {
        return general;
      }
      // If empty, check to see if this page has a templated infobox
      return rawInfo(`Template:Infobox ${raw.title.toLowerCase()}`).then(
        templateText =>
          infoboxParser(templateText || '', apiOptions.parser).general
      );
    })
    .then(metadata => {
      if (!key) {
        return metadata;
      }
      // Call through the prototype: parsed data could itself contain a
      // field named `hasOwnProperty`
      return Object.prototype.hasOwnProperty.call(metadata, key)
        ? metadata[key]
        : undefined;
    });
}
/**
 * Get the full infobox data, parsed in an easy-to-use manner.
 * Resolves with the complete result of the infobox parser for the page's
 * section-0 wikitext.
 * @example
 * new Wiki().page('Batman').then(page => page.fullInfo()).then(info => info.general.aliases);
 * @method WikiPage#fullInfo
 * @return {Promise} - Parsed object of all infobox data
 */
function fullInfo() {
  const parse = wikitext => infoboxParser(wikitext, apiOptions.parser);
  return rawInfo().then(parse);
}
/**
 * Paginated backlinks from page
 * @method WikiPage#backlinks
 * @param {Boolean} [aggregated] - return all backlinks (default is true)
 * @param {Number} [limit] - number of backlinks per page (default is 100)
 * @return {Promise} - includes results [and next function for more results if not aggregated]
 */
function backlinks(aggregated = true, limit = 100) {
  const params = {
    list: 'backlinks',
    bllimit: limit,
    bltitle: raw.title
  };
  // A response without a `backlinks` list yields no titles
  const toTitles = res => {
    const entries = res.query.backlinks || [];
    return entries.map(link => link.title);
  };
  const pages = pagination(apiOptions, params, toTitles);
  return aggregated ? aggregatePagination(pages) : pages;
}
/**
 * Get list of links to different translations.
 * Delegates to the query chain's `direct('langlinks')`.
 * @method WikiPage#langlinks
 * @return {Promise} - includes link objects { lang, title, url }
 */
function langlinks() {
  const query = chain();
  return query.direct('langlinks');
}
/**
 * Get URL for wiki page (the `canonicalurl` field of the raw page info)
 * @method WikiPage#url
 * @return {String}
 */
function url() {
  const { canonicalurl } = raw;
  return canonicalurl;
}
// Start from a shallow copy of the raw page info so its fields are readable
// directly on the returned page object without mutating `raw` itself
const page = Object.assign({}, raw);
/**
 * Returns a new QueryChain built from the API options and this page's id
 * @method WikiPage#chain
 * @returns {QueryChain}
 */
function chain() {
  const { pageid } = raw;
  return new QueryChain(apiOptions, pageid);
}
// Attach the untouched raw info and the public page methods. These are
// assigned after the raw fields were copied, so on a name clash the method
// replaces the raw field of the same name.
Object.assign(page, {
raw,
html,
rawContent,
content,
// `sections` is an alias of `content`
sections: content,
summary,
images,
references,
links,
externalLinks,
categories,
coordinates,
info,
backlinks,
rawImages,
mainImage,
langlinks,
rawInfo,
fullInfo,
pageImage,
tables,
url,
chain
});
return page;
}