// page.js - Documentation

import { aggregatePagination, pagination, api, parseContent } from './util';
import infoboxParser from 'infobox-parser';
import { tokenize, constructTree } from 'hyntax';
import { parseCoordinates } from './coordinates';
import QueryChain from './chain';

/**
 * Safely walks a nested value along a path of steps.
 *
 * Each step is either a property key/index (read from the current value) or a
 * function (called with the current value; its result becomes the new value).
 * The walk stops early, returning the current value, as soon as that value is
 * `null`/`undefined` or the next step is `undefined`.
 *
 * @param {*} obj - value to start from
 * @param {String|Number|Function} [first] - first step of the path
 * @param {...(String|Number|Function)} rest - remaining steps
 * @return {*} the value reached, or the `null`/`undefined` that ended the walk
 */
const get = (obj, first, ...rest) => {
	// `== null` matches both null and undefined, so a null link in the path
	// short-circuits instead of throwing on property access
	if (obj == null || first === undefined) return obj;
	if (typeof first === 'function') {
		return get(first(obj), ...rest);
	}
	return get(obj[first], ...rest);
};

/**
 * Returns the first entry of a collection: for objects (arrays included) the
 * value stored under the first key reported by `Object.keys`, for anything
 * else (e.g. a string) the element at index 0.
 *
 * @param {Object|Array|String} obj - collection to read from
 * @return {*} first value, or undefined when there is none
 */
const firstValue = obj => {
	if (typeof obj !== 'object') return obj[0];
	const [leadingKey] = Object.keys(obj);
	return obj[leadingKey];
};

/**
 * Extracts a bare file name from a title-like string by dropping everything
 * up to and including the first colon (e.g. a `File:` prefix).
 *
 * @param {String|String[]} text - title, or a list whose first entry is used
 * @return {String|undefined} the name after the first colon, the input itself
 *   when it has no colon, or undefined for empty input
 */
const getFileName = text => {
	const value = Array.isArray(text) ? text[0] : text;
	if (!value) return undefined;
	const separator = value.indexOf(':');
	// Slice rather than split so a name that itself contains a colon is not
	// truncated at its second colon
	return separator === -1 ? value : value.slice(separator + 1);
};

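/**
 * WikiPage: builds a page object that combines the raw page info with
 * helper methods for querying that page
 * @namespace WikiPage
 * @param {Object} rawPageInfo - page data; its properties are copied onto the
 *   returned object and it is also exposed as `raw`
 * @param {Object} apiOptions - options passed along with every API request
 *   and query chain created for this page
 * @return {Object} the page object
 */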
export default function wikiPage(rawPageInfo, apiOptions) {
	const raw = rawPageInfo;

	/**
	 * HTML from page. Requests a single revision of the page with the
	 * `rvparse` flag set and resolves with that revision's content.
	 * @example
	 * wiki.page('batman').then(page => page.html()).then(console.log);
	 * @method WikiPage#html
	 * @return {Promise} resolves with the revision content
	 */
	function html() {
		const params = {
			prop: 'revisions',
			rvprop: 'content',
			rvlimit: 1,
			rvparse: '',
			titles: raw.title
		};
		return api(apiOptions, params).then(res => {
			const { revisions } = res.query.pages[raw.pageid];
			return revisions[0]['*'];
		});
	}

	/**
	 * Page content run through `parseContent`
	 * @summary Useful for extracting structured section content from the page
	 * @alias sections
	 * @example
	 * wiki.page('batman').then(page => page.content()).then(console.log);
	 * @method WikiPage#content
	 * @return {Promise} resolves with the result of parsing `rawContent()`
	 */
	function content() {
		return rawContent().then(text => parseContent(text));
	}

	/**
	 * Raw content from page: the `extract` field of a content query made
	 * through the page's query chain
	 * @example
	 * wiki.page('batman').then(page => page.rawContent()).then(console.log);
	 * @method WikiPage#rawContent
	 * @return {Promise} resolves with the extract
	 */
	function rawContent() {
		const query = chain().content();
		return query.request().then(({ extract }) => extract);
	}

	/**
	 * Text summary from page: the `extract` field of a summary query made
	 * through the page's query chain
	 * @example
	 * wiki.page('batman').then(page => page.summary()).then(console.log);
	 * @method WikiPage#summary
	 * @return {Promise} resolves with the extract
	 */
	function summary() {
		const query = chain().summary();
		return query.request().then(({ extract }) => extract);
	}

	/**
	 * Main page image directly from API
	 * @method WikiPage#pageImage
	 * @return {Promise} resolves with `image.original.source` of the query
	 *   result, or undefined when that path is absent
	 */
	function pageImage() {
		const query = chain().image({ original: true, name: true });
		return query
			.request()
			.then(res => get(res, 'image', 'original', 'source'));
	}

	/**
	 * Raw data from images from page
	 * @example
	 * wiki.page('batman').then(page => page.rawImages()).then(console.log);
	 * @method WikiPage#rawImages
	 * @return {Promise} resolves with the list of page entries returned by
	 *   the images generator, or an empty list when the response has no `query`
	 */
	function rawImages() {
		const params = {
			generator: 'images',
			gimlimit: 'max',
			prop: 'imageinfo',
			iiprop: 'url',
			titles: raw.title
		};
		return api(apiOptions, params).then(res => {
			if (!res.query) return [];
			const { pages } = res.query;
			return Object.keys(pages).map(id => pages[id]);
		});
	}

	/**
	 * Main image URL from infobox on page
	 *
	 * Looks up the image named by the infobox among the page's images. When the
	 * infobox names no image, the page's images are ranked by where their title
	 * occurs in the lead wikitext instead. In both cases the URL reported by
	 * `pageImage()` takes precedence and the image found here is the fallback.
	 * @example
	 * wiki.page('batman').then(page => page.mainImage()).then(console.log);
	 * @method WikiPage#mainImage
	 * @return {Promise} resolves with an image URL, or undefined when none
	 *   could be determined
	 */
	function mainImage() {
		// URL of an image's first imageinfo entry; undefined when the image or
		// its imageinfo list is missing or empty
		const firstUrl = image =>
			image && image.imageinfo && image.imageinfo.length > 0
				? image.imageinfo[0].url
				: undefined;

		// Prefer the page image API result, fall back to the given image
		const resolveUrl = image =>
			pageImage().then(url => url || firstUrl(image));

		return Promise.all([rawImages(), info()]).then(
			([pageImages, infobox]) => {
				// Handle different translations of "image" here
				const mainImageName = getFileName(
					infobox.image ||
						infobox.bildname ||
						infobox.imagen ||
						infobox.Immagine ||
						infobox.badge ||
						infobox.logo
				);

				// Handle case where the infobox names no image
				if (!mainImageName) {
					return rawInfo().then(text => {
						if (!pageImages.length) return undefined;
						// The lead wikitext may be missing; rank against an empty text then
						const source = text || '';
						// Descending by position in the text, so titles that do not occur
						// (index -1) end up last.
						// NOTE(review): this puts the title with the HIGHEST index first,
						// not the one "seen first" - kept as-is to preserve results
						pageImages.sort(
							(a, b) => source.indexOf(b.title) - source.indexOf(a.title)
						);
						return resolveUrl(pageImages[0]);
					});
				}

				const wantedUpper = mainImageName.toUpperCase();
				const match = pageImages.find(({ title }) => {
					const filename = getFileName(title);
					if (!filename) return false;
					// Some wikis use underscores for spaces, some don't
					return (
						filename.toUpperCase() === wantedUpper ||
						filename.replace(/\s/g, '_') === mainImageName
					);
				});
				return resolveUrl(match);
			}
		);
	}

	/**
	 * Image URL's from page
	 * @example
	 * wiki.page('batman').then(page => page.images()).then(console.log);
	 * @method WikiPage#images
	 * @return {Promise} resolves with the `url` of every imageinfo entry of
	 *   every image returned by `rawImages()`, in order
	 */
	function images() {
		return rawImages().then(rawList => {
			const urls = [];
			for (const image of rawList) {
				// An entry may come back without an `imageinfo` list; skip it
				// instead of failing the whole call
				for (const entry of image.imageinfo || []) {
					urls.push(entry.url);
				}
			}
			return urls;
		});
	}

	/**
	 * External links from page, fetched as the `extlinks` property through
	 * the page's query chain
	 * @example
	 * wiki().page('batman').then(page => page.externalLinks()).then(console.log);
	 * // or
	 * wiki().chain().search('batman').extlinks().request()
	 * @method WikiPage#externalLinks
	 * @return {Promise}
	 */
	function externalLinks() {
		const query = chain();
		return query.direct('extlinks');
	}

	/**
	 * Checks whether a parsed HTML node carries the given CSS class.
	 *
	 * The `class` attribute value is split on whitespace and compared token by
	 * token, so `external` matches `external text` but not `nonexternal`.
	 * @param {Object} node - AST node; `node.content.attributes` is inspected
	 * @param {String} className - single class name to look for
	 * @return {Boolean} true when one of the node's classes equals className
	 */
	function hasClass(node, className) {
		const attributes = node.content.attributes || [];
		return attributes.some(attr => {
			if (!attr.key || attr.key.content !== 'class') return false;
			// A `class` attribute written without a value has nothing to match
			if (!attr.value || !attr.value.content) return false;
			return attr.value.content.split(/\s+/).indexOf(className) !== -1;
		});
	}

	/**
	 * Tells whether an AST node is a tag node
	 * @param {Object} node - AST node
	 * @return {Boolean} true when `node.nodeType` is `'tag'`
	 */
	function isTag(node) {
		const { nodeType } = node;
		return nodeType === 'tag';
	}

	/**
	 * Tells whether an AST node has the given name
	 * @param {Object} node - AST node; `node.content.name` is compared
	 * @param {String} name - expected name
	 * @return {Boolean} true when the names are strictly equal
	 */
	function hasName(node, name) {
		const { content: details } = node;
		return details.name === name;
	}

	/**
	 * Depth-first, pre-order search for the first node matching a predicate.
	 * @param {Object} [node] - AST node to start from; a missing node simply
	 *   yields no match
	 * @param {Function} predicate - called with each visited node
	 * @return {Object|null} the first matching node, or null when none matches
	 */
	function findNode(node, predicate) {
		// Callers may hand in a child that does not exist (e.g. a fixed index
		// into a short children list); treat that as "nothing found"
		if (!node) return null;
		if (predicate(node)) return node;
		// search through children as well
		const children = (node.content && node.content.children) || [];
		for (const child of children) {
			const found = findNode(child, predicate);
			if (found) {
				return found;
			}
		}
		return null;
	}

	/**
	 * Collects every node matching a predicate, visiting the tree depth-first
	 * in pre-order (a node before its children).
	 * @param {Object} node - AST node to start from
	 * @param {Function} predicate - called with each visited node
	 * @param {Array} nodes - output list; matches are appended to it
	 */
	function findNodes(node, predicate, nodes) {
		if (predicate(node)) nodes.push(node);
		const { children } = node.content;
		if (!children) return;
		for (let i = 0; i < children.length; i++) {
			findNodes(children[i], predicate, nodes);
		}
	}

	/**
	 * References from page
	 *
	 * Parses the page HTML, finds every `<ol>` with class `references` and, for
	 * each `<li>` in it, takes the first `<cite>` element and collects the
	 * `href` of each `<a>` with class `external` directly inside that cite.
	 * @example
	 * wiki().page('batman').then(page => page.references()).then(console.log);
	 * @method WikiPage#references
	 * @return {Promise} resolves with the list of collected link targets
	 */
	function references() {
		return html()
			.then(inputHTML => {
				const { tokens } = tokenize(inputHTML);
				const { ast } = constructTree(tokens);
				return ast;
			})
			.then(ast => {
				const links = [];
				const refs = [];
				// There can be multiple reference sections
				findNodes(
					ast,
					node =>
						isTag(node) && hasName(node, 'ol') && hasClass(node, 'references'),
					refs
				);
				for (const ref of refs) {
					const items = (ref.content.children || []).filter(
						el => isTag(el) && hasName(el, 'li') && el.content.children
					);
					for (const item of items) {
						// The cite sits under a span inside the li. Search the whole
						// li rather than a fixed child index, so items with a
						// different number of children neither crash nor get skipped
						const cite = findNode(
							item,
							node => isTag(node) && hasName(node, 'cite')
						);
						if (!cite || !cite.content.children) continue;
						for (const el of cite.content.children) {
							if (!(isTag(el) && hasName(el, 'a') && hasClass(el, 'external'))) {
								continue;
							}
							const linkAttr = (el.content.attributes || []).find(
								attr => attr.key && attr.key.content === 'href'
							);
							// An anchor without a usable href contributes nothing
							if (linkAttr && linkAttr.value) {
								links.push(linkAttr.value.content);
							}
						}
					}
				}
				return links;
			});
	}

	/**
	 * Paginated links from page (namespace 0 only)
	 * @example
	 * wiki().page('batman').then(page => page.links()).then(console.log);
	 * @method WikiPage#links
	 * @param  {Boolean} [aggregated] - return all links (default is true)
	 * @param  {Number} [limit] - number of links per page (default is 100)
	 * @return {Promise} - returns results if aggregated [and next function for more results if not aggregated]
	 */
	function links(aggregated = true, limit = 100) {
		const params = {
			prop: 'links',
			plnamespace: 0,
			pllimit: limit,
			titles: raw.title
		};
		// Reduce an API response to the list of linked titles
		const toTitles = res => {
			const pageLinks = res.query.pages[raw.pageid].links || [];
			return pageLinks.map(link => link.title);
		};
		const _pagination = pagination(apiOptions, params, toTitles);
		return aggregated ? aggregatePagination(_pagination) : _pagination;
	}

	/**
	 * Paginated categories from page; the request parameters come from the
	 * page's query chain
	 * @example
	 * wiki().page('batman').then(page => page.categories()).then(console.log);
	 * @method WikiPage#categories
	 * @param  {Boolean} [aggregated] - return all categories (default is true)
	 * @param  {Number} [limit] - number of categories per page (default is 100)
	 * @return {Promise} - returns results if aggregated [and next function for more results if not aggregated]
	 */
	function categories(aggregated = true, limit = 100) {
		const params = chain()
			.categories(limit)
			.params();
		// Reduce an API response to the list of category titles
		const toTitles = res => {
			const pageCategories = res.query.pages[raw.pageid].categories || [];
			return pageCategories.map(category => category.title);
		};
		const _pagination = pagination(apiOptions, params, toTitles);
		return aggregated ? aggregatePagination(_pagination) : _pagination;
	}

	/**
	 * Geographical coordinates from page. Uses the `coordinates` property from
	 * the query chain when present, otherwise parses them out of the infobox
	 * data returned by `info()`.
	 * @example
	 * wiki().page('Texas').then(texas => texas.coordinates())
	 * @method WikiPage#coordinates
	 * @return {Promise}
	 */
	function coordinates() {
		// No coordinates for this page: check infobox for deprecated version
		const fromInfobox = () => info().then(data => parseCoordinates(data));
		return chain()
			.direct('coordinates')
			.then(coords => (coords ? coords : fromInfobox()));
	}

	/**
	 * Fetches the content of section 0 of a page's revision
	 * @param {String} [title] - page to read; defaults to this page's title
	 * @return {Promise} resolves with the content of the first revision of
	 *   the first page in the response, or undefined when that path is absent
	 */
	function rawInfo(title) {
		const params = {
			prop: 'revisions',
			rvprop: 'content',
			rvsection: 0,
			titles: title || raw.title
		};
		return api(apiOptions, params).then(res =>
			get(res, 'query', 'pages', firstValue, 'revisions', 0, '*')
		);
	}

	/**
	 * Fetch and parse tables within page: reads the revision content and
	 * returns the `tables` part of what `infoboxParser` produces for it
	 * @method WikiPage#tables
	 * @return {Promise} Resolves to a collection of tables
	 */
	function tables() {
		const params = {
			prop: 'revisions',
			rvprop: 'content',
			titles: raw.title
		};
		return api(apiOptions, params).then(res => {
			const wikitext = get(
				res,
				'query',
				'pages',
				firstValue,
				'revisions',
				0,
				'*'
			);
			const parsed = infoboxParser(wikitext, apiOptions.parser);
			return parsed.tables;
		});
	}

	/**
	 * Get general information from page, with optional specific property
	 *
	 * Parses the `general` infobox data out of the page's lead wikitext. When
	 * that yields nothing, the page `Template:Infobox <lower-cased title>` is
	 * tried instead.
	 * @deprecated This method will be dropped and replaced with the `fullInfo` implementation in v5
	 * @example
	 * wiki().page('Batman').then(page => page.info('alter_ego'));
	 * @method WikiPage#info
	 * @param  {String} [key] - Information key. Falsy keys are ignored
	 * @return {Promise} - info Object contains key/value pairs of infobox data, or specific value if key given (undefined when the key is absent)
	 */
	function info(key) {
		return rawInfo()
			.then(wikitext => {
				// Use general data for now...
				// Missing wikitext is parsed as an empty string, same as for the
				// template lookup below
				const general = infoboxParser(wikitext || '', apiOptions.parser)
					.general;
				if (Object.keys(general).length > 0) {
					return general;
				}
				// If empty, check to see if this page has a templated infobox
				return rawInfo(`Template:Infobox ${raw.title.toLowerCase()}`).then(
					_wikitext =>
						infoboxParser(_wikitext || '', apiOptions.parser).general
				);
			})
			.then(metadata => {
				if (!key) {
					return metadata;
				}
				// Go through Object.prototype so an infobox field that happens to
				// be named `hasOwnProperty` cannot break the lookup
				return Object.prototype.hasOwnProperty.call(metadata, key)
					? metadata[key]
					: undefined;
			});
	}

	/**
	 * Get the full infobox data, parsed in a easy to use manner: everything
	 * `infoboxParser` produces for the page's lead wikitext
	 * @example
	 * new Wiki().page('Batman').then(page => page.fullInfo()).then(info => info.general.aliases);
	 * @method WikiPage#fullInfo
	 * @return {Promise} - Parsed object of all infobox data
	 */
	function fullInfo() {
		const parse = wikitext => infoboxParser(wikitext, apiOptions.parser);
		return rawInfo().then(wikitext => parse(wikitext));
	}

	/**
	 * Paginated backlinks from page
	 * @method WikiPage#backlinks
	 * @param  {Boolean} [aggregated] - return all backlinks (default is true)
	 * @param  {Number} [limit] - number of backlinks per page (default is 100)
	 * @return {Promise} - includes results [and next function for more results if not aggregated]
	 */
	function backlinks(aggregated = true, limit = 100) {
		const params = {
			list: 'backlinks',
			bllimit: limit,
			bltitle: raw.title
		};
		// Reduce an API response to the list of linking page titles
		const toTitles = res => {
			const entries = res.query.backlinks || [];
			return entries.map(link => link.title);
		};
		const _pagination = pagination(apiOptions, params, toTitles);
		return aggregated ? aggregatePagination(_pagination) : _pagination;
	}

	/**
	 * Get list of links to different translations, fetched as the
	 * `langlinks` property through the page's query chain
	 * @method WikiPage#langlinks
	 * @return {Promise} - includes link objects { lang, title, url }
	 */
	function langlinks() {
		const query = chain();
		return query.direct('langlinks');
	}

	/**
	 * Get URL for wiki page (the `canonicalurl` field of the raw page info)
	 * @method WikiPage#url
	 * @return {String}
	 */
	function url() {
		const { canonicalurl } = raw;
		return canonicalurl;
	}

	const page = Object.assign({}, raw);

	/**
	 * Returns a new QueryChain for the page, built from the API options and
	 * this page's id
	 * @method WikiPage#chain
	 * @returns {QueryChain}
	 */
	function chain() {
		const { pageid } = raw;
		return new QueryChain(apiOptions, pageid);
	}

	Object.assign(page, {
		raw,
		html,
		rawContent,
		content,
		sections: content,
		summary,
		images,
		references,
		links,
		externalLinks,
		categories,
		coordinates,
		info,
		backlinks,
		rawImages,
		mainImage,
		langlinks,
		rawInfo,
		fullInfo,
		pageImage,
		tables,
		url,
		chain
	});

	return page;
}