162 lines
4.4 KiB
JavaScript
162 lines
4.4 KiB
JavaScript
"use strict";
|
|
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
if (k2 === undefined) k2 = k;
|
|
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
}
|
|
Object.defineProperty(o, k2, desc);
|
|
}) : (function(o, m, k, k2) {
|
|
if (k2 === undefined) k2 = k;
|
|
o[k2] = m[k];
|
|
}));
|
|
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
}) : function(o, v) {
|
|
o["default"] = v;
|
|
});
|
|
var __importStar = (this && this.__importStar) || function (mod) {
|
|
if (mod && mod.__esModule) return mod;
|
|
var result = {};
|
|
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
|
|
__setModuleDefault(result, mod);
|
|
return result;
|
|
};
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
const dom = __importStar(require("./dom"));
|
|
const SELECTOR_LIST = "ol, ul";
|
|
const SELECTOR_LINK = "> a, p > a";
|
|
const SELECTOR_PART = "h2, h3, h4";
|
|
/**
|
|
Find a list
|
|
|
|
@param {cheerio.Node}
|
|
@return {cheerio.Node}
|
|
*/
|
|
function findList($parent) {
|
|
const $container = $parent.children(".olist");
|
|
if ($container.length > 0)
|
|
$parent = $container.first();
|
|
return $parent.children(SELECTOR_LIST);
|
|
}
|
|
/**
|
|
Parse a ul list and return list of chapters recursvely
|
|
|
|
@param {cheerio.Node}
|
|
@param {cheerio.DOM}
|
|
@return {Array}
|
|
*/
|
|
function parseList($ul, $) {
|
|
const articles = [];
|
|
$ul.children("li").each(function () {
|
|
const article = {};
|
|
const $li = $(this);
|
|
// Get text for the entry
|
|
const $p = $li.children("p");
|
|
article.title = ($p.text() || dom.textNode($li.get(0))).trim();
|
|
// Parse link
|
|
const $a = $li.find(SELECTOR_LINK);
|
|
if ($a.length > 0) {
|
|
article.title = $a.first().text();
|
|
article.ref = $a.attr("href").replace(/\\/g, "/").replace(/^\/+/, "");
|
|
}
|
|
// Sub articles
|
|
const $sub = findList($li);
|
|
article.articles = parseList($sub, $);
|
|
if (!article.title)
|
|
return;
|
|
articles.push(article);
|
|
});
|
|
return articles;
|
|
}
|
|
/**
|
|
Find all parts and their corresponding lists
|
|
|
|
@param {cheerio.Node}
|
|
@param {cheerio.DOM}
|
|
@return {Array<{title: String, list: cheerio.Node}>}
|
|
*/
|
|
function findParts($parent, $) {
|
|
// Find parts and lists
|
|
// TODO asciidoc compatibility
|
|
const partsAndLists = $parent.children(`${SELECTOR_LIST}, ${SELECTOR_PART}`);
|
|
// Group each part with the list after
|
|
const parts = [];
|
|
let previousPart = null;
|
|
partsAndLists.each((i, el) => {
|
|
if (isPartNode(el)) {
|
|
if (previousPart !== null) {
|
|
// The previous part was empty
|
|
parts.push(previousPart);
|
|
}
|
|
previousPart = {
|
|
title: getPartTitle(el, $),
|
|
list: null
|
|
};
|
|
}
|
|
else {
|
|
// It is a list
|
|
if (previousPart !== null) {
|
|
previousPart.list = el;
|
|
}
|
|
else {
|
|
previousPart = {
|
|
title: "",
|
|
list: el
|
|
};
|
|
}
|
|
parts.push(previousPart);
|
|
previousPart = null;
|
|
}
|
|
});
|
|
// Last part might be empty
|
|
if (previousPart !== null) {
|
|
parts.push(previousPart);
|
|
}
|
|
return parts;
|
|
}
|
|
/**
|
|
True if the element is a part
|
|
|
|
@param el
|
|
@return {boolean}
|
|
*/
|
|
function isPartNode(el) {
|
|
return SELECTOR_PART.indexOf(el.name) !== -1;
|
|
}
|
|
/**
|
|
Parse the title of a part element
|
|
|
|
@param el
|
|
@param {cheerio.DOM} $
|
|
@return {string}
|
|
*/
|
|
function getPartTitle(el, $) {
|
|
return $(el).text().trim();
|
|
}
|
|
/**
|
|
Parse an HTML content into a tree of articles/parts
|
|
|
|
@param {string} html
|
|
@return {Object}
|
|
*/
|
|
function parseSummary(html) {
|
|
const $ = dom.parse(html);
|
|
const $root = dom.cleanup(dom.root($), $);
|
|
const parts = findParts($root, $);
|
|
// Parse each list
|
|
const parsedParts = [];
|
|
let part;
|
|
for (let i = 0; i < parts.length; ++i) {
|
|
part = parts[i];
|
|
parsedParts.push({
|
|
title: part.title,
|
|
articles: parseList($(part.list), $)
|
|
});
|
|
}
|
|
return {
|
|
parts: parsedParts
|
|
};
|
|
}
|
|
exports.default = parseSummary;
|