rudeshark.net/packages/backend/src/services/fetch-instance-metadata.ts
Johann150 bd07c7312a
fix: also recognize "shortcut icon" favicon (#8220)
* also recognize "shortcut icon" favicon

Not using querySelector for this because it uses jsdom which might be slower.
Reversing the order because WHATWG says the last appropriate link should be used.

* also fetchIconUrl

* br

* improve readability

* fix

* フォールバックにhrefの評価を含める

* fix val name

* 将来的な拡張を考えたコードにした

Co-authored-by: tamaina <tamaina@hotmail.co.jp>
Co-authored-by: syuilo <Syuilotan@yahoo.co.jp>
2022-02-23 20:17:43 +09:00

272 lines
7.5 KiB
TypeScript

import { DOMWindow, JSDOM } from 'jsdom';
import fetch from 'node-fetch';
import { getJson, getHtml, getAgentByUrl } from '@/misc/fetch';
import { Instance } from '@/models/entities/instance';
import { Instances } from '@/models/index';
import { getFetchInstanceMetadataLock } from '@/misc/app-lock';
import Logger from './logger';
import { URL } from 'url';
const logger = new Logger('metadata', 'cyan');
export async function fetchInstanceMetadata(instance: Instance, force = false): Promise<void> {
const unlock = await getFetchInstanceMetadataLock(instance.host);
if (!force) {
const _instance = await Instances.findOne({ host: instance.host });
const now = Date.now();
if (_instance && _instance.infoUpdatedAt && (now - _instance.infoUpdatedAt.getTime() < 1000 * 60 * 60 * 24)) {
unlock();
return;
}
}
logger.info(`Fetching metadata of ${instance.host} ...`);
try {
const [info, dom, manifest] = await Promise.all([
fetchNodeinfo(instance).catch(() => null),
fetchDom(instance).catch(() => null),
fetchManifest(instance).catch(() => null),
]);
const [favicon, icon, themeColor, name, description] = await Promise.all([
fetchFaviconUrl(instance, dom).catch(() => null),
fetchIconUrl(instance, dom, manifest).catch(() => null),
getThemeColor(dom, manifest).catch(() => null),
getSiteName(info, dom, manifest).catch(() => null),
getDescription(info, dom, manifest).catch(() => null),
]);
logger.succ(`Successfuly fetched metadata of ${instance.host}`);
const updates = {
infoUpdatedAt: new Date(),
} as Record<string, any>;
if (info) {
updates.softwareName = info.software?.name.toLowerCase();
updates.softwareVersion = info.software?.version;
updates.openRegistrations = info.openRegistrations;
updates.maintainerName = info.metadata ? info.metadata.maintainer ? (info.metadata.maintainer.name || null) : null : null;
updates.maintainerEmail = info.metadata ? info.metadata.maintainer ? (info.metadata.maintainer.email || null) : null : null;
}
if (name) updates.name = name;
if (description) updates.description = description;
if (icon || favicon) updates.iconUrl = icon || favicon;
if (favicon) updates.faviconUrl = favicon;
if (themeColor) updates.themeColor = themeColor;
await Instances.update(instance.id, updates);
logger.succ(`Successfuly updated metadata of ${instance.host}`);
} catch (e) {
logger.error(`Failed to update metadata of ${instance.host}: ${e}`);
} finally {
unlock();
}
}
type NodeInfo = {
openRegistrations?: any;
software?: {
name?: any;
version?: any;
};
metadata?: {
name?: any;
nodeName?: any;
nodeDescription?: any;
description?: any;
maintainer?: {
name?: any;
email?: any;
};
};
};
async function fetchNodeinfo(instance: Instance): Promise<NodeInfo> {
logger.info(`Fetching nodeinfo of ${instance.host} ...`);
try {
const wellknown = await getJson('https://' + instance.host + '/.well-known/nodeinfo')
.catch(e => {
if (e.statusCode === 404) {
throw 'No nodeinfo provided';
} else {
throw e.statusCode || e.message;
}
});
if (wellknown.links == null || !Array.isArray(wellknown.links)) {
throw 'No wellknown links';
}
const links = wellknown.links as any[];
const lnik1_0 = links.find(link => link.rel === 'http://nodeinfo.diaspora.software/ns/schema/1.0');
const lnik2_0 = links.find(link => link.rel === 'http://nodeinfo.diaspora.software/ns/schema/2.0');
const lnik2_1 = links.find(link => link.rel === 'http://nodeinfo.diaspora.software/ns/schema/2.1');
const link = lnik2_1 || lnik2_0 || lnik1_0;
if (link == null) {
throw 'No nodeinfo link provided';
}
const info = await getJson(link.href)
.catch(e => {
throw e.statusCode || e.message;
});
logger.succ(`Successfuly fetched nodeinfo of ${instance.host}`);
return info;
} catch (e) {
logger.error(`Failed to fetch nodeinfo of ${instance.host}: ${e}`);
throw e;
}
}
async function fetchDom(instance: Instance): Promise<DOMWindow['document']> {
logger.info(`Fetching HTML of ${instance.host} ...`);
const url = 'https://' + instance.host;
const html = await getHtml(url);
const { window } = new JSDOM(html);
const doc = window.document;
return doc;
}
async function fetchManifest(instance: Instance): Promise<Record<string, any> | null> {
const url = 'https://' + instance.host;
const manifestUrl = url + '/manifest.json';
const manifest = await getJson(manifestUrl);
return manifest;
}
async function fetchFaviconUrl(instance: Instance, doc: DOMWindow['document'] | null): Promise<string | null> {
const url = 'https://' + instance.host;
if (doc) {
// https://github.com/misskey-dev/misskey/pull/8220#issuecomment-1025104043
const href = Array.from(doc.getElementsByTagName('link')).reverse().find(link => link.relList.contains('icon'))?.href;
if (href) {
return (new URL(href, url)).href;
}
}
const faviconUrl = url + '/favicon.ico';
const favicon = await fetch(faviconUrl, {
timeout: 10000,
agent: getAgentByUrl,
});
if (favicon.ok) {
return faviconUrl;
}
return null;
}
async function fetchIconUrl(instance: Instance, doc: DOMWindow['document'] | null, manifest: Record<string, any> | null): Promise<string | null> {
if (manifest && manifest.icons && manifest.icons.length > 0 && manifest.icons[0].src) {
const url = 'https://' + instance.host;
return (new URL(manifest.icons[0].src, url)).href;
}
if (doc) {
const url = 'https://' + instance.host;
// https://github.com/misskey-dev/misskey/pull/8220#issuecomment-1025104043
const links = Array.from(doc.getElementsByTagName('link')).reverse();
// https://github.com/misskey-dev/misskey/pull/8220/files/0ec4eba22a914e31b86874f12448f88b3e58dd5a#r796487559
const href =
[
links.find(link => link.relList.contains('apple-touch-icon-precomposed'))?.href,
links.find(link => link.relList.contains('apple-touch-icon'))?.href,
links.find(link => link.relList.contains('icon'))?.href,
]
.find(href => href);
if (href) {
return (new URL(href, url)).href;
}
}
return null;
}
async function getThemeColor(doc: DOMWindow['document'] | null, manifest: Record<string, any> | null): Promise<string | null> {
if (doc) {
const themeColor = doc.querySelector('meta[name="theme-color"]')?.getAttribute('content');
if (themeColor) {
return themeColor;
}
}
if (manifest) {
return manifest.theme_color;
}
return null;
}
async function getSiteName(info: NodeInfo | null, doc: DOMWindow['document'] | null, manifest: Record<string, any> | null): Promise<string | null> {
if (info && info.metadata) {
if (info.metadata.nodeName || info.metadata.name) {
return info.metadata.nodeName || info.metadata.name;
}
}
if (doc) {
const og = doc.querySelector('meta[property="og:title"]')?.getAttribute('content');
if (og) {
return og;
}
}
if (manifest) {
return manifest?.name || manifest?.short_name;
}
return null;
}
async function getDescription(info: NodeInfo | null, doc: DOMWindow['document'] | null, manifest: Record<string, any> | null): Promise<string | null> {
if (info && info.metadata) {
if (info.metadata.nodeDescription || info.metadata.description) {
return info.metadata.nodeDescription || info.metadata.description;
}
}
if (doc) {
const meta = doc.querySelector('meta[name="description"]')?.getAttribute('content');
if (meta) {
return meta;
}
const og = doc.querySelector('meta[property="og:description"]')?.getAttribute('content');
if (og) {
return og;
}
}
if (manifest) {
return manifest?.name || manifest?.short_name;
}
return null;
}