// 452 lines · 11 KiB · TypeScript
import { Health, Index, MeiliSearch, Stats } from "meilisearch";
|
|
import { dbLogger } from "./logger.js";
|
|
|
|
import config from "@/config/index.js";
|
|
import { Note } from "@/models/entities/note.js";
|
|
import * as url from "url";
|
|
import { ILocalUser } from "@/models/entities/user.js";
|
|
import { Followings, Users } from "@/models/index.js";
|
|
|
|
// Sub-logger of the database logger, labelled "meilisearch".
const logger = dbLogger.createSubLogger("meilisearch", "gray", false);

// Both handles are assigned only inside the `if (hasConfig)` branch below.
let posts: Index;
let client: MeiliSearch;

// Truthy as soon as a `meilisearch` config section exists and sets at least
// one of host, port or apiKey.
const hasConfig =
	config.meilisearch &&
	(config.meilisearch.host ||
		config.meilisearch.port ||
		config.meilisearch.apiKey);

if (hasConfig) {
	// Defaults applied to whichever settings were left out of the config.
	const host = config.meilisearch.host ?? "localhost";
	const port = config.meilisearch.port ?? 7700;
	const auth = config.meilisearch.apiKey ?? "";
	const ssl = config.meilisearch.ssl ?? false;

	logger.info("Connecting to MeiliSearch");

	client = new MeiliSearch({
		host: `${ssl ? "https" : "http"}://${host}:${port}`,
		apiKey: auth,
	});

	posts = client.index("posts");

	// The settings updates below are not awaited: each one is fired off and
	// only reports a failure through the logger.
	posts
		.updateSearchableAttributes(["text"])
		.catch((e) =>
			logger.error(`Setting searchable attr failed, searches won't work: ${e}`),
		);

	posts
		.updateFilterableAttributes([
			"userName",
			"userHost",
			"mediaAttachment",
			"createdAt",
			"userId",
		])
		.catch((e) =>
			logger.error(
				`Setting filterable attr failed, advanced searches won't work: ${e}`,
			),
		);

	posts
		.updateSortableAttributes(["createdAt"])
		.catch((e) =>
			logger.error(
				`Setting sortable attr failed, placeholder searches won't sort properly: ${e}`,
			),
		);

	posts
		.updateStopWords([
			"the",
			"a",
			"as",
			"be",
			"of",
			"they",
			"these",
			"is",
			"are",
			"これ",
			"それ",
			"あれ",
			"この",
			"その",
			"あの",
			"ここ",
			"そこ",
			"あそこ",
			"こちら",
			"どこ",
			"私",
			"僕",
			"俺",
			"君",
			"あなた",
			"我々",
			"私達",
			"彼女",
			"彼",
			"です",
			"ます",
			"は",
			"が",
			"の",
			"に",
			"を",
			"で",
			"へ",
			"から",
			"まで",
			"より",
			"も",
			"どの",
			"と",
			"それで",
			"しかし",
		])
		.catch((e) =>
			logger.error(
				`Failed to set Meilisearch stop words, database size will be larger: ${e}`,
			),
		);

	// "sort" is listed first so an explicit sort takes priority over the
	// relevancy rules that follow it.
	posts
		.updateRankingRules([
			"sort",
			"words",
			"typo",
			"proximity",
			"attribute",
			"exactness",
		])
		.catch((e) =>
			// Include the error, like every other settings handler above.
			logger.error(
				`Failed to set ranking rules, sorting won't work properly: ${e}`,
			),
		);

	// NOTE(review): logged right after the requests above are dispatched, not
	// after they (or any connectivity check) have completed.
	logger.info("Connected to MeiliSearch");
}
|
|
|
|
/**
 * Shape of a single document stored in the Meilisearch `posts` index.
 */
export type MeilisearchNote = {
	// --- the note itself ---
	/** Primary key of the document. */
	id: string;
	/** Body text of the note. */
	text: string;
	/** Channel the note belongs to. */
	channelId: string;
	/** Attachment category of the note. */
	mediaAttachment: string;
	/** Creation time as a number. */
	createdAt: number;

	// --- the author ---
	/** ID of the author. */
	userId: string;
	/** Username of the author. */
	userName: string;
	/** Host of the author. */
	userHost: string;
};
|
|
|
|
/**
 * Converts a user-supplied timestamp into UNIX time in seconds.
 *
 * A string consisting only of digits is taken to already be a UNIX timestamp
 * and is returned as a number. Anything else is handed to `Date.parse`.
 *
 * @param timestamp - Digits-only UNIX timestamp or a `Date.parse`-compatible date string.
 * @returns Seconds since the UNIX epoch, or 0 when the input cannot be parsed.
 */
function timestampToUnix(timestamp: string): number {
	// Only contains digits => UNIX timestamp. Return it directly, even when it
	// is zero, so inputs such as "0" never reach Date.parse, whose handling of
	// bare numbers is implementation-defined.
	if (/^\d+$/.test(timestamp)) {
		return Number.parseInt(timestamp, 10);
	}

	// Otherwise try to parse the timestamp as a JavaScript date.
	const millis = Date.parse(timestamp);
	if (Number.isNaN(millis)) return 0;

	// Date.parse yields milliseconds.
	return millis / 1000;
}
|
|
|
|
/**
 * Wraps a user-supplied value in double quotes so it can be embedded in a
 * Meilisearch filter expression as a single string value.
 *
 * Backslashes and double quotes are backslash-escaped so the value cannot
 * terminate the quoted string early.
 * NOTE(review): how a literal backslash is unescaped may depend on the
 * Meilisearch version — confirm if values with backslashes ever matter.
 */
function quoteFilterValue(value: string): string {
	const escaped = value.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
	return `"${escaped}"`;
}

/**
 * Reduces a note's attached MIME types to the category stored in the index:
 * the top-level type of the first attachment when it is image, video, audio
 * or text, "file" for any other type, and "" when there are no attachments.
 */
function attachmentCategory(fileTypes: string[]): string {
	if (fileTypes.length === 0) return "";

	const topLevelType = fileTypes[0].split("/")[0];
	return ["image", "video", "audio", "text"].includes(topLevelType)
		? topLevelType
		: "file";
}

/**
 * Meilisearch API of this module, or `null` when no Meilisearch
 * configuration is present.
 */
export default hasConfig
	? {
			/**
			 * Searches the `posts` index.
			 *
			 * @param query - Raw search string; may contain the operators listed below.
			 * @param limit - Maximum number of hits to return.
			 * @param offset - Number of hits to skip.
			 * @param userCtx - Searching user; required by the `filter:` operators.
			 * @param overrideSort - When set, replaces any other sort rule.
			 * @returns The promise returned by the index's `search` call.
			 */
			search: async (
				query: string,
				limit: number,
				offset: number,
				userCtx: ILocalUser | null,
				overrideSort: string | null,
			) => {
				/// Advanced search syntax
				/// from:user => filter by user + optional domain
				/// has:image/video/audio/text/file => filter by attachment types
				/// domain:domain.com => filter by domain
				/// before:Date => show posts made before Date
				/// after:Date => show posts made after Date
				/// filter:following => show results only from users you follow
				/// filter:followers => show results only from followers
				/// order:desc/asc => order results ascending or descending

				const constructedFilters: string[] = [];
				let sortRules: string[] = [];

				// Turns one space-separated term into either plain search text
				// (returned as-is) or a filter/sort rule (recorded, returns null).
				const consumeTerm = async (term: string): Promise<string | null> => {
					if (term.startsWith("has:")) {
						constructedFilters.push(
							`mediaAttachment = ${quoteFilterValue(term.slice(4))}`,
						);
						return null;
					}

					if (term.startsWith("from:")) {
						let user = term.slice(5);

						// Cut off leading @, those aren't saved in the DB
						if (user.charAt(0) === "@") {
							user = user.slice(1);
						}

						// Nothing left to filter on (e.g. "from:" or "from:@")
						if (user.length === 0) return null;

						// Webfinger address (user@domain) or a bare username?
						const lastAt = user.lastIndexOf("@");
						if (lastAt !== -1) {
							const name = user.slice(0, lastAt);
							const domain = user.slice(lastAt + 1);
							constructedFilters.push(
								`userName = ${quoteFilterValue(
									name,
								)} AND userHost = ${quoteFilterValue(domain)}`,
							);
						} else {
							constructedFilters.push(`userName = ${quoteFilterValue(user)}`);
						}
						return null;
					}

					if (term.startsWith("domain:")) {
						const domain = term.slice(7);
						const isLocal =
							domain.length === 0 ||
							domain === "local" ||
							domain === config.hostname;

						// NOTE(review): this relies on local notes being indexed
						// without a userHost field — confirm against ingestNote.
						constructedFilters.push(
							isLocal
								? "userHost NOT EXISTS"
								: `userHost = ${quoteFilterValue(domain)}`,
						);
						return null;
					}

					if (term.startsWith("after:")) {
						const unix = timestampToUnix(term.slice(6));
						// 0 means the date could not be parsed; drop the operator.
						if (unix !== 0) constructedFilters.push(`createdAt > ${unix}`);
						return null;
					}

					if (term.startsWith("before:")) {
						const unix = timestampToUnix(term.slice(7));
						if (unix !== 0) constructedFilters.push(`createdAt < ${unix}`);
						return null;
					}

					if (term.startsWith("filter:following")) {
						if (!userCtx) {
							logger.warn(
								"search filtered to follows called without user context",
							);
							return null;
						}

						// Fetch the IDs of everyone the context user follows
						const followings = await Followings.find({
							where: {
								followerId: userCtx.id,
							},
							select: {
								followeeId: true,
							},
						});
						const followIDs = followings.map((f) => f.followeeId);

						// NOTE(review): with no follows the operator is silently
						// dropped, so the search is not restricted at all.
						if (followIDs.length === 0) return null;

						constructedFilters.push(`userId IN [${followIDs.join(",")}]`);
						return null;
					}

					if (term.startsWith("filter:followers")) {
						if (!userCtx) {
							logger.warn(
								"search filtered to followers called without user context",
							);
							return null;
						}

						// Fetch the IDs of everyone following the context user
						const followers = await Followings.find({
							where: {
								followeeId: userCtx.id,
							},
							select: {
								followerId: true,
							},
						});
						const followIDs = followers.map((f) => f.followerId);

						if (followIDs.length === 0) return null;

						constructedFilters.push(`userId IN [${followIDs.join(",")}]`);
						return null;
					}

					if (term.startsWith("order:desc")) {
						sortRules.push("createdAt:desc");
						return null;
					}

					if (term.startsWith("order:asc")) {
						sortRules.push("createdAt:asc");
						return null;
					}

					return term;
				};

				// Detect search operators and remove them from the actual query
				const filteredSearchTerms = (
					await Promise.all(query.split(" ").map(consumeTerm))
				).filter((term): term is string => term !== null);
				const searchText = filteredSearchTerms.join(" ");

				// An empty search text with defined filters means we have a placeholder search
				// => https://www.meilisearch.com/docs/reference/api/search#placeholder-search
				// These have to be ordered manually, otherwise the *oldest* posts are returned
				// first, which we don't want. If the user has defined a sort rule, don't mess
				// with it. The text is trimmed so leftover empty terms (e.g. from a trailing
				// space) still count as an empty search.
				if (
					searchText.trim().length === 0 &&
					constructedFilters.length > 0 &&
					sortRules.length === 0
				) {
					sortRules.push("createdAt:desc");
				}

				// More than one sorting rule doesn't make sense; only the first one is kept.
				if (sortRules.length > 1) {
					sortRules = [sortRules[0]];
				}

				// An override sort takes precedence, user sorting is ignored here
				if (overrideSort) {
					sortRules = [overrideSort];
				}

				logger.info(`Searching for ${searchText}`);
				logger.info(`Limit: ${limit}`);
				logger.info(`Offset: ${offset}`);
				logger.info(`Filters: ${constructedFilters}`);
				logger.info(`Ordering: ${sortRules}`);

				return posts.search(searchText, {
					limit: limit,
					offset: offset,
					filter: constructedFilters,
					sort: sortRules,
				});
			},

			/**
			 * Converts one or more notes into index documents and submits them
			 * to the `posts` index. Does nothing for an empty array.
			 *
			 * @param ingestNotes - A single note or an array of notes.
			 */
			ingestNote: async (ingestNotes: Note | Note[]) => {
				const notes = ingestNotes instanceof Note ? [ingestNotes] : ingestNotes;

				// Nothing to index; skip the request entirely.
				if (notes.length === 0) return;

				const indexingBatch: MeilisearchNote[] = [];

				for (const note of notes) {
					// Load the author when the relation was not populated; the
					// username is stored in the document.
					if (note.user === undefined) {
						note.user = await Users.findOne({
							where: {
								id: note.userId,
							},
						});
					}

					// NOTE(review): the assertion is kept from the original code;
					// presumably some entity fields (e.g. userHost) are nullable
					// and would not satisfy MeilisearchNote otherwise — confirm.
					indexingBatch.push({
						id: note.id.toString(),
						text: note.text ? note.text : "",
						userId: note.userId,
						// NOTE(review): only an empty-string host is replaced by
						// the instance host; a null host is stored as-is.
						userHost:
							note.userHost !== ""
								? note.userHost
								: url.parse(config.host).host,
						channelId: note.channelId ? note.channelId : "",
						mediaAttachment: attachmentCategory(note.attachedFileTypes),
						userName: note.user?.username ?? "UNKNOWN",
						// getTime() is in milliseconds; the index stores seconds
						createdAt: note.createdAt.getTime() / 1000,
					} as MeilisearchNote);
				}

				await posts.addDocuments(indexingBatch, {
					primaryKey: "id",
				});

				return logger.info(`sent ${indexingBatch.length} posts for indexing`);
			},

			/**
			 * Reports server health, database size and the number of documents
			 * in the `posts` index (0 when the index is not listed in the stats).
			 */
			serverStats: async () => {
				// The two requests are independent, so run them concurrently.
				const [health, stats]: [Health, Stats] = await Promise.all([
					client.health(),
					client.getStats(),
				]);

				return {
					health: health.status,
					size: stats.databaseSize,
					// The index may be missing from the stats; avoid a TypeError
					indexed_count: stats.indexes["posts"]?.numberOfDocuments ?? 0,
				};
			},

			/**
			 * Submits the given notes for deletion from the `posts` index.
			 * Does nothing for an empty array.
			 *
			 * @param note - Note(s) or note ID(s) to delete.
			 * @throws Error when an ID is an empty string; nothing is deleted then.
			 */
			deleteNotes: async (note: Note | Note[] | string | string[]) => {
				const targets: (Note | string)[] = Array.isArray(note) ? note : [note];

				const deletionBatch = targets.map((n) => {
					if (n instanceof Note) {
						return n.id;
					}

					if (n.length > 0) return n;

					logger.error(
						`Failed to delete note from Meilisearch, invalid post ID: ${JSON.stringify(
							n,
						)}`,
					);

					throw new Error(
						`Invalid note ID passed to meilisearch deleteNote: ${JSON.stringify(
							n,
						)}`,
					);
				});

				// Nothing to delete; skip the request entirely.
				if (deletionBatch.length === 0) return;

				await posts.deleteDocuments(deletionBatch);

				logger.info(
					`submitted ${deletionBatch.length} large batch for deletion`,
				);
			},
	  }
	: null;
|