rudeshark.net/packages/backend/src/db/meilisearch.ts

372 lines
9.3 KiB
TypeScript
Raw Normal View History

import { Health, Index, MeiliSearch, Stats } from "meilisearch";
2023-05-29 05:34:18 +02:00
import { dbLogger } from "./logger.js";
2023-05-25 00:55:33 +02:00
import config from "@/config/index.js";
2023-05-29 05:34:18 +02:00
import { Note } from "@/models/entities/note.js";
2023-05-26 00:33:02 +02:00
import * as url from "url";
import { ILocalUser } from "@/models/entities/user.js";
2023-05-29 05:34:18 +02:00
import { Followings, Users } from "@/models/index.js";
2023-05-25 00:55:33 +02:00
// Sub-logger of the database logger, used for every Meilisearch message in this module.
const logger = dbLogger.createSubLogger("meilisearch", "gray", false);
// Handle to the "posts" index. Only assigned when a Meilisearch config is present.
let posts: Index;
// Meilisearch client. Only assigned when a Meilisearch config is present.
let client: MeiliSearch;
2023-05-25 00:55:33 +02:00
/**
 * Truthy when the server configuration contains a `meilisearch` section with
 * at least one of `host`, `port` or `apiKey` set. Everything else in this
 * module is gated on this value.
 */
const hasConfig =
	config.meilisearch &&
	(config.meilisearch.host ||
		config.meilisearch.port ||
		config.meilisearch.apiKey);
2023-05-25 00:55:33 +02:00
2023-05-29 20:09:52 +02:00
// Create the client and push the index settings, but only when Meilisearch is configured.
if (hasConfig) {
	// Inside this branch `hasConfig` is known to be truthy, so only the
	// per-setting fallbacks are needed.
	const host = config.meilisearch.host ?? "localhost";
	const port = config.meilisearch.port ?? 7700;
	const auth = config.meilisearch.apiKey ?? "";
	const ssl = config.meilisearch.ssl ?? false;

	logger.info("Connecting to MeiliSearch");

	client = new MeiliSearch({
		host: `${ssl ? "https" : "http"}://${host}:${port}`,
		apiKey: auth,
	});

	posts = client.index("posts");

	// The settings updates below are fire-and-forget: they are not awaited and
	// a failure is only logged, so module initialisation never throws here.

	// Full-text search only looks at the note text.
	posts
		.updateSearchableAttributes(["text"])
		.catch((e) =>
			logger.error(`Setting searchable attr failed, searches won't work: ${e}`),
		);

	// Attributes referenced by the filters built in `search`.
	posts
		.updateFilterableAttributes([
			"userName",
			"userHost",
			"mediaAttachment",
			"createdAt",
			"userId",
		])
		.catch((e) =>
			logger.error(
				`Setting filterable attr failed, advanced searches won't work: ${e}`,
			),
		);

	// Needed for the "createdAt:desc" ordering used by placeholder searches.
	posts
		.updateSortableAttributes(["createdAt"])
		.catch((e) =>
			logger.error(
				`Setting sortable attr failed, placeholder searches won't sort properly: ${e}`,
			),
		);

	// English and Japanese stop words.
	posts
		.updateStopWords([
			"the",
			"a",
			"as",
			"be",
			"of",
			"they",
			"these",
			"これ",
			"それ",
			"あれ",
			"この",
			"その",
			"あの",
			"ここ",
			"そこ",
			"あそこ",
			"こちら",
			"どこ",
			"だれ",
			"なに",
			"なん",
			"何",
			"私",
			"貴方",
			"貴方方",
			"我々",
			"私達",
			"あの人",
			"あのか",
			"彼女",
			"彼",
			"です",
			"ありま",
			"おりま",
			"います",
			"は",
			"が",
			"の",
			"に",
			"を",
			"で",
			"え",
			"から",
			"まで",
			"より",
			"も",
			"どの",
			"と",
			"し",
			"それで",
			"しかし",
		])
		.catch((e) =>
			logger.error(
				`Failed to set Meilisearch stop words, database size will be larger: ${e}`,
			),
		);

	// NOTE: logged as soon as the requests above have been issued; none of
	// them has been awaited at this point.
	logger.info("Connected to MeiliSearch");
}
2023-05-25 00:55:33 +02:00
/**
 * Shape of a note document as stored in the Meilisearch "posts" index.
 */
export type MeilisearchNote = {
	/** Note ID as a string; used as the primary key of the index. */
	id: string;
	/** Note text (the only searchable attribute). */
	text: string;
	/** Creation time as a number; `ingestNote` stores seconds since the Unix epoch. */
	createdAt: number;
	/** ID of the authoring user. */
	userId: string;
	/** Username of the authoring user. */
	userName: string;
	/** Host of the authoring user. */
	userHost: string;
	/** Channel ID of the note; `ingestNote` stores an empty string when there is none. */
	channelId: string;
	/** Attachment category of the note; `ingestNote` stores an empty string when nothing is attached. */
	mediaAttachment: string;
};
export default hasConfig
? {
2023-05-29 05:34:18 +02:00
/**
 * Runs a search against the "posts" index.
 *
 * The query is split on spaces; terms that are search operators are turned
 * into Meilisearch filters and removed from the text query, everything else
 * is passed through as the full-text query.
 *
 * @param query   Raw query string as typed by the user.
 * @param limit   Maximum number of hits to return.
 * @param offset  Number of hits to skip.
 * @param userCtx User performing the search; required for the
 *                `filter:following` / `filter:followers` operators.
 * @returns The promise returned by the index's `search` call.
 */
search: async (
	query: string,
	limit: number,
	offset: number,
	userCtx: ILocalUser | null,
) => {
	/// Advanced search syntax
	/// from:user / from:user@domain.com => filter by user + optional domain (a leading @ is ignored)
	/// has:image/video/audio/text/file => filter by attachment types
	/// domain:domain.com => filter by domain
	/// before:Date => show posts made before Date
	/// after:Date => show posts made after Date
	/// "text" => get posts with exact text between quotes
	/// filter:following => show results only from users you follow
	/// filter:followers => show results only from followers

	// Operator values come straight from user input. Always emit them as a
	// double-quoted filter string with backslashes and quotes escaped, so that
	// special characters or operator keywords cannot break or alter the filter.
	const quote = (value: string): string =>
		`"${value.replace(/\\/g, "\\\\").replace(/"/g, '\\"')}"`;

	const constructedFilters: string[] = [];

	// Detect search operators and remove them from the actual query
	const processedTerms = await Promise.all(
		query.split(" ").map(async (term): Promise<string | null> => {
			if (term.startsWith("has:")) {
				const fileType = term.slice(4);
				constructedFilters.push(`mediaAttachment = ${quote(fileType)}`);
				return null;
			}

			if (term.startsWith("from:")) {
				// Accept "user", "@user", "user@domain" and "@user@domain"
				const handle = term.slice(5).replace(/^@/, "");
				const separator = handle.indexOf("@");
				const user = separator === -1 ? handle : handle.slice(0, separator);
				const domain = separator === -1 ? "" : handle.slice(separator + 1);
				// An empty value would produce an invalid filter expression
				if (user.length === 0) return null;
				constructedFilters.push(`userName = ${quote(user)}`);
				if (domain.length > 0) {
					constructedFilters.push(`userHost = ${quote(domain)}`);
				}
				return null;
			}

			if (term.startsWith("domain:")) {
				const domain = term.slice(7);
				// An empty value would produce an invalid filter expression
				if (domain.length === 0) return null;
				constructedFilters.push(`userHost = ${quote(domain)}`);
				return null;
			}

			if (term.startsWith("after:")) {
				// Try to parse the timestamp as JavaScript Date; unparsable values are dropped
				const date = Date.parse(term.slice(6));
				if (isNaN(date)) return null;
				// Date.parse yields milliseconds, the index stores seconds
				constructedFilters.push(`createdAt > ${date / 1000}`);
				return null;
			}

			if (term.startsWith("before:")) {
				// Try to parse the timestamp as JavaScript Date; unparsable values are dropped
				const date = Date.parse(term.slice(7));
				if (isNaN(date)) return null;
				// Date.parse yields milliseconds, the index stores seconds
				constructedFilters.push(`createdAt < ${date / 1000}`);
				return null;
			}

			if (term.startsWith("filter:following")) {
				// Check if we got a context user
				if (userCtx) {
					// Fetch the users the context user follows from the DB
					const followedUsers = await Followings.find({
						where: {
							followerId: userCtx.id,
						},
						select: {
							followeeId: true,
						},
					});
					const followIDs = followedUsers.map((user) => user.followeeId);
					// NOTE(review): with no follows the operator is dropped and
					// results are not restricted at all — confirm this is intended.
					if (followIDs.length === 0) return null;
					constructedFilters.push(`userId IN [${followIDs.join(",")}]`);
				} else {
					logger.warn(
						"search filtered to follows called without user context",
					);
				}
				return null;
			}

			if (term.startsWith("filter:followers")) {
				// Check if we got a context user
				if (userCtx) {
					// Fetch the users following the context user from the DB
					const followingUsers = await Followings.find({
						where: {
							followeeId: userCtx.id,
						},
						select: {
							followerId: true,
						},
					});
					const followIDs = followingUsers.map((user) => user.followerId);
					// NOTE(review): with no followers the operator is dropped and
					// results are not restricted at all — confirm this is intended.
					if (followIDs.length === 0) return null;
					constructedFilters.push(`userId IN [${followIDs.join(",")}]`);
				} else {
					logger.warn(
						"search filtered to followers called without user context",
					);
				}
				return null;
			}

			// Not an operator: keep it as part of the full-text query
			return term;
		}),
	);

	const filteredSearchTerms = processedTerms.filter(
		(term): term is string => term !== null,
	);
	const searchText = filteredSearchTerms.join(" ");

	const sortRules: string[] = [];
	// An empty search term with defined filters means we have a placeholder search => https://www.meilisearch.com/docs/reference/api/search#placeholder-search
	// These have to be ordered manually, otherwise the *oldest* posts are returned first, which we don't want.
	// The text is trimmed so that leftover spaces around removed operators still count as "empty".
	if (searchText.trim().length === 0 && constructedFilters.length > 0) {
		sortRules.push("createdAt:desc");
	}

	logger.info(`Searching for ${searchText}`);
	logger.info(`Limit: ${limit}`);
	logger.info(`Offset: ${offset}`);
	logger.info(`Filters: ${constructedFilters}`);
	logger.info(`Ordering: ${sortRules}`);

	return posts.search(searchText, {
		limit: limit,
		offset: offset,
		filter: constructedFilters,
		sort: sortRules,
	});
},
/**
 * Converts one or more notes into index documents and submits them to the
 * "posts" index in a single batch.
 *
 * @param ingestNotes A single note or an array of notes.
 */
ingestNote: async (ingestNotes: Note | Note[]) => {
	const notes = ingestNotes instanceof Note ? [ingestNotes] : ingestNotes;

	// Attachment categories that are indexed under their own name; any other
	// attachment is indexed as "file".
	const knownAttachmentTypes = ["image", "video", "audio", "text"];

	const indexingBatch: MeilisearchNote[] = [];

	for (const note of notes) {
		// The author is needed for the username; load it if it is not attached to the note
		if (note.user === undefined) {
			note.user = await Users.findOne({
				where: {
					id: note.userId,
				},
			});
		}

		// Only the first attachment decides the category; "" means no attachment
		let attachmentType = "";
		if (note.attachedFileTypes.length > 0) {
			const category = note.attachedFileTypes[0].split("/")[0];
			attachmentType = knownAttachmentTypes.includes(category)
				? category
				: "file";
		}

		indexingBatch.push(<MeilisearchNote>{
			id: note.id.toString(),
			text: note.text ? note.text : "",
			userId: note.userId,
			// Fall back to this server's host whenever the note carries no host.
			// Any falsy value is treated as "no host", not just "", so that a
			// null/undefined host never ends up in the index.
			// NOTE(review): presumably local notes have no userHost — confirm against the Note entity.
			userHost: note.userHost
				? note.userHost
				: url.parse(config.host).host,
			channelId: note.channelId ? note.channelId : "",
			mediaAttachment: attachmentType,
			userName: note.user?.username ?? "UNKNOWN",
			createdAt: note.createdAt.getTime() / 1000, // division by 1000 is necessary because Node returns in ms-accuracy
		});
	}

	await posts.addDocuments(indexingBatch, {
		primaryKey: "id",
	});
	logger.info(`sent ${indexingBatch.length} posts for indexing`);
},
/**
 * Fetches the Meilisearch health status and statistics.
 *
 * @returns The health status, the database size and the number of documents
 *          in the "posts" index (0 when the stats contain no "posts" entry).
 */
serverStats: async () => {
	// The two requests are independent, so issue them concurrently
	const [health, stats]: [Health, Stats] = await Promise.all([
		client.health(),
		client.getStats(),
	]);

	return {
		health: health.status,
		size: stats.databaseSize,
		// Guard against the "posts" entry being missing from the stats
		// (e.g. the index has not been created on the server yet) instead of
		// throwing a TypeError.
		indexed_count: stats.indexes["posts"]?.numberOfDocuments ?? 0,
	};
},
2023-05-29 18:33:57 +02:00
/**
 * Removes notes from the "posts" index.
 *
 * @param note A note, a note ID, or an array of either.
 * @throws Error when an empty string is passed as a note ID; nothing is
 *         submitted for deletion in that case.
 */
deleteNotes: async (note: Note | Note[] | string | string[]) => {
	// Normalise the single-value forms to an array
	const targets: (Note | string)[] =
		note instanceof Note || typeof note === "string" ? [note] : note;

	// Resolve every entry to a document ID; an empty ID aborts the whole call
	const deletionBatch = targets.map((n): string => {
		if (n instanceof Note) {
			return n.id;
		}

		if (n.length > 0) return n;

		logger.error(
			`Failed to delete note from Meilisearch, invalid post ID: ${JSON.stringify(
				n,
			)}`,
		);

		throw new Error(
			`Invalid note ID passed to meilisearch deleteNote: ${JSON.stringify(
				n,
			)}`,
		);
	});

	await posts.deleteDocuments(deletionBatch);
	logger.info(`submitted ${deletionBatch.length} large batch for deletion`);
},
2023-05-29 05:34:18 +02:00
}
2023-05-26 03:06:41 +02:00
: null;