From 90f01edddc87793678223354092ec3083c60fdfc Mon Sep 17 00:00:00 2001 From: cg sama Date: Wed, 20 Sep 2023 00:07:41 +0000 Subject: [PATCH] Adding language filter feature Co-authored-by: CGsama --- locales/en-US.yml | 5 +++ locales/ja-JP.yml | 5 +++ locales/zh-CN.yml | 5 +++ packages/backend/package.json | 2 + packages/backend/src/@types/cld.d.ts | 41 +++++++++++++++++++ packages/backend/src/@types/langdetect.d.ts | 7 ++++ .../backend/src/models/repositories/note.ts | 12 ++++++ packages/client/src/components/MkNote.vue | 7 +++- .../client/src/components/MkNoteDetailed.vue | 7 +++- packages/client/src/components/MkNoteSub.vue | 7 +++- .../client/src/pages/settings/word-mute.vue | 20 ++++++++- .../client/src/scripts/check-word-mute.ts | 37 +++++++++++++++++ packages/client/src/store.ts | 4 ++ 13 files changed, 155 insertions(+), 4 deletions(-) create mode 100644 packages/backend/src/@types/cld.d.ts create mode 100644 packages/backend/src/@types/langdetect.d.ts diff --git a/locales/en-US.yml b/locales/en-US.yml index fa3c4af25..aa3e03adc 100644 --- a/locales/en-US.yml +++ b/locales/en-US.yml @@ -1375,14 +1375,19 @@ _menuDisplay: hide: "Hide" _wordMute: muteWords: "Muted words" + muteLangs: "Muted Languages" muteWordsDescription: "Separate with spaces for an AND condition or with line breaks for an OR condition." muteWordsDescription2: "Surround keywords with slashes to use regular expressions." + muteLangsDescription: "Separate with spaces or line breaks for an OR condition." + muteLangsDescription2: "Use language code e.g. en, fr, ja, zh." softDescription: "Hide posts that fulfil the set conditions from the timeline." + langDescription: "Hide posts that match set language from the timeline." hardDescription: "Prevents posts fulfilling the set conditions from being added to the timeline. In addition, these posts will not be added to the timeline even if the conditions are changed." 
soft: "Soft" + lang: "Language" hard: "Hard" mutedNotes: "Muted posts" _instanceMute: diff --git a/locales/ja-JP.yml b/locales/ja-JP.yml index 4cabff1d0..9c78e1de5 100644 --- a/locales/ja-JP.yml +++ b/locales/ja-JP.yml @@ -1200,11 +1200,16 @@ _menuDisplay: hide: "隠す" _wordMute: muteWords: "ミュートするワード" + muteLangs: "ミュートされた言語" muteWordsDescription: "スペースで区切るとAND指定になり、改行で区切るとOR指定になります。" muteWordsDescription2: "キーワードをスラッシュで囲むと正規表現になります。" + muteLangsDescription: "OR 条件の場合はスペースまたは改行で区切ります。" + muteLangsDescription2: "言語コードを使用します。例: en, fr, ja, zh." softDescription: "指定した条件の投稿をタイムラインから隠します。" + langDescription: "設定した言語に一致する投稿をタイムラインから非表示にします。" hardDescription: "指定した条件の投稿をタイムラインに追加しないようにします。追加されなかった投稿は、条件を変更しても除外されたままになります。" soft: "ソフト" + lang: "言語" hard: "ハード" mutedNotes: "ミュートされた投稿" _instanceMute: diff --git a/locales/zh-CN.yml b/locales/zh-CN.yml index cd18d6ce2..66983ae46 100644 --- a/locales/zh-CN.yml +++ b/locales/zh-CN.yml @@ -1110,11 +1110,16 @@ _menuDisplay: hide: "隐藏" _wordMute: muteWords: "过滤词" + muteLangs: "过滤语言" muteWordsDescription: "AND 条件用空格分隔,OR 条件用换行符分隔。" muteWordsDescription2: "将关键字用斜线括起来表示正则表达式。" + muteLangsDescription: "OR 条件用空格,换行符分隔" + muteLangsDescription2: "使用语言代码。例: en, fr, ja, zh." 
softDescription: "隐藏时间线中指定条件的帖子。" + langDescription: "从时间线中隐藏与设置语言匹配的帖子。" hardDescription: "防止将具有指定条件的帖子添加到时间线。 即使您更改条件,原先未添加的帖文也会被排除在外。" soft: "软过滤" + lang: "语言" hard: "硬过滤" mutedNotes: "已过滤的帖子" _instanceMute: diff --git a/packages/backend/package.json b/packages/backend/package.json index 03f54b2d3..474751965 100644 --- a/packages/backend/package.json +++ b/packages/backend/package.json @@ -54,6 +54,7 @@ "chalk": "5.3.0", "chalk-template": "0.4.0", "chokidar": "^3.5.3", + "cld": "^2.9.0", "cli-highlight": "2.1.11", "color-convert": "2.0.1", "content-disposition": "0.5.4", @@ -87,6 +88,7 @@ "koa-send": "5.0.1", "koa-slow": "2.1.0", "koa-views": "7.0.2", + "langdetect": "0.2.1", "megalodon": "workspace:*", "meilisearch": "0.34.1", "mfm-js": "0.23.3", diff --git a/packages/backend/src/@types/cld.d.ts b/packages/backend/src/@types/cld.d.ts new file mode 100644 index 000000000..ea4ec0970 --- /dev/null +++ b/packages/backend/src/@types/cld.d.ts @@ -0,0 +1,41 @@ +/** Ambient typings for the "cld" package's detect() overloads; helper interfaces stay module-local, only detect() is exported. */ declare module "cld" { + interface Language { + readonly name: string; + readonly code: string; + readonly percent: number; + readonly score: number; + } + interface Chunk { + readonly name: string; + readonly code: string; + readonly offset: number; + readonly bytes: number; + } + interface Options { + readonly isHTML: false; + readonly languageHint: string; + readonly encodingHint: string; + readonly tldHint: string; + readonly httpHint: string; + } + interface DetectLanguage { + readonly reliable: boolean; + readonly textBytes: number; + readonly languages: Language[]; + readonly chunks: Chunk[]; + } + export function detect( + text: string, + options: Options, + callback: (err: string, result: DetectLanguage) => void, + ): void; + export function detect( + text: string, + callback: (err: string, result: DetectLanguage) => void, + ): void; + export function detect( + text: string, + options: Options, + ): Promise<DetectLanguage>; + export function detect(text: string): Promise<DetectLanguage>; +} diff --git
a/packages/backend/src/@types/langdetect.d.ts b/packages/backend/src/@types/langdetect.d.ts new file mode 100644 index 000000000..bcfa6f5ca --- /dev/null +++ b/packages/backend/src/@types/langdetect.d.ts @@ -0,0 +1,7 @@ +declare module "langdetect" { + interface DetectResult { + lang: string; + prob: number; + } + export function detect(words: string): DetectResult[]; +} diff --git a/packages/backend/src/models/repositories/note.ts b/packages/backend/src/models/repositories/note.ts index 453179bd6..b9e37c52f 100644 --- a/packages/backend/src/models/repositories/note.ts +++ b/packages/backend/src/models/repositories/note.ts @@ -27,6 +27,8 @@ import { } from "@/misc/populate-emojis.js"; import { db } from "@/db/postgre.js"; import { IdentifiableError } from "@/misc/identifiable-error.js"; +import cld from "cld"; +import { detect } from "langdetect"; export async function populatePoll(note: Note, meId: User["id"] | null) { const poll = await Polls.findOneByOrFail({ noteId: note.id }); @@ -201,6 +203,15 @@ export const NoteRepository = db.getRepository(Note).extend({ note.emojis.concat(reactionEmojiNames), host, ); + + let lang; + try { + lang = (await cld.detect((note.text || "") + (note.cw || ""))) + .languages[0].code; + } catch (e) { + lang = + detect((note.text || "") + (note.cw || ""))?.[0]?.lang || "unknown"; + } const reactionEmoji = await populateEmojis(reactionEmojiNames, host); const packed: Packed<"Note"> = await awaitAll({ id: note.id, @@ -260,6 +271,7 @@ export const NoteRepository = db.getRepository(Note).extend({ : undefined, } : {}), + lang: lang, }); if (packed.user.isCat && packed.user.speakAsCat && packed.text) { diff --git a/packages/client/src/components/MkNote.vue b/packages/client/src/components/MkNote.vue index 77c9e1856..9b7dc9fdb 100644 --- a/packages/client/src/components/MkNote.vue +++ b/packages/client/src/components/MkNote.vue @@ -354,7 +354,12 @@ const isMyRenote = $i && $i.id === note.value.userId; const showContent = ref(false); const 
isDeleted = ref(false); const muted = ref( - getWordSoftMute(note.value, $i, defaultStore.state.mutedWords), + getWordSoftMute( + note.value, + $i, + defaultStore.state.mutedWords, + defaultStore.state.mutedLangs, + ), ); const translation = ref(null); const translating = ref(false); diff --git a/packages/client/src/components/MkNoteDetailed.vue b/packages/client/src/components/MkNoteDetailed.vue index 9da05073a..f27a3d5b2 100644 --- a/packages/client/src/components/MkNoteDetailed.vue +++ b/packages/client/src/components/MkNoteDetailed.vue @@ -210,7 +210,12 @@ const reactButton = ref(); const showContent = ref(false); const isDeleted = ref(false); const muted = ref( - getWordSoftMute(note.value, $i, defaultStore.state.mutedWords), + getWordSoftMute( + note.value, + $i, + defaultStore.state.mutedWords, + defaultStore.state.mutedLangs, + ), ); const translation = ref(null); const translating = ref(false); diff --git a/packages/client/src/components/MkNoteSub.vue b/packages/client/src/components/MkNoteSub.vue index 7deea9646..99dc6e652 100644 --- a/packages/client/src/components/MkNoteSub.vue +++ b/packages/client/src/components/MkNoteSub.vue @@ -266,7 +266,12 @@ const appearNote = computed(() => ); const isDeleted = ref(false); const muted = ref( - getWordSoftMute(note.value, $i, defaultStore.state.mutedWords), + getWordSoftMute( + note.value, + $i, + defaultStore.state.mutedWords, + defaultStore.state.mutedLangs, + ), ); const translation = ref(null); const translating = ref(false); diff --git a/packages/client/src/pages/settings/word-mute.vue b/packages/client/src/pages/settings/word-mute.vue index 9a604c13a..c20f596be 100644 --- a/packages/client/src/pages/settings/word-mute.vue +++ b/packages/client/src/pages/settings/word-mute.vue @@ -17,6 +17,17 @@ }} + {{ + i18n.ts._wordMute.langDescription + }} + + {{ i18n.ts._wordMute.muteLangs }} + +
const tab = ref("soft"); const softMutedWords = ref(render(defaultStore.state.mutedWords)); +const softMutedLangs = ref(render(defaultStore.state.mutedLangs)); const hardMutedWords = ref(render($i!.mutedWords)); const hardWordMutedNotesCount = ref(null); const changed = ref(false); @@ -88,6 +100,10 @@ watch(softMutedWords, () => { changed.value = true; }); +watch(softMutedLangs, () => { + changed.value = true; +}); + watch(hardMutedWords, () => { changed.value = true; }); @@ -134,9 +150,10 @@ async function save() { return lines; }; - let softMutes, hardMutes; + let softMutes, softMLangs, hardMutes; try { softMutes = parseMutes(softMutedWords.value, i18n.ts._wordMute.soft); + softMLangs = parseMutes(softMutedLangs.value, i18n.ts._wordMute.lang); hardMutes = parseMutes(hardMutedWords.value, i18n.ts._wordMute.hard); } catch (err) { // already displayed error message in parseMutes @@ -144,6 +161,7 @@ async function save() { } defaultStore.set("mutedWords", softMutes); + defaultStore.set("mutedLangs", softMLangs); await os.api("i/update", { mutedWords: hardMutes, });
diff --git a/packages/client/src/scripts/check-word-mute.ts b/packages/client/src/scripts/check-word-mute.ts index bec824fb2..b54ca7311 100644 --- a/packages/client/src/scripts/check-word-mute.ts +++ b/packages/client/src/scripts/check-word-mute.ts @@ -6,6 +6,19 @@ export interface Muted { const NotMuted = { muted: false, matched: [] }; +/** Returns a muted result when the note's language, reduced to its primary subtag (the part before "-"), is listed in mutedLangs; note.lang may be a plain code string (as the backend packs it) or a [{ lang }] list. */ +function checkLangMute( + note: NoteLike, + mutedLangs: Array<string | string[]>, +): Muted { + const mutedLangList = new Set(mutedLangs.flat()); + const detected = typeof note.lang === "string" ? note.lang : note.lang?.[0]?.lang; + if (detected && mutedLangList.has(detected.split("-")[0])) { + return { muted: true, matched: [detected] }; + } + return NotMuted; +} + function checkWordMute( note: NoteLike, mutedWords: Array, @@ -62,6 +75,7 @@ export function getWordSoftMute( note: Record, me: Record | null | undefined, mutedWords: Array, + mutedLangs: Array<string | string[]> = [], ): Muted { // 自分自身 if (me && note.userId === me.id) { @@ -91,6 +105,29 @@ export function getWordSoftMute( } } } + if (mutedLangs.length > 0) { + const noteLangMuted = checkLangMute(note, mutedLangs); + if (noteLangMuted.muted) { + noteLangMuted.what = "note"; + return noteLangMuted; + } + /* Renote or quote: check the renoted note itself, not the wrapping note again. */ + if (note.renote) { + const renoteLangMuted = checkLangMute(note.renote, mutedLangs); + if (renoteLangMuted.muted) { + renoteLangMuted.what = note.text == null ? "renote" : "quote"; + return renoteLangMuted; + } + } + /* Reply: check the note being replied to. */ + if (note.reply) { + const replyLangMuted = checkLangMute(note.reply, mutedLangs); + if (replyLangMuted.muted) { + replyLangMuted.what = "reply"; + return replyLangMuted; + } + } + } return NotMuted; } diff --git a/packages/client/src/store.ts b/packages/client/src/store.ts index 84ac279dd..9a545415e 100644 --- a/packages/client/src/store.ts +++ b/packages/client/src/store.ts @@ -101,6 +101,10 @@ export const defaultStore = markRaw( where: "account", default: [], }, + mutedLangs: { + where: "account", + default: [], + }, mutedAds: { where: "account", default: [] as string[],