2025-09-09 01:11:10 +08:00
|
|
|
|
package store
|
|
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
|
"bufio"
|
2026-03-23 22:39:24 +08:00
|
|
|
|
"fmt"
|
|
|
|
|
|
"time"
|
|
|
|
|
|
|
2025-09-09 01:11:10 +08:00
|
|
|
|
"io"
|
|
|
|
|
|
"net/http"
|
|
|
|
|
|
"os"
|
|
|
|
|
|
"strings"
|
2026-03-23 22:39:24 +08:00
|
|
|
|
|
|
|
|
|
|
cmap "github.com/orcaman/concurrent-map/v2"
|
2025-09-09 01:11:10 +08:00
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
// MemoryModel 使用并发 map 实现的内存词库
|
|
|
|
|
|
type MemoryModel struct {
|
|
|
|
|
|
store cmap.ConcurrentMap[string, struct{}]
|
|
|
|
|
|
addChan chan string
|
|
|
|
|
|
delChan chan string
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// NewMemoryModel 创建新的内存模型
|
|
|
|
|
|
func NewMemoryModel() *MemoryModel {
|
|
|
|
|
|
return &MemoryModel{
|
|
|
|
|
|
store: cmap.New[struct{}](),
|
|
|
|
|
|
addChan: make(chan string),
|
|
|
|
|
|
delChan: make(chan string),
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 从本地路径加载词库文件
|
|
|
|
|
|
func (m *MemoryModel) LoadDictPath(paths ...string) error {
|
|
|
|
|
|
for _, path := range paths {
|
|
|
|
|
|
err := func(path string) error {
|
|
|
|
|
|
f, err := os.Open(path)
|
|
|
|
|
|
defer func(f *os.File) {
|
|
|
|
|
|
_ = f.Close()
|
|
|
|
|
|
}(f)
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
return err
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return m.LoadDict(f)
|
|
|
|
|
|
}(path)
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
return err
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return nil
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 加载嵌入式文本词库(go:embed)
|
|
|
|
|
|
func (m *MemoryModel) LoadDictEmbed(contents ...string) error {
|
|
|
|
|
|
for _, con := range contents {
|
|
|
|
|
|
reader := strings.NewReader(con)
|
|
|
|
|
|
if err := m.LoadDict(reader); err != nil {
|
|
|
|
|
|
return err
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return nil
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 从远程 HTTP 地址加载词库
|
2026-03-23 22:39:24 +08:00
|
|
|
|
// LoadDictHttp 批量从 HTTP 地址加载字典(标准库 net/http 实现)
|
2025-09-09 01:11:10 +08:00
|
|
|
|
func (m *MemoryModel) LoadDictHttp(urls ...string) error {
|
2026-03-23 22:39:24 +08:00
|
|
|
|
// 【标准库】创建带超时的客户端,防止请求卡死
|
|
|
|
|
|
client := &http.Client{
|
|
|
|
|
|
Timeout: 10 * time.Second, // 超时控制,非常重要
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-09-09 01:11:10 +08:00
|
|
|
|
for _, url := range urls {
|
2026-03-23 22:39:24 +08:00
|
|
|
|
// 立即执行函数,解决 defer 循环变量问题
|
|
|
|
|
|
err := func(u string) error {
|
|
|
|
|
|
// 标准库 GET 请求
|
|
|
|
|
|
resp, err := client.Get(u)
|
2025-09-09 01:11:10 +08:00
|
|
|
|
if err != nil {
|
2026-03-23 22:39:24 +08:00
|
|
|
|
return fmt.Errorf("请求失败 %s: %w", u, err)
|
2025-09-09 01:11:10 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-03-23 22:39:24 +08:00
|
|
|
|
// 必须 defer 关闭 body,防止资源泄漏(标准库固定写法)
|
|
|
|
|
|
defer func() {
|
|
|
|
|
|
closeErr := resp.Body.Close()
|
|
|
|
|
|
if closeErr != nil {
|
|
|
|
|
|
fmt.Printf("警告: 关闭响应体失败 url=%s, err=%v\n", u, closeErr)
|
|
|
|
|
|
}
|
|
|
|
|
|
}()
|
|
|
|
|
|
|
|
|
|
|
|
// 状态码判断
|
|
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
|
|
|
|
return fmt.Errorf("http 状态码错误 url=%s, code=%d", u, resp.StatusCode)
|
|
|
|
|
|
}
|
2025-09-09 01:11:10 +08:00
|
|
|
|
|
2026-03-23 22:39:24 +08:00
|
|
|
|
// 加载字典(和你原来逻辑一样)
|
|
|
|
|
|
return m.LoadDict(resp.Body)
|
2025-09-09 01:11:10 +08:00
|
|
|
|
}(url)
|
2026-03-23 22:39:24 +08:00
|
|
|
|
|
|
|
|
|
|
// 任意一个失败,立即返回
|
2025-09-09 01:11:10 +08:00
|
|
|
|
if err != nil {
|
|
|
|
|
|
return err
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return nil
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 读取词库(按行解析)
|
|
|
|
|
|
func (m *MemoryModel) LoadDict(reader io.Reader) error {
|
|
|
|
|
|
buf := bufio.NewReader(reader)
|
|
|
|
|
|
for {
|
|
|
|
|
|
line, _, err := buf.ReadLine()
|
|
|
|
|
|
if err != nil {
|
|
|
|
|
|
if err != io.EOF {
|
|
|
|
|
|
return err
|
|
|
|
|
|
}
|
|
|
|
|
|
break
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
m.store.Set(string(line), struct{}{})
|
|
|
|
|
|
m.addChan <- string(line)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return nil
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 返回所有敏感词的读取通道(可用于初始化加载)
|
|
|
|
|
|
func (m *MemoryModel) ReadChan() <-chan string {
|
|
|
|
|
|
ch := make(chan string)
|
|
|
|
|
|
|
|
|
|
|
|
go func() {
|
|
|
|
|
|
for key := range m.store.Items() {
|
|
|
|
|
|
ch <- key
|
|
|
|
|
|
}
|
|
|
|
|
|
close(ch)
|
|
|
|
|
|
}()
|
|
|
|
|
|
|
|
|
|
|
|
return ch
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 获取所有敏感词(字符串数组)
|
|
|
|
|
|
func (m *MemoryModel) ReadString() []string {
|
|
|
|
|
|
res := make([]string, 0, m.store.Count())
|
|
|
|
|
|
|
|
|
|
|
|
for key := range m.store.Items() {
|
|
|
|
|
|
res = append(res, key)
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return res
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 获取新增词通道
|
|
|
|
|
|
func (m *MemoryModel) GetAddChan() <-chan string {
|
|
|
|
|
|
return m.addChan
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 获取删除词通道
|
|
|
|
|
|
func (m *MemoryModel) GetDelChan() <-chan string {
|
|
|
|
|
|
return m.delChan
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 添加自定义敏感词
|
|
|
|
|
|
func (m *MemoryModel) AddWord(words ...string) error {
|
|
|
|
|
|
for _, word := range words {
|
|
|
|
|
|
m.store.Set(word, struct{}{})
|
|
|
|
|
|
m.addChan <- word
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return nil
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// 删除敏感词(敏感词加白名单)
|
|
|
|
|
|
func (m *MemoryModel) DelWord(words ...string) error {
|
|
|
|
|
|
for _, word := range words {
|
|
|
|
|
|
m.store.Remove(word)
|
|
|
|
|
|
m.delChan <- word
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return nil
|
|
|
|
|
|
}
|