Files
bl/common/utils/go-sensitive-word-1.3.3/store/memory.go
xinian 15764ee027
Some checks failed
ci/woodpecker/push/my-first-workflow Pipeline failed
refactor: 使用标准库替换第三方HTTP客户端并清理依赖
2026-03-23 22:39:24 +08:00

182 lines
3.6 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package store
import (
"bufio"
"fmt"
"time"
"io"
"net/http"
"os"
"strings"
cmap "github.com/orcaman/concurrent-map/v2"
)
// MemoryModel is an in-memory word store backed by a concurrent map.
type MemoryModel struct {
// store holds every known word as a key; the empty-struct values carry no data.
store cmap.ConcurrentMap[string, struct{}]
// addChan receives each word as it is added (see LoadDict and AddWord).
addChan chan string
// delChan receives each word as it is removed (see DelWord).
delChan chan string
}
// NewMemoryModel returns an empty MemoryModel whose add and delete
// notification channels are both unbuffered.
func NewMemoryModel() *MemoryModel {
	model := new(MemoryModel)
	model.store = cmap.New[struct{}]()
	model.addChan = make(chan string)
	model.delChan = make(chan string)
	return model
}
// LoadDictPath loads the dictionary file at each of the given local paths, in
// order, by handing the opened file to LoadDict.
//
// It stops at the first path that cannot be opened or loaded and returns that
// error unchanged; words from earlier paths stay loaded. It returns nil when
// every path was loaded, including when no paths are given.
func (m *MemoryModel) LoadDictPath(paths ...string) error {
	for _, path := range paths {
		// Each file is handled in its own function scope so its deferred Close
		// runs before the next file is opened, not when LoadDictPath returns.
		err := func(path string) error {
			f, err := os.Open(path)
			if err != nil {
				// Nothing was opened, so there is nothing to close.
				return err
			}
			// The file is only read, so a Close error cannot lose data and is
			// deliberately ignored.
			defer func() { _ = f.Close() }()
			return m.LoadDict(f)
		}(path)
		if err != nil {
			return err
		}
	}
	return nil
}
// LoadDictEmbed loads dictionaries supplied as in-memory text, for example
// file contents embedded with go:embed. Each string is passed to LoadDict in
// order; the first error stops the loop and is returned.
func (m *MemoryModel) LoadDictEmbed(contents ...string) error {
	for i := range contents {
		if err := m.LoadDict(strings.NewReader(contents[i])); err != nil {
			return err
		}
	}
	return nil
}
// LoadDictHttp downloads a dictionary from each URL in turn using the standard
// net/http client and feeds every response body to LoadDict.
//
// It returns on the first failure: a request error (wrapped), a status code
// other than 200, or an error from LoadDict. A failure to close a response
// body is only printed as a warning.
func (m *MemoryModel) LoadDictHttp(urls ...string) error {
	// A single client serves all requests; the timeout keeps a stalled server
	// from blocking the load indefinitely.
	client := &http.Client{Timeout: 10 * time.Second}

	// fetch handles one URL inside its own function scope so the deferred
	// body close runs before the next download starts.
	fetch := func(target string) error {
		resp, err := client.Get(target)
		if err != nil {
			return fmt.Errorf("请求失败 %s: %w", target, err)
		}
		defer func() {
			if cerr := resp.Body.Close(); cerr != nil {
				fmt.Printf("警告: 关闭响应体失败 url=%s, err=%v\n", target, cerr)
			}
		}()

		// Only a 200 response is treated as a dictionary.
		if resp.StatusCode == http.StatusOK {
			return m.LoadDict(resp.Body)
		}
		return fmt.Errorf("http 状态码错误 url=%s, code=%d", target, resp.StatusCode)
	}

	for i := range urls {
		if err := fetch(urls[i]); err != nil {
			return err
		}
	}
	return nil
}
// LoadDict reads a dictionary from reader, one word per line, stores each
// word and announces it on the add channel.
//
// A trailing "\n" or "\r\n" is stripped from every line and empty lines are
// skipped, so blank or trailing lines never register an empty word. Lines of
// any length are kept whole. A final line without a newline is still loaded.
//
// Each word is sent on addChan, which NewMemoryModel creates unbuffered, so
// this call blocks until a receiver takes every word.
//
// It returns nil once the reader is exhausted, or the first non-EOF read
// error; words read before the error stay loaded.
func (m *MemoryModel) LoadDict(reader io.Reader) error {
	buf := bufio.NewReader(reader)
	for {
		// ReadString grows its result as needed, so a line longer than the
		// bufio buffer is not split into several fragments (ReadLine would
		// return such a line in pieces, each of which looked like a word).
		line, err := buf.ReadString('\n')

		word := line
		if strings.HasSuffix(word, "\n") {
			word = strings.TrimSuffix(word[:len(word)-1], "\r")
		}
		if word != "" {
			m.store.Set(word, struct{}{})
			m.addChan <- word
		}

		if err != nil {
			if err == io.EOF {
				return nil
			}
			return err
		}
	}
}
// ReadChan returns a receive-only channel that delivers every key obtained
// from store.Items() (useful for an initial load) and is closed afterwards.
// The channel is unbuffered, so the feeding goroutine only finishes once the
// caller has received every word.
func (m *MemoryModel) ReadChan() <-chan string {
	out := make(chan string)
	go func() {
		defer close(out)
		for word := range m.store.Items() {
			out <- word
		}
	}()
	return out
}
// ReadString returns all stored words as a slice of strings. The order
// follows map iteration and is therefore unspecified.
func (m *MemoryModel) ReadString() []string {
	// Reserve room for the current word count up front to avoid regrowth.
	words := make([]string, 0, m.store.Count())
	items := m.store.Items()
	for word := range items {
		words = append(words, word)
	}
	return words
}
// GetAddChan exposes the add-notification channel as receive-only; LoadDict
// and AddWord send each added word on it.
func (m *MemoryModel) GetAddChan() <-chan string {
	added := m.addChan
	return added
}
// GetDelChan exposes the delete-notification channel as receive-only; DelWord
// sends each removed word on it.
func (m *MemoryModel) GetDelChan() <-chan string {
	removed := m.delChan
	return removed
}
// AddWord stores each of the given custom sensitive words and then sends it
// on the add channel, in argument order. It always returns nil.
func (m *MemoryModel) AddWord(words ...string) error {
	for i := range words {
		w := words[i]
		m.store.Set(w, struct{}{})
		m.addChan <- w
	}
	return nil
}
// DelWord removes each of the given words from the store (in effect
// whitelisting it) and then sends it on the delete channel, in argument
// order. It always returns nil.
func (m *MemoryModel) DelWord(words ...string) error {
	for i := range words {
		w := words[i]
		m.store.Remove(w)
		m.delChan <- w
	}
	return nil
}