-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmlxclip.go
156 lines (105 loc) · 2.75 KB
/
mlxclip.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
//go:build mlxclip
package mlxclip
import (
"context"
"encoding/json"
"fmt"
"net/url"
"os"
"os/exec"
wof_embeddings "github.com/whosonfirst/go-dedupe/embeddings"
)
// MLXClipEmbedder produces embeddings by running an external Python script
// with "python3" and decoding the JSON the script leaves in a temporary file.
type MLXClipEmbedder struct {
// Embedded interface; NewMLXClipEmbedder returns *MLXClipEmbedder as this type.
wof_embeddings.Embedder
// embeddings_py is the filesystem path of the Python script that is executed
// to generate embeddings (taken from the path component of the constructor URI).
embeddings_py string
}
// init registers NewMLXClipEmbedder with the embeddings package under the
// name "mlxclip". A registration failure is unrecoverable and panics.
func init() {
	if err := wof_embeddings.RegisterEmbedder(context.Background(), "mlxclip", NewMLXClipEmbedder); err != nil {
		panic(err)
	}
}
// NewMLXClipEmbedder returns an Embedder that derives embeddings by running
// the Python script located at the path component of uri.
//
// The script must exist when the embedder is constructed. If uri cannot be
// parsed, or the script path cannot be stat'ed, a wrapped error is returned
// (the underlying error remains reachable with errors.Is / errors.As).
func NewMLXClipEmbedder(ctx context.Context, uri string) (wof_embeddings.Embedder, error) {
	u, err := url.Parse(uri)
	if err != nil {
		return nil, fmt.Errorf("Failed to parse URI, %w", err)
	}

	embeddings_py := u.Path

	// Fail early, with context, rather than at the first python3 invocation.
	if _, err := os.Stat(embeddings_py); err != nil {
		return nil, fmt.Errorf("Failed to stat embeddings script, %w", err)
	}

	e := &MLXClipEmbedder{
		embeddings_py: embeddings_py,
	}

	return e, nil
}
// Embeddings returns the embeddings for content as float64 values. It
// delegates to Embeddings32 and widens the result.
func (e *MLXClipEmbedder) Embeddings(ctx context.Context, content string) ([]float64, error) {
	vec32, err := e.Embeddings32(ctx, content)
	if err != nil {
		return nil, err
	}

	vec64 := e.asFloat64(vec32)
	return vec64, nil
}
// Embeddings32 returns the embeddings for content as float32 values by
// invoking the embeddings script with the "text" target.
func (e *MLXClipEmbedder) Embeddings32(ctx context.Context, content string) ([]float32, error) {
	const target = "text"
	return e.generate_embeddings(ctx, target, content)
}
// ImageEmbeddings returns the embeddings for the image bytes in data as
// float64 values. It delegates to ImageEmbeddings32 and widens the result.
func (e *MLXClipEmbedder) ImageEmbeddings(ctx context.Context, data []byte) ([]float64, error) {
	vec32, err := e.ImageEmbeddings32(ctx, data)
	if err != nil {
		return nil, err
	}

	vec64 := e.asFloat64(vec32)
	return vec64, nil
}
// ImageEmbeddings32 returns the embeddings for the image bytes in data as
// float32 values.
//
// The bytes are written to a temporary file whose path is handed to the
// embeddings script with the "image" target. The temporary file is removed
// when the method returns.
func (e *MLXClipEmbedder) ImageEmbeddings32(ctx context.Context, data []byte) ([]float32, error) {
	tmp, err := os.CreateTemp("", "mlxclip.*.img")
	if err != nil {
		return nil, fmt.Errorf("Failed to create tmp file, %w", err)
	}
	defer os.Remove(tmp.Name()) // clean up

	if _, err := tmp.Write(data); err != nil {
		// Close explicitly so the file handle is not leaked on this error
		// path; the write error is the one worth reporting.
		_ = tmp.Close()
		return nil, fmt.Errorf("Failed to write image data to tmp file, %w", err)
	}

	// Close before running the script so the data is flushed to the file.
	if err := tmp.Close(); err != nil {
		return nil, fmt.Errorf("Failed to close tmp file, %w", err)
	}

	return e.generate_embeddings(ctx, "image", tmp.Name())
}
// generate_embeddings runs the embeddings script and returns the vector it
// produced.
//
// The script is invoked as:
//
//	python3 <embeddings_py> <target> <input> <output path>
//
// where target is "text" or "image" in this file's callers, input is the text
// itself or the path of an image file, and the output path is a temporary
// file. After the command succeeds that file is decoded as a JSON array of
// numbers. The temporary file is removed when the method returns.
//
// When the command fails, whatever it wrote to stdout/stderr is included in
// the returned error so that script failures can be diagnosed.
func (e *MLXClipEmbedder) generate_embeddings(ctx context.Context, target string, input string) ([]float32, error) {
	tmp, err := os.CreateTemp("", "mlxclip.*.json")
	if err != nil {
		return nil, fmt.Errorf("Failed to create tmp file, %w", err)
	}
	defer os.Remove(tmp.Name())

	// Only the path is needed here; the file is reopened for reading below.
	if err := tmp.Close(); err != nil {
		return nil, fmt.Errorf("Failed to close tmp file, %w", err)
	}

	cmd := exec.CommandContext(ctx, "python3", e.embeddings_py, target, input, tmp.Name())

	// Capture the script's output instead of discarding it: without it a
	// failure surfaces only as "exit status N".
	out, err := cmd.CombinedOutput()
	if err != nil {
		return nil, fmt.Errorf("Failed to derive embeddings, %w (%q)", err, out)
	}

	r, err := os.Open(tmp.Name())
	if err != nil {
		return nil, fmt.Errorf("Failed to open embeddings file, %w", err)
	}
	defer r.Close()

	var emb []float32

	dec := json.NewDecoder(r)
	if err := dec.Decode(&emb); err != nil {
		return nil, fmt.Errorf("Failed to unmarshal embeddings, %w (%s)", err, tmp.Name())
	}

	return emb, nil
}
// asFloat64 widens every float32 in e32 to float64 and returns the values in
// a newly allocated slice of the same length. The result is never nil.
func (e *MLXClipEmbedder) asFloat64(e32 []float32) []float64 {
	widened := make([]float64, 0, len(e32))
	for _, v := range e32 {
		widened = append(widened, float64(v))
	}
	return widened
}