Added pearson correlation support for MPDist

matrix-profile-foundation · Jan 9, 2020 · c561d1e · c561d1e
1 parent 0938766
commit c561d1e
Show file tree

Hide file tree

Showing 3 changed files with 56 additions and 22 deletions.
diff --git a/matrixprofile.go b/matrixprofile.go
@@ -122,20 +122,34 @@ func applySingleAV(mp, ts []float64, w int, a av.AV) ([]float64, error) {
 // values must be between 0 and 1.
 func (mp MatrixProfile) ApplyAV() ([]float64, []float64, error) {
 	var err error
-	var abmp, bamp []float64
+	abmp := make([]float64, len(mp.MP))
+	bamp := make([]float64, len(mp.MPB))
 
-	abmp, err = applySingleAV(mp.MP, mp.A, mp.W, mp.AV)
+	copy(abmp, mp.MP)
+	copy(bamp, mp.MPB)
+	if !mp.Opts.Euclidean {
+		util.P2E(abmp, mp.W)
+		util.P2E(bamp, mp.W)
+	}
+
+	abmp, err = applySingleAV(abmp, mp.A, mp.W, mp.AV)
 	if err != nil {
 		return nil, nil, err
 	}
 
 	if mp.MPB != nil {
-		bamp, err = applySingleAV(mp.MPB, mp.B, mp.W, mp.AV)
+		bamp, err = applySingleAV(bamp, mp.B, mp.W, mp.AV)
 	}
 
 	if err != nil {
 		return nil, nil, err
 	}
+
+	if !mp.Opts.Euclidean {
+		util.E2P(abmp, mp.W)
+		util.E2P(bamp, mp.W)
+	}
+
 	return abmp, bamp, nil
 }
 

diff --git a/matrixprofile_test.go b/matrixprofile_test.go
@@ -41,37 +41,41 @@ func TestNew(t *testing.T) {
 	}
 }
 
-func TestApplyAV(t *testing.T) {
-	mprof := []float64{4, 6, 10, 2, 1, 0, 1, 2, 0, 0, 1, 2, 6}
-
+func TestApplyAVDefault(t *testing.T) {
 	testdata := []struct {
-		a          []float64
-		w          int
-		av         av.AV
-		expectedMP []float64
+		a []float64
+		w int
 	}{
-		{[]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, 4, av.Default, mprof},
+		{[]float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, 4},
 	}
 
-	var mp MatrixProfile
+	var mp *MatrixProfile
 	var err error
 	var outab []float64
 	for _, d := range testdata {
-		newMP := make([]float64, len(mprof))
-		copy(newMP, mprof)
-		mp = MatrixProfile{A: d.a, W: d.w, MP: newMP, AV: d.av}
+		mp, err = New(d.a, nil, d.w)
+		if err != nil {
+			t.Errorf("%v", err)
+			break
+		}
+		if err = mp.Compute(NewMPOpts()); err != nil {
+			t.Errorf("%v", err)
+			break
+		}
+
+		mp.AV = av.Default
 		outab, _, err = mp.ApplyAV()
 		if err != nil {
 			t.Fatal(err)
 		}
 
-		if len(outab) != len(d.expectedMP) {
-			t.Errorf("Expected %d elements, but got %d, %+v", len(d.expectedMP), len(outab), d)
+		if len(outab) != len(mp.MP) {
+			t.Errorf("Expected %d elements, but got %d, %+v", len(mp.MP), len(outab), d)
 			break
 		}
 		for i := 0; i < len(outab); i++ {
-			if math.Abs(float64(outab[i]-d.expectedMP[i])) > 1e-7 {
-				t.Errorf("Expected %v,\nbut got\n%v for %+v", d.expectedMP, outab, d)
+			if math.Abs(float64(outab[i]-mp.MP[i])) > 1e-7 {
+				t.Errorf("Expected %v,\nbut got\n%v for %+v", mp.MP, outab, d)
 				break
 			}
 		}
@@ -775,7 +779,7 @@ func TestDiscoverDiscords(t *testing.T) {
 	}
 
 	for _, d := range testdata {
-		mp := MatrixProfile{A: a, B: a, W: w, MP: d.mp, AV: av.Default}
+		mp := MatrixProfile{A: a, B: a, W: w, MP: d.mp, AV: av.Default, Opts: NewMPOpts()}
 		discords, err := mp.DiscoverDiscords(d.k, d.exzone)
 		if err != nil {
 			t.Errorf("Got error %v on %v", err, d)

diff --git a/util/util.go b/util/util.go
@@ -263,12 +263,28 @@ func DiagBatchingScheme(l, p int) []Batch {
 
 // P2E converts a slice of pearson correlation values to euclidean distances. This
 // is only valid for z-normalized time series.
-func P2E(mp []float64, m int) {
+func P2E(mp []float64, w int) {
 	for i := 0; i < len(mp); i++ {
 		// caps pearson correlation to 1 in case there are accumulated floating point errors
 		if mp[i] > 1 {
 			mp[i] = 1
 		}
-		mp[i] = math.Sqrt(2 * float64(m) * (1 - mp[i]))
+		mp[i] = math.Sqrt(2 * float64(w) * (1 - mp[i]))
+	}
+}
+
+// E2P converts a slice of euclidean distances to pearson correlation values in place. This
+// is only valid for z-normalized time series. Results are clamped to [0, 1], so negative
+// pearson correlation values will not be recovered.
+func E2P(mp []float64, w int) {
+	for i := 0; i < len(mp); i++ {
+		mp[i] = 1 - mp[i]*mp[i]/(2*float64(w))
+		// clamps pearson correlation to the [0, 1] range in case there are accumulated floating point errors; negative values are truncated to 0
+		if mp[i] > 1 {
+			mp[i] = 1
+		}
+		if mp[i] < 0 {
+			mp[i] = 0
+		}
 	}
 }