Skip to content
This repository was archived by the owner on Sep 11, 2020. It is now read-only.

Commit 97c0273

Browse files
ajnavarrosmola
authored and committed
packfile: cache undeltified objects to improve decode performance (#218)
* Simple object cache that keeps the most recently undeltified objects in memory. When no more objects fit in memory, the oldest one is deleted (FIFO). This speeds up packfile operations by preventing redundant seeks and decodes.
1 parent 892372b commit 97c0273

File tree

5 files changed

+248
-8
lines changed

5 files changed

+248
-8
lines changed

Diff for: cache/common.go

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
package cache
2+
3+
import "gopkg.in/src-d/go-git.v4/plumbing"
4+
5+
// Size units expressed in bytes. Each unit is 1024 times the previous one.
// The constants are untyped so they can be used with any integer type.
const (
	// Byte is a single byte.
	Byte = 1
	// KiByte is 1024 bytes.
	KiByte = 1 << 10
	// MiByte is 1024 KiByte.
	MiByte = 1 << 20
	// GiByte is 1024 MiByte.
	GiByte = 1 << 30
)
11+
12+
type Object interface {
13+
Add(o plumbing.EncodedObject)
14+
Get(k plumbing.Hash) plumbing.EncodedObject
15+
Clear()
16+
}

Diff for: cache/object.go

+68
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
package cache
2+
3+
import "gopkg.in/src-d/go-git.v4/plumbing"
4+
5+
const (
6+
initialQueueSize = 20
7+
MaxSize = 10 * MiByte
8+
)
9+
10+
type ObjectFIFO struct {
11+
objects map[plumbing.Hash]plumbing.EncodedObject
12+
order *queue
13+
14+
maxSize int64
15+
actualSize int64
16+
}
17+
18+
// NewObjectFIFO returns an Object cache that keeps the newest objects that fit
19+
// into the specific memory size
20+
func NewObjectFIFO(size int64) *ObjectFIFO {
21+
return &ObjectFIFO{
22+
objects: make(map[plumbing.Hash]plumbing.EncodedObject),
23+
order: newQueue(initialQueueSize),
24+
maxSize: size,
25+
}
26+
}
27+
28+
// Add adds a new object to the cache. If the object size is greater than the
29+
// cache size, the object is not added.
30+
func (c *ObjectFIFO) Add(o plumbing.EncodedObject) {
31+
// if the size of the object is bigger or equal than the cache size,
32+
// skip it
33+
if o.Size() >= c.maxSize {
34+
return
35+
}
36+
37+
// if the object is into the cache, do not add it again
38+
if _, ok := c.objects[o.Hash()]; ok {
39+
return
40+
}
41+
42+
// delete the oldest object if cache is full
43+
if c.actualSize >= c.maxSize {
44+
h := c.order.Pop()
45+
o := c.objects[h]
46+
if o != nil {
47+
c.actualSize -= o.Size()
48+
delete(c.objects, h)
49+
}
50+
}
51+
52+
c.objects[o.Hash()] = o
53+
c.order.Push(o.Hash())
54+
c.actualSize += o.Size()
55+
}
56+
57+
// Get returns an object by his hash. If the object is not into the cache, it
58+
// returns nil
59+
func (c *ObjectFIFO) Get(k plumbing.Hash) plumbing.EncodedObject {
60+
return c.objects[k]
61+
}
62+
63+
// Clear the content of this cache object
64+
func (c *ObjectFIFO) Clear() {
65+
c.objects = make(map[plumbing.Hash]plumbing.EncodedObject)
66+
c.order = newQueue(initialQueueSize)
67+
c.actualSize = 0
68+
}

Diff for: cache/object_test.go

+85
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
package cache
2+
3+
import (
4+
"io"
5+
"testing"
6+
7+
"gopkg.in/src-d/go-git.v4/plumbing"
8+
9+
. "gopkg.in/check.v1"
10+
)
11+
12+
// Test hands control to gocheck so the suites registered in this package run
// under "go test".
func Test(t *testing.T) {
	TestingT(t)
}
13+
14+
type ObjectSuite struct {
15+
c *ObjectFIFO
16+
aObject plumbing.EncodedObject
17+
bObject plumbing.EncodedObject
18+
cObject plumbing.EncodedObject
19+
dObject plumbing.EncodedObject
20+
}
21+
22+
var _ = Suite(&ObjectSuite{})
23+
24+
func (s *ObjectSuite) SetUpTest(c *C) {
25+
s.aObject = newObject("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 1*Byte)
26+
s.bObject = newObject("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", 3*Byte)
27+
s.cObject = newObject("cccccccccccccccccccccccccccccccccccccccc", 1*Byte)
28+
s.dObject = newObject("dddddddddddddddddddddddddddddddddddddddd", 1*Byte)
29+
30+
s.c = NewObjectFIFO(2 * Byte)
31+
}
32+
33+
func (s *ObjectSuite) TestAdd_SameObject(c *C) {
34+
s.c.Add(s.aObject)
35+
c.Assert(s.c.actualSize, Equals, int64(1*Byte))
36+
s.c.Add(s.aObject)
37+
c.Assert(s.c.actualSize, Equals, int64(1*Byte))
38+
}
39+
40+
func (s *ObjectSuite) TestAdd_BigObject(c *C) {
41+
s.c.Add(s.bObject)
42+
c.Assert(s.c.actualSize, Equals, int64(0))
43+
c.Assert(len(s.c.objects), Equals, 0)
44+
}
45+
46+
func (s *ObjectSuite) TestAdd_CacheOverflow(c *C) {
47+
s.c.Add(s.aObject)
48+
c.Assert(s.c.actualSize, Equals, int64(1*Byte))
49+
s.c.Add(s.cObject)
50+
c.Assert(len(s.c.objects), Equals, 2)
51+
s.c.Add(s.dObject)
52+
c.Assert(len(s.c.objects), Equals, 2)
53+
54+
c.Assert(s.c.Get(s.aObject.Hash()), IsNil)
55+
c.Assert(s.c.Get(s.cObject.Hash()), NotNil)
56+
c.Assert(s.c.Get(s.dObject.Hash()), NotNil)
57+
}
58+
59+
func (s *ObjectSuite) TestClear(c *C) {
60+
s.c.Add(s.aObject)
61+
c.Assert(s.c.actualSize, Equals, int64(1*Byte))
62+
s.c.Clear()
63+
c.Assert(s.c.actualSize, Equals, int64(0))
64+
c.Assert(s.c.Get(s.aObject.Hash()), IsNil)
65+
}
66+
67+
type dummyObject struct {
68+
hash plumbing.Hash
69+
size int64
70+
}
71+
72+
func newObject(hash string, size int64) plumbing.EncodedObject {
73+
return &dummyObject{
74+
hash: plumbing.NewHash(hash),
75+
size: size,
76+
}
77+
}
78+
79+
func (d *dummyObject) Hash() plumbing.Hash { return d.hash }
80+
func (*dummyObject) Type() plumbing.ObjectType { return plumbing.InvalidObject }
81+
func (*dummyObject) SetType(plumbing.ObjectType) {}
82+
func (d *dummyObject) Size() int64 { return d.size }
83+
func (*dummyObject) SetSize(s int64) {}
84+
func (*dummyObject) Reader() (io.ReadCloser, error) { return nil, nil }
85+
func (*dummyObject) Writer() (io.WriteCloser, error) { return nil, nil }

Diff for: cache/queue.go

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
package cache
2+
3+
import "gopkg.in/src-d/go-git.v4/plumbing"
4+
5+
// queue is a basic FIFO queue based on a circular list that resize as needed.
6+
type queue struct {
7+
elements []plumbing.Hash
8+
size int
9+
head int
10+
tail int
11+
count int
12+
}
13+
14+
// newQueue returns a queue with the specified initial size
15+
func newQueue(size int) *queue {
16+
return &queue{
17+
elements: make([]plumbing.Hash, size),
18+
size: size,
19+
}
20+
}
21+
22+
// Push adds a node to the queue.
23+
func (q *queue) Push(h plumbing.Hash) {
24+
if q.head == q.tail && q.count > 0 {
25+
elements := make([]plumbing.Hash, len(q.elements)+q.size)
26+
copy(elements, q.elements[q.head:])
27+
copy(elements[len(q.elements)-q.head:], q.elements[:q.head])
28+
q.head = 0
29+
q.tail = len(q.elements)
30+
q.elements = elements
31+
}
32+
q.elements[q.tail] = h
33+
q.tail = (q.tail + 1) % len(q.elements)
34+
q.count++
35+
}
36+
37+
// Pop removes and returns a Hash from the queue in first to last order.
38+
func (q *queue) Pop() plumbing.Hash {
39+
if q.count == 0 {
40+
return plumbing.ZeroHash
41+
}
42+
node := q.elements[q.head]
43+
q.head = (q.head + 1) % len(q.elements)
44+
q.count--
45+
return node
46+
}

Diff for: plumbing/format/packfile/decoder.go

+33-8
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package packfile
33
import (
44
"bytes"
55

6+
"gopkg.in/src-d/go-git.v4/cache"
67
"gopkg.in/src-d/go-git.v4/plumbing"
78
"gopkg.in/src-d/go-git.v4/plumbing/storer"
89
)
@@ -62,6 +63,8 @@ type Decoder struct {
6263

6364
offsetToType map[int64]plumbing.ObjectType
6465
decoderType plumbing.ObjectType
66+
67+
cache cache.Object
6568
}
6669

6770
// NewDecoder returns a new Decoder that decodes a Packfile using the given
@@ -105,6 +108,8 @@ func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer,
105108

106109
offsetToType: make(map[int64]plumbing.ObjectType, 0),
107110
decoderType: t,
111+
112+
cache: cache.NewObjectFIFO(cache.MaxSize),
108113
}, nil
109114
}
110115

@@ -341,13 +346,20 @@ func (d *Decoder) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plum
341346
return 0, err
342347
}
343348

344-
base, err := d.recallByHash(ref)
345-
if err != nil {
346-
return 0, err
349+
base := d.cache.Get(ref)
350+
351+
if base == nil {
352+
base, err = d.recallByHash(ref)
353+
if err != nil {
354+
return 0, err
355+
}
347356
}
348357

349358
obj.SetType(base.Type())
350-
return crc, ApplyDelta(obj, base, buf.Bytes())
359+
err = ApplyDelta(obj, base, buf.Bytes())
360+
d.cache.Add(obj)
361+
362+
return crc, err
351363
}
352364

353365
func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset int64) (uint32, error) {
@@ -357,13 +369,24 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset i
357369
return 0, err
358370
}
359371

360-
base, err := d.recallByOffset(offset)
361-
if err != nil {
362-
return 0, err
372+
h := d.offsetToHash[offset]
373+
var base plumbing.EncodedObject
374+
if h != plumbing.ZeroHash {
375+
base = d.cache.Get(h)
376+
}
377+
378+
if base == nil {
379+
base, err = d.recallByOffset(offset)
380+
if err != nil {
381+
return 0, err
382+
}
363383
}
364384

365385
obj.SetType(base.Type())
366-
return crc, ApplyDelta(obj, base, buf.Bytes())
386+
err = ApplyDelta(obj, base, buf.Bytes())
387+
d.cache.Add(obj)
388+
389+
return crc, err
367390
}
368391

369392
func (d *Decoder) setOffset(h plumbing.Hash, offset int64) {
@@ -434,5 +457,7 @@ func (d *Decoder) CRCs() map[plumbing.Hash]uint32 {
434457
// Close close the Scanner, usually this mean that the whole reader is read and
435458
// discarded
436459
func (d *Decoder) Close() error {
460+
d.cache.Clear()
461+
437462
return d.s.Close()
438463
}

0 commit comments

Comments
 (0)