
packfile: cache undeltified objects to improve decode performance #218

Merged · 1 commit · Jan 25, 2017
16 changes: 16 additions & 0 deletions cache/common.go
@@ -0,0 +1,16 @@
package cache

import "gopkg.in/src-d/go-git.v4/plumbing"

const (
Byte = 1 << (iota * 10)
KiByte
MiByte
GiByte
)

type Object interface {
Add(o plumbing.EncodedObject)
Get(k plumbing.Hash) plumbing.EncodedObject
Clear()
}
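
For reference, the `1 << (iota * 10)` pattern above produces the usual binary size units, so `10 * MiByte` later in this PR is roughly 10 MiB. A standalone sketch (not part of the change) that just prints the values:

```go
package main

import "fmt"

// Same iota pattern as cache/common.go: each constant shifts by another 10 bits.
const (
	Byte = 1 << (iota * 10) // 1
	KiByte                  // 1 << 10
	MiByte                  // 1 << 20
	GiByte                  // 1 << 30
)

func main() {
	fmt.Println(Byte, KiByte, MiByte, GiByte) // 1 1024 1048576 1073741824
}
```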
68 changes: 68 additions & 0 deletions cache/object.go
@@ -0,0 +1,68 @@
package cache

import "gopkg.in/src-d/go-git.v4/plumbing"

const (
initialQueueSize = 20
MaxSize = 10 * MiByte
)

type ObjectFIFO struct {
objects map[plumbing.Hash]plumbing.EncodedObject
order *queue

maxSize int64
actualSize int64
}

// NewObjectFIFO returns an Object cache that keeps the newest objects that fit
// into the specified memory size
func NewObjectFIFO(size int64) *ObjectFIFO {
return &ObjectFIFO{
objects: make(map[plumbing.Hash]plumbing.EncodedObject),
order: newQueue(initialQueueSize),
maxSize: size,
}
}

// Add adds a new object to the cache. If the object size is greater than or
// equal to the cache size, the object is not added.
func (c *ObjectFIFO) Add(o plumbing.EncodedObject) {
// if the size of the object is greater than or equal to the cache size,
// skip it
if o.Size() >= c.maxSize {
return
}

// if the object is already in the cache, do not add it again
if _, ok := c.objects[o.Hash()]; ok {
return
}

// delete the oldest object if the cache is full
if c.actualSize >= c.maxSize {
h := c.order.Pop()
o := c.objects[h]
if o != nil {
c.actualSize -= o.Size()
delete(c.objects, h)
}
}

c.objects[o.Hash()] = o
c.order.Push(o.Hash())
c.actualSize += o.Size()
}

// Get returns an object by its hash. If the object is not in the cache, it
// returns nil
func (c *ObjectFIFO) Get(k plumbing.Hash) plumbing.EncodedObject {
return c.objects[k]
}

// Clear removes all objects from the cache and resets its size
func (c *ObjectFIFO) Clear() {
c.objects = make(map[plumbing.Hash]plumbing.EncodedObject)
c.order = newQueue(initialQueueSize)
c.actualSize = 0
}
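
For context, a minimal sketch of how this FIFO cache is used (not part of the PR; it assumes the package layout introduced here and uses the existing plumbing.MemoryObject, whose hash is derived from its content):

```go
package main

import (
	"fmt"

	"gopkg.in/src-d/go-git.v4/cache"
	"gopkg.in/src-d/go-git.v4/plumbing"
)

func main() {
	// Keep at most ~10 MiB of decoded objects (the same limit the decoder uses).
	c := cache.NewObjectFIFO(10 * cache.MiByte)

	obj := &plumbing.MemoryObject{}
	obj.SetType(plumbing.BlobObject)
	_, _ = obj.Write([]byte("hello")) // size and hash follow from the content

	c.Add(obj)

	if cached := c.Get(obj.Hash()); cached != nil {
		fmt.Println("cache hit:", cached.Hash())
	}

	c.Clear() // drop everything; Get now returns nil for this hash
}
```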
85 changes: 85 additions & 0 deletions cache/object_test.go
@@ -0,0 +1,85 @@
package cache

import (
"io"
"testing"

"gopkg.in/src-d/go-git.v4/plumbing"

. "gopkg.in/check.v1"
)

func Test(t *testing.T) { TestingT(t) }

type ObjectSuite struct {
c *ObjectFIFO
aObject plumbing.EncodedObject
bObject plumbing.EncodedObject
cObject plumbing.EncodedObject
dObject plumbing.EncodedObject
}

var _ = Suite(&ObjectSuite{})

func (s *ObjectSuite) SetUpTest(c *C) {
s.aObject = newObject("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 1*Byte)
s.bObject = newObject("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", 3*Byte)
s.cObject = newObject("cccccccccccccccccccccccccccccccccccccccc", 1*Byte)
s.dObject = newObject("dddddddddddddddddddddddddddddddddddddddd", 1*Byte)

s.c = NewObjectFIFO(2 * Byte)
}

func (s *ObjectSuite) TestAdd_SameObject(c *C) {
s.c.Add(s.aObject)
c.Assert(s.c.actualSize, Equals, int64(1*Byte))
s.c.Add(s.aObject)
c.Assert(s.c.actualSize, Equals, int64(1*Byte))
}

func (s *ObjectSuite) TestAdd_BigObject(c *C) {
s.c.Add(s.bObject)
c.Assert(s.c.actualSize, Equals, int64(0))
c.Assert(len(s.c.objects), Equals, 0)
}

func (s *ObjectSuite) TestAdd_CacheOverflow(c *C) {
s.c.Add(s.aObject)
c.Assert(s.c.actualSize, Equals, int64(1*Byte))
s.c.Add(s.cObject)
c.Assert(len(s.c.objects), Equals, 2)
s.c.Add(s.dObject)
c.Assert(len(s.c.objects), Equals, 2)

c.Assert(s.c.Get(s.aObject.Hash()), IsNil)
c.Assert(s.c.Get(s.cObject.Hash()), NotNil)
c.Assert(s.c.Get(s.dObject.Hash()), NotNil)
}

func (s *ObjectSuite) TestClear(c *C) {
s.c.Add(s.aObject)
c.Assert(s.c.actualSize, Equals, int64(1*Byte))
s.c.Clear()
c.Assert(s.c.actualSize, Equals, int64(0))
c.Assert(s.c.Get(s.aObject.Hash()), IsNil)
}

type dummyObject struct {
hash plumbing.Hash
size int64
}

func newObject(hash string, size int64) plumbing.EncodedObject {
return &dummyObject{
hash: plumbing.NewHash(hash),
size: size,
}
}

func (d *dummyObject) Hash() plumbing.Hash { return d.hash }
func (*dummyObject) Type() plumbing.ObjectType { return plumbing.InvalidObject }
func (*dummyObject) SetType(plumbing.ObjectType) {}
func (d *dummyObject) Size() int64 { return d.size }
func (*dummyObject) SetSize(s int64) {}
func (*dummyObject) Reader() (io.ReadCloser, error) { return nil, nil }
func (*dummyObject) Writer() (io.WriteCloser, error) { return nil, nil }
46 changes: 46 additions & 0 deletions cache/queue.go
@@ -0,0 +1,46 @@
package cache

import "gopkg.in/src-d/go-git.v4/plumbing"

// queue is a basic FIFO queue based on a circular list that resizes as needed.
type queue struct {
elements []plumbing.Hash
size int
head int
tail int
count int
}

// newQueue returns a queue with the specified initial size
func newQueue(size int) *queue {
return &queue{
elements: make([]plumbing.Hash, size),
size: size,
}
}

// Push adds a hash to the queue, growing the underlying buffer when it is full.
func (q *queue) Push(h plumbing.Hash) {
if q.head == q.tail && q.count > 0 {
elements := make([]plumbing.Hash, len(q.elements)+q.size)
copy(elements, q.elements[q.head:])
copy(elements[len(q.elements)-q.head:], q.elements[:q.head])
q.head = 0
q.tail = len(q.elements)
q.elements = elements
}
q.elements[q.tail] = h
q.tail = (q.tail + 1) % len(q.elements)
q.count++
}

// Pop removes and returns the oldest Hash in the queue, or ZeroHash if the queue is empty.
func (q *queue) Pop() plumbing.Hash {
if q.count == 0 {
return plumbing.ZeroHash
}
node := q.elements[q.head]
q.head = (q.head + 1) % len(q.elements)
q.count--
return node
}
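
Since queue is unexported, a usage sketch has to live inside package cache; this hypothetical helper (not part of this PR) illustrates the FIFO order and the automatic growth of the circular buffer:

```go
package cache

import "gopkg.in/src-d/go-git.v4/plumbing"

// exampleQueueUsage is a hypothetical helper illustrating push/pop order and
// buffer growth; it is not part of this change.
func exampleQueueUsage() []plumbing.Hash {
	q := newQueue(2) // small initial size to force a resize below

	hashes := []plumbing.Hash{
		plumbing.NewHash("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"),
		plumbing.NewHash("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"),
		plumbing.NewHash("cccccccccccccccccccccccccccccccccccccccc"),
	}

	for _, h := range hashes {
		q.Push(h) // the third Push grows the buffer from 2 to 4 slots
	}

	var out []plumbing.Hash
	for h := q.Pop(); h != plumbing.ZeroHash; h = q.Pop() {
		out = append(out, h) // comes back in insertion (FIFO) order: a, b, c
	}

	return out
}
```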
41 changes: 33 additions & 8 deletions plumbing/format/packfile/decoder.go
@@ -3,6 +3,7 @@ package packfile
import (
"bytes"

"gopkg.in/src-d/go-git.v4/cache"
"gopkg.in/src-d/go-git.v4/plumbing"
"gopkg.in/src-d/go-git.v4/plumbing/storer"
)
@@ -62,6 +63,8 @@ type Decoder struct {

offsetToType map[int64]plumbing.ObjectType
decoderType plumbing.ObjectType

cache cache.Object
}

// NewDecoder returns a new Decoder that decodes a Packfile using the given
@@ -105,6 +108,8 @@ func NewDecoderForType(s *Scanner, o storer.EncodedObjectStorer,

offsetToType: make(map[int64]plumbing.ObjectType, 0),
decoderType: t,

cache: cache.NewObjectFIFO(cache.MaxSize),
Contributor Author:

cache.MaxSize should be exposed as external repository configuration. Any ideas on how to do this correctly?

}, nil
}
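
One hypothetical way to address the configuration question in the comment above (a sketch only, not what this PR does) is to let callers pass their own cache.Object, falling back to the cache.MaxSize default when they do not; such a wrapper would live in plumbing/format/packfile:

```go
// NewDecoderForTypeWithCache is a hypothetical variant of the constructor; the
// real NewDecoderForType in this PR always builds cache.NewObjectFIFO(cache.MaxSize).
func NewDecoderForTypeWithCache(s *Scanner, o storer.EncodedObjectStorer,
	t plumbing.ObjectType, c cache.Object) (*Decoder, error) {

	d, err := NewDecoderForType(s, o, t)
	if err != nil {
		return nil, err
	}

	if c == nil {
		c = cache.NewObjectFIFO(cache.MaxSize) // default when the caller does not care
	}

	d.cache = c // let the caller (or repository configuration) set the memory budget
	return d, nil
}
```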

@@ -341,13 +346,20 @@ func (d *Decoder) fillREFDeltaObjectContent(obj plumbing.EncodedObject, ref plumbing.Hash) (uint32, error) {
return 0, err
}

base, err := d.recallByHash(ref)
if err != nil {
return 0, err
base := d.cache.Get(ref)

if base == nil {
base, err = d.recallByHash(ref)
if err != nil {
return 0, err
}
}

obj.SetType(base.Type())
return crc, ApplyDelta(obj, base, buf.Bytes())
err = ApplyDelta(obj, base, buf.Bytes())
d.cache.Add(obj)

return crc, err
}

func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset int64) (uint32, error) {
@@ -357,13 +369,24 @@ func (d *Decoder) fillOFSDeltaObjectContent(obj plumbing.EncodedObject, offset int64) (uint32, error) {
return 0, err
}

base, err := d.recallByOffset(offset)
if err != nil {
return 0, err
h := d.offsetToHash[offset]
var base plumbing.EncodedObject
if h != plumbing.ZeroHash {
base = d.cache.Get(h)
}

if base == nil {
base, err = d.recallByOffset(offset)
if err != nil {
return 0, err
}
}

obj.SetType(base.Type())
return crc, ApplyDelta(obj, base, buf.Bytes())
err = ApplyDelta(obj, base, buf.Bytes())
d.cache.Add(obj)

return crc, err
}

func (d *Decoder) setOffset(h plumbing.Hash, offset int64) {
@@ -434,5 +457,7 @@ func (d *Decoder) CRCs() map[plumbing.Hash]uint32 {
// Close closes the Scanner, which usually means that the whole reader has been
// read and discarded
func (d *Decoder) Close() error {
d.cache.Clear()

return d.s.Close()
}
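
Since the cache is created and cleared entirely inside the decoder, callers keep using the existing API. A rough end-to-end sketch of decoding a packfile with the patched decoder (the pack path is a placeholder; NewScanner, NewDecoder, Decode and memory.NewStorage are the existing go-git v4 APIs):

```go
package main

import (
	"fmt"
	"os"

	"gopkg.in/src-d/go-git.v4/plumbing/format/packfile"
	"gopkg.in/src-d/go-git.v4/storage/memory"
)

func main() {
	// Any packfile will do; the path here is just a placeholder.
	f, err := os.Open("/tmp/example.pack")
	if err != nil {
		panic(err)
	}
	defer f.Close()

	scanner := packfile.NewScanner(f)
	storage := memory.NewStorage()

	// The decoder now keeps recently undeltified objects in an internal FIFO
	// cache, so resolving REF/OFS deltas against a cached base avoids seeking
	// back into the packfile and re-applying deltas.
	d, err := packfile.NewDecoder(scanner, storage)
	if err != nil {
		panic(err)
	}
	defer d.Close() // clears the cache and closes the scanner

	checksum, err := d.Decode()
	if err != nil {
		panic(err)
	}

	fmt.Println("packfile checksum:", checksum)
}
```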