Merge pull request #30 from Taimoor-12/type-sizes
added type sizes info found within the data section
UmanShahzad authored Jan 4, 2024
2 parents 20ef154 + 3608540 commit 6d422a6
Showing 3 changed files with 261 additions and 29 deletions.
13 changes: 8 additions & 5 deletions cmd_metadata.go
@@ -13,11 +13,12 @@ var predictMetadataFmts = []string{"pretty", "json"}

var completionsMetadata = &complete.Command{
Flags: map[string]complete.Predictor{
"--nocolor": predict.Nothing,
"-h": predict.Nothing,
"--help": predict.Nothing,
"-f": predict.Set(predictMetadataFmts),
"--format": predict.Set(predictMetadataFmts),
"--nocolor": predict.Nothing,
"--data-types": predict.Nothing,
"-h": predict.Nothing,
"--help": predict.Nothing,
"-f": predict.Set(predictMetadataFmts),
"--format": predict.Set(predictMetadataFmts),
},
}

@@ -29,6 +30,8 @@ Options:
General:
--nocolor
disable colored output.
--data-types
show data type sizes within the data section.
--help, -h
show help.
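The hunk above wires a new --data-types flag into the metadata command's help and completions. For context, here is a minimal sketch of driving the same behaviour programmatically; the module import path and the sample file name are assumptions for illustration, only the CmdMetadataFlags fields and the CmdMetadata signature come from the library changes that follow.

// Hypothetical driver for the new --data-types behaviour; the import path
// below is assumed, not taken from this diff.
package main

import (
	"fmt"
	"os"

	"github.com/ipinfo/mmdbctl/lib" // assumed import path
)

func main() {
	f := lib.CmdMetadataFlags{
		Format:    "pretty",
		DataTypes: true, // new flag from this PR: include per-type size totals
	}
	// "sample.mmdb" is a placeholder path.
	if err := lib.CmdMetadata(f, []string{"sample.mmdb"}, func() {}); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}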
82 changes: 60 additions & 22 deletions lib/cmd_metadata.go
@@ -18,9 +18,10 @@ const (

// CmdMetadataFlags are flags expected by CmdMetadata.
type CmdMetadataFlags struct {
Help bool
NoColor bool
Format string
Help bool
NoColor bool
Format string
DataTypes bool
}

// Init initializes the common flags available to CmdMetadata with sensible
@@ -44,6 +45,11 @@ func (f *CmdMetadataFlags) Init() {
"format", "f", "",
_h,
)
pflag.BoolVar(
&f.DataTypes,
"data-types", false,
"show data type sizes within the data section.",
)
}

func CmdMetadata(f CmdMetadataFlags, args []string, printHelp func()) error {
@@ -71,7 +77,8 @@ func CmdMetadata(f CmdMetadataFlags, args []string, printHelp func()) error {
}

// open tree.
db, err := maxminddb.Open(args[0])
mmdbFile := args[0]
db, err := maxminddb.Open(mmdbFile)
if err != nil {
return fmt.Errorf("couldn't open mmdb file: %w", err)
}
@@ -86,7 +93,7 @@ func CmdMetadata(f CmdMetadataFlags, args []string, printHelp func()) error {
metadataSectionStartOffset := 0

// The offset of this separator is used to determine the metadata section start, the data section end, and the data section size.
offset, err := findSectionSeparator(args[0], MetadataStartMarker)
offset, err := findSectionSeparator(mmdbFile, MetadataStartMarker)
if err != nil {
return fmt.Errorf("couldn't process the mmdb file: %w", err)
}
@@ -96,32 +103,57 @@ func CmdMetadata(f CmdMetadataFlags, args []string, printHelp func()) error {
}
dataSectionEndOffset = int(offset)
dataSectionSize = int(offset) - treeSize - 16
var typeSizes TypeSizes
if f.DataTypes {
typeSizes, err = traverseDataSection(mmdbFile, int64(dataSectionStartOffset), int64(dataSectionEndOffset))
if err != nil {
return fmt.Errorf("couldn't process the mmdb file: %w", err)
}
}
metadataSectionStartOffset = int(offset) + len(MetadataStartMarker)

if f.Format == "pretty" {
fmtEntry := color.New(color.FgCyan)
fmtVal := color.New(color.FgGreen)
printlineGen := func(entryLen string) func(string, string) {
return func(name string, val string) {
printlineGen := func(indentSpace, entryLen string) func(string, string, string) {
return func(name string, val string, valSimplified string) {
fmt.Printf(
"- %v %v\n",
"%v- %v %v %v\n",
indentSpace,
fmtEntry.Sprintf("%-"+entryLen+"s", name),
fmtVal.Sprintf("%v", val),
fmtVal.Sprintf("%v", valSimplified),
)
}
}
printline := printlineGen("13")
printline("Binary Format", binaryFmt)
printline("Database Type", mdFromLib.DatabaseType)
printline("IP Version", strconv.Itoa(int(mdFromLib.IPVersion)))
printline("Record Size", strconv.Itoa(int(mdFromLib.RecordSize)))
printline("Node Count", strconv.Itoa(int(mdFromLib.NodeCount)))
printline("Tree Size", strconv.Itoa(treeSize))
printline("Data Section Size", strconv.Itoa(dataSectionSize))
printline("Data Section Start Offset", strconv.Itoa(dataSectionStartOffset))
printline("Data Section End Offset", strconv.Itoa(dataSectionEndOffset))
printline("Metadata Section Start Offset", strconv.Itoa(metadataSectionStartOffset))
printline("Description", "")

printline := printlineGen("", "13")
printline("Binary Format", binaryFmt, "")
printline("Database Type", mdFromLib.DatabaseType, "")
printline("IP Version", strconv.Itoa(int(mdFromLib.IPVersion)), "")
printline("Record Size", strconv.Itoa(int(mdFromLib.RecordSize)), simplifySize(int64(mdFromLib.RecordSize)))
printline("Node Count", strconv.Itoa(int(mdFromLib.NodeCount)), simplifySize(int64(mdFromLib.NodeCount)))
printline("Tree Size", strconv.Itoa(treeSize), simplifySize(int64(treeSize)))
printline("Data Section Size", strconv.Itoa(dataSectionSize), simplifySize(int64(dataSectionSize)))
if f.DataTypes {
typeSizePrintline := printlineGen(" ", "13")
typeSizePrintline("Pointer Size", strconv.Itoa(int(typeSizes.PointerSize)), simplifySize(typeSizes.PointerSize))
typeSizePrintline("UTF-8 String Size", strconv.Itoa(int(typeSizes.Utf8StringSize)), simplifySize(typeSizes.Utf8StringSize))
typeSizePrintline("Double Size", strconv.Itoa(int(typeSizes.DoubleSize)), simplifySize(typeSizes.DoubleSize))
typeSizePrintline("Bytes Size", strconv.Itoa(int(typeSizes.BytesSize)), simplifySize(typeSizes.BytesSize))
typeSizePrintline("Unsigned 16-bit Integer Size", strconv.Itoa(int(typeSizes.Unsigned16bitIntSize)), simplifySize(typeSizes.Unsigned16bitIntSize))
typeSizePrintline("Unsigned 32-bit Integer Size", strconv.Itoa(int(typeSizes.Unsigned32bitIntSize)), simplifySize(typeSizes.Unsigned32bitIntSize))
typeSizePrintline("Signed 32-bit Integer Size", strconv.Itoa(int(typeSizes.Signed32bitIntSize)), simplifySize(typeSizes.Signed32bitIntSize))
typeSizePrintline("Unsigned 64-bit Integer Size", strconv.Itoa(int(typeSizes.Unsigned64bitIntSize)), simplifySize(typeSizes.Unsigned64bitIntSize))
typeSizePrintline("Unsigned 128-bit Integer Size", strconv.Itoa(int(typeSizes.Unsigned128bitIntSize)), simplifySize(typeSizes.Unsigned128bitIntSize))
typeSizePrintline("Map Key-Value Pair Count", strconv.Itoa(int(typeSizes.MapKeyValueCount)), simplifySize(typeSizes.MapKeyValueCount))
typeSizePrintline("Array Length", strconv.Itoa(int(typeSizes.ArrayLength)), simplifySize(typeSizes.ArrayLength))
typeSizePrintline("Float Size", strconv.Itoa(int(typeSizes.FloatSize)), simplifySize(typeSizes.FloatSize))
}
printline("Data Section Start Offset", strconv.Itoa(dataSectionStartOffset), simplifySize(int64(dataSectionStartOffset)))
printline("Data Section End Offset", strconv.Itoa(dataSectionEndOffset), simplifySize(int64(dataSectionEndOffset)))
printline("Metadata Section Start Offset", strconv.Itoa(metadataSectionStartOffset), simplifySize(int64(metadataSectionStartOffset)))
printline("Description", "", "")
descKeys, descVals := sortedMapKeysAndVals(mdFromLib.Description)
longestDescKeyLen := strconv.Itoa(len(longestStrInStringSlice(descKeys)))
for i := 0; i < len(descKeys); i++ {
@@ -131,9 +163,13 @@ func CmdMetadata(f CmdMetadataFlags, args []string, printHelp func()) error {
fmtVal.Sprintf("%v", descVals[i]),
)
}
printline("Languages", strings.Join(mdFromLib.Languages, ", "))
printline("Build Epoch", strconv.Itoa(int(mdFromLib.BuildEpoch)))
printline("Languages", strings.Join(mdFromLib.Languages, ", "), "")
printline("Build Epoch", strconv.Itoa(int(mdFromLib.BuildEpoch)), "")
} else { // json
var typeSizesPtr *TypeSizes
if f.DataTypes {
typeSizesPtr = &typeSizes
}
md := struct {
BinaryFormatVsn string `json:"binary_format"`
DatabaseType string `json:"db_type"`
@@ -142,6 +178,7 @@ func CmdMetadata(f CmdMetadataFlags, args []string, printHelp func()) error {
NodeCount uint `json:"node_count"`
TreeSize uint `json:"tree_size"`
DataSectionSize uint `json:"data_section_size"`
TypeSize *TypeSizes `json:"data_type_sizes,omitempty"`
DataSectionStartOffset uint `json:"data_section_start_offset"`
DataSectionEndOffset uint `json:"data_section_end_offset"`
MetadataStartOffset uint `json:"metadata_section_start_offset"`
@@ -156,6 +193,7 @@ func CmdMetadata(f CmdMetadataFlags, args []string, printHelp func()) error {
mdFromLib.NodeCount,
uint(treeSize),
uint(dataSectionSize),
typeSizesPtr,
uint(dataSectionStartOffset),
uint(dataSectionEndOffset),
uint(metadataSectionStartOffset),
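The offset arithmetic in the hunks above is easier to follow with numbers plugged in. A self-contained sketch, assuming the standard MMDB layout (search tree, a 16-byte all-zero separator, the data section, then the metadata start marker); every value below is invented for illustration.

// Worked sketch of the offsets computed in CmdMetadata above.
package main

import "fmt"

func main() {
	const (
		nodeCount    = 1_000_000 // hypothetical values, not from a real database
		recordSize   = 28        // bits per record
		markerOffset = 8_000_000 // where "\xab\xcd\xefMaxMind.com" was found
		markerLen    = 14        // length of the metadata start marker
	)

	treeSize := nodeCount * (recordSize * 2) / 8 // search tree size per the MMDB spec
	dataSectionStart := treeSize + 16            // skip the 16-byte separator (how the tool derives this isn't shown in the diff)
	dataSectionEnd := markerOffset               // the marker begins right after the data section
	dataSectionSize := markerOffset - treeSize - 16 // same formula as in the hunk above
	metadataStart := markerOffset + markerLen

	fmt.Println(treeSize, dataSectionStart, dataSectionEnd, dataSectionSize, metadataStart)
}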
195 changes: 193 additions & 2 deletions lib/utils.go
@@ -4,6 +4,7 @@ import (
"bytes"
"encoding/json"
"fmt"
"io"
"os"
"sort"
"strconv"
@@ -67,10 +68,10 @@ func mapInterfaceToStr(m map[string]interface{}) map[string]string {
return retVal
}

func findSectionSeparator(mmdbFile, sep string) (int64, error) {
func findSectionSeparator(mmdbFile string, sep string) (int64, error) {
file, err := os.Open(mmdbFile)
if err != nil {
return 0, err
return 0, fmt.Errorf("couldn't open mmdb file: %w", err)
}
defer file.Close()

@@ -96,3 +97,193 @@ func findSectionSeparator(mmdbFile, sep string) (int64, error) {

return -1, nil
}

func simplifySize(size int64) string {
const (
_ = iota
KB = 1 << (10 * iota)
MB
GB
TB
)

if size >= TB {
return fmt.Sprintf("(%.2f TB)", float64(size)/float64(TB))
} else if size >= GB {
return fmt.Sprintf("(%.2f GB)", float64(size)/float64(GB))
} else if size >= MB {
return fmt.Sprintf("(%.2f MB)", float64(size)/float64(MB))
} else if size >= KB {
return fmt.Sprintf("(%.2f KB)", float64(size)/float64(KB))
} else {
return ""
}

}
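A quick illustration of simplifySize's output; note that values below one kibibyte yield an empty string, so small offsets print without a simplified form. This snippet assumes it sits in the same package as utils.go (named lib here, an assumption) since the function is unexported.

// Demonstration only; the package name is assumed.
package lib

import "fmt"

func demoSimplifySize() {
	fmt.Println(simplifySize(512))     // ""        — below 1 KB
	fmt.Println(simplifySize(2048))    // (2.00 KB)
	fmt.Println(simplifySize(5 << 20)) // (5.00 MB)
	fmt.Println(simplifySize(3 << 30)) // (3.00 GB)
}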

type TypeSizes struct {
PointerSize int64 `json:"pointer_size"`
Utf8StringSize int64 `json:"utf8_string_size"`
DoubleSize int64 `json:"double_size"`
BytesSize int64 `json:"bytes_size"`
Unsigned16bitIntSize int64 `json:"unsigned_16bit_int_size"`
Unsigned32bitIntSize int64 `json:"unsigned_32bit_int_size"`
Signed32bitIntSize int64 `json:"signed_32bit_int_size"`
Unsigned64bitIntSize int64 `json:"unsigned_64bit_int_size"`
Unsigned128bitIntSize int64 `json:"unsigned_128bit_int_size"`
MapKeyValueCount int64 `json:"map_key_value_pair_count"`
ArrayLength int64 `json:"array_length"`
FloatSize int64 `json:"float_size"`
}
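When --data-types is combined with -f json, these totals are attached to the metadata output under the data_type_sizes key (via the *TypeSizes field added in lib/cmd_metadata.go above). A rough sketch of how the struct serializes, again assuming the package is named lib and using invented numbers.

// Demonstration of the json tags on TypeSizes; package name is assumed.
package lib

import (
	"encoding/json"
	"fmt"
)

func demoTypeSizesJSON() {
	ts := TypeSizes{
		PointerSize:      4096,
		Utf8StringSize:   120_000,
		MapKeyValueCount: 350,
	}
	out, _ := json.MarshalIndent(ts, "", "  ")
	fmt.Println(string(out))
	// Fields appear under their tags, e.g. "pointer_size",
	// "utf8_string_size", "map_key_value_pair_count"; in the command's
	// JSON output the whole object sits under "data_type_sizes".
}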

func traverseDataSection(mmdbFile string, startOffset int64, endOffset int64) (TypeSizes, error) {
file, err := os.Open(mmdbFile)
if err != nil {
return TypeSizes{}, fmt.Errorf("couldn't open mmdb file: %w", err)
}
defer file.Close()

// Go to the start offset of the data section.
_, err = file.Seek(startOffset, 0)
if err != nil {
return TypeSizes{}, err
}

var typeSizes TypeSizes

// Read and process bytes until the end offset is reached.
for offset := startOffset; offset < endOffset; {
var controlByte [1]byte
_, err := file.Read(controlByte[:])
if err != nil {
return TypeSizes{}, err
}
offset++

// Extract the type from the control byte.
dataType := (controlByte[0] >> 5) & 0b00000111 // Most significant 3 bits represent the type.
// Extract the payload size from the control byte.
payloadSize := int(controlByte[0] & 0b00011111) // Least significant 5 bits represent payload size.
// Check if it's an extended type.
if dataType == 0 {
// Read the actual type number from the next byte.
var extendedTypeByte [1]byte
_, err := file.Read(extendedTypeByte[:])
if err != nil {
return TypeSizes{}, fmt.Errorf("couldn't read the file: %v", err)
}
offset++

switch extendedTypeByte[0] {
case 1: // signed 32-bit int.
payloadSize, offset, err = payloadCalculation(file, payloadSize, offset)
if err != nil {
return TypeSizes{}, fmt.Errorf("couldn't read the file: %v", err)
}
typeSizes.Signed32bitIntSize += int64(payloadSize)
case 2: // unsigned 64-bit int.
payloadSize, offset, err = payloadCalculation(file, payloadSize, offset)
if err != nil {
return TypeSizes{}, fmt.Errorf("couldn't read the file: %v", err)
}
typeSizes.Unsigned64bitIntSize += int64(payloadSize)
case 3: // unsigned 128-bit int.
payloadSize, offset, err = payloadCalculation(file, payloadSize, offset)
if err != nil {
return TypeSizes{}, fmt.Errorf("couldn't read the file: %v", err)
}
typeSizes.Unsigned128bitIntSize += int64(payloadSize)
case 4: // array.
payloadSize, offset, err = payloadCalculation(file, payloadSize, offset)
if err != nil {
return TypeSizes{}, fmt.Errorf("couldn't read the file: %v", err)
}
typeSizes.ArrayLength += int64(payloadSize)
case 8: // float.
typeSizes.FloatSize += 4
}
} else {
// Process based on the data type.
switch dataType {
case 1: // Pointer.
size := int((controlByte[0] >> 3) & 0b00000011) // Extract the size bits at positions 3 and 4.
switch size {
case 1:
typeSizes.PointerSize += 1
case 2:
typeSizes.PointerSize += 2
case 3:
typeSizes.PointerSize += 3
}
case 2: // UTF-8 string.
payloadSize, offset, err = payloadCalculation(file, payloadSize, offset)
if err != nil {
return TypeSizes{}, fmt.Errorf("couldn't read the file: %v", err)
}
typeSizes.Utf8StringSize += int64(payloadSize)
case 3: // Double.
typeSizes.DoubleSize += 8
case 4: // Byte.
payloadSize, offset, err = payloadCalculation(file, payloadSize, offset)
if err != nil {
return TypeSizes{}, fmt.Errorf("couldn't read the file: %v", err)
}
typeSizes.BytesSize += int64(payloadSize)
case 5: // unsigned 16-bit int.
payloadSize, offset, err = payloadCalculation(file, payloadSize, offset)
if err != nil {
return TypeSizes{}, fmt.Errorf("couldn't read the file: %v", err)
}
typeSizes.Unsigned16bitIntSize += int64(payloadSize)
case 6: // unsigned 32-bit int.
payloadSize, offset, err = payloadCalculation(file, payloadSize, offset)
if err != nil {
return TypeSizes{}, fmt.Errorf("couldn't read the file: %v", err)
}
typeSizes.Unsigned32bitIntSize += int64(payloadSize)
case 7: // map.
payloadSize, offset, err = payloadCalculation(file, payloadSize, offset)
if err != nil {
return TypeSizes{}, fmt.Errorf("couldn't read the file: %v", err)
}
typeSizes.MapKeyValueCount += int64(payloadSize)
}
}
}

return typeSizes, nil
}
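traverseDataSection follows the MMDB data section encoding: the top three bits of each control byte carry the type (0 means an extended type, resolved as 7 plus the value of the following byte), and the bottom five bits carry the payload size. A small standalone sketch of that decode step, using a made-up control byte.

// Standalone decode of a single MMDB control byte (illustrative only).
package main

import "fmt"

func main() {
	// 0b010_01101: type bits = 2 (UTF-8 string), size bits = 13.
	controlByte := byte(0b01001101)

	dataType := (controlByte >> 5) & 0b00000111  // top 3 bits
	payloadSize := int(controlByte & 0b00011111) // bottom 5 bits

	fmt.Printf("type=%d payloadSize=%d\n", dataType, payloadSize)
	// type=2 payloadSize=13 -> a 13-byte UTF-8 string follows.

	// An extended type would have the top bits set to 0, with the real
	// type being 7 plus the next byte's value (e.g. next byte 8 -> type
	// 15, a 32-bit float, which the traversal above counts as 4 bytes).
}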

// payloadCalculation resolves the actual payload size when the size field read from the control byte is 29, 30, or 31.
func payloadCalculation(mmdbFile io.Reader, payloadSize int, offset int64) (int, int64, error) {
if payloadSize == 29 {
// Read the next byte as the payload size.
var nextByte [1]byte
_, err := mmdbFile.Read(nextByte[:])
if err != nil {
return -1, -1, err
}
payloadSize = int(nextByte[0]) + 29
offset++
} else if payloadSize == 30 {
// Read the next two bytes as the payload size.
var nextBytes [2]byte
_, err := mmdbFile.Read(nextBytes[:])
if err != nil {
return -1, -1, err
}
payloadSize = int(nextBytes[0])<<8 + int(nextBytes[1]) + 285
offset += 2
} else if payloadSize == 31 {
// Read the next three bytes as the payload size.
var nextBytes [3]byte
_, err := mmdbFile.Read(nextBytes[:])
if err != nil {
return -1, -1, err
}
payloadSize = int(nextBytes[0])<<16 + int(nextBytes[1])<<8 + int(nextBytes[2]) + 65821
offset += 3
}

return payloadSize, offset, nil
}
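The three thresholds mirror the MMDB spec: a size field of 29 means "29 plus the next byte", 30 means "285 plus the next two bytes (big-endian)", and 31 means "65821 plus the next three bytes". A hedged usage sketch that exercises the function with in-memory bytes instead of a file; it assumes it lives in the same package as payloadCalculation (named lib here), since the function is unexported.

// Demonstration only; package name is assumed.
package lib

import (
	"bytes"
	"fmt"
)

func demoPayloadCalculation() {
	// Size field 29: next byte 0x10 (16) -> 29 + 16 = 45.
	size, offset, _ := payloadCalculation(bytes.NewReader([]byte{0x10}), 29, 0)
	fmt.Println(size, offset) // 45 1

	// Size field 30: next two bytes 0x01 0x00 (256) -> 285 + 256 = 541.
	size, offset, _ = payloadCalculation(bytes.NewReader([]byte{0x01, 0x00}), 30, 0)
	fmt.Println(size, offset) // 541 2

	// Size field 31: next three bytes 0x00 0x01 0x00 (256) -> 65821 + 256 = 66077.
	size, offset, _ = payloadCalculation(bytes.NewReader([]byte{0x00, 0x01, 0x00}), 31, 0)
	fmt.Println(size, offset) // 66077 3

	// Anything below 29 is returned unchanged.
	size, offset, _ = payloadCalculation(bytes.NewReader(nil), 12, 0)
	fmt.Println(size, offset) // 12 0
}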
