feat: convert-hf.py (#62)
b4rtaz authored May 24, 2024
1 parent 9a1e284 commit 2e523f6
Showing 18 changed files with 294 additions and 185 deletions.
1 change: 0 additions & 1 deletion .gitignore
@@ -3,7 +3,6 @@
*.o
*.dSYM
*.data
*.bin
__pycache__

*-test
6 changes: 4 additions & 2 deletions README.md
@@ -14,7 +14,9 @@ Run LLMs on weak devices or make powerful devices even more powerful by distribu
**Supported models:**
* Llama 2 (7B, 13B, 70B) chat and non-chat versions,
* Llama 3,
* Grok-1 (314B).
* Grok-1 (314B)
* Mistral, Mixtral
* TinyLlama

**Known limitations:**
* You can run Distributed Llama only on 1, 2, 4... 2^n devices.
@@ -28,7 +30,7 @@ Run LLMs on weak devices or make powerful devices even more powerful by distribu
* ❌ F32 × F32
* ❌ F16 × F32
* ❌ Q40 × F32
* ⚠️ Q40 × Q80 (partial optimization)
* Q40 × Q80

**Architecture**<br />
The project is split up into two parts:
3 changes: 3 additions & 0 deletions converter/.gitignore
@@ -1 +1,4 @@
*.t
*.m
*.bin
*/
8 changes: 2 additions & 6 deletions converter/convert-grok-1.py
@@ -2,7 +2,7 @@
import torch
import sys
import os
from writer import isFloatTypeSupported, writeTensor, writeHeader
from writer import parseFloatType, writeTensor, writeHeader

# Model: https://huggingface.co/keyfan/grok-1-hf/tree/main

@@ -116,11 +116,7 @@ def usage():
usage()

folderPath = sys.argv[1]
targetFloatType = sys.argv[2]
targetFloatType = parseFloatType(sys.argv[2])
outputFileName = f'dllama-grok-1-{targetFloatType}.bin'

if not isFloatTypeSupported(targetFloatType):
print('Float type is not supported')
exit(1)

convert(targetFloatType, outputFileName)
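
Both converters now pull their float-type handling from `writer.py`: the manual `isFloatTypeSupported` check is dropped in favour of `parseFloatType`, and fixed precisions are referenced through `FloatType` constants. The updated `writer.py` is among the 18 changed files but is not included in this excerpt; the block below is only a rough sketch of what such helpers might look like, assuming the type names match the ones used by the converters (f32, f16, q40, q80) — the constant values are illustrative, not taken from the commit.

```python
# Hypothetical sketch of the writer.py helpers referenced above -- the real
# implementation in this commit is not shown in this excerpt.
class FloatType:
    F16 = 0
    F32 = 1
    Q40 = 2
    Q80 = 3

_NAME_TO_FLOAT_TYPE = {
    'f16': FloatType.F16,
    'f32': FloatType.F32,
    'q40': FloatType.Q40,
    'q80': FloatType.Q80,
}

def parseFloatType(name: str) -> int:
    # Map a CLI argument like "q40" to a FloatType constant, failing loudly
    # instead of silently writing an unsupported format.
    floatType = _NAME_TO_FLOAT_TYPE.get(name.lower())
    if floatType is None:
        raise Exception(f'Unsupported float type: {name}')
    return floatType
```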
210 changes: 210 additions & 0 deletions converter/convert-hf.py
@@ -0,0 +1,210 @@
import gc
import json
import sys
import os
from writer import parseFloatType, writeTensor, writeHeader, FloatType
from safetensors import safe_open

class ArchType:
LLAMA = 0xABCD00
MIXTRAL = 0xABCD02

def permute(tensor, nHeads: int, nKvHeads: int):
if nHeads != nKvHeads:
nHeads = nKvHeads
return (tensor.reshape(nHeads, 2, tensor.shape[0] // nHeads // 2, *tensor.shape[1:]).swapaxes(1, 2).reshape(tensor.shape))

class Processor:
def __init__(self, config):
self.config = config
self.currentModelIndex = None
self.currentModel = None
self.currentModelKeys = None
self.layerMap = {}
self.plan = []

def __unloadModel(self):
if self.currentModel:
del self.currentModel
self.currentModel = None
gc.collect()

def __loadModel(self, index: int):
if (self.currentModelIndex == index):
return
self.__unloadModel()
filePath = self.config['files'][index]
fileName = os.path.basename(filePath)
print(f'💿 Loading file {fileName}...')
self.currentModel = safe_open(filePath, framework='pt', device='cpu')
self.currentModelKeys = list(self.currentModel.keys())
for key in self.currentModelKeys:
self.layerMap[key] = index
print(f'Found {len(self.currentModelKeys)} layers')
self.currentModelIndex = index

def __permuteQ(self, tensor):
return permute(tensor, self.config['n_heads'], self.config['n_heads'])

def __permuteK(self, tensor):
return permute(tensor, self.config['n_heads'], self.config['n_kv_heads'])

def __preparePlan(self):
wt = self.config['weights_float_type']
p = self.plan
p.append([FloatType.F32,
'model.embed_tokens.weight'])
for l in range(0, self.config['n_layers']):
p.append([wt, self.__permuteQ,
f'model.layers.{l}.self_attn.q_proj.weight'])
p.append([wt, self.__permuteK,
f'model.layers.{l}.self_attn.k_proj.weight'])
p.append([wt,
f'model.layers.{l}.self_attn.v_proj.weight'])
p.append([wt,
f'model.layers.{l}.self_attn.o_proj.weight'])

if (self.config['n_experts'] > 0):
for e in range(self.config['n_experts']):
p.append([wt,
f'model.layers.{l}.block_sparse_moe.experts.{e}.w3.weight']) # up
p.append([wt,
f'model.layers.{l}.block_sparse_moe.experts.{e}.w1.weight']) # gate
p.append([wt,
f'model.layers.{l}.block_sparse_moe.experts.{e}.w2.weight']) # down
else:
p.append([wt,
f'model.layers.{l}.mlp.gate_proj.weight']) # gate
p.append([wt,
f'model.layers.{l}.mlp.down_proj.weight']) # down
p.append([wt,
f'model.layers.{l}.mlp.up_proj.weight']) # up

p.append([FloatType.F32,
f'model.layers.{l}.input_layernorm.weight'])
p.append([FloatType.F32,
f'model.layers.{l}.post_attention_layernorm.weight'])
p.append([FloatType.F32,
'model.norm.weight'])
p.append([wt,
'lm_head.weight'])

def write(self, outputFile: str):
self.__preparePlan()
for planItem in self.plan:
lookup = planItem[1:]
transform = None
if (callable(lookup[0])):
transform = lookup[0]
lookup = lookup[1:]

if (self.currentModelIndex == None):
modelIndex = 0
else:
modelIndex = None
for layerName in lookup:
if (layerName in self.layerMap):
modelIndex = self.layerMap[layerName]
break
if (modelIndex is None):
modelIndex = self.currentModelIndex + 1
self.__loadModel(modelIndex)

tensor = None
for layerName in lookup:
if (layerName in self.currentModelKeys):
tensor = self.currentModel.get_tensor(layerName)
break
if tensor is None:
raise Exception(f'Layer {lookup[0]} not found')
print(f'🔶 Writing tensor {layerName} {tensor.shape}...')

floatType = planItem[0]
if (transform):
tensor = transform(tensor)
writeTensor(outputFile, tensor, floatType)

def parseArchType(type: str):
archType = {
'llama': ArchType.LLAMA,
'mistral': ArchType.LLAMA,
'mixtral': ArchType.MIXTRAL,
}.get(type)
if (archType is None):
raise Exception(f'Unsupported arch type: {type}')
return archType

def parseHiddenAct(act: str):
hiddenAct = {
'gelu': 0,
'silu': 1
}.get(act)
if (hiddenAct is None):
raise Exception(f'Unsupported hidden act: {act}')
return hiddenAct

def loadConfig(folderPath: str, weightsFloatType: int):
allFiles = os.listdir(folderPath)
allFiles.sort()
with open(os.path.join(folderPath, 'config.json')) as fc:
config = json.load(fc)
files = []
for fileName in allFiles:
if fileName.endswith('.safetensors'):
files.append(os.path.join(folderPath, fileName))
if (len(files) == 0):
raise Exception('Not found any model file')

result = {
'version': 0,
'arch_type': parseArchType(config['model_type']),
'hidden_act': parseHiddenAct(config['hidden_act']),
'dim': config['hidden_size'],
'hidden_dim': config['intermediate_size'],
'n_layers': config['num_hidden_layers'],
'n_heads': config['num_attention_heads'],
'n_kv_heads': config['num_key_value_heads'],
'weights_float_type': weightsFloatType,
'max_seq_len': config['max_position_embeddings'],
'vocab_size': config['vocab_size'],
'files': files,
}

nExperts = config.get('num_local_experts')
nActiveExperts = config.get('num_active_local_experts') or config.get('num_experts_per_tok')
result['n_experts'] = int(nExperts) if nExperts is not None else 0
result['n_active_experts'] = int(nActiveExperts) if nActiveExperts is not None else 0

ropeTheta = config.get('rope_theta')
if (ropeTheta is not None):
result['rope_theta'] = int(ropeTheta)
return result

def printUsage():
print('Usage: python convert-hf.py <sourceFolderPath> <weightsFloatType> <name>')
print()
print('Options:')
print(' <sourceFolderPath> The path to the folder containing the model files')
print(' <weightsFloatType> The float type of the weights (e.g. "q40")')
print(' <name> The name of the model (e.g. "llama3")')

if __name__ == '__main__':
if (len(sys.argv) < 4):
printUsage()
exit(1)

sourceFolderPath = sys.argv[1]
weightsFloatType = parseFloatType(sys.argv[2])
name = sys.argv[3]
outputFileName = f'dllama_model_{name}_{sys.argv[2]}.m'

print(f'Output file: {outputFileName}')

config = loadConfig(sourceFolderPath, weightsFloatType)

with open(outputFileName, 'wb') as outputFile:
writeHeader(outputFile, config)
processor = Processor(config)
processor.write(outputFile)

print(f'✅ {outputFileName} created successfully')
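
The `permute` helper at the top of `convert-hf.py` is applied to the `q_proj` and `k_proj` weights (through `__permuteQ` and `__permuteK`), presumably to reorder the projection rows from the Hugging Face checkpoint layout into the rotary-embedding layout the Distributed Llama runtime expects. Below is a small self-contained illustration of the row reordering it performs; the toy shapes and values are made up purely for demonstration, and `torch` is assumed to be installed.

```python
import torch

def permute(tensor, nHeads: int, nKvHeads: int):
    # Copy of the helper from convert-hf.py above, repeated so this snippet
    # runs on its own.
    if nHeads != nKvHeads:
        nHeads = nKvHeads
    return (tensor.reshape(nHeads, 2, tensor.shape[0] // nHeads // 2, *tensor.shape[1:])
            .swapaxes(1, 2)
            .reshape(tensor.shape))

# Toy Q projection: 2 heads, head size 4, model dim 8.
# Row i is filled with the value i so the reordering is easy to see.
w = torch.arange(8, dtype=torch.float32).unsqueeze(1).repeat(1, 8)

print(permute(w, 2, 2)[:, 0])
# tensor([0., 2., 1., 3., 4., 6., 5., 7.])
# Inside each head, rows from the first and second half of the head
# dimension end up interleaved.
```

For reference, the usage text above implies an invocation such as `python convert-hf.py ./Mistral-7B-v0.1 q40 mistral` (the folder path here is illustrative), which would produce `dllama_model_mistral_q40.m`.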
11 changes: 4 additions & 7 deletions converter/convert-llama.py
@@ -4,7 +4,7 @@
import torch
import math
import numpy as np
from writer import writeTensor, writeHeader, isFloatTypeSupported
from writer import writeTensor, writeHeader, parseFloatType, FloatType
from pathlib import Path

LAYER_CHUNK_SIZE = 48
@@ -81,7 +81,7 @@ def convert(modelPath, outputPath, targetFloatType):
layerName.endswith('.ffn_norm.weight') or
layerName == 'norm.weight'
)
floatType = 'f32' if isAlwaysF32 else targetFloatType
floatType = FloatType.F32 if isAlwaysF32 else targetFloatType

tensors = models[layerName]
if len(tensors) == 1 or len(tensors[0].shape) == 1:
@@ -105,13 +105,10 @@ def usage():
usage()

modelPath = sys.argv[1]
targetFloatType = sys.argv[2]

if (not modelPath or not isFloatTypeSupported(targetFloatType)):
usage()
targetFloatType = parseFloatType(sys.argv[2])

modelName = modelPath.split('/')[-1]
outputFileName = f'dllama_{modelName.lower()}_{targetFloatType}.bin'
outputFileName = f'dllama_model_{modelName.lower()}_{targetFloatType}.m'

print(f'Model name: {modelName}')
print(f'Target float type: {targetFloatType}')