
Commit

test: fix tests
giladgd committed Jan 6, 2025
1 parent 2527413 commit bb33a5d
Showing 3 changed files with 37 additions and 5 deletions.
12 changes: 12 additions & 0 deletions src/evaluator/LlamaContext/types.ts
@@ -326,6 +326,8 @@ export type SequenceEvaluateMetadataOptions = {
* Same as `probabilities.get(token)` from the output.
*
* If you need only this value, you can skip getting the full probabilities list to improve performance.
*
* This value might be slightly different when evaluated on different GPUs and configurations.
*/
readonly confidence?: boolean,

@@ -359,6 +361,8 @@ export type SequenceEvaluateOutput<
* Same as `probabilities.get(token)`.
*
* If you need only this value, you can skip getting the full probabilities list to improve performance.
*
* This value might be slightly different when evaluated on different GPUs and configurations.
*/
confidence: number,

@@ -367,6 +371,8 @@
*
* A probability is a number from `0` to `1`.
*
* The probabilities might be slightly different when evaluated on different GPUs and configurations.
*
* The map is sorted by the probability of the tokens from the highest to the lowest,
* and is reflected in the order of the entries when iterating over the map.
* Use `.entries().next().value` to get the top probability pair
@@ -392,6 +398,8 @@ export type ControlledEvaluateInputItem = Token | [token: Token, options: {
* Same as `next.probabilities.get(next.token)` from the output.
*
* If you need only this value, you can skip getting the full probabilities list to improve performance.
*
* This value might be slightly different when evaluated on different GPUs and configurations.
*/
confidence?: boolean,

@@ -437,6 +445,8 @@ export type ControlledEvaluateIndexOutput = {
* Same as `next.probabilities.get(next.token)`.
*
* If you need only this value, you can skip getting the full probabilities list to improve performance.
*
* This value might be slightly different when evaluated on different GPUs and configurations.
*/
confidence?: number,

@@ -445,6 +455,8 @@
*
* A probability is a number from `0` to `1`.
*
* The probabilities might be slightly different when evaluated on different GPUs and configurations.
*
* The map is sorted by the probability of the tokens from the highest to the lowest,
* and is reflected in the order of the entries when iterating over the map.
* Use `.entries().next().value` to get the top probability pair
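Taken together, the comments above describe a cheap way to read a per-token confidence value and, when needed, the full sorted probabilities map. The following sketch shows how that might look from the caller's side; it assumes `evaluateWithMetadata` is exposed on a context sequence (as the test file below suggests) with the option and output shapes shown in this diff, and the model path is only a placeholder.

```ts
import {getLlama} from "node-llama-cpp";

const llama = await getLlama();
const model = await llama.loadModel({
    modelPath: "Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf" // placeholder path
});
const context = await model.createContext();
const sequence = context.getSequence();

const tokens = model.tokenize("The quick brown fox");

// Assumed call shape: the metadata options mirror SequenceEvaluateMetadataOptions,
// and each yielded item mirrors SequenceEvaluateOutput from the diff above.
for await (const output of sequence.evaluateWithMetadata(tokens, {
    confidence: true,    // cheaper than requesting the full probabilities list
    probabilities: true  // requested here only to demonstrate reading the top entry
})) {
    // The map is sorted from the highest probability to the lowest,
    // so the first entry is the top token/probability pair.
    const topEntry = output.probabilities.entries().next().value;

    console.log({
        token: model.detokenize([output.token]),
        confidence: output.confidence, // same value as output.probabilities.get(output.token)
        topEntry
    });
    break; // inspect only the first generated token in this sketch
}
```

If only the confidence of the sampled token is needed, the `probabilities: true` line can be dropped, which is exactly the performance shortcut the doc comments describe.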
6 changes: 5 additions & 1 deletion test/modelDependent/llama3.1/controlledEvaluate.test.ts
@@ -5,10 +5,14 @@ import {getTestLlama} from "../../utils/getTestLlama.js";

describe("llama 3.1", () => {
describe("controlled evaluate", () => {
test("get probabilities for 3 tokens", {timeout: 1000 * 60 * 60 * 2}, async () => {
test("get probabilities for 3 tokens", {timeout: 1000 * 60 * 60 * 2}, async (testContext) => {
const modelPath = await getModelFile("Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf");
const llama = await getTestLlama();

// the precise values are different for each GPU type, so we skip the test for GPUs other than metal
if (llama.gpu !== "metal")
testContext.skip();

const model = await llama.loadModel({
modelPath
});
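The change above gates the test on the GPU backend: Vitest passes a context object to the test callback, and calling `testContext.skip()` marks the test as skipped at runtime. As a self-contained illustration of the pattern (the `getModelFile` import path is a guess; `getTestLlama` is imported exactly as shown in the diff), a gated test might look like this:

```ts
import {describe, test} from "vitest";
import {getModelFile} from "../../utils/modelFiles.js"; // assumed path for the project's helper
import {getTestLlama} from "../../utils/getTestLlama.js";

describe("llama 3.1", () => {
    describe("controlled evaluate", () => {
        test("get probabilities for 3 tokens", {timeout: 1000 * 60 * 60 * 2}, async (testContext) => {
            const modelPath = await getModelFile("Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf");
            const llama = await getTestLlama();

            // the expected values in this test were captured on Metal,
            // so skip the test on every other GPU backend
            if (llama.gpu !== "metal")
                testContext.skip();

            const model = await llama.loadModel({modelPath});
            // ... run the evaluation and compare against the Metal-captured expectations ...
        });
    });
});
```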
24 changes: 20 additions & 4 deletions test/modelDependent/llama3.1/evaluateWithMetadata.test.ts
@@ -67,10 +67,14 @@ describe("llama 3.1", () => {
`);
});

test("with probabilities", {timeout: 1000 * 60 * 60 * 2}, async () => {
test("with probabilities", {timeout: 1000 * 60 * 60 * 2}, async (testContext) => {
const modelPath = await getModelFile("Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf");
const llama = await getTestLlama();

// the precise values are different for each GPU type, so we skip the test for GPUs other than metal
if (llama.gpu !== "metal")
testContext.skip();

const model = await llama.loadModel({
modelPath
});
@@ -246,10 +250,14 @@ describe("llama 3.1", () => {
`);
});

test("with confidence", {timeout: 1000 * 60 * 60 * 2}, async () => {
test("with confidence", {timeout: 1000 * 60 * 60 * 2}, async (testContext) => {
const modelPath = await getModelFile("Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf");
const llama = await getTestLlama();

// the precise values are different for each GPU type, so we skip the test for GPUs other than metal
if (llama.gpu !== "metal")
testContext.skip();

const model = await llama.loadModel({
modelPath
});
@@ -315,10 +323,14 @@ describe("llama 3.1", () => {
`);
});

test("with probabilities and confidence", {timeout: 1000 * 60 * 60 * 2}, async () => {
test("with probabilities and confidence", {timeout: 1000 * 60 * 60 * 2}, async (testContext) => {
const modelPath = await getModelFile("Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf");
const llama = await getTestLlama();

// the precise values are different for each GPU type, so we skip the test for GPUs other than metal
if (llama.gpu !== "metal")
testContext.skip();

const model = await llama.loadModel({
modelPath
});
@@ -504,10 +516,14 @@ describe("llama 3.1", () => {
`);
});

test("confidence alone matches probability alone", {timeout: 1000 * 60 * 60 * 2}, async () => {
test("confidence alone matches probability alone", {timeout: 1000 * 60 * 60 * 2}, async (testContext) => {
const modelPath = await getModelFile("Meta-Llama-3.1-8B-Instruct.Q4_K_M.gguf");
const llama = await getTestLlama();

// the precise values are different for each GPU type, so we skip the test for GPUs other than metal
if (llama.gpu !== "metal")
testContext.skip();

const model = await llama.loadModel({
modelPath
});
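The last test above ("confidence alone matches probability alone") checks the relationship stated in the type docs: `confidence` is the same value as `probabilities.get(token)`. A rough sketch of that invariant, under the same assumed `evaluateWithMetadata` call shape as before, with two fresh sequences standing in for the test's setup and deterministic (greedy) sampling assumed so both runs pick the same token:

```ts
import type {LlamaContextSequence, Token} from "node-llama-cpp";

// Reads the first item produced by evaluateWithMetadata (assumed call shape,
// mirroring SequenceEvaluateMetadataOptions/SequenceEvaluateOutput from this commit).
async function firstOutput(
    sequence: LlamaContextSequence,
    tokens: Token[],
    metadata: {confidence?: boolean, probabilities?: boolean}
) {
    for await (const output of sequence.evaluateWithMetadata(tokens, metadata))
        return output;

    return undefined;
}

// `sequenceA` and `sequenceB` are assumed to be two fresh sequences in the same state,
// so both evaluations start from an identical context.
export async function confidenceMatchesProbability(
    sequenceA: LlamaContextSequence,
    sequenceB: LlamaContextSequence,
    tokens: Token[]
): Promise<boolean> {
    const confidenceOnly = await firstOutput(sequenceA, tokens, {confidence: true});
    const probabilitiesOnly = await firstOutput(sequenceB, tokens, {probabilities: true});

    if (confidenceOnly?.confidence == null || probabilitiesOnly?.probabilities == null)
        return false;

    // The confidence of the sampled token should equal its entry in the probabilities map.
    return confidenceOnly.confidence === probabilitiesOnly.probabilities.get(confidenceOnly.token);
}
```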
