Skip to content

Commit

Permalink
require name for attachment, add recursive summarizer
Browse files Browse the repository at this point in the history
  • Loading branch information
DavideWiest committed Nov 4, 2023
1 parent 101d1cb commit 9d00a9b
Show file tree
Hide file tree
Showing 10 changed files with 89 additions and 14 deletions.
4 changes: 2 additions & 2 deletions ContextFlow/Application/Prompting/Attachement.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public class Attachment
/// <summary>
/// The name of the attachment. This is required. Choose a name that makes the context clear for the LLM.
/// </summary>
public string? Name { get; set; } = null;
public string Name { get; set; } // always assigned by the constructor; no null initializer on a non-nullable string (avoids CS8625)
/// <summary>
/// The content of the attachment. Must be a string.
/// </summary>
Expand All @@ -26,7 +26,7 @@ public class Attachment
/// </summary>
public bool IsInline { get; set; } = false;

public Attachment(string? name, string content, bool isInline)
public Attachment(string name, string content, bool isInline)
{
Name = name;
Content = content;
Expand Down
4 changes: 2 additions & 2 deletions ContextFlow/Application/Request/LLMRequestBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ internal LLMRequestBase(Prompt prompt, LLMConfig llmConfig)
}

/// <summary>
/// Adds an attachment "Output length" that dictates the LLM that the output has to be under a calculated number of words.
/// Adds an attachment "Output length" that instructs the LLM that the output has to be below a calculated number of words.
/// </summary>
/// <param name="tokenToWordRatio">The ratio between words and tokens. 4 is a rough mean estimate, but it varies across languages.</param>
/// <param name="marginOfSafetyMul">This will be multiplied to the word-count. Set it higher if the LLM has a higher chance of producing more tokens than it should.</param>
Expand All @@ -29,7 +29,7 @@ public LLMRequestBase UsingOutputLimitAttachment(double tokenToWordRatio = 4, do

int availableTokenSpace = LLMConfig.MaxTotalTokens - LLMConfig.MaxInputTokens;
int availableWords = (int)Math.Floor(availableTokenSpace / tokenToWordRatio * marginOfSafetyMul);
Prompt.UsingAttachment(new Attachment("Output length", $"The output must be under {availableWords} words long", true));
Prompt.UsingAttachment(new Attachment("Output length", $"The output must be below {availableWords} words long", true));
return this;
}
}
1 change: 1 addition & 0 deletions ContextFlow/ContextFlow.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
<GenerateAssemblyCompanyAttribute>false</GenerateAssemblyCompanyAttribute>
<GenerateAssemblyFileVersionAttribute>false</GenerateAssemblyFileVersionAttribute>
<GenerateAssemblyVersionAttribute>false</GenerateAssemblyVersionAttribute>
<GeneratePackageOnBuild>True</GeneratePackageOnBuild>
</PropertyGroup>

<ItemGroup>
Expand Down
10 changes: 8 additions & 2 deletions Demo/Program.cs
Original file line number Diff line number Diff line change
@@ -1,2 +1,8 @@
// See https://aka.ms/new-console-template for more information
Console.WriteLine("Hello, World!");

using ContextFlow.Infrastructure.Providers.OpenAI;
using Demo;

Console.WriteLine(WriteAnArticle.Write("the history of India"));
//Console.WriteLine(await WriteAnArticleAsync.Write("the history of India"));
//Console.WriteLine(RecursiveSummarizer.Summarize("<book or long article>", new OpenAITokenizer("gpt-3.5-turbo"), 1024));
//Console.WriteLine(CompleteAssignment.CompleteAssignmentFromSource("<source>"));
68 changes: 68 additions & 0 deletions Demo/RecursiveSummarizer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
using ContextFlow.Application.Templates;
using ContextFlow.Application.TextUtil;
using ContextFlow.Domain;
using ContextFlow.Infrastructure.Providers;
using ContextFlow.Infrastructure.Providers.OpenAI;
using OpenAI_API.Moderation;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Demo;

public static class RecursiveSummarizer
{
    /// <summary>
    /// Recursively summarizes <paramref name="input"/>: the text is split into chunks,
    /// each chunk is summarized, and the resulting summaries are grouped and summarized
    /// again until a single summary fits within <paramref name="summaryTokenLen"/> tokens.
    /// </summary>
    /// <param name="input">The text to summarize (e.g. a book or long article).</param>
    /// <param name="tokenizer">Tokenizer used to split the input and measure token counts.</param>
    /// <param name="summaryTokenLen">Token budget for each chunk and for the final summary.</param>
    /// <param name="nSubSummariesPerNextSummary">How many sub-summaries are merged into each next-level summary. Must be positive.</param>
    /// <returns>The final summary and the number of summarization passes (depth) performed.</returns>
    /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="nSubSummariesPerNextSummary"/> is not positive.</exception>
    public static (string Summary, int Depth) Summarize(string input, LLMTokenizer tokenizer, int summaryTokenLen, int nSubSummariesPerNextSummary)
    {
        // A non-positive group size would make GroupByCount loop forever (i += 0).
        if (nSubSummariesPerNextSummary <= 0)
            throw new ArgumentOutOfRangeException(nameof(nSubSummariesPerNextSummary), "Group size must be positive.");

        // split the inputted text into chunks that each fit the summary token budget
        var summaries = new HierarchichalTextSplitter(tokenizer, summaryTokenLen, HierarchichalTextSplitter.MarkdownBasedHierarchy, HierarchichalTextSplitter.MarkdownBasedAddToBeginnings)
            .Split(input);

        // nothing to summarize (e.g. empty input produced no chunks) — the loop below
        // would otherwise index summaries[0] out of range
        if (summaries.Count == 0)
            return (string.Empty, 0);

        int depth = 0;

        // while there is more than one summary, or the single remaining summary is
        // still longer than the maximum number of tokens
        while (summaries.Count > 1 || tokenizer.CountTokens(summaries[0]) > summaryTokenLen)
        {
            depth++;
            // group by nSubSummariesPerNextSummary and merge each group into one string
            var summaryBlocks = GroupByCount(summaries, nSubSummariesPerNextSummary).Select(ls => string.Join("\n", ls));

            // summarize the merged groups into the next level of summaries
            summaries = SummarizeInner(summaryBlocks, summaryTokenLen);
        }

        return (string.Join("\n\n", summaries), depth);
    }

    /// <summary>
    /// Partitions <paramref name="inputs"/> into consecutive groups of at most
    /// <paramref name="groupsize"/> elements; the last group may be smaller.
    /// </summary>
    private static List<List<string>> GroupByCount(List<string> inputs, int groupsize)
    {
        var result = new List<List<string>>();
        for (int i = 0; i < inputs.Count; i += groupsize)
        {
            result.Add(inputs.Skip(i).Take(groupsize).ToList());
        }
        return result;
    }

    /// <summary>
    /// Summarizes each input string via the OpenAI chat API, asking for a word count
    /// derived from <paramref name="summaryTokenLen"/>.
    /// </summary>
    private static List<string> SummarizeInner(IEnumerable<string> inputs, int summaryTokenLen)
    {
        var con = new OpenAIChatConnection();
        var result = new List<string>();

        // Word budget is loop-invariant, so compute it once rather than per input.
        const double tokenToWordRatio = 3.5;   // rough tokens-per-word estimate
        const double marginOfSafetyMul = 0.8;  // scale down because LLMs tend to overshoot the requested length
        int availableWords = (int)Math.Floor(summaryTokenLen / tokenToWordRatio * marginOfSafetyMul);
        var targetLength = $"Below {availableWords} words";

        foreach (var input in inputs)
        {
            result.Add(new SummarizeTemplate(input, targetLength).GetLLMRequest(con, "gpt-3.5-turbo").Complete().RawOutput);
        }
        return result;
    }

}
6 changes: 3 additions & 3 deletions Demo/WriteAnArticle.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ namespace Demo;

public static class WriteAnArticle
{
public static string Write(string topic)
public static string Write(string about)
{
// define the prompts that will be used below
var outlineprompt = new Prompt($"Write an outline for an article about {topic}.")
var outlineprompt = new Prompt($"Write an outline for an article about {about}.")
.UsingOutputDescription("A simple unordered list consisting of headings");

var writeprompt = new Prompt($"Write a paragraph of appropriate length about following topic. Your text will be a part of an article about {topic}.");
var writeprompt = new Prompt($"Write a paragraph of appropriate length about following topic. Your text will be a part of an article about {about}.");

// create a connection to the OpenAI API
var con = new OpenAIChatConnection(); // With api-key: new OpenAIChatConnection("<api-key>");
Expand Down
6 changes: 3 additions & 3 deletions Demo/WriteAnArticleAsync.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@ namespace Demo;

public class WriteAnArticleAsync
{
public static async Task<string> Write(string topic)
public static async Task<string> Write(string about)
{
// define the prompts that will be used below
var outlineprompt = new Prompt($"Write an outline for an article about {topic}.")
var outlineprompt = new Prompt($"Write an outline for an article about {about}.")
.UsingOutputDescription("A simple unordered list consisting of headings");

var writeprompt = new Prompt($"Write a paragraph of appropriate length about following topic. Your text will be a part of an article about {topic}.");
var writeprompt = new Prompt($"Write a paragraph of appropriate length about following topic. Your text will be a part of an article about {about}.");

// create a connection to the OpenAI API
var con = new OpenAIChatConnectionAsync(); // With api-key: new OpenAIChatConnectionAsync("<api-key>");
Expand Down
2 changes: 1 addition & 1 deletion Tests/IOTestFIles/SaverAsyncStorageTest.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"Actions": {},
"AsyncActions": {}
},
"timestamp": "2023-10-31 22:34:00"
"timestamp": "2023-11-04 18:51:15"
}
}
}
2 changes: 1 addition & 1 deletion Tests/IOTestFIles/SaverStorageTest.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"Actions": {},
"AsyncActions": {}
},
"timestamp": "2023-10-31 22:34:00"
"timestamp": "2023-11-04 18:51:15"
}
}
}
Binary file added testresults.txt
Binary file not shown.

0 comments on commit 9d00a9b

Please sign in to comment.