Skip to content

Commit eb20692

Browse files
committed
feat: llms-full.txt
1 parent 96852e1 commit eb20692

File tree

4 files changed

+219
-1
lines changed

4 files changed

+219
-1
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22
node_modules
33
.DS_Store
44
.vercel
5+
/public/llms-full.txt

deno_scripts/deno.lock

Lines changed: 86 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

deno_scripts/extract-llms.ts

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
import { walk } from "https://deno.land/[email protected]/fs/walk.ts";
2+
import { parse } from "https://deno.land/[email protected]/flags/mod.ts";
3+
import { join, relative } from "https://deno.land/[email protected]/path/mod.ts";
4+
import { existsSync } from "https://deno.land/[email protected]/fs/exists.ts";
5+
6+
// Parse command line arguments.
// `dir` is declared as a string option so that a bare `--dir` (which would
// otherwise parse to boolean `true`) or a numeric-looking value never reaches
// `join()` as a non-string; an empty value falls back to the current directory.
const args = parse(Deno.args, { string: ["dir"] });
const workspaceDir = args.dir || Deno.cwd();

// The aggregated document is written to <workspace>/public/llms-full.txt.
const outputFile = join(workspaceDir, "public", "llms-full.txt");

// Extra gitignore-style patterns for documents that must not be aggregated,
// applied in addition to whatever the workspace's .gitignore lists.
const toIgnore = [
  "/README.md",
  "pages/changelog.mdx",
  "pages/blog.mdx",
  "pages/index.mdx",
];
16+
17+
/**
 * Collects the ignore patterns that apply to the workspace rooted at `dir`.
 *
 * Reads `<dir>/.gitignore` when it exists, dropping blank lines and `#`
 * comment lines, then appends the script's own `toIgnore` entries.
 *
 * @param dir Directory expected to contain the `.gitignore` file.
 * @returns Trimmed pattern strings. The `toIgnore` entries are always
 *   included, even when the workspace has no `.gitignore`.
 * @throws Any read error other than the file being absent.
 */
async function getGitignorePatterns(dir: string): Promise<string[]> {
  const gitignorePath = join(dir, ".gitignore");

  let content = "";
  try {
    content = await Deno.readTextFile(gitignorePath);
  } catch (error) {
    // A missing .gitignore just means there are no repository patterns.
    // Reading directly (instead of checking existence first) avoids a race
    // between the check and the read.
    if (!(error instanceof Deno.errors.NotFound)) {
      throw error;
    }
  }

  const patterns = content
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => line !== "" && !line.startsWith("#"));

  // The built-in exclusions must apply regardless of whether a .gitignore
  // was found, so they are appended unconditionally.
  return [...patterns, ...toIgnore];
}
32+
33+
// Compiled matchers keyed by pattern text, so each pattern is turned into a
// RegExp once instead of once per visited file. `null` marks patterns that
// never match (negations and patterns that are empty after trimming slashes).
const ignoreMatcherCache = new Map<string, RegExp | null>();

/**
 * Translates one gitignore-style pattern into a RegExp that is tested against
 * a workspace-relative path using `/` separators.
 *
 * Supported subset:
 * - a leading `/` anchors the pattern at the workspace root;
 * - a trailing `/` only matches paths located beneath that name;
 * - `*` matches any run of characters (including `/`);
 * - everything else is literal and must start and end on a path-segment
 *   boundary.
 * Negated patterns (`!foo`) are not supported and never match.
 */
function compileIgnorePattern(pattern: string): RegExp | null {
  if (pattern.startsWith("!")) {
    return null;
  }

  const anchored = pattern.startsWith("/");
  const directoryOnly = pattern.endsWith("/");
  const core = pattern.replace(/^\/+|\/+$/g, "");
  if (core === "") {
    return null;
  }

  // Escape every regex metacharacter except `*`, which becomes a wildcard.
  const source = core
    .replace(/[.+?^${}()|[\]\\]/g, "\\$&")
    .replace(/\*/g, ".*");
  const head = anchored ? "^" : "(?:^|/)";
  const tail = directoryOnly ? "/" : "(?:/|$)";
  return new RegExp(head + source + tail);
}

/**
 * Reports whether `path` is excluded by any of the given ignore patterns.
 *
 * The path is first made relative to `workspaceDir`. Matching is done on whole
 * path segments, so `node_modules` covers `node_modules/pkg/a.md` but not
 * `node_modules_notes/a.md`, `.git` does not cover `.github/`, and
 * `/README.md` does not cover `README.mdx`.
 *
 * @param path Path of the file or directory to test.
 * @param ignorePatterns Gitignore-style patterns (see `compileIgnorePattern`).
 * @returns `true` when at least one pattern matches the relative path.
 */
function shouldIgnore(path: string, ignorePatterns: string[]): boolean {
  // Normalise Windows separators so patterns written with `/` still apply.
  const relativePath = relative(workspaceDir, path).replace(/\\/g, "/");

  for (const pattern of ignorePatterns) {
    let matcher = ignoreMatcherCache.get(pattern);
    if (matcher === undefined) {
      matcher = compileIgnorePattern(pattern);
      ignoreMatcherCache.set(pattern, matcher);
    }
    if (matcher !== null && matcher.test(relativePath)) {
      return true;
    }
  }

  return false;
}
57+
58+
/**
 * Builds `public/llms-full.txt` for the workspace.
 *
 * Steps:
 * 1. Load ignore patterns (workspace `.gitignore` plus built-in exclusions).
 * 2. Walk the workspace collecting every non-ignored `.md` / `.mdx` file.
 * 3. Append the loro-crdt TypeScript declaration file.
 * 4. Concatenate all files, each preceded by a `# FILE: <relative path>`
 *    header, and write the result to `outputFile`.
 *
 * A file that cannot be read is reported on stderr and skipped; it does not
 * abort the run.
 */
async function main(): Promise<void> {
  console.log(`Scanning workspace: ${workspaceDir}`);
  console.log(`Output will be saved to: ${outputFile}`);

  // Get gitignore patterns
  const ignorePatterns = await getGitignorePatterns(workspaceDir);
  console.log(`Found ${ignorePatterns.length} patterns in .gitignore`);

  // Add default patterns to always ignore
  ignorePatterns.push("node_modules");
  ignorePatterns.push(".git");

  // Walk through the directory to find all markdown files.
  const paths: string[] = [];
  for await (const entry of walk(workspaceDir, {
    // Leading dots make the suffix match exact on every std release.
    exts: [".md", ".mdx"],
    includeDirs: false,
    // `skip` takes regular expressions (not predicates). Pruning these two
    // directories keeps the walk from descending into dependency trees.
    // NOTE: the regexes are tested against the full path, so a workspace
    // that itself lives under a `node_modules` or `.git` directory would be
    // skipped entirely.
    skip: [
      /(?:^|[\\/])node_modules(?:[\\/]|$)/,
      /(?:^|[\\/])\.git(?:[\\/]|$)/,
    ],
  })) {
    // Pattern-based filtering (gitignore + built-in exclusions).
    if (!shouldIgnore(entry.path, ignorePatterns)) {
      paths.push(entry.path);
    }
  }

  // Directory listing order is platform dependent; sort so the generated
  // file is identical across machines and runs.
  paths.sort();

  // The API typings are always appended last, even though node_modules is
  // otherwise excluded.
  paths.push(join(workspaceDir, "/node_modules/loro-crdt/nodejs/loro_wasm.d.ts"));

  // Collect chunks and join once at the end instead of growing one string.
  const sections: string[] = [];
  let fileCount = 0;
  for (const path of paths) {
    try {
      const relativePath = relative(workspaceDir, path);
      // Read the file content
      const fileContent = await Deno.readTextFile(path);

      // Add file path as section header followed by the content
      sections.push(`\n\n# FILE: ${relativePath}\n\n`, fileContent);

      fileCount++;
      if (fileCount % 10 === 0) {
        console.log(`Processed ${fileCount} files...`);
      }
    } catch (error) {
      // Best effort: report the unreadable file and continue with the rest.
      console.error(`Error reading file ${path}:`, error);
    }
  }

  // Ensure the output directory exists
  const outputDir = join(workspaceDir, "public");
  try {
    await Deno.mkdir(outputDir, { recursive: true });
  } catch (error) {
    if (!(error instanceof Deno.errors.AlreadyExists)) {
      throw error;
    }
  }

  // Write the concatenated content to the output file
  await Deno.writeTextFile(outputFile, sections.join(""));

  console.log(`\nCompleted: ${fileCount} markdown files have been concatenated into ${outputFile}`);
}
128+
129+
// Script entry point: run main() and wait for it via top-level await, so the
// module does not finish evaluating until the output file has been written.
130+
await main();

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@
33
"version": "0.0.1",
44
"scripts": {
55
"dev": "next dev",
6-
"build": "node gen-rss.js && next build",
6+
"build": "pnpm run extract-llms && node gen-rss.js && next build",
77
"postbuild": "next-sitemap",
88
"start": "next start",
99
"test": "deno run -A ./deno_scripts/run_code_blocks.ts",
1010
"storybook": "storybook dev -p 6006",
1111
"build-storybook": "storybook build",
12+
"extract-llms": "deno run -A ./deno_scripts/extract-llms.ts",
1213
"changelog": "node gen-rss.js"
1314
},
1415
"repository": {

0 commit comments

Comments
 (0)