|
| 1 | +import { walk } from "https://deno.land/[email protected]/fs/walk.ts"; |
| 2 | +import { parse } from "https://deno.land/[email protected]/flags/mod.ts"; |
| 3 | +import { join, relative } from "https://deno.land/[email protected]/path/mod.ts"; |
| 4 | +import { existsSync } from "https://deno.land/[email protected]/fs/exists.ts"; |
| 5 | + |
// ---------------------------------------------------------------------------
// Script configuration
// ---------------------------------------------------------------------------

// Flags given on the command line; only `--dir` is read in this section.
const args = parse(Deno.args);

// Root directory to scan: `--dir` when it is set to a truthy value, otherwise
// the current working directory.
const workspaceDir = args.dir ? args.dir : Deno.cwd();

// Destination of the concatenated bundle: <workspaceDir>/public/llms-full.txt.
const outputFile = join(workspaceDir, "public", "llms-full.txt");

// Project-specific ignore patterns that getGitignorePatterns adds to the
// patterns read from `.gitignore`.
const toIgnore = [
  "/README.md",
  "pages/changelog.mdx",
  "pages/blog.mdx",
  "pages/index.mdx",
];
| 16 | + |
/**
 * Collects the ignore patterns that apply to a workspace.
 *
 * Reads `<dir>/.gitignore`, trims every line, drops blank lines and `#`
 * comments, and appends the script's own `toIgnore` entries. The `toIgnore`
 * entries are returned even when there is no `.gitignore`, so the
 * project-specific exclusions do not depend on that file being present.
 *
 * @param dir Directory expected to contain the `.gitignore` file.
 * @returns Patterns from `.gitignore` (none if the file is missing) followed
 *   by the entries of `toIgnore`.
 * @throws Any error raised while reading the file other than
 *   `Deno.errors.NotFound`.
 */
async function getGitignorePatterns(dir: string): Promise<string[]> {
  const gitignorePath = join(dir, ".gitignore");

  // Read first and handle "not found" afterwards: checking for existence
  // before reading leaves a window in which the file can disappear.
  let content = "";
  try {
    content = await Deno.readTextFile(gitignorePath);
  } catch (error) {
    if (!(error instanceof Deno.errors.NotFound)) {
      throw error;
    }
  }

  const patterns = content
    .split(/\r?\n/)
    .map((line) => line.trim())
    .filter((line) => line !== "" && !line.startsWith("#"));

  return [...patterns, ...toIgnore];
}
| 32 | + |
/**
 * Compiled matchers keyed by the raw pattern text, so each pattern is
 * translated once rather than once per visited path. `null` marks a pattern
 * that can never match.
 */
const ignoreMatcherCache = new Map<string, RegExp | null>();

/**
 * Translates the body of a gitignore-style glob (no leading or trailing
 * slash) into regular-expression source.
 *
 * `*` and `?` stay inside one path segment, `**` may cross segments,
 * `[...]` is kept as a character class (`[!...]` negates), and every other
 * character is matched literally.
 */
function globToRegExpSource(glob: string): string {
  let source = "";
  for (let i = 0; i < glob.length; i++) {
    const ch = glob.charAt(i);
    if (ch === "*") {
      if (glob.charAt(i + 1) !== "*") {
        source += "[^/]*";
        continue;
      }
      const atSegmentStart = i === 0 || glob.charAt(i - 1) === "/";
      // Collapse a run of stars into a single "any depth" wildcard.
      while (glob.charAt(i + 1) === "*") i++;
      if (atSegmentStart && glob.charAt(i + 1) === "/") {
        // "**/" also matches zero directories, e.g. "**/foo" matches "foo".
        source += "(?:.*/)?";
        i++;
      } else {
        source += ".*";
      }
    } else if (ch === "?") {
      source += "[^/]";
    } else if (ch === "[") {
      // Search from i + 2 so that a "]" directly after "[" is a class member.
      const close = glob.indexOf("]", i + 2);
      const inner = close === -1 ? "" : glob.slice(i + 1, close);
      const negated = inner.startsWith("!") || inner.startsWith("^");
      const members = negated ? inner.slice(1) : inner;
      if (members === "") {
        // Unterminated or empty class: treat the bracket as a literal.
        source += "\\[";
        continue;
      }
      source += `[${negated ? "^" : ""}${members.replace(/[\\\]\[^]/g, "\\$&")}]`;
      i = close;
    } else {
      // Escape regex metacharacters so names like "c++" cannot break the matcher.
      source += ch.replace(/[.+^${}()|\]\\]/g, "\\$&");
    }
  }
  return source;
}

/**
 * Compiles one ignore pattern into a matcher for workspace-relative,
 * "/"-separated paths, or returns `null` when the pattern cannot match
 * (blank, comment, bare slashes, malformed) or is a `!` negation, which this
 * simple matcher does not support.
 */
function compileIgnorePattern(pattern: string): RegExp | null {
  let glob = pattern.trim();
  if (glob === "" || glob.startsWith("#") || glob.startsWith("!")) {
    return null;
  }

  const anchored = glob.startsWith("/");
  const directoryOnly = glob.endsWith("/");
  glob = glob.replace(/^\/+|\/+$/g, "");
  if (glob === "") {
    return null;
  }

  // Anchored patterns must start at the workspace root; all others may start
  // at any segment boundary.
  const head = anchored ? "^" : "^(?:.*/)?";
  // A match always ends on a segment boundary. Directory-only patterns
  // additionally require something beneath the matched directory.
  const tail = directoryOnly ? "/.*$" : "(?:/.*)?$";

  try {
    return new RegExp(head + globToRegExpSource(glob) + tail);
  } catch {
    // e.g. an out-of-order range such as "[z-a]"; treat as matching nothing.
    return null;
  }
}

/**
 * Reports whether `path` is excluded by any of the given ignore patterns.
 *
 * The path is made relative to `workspaceDir` and compared using a simplified
 * form of the gitignore rules:
 * - a leading `/` anchors the pattern to the workspace root;
 * - a trailing `/` restricts the pattern to directories, so it only matches
 *   paths located beneath that directory;
 * - every other pattern, including one containing a slash in the middle, may
 *   match starting at any directory depth (looser than git, which anchors
 *   such patterns);
 * - matches always begin and end on path-segment boundaries, so
 *   `node_modules` ignores `node_modules/x.md` but not `node_modules_notes.md`;
 * - a path is ignored when the pattern matches the path itself or any of its
 *   ancestor directories;
 * - `!` negation patterns are not supported and never match.
 *
 * @param path Path of the file or directory to test.
 * @param ignorePatterns Gitignore-style patterns to test against.
 * @returns `true` if at least one pattern excludes the path.
 */
function shouldIgnore(path: string, ignorePatterns: string[]): boolean {
  const nativeRelativePath = relative(workspaceDir, path);
  // Patterns are written with "/", so convert Windows separators first.
  const relativePath = Deno.build.os === "windows"
    ? nativeRelativePath.replace(/\\/g, "/")
    : nativeRelativePath;

  return ignorePatterns.some((pattern) => {
    let matcher = ignoreMatcherCache.get(pattern);
    if (matcher === undefined) {
      matcher = compileIgnorePattern(pattern);
      ignoreMatcherCache.set(pattern, matcher);
    }
    return matcher !== null && matcher.test(relativePath);
  });
}
| 57 | + |
/**
 * Concatenates the workspace's Markdown sources into a single text file.
 *
 * Steps:
 * 1. Load the ignore patterns via `getGitignorePatterns` and add
 *    `node_modules` and `.git`.
 * 2. Walk `workspaceDir` for `.md` / `.mdx` files, dropping every path that
 *    `shouldIgnore` rejects.
 * 3. Append the `loro-crdt` type declaration file from `node_modules`.
 * 4. Write each readable file, preceded by a `# FILE: <relative path>`
 *    header, to `outputFile`, creating the `public` directory if needed.
 *
 * A file that cannot be read is logged and left out; it does not abort the
 * run. Errors from creating the output directory or writing the output file
 * propagate to the caller.
 */
async function main() {
  console.log(`Scanning workspace: ${workspaceDir}`);
  console.log(`Output will be saved to: ${outputFile}`);

  // Get gitignore patterns
  const ignorePatterns = await getGitignorePatterns(workspaceDir);
  console.log(`Found ${ignorePatterns.length} patterns in .gitignore`);

  // Add default patterns to always ignore
  ignorePatterns.push("node_modules", ".git");

  // `walk` expects regular expressions in `skip`; a callback placed there is
  // never invoked. These expressions are anchored to the workspace root so
  // the two large top-level directories are pruned without being traversed,
  // and so a workspace that itself lives inside a `node_modules` directory
  // is still scanned.
  const escapeForRegExp = (text: string): string =>
    text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const prunedDirectories = ["node_modules", ".git"].map(
    (name) =>
      new RegExp(`^${escapeForRegExp(join(workspaceDir, name))}(?:[\\\\/]|$)`),
  );

  // Walk through the directory to find all markdown files
  const paths: string[] = [];
  for await (const entry of walk(workspaceDir, {
    // Dotted, so that only real extensions match and not any name that
    // merely ends in "md".
    exts: [".md", ".mdx"],
    includeDirs: false,
    skip: prunedDirectories,
  })) {
    if (!shouldIgnore(entry.path, ignorePatterns)) {
      paths.push(entry.path);
    }
  }

  // Directory enumeration order is not guaranteed; sorting keeps the
  // generated file stable from run to run.
  paths.sort();

  // The type declarations are added explicitly (they sit inside the ignored
  // node_modules directory) and always come last.
  paths.push(join(workspaceDir, "/node_modules/loro-crdt/nodejs/loro_wasm.d.ts"));

  // One entry per successfully read file: section header followed by content.
  const sections: string[] = [];
  for (const path of paths) {
    try {
      const relativePath = relative(workspaceDir, path);
      const fileContent = await Deno.readTextFile(path);
      sections.push(`\n\n# FILE: ${relativePath}\n\n${fileContent}`);

      if (sections.length % 10 === 0) {
        console.log(`Processed ${sections.length} files...`);
      }
    } catch (error) {
      console.error(`Error reading file ${path}:`, error);
    }
  }
  const fileCount = sections.length;

  // Ensure the output directory exists. With `recursive: true` an existing
  // directory is not an error, so any failure here is a real problem (for
  // example `public` being a regular file) and is allowed to propagate.
  await Deno.mkdir(join(workspaceDir, "public"), { recursive: true });

  // Write the concatenated content to the output file
  await Deno.writeTextFile(outputFile, sections.join(""));

  console.log(`\nCompleted: ${fileCount} markdown files have been concatenated into ${outputFile}`);
}

// Run the main function
await main();
0 commit comments