Skip to content

Ingest

Ingest #13259

Workflow file for this run

# Workflow identity and triggers.
# The ingest runs on pushes to master that touch the listed site sources,
# on a cron schedule, and on manual dispatch.
name: Ingest
on:
  push:
    branches: [master]
    paths:
      - plugins/**
      - src/**
      - static/**
      - sidebar.js
      - package.json
      - yarn.lock
      - tailwind.config.js
  schedule:
    # At minute 10 of every 3rd hour from 8 through 23 (8, 11, ..., 23)
    - cron: '10 8-23/3 * * *'
  workflow_dispatch:
# Workflow-wide environment variables, visible to every step.
env:
  # Comma-separated assignees for broken links issues (all repos)
  BROKEN_LINKS_ASSIGNEES: 'kanelatechnical,ilyam8'
# Job definitions. The visible part of this workflow declares one job, `ingest`.
jobs:
ingest:
# The job runs on the `ubuntu-latest` runner label.
runs-on: ubuntu-latest
# Ordered list of steps executed by the `ingest` job.
steps:
# Check out the repository. `fetch-depth: 0` fetches the full history
# instead of a shallow clone, and the checkout authenticates with the
# workflow's GITHUB_TOKEN.
# The step was previously labelled "Access Token", which did not describe
# what it does; actions/checkout is also moved from v3 (Node 16 runtime,
# deprecated on GitHub-hosted runners) to v4, which accepts the same inputs.
- name: Checkout repository
  uses: actions/checkout@v4
  with:
    token: ${{ secrets.GITHUB_TOKEN }}
    fetch-depth: 0
# Loads the SSH private key stored in the NETDATABOT_SSH_PRIVATE_KEY secret
# via the webfactory ssh-agent action.
- name: github authentication
# NOTE(review): the reference after "webfactory/" looks mangled — the
# "[email protected]" text is typical email-obfuscation residue that replaced
# "ssh-agent@<version>". Restore the intended release tag before using this
# file; the exact version cannot be recovered from here — TODO confirm.
uses: webfactory/[email protected]
with:
ssh-private-key: ${{ secrets.NETDATABOT_SSH_PRIVATE_KEY }}
# Install the Python interpreter used by the later steps.
# The version stays quoted so YAML keeps it a string rather than a float.
# actions/setup-python is moved from v4 (Node 16 runtime, deprecated on
# GitHub-hosted runners) to v5; the `python-version` input is unchanged.
- name: Init python env
  uses: actions/setup-python@v5
  with:
    python-version: "3.9"
# Install the Python requirements listed in
# .learn_environment/ingest-requirements.txt.
- name: Setup python env
  run: pip install -r .learn_environment/ingest-requirements.txt
# Run the ingest script and publish two step outputs:
#   output            - the complete captured stdout+stderr of ingest.py
#   has_broken_links  - "true" when ingest.py exited with code 1
# Exit code 1 (broken links) deliberately does NOT fail the step, because a
# later step files an issue instead. Any other non-zero exit code is not a
# broken-links report, so it fails the step instead of being swallowed and
# letting later steps run on a possibly incomplete ingest.
- name: Ingest process, integration generation and learn_link checking
  id: ingest
  run: |
    # Run ingest and capture output; errexit is disabled around the call so
    # a non-zero exit code can be inspected instead of aborting the script.
    set +e
    OUTPUT=$(python ingest/ingest.py --fail-links 2>&1)
    EXIT_CODE=$?
    set -e
    # printf is used instead of echo so log text is never parsed as options.
    printf '%s\n' "$OUTPUT"

    # Save output for later steps, using a random heredoc-style delimiter so
    # that no line of the log can terminate the multi-line value early.
    DELIMITER=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
    {
      echo "output<<$DELIMITER"
      printf '%s\n' "$OUTPUT"
      echo "$DELIMITER"
    } >> "$GITHUB_OUTPUT"

    # Classify the result (exit code 1 means broken links found).
    case "$EXIT_CODE" in
      0)
        echo "has_broken_links=false" >> "$GITHUB_OUTPUT"
        ;;
      1)
        echo "has_broken_links=true" >> "$GITHUB_OUTPUT"
        ;;
      *)
        # Unexpected failure (crash, bad arguments, killed process, ...).
        echo "has_broken_links=false" >> "$GITHUB_OUTPUT"
        echo "::error::ingest.py exited with unexpected code $EXIT_CODE"
        exit "$EXIT_CODE"
        ;;
    esac

    # Don't fail the workflow for broken links - we'll create an issue instead
    exit 0
# Download the current kickstart.sh from netdata/netdata master, compute its
# MD5 digest, and substitute it for the @KICKSTART_CHECKSUM@ placeholder in
# the Linux installation page. Any failing command aborts the step.
- name: Update kickstart checksum
  run: |
    target_doc="docs/netdata-agent/installation/linux/linux.mdx"
    script_copy="/tmp/kickstart.sh"

    # Fetch the installer script.
    if ! wget -O "${script_copy}" https://raw.githubusercontent.com/netdata/netdata/master/packaging/installer/kickstart.sh; then
      exit 1
    fi

    # md5sum prints "<digest>  <file>"; keep only the digest field.
    digest="$(md5sum "${script_copy}" | cut -d ' ' -f 1)"

    # Render into a scratch file first, then move it over the original page.
    if ! sed -e "s/@KICKSTART_CHECKSUM@/${digest}/" "${target_doc}" > tmp; then
      exit 1
    fi
    if ! mv tmp "${target_doc}"; then
      exit 1
    fi
# Opens (or refreshes) a pull request from the `ingest` branch containing
# whatever the previous steps changed in the working tree, labelled
# `ingest` and `automation`.
- name: Create pull request
# NOTE(review): the reference after "peter-evans/" looks mangled — the
# "[email protected]" text is typical email-obfuscation residue that replaced
# "create-pull-request@<version>". Restore the intended release tag; the
# exact version cannot be recovered from here — TODO confirm.
uses: peter-evans/[email protected]
with:
token: ${{ secrets.GITHUB_TOKEN }}
commit-message: Ingest new documentation
title: "Ingest New Documentation"
# NOTE(review): the reference-link line of the PR body below ends with a
# trailing ":" which would become part of the URL; presumably another
# extraction artifact — verify against the real workflow file.
body: |
- Ingest new documentation
Auto-generated by [create-pull-request][1]
[1]: https://github.com/peter-evans/create-pull-request:
branch: ingest
labels: ingest, automation
# When the ingest step reported broken links, create a `broken-links` issue
# or refresh the body of the one that is already open.
#
# The ingest log is handed to the script through the environment instead of
# being expanded with an expression directly inside the JavaScript source:
# inline expansion pastes the raw log into a template literal, so a backtick,
# "${" or backslash in the log would break the script or inject code into it.
- name: Create or update broken links issue
  if: steps.ingest.outputs.has_broken_links == 'true'
  uses: actions/github-script@v7
  env:
    INGEST_OUTPUT: ${{ steps.ingest.outputs.output }}
  with:
    github-token: ${{ secrets.GITHUB_TOKEN }}
    script: |
      // Inputs arrive as plain data via environment variables:
      //   INGEST_OUTPUT           - set on this step from the ingest output
      //   BROKEN_LINKS_ASSIGNEES  - comma-separated logins from the
      //                             workflow-level env block
      const output = process.env.INGEST_OUTPUT || '';
      const assignees = (process.env.BROKEN_LINKS_ASSIGNEES || '')
        .split(',')
        .map(s => s.trim())
        .filter(s => s);
      const today = new Date().toISOString().split('T')[0];

      // Extract only the broken links sections from output: start copying at
      // the first known section header and stop after the FAILURE line.
      const lines = output.split('\n');
      let inBrokenSection = false;
      const brokenOutput = [];
      for (const line of lines) {
        if (line.includes('### Uncorrelated URLs') || line.includes('### Broken Header/Anchor Links')) {
          inBrokenSection = true;
        }
        if (inBrokenSection) {
          brokenOutput.push(line);
        }
        if (line.includes('### FAILURE:')) {
          break;
        }
      }

      // If no known section header was found, fall back to the tail of the
      // log so the issue never contains an empty report.
      const FALLBACK_TAIL_LINES = 50;
      let report = brokenOutput.length > 0
        ? brokenOutput.join('\n')
        : lines.slice(-FALLBACK_TAIL_LINES).join('\n');

      // Keep the body comfortably below GitHub's issue body size limit
      // (65536 characters at the time of writing) so the API call succeeds.
      const MAX_REPORT_CHARS = 60000;
      if (report.length > MAX_REPORT_CHARS) {
        report = report.slice(0, MAX_REPORT_CHARS)
          + '\n... (truncated; see the workflow run log for the full output)';
      }

      // Build issue body with the script output
      let body = `## Broken Links Detected\n\n`;
      body += `**Date:** ${today}\n\n`;
      body += `This issue was automatically created by the ingest workflow.\n\n`;
      body += `### Ingest Output\n\n`;
      body += `\`\`\`\n`;
      body += report;
      body += `\n\`\`\`\n\n`;
      body += `---\n`;
      body += `> Please fix these broken links in the respective source repositories.\n`;
      body += `> Once fixed, the links will be updated in Learn on the next ingest run.\n`;

      // Check if there's already an open issue. The issues listing endpoint
      // also returns pull requests, so those are filtered out.
      const existingIssues = await github.rest.issues.listForRepo({
        owner: context.repo.owner,
        repo: context.repo.repo,
        state: 'open',
        labels: 'broken-links'
      });
      const openIssues = existingIssues.data.filter(item => !item.pull_request);

      if (openIssues.length > 0) {
        // Update existing issue
        const issueNumber = openIssues[0].number;
        await github.rest.issues.update({
          owner: context.repo.owner,
          repo: context.repo.repo,
          issue_number: issueNumber,
          body: body,
          assignees: assignees
        });
        console.log(`Updated existing issue #${issueNumber}`);
      } else {
        // Create new issue
        const issue = await github.rest.issues.create({
          owner: context.repo.owner,
          repo: context.repo.repo,
          title: `[Broken Links] Documentation has broken internal links`,
          body: body,
          labels: ['broken-links', 'automation'],
          assignees: assignees
        });
        console.log(`Created issue #${issue.data.number}`);
      }