Initial Release

This commit is contained in:
Alonso Guevara
2024-07-01 15:25:30 -06:00
commit 81b81cf60b
575 changed files with 50648 additions and 0 deletions

19
.github/dependabot.yml vendored Normal file
View File

@@ -0,0 +1,19 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
version: 2
updates:
  - package-ecosystem: "npm" # See documentation for possible values
    directory: "docsite/" # Location of package manifests
    schedule:
      interval: "weekly"
  - package-ecosystem: "pip" # See documentation for possible values
    directory: "/" # Location of package manifests
    schedule:
      interval: "weekly"
  - package-ecosystem: "github-actions"
    # Workflow files stored in the default location of `.github/workflows`. (You don't need to specify `/.github/workflows` for `directory`. You can use `directory: "/"`.)
    directory: "/"
    schedule:
      interval: "weekly"

36
.github/pull_request_template.md vendored Normal file
View File

@@ -0,0 +1,36 @@
<!--
Thanks for contributing to GraphRAG!
Please do not make *Draft* pull requests, as they still notify anyone watching the repo.
Create a pull request when it is ready for review and feedback.
About this template
The following template aims to help contributors write a good description for their pull requests.
We'd like you to provide a description of the changes in your pull request (i.e. bugs fixed or features added), the motivation behind the changes, and complete the checklist below before opening a pull request.
Feel free to discard it if you need to (e.g. when you just fix a typo). -->
## Description
[Provide a brief description of the changes made in this pull request.]
## Related Issues
[Reference any related issues or tasks that this pull request addresses.]
## Proposed Changes
[List the specific changes made in this pull request.]
## Checklist
- [ ] I have tested these changes locally.
- [ ] I have reviewed the code changes.
- [ ] I have updated the documentation (if necessary).
- [ ] I have added appropriate unit tests (if applicable).
## Additional Notes
[Add any additional notes or context that may be helpful for the reviewer(s).]

95
.github/workflows/gh-pages.yml vendored Normal file
View File

@@ -0,0 +1,95 @@
# Builds and deploys the docsite to GitHub Pages on every push to main.
name: gh-pages
on:
  push:
    branches: [main]
permissions:
  contents: write
env:
  POETRY_VERSION: 1.8.3
  PYTHON_VERSION: '3.11'
  NODE_VERSION: 18.x
jobs:
  build:
    runs-on: ubuntu-latest
    env:
      GH_PAGES: 1
      DEBUG: 1
      GRAPHRAG_LLM_TYPE: "azure_openai_chat"
      GRAPHRAG_EMBEDDING_TYPE: "azure_openai_embedding"
      GRAPHRAG_API_KEY: ${{ secrets.OPENAI_API_KEY }}
      GRAPHRAG_API_BASE: ${{ secrets.GRAPHRAG_API_BASE }}
      GRAPHRAG_API_VERSION: ${{ secrets.GRAPHRAG_API_VERSION }}
      GRAPHRAG_LLM_DEPLOYMENT_NAME: ${{ secrets.GRAPHRAG_LLM_DEPLOYMENT_NAME }}
      GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME: ${{ secrets.GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME }}
      GRAPHRAG_CACHE_TYPE: "blob"
      GRAPHRAG_CACHE_CONNECTION_STRING: ${{ secrets.BLOB_STORAGE_CONNECTION_STRING }}
      GRAPHRAG_CACHE_CONTAINER_NAME: "cicache"
      # Fix: the key previously had a stray trailing double quote
      # (GRAPHRAG_CACHE_BASE_DIR"), producing a malformed env var name.
      GRAPHRAG_CACHE_BASE_DIR: "cache"
      GRAPHRAG_LLM_MODEL: gpt-3.5-turbo-16k
      GRAPHRAG_EMBEDDING_MODEL: text-embedding-ada-002
      # We have Windows + Linux runners in 3.10 and 3.11, so we need to divide the rate limits by 4
      GRAPHRAG_LLM_TPM: 45_000 # 180,000 / 4
      GRAPHRAG_LLM_RPM: 270 # 1,080 / 4
      GRAPHRAG_EMBEDDING_TPM: 87_500 # 350,000 / 4
      GRAPHRAG_EMBEDDING_RPM: 525 # 2,100 / 4
      GRAPHRAG_CHUNK_SIZE: 1200
      GRAPHRAG_CHUNK_OVERLAP: 0
      # Azure AI Search config
      AZURE_AI_SEARCH_URL_ENDPOINT: ${{ secrets.AZURE_AI_SEARCH_URL_ENDPOINT }}
      AZURE_AI_SEARCH_API_KEY: ${{ secrets.AZURE_AI_SEARCH_API_KEY }}
    steps:
      - uses: actions/checkout@v4
        with:
          persist-credentials: false
      - name: Set up Python ${{ env.PYTHON_VERSION }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Install Poetry ${{ env.POETRY_VERSION }}
        uses: abatilo/actions-poetry@v3.0.0
        with:
          poetry-version: ${{ env.POETRY_VERSION }}
      - name: Use Node ${{ env.NODE_VERSION }}
        uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
      - name: Install Yarn dependencies
        run: yarn install
        working-directory: docsite
      - name: Install Poetry dependencies
        run: poetry install
      - name: Install Azurite
        id: azuright
        uses: potatoqualitee/azuright@v1.1
      # Runs the smoke tests to produce real indexer output, then packages the
      # parquet artifacts as a downloadable dataset for the docsite.
      - name: Generate Indexer Outputs
        run: |
          poetry run poe test_smoke
          zip -jrm docsite/data/operation_dulce/dataset.zip tests/fixtures/min-csv/output/*/artifacts/*.parquet
      - name: Build Jupyter Notebooks
        run: poetry run poe convert_docsite_notebooks
      - name: Build docsite
        run: yarn build
        working-directory: docsite
      - name: List docsite files
        run: find docsite/_site
      - name: Deploy to GitHub Pages
        uses: JamesIves/github-pages-deploy-action@v4.6.1
        with:
          branch: gh-pages
          folder: docsite/_site
          clean: true

30
.github/workflows/javascript-ci.yml vendored Normal file
View File

@@ -0,0 +1,30 @@
# CI for the JavaScript docsite: install dependencies and verify the build.
name: JavaScript CI
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
env:
  NODE_VERSION: 18.x
jobs:
  javascript-ci:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
    steps:
      - name: Use Node ${{ env.NODE_VERSION }}
        uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
      - uses: actions/checkout@v4
      - run: yarn install
        working-directory: docsite
        name: Install Dependencies
      - run: yarn build
        working-directory: docsite
        name: Build Docsite

122
.github/workflows/python-ci.yml vendored Normal file
View File

@@ -0,0 +1,122 @@
# Python CI: lint/type checks, build, and unit/integration/smoke/e2e tests
# across a Python-version x OS matrix.
name: Python CI
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
permissions:
  contents: read
  pull-requests: read
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  # Only run for the latest commit
  cancel-in-progress: true
env:
  POETRY_VERSION: 1.8.3
jobs:
  python-ci:
    strategy:
      matrix:
        python-version: ["3.10", "3.11", "3.12"]
        os: [ubuntu-latest, windows-latest]
    env:
      DEBUG: 1
      GRAPHRAG_LLM_TYPE: "azure_openai_chat"
      GRAPHRAG_EMBEDDING_TYPE: "azure_openai_embedding"
      GRAPHRAG_API_KEY: ${{ secrets.OPENAI_API_KEY }}
      GRAPHRAG_API_BASE: ${{ secrets.GRAPHRAG_API_BASE }}
      GRAPHRAG_API_VERSION: ${{ secrets.GRAPHRAG_API_VERSION }}
      GRAPHRAG_LLM_DEPLOYMENT_NAME: ${{ secrets.GRAPHRAG_LLM_DEPLOYMENT_NAME }}
      GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME: ${{ secrets.GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME }}
      GRAPHRAG_CACHE_TYPE: "blob"
      GRAPHRAG_CACHE_CONNECTION_STRING: ${{ secrets.BLOB_STORAGE_CONNECTION_STRING }}
      GRAPHRAG_CACHE_CONTAINER_NAME: "cicache"
      # Fix: the key previously had a stray trailing double quote
      # (GRAPHRAG_CACHE_BASE_DIR"), producing a malformed env var name.
      GRAPHRAG_CACHE_BASE_DIR: "cache"
      GRAPHRAG_LLM_MODEL: gpt-3.5-turbo-16k
      GRAPHRAG_EMBEDDING_MODEL: text-embedding-ada-002
      # We have Windows + Linux runners in 3.10 and 3.11, so we need to divide the rate limits by 4
      GRAPHRAG_LLM_TPM: 45_000 # 180,000 / 4
      GRAPHRAG_LLM_RPM: 270 # 1,080 / 4
      GRAPHRAG_EMBEDDING_TPM: 87_500 # 350,000 / 4
      GRAPHRAG_EMBEDDING_RPM: 525 # 2,100 / 4
      GRAPHRAG_CHUNK_SIZE: 1200
      GRAPHRAG_CHUNK_OVERLAP: 0
      # Azure AI Search config
      AZURE_AI_SEARCH_URL_ENDPOINT: ${{ secrets.AZURE_AI_SEARCH_URL_ENDPOINT }}
      AZURE_AI_SEARCH_API_KEY: ${{ secrets.AZURE_AI_SEARCH_API_KEY }}
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
      # Gate expensive smoke/e2e tests on whether Python-related files changed.
      - uses: dorny/paths-filter@v3
        id: changes
        with:
          filters: |
            python:
              - 'graphrag/**/*'
              - 'poetry.lock'
              - 'pyproject.toml'
              - '**/*.py'
              - '**/*.toml'
              - '**/*.ipynb'
              - '.github/workflows/python*.yml'
              - 'tests/smoke/*'
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install Poetry
        uses: abatilo/actions-poetry@v3.0.0
        with:
          # Fix: "$POETRY_VERSION" is not shell-expanded in `with:` inputs;
          # use the GitHub Actions expression syntax instead.
          poetry-version: ${{ env.POETRY_VERSION }}
      - name: Install dependencies
        shell: bash
        run: poetry self add setuptools && poetry run python -m pip install gensim && poetry install
      - name: Check Semversioner
        run: |
          poetry run semversioner check
      - name: Check
        run: |
          poetry run poe check
      - name: Build
        run: |
          poetry build
      - name: Install Azurite
        id: azuright
        uses: potatoqualitee/azuright@v1.1
      - name: Unit Test
        run: |
          poetry run poe test_unit
      - name: Integration Test
        run: |
          poetry run poe test_integration
      - name: Smoke Test
        if: steps.changes.outputs.python == 'true'
        run: |
          poetry run poe test_smoke
      - uses: actions/upload-artifact@v4
        if: always()
        with:
          # Fix: matrix.poetry-version does not exist in this matrix (it always
          # evaluated to empty); use the env-level POETRY_VERSION instead.
          name: smoke-test-artifacts-${{ matrix.python-version }}-${{ env.POETRY_VERSION }}-${{ runner.os }}
          path: tests/fixtures/*/output
      - name: E2E Test
        if: steps.changes.outputs.python == 'true'
        run: |
          ./scripts/e2e-test.sh

56
.github/workflows/python-publish.yml vendored Normal file
View File

@@ -0,0 +1,56 @@
# Builds the package and publishes it to PyPI via trusted publishing.
name: Python Publish
on:
  release:
    types: [created]
  push:
    branches: [main]
env:
  POETRY_VERSION: 1.8.3
  # Fix: quoted — an unquoted 3.10 is parsed by YAML as the float 3.1.
  PYTHON_VERSION: "3.10"
jobs:
  publish:
    name: Upload release to PyPI
    if: github.ref == 'refs/heads/main'
    runs-on: ubuntu-latest
    environment:
      name: pypi
      url: https://pypi.org/p/graphrag
    permissions:
      id-token: write # IMPORTANT: this permission is mandatory for trusted publishing
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history + tags so poetry-dynamic-versioning can derive the version.
          fetch-depth: 0
          fetch-tags: true
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Fix: "$PYTHON_VERSION" is not shell-expanded in `with:` inputs;
          # use the GitHub Actions expression syntax instead.
          python-version: ${{ env.PYTHON_VERSION }}
      - name: Install Poetry
        uses: abatilo/actions-poetry@v3.0.0
        with:
          # Fix: same expansion issue as above.
          poetry-version: ${{ env.POETRY_VERSION }}
      - name: Add poetry-dynamic-versioning plugin
        run: poetry self add "poetry-dynamic-versioning[plugin]"
      - name: Install dependencies
        shell: bash
        run: poetry install
      - name: Build Distributable
        shell: bash
        run: poetry build
      - name: Publish package distributions to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          packages-dir: dist
          skip-existing: true
          verbose: true

15
.github/workflows/semver.yml vendored Normal file
View File

@@ -0,0 +1,15 @@
# Verifies that every pull request includes a semver impact document.
name: Semver Check
on:
  pull_request:
    branches: [main]
jobs:
  semver:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history so the script can compare against earlier commits.
          fetch-depth: 0
      - name: Check Semver
        run: ./scripts/semver-check.sh

15
.github/workflows/spellcheck.yml vendored Normal file
View File

@@ -0,0 +1,15 @@
# Runs the repository spellcheck script on pushes to main and on all PRs.
name: Spellcheck
on:
  push:
    branches: [main]
  pull_request:
    paths:
      - '**/*'
jobs:
  spellcheck:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Spellcheck
        run: ./scripts/spellcheck.sh

67
.gitignore vendored Normal file
View File

@@ -0,0 +1,67 @@
# Node Artifacts
*/node_modules/
docsite/*/src/**/*.js
docsite/*/lib/
docsite/*/storybook-static/
docsite/*/docsTemp/
docsite/*/build/
.swc/
dist/
# https://yarnpkg.com/advanced/qa#which-files-should-be-gitignored
docsite/.yarn/*
!docsite/.yarn/patches
!docsite/.yarn/releases
!docsite/.yarn/plugins
!docsite/.yarn/sdks
!docsite/.yarn/versions
docsite/.pnp.*
.yarn/*
!.yarn/patches
!.yarn/releases
!.yarn/plugins
!.yarn/sdks
!.yarn/versions
.pnp.*
# Python Artifacts
python/*/lib/
# Test Output
.coverage
coverage/
licenses.txt
examples_notebooks/*/lancedb
examples_notebooks/*/data
tests/fixtures/cache
tests/fixtures/*/cache
tests/fixtures/*/output
lancedb/
# Random
.DS_Store
*.log*
.venv
.conda
.tmp
.env
build.zip
.turbo
__pycache__
.pipeline
# Azurite
temp_azurite/
__azurite*.json
__blobstorage*.json
__blobstorage__/
# Getting started example
ragtest/
.ragtest/
.pipelines

View File

@@ -0,0 +1,4 @@
{
"type": "minor",
"description": "Initial Release"
}

11
.vscode/extensions.json vendored Normal file
View File

@@ -0,0 +1,11 @@
{
"recommendations": [
"arcanis.vscode-zipfs",
"ms-python.python",
"charliermarsh.ruff",
"ms-python.vscode-pylance",
"bierner.markdown-mermaid",
"streetsidesoftware.code-spell-checker",
"ronnidc.nunjucks"
]
}

12
.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,12 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "Attach to Node Functions",
"type": "node",
"request": "attach",
"port": 9229,
"preLaunchTask": "func: host start"
}
]
}

52
.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,52 @@
{
"search.exclude": {
"**/.yarn": true,
"**/.pnp.*": true
},
"eslint.nodePath": ".yarn/sdks",
"typescript.tsdk": ".yarn/sdks/typescript/lib",
"typescript.enablePromptUseWorkspaceTsdk": true,
"javascript.preferences.importModuleSpecifier": "relative",
"javascript.preferences.importModuleSpecifierEnding": "js",
"typescript.preferences.importModuleSpecifier": "relative",
"typescript.preferences.importModuleSpecifierEnding": "js",
"explorer.fileNesting.enabled": true,
"explorer.fileNesting.patterns": {
"*.ts": "${capture}.ts, ${capture}.hooks.ts, ${capture}.hooks.tsx, ${capture}.contexts.ts, ${capture}.stories.tsx, ${capture}.story.tsx, ${capture}.spec.tsx, ${capture}.base.ts, ${capture}.base.tsx, ${capture}.types.ts, ${capture}.styles.ts, ${capture}.styles.tsx, ${capture}.utils.ts, ${capture}.utils.tsx, ${capture}.constants.ts, ${capture}.module.scss, ${capture}.module.css, ${capture}.md",
"*.js": "${capture}.js.map, ${capture}.min.js, ${capture}.d.ts",
"*.jsx": "${capture}.js",
"*.tsx": "${capture}.ts, ${capture}.hooks.ts, ${capture}.hooks.tsx, ${capture}.contexts.ts, ${capture}.stories.tsx, ${capture}.story.tsx, ${capture}.spec.tsx, ${capture}.base.ts, ${capture}.base.tsx, ${capture}.types.ts, ${capture}.styles.ts, ${capture}.styles.tsx, ${capture}.utils.ts, ${capture}.utils.tsx, ${capture}.constants.ts, ${capture}.module.scss, ${capture}.module.css, ${capture}.md, ${capture}.css",
"tsconfig.json": "tsconfig.*.json",
"package.json": "package-lock.json, turbo.json, tsconfig.json, rome.json, biome.json, .npmignore, dictionary.txt, cspell.config.yaml",
"README.md": "*.md, LICENSE, CODEOWNERS",
".eslintrc": ".eslintignore",
".prettierrc": ".prettierignore",
".gitattributes": ".gitignore",
".yarnrc.yml": "yarn.lock, .pnp.*",
"jest.config.js": "jest.setup.mjs",
"pyproject.toml": "poetry.lock, poetry.toml",
"cspell.config.yaml": "dictionary.txt"
},
"azureFunctions.postDeployTask": "npm install (functions)",
"azureFunctions.projectLanguage": "TypeScript",
"azureFunctions.projectRuntime": "~4",
"debug.internalConsoleOptions": "neverOpen",
"azureFunctions.preDeployTask": "npm prune (functions)",
"appService.zipIgnorePattern": [
"node_modules{,/**}",
".vscode{,/**}"
],
"python.defaultInterpreterPath": "python/services/.venv/bin/python",
"python.languageServer": "Pylance",
"python.analysis.typeCheckingMode": "basic",
"cSpell.customDictionaries": {
"project-words": {
"name": "project-words",
"path": "${workspaceRoot}/dictionary.txt",
"description": "Words used in this project",
"addWords": true
},
"custom": true, // Enable the `custom` dictionary
"internal-terms": true // Disable the `internal-terms` dictionary
}
}

41
.vsts-ci.yml Normal file
View File

@@ -0,0 +1,41 @@
# Azure DevOps pipeline: compliance scans (CredScan, component governance)
# run on pushes to main.
name: GraphRAG CI
pool:
  vmImage: ubuntu-latest
trigger:
  batch: true
  branches:
    include:
      - main
variables:
  isMain: $[eq(variables['Build.SourceBranch'], 'refs/heads/main')]
  pythonVersion: "3.10"
  poetryVersion: "1.6.1"
  nodeVersion: "18.x"
  artifactsFullFeedName: "Resilience/resilience_python"
stages:
  - stage: Compliance
    dependsOn: []
    jobs:
      - job: compliance
        displayName: Compliance
        pool:
          vmImage: windows-latest
        steps:
          - task: CredScan@3
            inputs:
              outputFormat: sarif
              debugMode: false
          - task: ComponentGovernanceComponentDetection@0
            inputs:
              scanType: "Register"
              verbosity: "Verbose"
              alertWarningLevel: "High"
          - task: PublishSecurityAnalysisLogs@3
            inputs:
              ArtifactName: "CodeAnalysisLogs"
              ArtifactType: "Container"

6
CODEOWNERS Normal file
View File

@@ -0,0 +1,6 @@
# These owners will be the default owners for everything in
# the repo. Unless a later match takes precedence,
# @global-owner1 and @global-owner2 will be requested for
# review when someone opens a pull request.
* @microsoft/societal-resilience

9
CODE_OF_CONDUCT.md Normal file
View File

@@ -0,0 +1,9 @@
# Microsoft Open Source Code of Conduct
This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
Resources:
- [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/)
- [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
- Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns

79
CONTRIBUTING.md Normal file
View File

@@ -0,0 +1,79 @@
# Contributing to GraphRAG
Thank you for your interest in contributing to GraphRAG! We welcome contributions from the community to help improve the project.
## Code of Conduct
This project welcomes contributions and suggestions. Most contributions require you to agree to a Contributor License Agreement (CLA)
declaring that you have the right to, and actually do, grant us the rights to use your contribution.
For details, visit https://cla.microsoft.com.
When you submit a pull request, a CLA-bot will automatically determine whether you need
to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the
instructions provided by the bot. You will only need to do this once across all repositories using our CLA.
This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
## How to Contribute
1. Fork the repository and clone it to your local machine.
2. Create a new branch for your contribution: `git checkout -b my-contribution`.
3. Make your changes and ensure that the code passes all tests.
4. Commit your changes: `git commit -m "Add my contribution"`.
5. Create and commit a semver impact document by running `poetry run semversioner add-change -t <major|minor|patch> -d <description>`.
6. Push your changes to your forked repository: `git push origin my-contribution`.
7. Open a pull request to the main repository.
## Reporting Security Issues
**Please do not report security vulnerabilities through public GitHub issues.** Instead, please report them to the Microsoft Security Response Center (MSRC).
See [SECURITY.md](./SECURITY.md) for more information.
## Before you start, file an issue
Please follow this simple rule to help us eliminate any unnecessary wasted effort & frustration, and ensure an efficient and effective use of everyone's time - yours, ours, and other community members':
> 👉 If you have a question, think you've discovered an issue, would like to propose a new feature, etc., then find/file an issue **BEFORE** starting work to fix/implement it.
### Search existing issues first
Before filing a new issue, search existing open and closed issues first: This project is moving fast! It is likely someone else has found the problem you're seeing, and someone may be working on or have already contributed a fix!
If no existing item describes your issue/feature, great - please file a new issue:
### File a new Issue
- Don't know whether you're reporting an issue or requesting a feature? File an issue
- Have a question that you don't see answered in docs, videos, etc.? File an issue
- Want to know if we're planning on building a particular feature? File an issue
- Got a great idea for a new feature? File an issue/request/idea
- Don't understand how to do something? File an issue
- Found an existing issue that describes yours? Great - upvote and add additional commentary / info / repro-steps / etc.
If, after following the guidance above, you find that you need to file an issue, please use the [issue tracker](https://github.com/microsoft/graphrag/issues).
Provide as much detail as possible to help us understand and address the problem.
### Add information
**Complete the new Issue form, providing as much information as possible**. The more information you provide, the more likely your issue/ask will be understood and implemented. Helpful information includes:
- What device you're running (inc. CPU type, memory, disk, etc.)
- What OS your device is running
- What tools and apps you're using (e.g. VS 2022, VSCode, etc.)
- **We LOVE detailed repro steps!** What steps do we need to take to reproduce the issue? Assume we love to read repro steps. As much detail as you can stand is probably _barely_ enough detail for us!
- Prefer error message text where possible or screenshots of errors if text cannot be captured
- **If you intend to implement the fix/feature yourself then say so!** If you do not indicate otherwise we will assume that the issue is ours to solve, or may label the issue as `Help-Wanted`.
### DO NOT post "+1" comments
> ⚠ DO NOT post "+1", "me too", or similar comments - they just add noise to an issue.
If you don't have any additional info/context to add but would like to indicate that you're affected by the issue, upvote the original issue by clicking its [+😊] button and hitting 👍 (+1) icon. This way we can actually measure how impactful an issue is.
---
## Thank you
We appreciate your contributions to GraphRAG!

85
DEVELOPING.md Normal file
View File

@@ -0,0 +1,85 @@
# GraphRAG Development
# Requirements
| Name | Installation | Purpose |
| ------------------- | ------------------------------------------------------------ | ----------------------------------------------------------------------------------- |
| Python 3.10 or 3.11 | [Download](https://www.python.org/downloads/) | The library is Python-based. |
| Poetry | [Instructions](https://python-poetry.org/docs/#installation) | Poetry is used for package management and virtualenv management in Python codebases |
# Getting Started
## Install Dependencies
```sh
# Install Python dependencies.
poetry install
```
## Execute the Indexing Engine
```sh
poetry run poe index <...args>
```
## Executing Queries
```sh
poetry run poe query <...args>
```
# Azurite
Some unit and smoke tests use Azurite to emulate Azure resources. This can be started by running:
```sh
./scripts/start-azurite.sh
```
or by simply running `azurite` in the terminal if already installed globally. See the [Azurite documentation](https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azurite) for more information about how to install and use Azurite.
# Lifecycle Scripts
Our Python package utilizes Poetry to manage dependencies and [poethepoet](https://pypi.org/project/poethepoet/) to manage build scripts.
Available scripts are:
- `poetry run poe index` - Run the Indexing CLI
- `poetry run poe query` - Run the Query CLI
- `poetry build` - This invokes `poetry build`, which will build a wheel file and other distributable artifacts.
- `poetry run poe test` - This will execute all tests.
- `poetry run poe test_unit` - This will execute unit tests.
- `poetry run poe test_integration` - This will execute integration tests.
- `poetry run poe test_smoke` - This will execute smoke tests.
- `poetry run poe check` - This will perform a suite of static checks across the package, including:
- formatting
- documentation formatting
- linting
- security patterns
- type-checking
- `poetry run poe fix` - This will apply any available auto-fixes to the package. Usually this is just formatting fixes.
- `poetry run poe fix_unsafe` - This will apply any available auto-fixes to the package, including those that may be unsafe.
- `poetry run poe format` - Explicitly run the formatter across the package.
## Troubleshooting
### "RuntimeError: llvm-config failed executing, please point LLVM_CONFIG to the path for llvm-config" when running poetry install
Make sure llvm-9 and llvm-9-dev are installed:
`sudo apt-get install llvm-9 llvm-9-dev`
and then in your bashrc, add
`export LLVM_CONFIG=/usr/bin/llvm-config-9`
### "numba/\_pymodule.h:6:10: fatal error: Python.h: No such file or directory" when running poetry install
Make sure you have python3.10-dev installed or more generally `python<version>-dev`
`sudo apt-get install python3.10-dev`
### LLM call constantly exceeds TPM, RPM or time limits
`GRAPHRAG_LLM_THREAD_COUNT` and `GRAPHRAG_EMBEDDING_THREAD_COUNT` are both set to 50 by default. You can modify these values
to reduce concurrency. Please refer to the [Configuration Documents](https://microsoft.github.io/graphrag/posts/config/overview/)

21
LICENSE Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) Microsoft Corporation.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

41
RAI_TRANSPARENCY.md Normal file
View File

@@ -0,0 +1,41 @@
# GraphRAG: Responsible AI FAQ
## What is GraphRAG?
GraphRAG is an AI-based content interpretation and search capability. Using LLMs, it parses data to create a knowledge graph and answer user questions about a user-provided private dataset.
## What can GraphRAG do?
GraphRAG is able to connect information across large volumes of information and use these connections to answer questions that are difficult or impossible to answer using keyword and vector-based search mechanisms. Building on the previous question, provide semi-technical, high-level information on how the system offers functionality for various uses. This lets a system using GraphRAG to answer questions where the answers span many documents as well as thematic questions such as “what are the top themes in this dataset?.”
## What are GraphRAGs intended use(s)?
* GraphRAG is intended to support critical information discovery and analysis use cases where the information required to arrive at a useful insight spans many documents, is noisy, is mixed with mis and/or dis-information, or when the questions users aim to answer are more abstract or thematic than the underlying data can directly answer.
* GraphRAG is designed to be used in settings where users are already trained on responsible analytic approaches and critical reasoning is expected. GraphRAG is capable of providing high degrees of insight on complex information topics, however human analysis by a domain expert of the answers is needed in order to verify and augment GraphRAGs generated responses.
* GraphRAG is intended to be deployed and used with a domain specific corpus of text data. GraphRAG itself does not collect user data, but users are encouraged to verify data privacy policies of the chosen LLM used to configure GraphRAG.
## How was GraphRAG evaluated? What metrics are used to measure performance?
GraphRAG has been evaluated in multiple ways. The primary concerns are 1) accurate representation of the data set, 2) providing transparency and groundedness of responses, 3) resilience to prompt and data corpus injection attacks, and 4) low hallucination rates. Details on how each of these has been evaluated is outlined below by number.
1) Accurate representation of the dataset has been tested by both manual inspection and automated testing against a “gold answer” that is created from randomly selected subsets of a test corpus.
2) Transparency and groundedness of responses is tested via automated answer coverage evaluation and human inspection of the underlying context returned.
3) We test both user prompt injection attacks (“jailbreaks”) and cross prompt injection attacks (“data attacks”) using manual and semi-automated techniques.
4) Hallucination rates are evaluated using claim coverage metrics, manual inspection of answer and source, and adversarial attacks to attempt a forced hallucination through adversarial and exceptionally challenging datasets.
## What are the limitations of GraphRAG? How can users minimize the impact of GraphRAGs limitations when using the system?
GraphRAG depends on well-constructed indexing examples. For general applications (e.g. content oriented around people, places, organizations, things, etc.) we provide example indexing prompts. For unique datasets effective indexing can depend on proper identification of domain-specific concepts.
Indexing is a relatively expensive operation; a best practice to mitigate indexing is to create a small test dataset in the target domain to ensure indexer performance prior to large indexing operations.
## What operational factors and settings allow for effective and responsible use of GraphRAG?
GraphRAG is designed for use by users with domain sophistication and experience working through difficult information challenges. While the approach is generally robust to injection attacks and identifying conflicting sources of information, the system is designed for trusted users. Proper human analysis of responses is important to generate reliable insights, and the provenance of information should be traced to ensure human agreement with the inferences made as part of the answer generation.
GraphRAG yields the most effective results on natural language text data that is collectively focused on an overall topic or theme, and that is entity rich — entities being people, places, things, or objects that can be uniquely identified.
While GraphRAG has been evaluated for its resilience to prompt and data corpus injection attacks, and has been probed for specific types of harms, the LLM that the user configures with GraphRAG may produce inappropriate or offensive content, which may make it inappropriate to deploy for sensitive contexts without additional mitigations that are specific to the use case and model. Developers should assess outputs for their context and use available safety classifiers, model specific safety filters and features (such as https://azure.microsoft.com/en-us/products/ai-services/ai-content-safety), or custom solutions appropriate for their use case.

52
README.md Normal file
View File

@@ -0,0 +1,52 @@
# GraphRAG
👉 [Use the GraphRAG Accelerator solution](https://github.com/Azure-Samples/graphrag-accelerator)
👉 [Microsoft Research Blog Post](https://www.microsoft.com/en-us/research/blog/graphrag-unlocking-llm-discovery-on-narrative-private-data/)
👉 [Read the docs](https://microsoft.github.io/graphrag)
## Overview
The GraphRAG project is a data pipeline and transformation suite that is designed to extract meaningful, structured data from unstructured text using the power of LLMs.
To learn more about GraphRAG and how it can be used to enhance your LLM's ability to reason about your private data, please visit the <a href="https://www.microsoft.com/en-us/research/blog/graphrag-unlocking-llm-discovery-on-narrative-private-data/" target="_blank">Microsoft Research Blog Post.</a>
## Quickstart
To get started with the GraphRAG system we recommend trying the [Solution Accelerator](https://github.com/Azure-Samples/graphrag-accelerator) package. This provides a user-friendly end-to-end experience with Azure resources.
## Repository Guidance
This repository presents a methodology for using knowledge graph memory structures to enhance LLM outputs. Please note that the provided code serves as a demonstration and is not an officially supported Microsoft offering.
## Diving Deeper
- To learn about our contribution guidelines, see [CONTRIBUTING.md](./CONTRIBUTING.md)
- To start developing _GraphRAG_, see [DEVELOPING.md](./DEVELOPING.md)
## Prompt Tuning
Using _GraphRAG_ with your data out of the box may not yield the best possible results.
We strongly recommend fine-tuning your prompts by following the [Prompt Tuning Guide](https://microsoft.github.io/graphrag/posts/prompt_tuning/overview/) in our documentation.
## Responsible AI FAQ
See [RAI_TRANSPARENCY.md](./RAI_TRANSPARENCY.md)
- [What is GraphRAG?](./RAI_TRANSPARENCY.md#what-is-graphrag)
- [What can GraphRAG do?](./RAI_TRANSPARENCY.md#what-can-graphrag-do)
- [What are GraphRAG's intended use(s)?](./RAI_TRANSPARENCY.md#what-are-graphrags-intended-uses)
- [How was GraphRAG evaluated? What metrics are used to measure performance?](./RAI_TRANSPARENCY.md#how-was-graphrag-evaluated-what-metrics-are-used-to-measure-performance)
- [What are the limitations of GraphRAG? How can users minimize the impact of GraphRAG's limitations when using the system?](./RAI_TRANSPARENCY.md#what-are-the-limitations-of-graphrag-how-can-users-minimize-the-impact-of-graphrags-limitations-when-using-the-system)
- [What operational factors and settings allow for effective and responsible use of GraphRAG?](./RAI_TRANSPARENCY.md#what-operational-factors-and-settings-allow-for-effective-and-responsible-use-of-graphrag)
## Trademarks
This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft
trademarks or logos is subject to and must follow
[Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general).
Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship.
Any use of third-party trademarks or logos is subject to those third parties' policies.
## Privacy
[Microsoft Privacy Statement](https://privacy.microsoft.com/en-us/privacystatement)

37
SECURITY.md Normal file
View File

@@ -0,0 +1,37 @@
## Security
Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below.
## Reporting Security Issues
**Please do not report security vulnerabilities through public GitHub issues.**
Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report).
If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc).
You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc).
Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
* Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
* Full paths of source file(s) related to the manifestation of the issue
* The location of the affected source code (tag/branch/commit or direct URL)
* Any special configuration required to reproduce the issue
* Step-by-step instructions to reproduce the issue
* Proof-of-concept or exploit code (if possible)
* Impact of the issue, including how an attacker might exploit the issue
This information will help us triage your report more quickly.
If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs.
## Preferred Languages
We prefer all communications to be in English.
## Policy
Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd).

27
SUPPORT.md Normal file
View File

@@ -0,0 +1,27 @@
# Support
## How to file issues and get help
This project uses GitHub Issues to track bugs and feature requests. Please search the existing
issues before filing new issues to avoid duplicates. For new issues, file your bug or
feature request as a new Issue.
For help and questions about using this project, please create a GitHub issue with your question.
## Microsoft Support Policy
# Support for this **PROJECT or PRODUCT** is limited to the resources listed above.
# Support
## How to file issues and get help
This project uses GitHub Issues to track bugs and feature requests. Please search the existing
issues before filing new issues to avoid duplicates. For new issues, file your bug or
feature request as a new Issue.
For help and questions about using this project, please file an issue on the repo.
## Microsoft Support Policy
Support for this project is limited to the resources listed above.

33
cspell.config.yaml Normal file
View File

@@ -0,0 +1,33 @@
$schema: https://raw.githubusercontent.com/streetsidesoftware/cspell/main/cspell.schema.json
version: "0.2"
allowCompoundWords: true
dictionaryDefinitions:
- name: dictionary
path: "./dictionary.txt"
addWords: true
dictionaries:
- dictionary
ignorePaths:
- cspell.config.yaml
- node_modules
- _site
- /project-words.txt
- default_pipeline.yml
- .turbo
- output/
- dist/
- temp_azurite/
- __pycache__
- pyproject.toml
- entity_extraction.txt
- package.json
- tests/fixtures/
- docsite/data/
- docsite/nbdocsite_template/
- docsite/posts/query/notebooks/inputs/
- examples_notebooks/inputs/
- "*.csv"
- "*.parquet"
- "*.faiss"
- "*.ipynb"
- "*.log"

143
dictionary.txt Normal file
View File

@@ -0,0 +1,143 @@
# Team
Alonso
# Pythonisms
PYTHONPATH
pycache
nopython
virtualenv
Pythonisms
pyproject
ipynb
pymodule
virtualenvs
signum
ospath
msecs
asdict
isna
getcwd
fillna
noqa
# Azure
abfs
Hnsw
odata
# NLTK Terms
chunker
wordnet
maxent
punkt
# Libraries
Langchain
networkx
graspologic
graphml
dataframe
dataframes
nltk
datashaper
numba
graphintelligence
dotenv
tiktoken
pydantic
faiss
pyarrow
pyaml
Chakra
poethepoet
pyodbc
lancedb
httpx
pymongo
uvloop
aiofiles
asyncio
numpy
pypi
nbformat
semversioner
# Library Methods
iterrows
rprint
ndarray
nprobe
astype
monkeypatches
nlist
ntotal
quantizer
arun
tiktokens
dtype
linewidths
asearch
iloc
itertuples
isin
nocache
nbconvert
# Verbs
binarize
prechunked
openai
genid
umap
concat
unhot
groupby
retryer
agenerate
aembed
dedupe
# LLM Terms
AOAI
embedder
llm
llms
# Galaxy-Brain Terms
Unipartite
idempotently
covariate
covariates
logit
confusors
# Charting
plottable
scatterplot
# Resilience Team Terms
megapipeline
columnwise
docstore
datasource
devcontainers
codebases
# Microsoft
MSRC
# Broken Upstream
# TODO FIX IN DATASHAPER
Arrary
# Prompt Inputs
dulce
Asadi
ABILA
Abila
POKRALLY
# English
skippable
upvote

25
docsite/.eleventy.js Normal file
View File

@@ -0,0 +1,25 @@
// Eleventy configuration for the GraphRAG docsite.
// Registers rendering plugins and wires up a markdown-it instance that
// adds copy-to-clipboard buttons to rendered code blocks.
const { EleventyHtmlBasePlugin } = require("@11ty/eleventy");
const syntaxHighlight = require("@11ty/eleventy-plugin-syntaxhighlight");
const codeClipboard = require("eleventy-plugin-code-clipboard");
const pluginMermaid = require("@kevingimbel/eleventy-plugin-mermaid");
const markdownIt = require('markdown-it');
module.exports = (eleventyConfig) => {
// Code syntax highlighting, clipboard buttons, and Mermaid diagrams.
eleventyConfig.addPlugin(syntaxHighlight);
eleventyConfig.addPlugin(codeClipboard);
eleventyConfig.addPlugin(pluginMermaid);
// Rewrite root-relative URLs when the site is hosted under a sub-path;
// DOCSITE_BASE_URL is empty for local builds.
eleventyConfig.addPlugin(EleventyHtmlBasePlugin, {
baseHref: process.env.DOCSITE_BASE_URL || ""
});
// Copy static assets straight through to the built site.
eleventyConfig.addPassthroughCopy("data");
eleventyConfig.addPassthroughCopy("img");
// Ignore auto-generated content: don't derive ignore rules from
// .gitignore (the .eleventyignore file controls inclusion instead).
eleventyConfig.setUseGitIgnore(false);
// Allow raw HTML in markdown and attach the copy-button renderer.
const markdownLibrary = markdownIt({
html: true
}).use(codeClipboard.markdownItCopyButton);
eleventyConfig.setLibrary("md", markdownLibrary);
};

2
docsite/.eleventyignore Normal file
View File

@@ -0,0 +1,2 @@
!posts/index/verbs/*.md
!posts/index/workflows/*.md

6
docsite/.gitignore vendored Normal file
View File

@@ -0,0 +1,6 @@
_site
_posts
posts/query/notebooks/*.ipynb
posts/query/notebooks/*_nb.md
*.parquet
*.zip

893
docsite/.yarn/releases/yarn-4.0.2.cjs vendored Normal file

File diff suppressed because one or more lines are too long

5
docsite/.yarn/sdks/integrations.yml vendored Normal file
View File

@@ -0,0 +1,5 @@
# This file is automatically generated by @yarnpkg/sdks.
# Manual changes might be lost!
integrations:
- vscode

7
docsite/.yarnrc.yml Normal file
View File

@@ -0,0 +1,7 @@
compressionLevel: mixed
enableGlobalCache: false
nodeLinker: pnp
yarnPath: .yarn/releases/yarn-4.0.2.cjs

115
docsite/_includes/page.css Normal file
View File

@@ -0,0 +1,115 @@
/* Global layout styles for the GraphRAG docsite. */
html {
  padding: 0;
  margin: 0;
}
body {
  font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
  padding: 0;
  margin: 0;
}
footer {
  width: 100%;
  height: 32px;
  font-size: 12px;
  display: flex;
  flex-direction: row;
  justify-content: center;
  gap: 18px;
  align-items: center;
  color: #5d5d5d;
  background: #e9eaeb;
  border-top: 1px solid #c4c5c6;
}
#cookiesManager {
  cursor: pointer;
  color: #485fc7;
}
/* Two-column shell: sidebar nav + main content. */
.page-content {
  display: flex;
  flex-direction: row;
  margin: 0;
  padding: 0;
  overflow: scroll;
}
header {
  background-color: lightgrey;
  height: 2%;
  padding: 10px;
}
nav {
  padding: 1em;
  min-width: 200px;
}
main {
  flex: 1;
  padding: 0 5em 0 5em;
}
.logotitle {
  font-size: 1.5em;
  font-weight: bold;
  margin: 5px;
}
/* Neutralize Prism token styling that conflicts with the theme. */
.number {
  all: unset;
}
.tag.token {
  all: unset;
}
main ul {
  list-style-type: disc;
  padding-left: 30px;
  margin-top: 10px;
}
h1 {
  font-size: 2rem;
  margin-top: 10px;
}
h2 {
  font-size: 1.5rem;
  margin-top: 10px;
  font-weight: 500;
}
h3 {
  font-size: 1rem;
  margin-top: 10px;
  font-weight: 500;
}
p {
  margin-top: 10px;
}
/* Accessibility styling */
a {
  color: #485fc7;
  text-decoration: underline;
}
.menu-list a {
  text-decoration: none;
}
.token.comment, .token.prolog, .token.doctype, .token.cdata {
  color: #8093a5;
}
.token.property, .token.tag, .token.constant, .token.symbol, .token.deleted {
  color: #ff36ab;
}

164
docsite/_includes/page.njk Normal file
View File

@@ -0,0 +1,164 @@
---
title: GraphRAG
---
{% set css %}
{% include "page.css" %}
{% endset %}
{% macro link_to(url, text) %}
<a href="{{url}}"{% if page.url == url %} class="is-active" aria-current="page"{% endif %}>{{text}}</a>
{% endmacro %}
<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{{ title }}</title>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.9.4/css/bulma.min.css">
<link href="https://unpkg.com/prismjs@1.20.0/themes/prism-okaidia.css" rel="stylesheet">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/Primer/19.1.1/tooltips.min.css" crossorigin="anonymous" referrerpolicy="no-referrer" />
<style>{{ css | safe }}</style>
{% mermaid_js %}
{% initClipboardJS %}
{% block js %}
<script src="https://wcpstatic.microsoft.com/mscc/lib/v2/wcp-consent.js" type="text/javascript"></script>
<script>
// Callback invoked by the WCP consent library whenever the user's
// cookie-category preferences change; currently only logs for debugging.
function onConsentChanged(categoryPreferences) {
console.log("onConsentChanged", categoryPreferences);
}
var siteConsent
// One-time page setup, run on DOMContentLoaded: renders the copyright
// year into the footer and initializes the Microsoft WCP cookie-consent
// banner (when the consent script has loaded).
function initialize(){
var currentYear = new Date().getFullYear()
document.getElementById("copyright").innerHTML = `©️ ${currentYear} Microsoft`;
// window.WcpConsent is absent if wcp-consent.js failed to load; init is
// asynchronous, so siteConsent is only assigned once the callback fires.
window.WcpConsent && WcpConsent.init("en-US", "cookie-banner", function (err, _siteConsent) {
if (!err) {
siteConsent = _siteConsent; //siteConsent is used to get the current consent
} else {
console.log("Error initializing WcpConsent: "+ err);
}
}, onConsentChanged, WcpConsent.themes.light);
}
addEventListener("DOMContentLoaded", initialize)
addEventListener("DOMContentLoaded", checkCookieManager)
// Show or hide the "Cookies" footer control (and its divider) depending on
// whether the WCP consent banner requires consent in the current region.
// FIX: siteConsent is assigned asynchronously by the WcpConsent.init
// callback in initialize(); this handler previously dereferenced it
// unguarded and threw a TypeError whenever it ran before initialization
// completed (or when the consent script failed to load). Treat an
// uninitialized siteConsent as "no consent required" and hide the controls.
function checkCookieManager(){
var display = (siteConsent && siteConsent.isConsentRequired) ? 'block' : 'none';
document.getElementById("cookiesManager").style.display = display;
document.getElementById("divider").style.display = display;
}
// Opens the WCP consent-management dialog from the footer "Cookies" link.
// FIX: guard against siteConsent being undefined — it is set asynchronously
// by WcpConsent.init and may be missing if the consent script failed to
// load; the unguarded dereference previously threw a TypeError on click.
function manageConsent() {
if (siteConsent && siteConsent.isConsentRequired) {
siteConsent.manageConsent();
}
}
</script>
{% endblock %}
</head>
<body>
<header>
<div id="cookie-banner"></div>
<a href="/"><span class="logotitle">GraphRAG</span></a>
</header>
<div class="page-content">
<!-- Sidebar -->
<aside class="menu">
<ul class="menu-list">
<li>
{{link_to("/", "Welcome")}}
</li>
<!-- Get Started Links -->
<li>
{{link_to("/posts/get_started/", "Get Started")}}
{{link_to("/posts/developing/", "Developing")}}
</li>
<!-- Indexing Links -->
<li>
{{link_to("/posts/index/overview/", "Indexing")}}
<ul>
{%- for post in collections.indexing -%}
<li>{{link_to(post.url, post.data.navtitle)}}</li>
{%- endfor -%}
<li>
{{link_to("/posts/config/overview/", "Configuration")}}
<ul>
<li>{{link_to("/posts/config/init", "Init command")}}</li>
<li>{{link_to("/posts/config/env_vars", "Using Env Vars")}}</li>
<li>{{link_to("/posts/config/json_yaml", "Using JSON or YAML")}}</li>
<li>{{link_to("/posts/config/custom", "Fully Custom")}}</li>
<li>{{link_to("/posts/config/template", "Template")}}</li>
</ul>
</li>
<li>
{{link_to("/posts/prompt_tuning/overview/", "Prompt Tuning")}}
<ul>
<li>
{{link_to("/posts/prompt_tuning/auto_prompt_tuning/", "Automatic Templating")}}
</li>
<li>
{{link_to("/posts/prompt_tuning/manual_prompt_tuning/", "Manual Prompt Tuning")}}
</li>
</ul>
</li>
</ul>
</li>
<!-- Query Links -->
<li>
{{link_to("/posts/query/overview/", "Query")}}
<ul>
{%- for post in collections.orchestration -%}
<li>{{link_to(post.url, post.data.navtitle)}}</li>
{%- endfor -%}
<li>
{{link_to("/posts/query/notebooks/overview/", "Notebooks")}}
<ul>
<li>{{link_to("/posts/query/notebooks/global_search_nb", "Global Search")}}</li>
<li>{{link_to("/posts/query/notebooks/local_search_nb", "Local Search")}}</li>
</ul>
</li>
</ul>
</li>
</ul>
</aside>
<!-- Main Content -->
<main>
<h1>{{title}}</h1>
{{ content | safe }}
</main>
</div>
<footer>
<a href="https://go.microsoft.com/fwlink/?LinkId=521839">Privacy</a>
|
<a href="https://go.microsoft.com/fwlink/?LinkId=2259814">Consumer Health Privacy</a>
|
<span id="cookiesManager" onClick="manageConsent();">Cookies</span>
<span id="divider">|</span>
<a href="https://go.microsoft.com/fwlink/?LinkID=206977">Terms of Use</a>
|
<a href="https://www.microsoft.com/trademarks">Trademarks</a>
|
<a href="https://www.microsoft.com" id="copyright"></a>
|
<a href="https://github.com/microsoft/graphrag">GitHub</a>
|
<a href="https://github.com/Azure-Samples/graphrag-accelerator">Solution Accelerator</a>
</footer>
</body>
</html>

View File

@@ -0,0 +1,3 @@
# About
This document (Operation Dulce) is an AI-generated science fiction novella, included here for the purposes of integration testing.

View File

@@ -0,0 +1,970 @@
# Operation: Dulce
## Chapter 1
The thrumming of monitors cast a stark contrast to the rigid silence enveloping the group. Agent Alex Mercer, unfailingly determined on paper, seemed dwarfed by the enormity of the sterile briefing room where Paranormal Military Squad's elite convened. With dulled eyes, he scanned the projectors outlining their impending odyssey into Operation: Dulce.
“I assume, Agent Mercer, you're not having second thoughts?” It was Taylor Cruz's voice, laced with an edge that demanded attention.
Alex flickered a strained smile, still thumbing his folder's corner. "Of course not, Agent Cruz. Just trying to soak in all the details." The compliance in his tone was unsettling, even to himself.
Jordan Hayes, perched on the opposite side of the table, narrowed their eyes but offered a supportive nod. "Details are imperative. We'll need your clear-headedness down there, Mercer."
A comfortable silence, the kind that threaded between veterans of shared secrets, lingered briefly before Sam Rivera, never one to submit to quiet, added, "I've combed through the last transmission logs. If anyone can make sense of the anomalies, it's going to be the two of you."
Taylor snorted dismissively. “Focus, people. We have protocols for a reason. Speculation is counter-productive.” The words 'counter-productive' seemed to hang in the air, a tacit reprimand directed at Alex.
Feeling the weight of his compliance conflicting with his natural inclination to leave no stone unturned, Alex straightened in his seat. "I agree, Agent Cruz. Protocol is paramount," he said, meeting Taylor's steely gaze. It was an affirmation, but beneath it lay layers of unspoken complexities that would undoubtedly unwind with time.
Alex's submission, though seemingly complete, didn't escape Jordan, who tilted their head ever so slightly, their eyes revealing a spark of understanding. They knew well enough the struggle of aligning personal convictions with overarching missions. As everyone began to collect their binders and prepare for departure, a quiet resolve took form within Alex, galvanized by the groundwork laid by their interactions. He may have spoken in compliance, but his determination had merely taken a subtler form — one that wouldn't surrender so easily to the forthcoming shadows.
\*
Dr. Jordan Hayes shuffled a stack of papers, their eyes revealing a tinge of skepticism at Taylor Cruz's authoritarian performance. _Protocols_, Jordan thought, _are just the framework, the true challenges we're about to face lie well beyond the boundaries of any protocol._ They cleared their throat before speaking, tone cautious yet firm, "Let's remember, the unknown variables exceed the known. We should remain adaptive."
A murmur of agreement echoed from Sam Rivera, who leaned forward, lacing their fingers together as if weaving a digital framework in the air before them, "Exactly, adaptability could be the key to interpreting the signal distortions and system malfunctions. We shouldn't discount the… erratic."
Their words hung like an electric charge in the room, challenging Taylor's position with an inherent truth. Cruz's jaw tightened almost imperceptibly, but the agent masked it with a small nod, conceding to the omnipresent threat of the unpredictable.
Alex glanced at Jordan, who never looked back, their gaze fixed instead on a distant point, as if envisioning the immense dark corridors they were soon to navigate in Dulce. Jordan was not one to embrace fantastical theories, but the air of cautious calculation betrayed a mind bracing for confrontation with the inexplicable, an internal battle between the evidence of their research and the calculating skepticism that kept them alive in their field.
The meeting adjourned with no further comments, the team members quietly retreading the paths to their personal preparations. Alex, trailing slightly behind, observed the others. _The cautious reserve Jordan wears like armor doesn't fool me_, he thought, _their analytical mind sees the patterns I do. And that's worth more than protocol. That's the connection we need to survive this._
As the agents dispersed into the labyrinth of the facility, lost in their thoughts and preparations, the base's halogen lights flickered, a brief and unnoticed harbingers of the darkness to come.
\*
A deserted corridor inside the facility stretched before Taylor Cruz, each footstep rhythmic and precise. Cruz, ambitious and meticulous, eyed the troops passing by with a sardonic tilt of the lips. Obedience—it was as much a tool as any weapon in the arsenal, and Cruz wielded it masterfully. To them, it was another step toward unfettered power within the dark bowels of the military complex.
Inside a secluded equipment bay, Cruz began checking over gear with mechanical efficiency. They traced fingers over the sleek surface of an encrypted radio transmitter. "If protocols are maintained," said Cruz aloud, rehearsing the speech for their subordinates, "not only will we re-establish a line of communication with Dulce, but we shall also illuminate the darkest secrets it conceals."
Agent Hayes appeared in the doorway, arms crossed and a knowing glint in their eyes. "You do understand," Jordan began, the words measured and probing, "that once we're in the depths, rank gives way to survival instincts. It's not about commands—it's empowerment through trust."
The sentiment snagged on Cruz's armor of confidence, probing at the insecurities festering beneath. Taylor offered a brief nod, perhaps too curt, but enough to acknowledge Jordan's point without yielding ground. "Trust," Cruz mused, "or the illusion thereof, is just as potent."
Silence claimed the space between them, steeped in the reality of the unknown dangers lurking in the shadows of the mission. Cruz diligently returned to the equipment, the act a clear dismissal.
Not much later, Cruz stood alone, the hollow echo of the bay a stark reminder of the isolation that power often wrought. With each checked box, their resolve steeled further, a silent vow to usher their team through the abyss—whatever it might hold—and emerge enshrined in the respect they so deeply craved.
## Chapter 2
Sam Rivera sat alone in a cramped office, the hum of a dozen servers murmuring a digital lullaby in the background. Surrounded by the glow of multiple screens, their eyes danced across lines of code and intercepted comm signals from Dulce — a kaleidoscope of data that their curious and isolated mind hungered to decrypt.
To an outsider, it might have looked like obsession, this fervent quest for answers. But to Sam, it was a dance — a give and take with the mysteries of the universe. Their fingers paused over the keyboard as they leaned back in the chair, whispering to thin air, "What secrets are you hiding from us?"
The stillness of the room broke with the unexpected arrival of Alex Mercer, whose encroaching shadow loomed over Sam's workspace. The cybersecurity expert craned their neck upwards, met by the ever-so-slight furrow in Alex's brow. "Got a minute, Rivera?"
"Always," Sam said, a smile surfacing as they swiveled to face their mentor more directly. _He has that look — like something's not sitting right with him,_ they noted inwardly.
Alex hesitated, weighing his words carefully. "Our tech is top-tier, but the silence from Dulce... It's not just technology that will see us through, it's intuition and... trust." His gaze pierced through the digital haze, trying to instill something more profound than advice.
Sam regarded Alex for a moment, the sincerity in his voice resonating with their own unspoken desire to prove their worth. "Intuition," they mirrored thoughtfully. "I guess sometimes the numbers don't have all the answers."
Their shared silence held a newfound understanding, a recognition that between the ones and zeros, it was their combined human insights that might prevail against the impossible. As Alex turned to leave, Sam's eyes drifted back to the screens, now seeing them not as barriers to isolate behind, but as windows into the vast and enigmatic challenge that awaited their team.
Outside the office, the persistent buzz of activity in the facility belied the unease that gripped its inhabitants. A restlessness that nibbled on the edges of reality, as though forewarning of the threshold they were soon to cross — from the known into the realm of cosmic secrets and silent threats.
\*
Shadows played against the walls of the cramped underground meeting room, where Alex Mercer stood gazing at the concealed elevator that would deliver them into the bowels of Dulce base. The air was thick, every breath laced with the weight of impending confrontation, the kind one feels when stepping into a legend. Though armed with an array of advanced weaponry and gear, there was an unshakeable sense that they were delving into a conflict where the physical might be of little consequence.
"I know what you're thinking," Jordan Hayes remarked, approaching Mercer. Their voice was low, a blend of confidence and hidden apprehension. "This feels like more than a rescue or reconnaissance mission, doesn't it?"
Alex turned, his features a mask of uneasy resolve. "It's like we're being pulled into someone else's game. Not just observers or participants, but... pawns."
Jordan gave a short nod, their analytical mind colliding with the uncertain dynamics of this operation. "I've felt that way since the briefing. Like there's a layer we're not seeing. And yet, we have no choice but to play along." Their eyes locked with Alex's, silently exchanging a vow to remain vigilant.
"You two need to cut the philosophical chatter. We have positions to secure," Taylor Cruz interjected sharply, stepping into their exchange. The authority in Taylor's voice brooked no argument; it was their way of pulling everyone back to the now.
Alex's response was measured, more assertive than moments ago. "Acknowledged, Agent Cruz," he replied, his voice steadier, mirroring the transformation brewing within. He gripped his rifle with a newfound firmness. "Let's proceed."
As they congregated at the elevator, a tension palpable, Sam Rivera piped in with a tone of balanced levity, "Hope everyone's brought their good luck charms. Something tells me we're going to need all the help we can get."
Their laughter served as a brief respite from the gravity of their mission, a shared moment that reinforced their common humanity amidst the unknowable. Then, as one, they stepped into the elevator. The doors closed with a silent hiss, and they descended into the darkness together, aware that when they returned, if they returned, none of them would be the same.
\*
The sense of foreboding hung heavier than the darkness that the artificial lights of the elevator shaft failed to fully penetrate. The team was descending into the earth, carrying with them not only the weight of their equipment but also the silent pressure of the invisible war they were about to fight—a war that seemed to edge away from physicality and into the unnervingly psychological.
As they descended, Dr. Jordan Hayes couldn't help but muse over the layers of data that could wait below, now almost longing for the comfort of empirical evidence. _To think that this reluctance to accept other possibilities may have been my biggest blind spot,_ Jordan contemplated, feeling the hard shell of skepticism begin to crack.
Alex caught Jordan's reflective gaze and leaned in, his voice barely a murmur over the hum of the elevator. "Once we're down there, keep that analytical edge sharp. You see through the mazes of the unexplained better than anyone."
The compliment was unexpected and weighed differently than praise from others. This was an acknowledgment from someone who stood on the front lines of the unknown with eyes wide open. "Thank you, Alex," Jordan said, the words carrying a trace of newfound assertiveness. "You can count on me."
The exchange was cut short by a shudder that ran through the elevator, subtle, but enough to make them instinctively hold their breaths. It wasn't the mechanical stutter of old gears but a vibration that seemed to emanate from the very walls of the shaft—a whisper of something that defied natural explanation.
Cruz was the first to react, all business despite the shadow that crossed their expression. "Systems check. Now," they barked out, masking the moment of disquiet with swift command.
Every agent checked their gear, sending confirmation signals through their comms, creating a chorus of electronic beeps that promised readiness. But there was an unspoken question among them: was their technology, their weaponry, their protocols sufficient for what awaited them or merely a fragile comfort?
Against the gravity of the silence that was once again closing in, Sam's voice crackled through, only half-jest. "I'd laugh if we run into Martians playing poker down there—just to lighten the mood, you know?"
Despite—or perhaps because of—the oddity of the moment, this elicited a round of chuckles, an audible release of tension that ran counterpoint to the undercurrent of anxiety coursing through the team.
As the elevator came to a halting, eerie calm at the sub-level, the group stepped off, finding themselves at the threshold of Dulce's mysterious halls. They stood in a tight pack, sharing a cautious glance before fanning out into the unknown, each one acutely aware that the truth was inevitably intertwined with danger.
Into the depths of Dulce, the team advanced, their silence now a shared testament to the camaraderie born of facing the abyss together—and the steel resolve to uncover whatever horrors lay hidden in its shadows.
\*
The weight of the thick metal door closing behind them reverberated through the concrete hallway, marking the final threshold between the familiar world above and the strangeness that lay beneath. Dulce base, a name that had been whispered in the wind-blown deserts above and in the shadowed corners of conspiracy forums, now a tangible cold reality that they could touch — and that touched them back with a chill.
Like lambs led to an altar of alien deities, so did Agents Alex Mercer, Jordan Hayes, Taylor Cruz, and Sam Rivera proceed, their movements measured, their senses heightened. The air was still, almost respectful of the gravity of their presence. Their torch beams sliced through the darkness, uncovering steel doors with warnings that spoke of top secrets and mortal dangers.
Taylor Cruz, stepping firmly into the role of de facto leader, set a brisk pace. "Eyes sharp, people. Comms check, every thirty seconds," Taylor ordered, their voice echoing slightly before being swallowed by the surrounding silence.
Sam, fiddling with a handheld device aimed at detecting electronic anomalies, offered a murmured "Copy that," their usual buoyancy dimmed by the oppressive atmosphere.
It was Jordan Hayes who paused at an innocuous looking panel, nondescript amongst the gauntlet of secured doorways. "Mercer, Rivera, come see this," Jordan's voice was marked with a rare hint of urgency.
Alex joined Jordan's side, examining the panel which, at a mere glance, seemed just another part of the base's infrastructure. Yet, to the trained eye, it appeared out of place—a facade.
Jordan explained their reasoning as Sam approached, instinctively understanding the significance of what lay beneath, "This panel is a recent addition — covering something they didn't want found."
Before Alex could respond, the soft whir of an approaching drone cut through their muffled exchange. Taylor had looped back upon hearing the commotion. "Explanations later. We can't afford to attract..." Cruz's voice trailed off as the small airborne device came into view, its sensors locked onto the group.
Sam was the first to react, their tech-savvy mind already steps ahead. "I've got this," they declared, fingers flying over the controls of their own gadgetry to ward off the impending threat.
The drone lingered, its scan seeming more curious than hostile. But within moments, courtesy of Sam's interference, the little sentinel drifted away, retreating into the shadows as if accepting a silent truce. The crew exhaled, a moment of collective relief palpable in the air.
Cruz squared their shoulders, clearly ruffled but not conceding any ground. "Move out," they directed, a hint more forceful than before. "And Rivera, keep that trick handy."
The team pressed onward, the quiet now filled with the soft beeps of regular comms checks, their pace undeterred by the confrontation. Yet, every agent held a renewed sense of wariness, their trust in one another deepening with the knowledge that the base—its technology, its secrets—was alive in a way they hadn't fully anticipated.
As they converged upon a central hub, the imposing doors to the mainframe room stood ajar — an invitation or a trap, neither option comforting. Without a word, they fortified their resolve and stepped through the threshold, where the dim glow of operational LED lights and the distant hum of machinery hinted at Dulce's still-beating heart.
Solemnly, yet unmistakably together, they moved deeper into the heart of the enigma, ready to unmask the lifeforce of Dulce base or confront whatever existential threat lay in wait. It was in that unwavering march towards the unknown that their destinies were forever cemented to the legacy of Operation: Dulce.
## Chapter 3
The thrumming of monitors cast a stark contrast to the rigid silence enveloping the group. Agent Alex Mercer, unfailingly determined on paper, seemed dwarfed by the enormity of the sterile briefing room where Paranormal Military Squad's elite convened. With dulled eyes, he scanned the projectors outlining their impending odyssey into Operation: Dulce.
\*
The cooling vents hummed in a monotonous drone, but it was the crackle of the comms system coming to life that cut through the lab's tension. Dr. Jordan Hayes hovered over a table arrayed with alien technology, their fingers delicately probing the enigmatic circuitry retrieved from the crash site. Agent Alex Mercer watched, admiration blooming in silent solidarity for Jordan's deft touch and unspoken drive.
Jordan, always composed, only allowed the faintest furrow of concentration to mar their brow. "What we understand about physics..." they muttered, trailing off as they realigned a translucent component. The device emitted a low pulse, causing Jordan to still. "Could be fundamentally changed by this."
A calculated risk—that's what this was. And for a person of science, a gamble was worth the potential paradigm shift.
"I've been thinking," Alex started, his eyes still fixed on the immediately tangible mystery before them. "About what's at stake here. Not the mission parameters, but what this means for us—humanity."
Jordan glanced up, meeting his eyes just long enough to convey the shared enormity of their situation; the career-defining glory and existential dread entwined. "The quest for understanding always comes at a price. We're standing on the precipice of knowledge that could either elevate us or condemn us."
The charged air between them spiked as Taylor Cruz's brusque tones sliced through their reverie. "Hayes, Mercer, this isn't philosophy hour. Focus on the task. We need actionable intel, not daydreams."
With a sound of restrained acknowledgment, Jordan returned their gaze to the device, while Alex clenched his jaw, the buzz of frustration dull against the backdrop of Taylor's authoritarian certainty. It was this competitive undercurrent that kept him alert, the sense that his and Jordan's shared commitment to discovery was an unspoken rebellion against Cruz's narrowing vision of control and order.
Then Taylor did something unexpected. They paused beside Jordan and, for a moment, observed the device with something akin to reverence. “If this tech can be understood..." Taylor said, their voice quieter, "It could change the game for us. For all of us.”
The underlying dismissal earlier seemed to falter, replaced by a glimpse of reluctant respect for the gravity of what lay in their hands. Jordan looked up, and for a fleeting heartbeat, their eyes locked with Taylor's, a wordless clash of wills softening into an uneasy truce.
It was a small transformation, barely perceptible, but one that Alex noted with an inward nod. They had all been brought here by different paths and for different reasons. Yet, beneath the veneer of duty, the enticement of the vast unknown pulled them inexorably together, coalescing their distinct desires into a shared pulse of anticipation.
Marshaled back to the moment by the blink of lights and whir of machinery, they refocused their efforts, each movement sharpened by the knowledge that beyond understanding the unearthly artifacts, they might be piecing together the future of their species.
\*
Amidst the sterility of the briefing room, the liminal space between the facts laid out and the hidden truths, sat Sam Rivera, his demeanor an artful balance of focus and a casual disguise of his razor-sharp talent with technology. Across from him, Alex Mercer lingered in thought, the mental cogs turning as each file on Dulce stirred more than curiosity—it beckoned to a past both honored and burdensome.
"You've been quiet, Sam," Alex noted, catching the younger man's contemplative gaze. "Your take on these signal inconsistencies?"
There was a respect in Alex's tone, though a respectful distance remained—a gulf of experience and a hint of protective mentorship that stood between them. Sam nodded, recognizing the space afforded to him, and he couldn't help but feel the weight of expectation pressing upon his shoulders. It wasn't just the mission that was immense, it was the trust being placed in him.
"The patterns are... off," Sam admitted, hesitant but driven. "If I'm right, what we're looking at isn't random—it's a structured anomaly. We need to be ready for anything."
Alex's eyes brightened with a subtle approval that crossed the distance like a silent nod. "Good. Keen eyes will keep us ahead—or at least not blindsided," he said, affirming the belief that inscribed Sam's role as more than the tech personnel—he was to be a guiding intellect in the heart of uncertainty.
Their exchange was cut short by Taylor Cruz's abrupt arrival, his gait brimming with a robust confidence that veiled the sharp undercurrents of his striving nature. "Time to gear up. Dulce waits for no one," Taylor announced, his voice carrying an iron resolve that knew the costs of hesitation—though whether the cost was calculated in human or career terms was an ambiguity he wore like a badge of honor.
As Sam and Alex nodded in unison, the icy chasm of hierarchy and cryptic protocols seemed momentarily to bridge over with an understanding—this mission was convergence, a nexus point that would challenge each of their motives and strength.
They filed out of the briefing room, their footsteps synchronized, a rhythm that spoke volumes of the unknown cadence they would soon march to within the base's veins. For Alex Mercer, the link with Sam Rivera, though distant, was now poised with a mutuality ready to be tested; for Taylor Cruz, the initiative pulsed like a heartbeat, anticipation thinly veiled behind a mask of duty.
In the midst of the descent, they were each alone yet irrevocably joined, stepping closer towards the volatile embrace of Operation: Dulce.
## Chapter 4
The corridors of the Dulce military base were as silent as a tomb and twice as chilling. Alex Mercer walked with a surety that belied his bubbling undercurrents of doubt. The briefing had been definitive, sturdy pillars of facts and protocols, yet as he ventured deeper, the ominous atmosphere gnawed at him—a stark reminder of how much remained unknown.
Jordan Hayes trailed a few steps behind, their detached exterior breaking for a moment as they caught up to Alex. "What's on your mind?" Jordan asked, their astuteness cutting through the unspoken tension.
Alex glanced back at them. This place was a puzzle, a treacherous labyrinth where the walls whispered secrets, and among them, he sensed a call to question, to challenge the narrative they'd been sold. "The silence here... It's almost as if the base is waiting for something—or someone."
"Just stay sharp, Mercer," Jordan cautioned, yet their eyes lingered on the quietude around them, conceiving the same shadow of doubt that unsettled Alex.
Before they could delve into further discussion, the distinctive click of a safety catch echoed in the hollow space. Both agents turned to find Taylor Cruz standing resolute, primed for combat. Taylor's gaze was scrutinizing and cold, a stark contrast to the growing unease that smoldered silently amongst the rest.
"Chatter is a liability," Taylor snapped, with a commanding flair that bordered on tyrannical. "We move forward, eyes open, mouths shut."
Alex felt the tight grip of compliance strangle his gut, a lesson learned under the hard tutelage of rank and order. But here, in the bowels of Dulce, those instincts began to wane, the imperative to adhere now conflicting with the pressing urgency to confront the shadows they were enmeshed in.
Then, unexpectedly, the lights flickered, a power fluctuation—or a sign? Alex's hand instinctively went to his sidearm, his mindset shifting from soldier to skeptic. The base, with its unyielding coldness, had just given them their first nudge into the realm of the speculative, an invitation to peel back the veneer of reality.
"We should consider all possibilities," Alex murmured, more to himself than the others, his voice a barely audible breath against the sterile air of the complex.
Taylor's posture stiffened at the challenge, yet their response was uncharacteristically reserved, notable in its lack of rebuke. "Agreed. For now, keep moving. But stay vigilant."
A surprise—an echo of agreement from the last person Alex expected it from. And there it was, the glimpse of a wrinkle in the unyielding fabric of command, a hint that perhaps they were all starting to sense the strangeness that permeated this place.
Progressing with determined steps, the trio moved deeper, silently acknowledging the evolution of their predicament. It was a small yet transformative concession to the unknown forces at play, an acknowledgment from each agent that, despite their disparate goals and ideals, the true nature of the Dulce base was an enigma that would forge new paths through their convictions.
As they reached the central communications hub, the truth that awaited them lurked in the shadows, its eyes unseen but felt by all. The walls didn't just whisper now; they spoke in tones only the brave—or the foolish—would dare to listen to.
\*
The subterranean silence of Dulce was an oppressive entity of its own, wrapping the team in a cloak of uneasiness as they pressed on through the dimly lit corridor. Jordan Hayes found themselves contemplating the ramifications of each step taken into this suspended world, where the sterile air seemed to mock the gravity of their predicament. The closer they got to the communication hub, the more Jordan's mind wandered toward the realm of the inexplicable.
Beside Jordan, Alex Mercer moved forward with deliberation, his gaze scanning the heavy utility doors they passed—one of which was partially ajar, beckoning them with its darkness. "After you, Dr. Hayes," Alex said, gesturing toward the mysterious opening. A hint of shared understanding passed between them; knowledge was the guiding star of this mission as much as confrontation or recovery.
Jordan peered inside, the beam from their flashlight slicing through the obscurity. The room beyond was a chaotic cascade of papers, overturned furniture, and the particular kind of disorder born from hasty evacuation—or something far more sinister.
"It's like they vanished in the middle of something urgent," Alex murmured, his voice tight with a mix of concern and anticipation. He began to sift through the scattered reports, each page a potential clue to the enigmatic silence that shrouded Dulce.
Behind them, Taylor watched with a disciplined patience, their authority the foundation upon which the operation was built. Their voice cut into the stillness, a reminder of their presence, "Time is not our ally here."
Drawing back from momentary distraction, Jordan acknowledged the wisdom in Taylor's words, yet could feel the shift in their stance—from skeptical, reserved analyst, to a proactive agent within the narrative. "You're right; these documents may hold critical insights. Let's collect what we can and analyze them properly."
From the darkened hollows of the room, shadows seemed to cast subtle judgment as Alex and Jordan worked together with heightened urgency. Taylor, for once, didn't intervene but instead surveyed the entrance, their mind anticipating the unknown variables that lay ahead.
Unexpectedly, a soft hiss emanated from a neglected terminal on the desk. Jordan's head snapped up, their heart rate accelerating at the potential ramifications. Without a word, they moved to the machine, hands driven by the newfound conviction that knowledge was more than power—it was survival.
As Jordan began to extract what data they could from the terminal, the first comprehensible communication from the depths of Dulce in far too long crackled through: an automated distress marker, looping endlessly without further context. It was a revelation, one that reverberated through the group, confirming their fears and igniting an even greater need to press on.
Watching Jordan's dogged determination, Alex witnessed the minor transformation in his colleague unfold—a shift from doubt to action, a sliver of belief in the possibilities beyond their rational understanding. This forge of resolve amidst the alien echoes of Dulce not only bonded them closer as a team but compelled them forward with a sharpened edge of responsibility to the truth, wherever it would lead.
As they collected their findings and regrouped, the base around them imperceptibly changed, the air charged with the vibration of secrets poised on the brink of revelation. And in that charged silence, the group moved on, each now carrying pieces of a puzzle that would soon converge into a picture of galactic significance.
\*
In the chill of the cramped server room, the hum of machinery was the backbone to a symphony of data streams coursing through the air. Dr. Jordan Hayes, nerves alight with the mission's mounting unknowns, patched into the last known coordinates of the unsent distress broadcast they had uncovered. They were so close to the core now — to the truth behind the blackout — it was almost tangible.
Beside them stood Agent Alex Mercer, ever the soldier, yet with eyes that betrayed an intellect craving to understand the murk beneath the surface. "Any progress, Dr. Hayes?" Alex queried, his voice betraying a subtle urgency.
"Getting there," Jordan replied, fingers dancing across the keyboard. "Whoever sent this was cut off mid-transmission. It's as if Dulce itself swallowed the message whole."
Taylor Cruz closed in, their frame casting a long shadow over the duo, evoking an almost palpable wall between them and the forward momentum of their mission. "Time is against us," Taylor intoned, more statement than threat. "What we uncover here determines our next course of action."
Alex acknowledged Taylor with a brisk nod, his stance firm. Yet inwardly, the tightening grip he felt from Taylor's words couldn't throttle the swell of his own investigative instinct. His soldier's obedience had begun to war with the advocate's zeal for unveiling the dark heart of Dulce's secrets.
And then, the unexpected occurred. The screens flashed in unison, spilling a discordant stream of symbols and images that defied immediate analysis. Jordan's breath caught — this was the response they had been fishing for, an alien communication protocol resonating just at the edge of human comprehension.
Each member of the team felt it: a shift in the room's very atmosphere, like a veil being drawn from their perception. Alex and Jordan stood still, absorbed in the bewilderment of contact, while Taylor, despite their authority, hesitated — a minor betrayal that unease was creeping into even their disciplined heart.
"Thoughts, Rivera?" Taylor rallied, seeking the counsel of Sam Rivera, whose eyes were wide with exhilaration.
Sam stepped forward, breaking the spell of stillness. "It's like nothing I've ever seen before, but I think I can bridge our systems to communicate," they declared, a wisp of optimism braiding their voice. They set about adapting their gear to transmute the foreign signals into something the team could dissect, their actions a testament to the mentorship and belief instilled in them by Mercer and the team.
Taylor observed them, a cold calculation behind their facade, as they weighed the worth of this anomaly. It was a crossroad that potentially led to either monumental breakthrough or unprecedented catastrophe. "Once you've established a line, document everything. We can't afford to miss any detail," Taylor ordered, the words sharper than intended.
The connection was made, and with trembling anticipation, the team listened as the first garbled outputs began to emerge, their very essence promising insights that could alter the course of history. It was an enigmatic dance with the unknown, the pulse of Dulce no longer just a place, but a herald to an alien register the team had yet to decipher.
Together, they stood at the precipice of understanding, where the faint glow of their monitors cast more than just light—it cast the shadow of burgeoning transformation. It was in this moment, in the grasp of an extraterrestrial tongue, that the team, bound by a hunger for knowledge and the raw edge of survival, found their mission reframed from a search for answers to the articulation of a question humankind had yet to fully ask.
Silent in their commune with the inexplicable frequency, they realized they were not merely investigators; they had become liaisons on behalf of Earth, interpreters of a cosmic message that could redefine their very existence. The implications loomed large, but now, they would not face them alone—they would face them as a united front, wrought together by the very mysteries that once drove them apart.
## Chapter 5
Dr. Jordan Hayes clutched the edge of the briefing room table, their fingers white-knuckled against the laminate surface, as an array of constellations rotated on the projector—charts and graphs bleeding across the stars. In the dim room, nebulas and dark matter seemed within arm's reach, tangible yet unfathomable.
Sam Rivera leaned back against the wall, arms crossed, gaze darting between the swirling cosmos and the faces of their companions. A taut line of concentration etched their young features, a mingling of fervent curiosity with the nascent understanding of the high stakes for which they played.
Jordan's voice broke the profound silence. "The patterns in the signal disruptions sync with none other than zenithal star alignments. It's as if... as if these 'meet and greets' were scheduled, predestined by celestial mechanics."
The statement hung heavy, daring the occupants of the room to unravel its implications. Alex Mercer, his prior military resolve momentarily suspended, absorbed the hypothesis with a visible hunger. "It's like we're adhering to an appointment we never knew we had," he murmured, his heart a drumbeat in his chest.
Taylor Cruz snorted—a sound that clattered against the high concepts like a tumbledown shack in a futurist cityscape. Folding their arms, they glanced between the agents, their apprehension clad in the contempt of practicality. "What we need are facts, not mystic conjecture."
Alex pivoted on his heel, facing Taylor squarely, and his voice found its edge of steel. "This isn't mysticism, Cruz. It's a hypothesis based on observed phenomena as unpredictable as the place we're standing in."
Taylor's gaze never wavered, yet the slight twitch at the corner of their mouth belied their taut composure. "If there's a semblance of truth to it, then it's critical intel. But remember, we're not astrologers—we're soldiers and scientists."
Jordan met Taylor's gaze with a curt nod, accepting the caution even as the crucible of their intellect smoldered with the fervor of cosmic discovery. Their eyes flicked to Sam, whose steady presence and ready tech affirmed a burgeoning dynamic—the makings of a sentinel, standing guard over the threshold of human understanding and cosmic reality.
With the projector casting pallid light over their features, each agent became a silhouette of purpose, shadows pillared against the backdrop of an endless universe. The story they were embroiled in would soon demand they plunge into darkness to retrieve the light of knowledge—a light that could very well redraw the shape of their world.
They left the briefing room with a shared silence, each pondering the vast weave of celestial intent and terrestrial response, sensing that the galactic appointment to which they'd unwittingly RSVP'd was more insistent—and more threatening—than any operation they'd faced before.
\*
As the Paranormal Military Squad team convened in the heart of the Dulce military complex, an air of bristling expectation clung to the walls of the underground sanctum. Alex Mercer's brow furrowed while watching his companions—Jordan Hayes, diligently setting up their makeshift lab station, and Sam Rivera meticulously checking the communication relays they had restored. Taylor Cruz observed with hawk-like focus, yet to betray the strain that their command posed on them.
The gravity of the mission had shifted, deepened; each member of the team felt its pull, tethered to the understanding that they were now part of a larger narrative—a cosmic play with Earth as a stage and the human race unwitting actors.
Jordan paused, a tension creeping across their shoulders as they aligned the satellite data with the alien message that had been decoded. "The instructions in this message," Jordan started, the timbre of their voice betraying their usual composure. "They're coordinates and... a warning."
Sam leaned in, their eyes widening behind the glow of their laptop screen. "A warning? Like, stay away from, or beware of...?" Their words trailed off, uncertainty a new companion in their lexicon.
Alex exhaled slowly, his mind racing to connect the dots. "It doesn't matter which," he said, decisive yet contemplative. "What matters is we understand intent. Are we being warned out of concern, or are we stumbling upon a threat?"
Cruz's iron-clad facade momentarily cracked, a fleeting glimpse of vulnerability flashing through their eyes. "We need to know if this entails additional risk to the operation," they said, directing their gaze specifically at Alex. "Mercer, I rely on you to keep the team grounded. No one goes off-course."
Their reminder seemed both a command and a plea—rooted in an understanding that each member of the team now faced the duality of their roles, protectors of earthly secrets and heralds of potentially devastating revelations.
Sam's fingers stilled mid-type, their task forgotten as they absorbed the weight of the unfolding reality. "We're the first line of defense... or detection," they mused half to themselves, a growing sense of agency within the larger play they were cast into.
Jordan returned to the data, more resolute in their actions. The warning, whether cautionary or dire, was a beacon they no longer could ignore; its light casting aside shadows of doubt and igniting a collective purpose within the team.
Alex watched Jordan and Sam, feeling a brotherhood in their shared quest. As Cruz paced, poised on the cusp of decisions that would mark their career and perhaps the fate of many, Alex knew the narrative had changed. They were no longer mere operatives; they had become guardians of a threshold, keepers of a message from a realm beyond stars and stripes. This elevation in their mission could not be shackled by regulations and established protocols—it demanded a new perspective, a new resolve.
Tension threaded through the dialogue of beeps and static as communications with Washington buzzed in the background. The team stood, a portentous air enveloping them. It was clear that the decisions they made in the ensuing hours could redefine humanity's place in the cosmos or condemn them to ignorance and potential peril.
Their connection to the stars solidified, the group moved to address the crystallizing warning, shifting from passive recipients to active participants. Mercer's latter instincts gained precedence—the team's mandate had evolved, no longer solely to observe and report but to interact and prepare. A metamorphosis had begun, and Operation: Dulce hummed with the newfound frequency of their daring, a tone set not by the earthly hierarchies but by the pulsing symphony of the universe itself.
\*
The desert night loomed eerily still as echoes of hidden activity reverberated deep beneath the bleak sands of New Mexico. Diverting his gaze from the array of sensors before him, Jordan Hayes allowed a rare breath, deep and anxious. Turning to Alex Mercer's focused silhouette, the nocturnal landscape illuminated softly by makeshift floodlights, Jordan felt the syncopated tempo of apprehension and exhilaration jockey for primacy within.
"The closer we get to unlocking these messages, the more I feel like we're peeling back layers of reality itself," Jordan confided, eyes not leaving the monitors that presented a constellation of data points.
"Yes," Alex replied, his voice steady as he considered the implications of their discovery. "And we have to be ready for whatever we find beneath those layers. Whether it's a breakthrough or a Pandora's Box."
Silence settled between them, broken only by the occasional buzz of communications equipment attempting to bridge terrestrial and extraterrestrial intelligences. Tense moments drifted by, laden with the expectant weight of near breakthrough, when a soft chime signaled an incoming transmission -- a rare sound that set every agent on high alert.
Absent was the voice of Washington or Paranormal Military Squad command. Instead, a rhythmic series of pulses and tones filled the air, deliberately patterned, unmistakably non-human.
Sam Rivera adjusted the sensitivity of the decoding equipment, their hands shaking with anticipation as much as focus. "I have it!" they announced, the signal transforming under their expertise into a sequence of visual symbols on the screen before them.
Their shared excitement was palpable, a kinetic force resonating between the team members as they crowded around the display.
"What does it say?" Taylor Cruz demanded, the urgency in their tone scraping against the newfound wonderment.
Interpreting the alien syntax required not only decoding but intuition and empathy. The words that emerged upon the screen were at once coherent and enigmatic: "*Voyage. Convergence. Peril.*"
The stark simplicity of the message struck them collectively, a chill breeze wafting through their resolve.
Alex stepped forward, piecing together the cryptic communication with a growing sense of obligation. "It's a call to action," he deduced, "or possibly a summons."
Jordan's gaze met Alex's, both understanding that this was no longer an investigation or mere extraction of hidden truths. This was humanity's unwitting enlistment into a galactic dialogue that defied boundaries of nation, creed, or protocol.
Sam's eyes were aglow, not with fear, but with the profound acceptance of inevitability that comes with groundbreaking revelation. Moreover, within Taylor's stern exterior churned the seed of reluctant admiration for the unclassified, the uncharted realms they were approaching.
Together, they accepted the pivot in their mission, readjusting their objectives from exploration to engagement, and from isolation to a communal outreach beyond the stars. As dawn's first light threatened the horizon, it became clear that they were no longer merely operatives of a clandestine governmental faction—they were delegates on behalf of Earth, embarking on a voyage orchestrated by destinies unrelated to the mere geopolitics of their world.
Turning to each other, their silhouettes sketched against the coming dawn, the agents recognized the transformation within and amongst them. They were bound by more than duty—they were intricately woven into the fabric of an unfolding cosmic opera, one in which they had been granted an undeniable role. And as they set course for the coordinates that beckoned them like a distant siren's call, it was with a solemn dedication to not only uncover the mysteries ahead but to navigate the convergence, and the peril, as unified emissaries of a world on the cusp of a broader understanding.
\*
Beneath the hum of the fluorescent lights and the vigilance of silent monitors, Alex Mercer stood with his team in the threshold of the base's command center, their faces etched with the fatigue of hours spent unraveling galactic mysteries. Jordan Hayes broke the stillness with a delicate fusion of disbelief and resolve. "The signal..." they began, their tone deliberate, "it's evolving. It's not just sending a message—it's responding to us."
Taylor Cruz leaned over the console, their eyes narrowing with intrigue and a flicker of unease, studying the alternating patterns on the screen. "Responding? Like it's alive?" Taylor asked, a question that bordered on the edge of wonder and alarm.
Sam Rivera's gaze was locked onto their interface, a digital orchestra at their fingertips. "It could be some form of advanced AI. Or something else entirely," they contributed, a note of exhilaration betraying the gravity of the situation.
Alex paced before the terminal, absorbing the enormity of their predicament. Their mission—once rooted in the solid ground of military discipline and covert operations—had transcended into an encounter of unprecedented import. "We need to be cautious," he advised, his voice a low rumble of cautious strategy. "If this signal is intelligent, how we interact with it could dictate the outcome of this entire operation."
Jordan met Alex's gaze with a nod, the weight of the responsibility shared and accepted. "We have protocols for first contact, but nothing for... this," Jordan admitted. The room was gripped with tension, each breath seemingly louder than the last.
Then, with a sudden burst that filled the command center, the signal coalesced into a clear and distinct pattern which replicated and expanded, its complexity revealing the hand—or mind—of an intelligent architect.
Taylor's instinct for command surged forth. "Prepare to record and analyze. Whatever it is, we need to understand it—" But their words were cut short as the signal surged, enveloping the room in a brief, blinding cascade of light.
In that pulse of brilliance, a shared revelation coursed through the team. The signal had become a bridge, an extension of unknown consciousness reaching towards them, testing, communicating, searching.
Alex stepped back from the light, feeling a profound change unravelling within him. The path forward would not be one of confrontation or conquest, but of connection and comprehension.
Jordan turned to Alex and Taylor, seeing in their faces a reflection of the same metamorphosis taking place within themselves—a movement from observers to participants, from agents to ambassadors.
With a collective breath, the team faced the kaleidoscope of lights. The alien signal, once a harbinger of enigma, was now a catalyst for transformation—a symphony of light and sound that echoed the beginnings of a new relationship between humanity and the alien unknown.
And so, with deliberate steps, Alex Mercer led his team into the luminous fray. Science, protocol, and survival instinct harmonized within them, each member poised on the cusp of a new chapter in human history.
They were no longer merely the instruments of Paranormal Military Squad's will—they were the vanguard of humankinds first definitive leap into the cosmic community.
With the last echoes of the signal resonating in the control room, they each embraced the sequencing of the transmission, the dance of extraterrestrial light that now wrote itself into their story. The chapter of Operation: Dulce drew to a close, but the narrative of their destiny had only just begun.
## Chapter 6
\*
The cool darkness of the command center at Dulce base was a stark contrast to the brewing storm outside, where the unforgiving New Mexico desert winds whispered of the hidden truths that lay buried deep beneath its surface. Dr. Jordan Hayes sat, their eyes fixed on the readout, the frenetic dance of symbols and numbers reflecting off their determined face. They were on the cusp of an epiphany, teetering between the widely accepted laws of physics and the promise of a new cosmic paradigm.
Alex Mercer watched from across the room, noting the subtle shifts in Jordan's posture that belied a developing readiness to embrace the unbelievable. “Find something?” Alex's question, asked with a blend of curiosity and solidarity, bridged the gap between a command and a genuine query among equals.
Jordan's response was slow, measured against the magnitude of their analysis. “This isn't random static. It's a pattern—a repeated sequence phasing in and out but distinctly artificial.” Jordan turned away from the screen, locking eyes with Alex. “This could change everything.”
Sam Rivera leaned in, their eyes alight with the fires of revelation and a quenchless thirst for understanding. “A pattern means intention. Could it be a message?”
A figure emerged from the doorway, casting a long shadow into the room—Taylor Cruz. “Intentions can be friendly, or hostile. We shouldn't forget that,” said Taylor, bringing a dose of their usual pragmatism into the heart of discovery.
Alex acknowledged Taylor's caution with a nod, understanding the need to keep their feet grounded even as their spirits soared toward the unknown. “Then let's be the first to find out which it is."
The team gathered around the monitors, the soft tapping of Jordan's keystrokes now punctuated by the occasional crackle of Sam's radio equipment. The sound was almost ritualistic, a prelude to humanity's potential first, knowing foray into a larger universe.
Jordan's fingers paused, suspended in mid-air. The signal had evolved, becoming a beacon that somehow felt less alien and more familiar. It was as if the complexities of their message were unfolding into something more accessible, more terrestrial.
A hushed excitement swept through the room. The transformation suggested an awareness on the part of the unknown senders; a finesse that spoke volumes about their capabilities and perhaps their intentions.
With the growing realization that they were engaging with an intelligence far exceeding their previous understanding, the team prepared to reach back across the cosmic divide. Prepared or not, they were no longer bystanders in this galactic narrative. They were active correspondents in an exchange that transcended galaxies and welcomed them into an expansive, possibly fraught, interstellar conversation.
\*
Inside the cavernous central hub of Dulce military base, Dr. Jordan Hayes stood in near-darkness, surrounded by a nest of cables and monitors that buzzed with silent, cryptic life. Jordan's eyes narrowed to focus on the sequences that danced across the screen—patterns that could unravel the cosmic enigma surrounding them.
Alex Mercer approached with his characteristic stride, a signal of reliability in the chaos. "Status report, Dr. Hayes?" he inquired, his voice low, almost blending into the soundscape of beeping consoles and swirling fans.
"We're on the brink of unravelling the signal's origin," Jordan replied, the weight of implications heavy in their tone. "There's intelligence behind it, a thought process alien to our own."
As if summoned by their analysis, Taylor Cruz approached with authority radiating from every pore. "Understand this, we need to know if it's friend or foe. Don't get wrapped up in the existential—our lives may depend on the answers you provide."
Sam Rivera, their hands adroitly adjusting a device to fine-tune the signal, chimed in with optimism undercut by anxious anticipation. "We're deciphering the comm encryption. Soon, we'll have a channel open—not just listening in, but speaking back."
Alex nodded his understanding, his strategic mind processing the tactical implications while grappling with the more profound humanistic impact. "When we do, we'll tread carefully, communicate with purpose," he reassured the team.
The operation had evolved rapidly, from a stealthy incursion into a clandestine labyrinth to an exchange with an extraterrestrial intellect. Their earlier trepidation transformed into determined focus, as they prepared to extend humanity's hand into the vast unknown.
An alert on one of the monitor stations snapped the team into alarm. The signal had not simply been waiting—it had been calculating. Now, it reached its crescendo, demanding their attention with a provocative urgency.
Jordan's fingers raced over the keyboard, their eyes simultaneously interpreting data and sharing directives. "It's a linguistic lock, a test of comprehension. We crack this, we establish dialogue."
Taylor's presence was a beacon of steely resolve. "Then let's solve it. This is what we trained for—the unknown."
Alex and Sam exchanged a look that telegraphed their shared determination—this was not only the mission they had trained for; it was the mission they had been destined for.
Together, the Paranormal Military Squad team leaned into the challenge, their minds honing in on the complex patterns with a singular goal: to unlock the conversation with an intelligence that had already begun to shift the foundations of what they knew, or thought they knew, about the universe.
In a symphony of clicks and murmurs, they worked, knowing they were about to make a giant leap not just for themselves or Paranormal Military Squad, but for all of humanity. As the final pieces fell into place, Dulce's militaristic silence was shattered by the sound of intergalactic contact—by the sound of history being made.
## Chapter 7
In the enclosed space of Dulce's command center, the air was thick with anticipation, each team member poised to tread the razor's edge between scientific breakthrough and galactic peril. Dr. Jordan Hayes focused intently on the screen, their fingers tapping a staccato rhythm against the keyboard as lines of alien code cascaded down the monitor.
Alex Mercer's steely gaze surveyed the room, stopping on each member of his team. "Thoughts?" he asked, echoing the unspoken tension. His question, while directed at the group, lingered on Jordan—acknowledging their expertise and inviting collaboration rather than dictating orders.
Jordan's brow furrowed, an indicator of the mental gymnastics being performed. "It's unprecedented," they finally said, their voice a testament to the gravity of the moment. "Behavioral algorithms... if we're right, this code could reveal extraterrestrial thought patterns."
Before anyone could react, Taylor Cruz interjected with the assertiveness of someone accustomed to commandeering the discourse. "Then let's ensure we're deciphering it correctly," Taylor stated, their tone suggesting they were still battling to maintain control over an increasingly alien situation.
Sam Rivera hovered near the mainframe, youthful energy barely contained under the surface. "What if it's more than just a message? What if they're trying to extend consciousness across the stars?"
The room fell into a contemplative silence, broken only by the hum of electronic equipment and the distant thud of secured doors locking in rhythm. The weight of responsibility rested on each agent's shoulders—a heaviness palpable in the air they shared.
Alex stepped forward, reaching a subtle decision, one dictated by foresight and the humanity nestled at the core of their mission. "We approach with the aim to understand, not to confront," he said, softening his military bearing into a more diplomatic stance.
Jordan nodded, appreciating the leadership that Alex displayed in the face of the unknown, and turned back to the cryptic data. Here, before them all, was a tangible piece of evidence—proof of an extraterrestrial sentience that had outreached the bounds of their expectations.
Taylor took a breath, simultaneously exuding a sense of preparedness and venturing into the unknown alongside their peers. "Then let's do what Paranormal Military Squad does best—investigate and adapt," Taylor added, finding comfort in the familiar even as they stood on the cusp of an unprecedented alchemy of science and mystery.
The team leaned into their respective roles, driven by the urgency of the assignment and the pull of an insatiable curiosity. Sam offered a grin that belied the tension, a youthfulness that reminded them all of the profound excitement nested within the terror of the unknown.
Quietly but resolutely, they turned back to their instruments, each of them a sentinel on the threshold of a new reality. The once implicit lines of command were now woven into a shared tapestry of hierarchy and camaraderie. As they danced with the unknown, they were beacons of sentient endeavor, casting the light of human consciousness into the vast darkness that called to them.
\*
\*
Dulce Base's cavernous darkness was pierced by the sharp luminescence of monitors, casting an electric glow onto the faces of those who dared to unearth its secrets. Dr. Jordan Hayes stood motionless, eyes glazed in concentration, their mind a nexus where terrestrial science battled with celestial unknowns.
Alex Mercer watched from a slight distance, the weight of command tangible upon his shoulders, though lightened by the shared burden now held amongst them. "We could be on the frontier of a new kind of diplomacy," he mused aloud, giving voice to the moment's gravity.
At those words, Jordan's trance broke. "If that's the case, then these communications," Jordan motioned to the stream of data, "are our olive branch across the cosmos."
Taylor Cruz, who paced with restless energy, halted and faced the team—their stoicism marred by the erratic dance of lights reflected in their eyes. "An olive branch, or an invitation to a battlefield?" they posed, ever the strategist, their words laced with a hint of cynicism.
Sam Rivera, nestled amongst an array of equipment, licked their lips—a mixture of nerves and anticipation palpable. "We're mapping out something incredible here. Whether it's peace or war, we're the cartographers."
Silence enveloped them like the expanse of space itself, each member contemplating the chasms they might bridge—or the abysses into which they might unwittingly descend.
Alex's demeanor assumed a quiet resolve—the profound knowledge that this mission was as much about navigating uncharted philosophical territories as it was about ensuring survival. "Whichever it proves to be, we'll face it. Prepared, unified."
A nod passed between Jordan and Alex, a silent exchange of mutual respect and shared mission. Sam, buoyed by the weighty encounters of the mind and machinery, entered keystrokes with a fervor that seemed to bring them ever closer to the alien mind.
They stood there, the Paranormal Military Squad team, not just as guardians of homeworld secrets or as soldiers of clandestine wars, but as humankind's chosen few at the fulcrum of history—a history that was now unfolding to the rhythm of otherworldly codes.
Each revelation, each parsed symbol, inched them toward the line between the earthly and otherworldly. And as they stood on this precipice of cosmic negotiations, it was clear the ensuing dialogue would not just shape the future of Paranormal Military Squad—it could very well redefine the parameters of human existence.
\*
The hum of advanced computational systems tingling with cryptic transmissions framed the ambiance of Dulce's mainframe chamber. Jordan Hayes, fingers hovering over a console dense with blinking lights, furrowed their brow as sequences of alien data streamed across the screen.
Alex materialized behind them, his presence a stable beacon amidst the technological whirlwind. "Look for patterns, anomalies. Anything that might resemble a handshake protocol in their communications," he directed, his voice a low thrum, reverberating with cautious optimism.
Jordan cast a glance over their shoulder, acknowledging Alex's contribution with the shared understanding of colleagues who had transcended mere professional acquaintance. "I'm isolating sequences that seem to recur with more intention than static. If these are their handshakes, then we might just be making first contact," they remarked, their focus returning to the screen with renewed vigor.
From the other end of the room, where shadows married the artificial light, Sam's voice crackled through the static of nearby speakers, "Don't forget the anomalies we detected earlier. Each one could be a word, a sentence, or even a concept untranslatable to our current understandings."
Resolute, Taylor Cruz stood at Jordan's other side, a stoic figure wrestling with the implications of their mission. "Keep pursuing this line," Taylor instructed, an undercurrent of intensity carried forth in their otherwise composed demeanor. "And remember, this isn't just about making contact; it's about securing knowledge for humanity."
Alex offered a nod that spoke volumes, conveying his understanding of the stakes at play. Here, in this chamber of possibility, the team's actions would determine if humanity stood at the brink of a new age of understanding or the onset of an unprecedented threat.
Every second thrummed with significance as Jordan and Sam worked in tandem, each keystroke a foray into the unknown. Taylor observed with a commander's scrutiny, the gravity of their role sustaining them against the waves of ambiguity breaking against their resolve.
Pivotal moments come rarely in the course of human events but here, amidst the electronic symphony of a stalwart command center, lay the incepting notes of a cosmic overture. The harmony between human and alien, between Paranormal Military Squad and the vast reaches of space, began its first tentative measures, with each member of the team a vital instrument in a celestial ensemble yet to be fully heard.
\*
The crisp air within the mainframe room of Dulce base seemed to hum with unspoken possibilities. Jordan Hayes was the centerpiece of focus, their hands dancing methodically over the console as streams of otherworldly code cascaded down monitors, each flicker a potential key to the cosmic doors they were inching open.
Alex Mercer watched, posture relaxed but eyes sharp. "Remember, this could be our first introduction, maybe even our first impression," he said, mindful of the gravity carried by each action they made henceforth.
A hint of a smile touched Jordan's face, a small acknowledgment of the monumental task at hand. "Understood. I'm balancing the signal's syntax with our algorithms. If we're interpreting this correctly, it could be... well, an invitation."
Into the electric tension of the chamber walked Taylor Cruz, their silhouette a sharp contrast against the cool lighting, radiating a presence that spoke of command and chilly tenacity. "An invitation, or a challenge?" Taylor questioned, the weight of their suspicion casting a different tint on the cascading data.
Sam Rivera, in a corner arrayed with sophisticated equipment, piped up, their voice a buoyant note amidst the tentative atmosphere. "Either way, it's a connection. One that we're uniquely positioned to navigate," they remarked with an air of optimism threading through the uncertainty.
Alex channeled the strengths of his team into the core of their approach, his leadership adapting to the contours of an unprecedented scenario. "Cautious and curious," he reflected aloud, shaping a strategy that balanced their thirst for comprehension with the prudence required in addressing the unknown.
Jordan, hands momentarily at rest, looked up. The signal was more than a sequence of bits and commands—it was a riddle wrapped in the depths of space-time, and they were on the cusp of parsing its meaning.
Taylor, hardly a step away, nodded in silent agreement. The implications of their findings might very well direct the course of human destiny from this point onward.
Finding a tempo among themselves, the Dulce team was a confluence of ambition and acumen, each member intuitive to the beats of discovery. The chamber around them held untold stories, secrets coaxed from the stars, that now, led by Paranormal Military Squad's finest, began to unravel.
The future in those moments was unwritten, a narrative scribed not in the dust of desert confines, but in the potential for interstellar diplomacy and understanding. As they prepared to script humanity's next chapter, the room seemed to pulse with the heartbeat of a story far greater than the sum of its parts.
## Chapter 8
The grit of an earthbound dust storm contrasted sharply with the pristine sterility of the underground command center. Alex Mercer, eyes set with fervent determination, stood over Jordan Hayes, whose fingers danced across the keyboard with rapid purpose. Monitoring the progression of alien code unraveling before them, Mercer spoke with a tempered urgency, "Keep it steady, Jordan. We might be initiating the first true interspecies communication bridge here. It's all about finesse now."
Taylor Cruz, the embodiment of military precision, surveyed the room with a calculated gaze from their vigil beside an array of glimmering screens. "Remember, these could be delicate negotiations—or coded threats. Stay sharp," Cruz added, their voice cool as polished steel.
Jordan, with a silent nod, recognized the gravity of both stances. Gravitating between scientific acuity and diplomatic caution, they replied, "The sequence is aligning—syncing with our comms. It's looking more and more like direct engagement."
Amid the banks of electronic machinery, the thrumming pulse of an impending interspecies signal exchange, Sam Rivera interjected with a youthful zeal that cut through the weighty atmosphere, "It's not just an exchange. It's a... symphony. It's as if they're teaching us their language through modulation."
A moment of profound silence swept over the team. The isolation of their location, deep within the top-secret labyrinth of Dulce, became suffused with an almost palpable sense of historical significance.
"Then our response needs to be equally symphonic," Alex uttered, contemplating the awe-inspiring transmutation of their task from a simple recovery mission to a full-blown cosmic concerto.
With a renewed sense of wonder tempered by caution, the Paranormal Military Squad team found themselves harmonizing a delicate balance between envoys and interpreters. The long shadow cast by their duty was now illuminated by the brilliant glow of otherworldly dialogue.
In this carefully orchestrated march towards the unknown, each individual's expertise became critical notes in a larger melody. The narrative of human achievement, so often defined by solitary pursuits, now emerged as a collaborative opus, each member of the team a maestro in their right.
The protocols of encounters, the mathematics of languages, and the poetics of connection all fused into a singular moment of convergence. The echo of their efforts reverberated back to them, not through the cavernous base's concrete walls, but from light-years away, in the form of a reply, intangible yet infinitely profound.
\*
Amidst the hum of the supercomputers and the faint static from the scrambled transmissions, Alex Mercer cast a thoughtful glance across the dimly lit room toward where Dr. Jordan Hayes was methodically adjusting the archaic dials of the decryption machine. "Any progress?" he asked, his tone conveying both impatience and the deep-seated respect born from countless shared challenges.
Jordan did not look up, their gaze remained locked on the flickering lights that represented a dialogue suspended between worlds. Their fingers ceased their dance, hovering meditatively over the controls. "We might be on the cusp of a breakthrough," Jordan suggested. "The signal... it's evolved. It's reflexive now, responsive in a way that suggests sentience."
Taylor Cruz's familiar sharp strides approached the two, breaking the rhythm of soft beeps. "Responsive is good, if it means understanding," Taylor said, head tilted as they peered at the encryption data scrolling by. "But remember, comprehension can bring revelation or conflict."
Sam Rivera's youthful voice permeated the tension, brimming with an excitement edged by the enormity of what they faced. "If it's truly sentient, we're not just cracking a code; we're learning how to converse with an entirely new form of consciousness," they chimed in, the weight of history not lost on the zealous astrotechnician.
Alex nodded, his thoughts alighting on potential strategies for navigating the conversation they were cultivating with the unfathomable. "We need to keep that conversation going, echo its patterns, and speak its language," he resolved, knowing the delicate nature of their work merited every ounce of their collective acumen.
The chamber now was a crucible, forging within it the future narrative of human contact with the unknown. Every signal pulse they sent out was an invitation for understanding, and every echo back a step closer to bridging the cosmic divide. And so, together, they stood—agents in Paranormal Military Squad's clandestine ranks, united by purpose, sculpting humanity's first sonnets into the void.
\*
#### Knowledge graph updates
- (Jordan Hayes, Interprets, Communications as cosmic diplomacy, Moderate)
- (Taylor Cruz, Questions, Potential aggressiveness of alien intent, Minor)
- (Sam Rivera, Expresses, Optimism about forming a connection, Minor)
- (Alex Mercer, Adopts, Balanced strategy for contact, Moderate)
- (Paranormal Military Squad team, Navigates, Beats of cosmic discovery, Moderate)
- (Paranormal Military Squad team, Prepares, To script humanity's interstellar narrative, Major)
## Chapter 9
The sterile silence of Dulce Base's command center was thick with concentration as Alex Mercer surveyed his team, hunched over their respective technological battle stations. Each agent was a weapon against ignorance, their adversary a code from beyond the stars that held secrets to alien thought.
Dr. Jordan Hayes, whose hands had been steadfastly working the decryption algorithms, paused and looked up at Alex. "We're through the next layer of encryption," Jordan announced, a mixture of pride and gravitas in their tone. "It's communicating. It's... aware."
A shadow momentarily clouded Alex's determined features—awareness implied so much more than mere intelligence. "Aware and reactive or aware and proactive?" he queried, his experience anticipating the pivotal importance of intention.
"Unknown at this stage," Taylor Cruz interjected, looking up from a datasheet. "But I urge caution. We tread the line between breakthrough and disaster with each keystroke."
Sam Rivera, ever the source of technological acumen, added their voice to the conversation. "The signal's adapting every time we interact with it. Like a conversation where both parties are learning each other's language in real time."
Alex leaned in, rested a hand on Jordan's shoulder—a sign of companionship and an affirmation of trust. "Keep the communication channels open. But let no message, no pulse go unchecked. This could be our Rosetta Stone or our Tower of Babel."
Silence fell over them, a momentary lull as each member of the team contemplated the historic weight of their task. Yet, it was impregnated with a tangible sense of excitement—a collective energy that thrummed through the air just as palpably as the electric current through the banks of machines surrounding them.
They continued their work, squaring shoulders against the magnitude of their undertaking. The agents were standing not just at the precipice of a new chapter for Paranormal Military Squad but for all of humanity. For now, they communicated with powerful unknowns, but with each exchange, they were etching the first words of a dialogue that might forever alter humanity's place in the cosmos.
\*
\*
The sterile silence of Dulce Base's command center was thick with concentration as Alex Mercer surveyed his team, hunched over their respective technological battle stations. Each agent was a weapon against ignorance, their adversary a code from beyond the stars that held secrets to alien thought.
Dr. Jordan Hayes, whose hands had been steadfastly working the decryption algorithms, paused and looked up at Alex. "We're through the next layer of encryption," Jordan announced, a mixture of pride and gravitas in their tone. "It's communicating. It's... aware."
A shadow momentarily clouded Alex's determined features—awareness implied so much more than mere intelligence. "Aware and reactive or aware and proactive?" he queried, his experience anticipating the pivotal importance of intention.
"Unknown at this stage," Taylor Cruz interjected, looking up from a datasheet. "But I urge caution. We tread the line between breakthrough and disaster with each keystroke."
Sam Rivera, ever the source of technological acumen, added their voice to the conversation. "The signal's adapting every time we interact with it. Like a conversation where both parties are learning each other's language in real time."
Alex leaned in, rested a hand on Jordan's shoulder—a sign of companionship and an affirmation of trust. "Keep the communication channels open. But let no message, no pulse go unchecked. This could be our Rosetta Stone or our Tower of Babel."
Silence fell over them, a momentary lull as each member of the team contemplated the historic weight of their task. Yet, it was impregnated with a tangible sense of excitement—a collective energy that thrummed through the air just as palpably as the electric current through the banks of machines surrounding them.
They continued their work, squaring shoulders against the magnitude of their undertaking. The agents were standing not just at the precipice of a new chapter for Paranormal Military Squad but for all of humanity. For now, they communicated with powerful unknowns, but with each exchange, they were etching the first words of a dialogue that might forever alter humanity's place in the cosmos.
\*
Alex Mercer's eyes were fixed on the monitors, the reflected light casting an ethereal glow across his stoic face. The room buzzed with tension, a cacophony of low hums and electronic beeps that underscored the historic nature of their actions. He moved to where Dr. Jordan Hayes was immersed in their work, scrutinizing the alien code streaming rapidly down the terminal.
"Find anything that might look like an entry point or a... digital handshake?" Alex asked, his voice steady, betraying none of the tension gripping his chest.
Jordan looked up briefly, their expression weary yet intense, "Potentially. It's as if the code is anticipating our input, modifying itself in real-time. I've never seen anything like it."
From across the room, Taylor Cruz's sharp voice cut through the hum. "Then it's learning or, possibly worse, baiting us. Proceed with extreme caution," they commanded, their firm stance reinforcing the gravity of the situation.
Sam Rivera, surrounded by a cascade of screens and interfaces, added, "It's almost organic in its complexity. Any minute now, and I might have a way in."
A slight nod was Alex's immediate response, his mind racing through the potential scenarios. "Everyone, stay alert. This could be the beginning of something profound." His seasoned eyes never left the unfolding drama on the monitors.
The room fell silent, the air heavy with unspoken questions. Were they mere moments away from unlocking an otherworldly dialogue? Or was it a Pandora's box that, once opened, could not be closed?
Alex moved closer to the main console, his fingers hovering over the command keys. With the precision of a maestro orchestrating a symphony, he communicated silently with Jordan, respectful of their expertise, aware that the next move could alter the course of human history.
Jordan met his gaze, nodding sharply, and refocused on the task. The signal seemed to pulse with sentient curiosity, drawing them further into its intricate web.
A sudden flurry of alerts and the intensifying glow of monitors heralded that they had bridged a technological chasm. The alien intelligence on the other end was no longer a distant enigma—it was an active participant, responding to their digital overtures with an unknown agenda.
The team's meticulous efforts had led them to a momentous threshold. Beyond lay unprecedented contact—a nexus of curiosity and potential peril. Within the confines of the base, against the backdrop of a silent desert night, the Paranormal Military Squad operatives became mediators of Earth's bid for cosmic relevance, their every action now a gesture in the grand dance of intergalactic relations.
## Chapter 10
The corridors of the Dulce military base, now silent, echoed with a history of whispered conspiracies and furtive movements. But in the command center, a delicate tapestry of light and sound was being woven as the echoes of cosmic dialogue resonated through the high-tech enclave. Dr. Jordan Hayes, now leading the efforts, called out from their workstation, "I've isolated the signal's harmonics. It's more than a call; it's a song, an interstellar siren's call."
Alex Mercer, steady and resilient in the face of the incomprehensible, acknowledged with a quiet nod, "A song that we need to learn—quickly." His eyes, heavy with responsibility, scanned the room, watching his team work tirelessly at the intersection of science and speculation.
Sam Rivera, dulled by fatigue yet driven by unshakeable resolve, manipulated a complex array of audio interfaces. "There's a pattern, a repeating motif. It's structured, intentional," they muttered, their revelation a bridge between the known and the unimaginable.
Taylor Cruz, a figure of central authority, paced the length of the room, their usual unflappable demeanor betraying a rare flicker of apprehension. "We should be wary of the siren's call," Taylor interjected, invoking myths of old as a cautionary metaphor. "We don't want to crash upon unseen shores."
Undeterred, Jordan cast a determined glance at the team. "We navigate by starlight now, not by the limited light of our previous understanding." Their voice was a beacon, charting a course through uncharted realities.
Every individual was acutely aware that each moment in that room was a conduit to an epochal shift for civilization. The mysterious signals, once distant and alien, had coalesced into complex and harmonious oscillations—beacons of an extraterrestrial intellect inviting Earth to join in a cosmic consortium.
Silently, Alex approached the mainframe, his trained fingers aligning with the console's mechanisms. The room watched in collective breathlessness as he set the frequency in motion, an introductory phrase to an otherworldly melody—a symphony that could bind worlds or spell devastation for all they knew.
In the control room of Dulce, amongst whispered legends and the quiet hum of machines, humanity's ambassadors now stood, stretching their hands into the void, reaching for the hand that would either pull them into the light of new stars or into the maw of darkness between them.
\*
Underground, the Dulce facility's command center was awash with frenetic energy, a stark juxtaposition against the silent, decrepit corridors that enveloped them. The air hummed with anticipation as Dr. Jordan Hayes and Alex Mercer hunched over a console. The sterile light from the monitors cast an otherworldly glow upon their faces, now reflecting a mosaic of alien characters rapidly translating across the screen.
"The patterns are evolving," Jordan murmured, concentration etched into their every feature. "It's as if our attempts to decrypt have accelerated its learning. It's adapting to us."
Alex, who stood steadfast behind Jordan, felt a tinge of uncharted fear quickly quelled by the fire of discovery raging within him. "Keep it up," he urged. "But whatever this is becoming, we need to ensure it remains within our control."
Taylor Cruz interjected, their voice slicing through the buzz of activity. "Control may be an illusion when facing an intelligence that literally writes its own rules," they stated stoically, casting a watchful eye over the flurry of data.
"It's like it's learning to communicate," offered Sam Rivera from a nearby interface, their youthful energy boding a mix of awe and anxiety. "This gives talking to strangers a whole new meaning."
Alex surveyed his team—each face a study in concentration, determination, and not a small measure of trepidation. "This might well be our first contact," he acknowledged, "And we need to be ready for whatever answers back."
Together, they stood on the edge of the unknown, forging humanity's response to a message from the heavens. The ensuing silence was palpable—a collective introspection about their role in this grand cosmic play, one that could rewrite human history.
The encrypted dialogue continued to unfold, its intricate patterns showing an almost uncanny anticipation of their investigative strategies. The air turned heavy with the scent of electricity and ambition as they closed in on a pivotal response.
As the signal's intelligence—whether artificial or biological—grew more profound, so too did the realization that their mission had morphed from passive observation to active engagement. There was no turning back now. Each agent embraced their part in the delicate dance of an interstellar exchange that could change everything they thought they knew about life, intelligence, and the dark void beyond Earth's atmosphere.
\*
The underground halls of Dulce Base, usually buzzing with covert operations, now thrummed with a different kind of energy, an electric mix of fear and fascination. At the heart of the base, in a room shielded from the world's eyes, Alex Mercer, Jordan Hayes, Taylor Cruz, and Sam Rivera huddled around a bank of monitors. Each screen flickered erratically with the alien script that had become the center of their lives—and perhaps the pivot on which humanity's future would turn.
Jordan's eyes never wavered from the displays, their expression was one of rapt concentration, interspersed with flashes of revelation. "We're conversing with the stars," they whispered, almost to themselves. The words hung in the air, a testament to the awe-inspiring strangeness of the situation.
"The language is morphing; changing its structure with every exchange we have," Sam chimed in, enthusiasm tinged with the solemnity of the occasion. "It's like witnessing the birth of a new form of dialogue—one that spans galaxies."
Taylor, despite the situation's precariousness, maintained an appearance of ironclad composure. "Keep the communication stream secured and monitored. We don't know what we're dealing with yet," they reminded the team, a bastion of protocol amidst uncertainty.
Alex watched his team expand the parameters of human achievement; their work here would possibly define an era. "This is untrodden territory," he acknowledged, "and in every word we script, in every response we decode, we're drawing a map that others will follow."
Jordan turned to Alex, a nod acknowledging the shared responsibility of this moment. They had embarked on a new voyage, an odyssey not of the body, but of the intellect and spirit. No longer explorers of the Earthly realm, they had been promoted by circumstance to ambassadors of humanity in a silent and boundless ocean.
A sudden pulse of energy from the monitors signaled a breakthrough; the language had not only adapted but it seemed to resonate, to harmonize with their attempts at making contact. The alien script now sprawled across the screens didn't just ask to be understood—it invited interpretation, collaboration, maybe even companionship across the cold distances of space.
As they stood before the precipice of first contact, Paranormal Military Squad's finest became the architects of a symphony meant to echo through the cosmos. But more than architects, they were the first to play the notes of this cosmic composition, daring to believe that on the other end, someone—or something—might be listening, ready to join the chorus.
\*
The underground command center of Dulce Base, once pulsing with clandestine operations, now resonated with the charge of an impending cosmic threshold. Encircled by banks of whirring machinery, each monitor flickered erratically with alien script that had occupied center stage in the lives of Alex Mercer, Jordan Hayes, Taylor Cruz, and Sam Rivera.
Jordan's gaze didn't flit for even a moment from the screens, where indiscernible alien messages ebbed and flowed like the tide. The ciphers and symbols cascaded down as they tweaked the algorithmic sliders. "This sequence here," Jordan began, voice both hushed and heavy, "it's not just transmitting; it resonates—it's designed to be felt."
The room took a collective breath, the remarkable implication hanging in the air like a careful revelation. Sam Rivera was the first to respond, their voice alive with ingenuity: "It's a form of communication stretching well beyond words. We need to respond in kind—the whole array of human expression might be at play here."
Taylor's eyes remained fixed on the figures playing across the data sheets. "If that's the case," Taylor intoned pragmatically, "we must tread carefully. This is no longer just about being heard—it's about being understood."
Alex watched his team, each a fulcrum of insight and expertise, and felt the solemnity of the role they were about to assume. "Then we'll ensure our message is clear and full. Our humanity is our strength in this dialogue," he declared, the depths of his experience fueling a commanding reassurance.
The anticipation was palpable as the agents contemplated the vastness of their endeavor. They were not merely probing at the secrets of the planar cosmos—they were negotiating across the starry expanse, extending to distant intelligences the full spectrum of human curiosity and compassion.
A symphony of beeping consoles orchestrated their next steps as they prepared to articulate their interplanetary overture. The rhythmic tapping of Jordan's keystrokes set the tempo for an undertaking that traversed beyond algorithms and encryption.
The base withstood time and whispered secrets of its own, but none so grand as this moment of creation—an invitation to the universe that promised to echo through both the echoes of space and the annals of human history.
## Chapter 11
The sterile silence of Dulce Base's command center was thick with concentration as Alex Mercer surveyed his team, hunched over their respective technological battle stations. Each agent was a weapon against ignorance, their adversary a code from beyond the stars that held secrets to alien thought.
\*
The thrum of the colossal machinery vibrated through the subterranean facility as Alex Mercer stood amidst the whispers of technology, each carrying voices from worlds apart. He watched as Sam Rivera adjusted a complex array of cosmic translators, their expression a mixture of anticipation and awe.
"Are we ready, Mercer?" Taylor Cruz asked, the soft glow of the command center consoles reflecting upon their stern face.
Alex turned towards Taylor, his eyes holding a depth that betrayed the enormity of the threshold they were about to cross. "This is it," he said. "Initiate the protocol. It's time we answer the cosmos."
Jordan Hayes, stationed at the mainframe, typed rhythmically, a blue hue painting their focused features. The eerie silence that had settled over the team was interrupted by a visceral sound—humankind's response to the alien dialogue, now streaming into the abyss.
The control room, once a fortress of solitude, erupted into an oasis of life. Lights flickered in tandem, echoing the symphony of interstellar communication. They stood together at the edge of discovery, facing the symmetry and discord of a universe unknown.
"If we're right, we've just become Earth's first emissaries to a celestial congress we're only beginning to comprehend," Jordan's voice was somber, resonating with a mix of trepidation and honor.
The room filled with the resonance of human and alien minds converging, creating a new narrative within the fathomless expanse of existence. Paranormal Military Squad, once protectors of Earth's clandestine secrets, had now become the tether linking humanity to the cosmic fold.
\*
The underground command center of Dulce Base, once pulsing with covert operations, now resonated with the charge of an impending cosmic threshold. Encircled by banks of whirring machinery, each monitor flickered erratically with alien script that had occupied center stage in the lives of Alex Mercer, Jordan Hayes, Taylor Cruz, and Sam Rivera.
Jordan's gaze didn't flit for even a moment from the screens, where indiscernible alien messages ebbed and flowed like the tide. The ciphers and symbols cascaded down as they tweaked the algorithmic sliders. "This sequence here," Jordan began, voice both hushed and heavy, "it's not just transmitting; it resonates—it's designed to be felt."
The room took a collective breath, the remarkable implication hanging in the air like a careful revelation. Sam Rivera was the first to respond, their voice alive with ingenuity: "It's a form of communication stretching well beyond words. We need to respond in kind—the whole array of human expression might be at play here."
Taylor's eyes remained fixed on the figures playing across the data sheets. "If that's the case," Taylor intoned pragmatically, "we must tread carefully. This is no longer just about being heard—it's about being understood."
Alex watched his team, each a fulcrum of insight and expertise, and felt the solemnity of the role they were about to assume. "Then we'll ensure our message is clear and full. Our humanity is our strength in this dialogue," he declared, the depths of his experience fueling a commanding reassurance.
The anticipation was palpable as the agents contemplated the vastness of their endeavor. They were not merely probing at the secrets of the planar cosmos—they were negotiating across the starry expanse, extending to distant intelligences the full spectrum of human curiosity and compassion.
A symphony of beeping consoles orchestrated their next steps as they prepared to articulate their interplanetary overture. The rhythmic tapping of Jordan's keystrokes set the tempo for an undertaking that traversed beyond algorithms and encryption.
The base withstood time and whispered secrets of its own, but none so grand as this moment of creation—an invitation to the universe that promised to echo through both the echoes of space and the annals of human history.
## Chapter 12
The underground facility of Dulce Base, once shrouded in silence and operational secrecy, now hummed with an energy that cradled the promise of cosmic revelation. Alex Mercer stood pensively by the central terminal, flanked by Dr. Jordan Hayes, Taylor Cruz, and Sam Rivera, each poised at the edge of a history-defining moment.
Jordan's fingers ghosted across the console, tracing patterns of otherworldly origin. "The signal's architecture is becoming more complex, resembling aspects of human cognition—recognition, learning, even... empathy?" they postulated with furrowed concern.
Alex turned his gaze upon Jordan, his voice quiet but resolute, "Empathy could bridge galaxies. Let's harness this connection and proceed with cautious optimism."
Taylor, ever the sober sentinel, projected a more pragmatic standpoint. "Empathy or not, we are duty-bound to assess the risk to humanity. Every new discovery warrants a measured response."
The static hiss of communications equipment filled the air, its purpose now transformed into a dialogue with an intelligence beyond the stars. It was Sam, wide-eyed amid the myriad lights and switches, who broke the silence, "We have provisional confirmation of the signal's intent—initiation. We're being brought into a broader spectrum of cognizance."
The chamber lay still for a heartbeat, the Paranormal Military Squad agents steeped in contemplation of the path unfurling before them—a path paved with possibilities of diplomacy or disruption, each step a venture further into the cosmic unknown.
Alex stepped closer to the viewing monitors, each depicting alien symbols seemingly reaching out from the void. "Initiate the broadcast," he spoke with quiet command. "Our response will mark humanity's readiness to partake in the wider conversation of conscious beings."
Amidst the crackling air of expectation, the team wordlessly returned to their stations. They had transcended their roles as protectors of Earth's clandestine lore to become the harbingers of an interstellar parley that could change the existential course of life on their pale blue dot.
The deep hum of the terminal emitted a signal—a testament to the uncanny reality that Earth was now actively partaking in an exchange not bound by gravity nor the limits of the solar wind.
Here, in the depths of Dulce, a message from humanity woven from understanding and uncertainty was cast into the firmament, an epigraph of their desire to join the universal dialogue and discover their place among the constellations.
\*
The somber depths of the Dulce Base command center stood in stark counterpoint to the animated flurry of activity around the central comms array. Alex Mercer's silhouette loomed behind Dr. Jordan Hayes, who sat with a posture indicating laser focus on the decryption process. A quiet murmur of digital soundscape filled the space, subtly heightened by the anticipation of contact with an intelligence beyond the Earth.
Jordan's voice was steady, betraying none of the extraordinary nature of their work, "Looking through the signal's pattern, it's evident we're dealing with a form of intelligence—calculating, mirroring, possibly even understanding."
Alex's reflection bounced off the darkened screens, his head nodding in silent affirmation. "We're walking a delicate line. Our response should be thoughtful, measured. We're ambassadors, not merely explorers."
Taylor Cruz approached, arms folded, their words slicing through the din of careful keystrokes and soft whirrs, "If there's even the slightest chance it understands, we can't afford missteps. The language of the stars might be more absolute than ours."
From another terminal, Sam Rivera brought youthful vigor to the conversation, "Theres rhythm in these patterns. If this is their way of reaching out, our reply should encapsulate all that we are—all that humanity stands for."
Looking around at his team, Alex saw resolve etched on every face. The chamber, usually somber and echoing with the quiet steps of covert agents, now felt alive with the heartbeat of discovery. They were not just professionals operating in the gloom; they were a collective standing at the helm of a momentous journey.
"Let's begin," he said, bolstered by the resolve in his voice. "Every second counts." With that, they pressed forward, setting in motion a reply to a conversation billions of years in the making.
The dance with an unseen partner commenced, each pulse they sent out a step taken with caution and hope. And as those digital pulses journeyed through the black sea of infinity, Earth, for perhaps the first time, joined a pan-galactic dialogue that whispered secrets of the cosmos—secrets that, until now, had been lost in the silent vastness of space.
\*
As the team stood in the centralized nerve center of Dulce's underground fortress, the solemn atmosphere was reverent, overseeing systems that engaged with an intelligence from the void. Alex's stance was contemplative as he gazed at Jordan Hayes, who presided over the console, the tension of the moment reaching a tactile fervor. Each rhythmic tap of Hayes's fingers on the keys was a foray into uncharted symphonies of contact.
Observing Hayes unravel the dense alien encryption, Alex spoke, a diplomatic tenor underpinning his words, "Keep focused on the syntax, dissect its nuances. We're not just decoding signals; we're translating intentions."
Without diverting from their task, Jordan acknowledged the insight. "Indeed, if their understanding of us is as deep as we hope, we're paving the way for dialogue far beyond our current realm."
Taylor Cruz, near the rear of the room, provided a steady oversight. "As horizonless as our prospects may seem," Taylor intoned, "remain diligent. Complacency before alien cognition could spell catastrophe."
Sam's youthful voice resonated with optimism, "Imagine—forming a rapport with a consciousness separate from our reality; we're drafting the bridge to stars alive with minds!"
The sentiment hung for a moment before Alex gathered his conviction. "Dialogue is our vessel. We are not just agents of enigma; we are the threads that may weave a new cosmic relationship." His words seemed to reflect off the walls, reaching beyond the room's confines, a quiet yet resilient vow.
Their task was titanic, stepping stones laid delicately into new territories of existence. The signal, once an esoteric strand in the echo of the universe, beckoned now with a clarity rocketing the complexity of thoughts from a distant order.
Action by action, the Paranormal Military Squad team bridged the vast interstellar distances, their expertise and empathy casting a beacon of unity into frontiers of intelligence and knowledge. Their work, a partnership struck with an unseen cosmic congregation, each pulse sent and received a line in Earth's novitiate envoi to the cosmic shores.
\*
Under the stark, unforgiving lights of Dulce Base's underground command center, tension buzzed harder than the banks of supercomputers that lined the walls. Agent Alex Mercer leaned over the shoulder of Jordan Hayes, whose eyes were locked onto the display screen, where an incomprehensible series of alien symbols streamed past incessantly.
“Any progress on the decryption?” Alex's voice was steady, a controlled presence necessary in the gravity of their undertaking.
Jordan tapped a key, pausing the flow of code, and leaned back with a deep sigh. "We've broken through another subset of the cipher. It's revealing... well, indications of a complex society, not unlike our own." Their eyes met Alex's with an unspoken question that hung heavily between them—were they truly prepared for what they might find?
Taylor Cruz strode into the room, a tightly coiled spring of ambition and authority, and peered at the screen. "Understand their society, and we may predict behavior. Remain expedient—we don't know how much time we have before the situation shifts." There was an edge of stark realism to Taylor's words, the underlying message clear: every revelation bore its own set of risks.
Alex nodded thoughtfully, recognizing the validity of Cruz's caution. Turning to Sam, who was tinkering with a device that buzzed quietly on the table, he asked, “Sam, can your contraption get us any further?”
Sam looked up with a smirk, a twinkle of mischief in their eye. “Its not just any contraption, its potentially a direct line to their thoughts. Give me a moment more, and I'll have something for you.”
The air ticked with electronic beeps and the rustling sound of the Paranormal Military Squad team at work. They were so close to peering into the intelligence of an alien race—a reality on the brink of dramatically expanding their understanding of the universe.
The machinery whirred in response to Sams precise touches, and suddenly, the room filled with a low hum—something had changed, a signal had been successfully sent. The team held their breath as they listened. The sound that filled the room was unmistakable: a response, an alien voice filtered through the static of space and time.
Alex exchanged a look of quiet triumph with Jordan. The breakthrough was monumental; they were no longer casting messages into the void but engaged in a dialogue—an exchange that marked the beginning of Operation: Dulces true unfolding. This was it, the first steps into an interstellar odyssey that demanded every ounce of their courage and wit.
## Chapter 13
Dr. Jordan Hayes shuffled a stack of papers, their eyes revealing a tinge of skepticism at Taylor Cruz's authoritarian performance. _Protocols_, Jordan thought, _are just the framework, the true challenges we're about to face lie well beyond the boundaries of any protocol._ They cleared their throat before speaking, tone cautious yet firm, "Let's remember, the unknown variables exceed the known. We should remain adaptive."
A murmur of agreement echoed from Sam Rivera, who leaned forward, lacing their fingers together as if weaving a digital framework in the air before them, "Exactly, adaptability could be the key to interpreting the signal distortions and system malfunctions. We shouldn't discount the… erratic."
Their words hung like an electric charge in the room, challenging Taylor's position with an inherent truth. Cruzs jaw tightened almost imperceptibly, but the agent masked it with a small nod, conceding to the omnipresent threat of the unpredictable.
Alex glanced at Jordan, who never looked back, their gaze fixed instead on a distant point, as if envisioning the immense dark corridors they were soon to navigate in Dulce. Jordan was not one to embrace fantastical theories, but the air of cautious calculation betrayed a mind bracing for confrontation with the inexplicable, an internal battle between the evidence of their research and the calculating skepticism that kept them alive in their field.
The meeting adjourned with no further comments, the team members quietly retreading the paths to their personal preparations. Alex, trailing slightly behind, observed the others. _The cautious reserve Jordan wears like armor doesn't fool me_, he thought, _their analytical mind sees the patterns I do. And that's worth more than protocol. That's the connection we need to survive this._
As the agents dispersed into the labyrinth of the facility, lost in their thoughts and preparations, the base's halogen lights flickered, a brief and unnoticed harbinger of the darkness to come.
\*
The gritty, wind-tossed surface of New Mexico, just above the cavernous domain of Dulce Base, offered no shelter from the burgeoning storm—the scouring sands an earthly reminder of chaos theories in motion. Far beneath, a similar maelstrom brewed within the confines of the command center, as Paranormal Military Squad's handpicked squad stood poised for potential enormities of contact.
Ruffling through printed transmission logs, Jordan Hayes dialed the focus of their analytical prowess onto the emerging pattern of signals crisscrossing between Earth and the unfathomable. "Our responses so far have echoed their complexity, but the real divergence is yet to come," Jordan remarked stoically, the calm belying the mounting surge of adrenaline for the revelation ahead.
Alex Mercer's figure, a silhouette sharpened by the purpose, loomed at the periphery of the monitors' sickly glow. "Indeed," he assented, "The echoes are the easy part. It will be the introduction of our own, human variable that truly begins our dialogue."
Taylor Cruz, windowless command center notwithstanding, appeared as though they could feel the tempest above. Their eyes never left the monitors as they unspooled their hard wisdom. "For all our advances, we find ourselves deciphering the swings and nuances of an interstellar pendulum. Predict its arc, and we may preempt the gravity of its message."
Amidst a chorus of bleeps and static, Sam Rivera's tech-clad hands moved rhythmically, their spirited approach to unruly streams of data bordering an intimate dance with entropy. "Entropy that leads to discovery," Sam mused, responding to Taylor's metaphor. "Each step into the unknown is a step away from precedent."
Alex, drawing near Jordan, spoke again, his voice now a thread woven through the very fabric of their operations. "Let's be the cartographers of this new territory. Our initial shades of understanding could color the cosmos for generations to come."
Their gazes fell upon a screen as the latest transmission painted its digital blooms of alien script across the black. This time, the pattern wavered in an almost imperceptible fashion, a modification that whispered of active, alien thought awaiting their next move. A hush enveloped the Paranormal Military Squad ensemble, the gravity of the moment undeniable. They were about to issue a reply, one poised to reshape the very concept of humanity's outreach into the cosmos.
The New Mexico desert's secrets were infamous, its storms a mere prelude to the revelations that the team—united in purpose—would unleash upon the world. The howling winds outside found their counterpart in the newfound resolve within, as Dulce's stalwart guardians readied themselves to send forth humanity's retort to the echoes from beyond.
\*
The cavernous control room, deeply entrenched beneath the desolate New Mexico terrain, held the Paranormal Military Squad team in intense focus; an island of calm amid the storm of cosmic dialectics. Dr. Jordan Hayes worked methodically, every keystroke an intricate step in their tenuous cosmic ballet. Suddenly, they paused, a signal pattern resonating from the screen. "This is new; it's...inviting. Its as if the signal is not just calling to us but weaving its intelligence through ours."
Alex Mercer scrutinized the shift in data. "A confluence of minds, then. If we're to meet them halfway, Jordan, our reply must be both innovative and discerning," he proposed, a glimmer of profound curiosity behind his authoritative demeanor.
Taylor Cruz, whose sharp eyes missed nothing, nodded from beside a secondary panel. "Innovative, yes, but also defensive. This interaction is a razors edge, and we cannot afford to bleed before the unknown," Taylor reminded them, the metaphor a stark warning of potential dangers.
Against the backdrop of their conversation, Sam Riveras youthful optimism cut through the tension. "If theyre weaving through our intellect, then we've achieved something beyond first contact—we're at the genesis of interstellar symbiosis," they posited with a mix of reverence and excitement.
Alex returned Sams smile with his own, tempered and faint, as he turned back to the task at hand. The magnitude of their mission extended beyond the fabric of the universe, an exploration into the threads that connected sentient beings across the vast expanse. “Lets reply with our own woven tapestry of thought—delicate, but deliberate.”
With renewed determination, the room came alive with an undercurrent of anticipation, its occupants charged with the potential of forging an alliance with the cosmos. Paranormal Military Squad's finest were no longer merely soldiers and scientists; they had become pioneers on the vanguard of humanitys greatest odyssey.
The New Mexican sands above, impassive to the change brewing underneath, stood as silent sentinels as Earth's emissaries crafted their response. A response that, composed with care and imbued with humanity's essence, reached into the void, connecting with an otherworldly intelligence that awaited their harmony in the cosmic conversation.
## Chapter 14
The command center of Dulce Base lay shrouded in shadows that seemed to claw at the edges of the dimly lit array of screens and consoles. Alex Mercer, focused and unwavering, watched as Dr. Jordan Hayes parsed the latest string of alien signals—a symphony of otherworldly communications that threatened to either enlighten or confound.
"Were encountering a paradigm shift with every transmission," Jordan Hayes murmured, the pulsing glow of the monitor painting their features with an almost spectral hue. "This signal... its evolving, becoming denser, more sophisticated. As if it's growing alongside us—tandem evolution."
The air was electric, charged with the raw potential of uncharted discovery and laden with the gravity of existential risk. Taylor Cruz, who always seemed ready to mold such gravity into actionable strategies, stepped forward. "We must contain this evolution within parameters we can manage. We cannot be bystanders to an uncontrolled ascent of intelligence."
Sam Rivera, the youngest of the cohort, worked feverishly at their station. "It's not just intelligence—these signals have rhythm, a kind of music suggesting not just evolution, but a dance! We're being invited to partake in the cosmos's ballet!" they exclaimed, a touch of youthful exuberance breaking through the solemnity.
Alex turned, facing his team, the stoic mask of command tempered by the perceptible flicker of awe in his gaze. "Let this dance then be our dialogue. We will match their steps with prudent but daring measures—our humanity as our guide."
In the ensuing hours, the Paranormal Military Squad team forged a rhythm of their own, their collective expertise a beacon piercing through the fog of the unknown. The signal, increasingly intricate and seemingly conscious, now demanded not just observation but participation, an interstellar pas de deux that hummed with the promise and peril of first contact.
Before them, the communications interface flickered to life with a received transmission—a resonant hum that seemed to vibrate through the very foundations of the base. They had successfully established a back-and-forth with whatever intelligence lay hidden among the stars. Every subsequent note they struck within the cosmic ether would come to define humanity's place within the galactic community—heralds of Earth's grand entrance into a universe far less silent than once perceived.
\*
In the concrete belly of Dulce Base, dimly lit by the jagged dance of fluorescent lights above, Sam Rivera perched on the edge of their seat, their eager fingers fluttering across an ancient keyboard. The stark, cold room—reminiscent of a time when covert operations and unspoken dread ruled supreme—now housed a peculiar blend of old-world machinery and sleek, modern interfaces.
Alex Mercer, standing steadfast like a bridge between the enigmatic past and the unfathomable present, watched on. In his eyes flashed the foreboding excitement of change. "Sam," he started, his voice steadfast, "the patterns in these signals, what do they tell us about the nature of our... guest?"
Sam's eyes glimmered with something akin to thrill—or was it trepidation? "It's like we're mirroring each other, evolving together through this... dialogue. Like it knows us, understands us, and it's… learning."
Jordan Hayes, preoccupied at a nearby console, chimed in without lifting their gaze. "It's a dialogue that transcends mere words, Alex. We're being woven into a narrative far grander than the sum of our known sciences."
Taylor Cruz, arms crossed, wore the heavy mantle of their skepticism comfortably. "Keep theorizing," they interjected crisply, "but remember the grounding reality of what we are part of here. This contact is a blade that cuts both ways."
In this cavern of history, voices both human and inhuman whispered secrets to those brave enough to listen. Each member present understood the gravity that pulled at their feet; no longer were they mere mortals shackled to their terrestrial plane. The digital pings and encrypted calls resonated with an implication of a cosmic agenda that would not be ignored.
Jordan's fingers paused, hovering in hesitation. What ripple might the next keystroke send through the fabric of known existence? It was a step into the ballet of the infinite, where the Paranormal Military Squad team played their part in the waltz of wonders with an audience of stars.
\*
## Chapter 15
In the clandestine hush of Dulce Base's subterranean command center, the Paranormal Military Squad team had become a crucible for interstellar communication. Dr. Jordan Hayes' gaze lingered on the screen as they navigated through the convolution of alien code. Each character held the potential to unravel a new dimension of contact, and with Sam Rivera's keen interjection, they were crafting humanity's inaugural cosmological discourse.
Alex Mercer peered over Jordan's shoulder, calculating the implications of every visual nuance that cascaded across the monitor. "Look for consistency—any repeating motifs could signal a willingness to engage. We're drafting history with each exchange," he remarked, aware of the delicate balance between forging a bond and exposing vulnerabilities.
Taylor Cruz, stoic and enigmatic, observed the interplay from the threshold, a silhouette against the machinery's luminescence. "Remember, while we seek common ground, the foundation we stand upon remains Terra firma. Caution must temper our curiosity," they stated, their voice an anchor amidst the current of excitement.
The command center buzzed with energy, rivaled only by the tempest overhead that concealed their operation. Sam, with swift dexterity, navigated the communications relay. "Their signals resonate almost musically. It's as if they're composing a symphony, and we've been handed the baton to conduct the next movement," they offered, imbuing the scenario with a blend of scientific adventurism and poetic license.
Amidst the whirring servers and the occasional flicker of emergency lighting, the essence of their mission transcended mere reconnaissance. They were humanity's elected envoys at the brink of a celestial alliance—or confrontation—with an audience as vast as the universe itself.
Alex stepped back, his profile etched by the chamber's artificial day. "Then let's ensure our contribution to this symphony harmonizes with theirs. It's time for humanity's voice to rise and be counted among the cosmic ensemble."
Under his directive, the Paranormal Military Squad team initiated their calculated response, weaving thoughts and theories into a digital overture aimed at the heart of alien intellect. As the digital stream punctured the endless night, each member of this clandestine group was acutely aware of the irrevocable step they undertook—bringing Earth into the pantheon of galactic entities designed to converse among the stars.
\*
Clusters of high-tech equipment bathed the Dulce underground command center in an eerie blue light. Sam Rivera's fingers flew across the keyboard, navigating an elaborate network of alien patterns. The very air seemed to pulse with the ebb and flow of cryptic communications reaching across the stars. "I've got something!" Sam's announcement tore through the focus in the room, drawing every pair of eyes to the torrent of symbols unraveling on the screen.
With the pacing of a seasoned officer gauging the moment before action, Alex Mercer approached, his calm demeanor belying an acute awareness of the precipice on which they now stood. "Define 'something,'" Alex prompted, reinforcing the need for clarity amidst the extraordinary.
"It's repeating—a sequence thats evolved with each interaction, almost as if it's... singing," Sam theorized, the awe in their voice reflecting the potential magnitude of their discovery.
Jordan Hayes interjected from across the console, their eyes not leaving the display as they absorbed the new data. "A cosmic vocalization, then," they mused, intrigued. "A singularity in the signal that might represent a point of reference for both parties."
Taylor Cruz, hands clasped behind their back, regarded the unfolding scene, their own calculations etching lines of concern onto their stern visage. "Or a beacon—a homing tune, calling out to something we might not be ready to greet," Taylor offered, voicing the group's unspoken apprehension.
Alex's eyes locked on the screen, taking in the scope of what they were attempting to interpret. Drawing a deep breath, Alex gave a slight nod. "If this is their song, then let us respond with ours. We've come this far by mirroring their signals, now let's engage in an interstellar duet, and see where the music leads us."
With the expectation of the significant achieving a crescendo, the members of Paranormal Military Squad huddled over their equipment—sages at the threshold of a potentially world-altering communion. The strange harmonies that reverberated through the command center suggested that their interlocutors were poised, waiting, perhaps even eager, for Earth's chorus to join the symphony.
As the team initiated their reply, weaving humanity's own intricate melody into the vast cosmic dialogue, they each felt a profound change within—an evolution of purpose. They were not just messengers or investigators; they had become co-composers in a galactic orchestra, with the universe itself as their witness and concert hall.
With the exchange of harmonious signals crawling through the vacuum of space, the Paranormal Military Squad operatives found themselves part of a bridging of minds—a realization that out there, among the vast arrays of stars and planets, harmony was the true universal language.
\*
The dim glow of monitors cast an otherworldly ambiance upon Dulce Base's command center, where Paranormal Military Squad's chosen stood huddled over their instruments, suspended at history's threshold. Codes—alien in origin and nature—were being deciphered by Dr. Jordan Hayes, whose countenance bore the marks of deep concentration.
Alex Mercer, the bedrock upon which their team's resolve was founded, leaned in with an eagerness tempered by his chain of command. "Jordan, we've invested our expertise into comprehending their patterns, but now we must also endeavor to understand their intent," he urged, his voice bearing the gravitas of their mission's potential consequences.
At another console, Sam Rivera's youth did not betray their crucial role in the operation. With eyes alight, they mirrored the rapid computing before them. "There's emotion here—complex, profound even. This isn't just the output of a cold machine; it's...sentience," Sam whispered, nearly drowned by the mechanical chorus around them.
Jordan, without shifting focus from their work, replied, "It's a sentience that—should we succeed here—ushers us into a new era of existence. The cadence of these signals," they tapped the screen with a flourish, "could well be the heartbeat of this new dawn."
Taylor Cruz paused beside Mercer, their expression unreadable beneath the sterile light. "And as it beats, we must gauge whether its rhythm bodes well for us, or spells our missteps. Courage must not blind us to the hazards intrinsic to such contact," Taylor cautioned, the sentinel within them ever alert.
Alex nodded, a gesture that carried the weight of responsibility and a silent command: proceed, but with circumspection. They were not merely decoding a message; they were interpreting a dialogue across the celestial divide.
The room fell into a rhythm akin to a well-conducted ensemble. Each member's expertise proved a critical note in the unfolding symphony. Their actions were now more than mere research or defense; they were the tentative overtures of humankind reaching out to grasp the vast unknown.
Textures of sound meshed with the light from countless computations, the palpable anticipation of the agents at the edge of discovery cresting with an awareness that their work would reshape future chronicles. And when the response finally came—a signal piercing the deafening silence of uncertainty—all within Dulce's confines understood: the dawn of an interstellar continuum had just begun to break.
\*
In the sterile hum and flickering lights of Dulce Base's command center, the Paranormal Military Squad team stood as humanity's vanguard, verging on the brim of an intergalactic abyss. Dr. Jordan Hayes, analytical edges sharp, deciphered extraterrestrial patterns that bled across screens in enigmatic cascades—a daunting mosaic of potential threats and untapped wisdom.
Agent Alex Mercer, the embodiment of focus and a steadfast nerve, observed the unfolding digital drama with the gravitas due a historic first contact. "Let the data weave its narrative, Jordan," he instructed, a moderate undertone of exhilaration within his command. "It's encoding more than information—it's outlining civilization."
Jordan absorbed the directive, their gaze unflinching from the screens, feeling the weight of their next move. "The nuances here are extraordinary," they acknowledged. "It paints a picture of a culture steeped in complexities we're only starting to fathom.”
Taylor Cruz, stoicism personified yet not immune to the situation's gravity, chimed in. "Understand it, but guard against it," they cautioned, bringing a sober prudence to the room. "This culture, however advanced, remains an unknown quantity—an ocean of wonders and darkness with uncertain tides."
Sam Rivera, a visual contrast with wide eyes and restless hands, represented the other side of the room — intrigue and optimism against the drawn swords of precaution. “Think of it,” they proposed, voice bouncing with a rebellious upbeat timbre, “as the first act of a play written in constellations. We're setting the stage for a galactic narrative.”
Each team member, in their way, was both actor and scribe in this moment of tense pageantry. Heavy with the presence of risk, the command center had become not just a room of computers and glass panels but a theater for performing the elaborate choreography of contact.
Bound by resolve and curiosity, they proceeded, each data entry a trembling step onto the cosmic stage. And like all cautious pioneers edging into fertile but unnavigated lands, they understood: as they mapped the heavens, they were simultaneously mapping the furthest reaches of their own existential horizons.

Binary file not shown.

After

Width:  |  Height:  |  Size: 61 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 94 KiB

61
docsite/index.md Normal file
View File

@@ -0,0 +1,61 @@
---
title: Welcome to GraphRAG
layout: page
---
👉 [Microsoft Research Blog Post](https://www.microsoft.com/en-us/research/blog/graphrag-unlocking-llm-discovery-on-narrative-private-data/) <br/>
👉 [GraphRAG Accelerator](https://github.com/Azure-Samples/graphrag-accelerator) <br/>
👉 [GitHub Repository](https://github.com/microsoft/graphrag)
<p align="center">
<img src="img/GraphRag-Figure1.jpg" alt="Figure 1: LLM-generated knowledge graph built from a private dataset using GPT-4 Turbo." width="450" align="center" />
</p>
<p align="center">
Figure 1: An LLM-generated knowledge graph built using GPT-4 Turbo.
</p>
GraphRAG is a structured, hierarchical approach to Retrieval Augmented Generation (RAG), as opposed to naive semantic-search
approaches using plain text snippets. The GraphRAG process involves extracting a knowledge graph out of raw text, building a community hierarchy, generating summaries for these communities, and then leveraging these structures when performing RAG-based tasks.
To learn more about GraphRAG and how it can be used to enhance your LLMs ability to reason about your private data, please visit the [Microsoft Research Blog Post](https://www.microsoft.com/en-us/research/blog/graphrag-unlocking-llm-discovery-on-narrative-private-data/).
## Solution Accelerator 🚀
To quickstart the GraphRAG system we recommend trying the [Solution Accelerator](https://github.com/Azure-Samples/graphrag-accelerator) package. This provides a user-friendly end-to-end experience with Azure resources.
## Get Started with GraphRAG 🚀
To start using GraphRAG, check out the [_Get Started_](posts/get_started) guide.
For a deeper dive into the main sub-systems, please visit the docpages for the [Indexer](posts/index/overview) and [Query](posts/query/overview) packages.
## GraphRAG vs Baseline RAG 🔍
Retrieval-Augmented Generation (RAG) is a technique to improve LLM outputs using real-world information. This technique is an important part of most LLM-based tools and the majority of RAG approaches use vector similarity as the search technique, which we call _Baseline RAG_. GraphRAG uses knowledge graphs to provide substantial improvements in question-and-answer performance when reasoning about complex information. RAG techniques have shown promise in helping LLMs to reason about _private datasets_ - data that the LLM is not trained on and has never seen before, such as an enterprises proprietary research, business documents, or communications. _Baseline RAG_ was created to help solve this problem, but we observe situations where baseline RAG performs very poorly. For example:
- Baseline RAG struggles to connect the dots. This happens when answering a question requires traversing disparate pieces of information through their shared attributes in order to provide new synthesized insights.
- Baseline RAG performs poorly when being asked to holistically understand summarized semantic concepts over large data collections or even singular large documents.
To address this, the tech community is working to develop methods that extend and enhance RAG. Microsoft Researchs new approach, GraphRAG, uses LLMs to create a knowledge graph based on an input corpus. This graph, along with community summaries and graph machine learning outputs, are used to augment prompts at query time. GraphRAG shows substantial improvement in answering the two classes of questions described above, demonstrating intelligence or mastery that outperforms other approaches previously applied to private datasets.
## The GraphRAG Process 🤖
GraphRAG builds upon our prior [research](https://www.microsoft.com/en-us/worklab/patterns-hidden-inside-the-org-chart) and [tooling](https://github.com/graspologic-org/graspologic) using graph machine learning. The basic steps of the GraphRAG process are as follows:
### Index
- Slice up an input corpus into a series of TextUnits, which act as analyzable units for the rest of the process, and provide fine-grained references into our outputs.
- Extract all entities, relationships, and key claims from the TextUnits using an LLM.
- Perform a hierarchical clustering of the graph using the [Leiden technique](https://arxiv.org/pdf/1810.08473.pdf). To see this visually, check out Figure 1 above. Each circle is an entity (e.g., a person, place, or organization), with the size representing the degree of the entity, and the color representing its community.
- Generate summaries of each community and its constituents from the bottom-up. This aids in holistic understanding of the dataset.
### Query
At query time, these structures are used to provide materials for the LLM context window when answering a question. The primary query modes are:
- [_Global Search_](posts/query/0-global_search) for reasoning about holistic questions about the corpus by leveraging the community summaries.
- [_Local Search_](posts/query/1-local_search) for reasoning about specific entities by fanning-out to their neighbors and associated concepts.
### Prompt Tuning
Using _GraphRAG_ with your data out of the box may not yield the best possible results.
We strongly recommend fine-tuning your prompts following the [Prompt Tuning Guide](posts/index/3-prompt_tuning) in our documentation.

View File

@@ -0,0 +1,5 @@
{
"mimetypes": {
"text/markdown": true
}
}

View File

@@ -0,0 +1,94 @@
{% extends 'base/display_priority.j2' %}
{%- block header -%}
---
title:
layout: page
tags: [post, notebook]
---
{%- endblock header -%}
{% block in_prompt %}
{% endblock in_prompt %}
{% block output_prompt %}
{%- endblock output_prompt %}
{% block input %}
```
{%- if 'magics_language' in cell.metadata -%}
{{ cell.metadata.magics_language}}
{%- elif 'name' in nb.metadata.get('language_info', {}) -%}
{{ nb.metadata.language_info.name }}
{%- endif %}
{{ cell.source}}
```
{% endblock input %}
{% block error %}
{{ super() }}
{% endblock error %}
{% block traceback_line %}
{{ line | indent | strip_ansi }}
{% endblock traceback_line %}
{% block execute_result %}
{% block data_priority scoped %}
{{ super() }}
{% endblock %}
{% endblock execute_result %}
{% block stream %}
{{ output.text | indent }}
{% endblock stream %}
{% block data_svg %}
{% if "filenames" in output.metadata %}
![svg]({{ output.metadata.filenames['image/svg+xml'] | path2url }})
{% else %}
![svg](data:image/svg;base64,{{ output.data['image/svg+xml'] }})
{% endif %}
{% endblock data_svg %}
{% block data_png %}
{% if "filenames" in output.metadata %}
![png]({{ output.metadata.filenames['image/png'] | path2url }})
{% else %}
![png](data:image/png;base64,{{ output.data['image/png'] }})
{% endif %}
{% endblock data_png %}
{% block data_jpg %}
{% if "filenames" in output.metadata %}
![jpeg]({{ output.metadata.filenames['image/jpeg'] | path2url }})
{% else %}
![jpeg](data:image/jpeg;base64,{{ output.data['image/jpeg'] }})
{% endif %}
{% endblock data_jpg %}
{% block data_latex %}
{{ output.data['text/latex'] }}
{% endblock data_latex %}
{% block data_html scoped %}
{{ output.data['text/html'] }}
{% endblock data_html %}
{% block data_markdown scoped %}
{{ output.data['text/markdown'] }}
{% endblock data_markdown %}
{% block data_text scoped %}
{{ output.data['text/plain'] | indent }}
{% endblock data_text %}
{% block markdowncell scoped %}
{{ cell.source }}
{% endblock markdowncell %}
{% block unknowncell scoped %}
unknown type {{ cell.type }}
{% endblock unknowncell %}

18
docsite/package.json Normal file
View File

@@ -0,0 +1,18 @@
{
"name": "@graphrag/docsite",
"version": "0.0.1",
"private": true,
"scripts": {
"start": "eleventy --serve",
"build": "eleventy && touch _site/.nojekyll",
"build:docs": "yarn build",
"start:docs": "yarn start"
},
"dependencies": {
"@11ty/eleventy": "^2.0.1",
"@11ty/eleventy-plugin-syntaxhighlight": "^5.0.0",
"@kevingimbel/eleventy-plugin-mermaid": "^2.2.1",
"eleventy-plugin-code-clipboard": "^0.1.1",
"markdown-it": "^14.1.0"
}
}

View File

@@ -0,0 +1,168 @@
---
title: Custom Configuration Mode
navtitle: Fully Custom Config
layout: page
tags: [post]
date: 2023-01-04
---
The primary configuration sections for Indexing Engine pipelines are described below. Each configuration section can be expressed in Python (for use in Python API mode) as well as YAML, but YAML is shown here for brevity.
Using custom configuration is an advanced use-case. Most users will want to use the [Default Configuration](/posts/config/overview) instead.
## Indexing Engine Examples
The [examples](https://github.com/microsoft/graphrag/blob/main/examples/) directory contains several examples of how to use the indexing engine with _custom configuration_.
Most examples include two different forms of running the pipeline, both of which are contained in the example's `run.py`:
1. Using mostly the Python API
2. Using mostly a pipeline configuration file
To run an example:
- Run `poetry shell` to activate a virtual environment with the required dependencies.
- Run `PYTHONPATH="$(pwd)" python examples/path_to_example/run.py` from the `root` directory.
For example, to run the single_verb example, you would run the following commands:
```bash
poetry shell
```
```sh
PYTHONPATH="$(pwd)" python examples/single_verb/run.py
```
# Configuration Sections
# > extends
This configuration allows you to extend a base configuration file or files.
```yaml
# single base
extends: ../base_config.yml
```
```yaml
# multiple bases
extends:
- ../base_config.yml
- ../base_config2.yml
```
# > root_dir
This configuration allows you to set the root directory for the pipeline. All data inputs and outputs are assumed to be relative to this path.
```yaml
root_dir: /workspace/data_project
```
# > storage
This configuration allows you to define the output strategy for the pipeline.
- `type`: The type of storage to use. Options are `file`, `memory`, and `blob`
- `base_dir` (`type: file` only): The base directory to store the data in. This is relative to the config root.
- `connection_string` (`type: blob` only): The connection string to use for blob storage.
- `container_name` (`type: blob` only): The container to use for blob storage.
# > cache
This configuration allows you to define the cache strategy for the pipeline.
- `type`: The type of cache to use. Options are `file`, `memory`, and `blob`.
- `base_dir` (`type: file` only): The base directory to store the cache in. This is relative to the config root.
- `connection_string` (`type: blob` only): The connection string to use for blob storage.
- `container_name` (`type: blob` only): The container to use for blob storage.
# > reporting
This configuration allows you to define the reporting strategy for the pipeline. Report files are generated artifacts that summarize the performance metrics of the pipeline and emit any error messages.
- `type`: The type of reporting to use. Options are `file`, `memory`, and `blob`
- `base_dir` (`type: file` only): The base directory to store the reports in. This is relative to the config root.
- `connection_string` (`type: blob` only): The connection string to use for blob storage.
- `container_name` (`type: blob` only): The container to use for blob storage.
# > workflows
This configuration section defines the workflow DAG for the pipeline. Here we define an array of workflows and express their inter-dependencies in steps:
- `name`: The name of the workflow. This is used to reference the workflow in other parts of the config.
- `steps`: The DataShaper steps that this workflow comprises. If a step defines an input in the form of `workflow:<workflow_name>`, then it is assumed to have a dependency on the output of that workflow.
```yaml
workflows:
- name: workflow1
steps:
- verb: derive
args:
column1: "col1"
column2: "col2"
- name: workflow2
steps:
- verb: derive
args:
column1: "col1"
column2: "col2"
input:
# dependency established here
source: workflow:workflow1
```
# > input
- `type`: The type of input to use. Options are `file` or `blob`.
- `file_type`: The file type field discriminates between the different input types. Options are `csv` and `text`.
- `base_dir`: The base directory to read the input files from. This is relative to the config file.
- `file_pattern`: A regex to match the input files. The regex must have named groups for each of the fields in the file_filter.
- `post_process`: A DataShaper workflow definition to apply to the input before executing the primary workflow.
- `source_column` (`type: csv` only): The column containing the source/author of the data
- `text_column` (`type: csv` only): The column containing the text of the data
- `timestamp_column` (`type: csv` only): The column containing the timestamp of the data
- `timestamp_format` (`type: csv` only): The format of the timestamp
```yaml
input:
type: file
file_type: csv
base_dir: ../data/csv # the directory containing the CSV files, this is relative to the config file
file_pattern: '.*[\/](?P<source>[^\/]+)[\/](?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})_(?P<author>[^_]+)_\d+\.csv$' # a regex to match the CSV files
# An additional file filter which uses the named groups from the file_pattern to further filter the files
# file_filter:
# # source: (source_filter)
# year: (2023)
# month: (06)
# # day: (22)
source_column: "author" # the column containing the source/author of the data
text_column: "message" # the column containing the text of the data
timestamp_column: "date(yyyyMMddHHmmss)" # optional, the column containing the timestamp of the data
timestamp_format: "%Y%m%d%H%M%S" # optional, the format of the timestamp
post_process: # Optional, set of steps to process the data before going into the workflow
- verb: filter
args:
column: "title"
value: "My document"
```
```yaml
input:
type: file
file_type: csv
base_dir: ../data/csv # the directory containing the CSV files, this is relative to the config file
file_pattern: '.*[\/](?P<source>[^\/]+)[\/](?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})_(?P<author>[^_]+)_\d+\.csv$' # a regex to match the CSV files
# An additional file filter which uses the named groups from the file_pattern to further filter the files
# file_filter:
# # source: (source_filter)
# year: (2023)
# month: (06)
# # day: (22)
post_process: # Optional, set of steps to process the data before going into the workflow
- verb: filter
args:
column: "title"
value: "My document"
```

View File

@@ -0,0 +1,217 @@
---
title: Default Configuration Mode (using Env Vars)
navtitle: Using Env Vars
tags: [post]
layout: page
date: 2023-01-03
---
## Text-Embeddings Customization
By default, the GraphRAG indexer will only emit embeddings required for our query methods. However, the model has embeddings defined for all plaintext fields, and these can be generated by setting the `GRAPHRAG_EMBEDDING_TARGET` environment variable to `all`.
If the embedding target is `all`, and you want to only embed a subset of these fields, you may specify which embeddings to skip using the `GRAPHRAG_EMBEDDING_SKIP` argument described below.
### Embedded Fields
- `text_unit.text`
- `document.raw_content`
- `entity.name`
- `entity.description`
- `relationship.description`
- `community.title`
- `community.summary`
- `community.full_content`
## Input Data
Our pipeline can ingest .csv or .txt data from an input folder. These files can be nested within subfolders. To configure how input data is handled, what fields are mapped over, and how timestamps are parsed, look for configuration values starting with `GRAPHRAG_INPUT_` below. In general, CSV-based data provides the most customizability. Each CSV should at least contain a `text` field (which can be mapped with environment variables), but it's helpful if they also have `title`, `timestamp`, and `source` fields. Additional fields can be included as well, which will land as extra fields on the `Document` table.
## Base LLM Settings
These are the primary settings for configuring LLM connectivity.
| Parameter | Required? | Description | Type | Default Value |
| --------------------------- | ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- | ----- | ------------- |
| `GRAPHRAG_API_KEY` | **Yes for OpenAI. Optional for AOAI** | The API key. (Note: `OPENAI_API_KEY` is also used as a fallback). If not defined when using AOAI, managed identity will be used. | `str` | `None` |
| `GRAPHRAG_API_BASE` | **For AOAI** | The API Base URL | `str` | `None` |
| `GRAPHRAG_API_VERSION` | **For AOAI** | The AOAI API version. | `str` | `None` |
| `GRAPHRAG_API_ORGANIZATION` | | The AOAI organization. | `str` | `None` |
| `GRAPHRAG_API_PROXY` | | The AOAI proxy. | `str` | `None` |
## Text Generation Settings
These settings control the text generation model used by the pipeline. Any settings with a fallback will use the base LLM settings, if available.
| Parameter | Required? | Description | Type | Default Value |
| ------------------------------------------------- | ------------------------ | ------------------------------------------------------------------------------ | ------- | --------------------- |
| `GRAPHRAG_LLM_TYPE` | **For AOAI** | The LLM operation type. Either `openai_chat` or `azure_openai_chat` | `str` | `openai_chat` |
| `GRAPHRAG_LLM_DEPLOYMENT_NAME` | **For AOAI** | The AOAI model deployment name. | `str` | `None` |
| `GRAPHRAG_LLM_API_KEY` | Yes (uses fallback) | The API key. If not defined when using AOAI, managed identity will be used. | `str` | `None` |
| `GRAPHRAG_LLM_API_BASE` | For AOAI (uses fallback) | The API Base URL | `str` | `None` |
| `GRAPHRAG_LLM_API_VERSION` | For AOAI (uses fallback) | The AOAI API version. | `str` | `None` |
| `GRAPHRAG_LLM_API_ORGANIZATION` | For AOAI (uses fallback) | The AOAI organization. | `str` | `None` |
| `GRAPHRAG_LLM_API_PROXY` | | The AOAI proxy. | `str` | `None` |
| `GRAPHRAG_LLM_MODEL` | | The LLM model. | `str` | `gpt-4-turbo-preview` |
| `GRAPHRAG_LLM_MAX_TOKENS` | | The maximum number of tokens. | `int` | `4000` |
| `GRAPHRAG_LLM_REQUEST_TIMEOUT` | | The maximum number of seconds to wait for a response from the chat client. | `int` | `180` |
| `GRAPHRAG_LLM_MODEL_SUPPORTS_JSON` | | Indicates whether the given model supports JSON output mode. `True` to enable. | `str` | `None` |
| `GRAPHRAG_LLM_THREAD_COUNT` | | The number of threads to use for LLM parallelization. | `int` | 50 |
| `GRAPHRAG_LLM_THREAD_STAGGER` | | The time to wait (in seconds) between starting each thread. | `float` | 0.3 |
| `GRAPHRAG_LLM_CONCURRENT_REQUESTS` | | The number of concurrent requests to allow for the LLM client. | `int` | 25 |
| `GRAPHRAG_LLM_TPM` | | The number of tokens per minute to allow for the LLM client. 0 = Bypass | `int` | 0 |
| `GRAPHRAG_LLM_RPM` | | The number of requests per minute to allow for the LLM client. 0 = Bypass | `int` | 0 |
| `GRAPHRAG_LLM_MAX_RETRIES` | | The maximum number of retries to attempt when a request fails. | `int` | 10 |
| `GRAPHRAG_LLM_MAX_RETRY_WAIT` | | The maximum number of seconds to wait between retries. | `int` | 10 |
| `GRAPHRAG_LLM_SLEEP_ON_RATE_LIMIT_RECOMMENDATION` | | Whether to sleep on rate limit recommendation. (Azure Only) | `bool` | `True` |
## Text Embedding Settings
These settings control the text embedding model used by the pipeline. Any settings with a fallback will use the base LLM settings, if available.
| Parameter | Required ? | Description | Type | Default |
| ------------------------------------------------------- | ------------------------ | -------------------------------------------------------------------------------------------------------------------------- | ------- | ------------------------ |
| `GRAPHRAG_EMBEDDING_TYPE` | **For AOAI** | The embedding client to use. Either `openai_embedding` or `azure_openai_embedding` | `str` | `openai_embedding` |
| `GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME` | **For AOAI** | The AOAI deployment name. | `str` | `None` |
| `GRAPHRAG_EMBEDDING_API_KEY` | Yes (uses fallback) | The API key to use for the embedding client. If not defined when using AOAI, managed identity will be used. | `str` | `None` |
| `GRAPHRAG_EMBEDDING_API_BASE` | For AOAI (uses fallback) | The API base URL. | `str` | `None` |
| `GRAPHRAG_EMBEDDING_API_VERSION` | For AOAI (uses fallback) | The AOAI API version to use for the embedding client. | `str` | `None` |
| `GRAPHRAG_EMBEDDING_API_ORGANIZATION` | For AOAI (uses fallback) | The AOAI organization to use for the embedding client. | `str` | `None` |
| `GRAPHRAG_EMBEDDING_API_PROXY` | | The AOAI proxy to use for the embedding client. | `str` | `None` |
| `GRAPHRAG_EMBEDDING_MODEL` | | The model to use for the embedding client. | `str` | `text-embedding-3-small` |
| `GRAPHRAG_EMBEDDING_BATCH_SIZE` | | The number of texts to embed at once. [(Azure limit is 16)](https://learn.microsoft.com/en-us/azure/ai-services/openai/reference) | `int` | 16 |
| `GRAPHRAG_EMBEDDING_BATCH_MAX_TOKENS` | | The maximum tokens per batch [(Azure limit is 8191)](https://learn.microsoft.com/en-us/azure/ai-services/openai/reference) | `int` | 8191 |
| `GRAPHRAG_EMBEDDING_TARGET` | | The target fields to embed. Either `required` or `all`. | `str` | `required` |
| `GRAPHRAG_EMBEDDING_SKIP` | | A comma-separated list of fields to skip embeddings for (e.g. 'relationship.description'). | `str` | `None` |
| `GRAPHRAG_EMBEDDING_THREAD_COUNT` | | The number of threads to use for parallelization for embeddings. | `int` | 50 |
| `GRAPHRAG_EMBEDDING_THREAD_STAGGER` | | The time to wait (in seconds) between starting each thread for embeddings. | `float` | 0.3 |
| `GRAPHRAG_EMBEDDING_CONCURRENT_REQUESTS` | | The number of concurrent requests to allow for the embedding client. | `int` | 25 |
| `GRAPHRAG_EMBEDDING_TPM` | | The number of tokens per minute to allow for the embedding client. 0 = Bypass | `int` | 0 |
| `GRAPHRAG_EMBEDDING_RPM` | | The number of requests per minute to allow for the embedding client. 0 = Bypass | `int` | 0 |
| `GRAPHRAG_EMBEDDING_MAX_RETRIES` | | The maximum number of retries to attempt when a request fails. | `int` | 10 |
| `GRAPHRAG_EMBEDDING_MAX_RETRY_WAIT` | | The maximum number of seconds to wait between retries. | `int` | 10 |
| `GRAPHRAG_EMBEDDING_SLEEP_ON_RATE_LIMIT_RECOMMENDATION` | | Whether to sleep on rate limit recommendation. (Azure Only) | `bool` | `True` |
## Input Settings
These settings control the data input used by the pipeline. Any settings with a fallback will use the base LLM settings, if available.
### Plaintext Input Data (`GRAPHRAG_INPUT_FILE_TYPE`=text)
| Parameter | Description | Type | Required or Optional | Default |
| ----------------------------- | --------------------------------------------------------------------------------- | ----- | -------------------- | ---------- |
| `GRAPHRAG_INPUT_FILE_PATTERN` | The file pattern regexp to use when reading input files from the input directory. | `str` | optional | `.*\.txt$` |
### CSV Input Data (`GRAPHRAG_INPUT_FILE_TYPE`=csv)
| Parameter | Description | Type | Required or Optional | Default |
| ------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | -------------------- | ---------- |
| `GRAPHRAG_INPUT_TYPE` | The input storage type to use when reading files. (`file` or `blob`) | `str` | optional | `file` |
| `GRAPHRAG_INPUT_FILE_PATTERN` | The file pattern regexp to use when reading input files from the input directory. | `str` | optional | `.*\.txt$` |
| `GRAPHRAG_INPUT_SOURCE_COLUMN` | The 'source' column to use when reading CSV input files. | `str` | optional | `source` |
| `GRAPHRAG_INPUT_TIMESTAMP_COLUMN` | The 'timestamp' column to use when reading CSV input files. | `str` | optional | `None` |
| `GRAPHRAG_INPUT_TIMESTAMP_FORMAT` | The timestamp format to use when parsing timestamps in the timestamp column. | `str` | optional | `None` |
| `GRAPHRAG_INPUT_TEXT_COLUMN` | The 'text' column to use when reading CSV input files. | `str` | optional | `text` |
| `GRAPHRAG_INPUT_DOCUMENT_ATTRIBUTE_COLUMNS` | A list of CSV columns, comma-separated, to incorporate as document fields. | `str` | optional | `id` |
| `GRAPHRAG_INPUT_TITLE_COLUMN` | The 'title' column to use when reading CSV input files. | `str` | optional | `title` |
| `GRAPHRAG_INPUT_STORAGE_ACCOUNT_BLOB_URL` | The Azure Storage blob endpoint to use when in `blob` mode and using managed identity. Will have the format `https://<storage_account_name>.blob.core.windows.net` | `str` | optional | `None` |
| `GRAPHRAG_INPUT_CONNECTION_STRING` | The connection string to use when reading CSV input files from Azure Blob Storage. | `str` | optional | `None` |
| `GRAPHRAG_INPUT_CONTAINER_NAME` | The container name to use when reading CSV input files from Azure Blob Storage. | `str` | optional | `None` |
| `GRAPHRAG_INPUT_BASE_DIR` | The base directory to read input files from. | `str` | optional | `None` |
## Data Mapping Settings
| Parameter | Description | Type | Required or Optional | Default |
| -------------------------- | -------------------------------------------------------- | ----- | -------------------- | ------- |
| `GRAPHRAG_INPUT_FILE_TYPE` | The type of input data, `csv` or `text` | `str` | optional | `text` |
| `GRAPHRAG_INPUT_ENCODING` | The encoding to apply when reading CSV/text input files. | `str` | optional | `utf-8` |
## Data Chunking
| Parameter | Description | Type | Required or Optional | Default |
| --------------------------- | ------------------------------------------------------------------------------------------- | ----- | -------------------- | ------- |
| `GRAPHRAG_CHUNK_SIZE` | The chunk size in tokens for text-chunk analysis windows. | `str` | optional | 300 |
| `GRAPHRAG_CHUNK_OVERLAP` | The chunk overlap in tokens for text-chunk analysis windows. | `str` | optional | 100 |
| `GRAPHRAG_CHUNK_BY_COLUMNS` | A comma-separated list of document attributes to groupby when performing TextUnit chunking. | `str` | optional | `id` |
## Prompting Overrides
| Parameter | Description | Type | Required or Optional | Default |
| --------------------------------------------- | ------------------------------------------------------------------------------------------ | -------- | -------------------- | ---------------------------------------------------------------- |
| `GRAPHRAG_ENTITY_EXTRACTION_PROMPT_FILE` | The path (relative to the root) of an entity extraction prompt template text file. | `str` | optional | `None` |
| `GRAPHRAG_ENTITY_EXTRACTION_MAX_GLEANINGS` | The maximum number of redrives (gleanings) to invoke when extracting entities in a loop. | `int` | optional | 0 |
| `GRAPHRAG_ENTITY_EXTRACTION_ENTITY_TYPES` | A comma-separated list of entity types to extract. | `str` | optional | `organization,person,event,geo` |
| `GRAPHRAG_SUMMARIZE_DESCRIPTIONS_PROMPT_FILE` | The path (relative to the root) of a description summarization prompt template text file. | `str` | optional | `None` |
| `GRAPHRAG_SUMMARIZE_DESCRIPTIONS_MAX_LENGTH` | The maximum number of tokens to generate per description summarization. | `int` | optional | 500 |
| `GRAPHRAG_CLAIM_EXTRACTION_ENABLED` | Whether claim extraction is enabled for this pipeline. | `bool` | optional | `False` |
| `GRAPHRAG_CLAIM_EXTRACTION_DESCRIPTION` | The claim_description prompting argument to utilize. | `string` | optional | "Any claims or facts that could be relevant to threat analysis." |
| `GRAPHRAG_CLAIM_EXTRACTION_PROMPT_FILE` | The claim extraction prompt to utilize. | `string` | optional | `None` |
| `GRAPHRAG_CLAIM_EXTRACTION_MAX_GLEANINGS` | The maximum number of redrives (gleanings) to invoke when extracting claims in a loop. | `int` | optional | 0 |
| `GRAPHRAG_COMMUNITY_REPORT_PROMPT_FILE` | The community report extraction prompt to utilize. | `string` | optional | `None` |
| `GRAPHRAG_COMMUNITY_REPORT_MAX_LENGTH` | The maximum number of tokens to generate per community report. | `int` | optional | 1500 |
## Storage
This section controls the storage mechanism used by the pipeline used for emitting output tables.
| Parameter | Description | Type | Required or Optional | Default |
| ------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | -------------------- | ------- |
| `GRAPHRAG_STORAGE_TYPE` | The type of storage to use. Options are `file`, `memory`, or `blob` | `str` | optional | `file` |
| `GRAPHRAG_STORAGE_STORAGE_ACCOUNT_BLOB_URL` | The Azure Storage blob endpoint to use when in `blob` mode and using managed identity. Will have the format `https://<storage_account_name>.blob.core.windows.net` | `str` | optional | None |
| `GRAPHRAG_STORAGE_CONNECTION_STRING` | The Azure Storage connection string to use when in `blob` mode. | `str` | optional | None |
| `GRAPHRAG_STORAGE_CONTAINER_NAME` | The Azure Storage container name to use when in `blob` mode. | `str` | optional | None |
| `GRAPHRAG_STORAGE_BASE_DIR` | The base path to data outputs. | `str` | optional | None |
## Cache
This section controls the cache mechanism used by the pipeline. This is used to cache LLM invocation results.
| Parameter | Description | Type | Required or Optional | Default |
| ----------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | -------------------- | ------- |
| `GRAPHRAG_CACHE_TYPE` | The type of cache to use. Options are `file`, `memory`, `none` or `blob` | `str` | optional | `file` |
| `GRAPHRAG_CACHE_STORAGE_ACCOUNT_BLOB_URL` | The Azure Storage blob endpoint to use when in `blob` mode and using managed identity. Will have the format `https://<storage_account_name>.blob.core.windows.net` | `str` | optional | None |
| `GRAPHRAG_CACHE_CONNECTION_STRING` | The Azure Storage connection string to use when in `blob` mode. | `str` | optional | None |
| `GRAPHRAG_CACHE_CONTAINER_NAME` | The Azure Storage container name to use when in `blob` mode. | `str` | optional | None |
| `GRAPHRAG_CACHE_BASE_DIR` | The base path to the cache outputs. | `str` | optional | None |
## Reporting
This section controls the reporting mechanism used by the pipeline, for common events and error messages. The default is to write reports to a file in the output directory. However, you can also choose to write reports to the console or to an Azure Blob Storage container.
| Parameter | Description | Type | Required or Optional | Default |
| --------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----- | -------------------- | ------- |
| `GRAPHRAG_REPORTING_TYPE` | The type of reporter to use. Options are `file`, `console`, or `blob` | `str` | optional | `file` |
| `GRAPHRAG_REPORTING_STORAGE_ACCOUNT_BLOB_URL` | The Azure Storage blob endpoint to use when in `blob` mode and using managed identity. Will have the format `https://<storage_account_name>.blob.core.windows.net` | `str` | optional | None |
| `GRAPHRAG_REPORTING_CONNECTION_STRING` | The Azure Storage connection string to use when in `blob` mode. | `str` | optional | None |
| `GRAPHRAG_REPORTING_CONTAINER_NAME` | The Azure Storage container name to use when in `blob` mode. | `str` | optional | None |
| `GRAPHRAG_REPORTING_BASE_DIR` | The base path to the reporting outputs. | `str` | optional | None |
## Node2Vec Parameters
| Parameter | Description | Type | Required or Optional | Default |
| ------------------------------- | ---------------------------------------- | ------ | -------------------- | ------- |
| `GRAPHRAG_NODE2VEC_ENABLED` | Whether to enable Node2Vec | `bool` | optional | False |
| `GRAPHRAG_NODE2VEC_NUM_WALKS` | The Node2Vec number of walks to perform | `int` | optional | 10 |
| `GRAPHRAG_NODE2VEC_WALK_LENGTH` | The Node2Vec walk length | `int` | optional | 40 |
| `GRAPHRAG_NODE2VEC_WINDOW_SIZE` | The Node2Vec window size | `int` | optional | 2 |
| `GRAPHRAG_NODE2VEC_ITERATIONS` | The number of iterations to run node2vec | `int` | optional | 3 |
| `GRAPHRAG_NODE2VEC_RANDOM_SEED` | The random seed to use for node2vec | `int` | optional | 597832 |
## Data Snapshotting
| Parameter | Description | Type | Required or Optional | Default |
| ----------------------------------- | ------------------------------------------- | ------ | -------------------- | ------- |
| `GRAPHRAG_SNAPSHOT_GRAPHML` | Whether to enable GraphML snapshots. | `bool` | optional | False |
| `GRAPHRAG_SNAPSHOT_RAW_ENTITIES` | Whether to enable raw entity snapshots. | `bool` | optional | False |
| `GRAPHRAG_SNAPSHOT_TOP_LEVEL_NODES` | Whether to enable top-level node snapshots. | `bool` | optional | False |
# Miscellaneous Settings
| Parameter | Description | Type | Required or Optional | Default |
| --------------------------- | --------------------------------------------------------------------- | ------ | -------------------- | ------------- |
| `GRAPHRAG_ASYNC_MODE` | Which async mode to use. Either `asyncio` or `threaded`. | `str` | optional | `asyncio` |
| `GRAPHRAG_ENCODING_MODEL` | The text encoding model, used in tiktoken, to encode text. | `str` | optional | `cl100k_base` |
| `GRAPHRAG_MAX_CLUSTER_SIZE` | The maximum number of entities to include in a single Leiden cluster. | `int` | optional | 10 |
| `GRAPHRAG_SKIP_WORKFLOWS` | A comma-separated list of workflow names to skip. | `str` | optional | `None` |
| `GRAPHRAG_UMAP_ENABLED` | Whether to enable UMAP layouts | `bool` | optional | False |

View File

@@ -0,0 +1,38 @@
---
title: Configuring GraphRAG Indexing
navtitle: Init Command
tags: [post]
layout: page
date: 2023-01-03
---
To start using GraphRAG, you need to configure the system. The `init` command is the easiest way to get started. It will create `.env` and `settings.yaml` files in the specified directory with the necessary configuration settings. It will also output the default LLM prompts used by GraphRAG.
## Usage
```sh
python -m graphrag.index [--init] [--root PATH]
```
## Options
- `--init` - Initialize the directory with the necessary configuration files.
- `--root PATH` - The root directory to initialize. Default is the current directory.
## Example
```sh
python -m graphrag.index --init --root ./ragtest
```
## Output
The `init` command will create the following files in the specified directory:
- `settings.yaml` - The configuration settings file. This file contains the configuration settings for GraphRAG.
- `.env` - The environment variables file. These are referenced in the `settings.yaml` file.
- `prompts/` - The LLM prompts folder. This contains the default prompts used by GraphRAG, you can modify them or run the [Auto Prompt Tuning](/posts/prompt_tuning/auto_prompt_tuning) command to generate new prompts adapted to your data.
## Next Steps
After initializing your workspace, you can either run the [Prompt Tuning](/posts/prompt_tuning/auto_prompt_tuning) command to adapt the prompts to your data or even start running the [Indexing Pipeline](/posts/index/overview) to index your data. For more information on configuring GraphRAG, see the [Configuration](/posts/config/overview) documentation.

View File

@@ -0,0 +1,221 @@
---
title: Default Configuration Mode (using JSON/YAML)
navtitle: Using JSON or YAML
tags: [post]
layout: page
date: 2023-01-03
---
The default configuration mode may be configured by using a `config.json` or `config.yml` file in the data project root. If a `.env` file is present along with this config file, then it will be loaded, and the environment variables defined therein will be available for token replacements in your configuration document using `${ENV_VAR}` syntax.
For example:
```
# .env
API_KEY=some_api_key
# config.json
{
"llm": {
"api_key": "${API_KEY}"
}
}
```
# Config Sections
## input
### Fields
- `type` **file|blob** - The input type to use. Default=`file`
- `file_type` **text|csv** - The type of input data to load. Either `text` or `csv`. Default is `text`
- `file_encoding` **str** - The encoding of the input file. Default is `utf-8`
- `file_pattern` **str** - A regex to match input files. Default is `.*\.csv$` if in csv mode and `.*\.txt$` if in text mode.
- `source_column` **str** - (CSV Mode Only) The source column name.
- `timestamp_column` **str** - (CSV Mode Only) The timestamp column name.
- `timestamp_format` **str** - (CSV Mode Only) The source format.
- `text_column` **str** - (CSV Mode Only) The text column name.
- `title_column` **str** - (CSV Mode Only) The title column name.
- `document_attribute_columns` **list[str]** - (CSV Mode Only) The additional document attributes to include.
- `connection_string` **str** - (blob only) The Azure Storage connection string.
- `container_name` **str** - (blob only) The Azure Storage container name.
- `base_dir` **str** - The base directory to read input from, relative to the root.
- `storage_account_blob_url` **str** - The storage account blob URL to use.
## llm
This is the base LLM configuration section. Other steps may override this configuration with their own LLM configuration.
### Fields
- `api_key` **str** - The OpenAI API key to use.
- `type` **openai_chat|azure_openai_chat|openai_embedding|azure_openai_embedding** - The type of LLM to use.
- `model` **str** - The model name.
- `max_tokens` **int** - The maximum number of output tokens.
- `request_timeout` **float** - The per-request timeout.
- `api_base` **str** - The API base url to use.
- `api_version` **str** - The API version
- `organization` **str** - The client organization.
- `proxy` **str** - The proxy URL to use.
- `cognitive_services_endpoint` **str** - The url endpoint for cognitive services.
- `deployment_name` **str** - The deployment name to use (Azure).
- `model_supports_json` **bool** - Whether the model supports JSON-mode output.
- `tokens_per_minute` **int** - Set a leaky-bucket throttle on tokens-per-minute.
- `requests_per_minute` **int** - Set a leaky-bucket throttle on requests-per-minute.
- `max_retries` **int** - The maximum number of retries to use.
- `max_retry_wait` **float** - The maximum backoff time.
- `sleep_on_rate_limit_recommendation` **bool** - Whether to adhere to sleep recommendations (Azure).
- `concurrent_requests` **int** The number of open requests to allow at once.
## parallelization
### Fields
- `stagger` **float** - The threading stagger value.
- `num_threads` **int** - The maximum number of work threads.
## async_mode
**asyncio|threaded** The async mode to use. Either `asyncio` or `threaded`.
## embeddings
### Fields
- `llm` (see LLM top-level config)
- `parallelization` (see Parallelization top-level config)
- `async_mode` (see Async Mode top-level config)
- `batch_size` **int** - The maximum batch size to use.
- `batch_max_tokens` **int** - The maximum batch #-tokens.
- `target` **required|all** - Determines which set of embeddings to emit.
- `skip` **list[str]** - Which embeddings to skip.
- `strategy` **dict** - Fully override the text-embedding strategy.
## chunks
### Fields
- `size` **int** - The max chunk size in tokens.
- `overlap` **int** - The chunk overlap in tokens.
- `group_by_columns` **list[str]** - group documents by fields before chunking.
- `strategy` **dict** - Fully override the chunking strategy.
## cache
### Fields
- `type` **file|memory|none|blob** - The cache type to use. Default=`file`
- `connection_string` **str** - (blob only) The Azure Storage connection string.
- `container_name` **str** - (blob only) The Azure Storage container name.
- `base_dir` **str** - The base directory to write cache to, relative to the root.
- `storage_account_blob_url` **str** - The storage account blob URL to use.
## storage
### Fields
- `type` **file|memory|blob** - The storage type to use. Default=`file`
- `connection_string` **str** - (blob only) The Azure Storage connection string.
- `container_name` **str** - (blob only) The Azure Storage container name.
- `base_dir` **str** - The base directory to write reports to, relative to the root.
- `storage_account_blob_url` **str** - The storage account blob URL to use.
## reporting
### Fields
- `type` **file|console|blob** - The reporting type to use. Default=`file`
- `connection_string` **str** - (blob only) The Azure Storage connection string.
- `container_name` **str** - (blob only) The Azure Storage container name.
- `base_dir` **str** - The base directory to write reports to, relative to the root.
- `storage_account_blob_url` **str** - The storage account blob URL to use.
## entity_extraction
### Fields
- `llm` (see LLM top-level config)
- `parallelization` (see Parallelization top-level config)
- `async_mode` (see Async Mode top-level config)
- `prompt` **str** - The prompt file to use.
- `entity_types` **list[str]** - The entity types to identify.
- `max_gleanings` **int** - The maximum number of gleaning cycles to use.
- `strategy` **dict** - Fully override the entity extraction strategy.
## summarize_descriptions
### Fields
- `llm` (see LLM top-level config)
- `parallelization` (see Parallelization top-level config)
- `async_mode` (see Async Mode top-level config)
- `prompt` **str** - The prompt file to use.
- `max_length` **int** - The maximum number of output tokens per summarization.
- `strategy` **dict** - Fully override the summarize description strategy.
## claim_extraction
### Fields
- `enabled` **bool** - Whether to enable claim extraction. default=False
- `llm` (see LLM top-level config)
- `parallelization` (see Parallelization top-level config)
- `async_mode` (see Async Mode top-level config)
- `prompt` **str** - The prompt file to use.
- `description` **str** - Describes the types of claims we want to extract.
- `max_gleanings` **int** - The maximum number of gleaning cycles to use.
- `strategy` **dict** - Fully override the claim extraction strategy.
## community_reports
### Fields
- `llm` (see LLM top-level config)
- `parallelization` (see Parallelization top-level config)
- `async_mode` (see Async Mode top-level config)
- `prompt` **str** - The prompt file to use.
- `max_length` **int** - The maximum number of output tokens per report.
- `max_input_length` **int** - The maximum number of input tokens to use when generating reports.
- `strategy` **dict** - Fully override the community reports strategy.
## cluster_graph
### Fields
- `max_cluster_size` **int** - The maximum cluster size to emit.
- `strategy` **dict** - Fully override the cluster_graph strategy.
## embed_graph
### Fields
- `enabled` **bool** - Whether to enable graph embeddings.
- `num_walks` **int** - The node2vec number of walks.
- `walk_length` **int** - The node2vec walk length.
- `window_size` **int** - The node2vec window size.
- `iterations` **int** - The node2vec number of iterations.
- `random_seed` **int** - The node2vec random seed.
- `strategy` **dict** - Fully override the embed graph strategy.
## umap
### Fields
- `enabled` **bool** - Whether to enable UMAP layouts.
## snapshots
### Fields
- `graphml` **bool** - Emit graphml snapshots.
- `raw_entities` **bool** - Emit raw entity snapshots.
- `top_level_nodes` **bool** - Emit top-level-node snapshots.
## encoding_model
**str** - The text encoding model to use. Default is `cl100k_base`.
## skip_workflows
**list[str]** - Which workflow names to skip.

View File

@@ -0,0 +1,21 @@
---
title: Configuring GraphRAG Indexing
navtitle: Configuration
tags: [post]
layout: page
date: 2023-01-03
---
The GraphRAG system is highly configurable. This page provides an overview of the configuration options available for the GraphRAG indexing engine.
## Default Configuration Mode
The default configuration mode is the simplest way to get started with the GraphRAG system. It is designed to work out-of-the-box with minimal configuration. The primary configuration sections for the Indexing Engine pipelines are described below. The main ways to set up GraphRAG in Default Configuration mode are via:
- [Init command](/posts/config/init) (recommended)
- [Purely using environment variables](/posts/config/env_vars)
- [Using JSON or YAML for deeper control](/posts/config/json_yaml)
## Custom Configuration Mode
Custom configuration mode is an advanced use-case. Most users will want to use the Default Configuration instead. The primary configuration sections for Indexing Engine pipelines are described below. Details about how to use custom configuration are available in the [Custom Configuration Mode](/posts/config/custom) documentation.

View File

@@ -0,0 +1,174 @@
---
title: Configuration Template
navtitle: Configuration Template
layout: page
tags: [post]
date: 2024-04-04
---
The following template can be used and stored as a `.env` in the directory where you're pointing
the `--root` parameter on your Indexing Pipeline execution.
For details about how to run the Indexing Pipeline, refer to the [Index CLI](../../index/2-cli) documentation.
## .env File Template
Required variables are uncommented. All the optional configuration can be turned on or off as needed.
### Minimal Configuration
```bash
# Base LLM Settings
GRAPHRAG_API_KEY="your_api_key"
GRAPHRAG_API_BASE="http://<domain>.openai.azure.com" # For Azure OpenAI Users
GRAPHRAG_API_VERSION="api_version" # For Azure OpenAI Users
# Text Generation Settings
GRAPHRAG_LLM_TYPE="azure_openai_chat" # or openai_chat
GRAPHRAG_LLM_DEPLOYMENT_NAME="gpt-4-turbo-preview"
GRAPHRAG_LLM_MODEL_SUPPORTS_JSON=True
# Text Embedding Settings
GRAPHRAG_EMBEDDING_TYPE="azure_openai_embedding" # or openai_embedding
GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME="text-embedding-3-small"
# Data Mapping Settings
GRAPHRAG_INPUT_TYPE="text"
```
### Full Configuration
```bash
# Required LLM Config
# Input Data Configuration
GRAPHRAG_INPUT_TYPE="file"
# Plaintext Input Data Configuration
# GRAPHRAG_INPUT_FILE_PATTERN=.*\.txt
# Text Input Data Configuration
GRAPHRAG_INPUT_FILE_TYPE="text"
GRAPHRAG_INPUT_FILE_PATTERN=".*\.txt$"
GRAPHRAG_INPUT_SOURCE_COLUMN=source
# GRAPHRAG_INPUT_TIMESTAMP_COLUMN=None
# GRAPHRAG_INPUT_TIMESTAMP_FORMAT=None
# GRAPHRAG_INPUT_TEXT_COLUMN="text"
# GRAPHRAG_INPUT_ATTRIBUTE_COLUMNS=id
# GRAPHRAG_INPUT_TITLE_COLUMN="title"
# GRAPHRAG_INPUT_TYPE="file"
# GRAPHRAG_INPUT_CONNECTION_STRING=None
# GRAPHRAG_INPUT_CONTAINER_NAME=None
# GRAPHRAG_INPUT_BASE_DIR=None
# Base LLM Settings
GRAPHRAG_API_KEY="your_api_key"
GRAPHRAG_API_BASE="http://<domain>.openai.azure.com" # For Azure OpenAI Users
GRAPHRAG_API_VERSION="api_version" # For Azure OpenAI Users
# GRAPHRAG_API_ORGANIZATION=None
# GRAPHRAG_API_PROXY=None
# Text Generation Settings
# GRAPHRAG_LLM_TYPE=openai_chat
GRAPHRAG_LLM_API_KEY="your_api_key" # If GRAPHRAG_API_KEY is not set
GRAPHRAG_LLM_API_BASE="http://<domain>.openai.azure.com" # For Azure OpenAI Users and if GRAPHRAG_API_BASE is not set
GRAPHRAG_LLM_API_VERSION="api_version" # For Azure OpenAI Users and if GRAPHRAG_API_VERSION is not set
GRAPHRAG_LLM_MODEL_SUPPORTS_JSON=True # Suggested by default
# GRAPHRAG_LLM_API_ORGANIZATION=None
# GRAPHRAG_LLM_API_PROXY=None
# GRAPHRAG_LLM_DEPLOYMENT_NAME=None
# GRAPHRAG_LLM_MODEL=gpt-4-turbo-preview
# GRAPHRAG_LLM_MAX_TOKENS=4000
# GRAPHRAG_LLM_REQUEST_TIMEOUT=180
# GRAPHRAG_LLM_THREAD_COUNT=50
# GRAPHRAG_LLM_THREAD_STAGGER=0.3
# GRAPHRAG_LLM_CONCURRENT_REQUESTS=25
# GRAPHRAG_LLM_TPM=0
# GRAPHRAG_LLM_RPM=0
# GRAPHRAG_LLM_MAX_RETRIES=10
# GRAPHRAG_LLM_MAX_RETRY_WAIT=10
# GRAPHRAG_LLM_SLEEP_ON_RATE_LIMIT_RECOMMENDATION=True
# Text Embedding Settings
# GRAPHRAG_EMBEDDING_TYPE=openai_embedding
GRAPHRAG_EMBEDDING_API_KEY="your_api_key" # If GRAPHRAG_API_KEY is not set
GRAPHRAG_EMBEDDING_API_BASE="http://<domain>.openai.azure.com" # For Azure OpenAI Users and if GRAPHRAG_API_BASE is not set
GRAPHRAG_EMBEDDING_API_VERSION="api_version" # For Azure OpenAI Users and if GRAPHRAG_API_VERSION is not set
# GRAPHRAG_EMBEDDING_API_ORGANIZATION=None
# GRAPHRAG_EMBEDDING_API_PROXY=None
# GRAPHRAG_EMBEDDING_DEPLOYMENT_NAME=None
# GRAPHRAG_EMBEDDING_MODEL=text-embedding-3-small
# GRAPHRAG_EMBEDDING_BATCH_SIZE=16
# GRAPHRAG_EMBEDDING_BATCH_MAX_TOKENS=8191
# GRAPHRAG_EMBEDDING_TARGET=required
# GRAPHRAG_EMBEDDING_SKIP=None
# GRAPHRAG_EMBEDDING_THREAD_COUNT=None
# GRAPHRAG_EMBEDDING_THREAD_STAGGER=50
# GRAPHRAG_EMBEDDING_CONCURRENT_REQUESTS=25
# GRAPHRAG_EMBEDDING_TPM=0
# GRAPHRAG_EMBEDDING_RPM=0
# GRAPHRAG_EMBEDDING_MAX_RETRIES=10
# GRAPHRAG_EMBEDDING_MAX_RETRY_WAIT=10
# GRAPHRAG_EMBEDDING_SLEEP_ON_RATE_LIMIT_RECOMMENDATION=True
# Data Mapping Settings
# GRAPHRAG_INPUT_ENCODING=utf-8
# Data Chunking
# GRAPHRAG_CHUNK_SIZE=300
# GRAPHRAG_CHUNK_OVERLAP=100
# GRAPHRAG_CHUNK_BY_COLUMNS=id
# Prompting Overrides
# GRAPHRAG_ENTITY_EXTRACTION_PROMPT_FILE=None
# GRAPHRAG_ENTITY_EXTRACTION_MAX_GLEANINGS=0
# GRAPHRAG_ENTITY_EXTRACTION_ENTITY_TYPES=organization,person,event,geo
# GRAPHRAG_SUMMARIZE_DESCRIPTIONS_PROMPT_FILE=None
# GRAPHRAG_SUMMARIZE_DESCRIPTIONS_MAX_LENGTH=500
# GRAPHRAG_CLAIM_EXTRACTION_DESCRIPTION="Any claims or facts that could be relevant to threat analysis."
# GRAPHRAG_CLAIM_EXTRACTION_PROMPT_FILE=None
# GRAPHRAG_CLAIM_EXTRACTION_MAX_GLEANINGS=0
# GRAPHRAG_COMMUNITY_REPORT_PROMPT_FILE=None
# GRAPHRAG_COMMUNITY_REPORT_MAX_LENGTH=1500
# Storage
# GRAPHRAG_STORAGE_TYPE=file
# GRAPHRAG_STORAGE_CONNECTION_STRING=None
# GRAPHRAG_STORAGE_CONTAINER_NAME=None
# GRAPHRAG_STORAGE_BASE_DIR=None
# Cache
# GRAPHRAG_CACHE_TYPE=file
# GRAPHRAG_CACHE_CONNECTION_STRING=None
# GRAPHRAG_CACHE_CONTAINER_NAME=None
# GRAPHRAG_CACHE_BASE_DIR=None
# Reporting
# GRAPHRAG_REPORTING_TYPE=file
# GRAPHRAG_REPORTING_CONNECTION_STRING=None
# GRAPHRAG_REPORTING_CONTAINER_NAME=None
# GRAPHRAG_REPORTING_BASE_DIR=None
# Node2Vec Parameters
# GRAPHRAG_NODE2VEC_ENABLED=False
# GRAPHRAG_NODE2VEC_NUM_WALKS=10
# GRAPHRAG_NODE2VEC_WALK_LENGTH=40
# GRAPHRAG_NODE2VEC_WINDOW_SIZE=2
# GRAPHRAG_NODE2VEC_ITERATIONS=3
# GRAPHRAG_NODE2VEC_RANDOM_SEED=597832
# Data Snapshotting
# GRAPHRAG_SNAPSHOT_GRAPHML=False
# GRAPHRAG_SNAPSHOT_RAW_ENTITIES=False
# GRAPHRAG_SNAPSHOT_TOP_LEVEL_NODES=False
# Miscellaneous Settings
# GRAPHRAG_ASYNC_MODE=asyncio
# GRAPHRAG_ENCODING_MODEL=cl100k_base
# GRAPHRAG_MAX_CLUSTER_SIZE=10
# GRAPHRAG_ENTITY_RESOLUTION_ENABLED=False
# GRAPHRAG_SKIP_WORKFLOWS=None
# GRAPHRAG_UMAP_ENABLED=False
```

View File

@@ -0,0 +1,90 @@
---
title: Developing GraphRAG
navtitle: Developing
layout: page
tags: [post]
---
# Requirements
| Name | Installation | Purpose |
| ------------------- | ------------------------------------------------------------ | ----------------------------------------------------------------------------------- |
| Python 3.10-3.12 | [Download](https://www.python.org/downloads/) | The library is Python-based. |
| Poetry | [Instructions](https://python-poetry.org/docs/#installation) | Poetry is used for package management and virtualenv management in Python codebases |
# Getting Started
## Install Dependencies
```sh
# Install Python dependencies.
poetry install
```
## Execute the Indexing Engine
```sh
poetry run poe index <...args>
```
## Executing Queries
```sh
poetry run poe query <...args>
```
# Azurite
Some unit and smoke tests use Azurite to emulate Azure resources. This can be started by running:
```sh
./scripts/start-azurite.sh
```
or by simply running `azurite` in the terminal if already installed globally. See the [Azurite documentation](https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azurite) for more information about how to install and use Azurite.
# Lifecycle Scripts
Our Python package utilizes Poetry to manage dependencies and [poethepoet](https://pypi.org/project/poethepoet/) to manage build scripts.
Available scripts are:
- `poetry run poe index` - Run the Indexing CLI
- `poetry run poe query` - Run the Query CLI
- `poetry build` - This invokes `poetry build`, which will build a wheel file and other distributable artifacts.
- `poetry run poe test` - This will execute all tests.
- `poetry run poe test_unit` - This will execute unit tests.
- `poetry run poe test_integration` - This will execute integration tests.
- `poetry run poe test_smoke` - This will execute smoke tests.
- `poetry run poe check` - This will perform a suite of static checks across the package, including:
- formatting
- documentation formatting
- linting
- security patterns
- type-checking
- `poetry run poe fix` - This will apply any available auto-fixes to the package. Usually this is just formatting fixes.
- `poetry run poe fix_unsafe` - This will apply any available auto-fixes to the package, including those that may be unsafe.
- `poetry run poe format` - Explicitly run the formatter across the package.
## Troubleshooting
### "RuntimeError: llvm-config failed executing, please point LLVM_CONFIG to the path for llvm-config" when running poetry install
Make sure llvm-9 and llvm-9-dev are installed:
`sudo apt-get install llvm-9 llvm-9-dev`
and then in your bashrc, add
`export LLVM_CONFIG=/usr/bin/llvm-config-9`
### "numba/\_pymodule.h:6:10: fatal error: Python.h: No such file or directory" when running poetry install
Make sure you have python3.10-dev installed or more generally `python<version>-dev`
`sudo apt-get install python3.10-dev`
### LLM call constantly exceeds TPM, RPM or time limits
`GRAPHRAG_LLM_THREAD_COUNT` and `GRAPHRAG_EMBEDDING_THREAD_COUNT` are both set to 50 by default. You can modify these values
to reduce concurrency. Please refer to the [Configuration Documents](../config/overview)

View File

@@ -0,0 +1,130 @@
---
title: Get Started
navtitle: Get started
layout: page
tags: [post]
---
## Requirements
[Python 3.10-3.12](https://www.python.org/downloads/)
To get started with the GraphRAG system, you have a few options:
👉 [Use the GraphRAG Accelerator solution](https://github.com/Azure-Samples/graphrag-accelerator) <br/>
👉 [Install from pypi](https://pypi.org/project/graphrag/). <br/>
👉 [Use it from source](/posts/developing)<br/>
## Quickstart
To get started with the GraphRAG system we recommend trying the [Solution Accelerator](https://github.com/Azure-Samples/graphrag-accelerator) package. This provides a user-friendly end-to-end experience with Azure resources.
# Top-Level Modules
[Indexing Pipeline Overview](/posts/index/overview)<br/>
[Query Engine Overview](/posts/query/overview)
# Overview
The following is a simple end-to-end example for using the GraphRAG system.
It shows how to use the system to index some text, and then use the indexed data to answer questions about the documents.
# Install GraphRAG
```bash
pip install graphrag
```
# Running the Indexer
Now we need to set up a data project and some initial configuration. Let's set that up. We're using the [default configuration mode](/posts/config/overview/), which you can customize as needed using a [config file](/posts/config/json_yaml/), which we recommend, or [environment variables](/posts/config/env_vars/).
First let's get a sample dataset ready:
```sh
mkdir -p ./ragtest/input
```
Now let's get a copy of A Christmas Carol by Charles Dickens from a trusted source
```sh
curl https://www.gutenberg.org/cache/epub/24022/pg24022.txt > ./ragtest/input/book.txt
```
Next we'll inject some required config variables:
## Set Up Your Workspace Variables
First let's make sure to setup the required environment variables. For details on these environment variables, and what environment variables are available, see the [variables documentation](/posts/config/overview/).
To initialize your workspace, let's first run the `graphrag.index --init` command.
Since we have already configured a directory named `./ragtest` in the previous step, we can run the following command:
```sh
python -m graphrag.index --init --root ./ragtest
```
This will create two files: `.env` and `settings.yaml` in the `./ragtest` directory.
- `.env` contains the environment variables required to run the GraphRAG pipeline. If you inspect the file, you'll see a single environment variable defined,
`GRAPHRAG_API_KEY=<API_KEY>`. This is the API key for the OpenAI API or Azure OpenAI endpoint. You can replace this with your own API key.
- `settings.yaml` contains the settings for the pipeline. You can modify this file to change the settings for the pipeline.
<br/>
#### <ins>OpenAI and Azure OpenAI</ins>
To run in OpenAI mode, just make sure to update the value of `GRAPHRAG_API_KEY` in the `.env` file with your OpenAI API key.
#### <ins>Azure OpenAI</ins>
In addition, Azure OpenAI users should set the following variables in the settings.yaml file. To find the appropriate sections, just search for the `llm:` configuration, you should see two sections, one for the chat endpoint and one for the embeddings endpoint. Here is an example of how to configure the chat endpoint:
```yaml
type: azure_openai_chat # Or azure_openai_embedding for embeddings
api_base: https://<instance>.openai.azure.com
api_version: 2024-02-15-preview # You can customize this for other versions
deployment_name: <azure_model_deployment_name>
```
- For more details about configuring GraphRAG, see the [configuration documentation](/posts/config/overview/).
- To learn more about Initialization, refer to the [Initialization documentation](/posts/config/init/).
- For more details about using the CLI, refer to the [CLI documentation](/posts/query/3-cli/).
## Running the Indexing pipeline
Finally we'll run the pipeline!
```sh
python -m graphrag.index --root ./ragtest
```
![pipeline executing from the CLI](/img/pipeline-running.png)
This process will take some time to run. This depends on the size of your input data, what model you're using, and the text chunk size being used (these can be configured in your `.env` file).
Once the pipeline is complete, you should see a new folder called `./ragtest/output/<timestamp>/artifacts` with a series of parquet files.
# Using the Query Engine
## Running the Query Engine
Now let's ask some questions using this dataset.
Here is an example using Global search to ask a high-level question:
```sh
python -m graphrag.query \
--root ./ragtest \
--method global \
"What are the top themes in this story?"
```
Here is an example using Local search to ask a more specific question about a particular character:
```sh
python -m graphrag.query \
--root ./ragtest \
--method local \
"Who is Scrooge, and what are his main relationships?"
```
Please refer to [Query Engine](/posts/query/overview) docs for detailed information about how to leverage our Local and Global search mechanisms for extracting meaningful insights from data after the Indexer has wrapped up execution.

View File

@@ -0,0 +1,73 @@
---
title: Indexing Architecture
navtitle: Architecture
tags: [post, indexing]
layout: page
date: 2023-01-01
---
## Key Concepts
### Knowledge Model
In order to support the GraphRAG system, the outputs of the indexing engine (in the Default Configuration Mode) are aligned to a knowledge model we call the _GraphRAG Knowledge Model_.
This model is designed to be an abstraction over the underlying data storage technology, and to provide a common interface for the GraphRAG system to interact with.
In normal use-cases the outputs of the GraphRAG Indexer would be loaded into a database system, and the GraphRAG's Query Engine would interact with the database using the knowledge model data-store types.
### DataShaper Workflows
GraphRAG's Indexing Pipeline is built on top of our open-source library, [DataShaper](https://github.com/microsoft/datashaper).
DataShaper is a data processing library that allows users to declaratively express data pipelines, schemas, and related assets using well-defined schemas.
DataShaper has implementations in JavaScript and Python, and is designed to be extensible to other languages.
One of the core resource types within DataShaper is a [Workflow](https://github.com/microsoft/datashaper/blob/main/javascript/schema/src/workflow/WorkflowSchema.ts).
Workflows are expressed as sequences of steps, which we call [verbs](https://github.com/microsoft/datashaper/blob/main/javascript/schema/src/workflow/verbs.ts).
Each step has a verb name and a configuration object.
In DataShaper, these verbs model relational concepts such as SELECT, DROP, JOIN, etc. Each verb transforms an input data table, and that table is passed down the pipeline.
```mermaid
---
title: Sample Workflow
---
flowchart LR
input[Input Table] --> select[SELECT] --> join[JOIN] --> binarize[BINARIZE] --> output[Output Table]
```
### LLM-based Workflow Steps
GraphRAG's Indexing Pipeline implements a handful of custom verbs on top of the standard, relational verbs that our DataShaper library provides. These verbs give us the ability to augment text documents with rich, structured data using the power of LLMs such as GPT-4. We utilize these verbs in our standard workflow to extract entities, relationships, claims, community structures, and community reports and summaries. This behavior is customizable and can be extended to support many kinds of AI-based data enrichment and extraction tasks.
### Workflow Graphs
Because of the complexity of our data indexing tasks, we needed to be able to express our data pipeline as series of multiple, interdependent workflows.
In the GraphRAG Indexing Pipeline, each workflow may define dependencies on other workflows, effectively forming a directed acyclic graph (DAG) of workflows, which is then used to schedule processing.
```mermaid
---
title: Sample Workflow DAG
---
stateDiagram-v2
[*] --> Prepare
Prepare --> Chunk
Chunk --> ExtractGraph
Chunk --> EmbedDocuments
ExtractGraph --> GenerateReports
ExtractGraph --> EmbedGraph
EntityResolution --> EmbedGraph
EntityResolution --> GenerateReports
ExtractGraph --> EntityResolution
```
### Dataframe Message Format
The primary unit of communication between workflows, and between workflow steps is an instance of `pandas.DataFrame`.
Although side-effects are possible, our goal is to be _data-centric_ and _table-centric_ in our approach to data processing.
This allows us to easily reason about our data, and to leverage the power of dataframe-based ecosystems.
Our underlying dataframe technology may change over time, but our primary goal is to support the DataShaper workflow schema while retaining single-machine ease of use and developer ergonomics.
### LLM Caching
The GraphRAG library was designed with LLM interactions in mind, and a common setback when working with LLM APIs is various errors due to network latency, throttling, etc.
Because of these potential error cases, we've added a cache layer around LLM interactions.
When completion requests are made using the same input set (prompt and tuning parameters), we return a cached result if one exists.
This allows our indexer to be more resilient to network issues, to act idempotently, and to provide a more efficient end-user experience.

View File

@@ -0,0 +1,218 @@
---
title: Indexing Dataflow
navtitle: Dataflow
layout: page
tags: [post, indexing]
date: 2023-01-02
---
## The GraphRAG Knowledge Model
The knowledge model is a specification for data outputs that conform to our data-model definition. You can find these definitions in the python/graphrag/graphrag/model folder within the GraphRAG repository. The following entity types are provided. The fields here represent the fields that are text-embedded by default.
- `Document` - An input document into the system. These either represent individual rows in a CSV or individual .txt files.
- `TextUnit` - A chunk of text to analyze. The size of these chunks, their overlap, and whether they adhere to any data boundaries may be configured below. A common use case is to set `CHUNK_BY_COLUMNS` to `id` so that there is a 1-to-many relationship between documents and TextUnits instead of a many-to-many.
- `Entity` - An entity extracted from a TextUnit. These represent people, places, events, or some other entity-model that you provide.
- `Relationship` - A relationship between two entities. These are generated from the covariates.
- `Covariate` - Extracted claim information, which contains statements about entities which may be time-bound.
- `Community Report` - Once entities are generated, we perform hierarchical community detection on them and generate reports for each community in this hierarchy.
- `Node` - This table contains layout information for rendered graph-views of the Entities and Documents which have been embedded and clustered.
## The Default Configuration Workflow
Let's take a look at how the default-configuration workflow transforms text documents into the _GraphRAG Knowledge Model_. This page gives a general overview of the major steps in this process. To fully configure this workflow, check out the [configuration](/posts/config/overview/) documentation.
```mermaid
---
title: Dataflow Overview
---
flowchart TB
subgraph phase1[Phase 1: Compose TextUnits]
documents[Documents] --> chunk[Chunk]
chunk --> embed[Embed] --> textUnits[Text Units]
end
subgraph phase2[Phase 2: Graph Extraction]
textUnits --> graph_extract[Entity & Relationship Extraction]
graph_extract --> graph_summarize[Entity & Relationship Summarization]
graph_summarize --> entity_resolve[Entity Resolution]
entity_resolve --> claim_extraction[Claim Extraction]
claim_extraction --> graph_outputs[Graph Tables]
end
subgraph phase3[Phase 3: Graph Augmentation]
graph_outputs --> community_detect[Community Detection]
community_detect --> graph_embed[Graph Embedding]
graph_embed --> augmented_graph[Augmented Graph Tables]
end
subgraph phase4[Phase 4: Community Summarization]
augmented_graph --> summarized_communities[Community Summarization]
summarized_communities --> embed_communities[Community Embedding]
embed_communities --> community_outputs[Community Tables]
end
subgraph phase5[Phase 5: Document Processing]
documents --> link_to_text_units[Link to TextUnits]
textUnits --> link_to_text_units
link_to_text_units --> embed_documents[Document Embedding]
embed_documents --> document_graph[Document Graph Creation]
document_graph --> document_outputs[Document Tables]
end
subgraph phase6[Phase 6: Network Visualization]
document_outputs --> umap_docs[Umap Documents]
augmented_graph --> umap_entities[Umap Entities]
umap_docs --> combine_nodes[Nodes Table]
umap_entities --> combine_nodes
end
```
## Phase 1: Compose TextUnits
The first phase of the default-configuration workflow is to transform input documents into _TextUnits_. A _TextUnit_ is a chunk of text that is used for our graph extraction techniques. They are also used as source-references by extracted knowledge items in order to empower breadcrumbs and provenance by concepts back to their original source text.
The chunk size (counted in tokens), is user-configurable. By default this is set to 300 tokens, although we've had positive experience with 1200-token chunks using a single "glean" step. (A "glean" step is a follow-on extraction). Larger chunks result in lower-fidelity output and less meaningful reference texts; however, using larger chunks can result in much faster processing time.
The group-by configuration is also user-configurable. By default, we align our chunks to document boundaries, meaning that there is a strict 1-to-many relationship between Documents and TextUnits. In rare cases, this can be turned into a many-to-many relationship. This is useful when the documents are very short and we need several of them to compose a meaningful analysis unit (e.g. Tweets or a chat log)
Each of these text-units are text-embedded and passed into the next phase of the pipeline.
```mermaid
---
title: Documents into Text Chunks
---
flowchart LR
doc1[Document 1] --> tu1[TextUnit 1]
doc1 --> tu2[TextUnit 2]
doc2[Document 2] --> tu3[TextUnit 3]
doc2 --> tu4[TextUnit 4]
```
## Phase 2: Graph Extraction
In this phase, we analyze each text unit and extract our graph primitives: _Entities_, _Relationships_, and _Claims_.
Entities and Relationships are extracted at once in our _entity_extract_ verb, and claims are extracted in our _claim_extract_ verb. Results are then combined and passed into following phases of the pipeline.
```mermaid
---
title: Graph Extraction
---
flowchart LR
tu[TextUnit] --> ge[Graph Extraction] --> gs[Graph Summarization] --> er[Entity Resolution]
tu --> ce[Claim Extraction]
```
### Entity & Relationship Extraction
In this first step of graph extraction, we process each text-unit in order to extract entities and relationships out of the raw text using the LLM. The output of this step is a subgraph-per-TextUnit containing a list of **entities** with a _name_, _type_, and _description_, and a list of **relationships** with a _source_, _target_, and _description_.
These subgraphs are merged together - any entities with the same _name_ and _type_ are merged by creating an array of their descriptions. Similarly, any relationships with the same _source_ and _target_ are merged by creating an array of their descriptions.
### Entity & Relationship Summarization
Now that we have a graph of entities and relationships, each with a list of descriptions, we can summarize these lists into a single description per entity and relationship. This is done by asking the LLM for a short summary that captures all of the distinct information from each description. This allows all of our entities and relationships to have a single concise description.
### Entity Resolution (Not Enabled by Default)
The final step of graph extraction is to resolve any entities that represent the same real-world entity but have different names. Since this is done via LLM, and we don't want to lose information, we want to take a conservative, non-destructive approach to this.
Our current implementation of Entity Resolution, however, is destructive. It will provide the LLM with a series of entities and ask it to determine which ones should be merged. Those entities are then merged together into a single entity and their relationships are updated.
We are currently exploring other entity resolution techniques. In the near future, entity resolution will be executed by creating an edge between entity variants indicating that the entities have been resolved by the indexing engine. This will allow for end-users to undo indexing-side resolutions, and add their own non-destructive resolutions using a similar process.
### Claim Extraction & Emission
Finally, as an independent workflow, we extract claims from the source TextUnits. These claims represent positive factual statements with an evaluated status and time-bounds. These are emitted as a primary artifact called **Covariates**.
## Phase 3: Graph Augmentation
Now that we have a usable graph of entities and relationships, we want to understand their community structure and augment the graph with additional information. This is done in two steps: _Community Detection_ and _Graph Embedding_. These give us explicit (communities) and implicit (embeddings) ways of understanding the topological structure of our graph.
```mermaid
---
title: Graph Augmentation
---
flowchart LR
cd[Leiden Hierarchical Community Detection] --> ge[Node2Vec Graph Embedding] --> ag[Graph Table Emission]
```
### Community Detection
In this step, we generate a hierarchy of entity communities using the Hierarchical Leiden Algorithm. This method will apply a recursive community-clustering to our graph until we reach a community-size threshold. This will allow us to understand the community structure of our graph and provide a way to navigate and summarize the graph at different levels of granularity.
### Graph Embedding
In this step, we generate a vector representation of our graph using the Node2Vec algorithm. This will allow us to understand the implicit structure of our graph and provide an additional vector-space in which to search for related concepts during our query phase.
### Graph Tables Emission
Once our graph augmentation steps are complete, the final **Entities** and **Relationships** tables are emitted after their text fields are text-embedded.
## Phase 4: Community Summarization
```mermaid
---
title: Community Summarization
---
flowchart LR
sc[Generate Community Reports] --> ss[Summarize Community Reports] --> ce[Community Embedding] --> co[Community Tables Emission]
```
At this point, we have a functional graph of entities and relationships, a hierarchy of communities for the entities, as well as node2vec embeddings.
Now we want to build on the communities data and generate reports for each community. This gives us a high-level understanding of the graph at several points of graph granularity. For example, if community A is the top-level community, we'll get a report about the entire graph. If the community is lower-level, we'll get a report about a local cluster.
### Generate Community Reports
In this step, we generate a summary of each community using the LLM. This will allow us to understand the distinct information contained within each community and provide a scoped understanding of the graph, from either a high-level or a low-level perspective. These reports contain an executive overview and reference the key entities, relationships, and claims within the community sub-structure.
### Summarize Community Reports
In this step, each _community report_ is then summarized via the LLM for shorthand use.
### Community Embedding
In this step, we generate a vector representation of our communities by generating text embeddings of the community report, the community report summary, and the title of the community report.
### Community Tables Emission
At this point, some bookkeeping work is performed and we emit the **Communities** and **CommunityReports** tables.
## Phase 5: Document Processing
In this phase of the workflow, we create the _Documents_ table for the knowledge model.
```mermaid
---
title: Document Processing
---
flowchart LR
aug[Augment] --> dp[Link to TextUnits] --> de[Avg. Embedding] --> dg[Document Table Emission]
```
### Augment with Columns (CSV Only)
If the workflow is operating on CSV data, you may configure your workflow to add additional fields to Documents output. These fields should exist on the incoming CSV tables. Details about configuring this can be found in the [configuration documentation](/posts/config/overview/).
### Link to TextUnits
In this step, we link each document to the text-units that were created in the first phase. This allows us to understand which documents are related to which text-units and vice-versa.
### Document Embedding
In this step, we generate a vector representation of our documents using an average embedding of document slices. We re-chunk documents without overlapping chunks, and then generate an embedding for each chunk. We create an average of these chunks weighted by token-count and use this as the document embedding. This will allow us to understand the implicit relationship between documents, and will help us generate a network representation of our documents.
### Documents Table Emission
At this point, we can emit the **Documents** table into the knowledge Model.
## Phase 6: Network Visualization
In this phase of the workflow, we perform some steps to support network visualization of our high-dimensional vector spaces within our existing graphs. At this point there are two logical graphs at play: the _Entity-Relationship_ graph and the _Document_ graph.
```mermaid
---
title: Network Visualization Workflows
---
flowchart LR
nv[Umap Documents] --> ne[Umap Entities] --> ng[Nodes Table Emission]
```
For each of the logical graphs, we perform a UMAP dimensionality reduction to generate a 2D representation of the graph. This will allow us to visualize the graph in a 2D space and understand the relationships between the nodes in the graph. The UMAP embeddings are then emitted as a table of _Nodes_. The rows of this table include a discriminator indicating whether the node is a document or an entity, and the UMAP coordinates.

View File

@@ -0,0 +1,26 @@
---
title: Indexer CLI
navtitle: CLI
layout: page
tags: [post, indexing]
date: 2023-01-03
---
The GraphRAG indexer CLI allows for no-code usage of the GraphRAG Indexer.
```bash
python -m graphrag.index --verbose --root </workspace/project/root> --config <custom_config.yml>
--resume <timestamp> --reporter <rich|print|none> --emit json,csv,parquet
--nocache
```
## CLI Arguments
- `--verbose` - Adds extra logging information during the run.
- `--root <data-project-dir>` - the data root directory. This should contain an `input` directory with the input data, and an `.env` file with environment variables. These are described below.
- `--init` - This will initialize the data project directory at the specified `root` with bootstrap configuration and prompt-overrides.
- `--resume <output-timestamp>` - if specified, the pipeline will attempt to resume a prior run. The parquet files from the prior run will be loaded into the system as inputs, and the workflows that generated those files will be skipped. The input value should be the timestamped output folder, e.g. "20240105-143721".
- `--config <config_file.yml>` - This will opt-out of the Default Configuration mode and execute a custom configuration. If this is used, then none of the environment-variables below will apply.
- `--reporter <reporter>` - This will specify the progress reporter to use. The default is `rich`. Valid values are `rich`, `print`, and `none`.
- `--emit <types>` - This specifies the table output formats the pipeline should emit. The default is `parquet`. Valid values are `parquet`, `csv`, and `json`, comma-separated.
- `--nocache` - This will disable the caching mechanism. This is useful for debugging and development, but should not be used in production.

View File

@@ -0,0 +1,82 @@
---
title: GraphRAG Indexing 🤖
navtitle: Indexing Overview
layout: page
tags: [post]
---
The GraphRAG indexing package is a data pipeline and transformation suite that is designed to extract meaningful, structured data from unstructured text using LLMs.
Indexing Pipelines are configurable. They are composed of workflows, standard and custom steps, prompt templates, and input/output adapters. Our standard pipeline is designed to:
- extract entities, relationships and claims from raw text
- perform community detection in entities
- generate community summaries and reports at multiple levels of granularity
- embed entities into a graph vector space
- embed text chunks into a textual vector space
The outputs of the pipeline can be stored in a variety of formats, including JSON and Parquet - or they can be handled manually via the Python API.
## Getting Started
### Requirements
See the [requirements](/posts/developing#requirements) section in [Get Started](/posts/get_started) for details on setting up a development environment.
The Indexing Engine can be used in either a default configuration mode or with a custom pipeline.
To configure GraphRAG, see the [configuration](/posts/config/overview) documentation.
After you have a config file you can run the pipeline using the CLI or the Python API.
## Usage
### CLI
```bash
# Via Poetry
poetry run poe cli --root <data_root> # default config mode
poetry run poe cli --config your_pipeline.yml # custom config mode
# Via Node
yarn run:index --root <data_root> # default config mode
yarn run:index --config your_pipeline.yml # custom config mode
```
### Python API
```python
from graphrag.index import run_pipeline
from graphrag.index.config import PipelineWorkflowReference
workflows: list[PipelineWorkflowReference] = [
PipelineWorkflowReference(
steps=[
{
# built-in verb
"verb": "derive", # https://github.com/microsoft/datashaper/blob/main/python/datashaper/datashaper/engine/verbs/derive.py
"args": {
"column1": "col1", # from above
"column2": "col2", # from above
"to": "col_multiplied", # new column name
"operator": "*", # multiply the two columns
},
                # Since we're trying to act on the default input, we don't need to explicitly specify an input
}
]
),
]
dataset = pd.DataFrame([{"col1": 2, "col2": 4}, {"col1": 5, "col2": 10}])
outputs = []
async for output in await run_pipeline(dataset=dataset, workflows=workflows):
    outputs.append(output)
pipeline_result = outputs[-1]
print(pipeline_result)
```
## Further Reading
- To start developing within the _GraphRAG_ project, see [getting started](/posts/developing/)
- To understand the underlying concepts and execution model of the indexing library, see [the architecture documentation](/posts/index/0-architecture/)
- To get running with a series of examples, see [the examples documentation](https://github.com/microsoft/graphrag/blob/main/examples/README.md)
- To read more about configuring the indexing engine, see [the configuration documentation](/posts/config/overview)

View File

@@ -0,0 +1,72 @@
---
title: Prompt Tuning ⚙️
navtitle: Auto templating
layout: page
tags: [post, tuning]
date: 2024-06-13
---
GraphRAG provides the ability to create domain adaptive templates for the generation of the knowledge graph. This step is optional, though it is highly encouraged to run it as it will yield better results when executing an Index Run.
The templates are generated by loading the inputs, splitting them into chunks (text units) and then running a series of LLM invocations and template substitutions to generate the final prompts. We suggest using the default values provided by the script, but in this page you'll find the detail of each in case you want to further explore and tweak the template generation algorithm.
## Prerequisites
Before running the automatic template generation make sure you have already initialized your workspace with the `graphrag.index --init` command. This will create the necessary configuration files and the default prompts. Refer to the [Init Documentation](/posts/config/init) for more information about the initialization process.
## Usage
You can run the main script from the command line with various options:
```bash
python -m graphrag.prompt_tune [--root ROOT] [--domain DOMAIN] [--method METHOD] [--limit LIMIT] [--max-tokens MAX_TOKENS] [--chunk-size CHUNK_SIZE] [--no-entity-types] [--output OUTPUT]
```
## Command-Line Options
- `--root` (optional): The data project root directory, including the config files (YML, JSON, or .env). Defaults to the current directory.
- `--domain` (optional): The domain related to your input data, such as 'space science', 'microbiology', or 'environmental news'. If left empty, the domain will be inferred from the input data.
- `--method` (optional): The method to select documents. Options are all, random, or top. Default is random.
- `--limit` (optional): The limit of text units to load when using random or top selection. Default is 15.
- `--max-tokens` (optional): Maximum token count for prompt generation. Default is 2000.
- `--chunk-size` (optional): The size in tokens to use for generating text units from input documents. Default is 200.
- `--no-entity-types` (optional): Use untyped entity extraction generation. We recommend using this when your data covers a lot of topics or it is highly randomized.
- `--output` (optional): The folder to save the generated prompts. Default is "prompts".
## Example Usage
```bash
python -m graphrag.prompt_tune --root /path/to/project --domain "environmental news" --method random --limit 10 --max-tokens 2048 --chunk-size 256 --no-entity-types --output /path/to/output
```
or, with minimal configuration (suggested):
```bash
python -m graphrag.prompt_tune --root /path/to/project --no-entity-types
```
## Document Selection Methods
The auto template feature ingests the input data and then divides it into text units the size of the chunk size parameter.
After that, it uses one of the following selection methods to pick a sample to work with for template generation:
- `random`: Select text units randomly. This is the default and recommended option.
- `top`: Select the head n text units.
- `all`: Use all text units for the generation. Use only with small datasets; this option is not usually recommended.
## Modify Env Vars
After running auto-templating, you should modify the following environment variables (or config variables) to pick up the new prompts on your index run. Note: Please make sure to update the correct path to the generated prompts, in this example we are using the default "prompts" path.
- `GRAPHRAG_ENTITY_EXTRACTION_PROMPT_FILE` = "prompts/entity_extraction.txt"
- `GRAPHRAG_COMMUNITY_REPORT_PROMPT_FILE` = "prompts/community_report.txt"
- `GRAPHRAG_SUMMARIZE_DESCRIPTIONS_PROMPT_FILE` = "prompts/summarize_descriptions.txt"

View File

@@ -0,0 +1,60 @@
---
title: Prompt Tuning⚙
navtitle: Manual Tuning
layout: page
tags: [post, tuning]
date: 2023-01-03
---
The GraphRAG indexer, by default, will run with a handful of prompts that are designed to work well in the broad context of knowledge discovery.
However, it is quite common to want to tune the prompts to better suit your specific use case.
We provide a means for you to do this by allowing you to specify a custom prompt file, which will each use a series of token-replacements internally.
Each of these prompts may be overridden by writing a custom prompt file in plaintext. We use token-replacements in the form of `{token_name}`, and the descriptions for the available tokens can be found below.
## Entity/Relationship Extraction
[Prompt Source](http://github.com/microsoft/graphrag/blob/main/graphrag/index/graph/extractors/graph/prompts.py)
### Tokens (values provided by extractor)
- **{input_text}** - The input text to be processed.
- **{entity_types}** - A list of entity types
- **{tuple_delimiter}** - A delimiter for separating values within a tuple. A single tuple is used to represent an individual entity or relationship.
- **{record_delimiter}** - A delimiter for separating tuple instances.
- **{completion_delimiter}** - An indicator for when generation is complete.
## Summarize Entity/Relationship Descriptions
[Prompt Source](http://github.com/microsoft/graphrag/blob/main/graphrag/index/graph/extractors/summarize/prompts.py)
### Tokens (values provided by extractor)
- **{entity_name}** - The name of the entity or the source/target pair of the relationship.
- **{description_list}** - A list of descriptions for the entity or relationship.
## Claim Extraction
[Prompt Source](http://github.com/microsoft/graphrag/blob/main/graphrag/index/graph/extractors/claims/prompts.py)
### Tokens (values provided by extractor)
- **{input_text}** - The input text to be processed.
- **{tuple_delimiter}** - A delimiter for separating values within a tuple. A single tuple is used to represent an individual entity or relationship.
- **{record_delimiter}** - A delimiter for separating tuple instances.
- **{completion_delimiter}** - An indicator for when generation is complete.
Note: there is an additional parameter for the `Claim Description` that is used in claim extraction.
The default value is
`"Any claims or facts that could be relevant to information discovery."`
See the [configuration documentation](/posts/config/overview/) for details on how to change this.
## Generate Community Reports
[Prompt Source](http://github.com/microsoft/graphrag/blob/main/graphrag/index/graph/extractors/community_reports/prompts.py)
### Tokens (values provided by extractor)
- **{input_text}** - The input text to generate the report with. This will contain tables of entities and relationships.

View File

@@ -0,0 +1,26 @@
---
title: Prompt Tuning ⚙️
navtitle: Overview
layout: page
tags: [post, tuning]
date: 2024-06-13
---
This page provides an overview of the prompt tuning options available for the GraphRAG indexing engine.
## Default Prompts
The default prompts are the simplest way to get started with the GraphRAG system. It is designed to work out-of-the-box with minimal configuration. You can find more detail about these prompts in the following links:
- [Entity/Relationship Extraction](http://github.com/microsoft/graphrag/blob/main/graphrag/index/graph/extractors/graph/prompts.py)
- [Entity/Relationship Description Summarization](http://github.com/microsoft/graphrag/blob/main/graphrag/index/graph/extractors/summarize/prompts.py)
- [Claim Extraction](http://github.com/microsoft/graphrag/blob/main/graphrag/index/graph/extractors/claims/prompts.py)
- [Community Reports](http://github.com/microsoft/graphrag/blob/main/graphrag/index/graph/extractors/community_reports/prompts.py)
## Auto Templating
Auto Templating leverages your input data and LLM interactions to create domain adaptive templates for the generation of the knowledge graph. It is highly encouraged to run it as it will yield better results when executing an Index Run. For more details about how to use it, please refer to the [Auto Templating](/posts/prompt_tuning/auto_prompt_tuning) documentation.
## Manual Configuration
Manual configuration is an advanced use-case. Most users will want to use the Auto Templating feature instead. Details about how to use manual configuration are available in the [Manual Prompt Configuration](/posts/prompt_tuning/manual_prompt_tuning) documentation.

View File

@@ -0,0 +1,78 @@
---
title: Global Search 🔎
navtitle: Global Search
tags: [post, orchestration]
layout: page
date: 2024-06-03
---
## Whole Dataset Reasoning
Baseline RAG struggles with queries that require aggregation of information across the dataset to compose an answer. Queries such as “What are the top 5 themes in the data?” perform terribly because baseline RAG relies on a vector search of semantically similar text content within the dataset. There is nothing in the query to direct it to the correct information.
However, with GraphRAG we can answer such questions, because the structure of the LLM-generated knowledge graph tells us about the structure (and thus themes) of the dataset as a whole. This allows the private dataset to be organized into meaningful semantic clusters that are pre-summarized. Using our [global search](https://github.com/microsoft/graphrag/blob/main//graphrag/query/structured_search/global_search/) method, the LLM uses these clusters to summarize these themes when responding to a user query.
## Methodology
```mermaid
---
title: Global Search Dataflow
---
%%{ init: { 'flowchart': { 'curve': 'step' } } }%%
flowchart LR
uq[User Query] --- .1
ch1[Conversation History] --- .1
subgraph RIR
direction TB
ri1[Rated Intermediate<br/>Response 1]~~~ri2[Rated Intermediate<br/>Response 2] -."{1..N}".-rin[Rated Intermediate<br/>Response N]
end
.1--Shuffled Community<br/>Report Batch 1-->RIR
.1--Shuffled Community<br/>Report Batch 2-->RIR---.2
.1--Shuffled Community<br/>Report Batch N-->RIR
.2--Ranking +<br/>Filtering-->agr[Aggregated Intermediate<br/>Responses]-->res[Response]
classDef green fill:#26B653,stroke:#333,stroke-width:2px,color:#fff;
classDef turquoise fill:#19CCD3,stroke:#333,stroke-width:2px,color:#fff;
classDef rose fill:#DD8694,stroke:#333,stroke-width:2px,color:#fff;
classDef orange fill:#F19914,stroke:#333,stroke-width:2px,color:#fff;
classDef purple fill:#B356CD,stroke:#333,stroke-width:2px,color:#fff;
classDef invisible fill:#fff,stroke:#fff,stroke-width:0px,color:#fff, width:0px;
class uq,ch1 turquoise;
class ri1,ri2,rin rose;
class agr orange;
class res purple;
class .1,.2 invisible;
```
Given a user query and, optionally, the conversation history, the global search method uses a collection of LLM-generated community reports from a specified level of the graph's community hierarchy as context data to generate a response in a map-reduce manner. At the `map` step, community reports are segmented into text chunks of pre-defined size. Each text chunk is then used to produce an intermediate response containing a list of points, each of which is accompanied by a numerical rating indicating the importance of the point. At the `reduce` step, a filtered set of the most important points from the intermediate responses are aggregated and used as the context to generate the final response.
The quality of the global search's response can be heavily influenced by the level of the community hierarchy chosen for sourcing community reports. Lower hierarchy levels, with their detailed reports, tend to yield more thorough responses, but may also increase the time and LLM resources needed to generate the final response due to the volume of reports.
## Configuration
Below are the key parameters of the [GlobalSearch class](https://github.com/microsoft/graphrag/blob/main//graphrag/query/structured_search/global_search/search.py):
* `llm`: OpenAI model object to be used for response generation
* `context_builder`: [context builder](https://github.com/microsoft/graphrag/blob/main//graphrag/query/structured_search/global_search/community_context.py) object to be used for preparing context data from community reports
* `map_system_prompt`: prompt template used in the `map` stage. Default template can be found at [map_system_prompt](https://github.com/microsoft/graphrag/blob/main//graphrag/query/structured_search/global_search/map_system_prompt.py)
* `reduce_system_prompt`: prompt template used in the `reduce` stage, default template can be found at [reduce_system_prompt](https://github.com/microsoft/graphrag/blob/main//graphrag/query/structured_search/global_search/reduce_system_prompt.py)
* `response_type`: free-form text describing the desired response type and format (e.g., `Multiple Paragraphs`, `Multi-Page Report`)
* `allow_general_knowledge`: setting this to True will include additional instructions to the `reduce_system_prompt` to prompt the LLM to incorporate relevant real-world knowledge outside of the dataset. Note that this may increase hallucinations, but can be useful for certain scenarios. Default is False
* `general_knowledge_inclusion_prompt`: instruction to add to the `reduce_system_prompt` if `allow_general_knowledge` is enabled. Default instruction can be found at [general_knowledge_instruction](https://github.com/microsoft/graphrag/blob/main//graphrag/query/structured_search/global_search/reduce_system_prompt.py)
* `max_data_tokens`: token budget for the context data
* `map_llm_params`: a dictionary of additional parameters (e.g., temperature, max_tokens) to be passed to the LLM call at the `map` stage
* `reduce_llm_params`: a dictionary of additional parameters (e.g., temperature, max_tokens) to be passed to the LLM call at the `reduce` stage
* `context_builder_params`: a dictionary of additional parameters to be passed to the [`context_builder`](https://github.com/microsoft/graphrag/blob/main//graphrag/query/structured_search/global_search/community_context.py) object when building context window for the `map` stage.
* `concurrent_coroutines`: controls the degree of parallelism in the `map` stage.
* `callbacks`: optional callback functions, can be used to provide custom event handlers for LLM's completion streaming events
## How to Use
An example of a global search scenario can be found in the following [notebook](../notebooks/global_search_nb).

View File

@@ -0,0 +1,67 @@
---
title: Local Search 🔎
navtitle: Local Search
tags: [post, orchestration]
layout: page
date: 2024-03-28
---
## Entity-based Reasoning
The [local search](https://github.com/microsoft/graphrag/blob/main//graphrag/query/structured_search/local_search/) method combines structured data from the knowledge graph with unstructured data from the input documents to augment the LLM context with relevant entity information at query time. It is well-suited for answering questions that require an understanding of specific entities mentioned in the input documents (e.g., “What are the healing properties of chamomile?”).
## Methodology
```mermaid
---
title: Local Search Dataflow
---
%%{ init: { 'flowchart': { 'curve': 'step' } } }%%
flowchart LR
uq[User Query] ---.1
ch1[Conversation<br/>History]---.1
.1--Entity<br/>Description<br/>Embedding--> ee[Extracted Entities]
ee[Extracted Entities] ---.2--Entity-Text<br/>Unit Mapping--> ctu[Candidate<br/>Text Units]--Ranking + <br/>Filtering -->ptu[Prioritized<br/>Text Units]---.3
.2--Entity-Report<br/>Mapping--> ccr[Candidate<br/>Community Reports]--Ranking + <br/>Filtering -->pcr[Prioritized<br/>Community Reports]---.3
.2--Entity-Entity<br/>Relationships--> ce[Candidate<br/>Entities]--Ranking + <br/>Filtering -->pe[Prioritized<br/>Entities]---.3
.2--Entity-Entity<br/>Relationships--> cr[Candidate<br/>Relationships]--Ranking + <br/>Filtering -->pr[Prioritized<br/>Relationships]---.3
.2--Entity-Covariate<br/>Mappings--> cc[Candidate<br/>Covariates]--Ranking + <br/>Filtering -->pc[Prioritized<br/>Covariates]---.3
ch1 -->ch2[Conversation History]---.3
.3-->res[Response]
classDef green fill:#26B653,stroke:#333,stroke-width:2px,color:#fff;
classDef turquoise fill:#19CCD3,stroke:#333,stroke-width:2px,color:#fff;
classDef rose fill:#DD8694,stroke:#333,stroke-width:2px,color:#fff;
classDef orange fill:#F19914,stroke:#333,stroke-width:2px,color:#fff;
classDef purple fill:#B356CD,stroke:#333,stroke-width:2px,color:#fff;
classDef invisible fill:#fff,stroke:#fff,stroke-width:0px,color:#fff, width:0px;
class uq,ch1 turquoise
class ee green
class ctu,ccr,ce,cr,cc rose
class ptu,pcr,pe,pr,pc,ch2 orange
class res purple
class .1,.2,.3 invisible
```
Given a user query and, optionally, the conversation history, the local search method identifies a set of entities from the knowledge graph that are semantically-related to the user input. These entities serve as access points into the knowledge graph, enabling the extraction of further relevant details such as connected entities, relationships, entity covariates, and community reports. Additionally, it also extracts relevant text chunks from the raw input documents that are associated with the identified entities. These candidate data sources are then prioritized and filtered to fit within a single context window of pre-defined size, which is used to generate a response to the user query.
## Configuration
Below are the key parameters of the [LocalSearch class](https://github.com/microsoft/graphrag/blob/main//graphrag/query/structured_search/local_search/search.py):
* `llm`: OpenAI model object to be used for response generation
* `context_builder`: [context builder](https://github.com/microsoft/graphrag/blob/main//graphrag/query/structured_search/local_search/mixed_context.py) object to be used for preparing context data from collections of knowledge model objects
* `system_prompt`: prompt template used to generate the search response. Default template can be found at [system_prompt](https://github.com/microsoft/graphrag/blob/main//graphrag/query/structured_search/local_search/system_prompt.py)
* `response_type`: free-form text describing the desired response type and format (e.g., `Multiple Paragraphs`, `Multi-Page Report`)
* `llm_params`: a dictionary of additional parameters (e.g., temperature, max_tokens) to be passed to the LLM call
* `context_builder_params`: a dictionary of additional parameters to be passed to the [`context_builder`](https://github.com/microsoft/graphrag/blob/main//graphrag/query/structured_search/local_search/mixed_context.py) object when building context for the search prompt
* `callbacks`: optional callback functions, can be used to provide custom event handlers for LLM's completion streaming events
## How to Use
An example of a local search scenario can be found in the following [notebook](../notebooks/local_search_nb).

View File

@@ -0,0 +1,28 @@
---
title: Question Generation ❔
navtitle: Question Generation
tags: [post, orchestration]
layout: page
date: 2024-03-28
---
## Entity-based Question Generation
The [question generation](https://github.com/microsoft/graphrag/blob/main//graphrag/query/question_gen/) method combines structured data from the knowledge graph with unstructured data from the input documents to generate candidate questions related to specific entities.
## Methodology
Given a list of prior user questions, the question generation method uses the same context-building approach employed in [local search](1-local_search.md) to extract and prioritize relevant structured and unstructured data, including entities, relationships, covariates, community reports and raw text chunks. These data records are then fitted into a single LLM prompt to generate candidate follow-up questions that represent the most important or urgent information content or themes in the data.
## Configuration
Below are the key parameters of the [Question Generation class](https://github.com/microsoft/graphrag/blob/main//graphrag/query/question_gen/local_gen.py):
* `llm`: OpenAI model object to be used for response generation
* `context_builder`: [context builder](https://github.com/microsoft/graphrag/blob/main//graphrag/query/structured_search/local_search/mixed_context.py) object to be used for preparing context data from collections of knowledge model objects, using the same context builder class as in local search
* `system_prompt`: prompt template used to generate candidate questions. Default template can be found at [system_prompt](https://github.com/microsoft/graphrag/blob/main//graphrag/query/question_gen/system_prompt.py)
* `llm_params`: a dictionary of additional parameters (e.g., temperature, max_tokens) to be passed to the LLM call
* `context_builder_params`: a dictionary of additional parameters to be passed to the [`context_builder`](https://github.com/microsoft/graphrag/blob/main//graphrag/query/structured_search/local_search/mixed_context.py) object when building context for the question generation prompt
* `callbacks`: optional callback functions, can be used to provide custom event handlers for LLM's completion streaming events
## How to Use
An example of the question generation function can be found in the following [notebook](../notebooks/local_search_nb).

View File

@@ -0,0 +1,48 @@
---
title: Query CLI
navtitle: CLI
layout: page
tags: [post, orchestration]
date: 2024-03-27
---
The GraphRAG query CLI allows for no-code usage of the GraphRAG Query engine.
```bash
python -m graphrag.query --data <path-to-data> --community_level <community-level> --response_type <response-type> --method <"local"|"global"> <query>
```
## CLI Arguments
- `--data <path-to-data>` - Folder containing the `.parquet` output files from running the Indexer.
- `--community_level <community-level>` - Community level in the Leiden community hierarchy from which we will load the community reports. A higher value means we use reports on smaller communities. Default: 2
- `--response_type <response-type>` - Free form text describing the response type and format, can be anything, e.g. `Multiple Paragraphs`, `Single Paragraph`, `Single Sentence`, `List of 3-7 Points`, `Single Page`, `Multi-Page Report`. Default: `Multiple Paragraphs`.
- `--method <"local"|"global">` - Method to use to answer the query, one of local or global. For more information check [Overview](overview.md)
## Env Variables
Required environment variables to execute:
- `GRAPHRAG_API_KEY` - API Key for executing the model, will fallback to `OPENAI_API_KEY` if one is not provided.
- `GRAPHRAG_LLM_MODEL` - Model to use for Chat Completions.
- `GRAPHRAG_EMBEDDING_MODEL` - Model to use for Embeddings.
You can further customize the execution by providing these environment variables:
- `GRAPHRAG_LLM_API_BASE` - The API Base URL. Default: `None`
- `GRAPHRAG_LLM_TYPE` - The LLM operation type. Either `openai_chat` or `azure_openai_chat`. Default: `openai_chat`
- `GRAPHRAG_LLM_MAX_RETRIES` - The maximum number of retries to attempt when a request fails. Default: `20`
- `GRAPHRAG_EMBEDDING_API_BASE` - The API Base URL. Default: `None`
- `GRAPHRAG_EMBEDDING_TYPE` - The embedding client to use. Either `openai_embedding` or `azure_openai_embedding`. Default: `openai_embedding`
- `GRAPHRAG_EMBEDDING_MAX_RETRIES` - The maximum number of retries to attempt when a request fails. Default: `20`
- `GRAPHRAG_LOCAL_SEARCH_TEXT_UNIT_PROP` - Proportion of context window dedicated to related text units. Default: `0.5`
- `GRAPHRAG_LOCAL_SEARCH_COMMUNITY_PROP` - Proportion of context window dedicated to community reports. Default: `0.1`
- `GRAPHRAG_LOCAL_SEARCH_CONVERSATION_HISTORY_MAX_TURNS` - Maximum number of turns to include in the conversation history. Default: `5`
- `GRAPHRAG_LOCAL_SEARCH_TOP_K_ENTITIES` - Number of related entities to retrieve from the entity description embedding store. Default: `10`
- `GRAPHRAG_LOCAL_SEARCH_TOP_K_RELATIONSHIPS` - Control the number of out-of-network relationships to pull into the context window. Default: `10`
- `GRAPHRAG_LOCAL_SEARCH_MAX_TOKENS` - Change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000). Default: `12000`
- `GRAPHRAG_LOCAL_SEARCH_LLM_MAX_TOKENS` - Change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 1000-1500). Default: `2000`
- `GRAPHRAG_GLOBAL_SEARCH_MAX_TOKENS` - Change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000). Default: `12000`
- `GRAPHRAG_GLOBAL_SEARCH_DATA_MAX_TOKENS` - Change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 5000). Default: `12000`
- `GRAPHRAG_GLOBAL_SEARCH_MAP_MAX_TOKENS` - Default: `500`
- `GRAPHRAG_GLOBAL_SEARCH_REDUCE_MAX_TOKENS` - Change this based on the token limit you have on your model (if you are using a model with 8k limit, a good setting could be 1000-1500). Default: `2000`
- `GRAPHRAG_GLOBAL_SEARCH_CONCURRENCY` - Default: `32`

View File

@@ -0,0 +1,13 @@
---
title: Query Engine Notebooks
navtitle: Query Engine Notebooks
layout: page
tags: [post, notebook]
---
For examples about running Query please refer to the following notebooks:
- [Global Search Notebook](/posts/query/notebooks/global_search_nb)
- [Local Search Notebook](/posts/query/notebooks/local_search_nb)
The test dataset for these notebooks can be found [here](/data/operation_dulce/dataset.zip).

View File

@@ -0,0 +1,31 @@
---
title: Query Engine 🔎
navtitle: Overview
tags: [post]
layout: page
---
The Query Engine is the retrieval module of the Graph RAG Library. It is one of the two main components of the Graph RAG library, the other being the Indexing Pipeline (see [Indexing Pipeline](/posts/index/overview)).
It is responsible for the following tasks:
- [Local Search](#local-search)
- [Global Search](#global-search)
- [Question Generation](#question-generation)
## Local Search
Local search method generates answers by combining relevant data from the AI-extracted knowledge-graph with text chunks of the raw documents. This method is suitable for questions that require an understanding of specific entities mentioned in the documents (e.g. What are the healing properties of chamomile?).
For more details about how Local Search works please refer to the [Local Search](/posts/query/1-local_search) documentation.
## Global Search
Global search method generates answers by searching over all AI-generated community reports in a map-reduce fashion. This is a resource-intensive method, but often gives good responses for questions that require an understanding of the dataset as a whole (e.g. What are the most significant values of the herbs mentioned in this notebook?).
More about this can be checked at the [Global Search](/posts/query/0-global_search) documentation.
## Question Generation
This functionality takes a list of user queries and generates the next candidate questions. This is useful for generating follow-up questions in a conversation or for generating a list of questions for the investigator to dive deeper into the dataset.
Information about how question generation works can be found at the [Question Generation](/posts/query/2-question_generation) documentation page.

2793
docsite/yarn.lock Normal file

File diff suppressed because it is too large Load Diff

19
examples/README.md Normal file
View File

@@ -0,0 +1,19 @@
# Indexing Engine Examples
This directory contains several examples of how to use the indexing engine.
Most examples include two different forms of running the pipeline; both are contained in each example's `run.py`:
1. Using mostly the Python API
2. Using mostly a pipeline configuration file
# Running an Example
First run `poetry shell` to activate a virtual environment with the required dependencies.
Then run `PYTHONPATH="$(pwd)" python examples/path_to_example/run.py` from the `python/graphrag` directory.
For example to run the single_verb example, you would run the following commands:
```bash
cd python/graphrag
poetry shell
PYTHONPATH="$(pwd)" python examples/single_verb/run.py
```

2
examples/__init__.py Normal file
View File

@@ -0,0 +1,2 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

View File

@@ -0,0 +1,2 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

View File

@@ -0,0 +1,24 @@
# Setup reporting however you'd like
reporting:
  type: console
# Setup storage however you'd like
storage:
  type: memory
# Setup cache however you'd like
cache:
  type: memory
# Just a simple workflow
workflows:
  # This is an anonymous workflow, it doesn't have a name
  - steps:
      # "fill" adds a constant-valued column to the input table:
      # `to` is the new column name, `value` is written into every row
      - verb: fill
        args:
          to: filled_column
          value: "Filled Value"

View File

@@ -0,0 +1,46 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
import asyncio
import os
import pandas as pd
from graphrag.index import run_pipeline_with_config
pipeline_file = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "./pipeline.yml"
)
async def run():
    """Run the fill-column pipeline from ./pipeline.yml against an in-memory dataset."""
    # Load your dataset
    dataset = _load_dataset_some_unique_way()
    # Load your config without the input section
    config = pipeline_file
    # Collect every workflow result; the last one is the output of the final
    # workflow in the pipeline (here: the anonymous "fill" workflow).
    outputs = []
    async for output in run_pipeline_with_config(
        config_or_path=config, dataset=dataset
    ):
        outputs.append(output)
    # Guard against an empty pipeline instead of crashing with IndexError.
    pipeline_result = outputs[-1] if outputs else None
    if pipeline_result is not None and pipeline_result.result is not None:
        # Should look something like
        #    col1  col2 filled_column
        # 0     2     4  Filled Value
        # 1     5    10  Filled Value
        print(pipeline_result.result)
    else:
        print("No results!")
def _load_dataset_some_unique_way() -> pd.DataFrame:
# Totally loaded from some other place
return pd.DataFrame([{"col1": 2, "col2": 4}, {"col1": 5, "col2": 10}])
# Entry point: execute the example end to end.
if __name__ == "__main__":
    asyncio.run(run())

View File

@@ -0,0 +1,2 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

View File

@@ -0,0 +1,22 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
from datashaper import TableContainer, VerbInput
def str_append(
    input: VerbInput, source_column: str, target_column: str, string_to_append: str
):
    """Custom verb: write `source_column` plus a constant suffix into `target_column`."""
    # By convention the input column is usually named "column" and the output "to",
    # but any argument names work as long as the workflow "args" match this signature.
    source_table = input.get_input()
    result = source_table.copy()
    result[target_column] = result[source_column].apply(
        lambda value: f"{value}{string_to_append}"
    )
    return TableContainer(table=result)
# Registry of custom verbs, keyed by the verb name used in workflow steps.
custom_verbs = {
    "str_append": str_append,
}

View File

@@ -0,0 +1,7 @@
workflows:
  - steps:
      - verb: "str_append" # must match a key in the custom_verbs dict passed to the pipeline (see custom_verb_definitions.py)
        args:
          source_column: "col1" # read values from this column
          target_column: "col_1_custom" # write the result to this new column
          string_to_append: " - custom verb" # suffix appended to every value

View File

@@ -0,0 +1,84 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
import asyncio
import os
import pandas as pd
from examples.custom_set_of_available_verbs.custom_verb_definitions import custom_verbs
from graphrag.index import run_pipeline, run_pipeline_with_config
from graphrag.index.config import PipelineWorkflowReference
# Our fake dataset
dataset = pd.DataFrame([{"col1": 2, "col2": 4}, {"col1": 5, "col2": 10}])
async def run_with_config():
    """Run a pipeline with a config file"""
    # load pipeline.yml in this directory
    config_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "./pipeline.yml"
    )
    outputs = []
    async for output in run_pipeline_with_config(
        config_or_path=config_path,
        dataset=dataset,
        # Register the custom verb with the pipeline; without this the
        # "str_append" verb named in pipeline.yml cannot be resolved
        # (the Python-API variant below passes it the same way).
        additional_verbs=custom_verbs,
    ):
        outputs.append(output)
    pipeline_result = outputs[-1]
    if pipeline_result.result is not None:
        # Should look something like this, which should be identical to the python example:
        # col1  col2  col_1_custom
        # 0     2     4  2 - custom verb
        # 1     5    10  5 - custom verb
        print(pipeline_result.result)
    else:
        print("No results!")
async def run_python():
    """Run the same str_append workflow via the pure-Python API."""
    workflows: list[PipelineWorkflowReference] = [
        PipelineWorkflowReference(
            name="my_workflow",
            steps=[
                {
                    "verb": "str_append", # must match a key in the custom_verbs dict (defined in custom_verb_definitions.py)
                    "args": {
                        "source_column": "col1", # from above
                        "target_column": "col_1_custom", # new column name,
                        "string_to_append": " - custom verb", # The string to append to the column
                    },
                    # Since we're trying to act on the default input, we don't need explicitly to specify an input
                }
            ],
        ),
    ]
    # Run the pipeline
    outputs = []
    async for output in run_pipeline(
        dataset=dataset,
        workflows=workflows,
        additional_verbs=custom_verbs,
    ):
        outputs.append(output)
    # Find the result from the workflow we care about
    pipeline_result = next(
        (output for output in outputs if output.workflow == "my_workflow"), None
    )
    if pipeline_result is not None and pipeline_result.result is not None:
        # Should look something like this:
        # col1  col2  col_1_custom
        # 0     2     4  2 - custom verb
        # 1     5    10  5 - custom verb
        print(pipeline_result.result)
    else:
        print("No results!")
if __name__ == "__main__":
    asyncio.run(run_python())
    asyncio.run(run_with_config())

View File

@@ -0,0 +1,2 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

View File

@@ -0,0 +1,36 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
from graphrag.index.workflows import WorkflowDefinitions
# Sets up the list of custom workflows that can be used in a pipeline
# The idea being that you can have a pool of workflows that can be used in any number of
# your pipelines
# Each entry maps a workflow name to a factory: (config: dict) -> list of verb steps.
custom_workflows: WorkflowDefinitions = {
    "my_workflow": lambda config: [
        {
            "verb": "derive",
            "args": {
                "column1": "col1", # looks for col1 in the dataset
                "column2": "col2", # looks for col2 in the dataset
                "to": config.get(
                    # Allow the user to specify the output column name,
                    # otherwise default to "output_column"
                    "derive_output_column",
                    "output_column",
                ), # new column name
                "operator": "*",
            },
        }
    ],
    # Defined but never referenced by the example pipeline — workflows in this
    # pool only run when a pipeline names them.
    "my_unused_workflow": lambda _config: [
        {
            "verb": "derive",
            "args": {
                "column1": "col1", # looks for col1 in the dataset
                "column2": "col2", # looks for col2 in the dataset
                "to": "unused_output_column",
                "operator": "*",
            },
        }
    ],
}

View File

@@ -0,0 +1,4 @@
# Select the custom "my_workflow" (registered via additional_workflows in run.py)
# and override its output column name
workflows:
  - name: my_workflow
    config:
      derive_output_column: "col_1_multiplied"

View File

@@ -0,0 +1,85 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
import asyncio
import os
import pandas as pd
from examples.custom_set_of_available_workflows.custom_workflow_definitions import (
custom_workflows,
)
from graphrag.index import run_pipeline, run_pipeline_with_config
from graphrag.index.config import PipelineWorkflowReference
sample_data_dir = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "../_sample_data/"
)
# our fake dataset
dataset = pd.DataFrame([{"col1": 2, "col2": 4}, {"col1": 5, "col2": 10}])
async def run_with_config():
    """Run a pipeline with a config file"""
    # load pipeline.yml in this directory
    config_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "./pipeline.yml"
    )
    # Grab the last result from the pipeline; it is the output of the final
    # workflow to run (here: the custom "my_workflow" derive step)
    tables = []
    async for table in run_pipeline_with_config(
        config_or_path=config_path,
        dataset=dataset,
        additional_workflows=custom_workflows,
    ):
        tables.append(table)
    pipeline_result = tables[-1]
    if pipeline_result.result is not None:
        # Should look something like this:
        # col1  col2  col_1_multiplied
        # 0     2     4                8
        # 1     5    10               50
        print(pipeline_result.result)
    else:
        print("No results!")
async def run_python():
    """Run a pipeline using the python API"""
    # Define the actual workflows to be run; this is identical in effect to the
    # config-file example, but we're selecting the workflows via python instead of via a config file
    workflows: list[PipelineWorkflowReference] = [
        # run my_workflow against the dataset, notice we're only using the "my_workflow" workflow
        # and not the "my_unused_workflow" workflow
        PipelineWorkflowReference(
            name="my_workflow", # should match a key in the custom_workflows dict (see custom_workflow_definitions.py)
            config={ # pass in a config
                # set the derive_output_column to be "col_1_multiplied", this will be passed to the workflow definition above
                "derive_output_column": "col_1_multiplied"
            },
        ),
    ]
    # Grab the last result from the pipeline; it is the output of the final
    # workflow to run (our custom derive workflow)
    tables = []
    async for table in run_pipeline(
        workflows, dataset=dataset, additional_workflows=custom_workflows
    ):
        tables.append(table)
    pipeline_result = tables[-1]
    if pipeline_result.result is not None:
        # Should look something like this:
        # col1  col2  col_1_multiplied
        # 0     2     4                8
        # 1     5    10               50
        print(pipeline_result.result)
    else:
        print("No results!")
if __name__ == "__main__":
    asyncio.run(run_python())
    asyncio.run(run_with_config())

View File

@@ -0,0 +1,2 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

View File

@@ -0,0 +1,2 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

View File

@@ -0,0 +1,16 @@
workflows:
  - name: "entity_extraction"
    config:
      entity_extract:
        strategy:
          type: "graph_intelligence"
          llm:
            type: "openai_chat"
            # create a .env file in the same directory as this pipeline.yml file
            # and add the following lines to it:
            # EXAMPLE_OPENAI_API_KEY="YOUR_API_KEY"
            api_key: !ENV ${EXAMPLE_OPENAI_API_KEY:None} # None is the default
            model: !ENV ${EXAMPLE_OPENAI_MODEL:gpt-3.5-turbo} # gpt-3.5-turbo is the default
            max_tokens: !ENV ${EXAMPLE_OPENAI_MAX_TOKENS:2500} # 2500 is the default
            temperature: !ENV ${EXAMPLE_OPENAI_TEMPERATURE:0} # 0 is the default

View File

@@ -0,0 +1,111 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
import asyncio
import os
from graphrag.index import run_pipeline, run_pipeline_with_config
from graphrag.index.config import PipelineCSVInputConfig, PipelineWorkflowReference
from graphrag.index.input import load_input
sample_data_dir = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "../../_sample_data/"
)
# Load the shared CSV sample dataset once at import time.
# NOTE(review): module-level asyncio.run() performs I/O on import — acceptable
# for an example script, but avoid this pattern in library code.
shared_dataset = asyncio.run(
    load_input(
        PipelineCSVInputConfig(
            file_pattern=".*\\.csv$",  # every CSV file under base_dir
            base_dir=sample_data_dir,
            source_column="author",
            text_column="message",
            timestamp_column="date(yyyyMMddHHmmss)",
            timestamp_format="%Y%m%d%H%M%S",
            title_column="message",
        ),
    )
)
async def run_with_config():
    """Run a pipeline with a config file"""
    # We're cheap, and this is an example, let's just do 10
    dataset = shared_dataset.head(10)
    # load pipeline.yml in this directory
    config_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "./pipeline.yml"
    )
    # Grab the last result from the pipeline, should be our entity extraction
    tables = []
    async for table in run_pipeline_with_config(
        config_or_path=config_path, dataset=dataset
    ):
        tables.append(table)
    pipeline_result = tables[-1]
    # Print the entities. This will be a row for each text unit, each with a list of entities.
    # This should look pretty close to the python version, but since we're using an LLM
    # it will be a little different depending on how it feels about the text
    if pipeline_result.result is not None:
        print(pipeline_result.result["entities"].to_list())
    else:
        print("No results!")
async def run_python():
    """Run entity extraction via the Python API using the graph-intelligence (LLM) strategy."""
    # Fail fast with a clear message when no API key is configured.
    if (
        "EXAMPLE_OPENAI_API_KEY" not in os.environ
        and "OPENAI_API_KEY" not in os.environ
    ):
        msg = "Please set EXAMPLE_OPENAI_API_KEY or OPENAI_API_KEY environment variable to run this example"
        raise Exception(msg)
    # We're cheap, and this is an example, let's just do 10
    dataset = shared_dataset.head(10)
    workflows: list[PipelineWorkflowReference] = [
        PipelineWorkflowReference(
            name="entity_extraction",
            config={
                "entity_extract": {
                    "strategy": {
                        "type": "graph_intelligence",
                        "llm": {
                            "type": "openai_chat",
                            "api_key": os.environ.get(
                                "EXAMPLE_OPENAI_API_KEY",
                                os.environ.get("OPENAI_API_KEY", None),
                            ),
                            "model": os.environ.get(
                                "EXAMPLE_OPENAI_MODEL", "gpt-3.5-turbo"
                            ),
                            # os.environ.get returns *strings* when the variable
                            # is set; cast so the LLM config always receives
                            # numeric values regardless of override.
                            "max_tokens": int(
                                os.environ.get("EXAMPLE_OPENAI_MAX_TOKENS", 2500)
                            ),
                            "temperature": float(
                                os.environ.get("EXAMPLE_OPENAI_TEMPERATURE", 0)
                            ),
                        },
                    }
                }
            },
        )
    ]
    # Grab the last result from the pipeline, should be our entity extraction
    tables = []
    async for table in run_pipeline(dataset=dataset, workflows=workflows):
        tables.append(table)
    pipeline_result = tables[-1]
    # Print the entities. This will be a row for each text unit, each with a list of entities
    if pipeline_result.result is not None:
        print(pipeline_result.result["entities"].to_list())
    else:
        print("No results!")
if __name__ == "__main__":
    asyncio.run(run_python())
    asyncio.run(run_with_config())

View File

@@ -0,0 +1,2 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

View File

@@ -0,0 +1,6 @@
# Run the built-in entity_extraction workflow with the NLTK strategy
# (no LLM or API key required)
workflows:
  - name: "entity_extraction"
    config:
      entity_extract:
        strategy:
          type: "nltk"

View File

@@ -0,0 +1,78 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
import asyncio
import os
from graphrag.index import run_pipeline, run_pipeline_with_config
from graphrag.index.config import PipelineCSVInputConfig, PipelineWorkflowReference
from graphrag.index.input import load_input
sample_data_dir = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "../../_sample_data/"
)
# Load the shared CSV sample dataset once at import time.
# NOTE(review): module-level asyncio.run() performs I/O on import — acceptable
# for an example script, but avoid this pattern in library code.
shared_dataset = asyncio.run(
    load_input(
        PipelineCSVInputConfig(
            file_pattern=".*\\.csv$",  # every CSV file under base_dir
            base_dir=sample_data_dir,
            source_column="author",
            text_column="message",
            timestamp_column="date(yyyyMMddHHmmss)",
            timestamp_format="%Y%m%d%H%M%S",
            title_column="message",
        ),
    )
)
async def run_with_config():
    """Run a pipeline with a config file"""
    # We're cheap, and this is an example, let's just do 10
    dataset = shared_dataset.head(10)
    # load pipeline.yml in this directory
    config_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "./pipeline.yml"
    )
    # Grab the last result from the pipeline, should be our entity extraction
    tables = []
    async for table in run_pipeline_with_config(
        config_or_path=config_path, dataset=dataset
    ):
        tables.append(table)
    pipeline_result = tables[-1]
    # Print the entities. This will be a row for each text unit, each with a list of entities
    if pipeline_result.result is not None:
        print(pipeline_result.result["entities"].to_list())
    else:
        print("No results!")
async def run_python():
    """Run the NLTK entity-extraction workflow via the pure-Python API."""
    # Keep the example cheap: only the first ten rows.
    dataset = shared_dataset.head(10)
    workflows: list[PipelineWorkflowReference] = [
        PipelineWorkflowReference(
            name="entity_extraction",
            config={"entity_extract": {"strategy": {"type": "nltk"}}},
        )
    ]
    # Drain the pipeline; the final table is the entity-extraction output.
    tables = [
        table async for table in run_pipeline(dataset=dataset, workflows=workflows)
    ]
    pipeline_result = tables[-1]
    # One row per text unit, each carrying a list of extracted entities.
    if pipeline_result.result is not None:
        print(pipeline_result.result["entities"].to_list())
    else:
        print("No results!")
if __name__ == "__main__":
    asyncio.run(run_python())
    asyncio.run(run_with_config())

View File

@@ -0,0 +1,2 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

View File

@@ -0,0 +1,23 @@
workflows:
  - name: aggregate_workflow
    steps:
      - verb: "aggregate" # https://github.com/microsoft/datashaper/blob/main/python/datashaper/datashaper/engine/verbs/aggregate.py
        args:
          groupby: "type"
          column: "col_multiplied"
          to: "aggregated_output"
          operation: "sum"
        input:
          source: "workflow:derive_workflow" # reference the derive_workflow, because this one requires that one to run first
  # Notice, these are out of order, the indexing engine will figure out the right order to run them in
  - name: derive_workflow
    steps:
      - verb: "derive" # https://github.com/microsoft/datashaper/blob/main/python/datashaper/datashaper/engine/verbs/derive.py
        args:
          column1: "col1" # from above
          column2: "col2" # from above
          to: "col_multiplied" # new column name
          operator: "*" # multiply the two columns
        # Since we're trying to act on the dataset, we don't need to explicitly specify an input
        # "input": { "source": "source" } # use the dataset as the input to this verb. This is the default, so you can omit it.

View File

@@ -0,0 +1,102 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
import asyncio
import os
import pandas as pd
from graphrag.index import run_pipeline, run_pipeline_with_config
from graphrag.index.config import PipelineWorkflowReference
# Our fake dataset
dataset = pd.DataFrame([
{"type": "A", "col1": 2, "col2": 4},
{"type": "A", "col1": 5, "col2": 10},
{"type": "A", "col1": 15, "col2": 26},
{"type": "B", "col1": 6, "col2": 15},
])
async def run_with_config():
    """Run a pipeline with a config file"""
    # load pipeline.yml in this directory
    config_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "./pipeline.yml"
    )
    # Collect every workflow result; the last one is the output of the final
    # workflow to run (aggregate_workflow, which depends on derive_workflow)
    tables = []
    async for table in run_pipeline_with_config(
        config_or_path=config_path, dataset=dataset
    ):
        tables.append(table)
    pipeline_result = tables[-1]
    if pipeline_result.result is not None:
        # Should look something like this, which should be identical to the python example:
        # type  aggregated_output
        # 0  A  448
        # 1  B  90
        print(pipeline_result.result)
    else:
        print("No results!")
async def run_python():
    """Run the same derive/aggregate pipeline directly through the python API."""
    # Sums col_multiplied per "type"; consumes derive_workflow's output.
    aggregate_workflow = PipelineWorkflowReference(
        name="aggregate_workflow",
        steps=[
            {
                "verb": "aggregate",  # https://github.com/microsoft/datashaper/blob/main/python/datashaper/datashaper/engine/verbs/aggregate.py
                "args": {
                    "groupby": "type",
                    "column": "col_multiplied",
                    "to": "aggregated_output",
                    "operation": "sum",
                },
                "input": {
                    # References derive_workflow, which must run first. The
                    # workflows are deliberately declared out of order here:
                    # the indexing engine figures out the right run order.
                    "source": "workflow:derive_workflow",
                },
            }
        ],
    )
    # Multiplies col1 by col2 into a new col_multiplied column.
    derive_workflow = PipelineWorkflowReference(
        name="derive_workflow",
        steps=[
            {
                # built-in verb
                "verb": "derive",  # https://github.com/microsoft/datashaper/blob/main/python/datashaper/datashaper/engine/verbs/derive.py
                "args": {
                    "column1": "col1",  # from the dataset above
                    "column2": "col2",  # from the dataset above
                    "to": "col_multiplied",  # new column name
                    "operator": "*",  # multiply the two columns
                },
                # No "input" key: this step acts on the default input, so we
                # don't need to explicitly specify one.
            }
        ],
    )
    workflows: list[PipelineWorkflowReference] = [aggregate_workflow, derive_workflow]
    # Collect every emitted table; the final one should be aggregate_workflow's
    # output, since it is the last workflow to run.
    outputs = []
    async for output in run_pipeline(dataset=dataset, workflows=workflows):
        outputs.append(output)
    final = outputs[-1]
    if final.result is None:
        print("No results!")
    else:
        # derive_workflow multiplies col1 and col2, then aggregate_workflow
        # sums the products by type. Should look something like this:
        #    type  aggregated_output
        # 0     A                448
        # 1     B                 90
        print(final.result)
# Run both variants; each should print the same aggregated table.
if __name__ == "__main__":
    asyncio.run(run_python())
    asyncio.run(run_with_config())

View File

@@ -0,0 +1,2 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

View File

@@ -0,0 +1,4 @@
# Top-level pipeline definition: each workflow lives in its own file under
# ./workflows and is pulled in with the loader's custom !include tag.
workflows:
  - !include workflows/workflow_1.yml
  - !include workflows/workflow_2.yml
  - !include workflows/workflow_3.yml

View File

@@ -0,0 +1,43 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
import asyncio
import os
from graphrag.index import run_pipeline_with_config
from graphrag.index.config import PipelineCSVInputConfig
from graphrag.index.input import load_input
# Directory holding the sample CSV input files, resolved relative to this script.
sample_data_dir = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "./../_sample_data/"
)
async def run_with_config():
    """Load the sample CSV dataset and run pipeline.yml against it.

    Reads rows through the CSV input loader, trims the dataset down for the
    example, then prints each workflow's result as the pipeline emits it.
    """
    dataset = await load_input(
        PipelineCSVInputConfig(
            file_pattern=".*\\.csv$",  # match every CSV under base_dir
            base_dir=sample_data_dir,
            source_column="author",
            text_column="message",
            timestamp_column="date(yyyyMMddHHmmss)",
            timestamp_format="%Y%m%d%H%M%S",
            title_column="message",
        ),
    )
    # We're cheap, and this is an example, lets just do 2 rows
    dataset = dataset.head(2)
    # run the pipeline with the config, and override the dataset with the one we just created
    # and grab the last result from the pipeline, should be the last workflow that was run (our nodes)
    pipeline_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "./pipeline.yml"
    )
    async for result in run_pipeline_with_config(pipeline_path, dataset=dataset):
        print(f"Workflow {result.workflow} result\n: ")
        print(result.result)
# Entry point: run the config-driven example.
if __name__ == "__main__":
    asyncio.run(run_with_config())

View File

@@ -0,0 +1 @@
value_from_shared_file

View File

@@ -0,0 +1,6 @@
name: workflow_1
steps:
  # "fill" adds a constant-valued column to the table.
  - verb: fill
    args:
      to: "col_workflow_1"  # new column name
      value: 1  # constant written to every row

View File

@@ -0,0 +1,17 @@
name: workflow_2
steps:
  # "fill" adds a constant-valued column to the table.
  - verb: fill
    args:
      to: "col_workflow_2"
      value: 2
    input:
      # workflow_2 is dependent on workflow_1
      # so in workflow_2 output, you'll also see the output from workflow_1
      source: "workflow:workflow_1"
  # Example of pulling in values from a shared file
  # (the loader's custom !include tag substitutes the file's contents)
  - verb: fill
    args:
      to: "col_from_shared_file"
      value: !include ./shared/shared_fill_value.txt

View File

@@ -0,0 +1,6 @@
name: workflow_3
steps:
  # "fill" adds a constant-valued column to the table.
  - verb: fill
    args:
      to: "col_workflow_3"
      value: 3

View File

@@ -0,0 +1,2 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

View File

@@ -0,0 +1,3 @@
col1,col2
2,4
5,10
1 col1 col2
2 2 4
3 5 10

View File

@@ -0,0 +1,12 @@
# Input configuration: read every CSV file found under ./input.
input:
  file_type: csv
  base_dir: ./input
  file_pattern: .*\.csv$
# A single anonymous workflow with one derive step:
# multiplies col1 by col2 into a new col_multiplied column.
workflows:
  - steps:
      - verb: derive # https://github.com/microsoft/datashaper/blob/main/python/datashaper/datashaper/engine/verbs/derive.py
        args:
          column1: "col1"
          column2: "col2"
          to: "col_multiplied"
          operator: "*"

View File

@@ -0,0 +1,77 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
import asyncio
import os
import pandas as pd
from graphrag.index import run_pipeline, run_pipeline_with_config
from graphrag.index.config import PipelineWorkflowReference
# Our fake dataset: two rows; the pipeline multiplies col1 by col2 per row.
dataset = pd.DataFrame([{"col1": 2, "col2": 4}, {"col1": 5, "col2": 10}])
async def run_with_config():
    """Run the multiply pipeline defined by pipeline.yml next to this script."""
    # Resolve the config file relative to this module's location.
    config_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "./pipeline.yml"
    )
    # Collect every emitted table; the final one is the last workflow's output.
    outputs = []
    async for output in run_pipeline_with_config(
        config_or_path=config_path, dataset=dataset
    ):
        outputs.append(output)
    final = outputs[-1]
    if final.result is None:
        print("No results!")
    else:
        # Should look something like this, identical to the python example:
        #    col1  col2  col_multiplied
        # 0     2     4               8
        # 1     5    10              50
        print(final.result)
async def run_python():
    """Run the multiply pipeline directly through the python API."""
    # One anonymous workflow containing a single built-in derive step.
    multiply_step = {
        # built-in verb
        "verb": "derive",  # https://github.com/microsoft/datashaper/blob/main/python/datashaper/datashaper/engine/verbs/derive.py
        "args": {
            "column1": "col1",  # from the dataset above
            "column2": "col2",  # from the dataset above
            "to": "col_multiplied",  # new column name
            "operator": "*",  # multiply the two columns
        },
        # No "input" key: this step acts on the default input, so we don't
        # need to explicitly specify one.
    }
    workflows: list[PipelineWorkflowReference] = [
        PipelineWorkflowReference(steps=[multiply_step]),
    ]
    # Collect every emitted table; the final one is the last workflow's output.
    outputs = []
    async for output in run_pipeline(dataset=dataset, workflows=workflows):
        outputs.append(output)
    final = outputs[-1]
    if final.result is None:
        print("No results!")
    else:
        # Should look something like this:
        #    col1  col2  col_multiplied
        # 0     2     4               8
        # 1     5    10              50
        print(final.result)
# Run both variants; each should print the same multiplied table.
if __name__ == "__main__":
    asyncio.run(run_with_config())
    asyncio.run(run_python())

View File

@@ -0,0 +1,2 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

View File

@@ -0,0 +1,23 @@
# Configuration overrides for the named built-in workflows; the workflow
# implementations themselves are provided by the indexing engine.
workflows:
  # Extract entities using the nltk strategy.
  - name: "entity_extraction"
    config:
      entity_extract:
        strategy:
          type: "nltk"
  # Build the entity graph: cluster with leiden, embed with node2vec,
  # lay out with umap.
  - name: "entity_graph"
    config:
      cluster_graph:
        strategy:
          type: "leiden"
      embed_graph:
        strategy:
          type: "node2vec"
          num_walks: 10
          walk_length: 40
          window_size: 2
          iterations: 3
          random_seed: 597832
      layout_graph:
        strategy:
          type: "umap"

View File

@@ -0,0 +1,117 @@
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
import asyncio
import os
from graphrag.index import run_pipeline, run_pipeline_with_config
from graphrag.index.config import PipelineCSVInputConfig, PipelineWorkflowReference
from graphrag.index.input import load_input
# Directory holding the sample CSV input files, resolved relative to this script.
sample_data_dir = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), "../_sample_data/"
)
# Load our dataset once so both examples below share it.
# NOTE(review): asyncio.run at module level spins up an event loop at import
# time; acceptable for an example script, avoid in library code.
shared_dataset = asyncio.run(
    load_input(
        PipelineCSVInputConfig(
            file_pattern=".*\\.csv$",  # match every CSV under base_dir
            base_dir=sample_data_dir,
            source_column="author",
            text_column="message",
            timestamp_column="date(yyyyMMddHHmmss)",
            timestamp_format="%Y%m%d%H%M%S",
            title_column="message",
        ),
    )
)
async def run_with_config():
    """Run the entity-graph pipeline from pipeline.yml next to this script."""
    # We're cheap, and this is an example — just use 10 rows.
    dataset = shared_dataset.head(10)
    # Resolve the config file relative to this module's location.
    config_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "./pipeline.yml"
    )
    # Collect every emitted table; the final one should be our entity extraction.
    outputs = []
    async for output in run_pipeline_with_config(
        config_or_path=config_path, dataset=dataset
    ):
        outputs.append(output)
    final = outputs[-1]
    if final.result is None:
        print("No results!")
    else:
        # The output of this should match the run_python() example.
        row = final.result.head(1)
        print(f"level: {row['level'][0]}")
        print(f"embeddings: {row['embeddings'][0]}")
        print(f"entity_graph_positions: {row['node_positions'][0]}")
async def run_python():
    """Run the entity-graph pipeline directly through the python API."""
    # We're cheap, and this is an example — just use 10 rows.
    dataset = shared_dataset.head(10)
    # This workflow reference is only necessary because we want to customize
    # how entity_extraction is configured; omit it to accept the default
    # entity_extraction configuration.
    entity_extraction = PipelineWorkflowReference(
        name="entity_extraction",
        config={
            "entity_extract": {
                "strategy": {
                    "type": "nltk",
                }
            }
        },
    )
    # Cluster with leiden, embed with node2vec, lay out with umap.
    entity_graph = PipelineWorkflowReference(
        name="entity_graph",
        config={
            "cluster_graph": {"strategy": {"type": "leiden"}},
            "embed_graph": {
                "strategy": {
                    "type": "node2vec",
                    "num_walks": 10,
                    "walk_length": 40,
                    "window_size": 2,
                    "iterations": 3,
                    "random_seed": 597832,
                }
            },
            "layout_graph": {
                "strategy": {
                    "type": "umap",
                },
            },
        },
    )
    workflows: list[PipelineWorkflowReference] = [entity_extraction, entity_graph]
    # Collect every emitted table; the final one should be our entity extraction.
    outputs = []
    async for output in run_pipeline(dataset=dataset, workflows=workflows):
        outputs.append(output)
    final = outputs[-1]
    # The output contains entity graphs per hierarchical level, with
    # embeddings per entity.
    if final.result is None:
        print("No results!")
    else:
        row = final.result.head(1)
        print(f"level: {row['level'][0]}")
        print(f"embeddings: {row['embeddings'][0]}")
        print(f"entity_graph_positions: {row['node_positions'][0]}")
# Run both variants; they should print matching first-row summaries.
if __name__ == "__main__":
    asyncio.run(run_python())
    asyncio.run(run_with_config())

Some files were not shown because too many files have changed in this diff Show More