wip: [01-stabilize] paused at task 1/1 - OCR Hallucination Immune logic via Semantic delta window and fret-isolation
This commit is contained in:
22
.agent/services/claude-mem/ragtime/CLAUDE.md
Normal file
22
.agent/services/claude-mem/ragtime/CLAUDE.md
Normal file
@@ -0,0 +1,22 @@
|
||||
<claude-mem-context>
|
||||
# Recent Activity
|
||||
|
||||
### Dec 19, 2025
|
||||
|
||||
| ID | Time | T | Title | Read |
|
||||
|----|------|---|-------|------|
|
||||
| #30153 | 8:24 PM | 🔵 | Context Builder Creates Formatted Email Investigation Context | ~384 |
|
||||
| #30152 | " | 🔵 | Ragtime Current Implementation: Manual Context Injection Via buildContextForEmail | ~357 |
|
||||
|
||||
### Dec 20, 2025
|
||||
|
||||
| ID | Time | T | Title | Read |
|
||||
|----|------|---|-------|------|
|
||||
| #30437 | 4:23 PM | 🔵 | Ragtime processes emails through Claude Agent SDK with claude-mem plugin | ~397 |
|
||||
| #30436 | 4:22 PM | 🔵 | Ragtime displays worker URL on localhost:37777 | ~219 |
|
||||
| #30340 | 3:42 PM | 🔄 | Relocated simple ragtime.ts to ragtime folder | ~219 |
|
||||
| #30339 | 3:41 PM | ✅ | Deleted overengineered ragtime.ts script | ~201 |
|
||||
| #30336 | 3:40 PM | 🔵 | Ragtime Email Corpus Processor Architecture | ~495 |
|
||||
| #30335 | " | 🔵 | Ragtime Uses Separate Noncommercial License | ~259 |
|
||||
| #30252 | 3:17 PM | 🟣 | Multi-Format Email Corpus Loader | ~436 |
|
||||
</claude-mem-context>
|
||||
137
.agent/services/claude-mem/ragtime/LICENSE
Normal file
137
.agent/services/claude-mem/ragtime/LICENSE
Normal file
@@ -0,0 +1,137 @@
|
||||
# PolyForm Noncommercial License 1.0.0
|
||||
|
||||
<https://polyformproject.org/licenses/noncommercial/1.0.0>
|
||||
|
||||
## Acceptance
|
||||
|
||||
In order to get any license under these terms, you must agree
|
||||
to them as both strict obligations and conditions to all
|
||||
your licenses.
|
||||
|
||||
## Copyright License
|
||||
|
||||
The licensor grants you a copyright license for the
|
||||
software to do everything you might do with the software
|
||||
that would otherwise infringe the licensor's copyright
|
||||
in it for any permitted purpose. However, you may
|
||||
only distribute the software according to [Distribution
|
||||
License](#distribution-license) and make changes or new works
|
||||
based on the software according to [Changes and New Works
|
||||
License](#changes-and-new-works-license).
|
||||
|
||||
## Distribution License
|
||||
|
||||
The licensor grants you an additional copyright license
|
||||
to distribute copies of the software. Your license
|
||||
to distribute covers distributing the software with
|
||||
changes and new works permitted by [Changes and New Works
|
||||
License](#changes-and-new-works-license).
|
||||
|
||||
## Notices
|
||||
|
||||
You must ensure that anyone who gets a copy of any part of
|
||||
the software from you also gets a copy of these terms or the
|
||||
URL for them above, as well as copies of any plain-text lines
|
||||
beginning with `Required Notice:` that the licensor provided
|
||||
with the software. For example:
|
||||
|
||||
> Required Notice: Copyright Alex Newman (https://github.com/thedotmack)
|
||||
|
||||
## Changes and New Works License
|
||||
|
||||
The licensor grants you an additional copyright license to
|
||||
make changes and new works based on the software for any
|
||||
permitted purpose.
|
||||
|
||||
## Patent License
|
||||
|
||||
The licensor grants you a patent license for the software that
|
||||
covers patent claims the licensor can license, or becomes able
|
||||
to license, that you would infringe by using the software.
|
||||
|
||||
## Noncommercial Purposes
|
||||
|
||||
Any noncommercial purpose is a permitted purpose.
|
||||
|
||||
## Personal Uses
|
||||
|
||||
Personal use for research, experiment, and testing for
|
||||
the benefit of public knowledge, personal study, private
|
||||
entertainment, hobby projects, amateur pursuits, or religious
|
||||
observance, without any anticipated commercial application,
|
||||
is use for a permitted purpose.
|
||||
|
||||
## Noncommercial Organizations
|
||||
|
||||
Use by any charitable organization, educational institution,
|
||||
public research organization, public safety or health
|
||||
organization, environmental protection organization,
|
||||
or government institution is use for a permitted purpose
|
||||
regardless of the source of funding or obligations resulting
|
||||
from the funding.
|
||||
|
||||
## Fair Use
|
||||
|
||||
You may have "fair use" rights for the software under the
|
||||
law. These terms do not limit them.
|
||||
|
||||
## No Other Rights
|
||||
|
||||
These terms do not allow you to sublicense or transfer any of
|
||||
your licenses to anyone else, or prevent the licensor from
|
||||
granting licenses to anyone else. These terms do not imply
|
||||
any other licenses.
|
||||
|
||||
## Patent Defense
|
||||
|
||||
If you make any written claim that the software infringes or
|
||||
contributes to infringement of any patent, your patent license
|
||||
for the software granted under these terms ends immediately. If
|
||||
your company makes such a claim, your patent license ends
|
||||
immediately for work on behalf of your company.
|
||||
|
||||
## Violations
|
||||
|
||||
The first time you are notified in writing that you have
|
||||
violated any of these terms, or done anything with the software
|
||||
not covered by your licenses, your licenses can nonetheless
|
||||
continue if you come into full compliance with these terms,
|
||||
and take practical steps to correct past violations, within
|
||||
32 days of receiving notice. Otherwise, all your licenses
|
||||
end immediately.
|
||||
|
||||
## No Liability
|
||||
|
||||
***As far as the law allows, the software comes as is, without
|
||||
any warranty or condition, and the licensor will not be liable
|
||||
to you for any damages arising out of these terms or the use
|
||||
or nature of the software, under any kind of legal claim.***
|
||||
|
||||
## Definitions
|
||||
|
||||
The **licensor** is the individual or entity offering these
|
||||
terms, and the **software** is the software the licensor makes
|
||||
available under these terms.
|
||||
|
||||
**You** refers to the individual or entity agreeing to these
|
||||
terms.
|
||||
|
||||
**Your company** is any legal entity, sole proprietorship,
|
||||
or other kind of organization that you work for, plus all
|
||||
organizations that have control over, are under the control of,
|
||||
or are under common control with that organization. **Control**
|
||||
means ownership of substantially all the assets of an entity,
|
||||
or the power to direct its management and policies by vote,
|
||||
contract, or otherwise. Control can be direct or indirect.
|
||||
|
||||
**Your licenses** are all the licenses granted to you for the
|
||||
software under these terms.
|
||||
|
||||
**Use** means anything you do with the software requiring one
|
||||
of your licenses.
|
||||
|
||||
---
|
||||
|
||||
Required Notice: Copyright 2025 Alex Newman (https://github.com/thedotmack)
|
||||
|
||||
For commercial licensing inquiries, contact: thedotmack@gmail.com
|
||||
83
.agent/services/claude-mem/ragtime/README.md
Normal file
83
.agent/services/claude-mem/ragtime/README.md
Normal file
@@ -0,0 +1,83 @@
|
||||
# Ragtime
|
||||
|
||||
Email Investigation Batch Processor using Claude-mem's email-investigation mode.
|
||||
|
||||
## Overview
|
||||
|
||||
Ragtime processes email corpus files through Claude, using the email-investigation mode for entity/relationship/timeline extraction. Each file gets a NEW session - context is managed by Claude-mem's context injection hook, not by conversation continuation.
|
||||
|
||||
## Features
|
||||
|
||||
- **Email-investigation mode** - Specialized observation types for entities, relationships, timeline events, anomalies
|
||||
- **Self-iterating loop** - Each file is processed in its own new session
|
||||
- **Transcript cleanup** - Automatic cleanup prevents buildup of old transcripts
|
||||
- **Configurable** - All paths and settings via environment variables
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
# Basic usage (expects corpus in datasets/epstein-mode/)
|
||||
bun ragtime/ragtime.ts
|
||||
|
||||
# With custom corpus path
|
||||
RAGTIME_CORPUS_PATH=/path/to/emails bun ragtime/ragtime.ts
|
||||
|
||||
# Limit files for testing
|
||||
RAGTIME_FILE_LIMIT=5 bun ragtime/ragtime.ts
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
| Environment Variable | Default | Description |
|
||||
|---------------------|---------|-------------|
|
||||
| `RAGTIME_CORPUS_PATH` | `./datasets/epstein-mode` | Path to folder containing .md email files |
|
||||
| `RAGTIME_PLUGIN_PATH` | `./plugin` | Path to claude-mem plugin |
|
||||
| `CLAUDE_MEM_WORKER_PORT` | `37777` | Worker service port |
|
||||
| `RAGTIME_TRANSCRIPT_MAX_AGE` | `24` | Max age of transcripts to keep (hours) |
|
||||
| `RAGTIME_PROJECT_NAME` | `ragtime-investigation` | Project name for grouping |
|
||||
| `RAGTIME_FILE_LIMIT` | `0` | Limit files to process (0 = all) |
|
||||
| `RAGTIME_SESSION_DELAY` | `2000` | Delay between sessions (ms) |
|
||||
|
||||
## Corpus Format
|
||||
|
||||
The corpus directory should contain markdown files with email content. Files are processed in numeric order based on the first number in the filename:
|
||||
|
||||
```
|
||||
datasets/epstein-mode/
|
||||
0001.md
|
||||
0002.md
|
||||
0003.md
|
||||
...
|
||||
```
|
||||
|
||||
Each markdown file should contain a single email or document to analyze.
|
||||
|
||||
## How It Works
|
||||
|
||||
1. **Startup**: Sets `CLAUDE_MEM_MODE=email-investigation` and cleans up old transcripts
|
||||
2. **Processing**: For each file:
|
||||
- Starts a NEW Claude session (no continuation)
|
||||
- Claude reads the file and analyzes entities, relationships, timeline events
|
||||
- Claude-mem's context injection hook provides relevant past observations
|
||||
- Worker processes and stores new observations
|
||||
3. **Cleanup**: Periodic and final transcript cleanup prevents buildup
|
||||
|
||||
## License
|
||||
|
||||
This directory is licensed under the **PolyForm Noncommercial License 1.0.0**.
|
||||
|
||||
See [LICENSE](./LICENSE) for full terms.
|
||||
|
||||
### What this means:
|
||||
|
||||
- You can use ragtime for noncommercial purposes
|
||||
- You can modify and distribute it
|
||||
- You cannot use it for commercial purposes without permission
|
||||
|
||||
### Why a different license?
|
||||
|
||||
The main claude-mem repository is licensed under AGPL 3.0, but ragtime uses the more restrictive PolyForm Noncommercial license to ensure it remains freely available for personal and educational use while preventing commercial exploitation.
|
||||
|
||||
---
|
||||
|
||||
For questions about commercial licensing, please contact the project maintainer.
|
||||
288
.agent/services/claude-mem/ragtime/ragtime.ts
Normal file
288
.agent/services/claude-mem/ragtime/ragtime.ts
Normal file
@@ -0,0 +1,288 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* RAGTIME - Email Investigation Batch Processor
|
||||
*
|
||||
* Processes email corpus files through Claude using email-investigation mode.
|
||||
* Each file gets a NEW session - context is managed by Claude-mem's context
|
||||
* injection hook, not by conversation continuation.
|
||||
*
|
||||
* Features:
|
||||
* - Email-investigation mode for entity/relationship/timeline extraction
|
||||
* - Self-iterating loop (each file = new session)
|
||||
* - Transcript cleanup to prevent buildup
|
||||
* - Configurable paths via environment or defaults
|
||||
*/
|
||||
|
||||
import { query } from "@anthropic-ai/claude-agent-sdk";
|
||||
import * as fs from "fs";
|
||||
import * as path from "path";
|
||||
import { homedir } from "os";
|
||||
|
||||
/**
 * Read an integer setting from the environment.
 *
 * Returns `fallback` when the variable is unset, empty, not parseable as a
 * base-10 integer, or below `min`. This keeps a typo in the environment from
 * leaking NaN (or a negative number) into the port, delay, limit and age
 * values built from it.
 *
 * @param name     Environment variable to read.
 * @param fallback Value used when the variable is missing or invalid.
 * @param min      Smallest accepted value (inclusive).
 * @returns The parsed integer, or `fallback`.
 */
function readIntEnv(name: string, fallback: number, min = 0): number {
  const raw = process.env[name];
  if (raw === undefined || raw === "") {
    return fallback;
  }

  const parsed = Number.parseInt(raw, 10);
  if (Number.isNaN(parsed) || parsed < min) {
    console.warn(`Ignoring invalid ${name}="${raw}", using ${fallback}`);
    return fallback;
  }

  return parsed;
}

// Configuration - can be overridden via environment variables
const CONFIG = {
  // Path to corpus folder containing .md files
  corpusPath: process.env.RAGTIME_CORPUS_PATH ||
    path.join(process.cwd(), "datasets", "epstein-mode"),

  // Path to claude-mem plugin
  pluginPath: process.env.RAGTIME_PLUGIN_PATH ||
    path.join(process.cwd(), "plugin"),

  // Worker port (0 is not a usable port, so the minimum is 1)
  workerPort: readIntEnv("CLAUDE_MEM_WORKER_PORT", 37777, 1),

  // Max age of transcripts to keep (in hours)
  transcriptMaxAgeHours: readIntEnv("RAGTIME_TRANSCRIPT_MAX_AGE", 24),

  // Project name for grouping transcripts
  projectName: process.env.RAGTIME_PROJECT_NAME || "ragtime-investigation",

  // Limit files to process (0 = all)
  fileLimit: readIntEnv("RAGTIME_FILE_LIMIT", 0),

  // Delay between sessions (ms) - gives worker time to process
  sessionDelayMs: readIntEnv("RAGTIME_SESSION_DELAY", 2000),
};

// Set email-investigation mode for Claude-mem. This is written to the
// process environment at module load, before any session is started.
process.env.CLAUDE_MEM_MODE = "email-investigation";
|
||||
|
||||
/**
 * Collect the corpus files for this run.
 *
 * Lists the entries of CONFIG.corpusPath whose names end in ".md", orders them
 * by the first run of digits in each name (names without digits count as 0),
 * and returns them joined onto CONFIG.corpusPath. When CONFIG.fileLimit is
 * positive, only that many entries from the front of the list are returned.
 *
 * Exits the process with status 1 if the directory does not exist or contains
 * no ".md" entries.
 */
function getFilesToProcess(): string[] {
  const corpusDir = CONFIG.corpusPath;

  if (!fs.existsSync(corpusDir)) {
    console.error(`Corpus path does not exist: ${corpusDir}`);
    console.error("Set RAGTIME_CORPUS_PATH environment variable or create the directory");
    process.exit(1);
  }

  // First number appearing in a filename (e.g., "0001.md" -> 1)
  const firstNumber = (name: string): number =>
    parseInt(name.match(/\d+/)?.[0] || "0", 10);

  const markdownNames = fs
    .readdirSync(corpusDir)
    .filter((name) => name.endsWith(".md"));
  markdownNames.sort((left, right) => firstNumber(left) - firstNumber(right));

  const ordered = markdownNames.map((name) => path.join(corpusDir, name));

  if (ordered.length === 0) {
    console.error(`No .md files found in: ${corpusDir}`);
    process.exit(1);
  }

  // A positive limit keeps only the leading entries
  return CONFIG.fileLimit > 0 ? ordered.slice(0, CONFIG.fileLimit) : ordered;
}
|
||||
|
||||
/**
 * Delete stale Claude transcripts so repeated runs do not pile up on disk.
 *
 * Scans each directory under ~/.claude/projects, removes ".jsonl" files whose
 * mtime is older than CONFIG.transcriptMaxAgeHours, then removes any project
 * directory that is left empty. Failures are logged and skipped per file and
 * per project, so one unreadable entry does not stop the rest of the sweep.
 * This function never throws.
 *
 * NOTE(review): the sweep covers every project directory under
 * ~/.claude/projects, not only those created by this tool, and
 * CONFIG.projectName is not consulted here - confirm that is intended.
 */
async function cleanupOldTranscripts(): Promise<void> {
  const transcriptsBase = path.join(homedir(), ".claude", "projects");

  if (!fs.existsSync(transcriptsBase)) {
    console.log("No transcripts directory found, skipping cleanup");
    return;
  }

  const maxAgeMs = CONFIG.transcriptMaxAgeHours * 60 * 60 * 1000;
  const now = Date.now();
  let cleaned = 0;

  let projectDirs: string[];
  try {
    projectDirs = fs.readdirSync(transcriptsBase);
  } catch (err) {
    console.warn("Transcript cleanup error:", err);
    return;
  }

  for (const projectDir of projectDirs) {
    const projectPath = path.join(transcriptsBase, projectDir);

    // Each project is handled in its own try block: an entry that vanishes or
    // cannot be read mid-sweep must not abort cleanup of the remaining ones.
    try {
      if (!fs.statSync(projectPath).isDirectory()) continue;

      // Check for .jsonl transcript files
      for (const file of fs.readdirSync(projectPath)) {
        if (!file.endsWith(".jsonl")) continue;

        const filePath = path.join(projectPath, file);

        let fileAge: number;
        try {
          fileAge = now - fs.statSync(filePath).mtimeMs;
        } catch (err) {
          console.warn(`Failed to stat transcript: ${filePath}`, err);
          continue;
        }

        if (fileAge > maxAgeMs) {
          try {
            fs.unlinkSync(filePath);
            cleaned++;
          } catch (err) {
            console.warn(`Failed to delete old transcript: ${filePath}`, err);
          }
        }
      }

      // Remove empty project directories
      if (fs.readdirSync(projectPath).length === 0) {
        try {
          fs.rmdirSync(projectPath);
        } catch {
          // Ignore - the directory may have been refilled or removed meanwhile
        }
      }
    } catch (err) {
      console.warn(`Transcript cleanup error in ${projectPath}:`, err);
    }
  }

  if (cleaned > 0) {
    console.log(`Cleaned up ${cleaned} old transcript(s)`);
  }
}
|
||||
|
||||
/**
 * Block until the worker reports an idle queue.
 *
 * Polls http://localhost:<CONFIG.workerPort>/api/processing-status every
 * 500 ms and returns once the response has `queueDepth === 0` and a falsy
 * `isProcessing`. Gives up without throwing after 5 minutes, on a non-OK
 * response, or on a request error (including a single request exceeding its
 * own timeout), so the caller always continues.
 */
async function waitForQueueToEmpty(): Promise<void> {
  const maxWaitTimeMs = 5 * 60 * 1000; // 5 minutes maximum
  const pollIntervalMs = 500;
  const requestTimeoutMs = 10 * 1000;
  const startTime = Date.now();
  const statusUrl = `http://localhost:${CONFIG.workerPort}/api/processing-status`;

  while (true) {
    try {
      // The overall deadline is only checked between requests, so each request
      // needs its own timeout: otherwise a worker that accepts the connection
      // but never answers would stall the whole batch indefinitely.
      const response = await fetch(statusUrl, {
        signal: AbortSignal.timeout(requestTimeoutMs),
      });

      if (!response.ok) {
        console.error(`Failed to get processing status: ${response.status}`);
        break;
      }

      const status = await response.json();

      // Exit when queue is empty
      if (status.queueDepth === 0 && !status.isProcessing) {
        break;
      }

      // Check timeout
      if (Date.now() - startTime > maxWaitTimeMs) {
        console.warn("Queue did not empty within timeout, continuing anyway");
        break;
      }

      await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
    } catch (error) {
      console.error("Error polling worker status:", error);
      // Short pause before giving up, then let the caller carry on
      await new Promise((resolve) => setTimeout(resolve, 1000));
      break;
    }
  }
}
|
||||
|
||||
/**
 * Process a single file in a NEW session.
 *
 * Starts a fresh `query()` run with CONFIG.corpusPath as the working directory
 * and the local plugin at CONFIG.pluginPath, then streams its messages:
 * assistant text is echoed to the console (truncated to 500 characters) and
 * the final result is reported, including results that did not succeed.
 * Errors thrown while running the session are logged, not rethrown, so one
 * bad file does not stop the batch.
 *
 * @param file  Path of the file the prompt asks Claude to read.
 * @param index Zero-based position of the file in the batch (for the log line).
 * @param total Number of files in the batch (for the log line).
 */
async function processFile(file: string, index: number, total: number): Promise<void> {
  const filename = path.basename(file);
  console.log(`\n[${index + 1}/${total}] Processing: ${filename}`);

  // Truncate long responses for console
  const maxLoggedChars = 500;
  const preview = (text: string): string =>
    text.length > maxLoggedChars ? text.substring(0, maxLoggedChars) + "..." : text;

  try {
    const session = query({
      prompt: `Read ${file} and analyze it in the context of the investigation. Look for entities, relationships, timeline events, and any anomalies. Cross-reference with what you know from the injected context above.`,
      options: {
        cwd: CONFIG.corpusPath,
        plugins: [{ type: "local", path: CONFIG.pluginPath }],
      },
    });

    for await (const message of session) {
      if (message.type === "assistant") {
        // Log assistant responses
        const content = message.message.content;
        if (Array.isArray(content)) {
          for (const block of content) {
            if (block.type === "text" && block.text) {
              console.log("Assistant:", preview(block.text));
            }
          }
        } else if (typeof content === "string") {
          console.log("Assistant:", preview(content));
        }
      } else if (message.type === "result") {
        // Log completion; a non-success result must not pass silently,
        // otherwise a failed session looks identical to a finished one.
        if (message.subtype === "success") {
          console.log(`Completed: ${filename}`);
        } else {
          console.error(`Session for ${filename} ended without success: ${message.subtype}`);
        }
      }
    }
  } catch (err) {
    console.error(`Error processing ${filename}:`, err);
  }
}
|
||||
|
||||
/**
 * Run the whole batch.
 *
 * Prints the run banner, cleans up old transcripts, resolves the file list,
 * then handles the files one at a time: process the file, wait for the worker
 * queue, pause for CONFIG.sessionDelayMs (except after the last file), and
 * repeat the transcript cleanup after every 10th file. A final cleanup and a
 * closing banner end the run.
 */
async function main(): Promise<void> {
  const rule = "=".repeat(60);
  const banner = [
    rule,
    "RAGTIME Email Investigation Processor",
    rule,
    `Mode: email-investigation`,
    `Corpus: ${CONFIG.corpusPath}`,
    `Plugin: ${CONFIG.pluginPath}`,
    `Worker: http://localhost:${CONFIG.workerPort}`,
    `Transcript cleanup: ${CONFIG.transcriptMaxAgeHours}h`,
    rule,
  ];
  for (const line of banner) {
    console.log(line);
  }

  // Start from a clean slate before any session is created
  await cleanupOldTranscripts();

  const files = getFilesToProcess();
  console.log(`\nFound ${files.length} file(s) to process\n`);

  // One brand-new session per file
  for (const [position, file] of files.entries()) {
    await processFile(file, position, files.length);

    // Let the worker drain its observation queue
    console.log("Waiting for worker queue...");
    await waitForQueueToEmpty();

    // Pause between sessions, but not after the final one
    const isLast = position === files.length - 1;
    if (!isLast && CONFIG.sessionDelayMs > 0) {
      await new Promise((resolve) => setTimeout(resolve, CONFIG.sessionDelayMs));
    }

    // Periodic transcript cleanup (every 10 files)
    const processedCount = position + 1;
    if (processedCount % 10 === 0) {
      await cleanupOldTranscripts();
    }
  }

  // Closing sweep once every file has been handled
  await cleanupOldTranscripts();

  console.log("\n" + rule);
  console.log("Investigation complete");
  console.log(rule);
}
|
||||
|
||||
// Entry point: report any error that escapes main() and exit with status 1
function reportFatal(reason: unknown): never {
  console.error("Fatal error:", reason);
  process.exit(1);
}

main().catch(reportFatal);
|
||||
Reference in New Issue
Block a user