<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>https://www.theaidataengineer.com</loc>
<lastmod>2026-06-02T17:03:05.590Z</lastmod>
<changefreq>weekly</changefreq>
<priority>1</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/archive</loc>
<lastmod>2026-06-02T17:03:05.590Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.8</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/about</loc>
<lastmod>2026-06-02T17:03:05.590Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.5</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/enterprise-knowledge-management-with-rag-for-digital-native-companies</loc>
<lastmod>2026-05-25T17:04:23.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/an-exciting-new-chapter-for-monte-carlo</loc>
<lastmod>2026-05-21T15:48:28.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/embeddings-arent-magic-the-predictable-failure-modes-of-rag-retrieval</loc>
<lastmod>2026-05-30T15:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/rag-and-genai-for-regulated-and-public-sector-architectures</loc>
<lastmod>2026-05-25T18:02:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/how-we-built-cloudflares-data-platform-and-an-ai-agent-on-top-of-it</loc>
<lastmod>2026-05-28T13:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/codex-is-becoming-a-productivity-tool-for-everyone</loc>
<lastmod>2026-06-02T02:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/rerankers-arent-magic-either-when-the-cross-encoder-layer-is-worth-the-cost</loc>
<lastmod>2026-05-31T15:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/autonomous-agentic-event-driven-systems-architecture</loc>
<lastmod>2026-05-25T15:19:50.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/axios-at-snowflake-summit-building-a-culture-of-ai-trust-with-monte-carlo</loc>
<lastmod>2026-06-02T06:31:27.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/claude-opus-48-a-modest-but-tangible-improvement</loc>
<lastmod>2026-05-28T23:59:50.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/fivetran-dbt-labs-complete-merger-to-create-the-data-infrastructure-for-trusted</loc>
<lastmod>2026-06-01T13:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/agentic-fleet-management-architecture-for-real-time-operations</loc>
<lastmod>2026-05-19T13:16:26.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/announcing-claude-managed-agents-on-cloudflare</loc>
<lastmod>2026-05-19T13:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/build-a-coding-assistant-with-weaviate-mcp-rag-over-code-docs</loc>
<lastmod>2026-05-21T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/paper-the-coverage-illusion-from-pre-retrieval-routing-failure-to-post-retrieval</loc>
<lastmod>2026-05-26T16:08:34.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/beyond-the-model-why-data-scientists-must-embrace-apis-and-api-documentation</loc>
<lastmod>2026-05-24T13:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/github-souravroy-etlduckle</loc>
<lastmod>2026-05-27T12:52:46.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/github-nanoflow-ioengram</loc>
<lastmod>2026-05-22T06:06:14.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/paper-graphreview-scientific-paper-evaluation-via-llm-based-graph-message-passin</loc>
<lastmod>2026-05-26T15:58:49.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/paper-muchator-enabling-active-music-discovery-via-conversational-music-llms-in</loc>
<lastmod>2026-05-26T14:42:52.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/ai-ready-data-in-practice-what-dbt-semantic-layer-and-dbts-mcp-server-and-agent</loc>
<lastmod>2026-05-19T21:20:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/stop-using-llms-like-giant-problem-solvers</loc>
<lastmod>2026-05-26T13:30:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/the-ultimate-beginners-guide-to-building-an-ai-agent-in-python</loc>
<lastmod>2026-05-24T17:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/recent-developments-in-llm-architectures-kv-sharing-mhc-and-compressed-attention</loc>
<lastmod>2026-05-16T11:33:51.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/paper-fairness-aware-retrieval-optimization-for-retrieval-augmented-generation</loc>
<lastmod>2026-05-15T09:47:44.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/granite-embedding-multilingual-r2-open-apache-20-multilingual-embeddings-with-32</loc>
<lastmod>2026-05-14T18:55:01.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/proxy-pointer-rag-solving-entity-and-relationship-sprawl-in-large-knowledge-grap</loc>
<lastmod>2026-05-19T12:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/built-a-fully-offline-suitcase-robot-around-a-jetson-orin-nx-super-16gb-gemma-4</loc>
<lastmod>2026-05-15T15:09:18.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/i-built-a-coding-agent-that-gets-87-on-benchmarks-with-a-4b-parameter-model-here</loc>
<lastmod>2026-05-18T06:38:11.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/github-python-telegrambotai-auto-trading</loc>
<lastmod>2026-05-13T15:46:25.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/ai-assisted-analytics-engineering-docusigns-framework-for-scaling-dbt-unit-testi</loc>
<lastmod>2026-05-18T14:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/fine-tuning-nvidia-cosmos-predict-25-with-loradora-for-robot-video-generation</loc>
<lastmod>2026-05-18T16:00:21.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/building-blocks-for-foundation-model-training-and-inference-on-aws</loc>
<lastmod>2026-05-11T23:18:26.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/the-must-know-topics-for-an-llm-engineer</loc>
<lastmod>2026-05-09T15:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/i-got-tired-of-spending-30-minutes-setting-up-gpu-instances-every-time-i-wanted</loc>
<lastmod>2026-05-10T19:16:26.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/emo-pretraining-mixture-of-experts-for-emergent-modularity</loc>
<lastmod>2026-05-08T16:03:50.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/using-transformers-to-forecast-incredibly-rare-solar-flares</loc>
<lastmod>2026-05-11T17:41:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/how-i-approach-mlops-system-design-questions-in-interviews-sharing-the-thinking</loc>
<lastmod>2026-05-09T08:57:42.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/multi-token-prediction-mtp-for-llamacpp-gemma-4-speedup-by-40</loc>
<lastmod>2026-05-08T00:27:44.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/llm-summarizers-skip-the-identification-step</loc>
<lastmod>2026-05-10T13:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/computer-build-using-intel-optane-persistent-memory-can-run-1-trillion-parameter</loc>
<lastmod>2026-05-11T19:54:25.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/production-rag-what-i-learned-from-processing-5m-documents</loc>
<lastmod>2025-10-20T15:55:36.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/meta-superintelligence-labs-first-paper-is-about-rag</loc>
<lastmod>2025-10-11T23:16:05.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/pgvectorize-vector-search-and-rag-on-postgres</loc>
<lastmod>2024-03-06T08:34:16.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/gemini-embedding-powering-rag-and-context-engineering</loc>
<lastmod>2025-07-31T16:47:54.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/embeddings-what-they-are-and-why-they-matter</loc>
<lastmod>2023-10-23T13:42:12.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/storing-openai-embeddings-in-postgres-with-pgvector</loc>
<lastmod>2023-02-06T21:24:09.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/all-in-one-embedding-model-for-interleaved-text-images-and-screenshots</loc>
<lastmod>2024-11-17T07:42:08.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/zvec-a-lightweight-fast-in-process-vector-database</loc>
<lastmod>2026-02-13T08:53:51.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/your-llm-is-only-as-good-as-what-it-retrieves</loc>
<lastmod>2026-05-06T00:00:00.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/so-you-wanna-build-a-local-rag</loc>
<lastmod>2025-11-28T16:54:56.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/open-source-rule-based-pdf-parser-for-rag</loc>
<lastmod>2024-01-24T05:31:11.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/helixdb-open-source-vector-graph-database-for-ai-applications-rust</loc>
<lastmod>2025-05-13T17:26:38.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/paper-needle-in-rag-prompt-conditioned-character-level-traceback-of-poisoned-spa</loc>
<lastmod>2026-05-03T08:42:29.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/articles/we-open-sourced-our-entire-text-to-sql-product</loc>
<lastmod>2024-05-23T15:50:25.000Z</lastmod>
<changefreq>monthly</changefreq>
<priority>0.7</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/topic/data-pipeline</loc>
<lastmod>2026-06-02T17:03:05.590Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/topic/data-quality</loc>
<lastmod>2026-06-02T17:03:05.590Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/topic/embeddings</loc>
<lastmod>2026-06-02T17:03:05.590Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/topic/fine-tuning</loc>
<lastmod>2026-06-02T17:03:05.590Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/topic/llm-serving</loc>
<lastmod>2026-06-02T17:03:05.590Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/topic/ml-observability</loc>
<lastmod>2026-06-02T17:03:05.590Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/topic/mlops</loc>
<lastmod>2026-06-02T17:03:05.590Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/topic/model-eval</loc>
<lastmod>2026-06-02T17:03:05.590Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/topic/open-source-tools</loc>
<lastmod>2026-06-02T17:03:05.590Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/topic/orchestration</loc>
<lastmod>2026-06-02T17:03:05.590Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/topic/rag</loc>
<lastmod>2026-06-02T17:03:05.590Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/topic/streaming-ml</loc>
<lastmod>2026-06-02T17:03:05.590Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.6</priority>
</url>
<url>
<loc>https://www.theaidataengineer.com/topic/vector-db</loc>
<lastmod>2026-06-02T17:03:05.590Z</lastmod>
<changefreq>weekly</changefreq>
<priority>0.6</priority>
</url>
</urlset>
