Pulling and analyzing scientific datasets is data-engineering/integration work in the build stage. Connecting to and ingesting from an external database fits build > integrations.

Common Questions / FAQ

Is Alphafold Database Fetch And Analyze safe to install?

skills.sh reports 3 of 3 security scanners passed. Review the Security Audits panel on this page before installing in production.

SKILL.md

README | SKILL.md - Alphafold Database Fetch And Analyze

# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Analyzes Predicted Aligned Error (PAE) and detects domain boundaries."""

# /// script
# requires-python = ">=3.10"
# dependencies = []
# ///

import argparse
import itertools
import json
import os


def find_sub_domains(pae_matrix, distance_cutoff=7.0, min_domain_size=40):
  """Splits the residue sequence into contiguous low-PAE segments.

  Residues are scanned in index order. Each residue is compared against the
  (up to) 20 most recently accepted residues of the open segment, using the
  mean of pae_matrix[r][i] and pae_matrix[i][r] over that window. The residue
  extends the segment when that mean is strictly below the cutoff; otherwise
  the segment is closed and a new one starts at this residue.

  Args:
    pae_matrix: Square matrix indexed as pae_matrix[row][col], 0-based.
    distance_cutoff: Mean-PAE threshold below which a residue joins the
      currently open segment.
    min_domain_size: Closed segments with fewer residues than this are
      discarded.

  Returns:
    A list of [start, end] pairs, 1-based and inclusive, one per kept segment.
  """
  kept_runs = []
  run = []

  for idx in range(len(pae_matrix)):
    if run:
      # Slicing with -20 yields the whole run while it is still shorter
      # than the window, so len(tail) is the effective window size.
      tail = run[-20:]
      total = sum(pae_matrix[j][idx] + pae_matrix[idx][j] for j in tail)
      if total / (2.0 * len(tail)) < distance_cutoff:
        run.append(idx)
        continue
      # The residue does not fit: close the open run, keeping it only if
      # it is long enough.
      if len(run) >= min_domain_size:
        kept_runs.append(run)
    # Reached for the very first residue and after every break.
    run = [idx]

  if len(run) >= min_domain_size:
    kept_runs.append(run)

  # Convert 0-based index runs to 1-based inclusive [start, end] pairs.
  return [[members[0] + 1, members[-1] + 1] for members in kept_runs]


def merge_global_domains(boundaries, pae_matrix, merge_cutoff=15.0):
  """Merges consecutive sub-domains whose interface PAE is below a cutoff.

  For each sub-domain after the first, the mean PAE (both directions) is
  computed between the last (up to) 31 residues of the domain accumulated so
  far and the first (up to) 31 residues of the next sub-domain. If that mean
  is strictly below merge_cutoff, the accumulated domain is extended to the
  next sub-domain's end; otherwise the next sub-domain starts a new domain.

  The input is never modified: every [start, end] pair is copied before it
  is extended, so the caller's `boundaries` keeps its original values.

  Args:
    boundaries: Ordered list of [start, end] pairs, 1-based and inclusive.
    pae_matrix: Square matrix indexed as pae_matrix[row][col], 0-based.
    merge_cutoff: Mean-PAE threshold below which neighbours are merged.

  Returns:
    A new list of [start, end] pairs. Only domains spanning more than 50
    residues are kept; an empty list is returned for empty input.
  """
  if not boundaries:
    return []

  # Copy the first pair: it is extended in place below, and aliasing the
  # caller's list would silently rewrite their boundaries.
  merged = [list(boundaries[0])]

  for candidate in boundaries[1:]:
    # Convert the 1-based inclusive bounds to 0-based matrix indices.
    prev_end = merged[-1][1] - 1
    curr_start = candidate[0] - 1

    # Restrict the comparison to residues near the interface, clamped to
    # the extent of each domain.
    lookback = max(merged[-1][0] - 1, prev_end - 30)
    lookfwd = min(candidate[1] - 1, curr_start + 30)

    pae_sum = 0
    n_pairs = 0
    for r1 in range(lookback, prev_end + 1):
      for r2 in range(curr_start, lookfwd + 1):
        pae_sum += pae_matrix[r1][r2] + pae_matrix[r2][r1]
        n_pairs += 2

    if n_pairs > 0 and (pae_sum / n_pairs) < merge_cutoff:
      merged[-1][1] = candidate[1]
    else:
      merged.append(list(candidate))

  # Drop domains of 50 residues or fewer.
  return [dom for dom in merged if (dom[1] - dom[0] + 1) > 50]


def analyze_pae(pae_file):
  """Parses a PAE JSON file and calculates structural domain metrics."""
  print(
      "\n[*] Analyzing Predicted Aligned Error (PAE) from"
      f" {os.path.basename(pae_file)}..."
  )
  try:
    with open(pae_file, "r") as f:
      data = json.load(f)[0]

    if "predicted_aligned_error" in data:
      pae = data["predicted_aligned_error"]
    elif "distance" in data:
      pae = data["distance"]
    else:
      print(
          "     [!] Could not locate PAE matrix in JSON keys:"
          f" {list(data.keys())}"
      )
      return

    flat_pae = list(itertools.chain.from_iterable(pae))
    if not flat_pae:
      print("     [!] PAE matrix is empty.")
      return

    mean_pae = sum(flat_pae) / len(flat_pae)
    max_pae = max(flat_pae)
    min_pae = min(flat_pae)
    confident_pairs = sum(1 for p in flat_pae if p < 5.0) / len(flat_pae) * 100

    print(f"  -> PAE Matrix Shape: {len(pae)}x{len(pae[0])}")
    print(f"  -> Mean Error: {mean_pae:.2f} Å")
    print(
        f"  -> Max Error: {max_pae:.2f} Å (suggests max possible distance"
        " between domains)"
    )
    print(f"  -> Min Error: {min_pae:.2f} Å")
    print(
        "  -> Fraction of confident residue pairs (<5Å PAE):"
        f" {confident_pairs:.1f}%"

SKILL.md

README | SKILL.md - Alphafold Database Fetch And Analyze

# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Analyzes Predicted Aligned Error (PAE) and detects domain boundaries."""

# /// script
# requires-python = ">=3.10"
# dependencies = []
# ///

import argparse
import itertools
import json
import os


def find_sub_domains(pae_matrix, distance_cutoff=7.0, min_domain_size=40):
  """Splits the residue sequence into contiguous low-PAE segments.

  Residues are scanned in index order. Each residue is compared against the
  (up to) 20 most recently accepted residues of the open segment, using the
  mean of pae_matrix[r][i] and pae_matrix[i][r] over that window. The residue
  extends the segment when that mean is strictly below the cutoff; otherwise
  the segment is closed and a new one starts at this residue.

  Args:
    pae_matrix: Square matrix indexed as pae_matrix[row][col], 0-based.
    distance_cutoff: Mean-PAE threshold below which a residue joins the
      currently open segment.
    min_domain_size: Closed segments with fewer residues than this are
      discarded.

  Returns:
    A list of [start, end] pairs, 1-based and inclusive, one per kept segment.
  """
  kept_runs = []
  run = []

  for idx in range(len(pae_matrix)):
    if run:
      # Slicing with -20 yields the whole run while it is still shorter
      # than the window, so len(tail) is the effective window size.
      tail = run[-20:]
      total = sum(pae_matrix[j][idx] + pae_matrix[idx][j] for j in tail)
      if total / (2.0 * len(tail)) < distance_cutoff:
        run.append(idx)
        continue
      # The residue does not fit: close the open run, keeping it only if
      # it is long enough.
      if len(run) >= min_domain_size:
        kept_runs.append(run)
    # Reached for the very first residue and after every break.
    run = [idx]

  if len(run) >= min_domain_size:
    kept_runs.append(run)

  # Convert 0-based index runs to 1-based inclusive [start, end] pairs.
  return [[members[0] + 1, members[-1] + 1] for members in kept_runs]


def merge_global_domains(boundaries, pae_matrix, merge_cutoff=15.0):
  """Merges consecutive sub-domains whose interface PAE is below a cutoff.

  For each sub-domain after the first, the mean PAE (both directions) is
  computed between the last (up to) 31 residues of the domain accumulated so
  far and the first (up to) 31 residues of the next sub-domain. If that mean
  is strictly below merge_cutoff, the accumulated domain is extended to the
  next sub-domain's end; otherwise the next sub-domain starts a new domain.

  The input is never modified: every [start, end] pair is copied before it
  is extended, so the caller's `boundaries` keeps its original values.

  Args:
    boundaries: Ordered list of [start, end] pairs, 1-based and inclusive.
    pae_matrix: Square matrix indexed as pae_matrix[row][col], 0-based.
    merge_cutoff: Mean-PAE threshold below which neighbours are merged.

  Returns:
    A new list of [start, end] pairs. Only domains spanning more than 50
    residues are kept; an empty list is returned for empty input.
  """
  if not boundaries:
    return []

  # Copy the first pair: it is extended in place below, and aliasing the
  # caller's list would silently rewrite their boundaries.
  merged = [list(boundaries[0])]

  for candidate in boundaries[1:]:
    # Convert the 1-based inclusive bounds to 0-based matrix indices.
    prev_end = merged[-1][1] - 1
    curr_start = candidate[0] - 1

    # Restrict the comparison to residues near the interface, clamped to
    # the extent of each domain.
    lookback = max(merged[-1][0] - 1, prev_end - 30)
    lookfwd = min(candidate[1] - 1, curr_start + 30)

    pae_sum = 0
    n_pairs = 0
    for r1 in range(lookback, prev_end + 1):
      for r2 in range(curr_start, lookfwd + 1):
        pae_sum += pae_matrix[r1][r2] + pae_matrix[r2][r1]
        n_pairs += 2

    if n_pairs > 0 and (pae_sum / n_pairs) < merge_cutoff:
      merged[-1][1] = candidate[1]
    else:
      merged.append(list(candidate))

  # Drop domains of 50 residues or fewer.
  return [dom for dom in merged if (dom[1] - dom[0] + 1) > 50]


def analyze_pae(pae_file):
  """Parses a PAE JSON file and calculates structural domain metrics."""
  print(
      "\n[*] Analyzing Predicted Aligned Error (PAE) from"
      f" {os.path.basename(pae_file)}..."
  )
  try:
    with open(pae_file, "r") as f:
      data = json.load(f)[0]

    if "predicted_aligned_error" in data:
      pae = data["predicted_aligned_error"]
    elif "distance" in data:
      pae = data["distance"]
    else:
      print(
          "     [!] Could not locate PAE matrix in JSON keys:"
          f" {list(data.keys())}"
      )
      return

    flat_pae = list(itertools.chain.from_iterable(pae))
    if not flat_pae:
      print("     [!] PAE matrix is empty.")
      return

    mean_pae = sum(flat_pae) / len(flat_pae)
    max_pae = max(flat_pae)
    min_pae = min(flat_pae)
    confident_pairs = sum(1 for p in flat_pae if p < 5.0) / len(flat_pae) * 100

    print(f"  -> PAE Matrix Shape: {len(pae)}x{len(pae[0])}")
    print(f"  -> Mean Error: {mean_pae:.2f} Å")
    print(
        f"  -> Max Error: {max_pae:.2f} Å (suggests max possible distance"
        " between domains)"
    )
    print(f"  -> Min Error: {min_pae:.2f} Å")
    print(
        "  -> Fraction of confident residue pairs (<5Å PAE):"
        f" {confident_pairs:.1f}%"

Install

What is this skill?

Recommended Skills

Journey fit

Is Alphafold Database Fetch And Analyze safe to install?

SKILL.md

This week for builders

Install

What is this skill?

Recommended Skills

Journey fit

Is Alphafold Database Fetch And Analyze safe to install?

SKILL.md