Merging Examples

This page provides practical examples of merging extracted data into knowledge graphs using the Graphora client library.

Basic Merging Operation

After transforming documents, you can merge the extracted data into a knowledge graph:

from graphora import GraphoraClient

# Build the API client; user_id is required for all API calls.
client = GraphoraClient(
    api_key="your-api-key",
    user_id="your-user-id",
    base_url="https://api.graphora.io",
)

# Identifiers of a transformation that has already completed.
transform_id = "transform_123456789"
session_id = "ont_123456789"  # Ontology ID

# Kick off the merge and keep its ID for the status lookup below.
merge_response = client.start_merge(session_id, transform_id)
merge_id = merge_response.merge_id
print(f"Merge started with ID: {merge_id}")

# Take a single status snapshot of the merge that was just started.
status = client.get_merge_status(merge_id)
print(f"Merge status: {status.status}")
print(f"Progress: {status.progress:.2%}")

Monitoring Merge Progress

For large datasets, merging can take some time. Here’s how to monitor the progress:

from graphora import GraphoraClient
import time

# Initialize the client with user ID (required)
client = GraphoraClient(
    base_url="https://api.graphora.io",
    user_id="your-user-id",  # Required for all API calls
    api_key="your-api-key",
)

# Start a merge (assuming you have already completed a transformation)
session_id = "ont_123456789"  # Ontology ID
transform_id = "transform_123456789"
merge_response = client.start_merge(session_id, transform_id)
merge_id = merge_response.merge_id

# Polling settings. The overall deadline keeps this script from hanging
# forever if the merge never reports one of the terminal statuses.
POLL_INTERVAL_SECONDS = 5
MAX_WAIT_SECONDS = 30 * 60  # Adjust to the size of your dataset
TERMINAL_STATUSES = ("COMPLETED", "FAILED")

# Monitor the merge progress until it finishes or the deadline passes.
# time.monotonic() is used so wall-clock adjustments cannot affect the wait.
deadline = time.monotonic() + MAX_WAIT_SECONDS
while True:
    status = client.get_merge_status(merge_id)
    print(f"Status: {status.status}, Progress: {status.progress:.2%}")

    if status.status in TERMINAL_STATUSES:
        break
    if time.monotonic() >= deadline:
        print(f"Gave up waiting for merge {merge_id} after {MAX_WAIT_SECONDS} seconds")
        break

    # Wait before checking again
    time.sleep(POLL_INTERVAL_SECONDS)

# Act on the last status that was observed.
if status.status == "COMPLETED":
    print("Merge completed successfully!")
    # The merge was successful, so the graph data can be retrieved
    graph = client.get_merged_graph(merge_id, transform_id)
    print(f"Retrieved graph with {len(graph.nodes)} nodes and {len(graph.edges)} edges")
elif status.status == "FAILED":
    print(f"Merge failed: {status.error}")

Handling Merge Conflicts

When merging data, conflicts may arise. Here’s how to handle them:

from graphora import GraphoraClient

# Build the API client; user_id is required for all API calls.
client = GraphoraClient(
    api_key="your-api-key",
    user_id="your-user-id",
    base_url="https://api.graphora.io",
)

# Fetch the conflicts recorded for a merge that has detected some.
merge_id = "merge_123456789"
conflicts = client.get_conflicts(merge_id)

print(f"Found {len(conflicts)} conflicts")

# Learning comment sent with every resolution to improve future conflict
# resolution; it is the same for each conflict, so it is defined once here.
learning_comment = "Accepting this entity as it appears to be accurate"

# Walk through the conflicts, describing and then resolving each one.
for conflict in conflicts:
    print(f"Conflict ID: {conflict.id}")
    print(f"Entity Type: {conflict.entity_type}")
    print(f"Entity ID: {conflict.entity_id}")
    print(f"Conflicting Properties: {conflict.properties}")

    # Example resolution: accept the conflict as-is. A fresh dict is created
    # per conflict; put specific property overrides in it if you need them.
    changed_props = {}

    success = client.resolve_conflict(
        merge_id,
        conflict.id,
        changed_props,
        "accept",  # Resolution strategy
        learning_comment,
    )

    if not success:
        print(f"Failed to resolve conflict {conflict.id}")
        continue
    print(f"Resolved conflict {conflict.id}")

Getting Merge Statistics

You can get detailed statistics about a merge operation:

from graphora import GraphoraClient

# Build the API client; user_id is required for all API calls.
client = GraphoraClient(
    api_key="your-api-key",
    user_id="your-user-id",
    base_url="https://api.graphora.io",
)

# Fetch the statistics for a given merge.
merge_id = "merge_123456789"
stats = client.get_merge_statistics(merge_id)

# (label, key) pairs in display order; a key missing from the statistics
# is reported as 0.
STAT_FIELDS = (
    ("Total entities processed", "total_entities"),
    ("Entities merged", "merged_entities"),
    ("New entities created", "new_entities"),
    ("Conflicts detected", "conflicts_detected"),
    ("Conflicts resolved", "conflicts_resolved"),
)

print("Merge Statistics:")
for label, key in STAT_FIELDS:
    print(f"{label}: {stats.get(key, 0)}")

Retrieving and Using the Merged Graph

Once a merge is complete, you can retrieve and use the graph data:

from graphora import GraphoraClient

# Build the API client; user_id is required for all API calls.
client = GraphoraClient(
    api_key="your-api-key",
    user_id="your-user-id",
    base_url="https://api.graphora.io",
)

# Retrieve the graph produced by a given merge/transformation pair.
transform_id = "transform_123456789"
merge_id = "merge_123456789"
graph_response = client.get_merged_graph(merge_id, transform_id)

# Report the overall size of the graph.
print(f"Graph contains {len(graph_response.nodes)} nodes and {len(graph_response.edges)} edges")

# Number of nodes and edges to show in the previews below.
PREVIEW_COUNT = 5

# Preview the first few nodes.
for node in graph_response.nodes[:PREVIEW_COUNT]:
    print(f"Node {node.id}: {node.labels} - {node.properties}")

# Preview the first few edges.
for edge in graph_response.edges[:PREVIEW_COUNT]:
    print(f"Edge {edge.id}: {edge.source} -> {edge.target} ({edge.type})")

These examples demonstrate the basic operations for merging data with the Graphora client library. For more detailed information about merging concepts and options, see the Merging Concepts page.