NoSQL Basics
Not all data lives in relational databases. MongoDB stores documents, Redis caches key-value pairs, and DynamoDB provides serverless key-value/document storage. As a QA engineer, you need to know how to test these systems — the patterns differ significantly from SQL.
NoSQL Database Types
| Database | Type | Primary Use | QA Testing Focus |
|---|---|---|---|
| MongoDB | Document | Flexible schemas, content management, catalogs | Schema validation, indexing, aggregation pipelines |
| Redis | Key-Value / Cache | Caching, sessions, rate limiting, queues | TTL expiration, cache invalidation, eviction policies |
| DynamoDB | Key-Value / Document | Serverless, high-scale, AWS-native | Capacity limits, query vs scan, GSI (global secondary index) consistency |
| Elasticsearch | Search engine | Full-text search, log aggregation | Index mapping, query accuracy, relevance scoring |
| Cassandra | Wide-column | Time-series, high-write workloads | Consistency levels, partition sizing |
MongoDB Testing
MongoDB stores data as JSON-like documents (BSON). Documents in the same collection can have different structures — which is both a feature and a testing challenge.
Document Structure Validation
from pymongo import MongoClient
import pytest
@pytest.fixture(scope="session")
def mongo_db():
    """Provide the ``testdb`` database of a local MongoDB server.

    The fixture is session-scoped: one client is opened for the whole test
    session and closed after the last test that uses it has finished.
    """
    # Leaving the ``with`` block closes the client, which is the teardown step.
    with MongoClient("mongodb://localhost:27017") as connection:
        yield connection["testdb"]
def test_user_document_structure(mongo_db):
    """Verify user documents have expected fields.

    The document is read back by the ``_id`` returned from the insert, so the
    assertions always run against the document written by this test rather
    than a leftover with the same email from an earlier run. The document is
    removed again afterwards so repeated runs do not accumulate duplicates.
    """
    inserted = mongo_db.users.insert_one({
        "name": "Alice",
        "email": "alice@test.com",
        "tags": ["admin"],
        "profile": {"bio": "Test user", "avatar_url": None},
    })
    try:
        user = mongo_db.users.find_one({"_id": inserted.inserted_id})

        assert user is not None
        assert user["email"] == "alice@test.com"
        # Array and embedded-document fields must round-trip as list / dict.
        assert isinstance(user["tags"], list)
        assert "admin" in user["tags"]
        assert isinstance(user["profile"], dict)
        assert "bio" in user["profile"]
    finally:
        # Clean up even when an assertion fails.
        mongo_db.users.delete_one({"_id": inserted.inserted_id})
def test_schema_validation_enforced(mongo_db):
    """With schema validation configured, invalid documents must be rejected.

    A fresh ``strict_users`` collection is created with a ``$jsonSchema``
    validator requiring string ``name`` and ``email`` fields, where ``email``
    must match ``^.+@.+$``. The test then checks that a valid document is
    accepted and that documents violating the schema raise ``WriteError``.
    The collection is dropped afterwards.
    """
    from pymongo.errors import WriteError

    validator = {
        "$jsonSchema": {
            "bsonType": "object",
            "required": ["name", "email"],
            "properties": {
                "name": {"bsonType": "string"},
                "email": {"bsonType": "string", "pattern": "^.+@.+$"},
            },
        }
    }

    # ``collMod`` only modifies an existing collection and fails when the
    # namespace is missing, so start from a freshly created collection that
    # carries the validator instead.
    mongo_db.drop_collection("strict_users")
    mongo_db.create_collection("strict_users", validator=validator)
    try:
        # Valid document should succeed
        mongo_db.strict_users.insert_one(
            {"name": "Valid", "email": "valid@test.com"}
        )

        # Invalid document should fail: required field missing
        with pytest.raises(WriteError):
            mongo_db.strict_users.insert_one({"name": "Invalid"})

        # Invalid document should fail: email does not match the pattern
        with pytest.raises(WriteError):
            mongo_db.strict_users.insert_one(
                {"name": "Invalid", "email": "not-an-email"}
            )
    finally:
        mongo_db.drop_collection("strict_users")
MongoDB Aggregation Testing
def test_aggregation_pipeline(mongo_db):
    """Test an aggregation pipeline that calculates per-user order statistics.

    The pipeline keeps only completed orders, groups them by ``user_id`` and
    sorts the groups by total spend, highest first. The ``orders`` collection
    is emptied before inserting and again afterwards: the pipeline reads the
    whole collection, so documents left by an earlier run would otherwise
    inflate the sums and counts asserted below.
    """
    orders = mongo_db.orders
    orders.delete_many({})  # start from a known-empty collection
    try:
        # Insert test data
        orders.insert_many([
            {"user_id": "user-1", "total": 50.00, "status": "completed"},
            {"user_id": "user-1", "total": 75.00, "status": "completed"},
            {"user_id": "user-1", "total": 25.00, "status": "cancelled"},
            {"user_id": "user-2", "total": 100.00, "status": "completed"},
        ])

        # Run aggregation
        pipeline = [
            {"$match": {"status": "completed"}},
            {"$group": {
                "_id": "$user_id",
                "total_spent": {"$sum": "$total"},
                "order_count": {"$sum": 1},
            }},
            {"$sort": {"total_spent": -1}},
        ]
        results = list(orders.aggregate(pipeline))

        assert len(results) == 2

        # Highest spender first: 50 + 75 (cancelled order excluded)
        assert results[0]["_id"] == "user-1"
        assert results[0]["total_spent"] == 125.00
        assert results[0]["order_count"] == 2

        assert results[1]["_id"] == "user-2"
        assert results[1]["total_spent"] == 100.00
        assert results[1]["order_count"] == 1
    finally:
        orders.delete_many({})
Redis Testing
Redis is primarily used as a cache, session store, or message broker. Testing Redis focuses on TTL behavior, cache invalidation, and data structure correctness.
TTL Expiration
import redis
import time
import pytest
@pytest.fixture
def redis_client():
    """Yield a Redis client bound to database 1 on localhost, flushed clean.

    The database is flushed before the test as well as after it, so keys left
    behind by an aborted earlier run cannot leak into the test. The client's
    connections are released on teardown.
    """
    client = redis.Redis(host="localhost", port=6379, db=1)
    try:
        client.flushdb()  # Start from an empty database
        yield client
        client.flushdb()  # Clean up after tests
    finally:
        client.close()
def test_cache_ttl_expiration(redis_client):
    """A key stored with a TTL is readable at once and gone after the TTL."""
    key = "session:abc"
    ttl_seconds = 2
    redis_client.setex(key, ttl_seconds, "user-data")

    # Right after the write the value must still be there.
    cached_now = redis_client.get(key)
    assert cached_now is not None

    # Sleep past the TTL, then the key must have disappeared.
    time.sleep(3)
    cached_later = redis_client.get(key)
    assert cached_later is None
def test_cache_ttl_value(redis_client):
    """The remaining TTL reported by Redis matches the TTL that was set."""
    key = "session:def"
    redis_client.setex(key, 300, "data")

    remaining = redis_client.ttl(key)

    # A second or two may already have elapsed by the time we read it back.
    assert 298 <= remaining
    assert remaining <= 300
Cache Invalidation
def test_cache_invalidation_on_update(redis_client, api_client):
    """Updating a user through the API must drop that user's cache entry."""
    cache_key = "user:123"
    user_url = "/users/123"

    # A first read warms the cache.
    api_client.get(user_url)
    assert redis_client.get(cache_key) is not None

    # Writing through the API must evict the cached entry.
    api_client.put(user_url, json={"name": "Updated Name"})
    assert redis_client.get(cache_key) is None

    # The following read returns the new data and caches it again.
    refreshed = api_client.get(user_url)
    assert refreshed.json()["name"] == "Updated Name"
    assert redis_client.get(cache_key) is not None
Redis Data Structures
def test_redis_sorted_set_leaderboard(redis_client):
    """A sorted set behaves as a leaderboard ordered by descending score."""
    board = "leaderboard"
    scores = {"alice": 100, "bob": 85, "charlie": 92}
    redis_client.zadd(board, scores)

    # The two highest scores, best first, each as a (member, score) pair.
    top_two = redis_client.zrevrange(board, 0, 1, withscores=True)
    assert top_two[0] == (b"alice", 100.0)
    assert top_two[1] == (b"charlie", 92.0)

    # Ranks are 0-indexed from the top: alice=0, charlie=1, bob=2.
    bob_rank = redis_client.zrevrank(board, "bob")
    assert bob_rank == 2
def test_redis_list_as_queue(redis_client):
    """A list pushed on the right and popped on the left acts as a FIFO queue."""
    queue = "job_queue"
    redis_client.rpush(queue, "job-1", "job-2", "job-3")

    pending = redis_client.llen(queue)
    assert pending == 3

    # The oldest job comes out first and the queue shrinks by one.
    oldest = redis_client.lpop(queue)
    assert oldest == b"job-1"

    pending = redis_client.llen(queue)
    assert pending == 2
DynamoDB Testing
import boto3
from moto import mock_dynamodb # Mock for testing
@mock_dynamodb
def test_dynamodb_crud():
    """Test CRUD operations on DynamoDB.

    Runs against moto's in-memory mock, so no AWS account is touched. A
    ``users`` table keyed on the string attribute ``id`` is created, then one
    item is put, read, updated, and deleted. Finally a lookup of a key that
    never existed is checked to return no ``Item``.
    """
    # NOTE(review): newer moto releases replace the per-service decorators
    # with a single ``mock_aws`` - confirm against the installed version.
    dynamodb = boto3.resource("dynamodb", region_name="us-east-1")

    # Create table
    table = dynamodb.create_table(
        TableName="users",
        KeySchema=[{"AttributeName": "id", "KeyType": "HASH"}],
        AttributeDefinitions=[{"AttributeName": "id", "AttributeType": "S"}],
        BillingMode="PAY_PER_REQUEST",
    )
    key = {"id": "user-1"}

    # Create
    table.put_item(
        Item={"id": "user-1", "name": "Alice", "email": "alice@test.com"}
    )

    # Read
    response = table.get_item(Key=key)
    assert response["Item"]["name"] == "Alice"

    # Update - ``name`` is a DynamoDB reserved word, so it has to be aliased
    # through ExpressionAttributeNames.
    table.update_item(
        Key=key,
        UpdateExpression="SET #n = :name",
        ExpressionAttributeNames={"#n": "name"},
        ExpressionAttributeValues={":name": "Alice Updated"},
    )
    response = table.get_item(Key=key)
    assert response["Item"]["name"] == "Alice Updated"
    assert response["Item"]["email"] == "alice@test.com"  # untouched attribute

    # Delete
    table.delete_item(Key=key)
    response = table.get_item(Key=key)
    assert "Item" not in response

    # Query non-existent
    response = table.get_item(Key={"id": "nonexistent"})
    assert "Item" not in response
Practical Exercise
- Write MongoDB tests: insert a document, query it, verify structure, test schema validation
- Write Redis tests: set a key with TTL, verify it exists, wait for expiration, verify it is gone
- Write a test for Redis-backed rate limiting: increment a counter, verify it resets after the window
- Test cache invalidation: update data via API, verify the cache was cleared
- Compare testing patterns between SQL constraints and MongoDB schema validation
Key Takeaways
- NoSQL testing focuses on different concerns than SQL: schema flexibility, TTL, cache behavior
- MongoDB: test document structure, aggregation pipelines, and schema validation rules
- Redis: test TTL expiration, cache invalidation, and data structure operations
- DynamoDB: test query patterns, capacity behavior, and eventual consistency
- Use mocking libraries (moto for AWS, mongomock for MongoDB) for unit tests