Advanced Usage Examples
=======================

This section provides advanced usage examples for the `edgar-sec` library, including caching, rate limiting, concurrent requests, and sophisticated data extraction techniques.

Caching and Rate Limiting
-------------------------

The `edgar-sec` library includes built-in support for caching and rate limiting to optimize API usage and comply with SEC guidelines.

.. code-block:: python

    import edgar_sec as ed

    # Initialize the API client with caching enabled
    edgar = ed.EdgarAPI(cache_mode=True)

    # First request will hit the API
    apple_facts = edgar.get_company_facts("0000320193")
    print(f"Number of taxonomies: {len(apple_facts.facts)}")

    # Subsequent identical requests will use the cache (much faster)
    apple_facts_cached = edgar.get_company_facts("0000320193")

    # The built-in rate limiter ensures you don't exceed 10 API calls per second.
    # This is handled automatically for both synchronous and asynchronous requests.
    for cik in ["0000320193", "0000789019", "0001652044", "0001018724", "0001326801"]:
        company = edgar.get_submissions(central_index_key=cik)
        print(f"Retrieved data for {company.name}")

Concurrent Requests with AsyncAPI
---------------------------------

You can use the `Async` attribute to make concurrent requests for improved performance.

.. code-block:: python

    import asyncio
    import edgar_sec as ed

    async def fetch_multiple_companies():
        edgar = ed.EdgarAPI(cache_mode=True)

        # Define company CIKs
        companies = {
            "0000320193": "Apple",
            "0000789019": "Microsoft",
            "0001652044": "Alphabet",   # Alphabet (Google)
            "0001018724": "Amazon",
            "0001326801": "Meta"        # Meta (Facebook)
        }

        # Fetch submission history for multiple companies concurrently
        submission_tasks = [
            edgar.Async.get_submissions(cik) for cik in companies.keys()
        ]
        submissions = await asyncio.gather(*submission_tasks)

        # Process the results
        for submission, name in zip(submissions, companies.values()):
            print(f"{name} ({submission.name}):")
            print(f"  Tickers: {submission.tickers}")
            print(f"  Recent filing: {submission.filings[0].form} on {submission.filings[0].filing_date}")
            print()

        # Now fetch the same concept across all companies
        concept_tasks = [
            edgar.Async.get_company_concept(cik, "us-gaap", "Assets")
            for cik in companies.keys()
        ]
        concepts = await asyncio.gather(*concept_tasks)

        # Compare assets across companies
        print("Total Assets Comparison:")
        for concept, name in zip(concepts, companies.values()):
            # Use the first USD disclosure if one is available
            if concept.units and hasattr(concept.units[0], 'val'):
                latest = concept.units[0]
                print(f"  {name}: ${latest.val:,} ({latest.fy} {latest.fp})")

    # Run the async function
    asyncio.run(fetch_multiple_companies())
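When fetching many companies concurrently, a single failed request will cause `asyncio.gather` to raise and discard every other result. One way to keep the partial results is to pass `return_exceptions=True` and filter afterwards. This is a minimal sketch, assuming `Async.get_submissions` raises an exception for a CIK the API rejects; the invalid CIK below is included only to illustrate the failure path.

.. code-block:: python

    import asyncio
    import edgar_sec as ed

    async def fetch_submissions_tolerant(ciks):
        edgar = ed.EdgarAPI(cache_mode=True)

        tasks = [edgar.Async.get_submissions(cik) for cik in ciks]
        # return_exceptions=True keeps one bad CIK from cancelling the whole batch
        results = await asyncio.gather(*tasks, return_exceptions=True)

        submissions = {}
        for cik, result in zip(ciks, results):
            if isinstance(result, Exception):
                # Report the failed request and move on instead of aborting
                print(f"CIK {cik} failed: {result!r}")
            else:
                submissions[cik] = result
        return submissions

    # "0000000000" is a deliberately invalid CIK used only for illustration
    results = asyncio.run(fetch_submissions_tolerant(["0000320193", "0000000000", "0000789019"]))
    print(f"Retrieved {len(results)} of 3 submission histories")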
Advanced Data Extraction
------------------------

Extract specific financial metrics from company facts for analysis.

.. code-block:: python

    import edgar_sec as ed
    import pandas as pd
    import matplotlib.pyplot as plt

    # Initialize the API client
    edgar = ed.EdgarAPI(cache_mode=True)

    # Fetch all Apple facts
    apple_facts = edgar.get_company_facts("0000320193")

    # Extract a revenue time series
    if "us-gaap" in apple_facts.facts:
        # Look for various revenue concept tags (tagging may vary by company)
        revenue_tags = [
            "Revenues",
            "RevenueFromContractWithCustomerExcludingAssessedTax",
            "SalesRevenueNet",
            "RevenueFromContractWithCustomer"
        ]

        # Find the first matching tag
        revenue_tag = next(
            (tag for tag in revenue_tags if tag in apple_facts.facts["us-gaap"].disclosures),
            None
        )

        if revenue_tag and "USD" in apple_facts.facts["us-gaap"].disclosures[revenue_tag].units:
            # Extract revenue data
            revenue_data = []
            for fact in apple_facts.facts["us-gaap"].disclosures[revenue_tag].units["USD"]:
                # Only include annual (FY) or quarterly (Q1-Q4) data
                if fact.fp in ["FY", "Q1", "Q2", "Q3", "Q4"]:
                    revenue_data.append({
                        "period": f"{fact.fy} {fact.fp}",
                        "date": fact.end,
                        "revenue": fact.val,
                        "form": fact.form
                    })

            # Convert to a DataFrame for analysis
            df = pd.DataFrame(revenue_data)
            df["date"] = pd.to_datetime(df["date"])
            df = df.sort_values("date")

            # Only keep 10-K and 10-Q reports
            df = df[df["form"].isin(["10-K", "10-Q"])]

            # Plot the revenue trend
            plt.figure(figsize=(12, 6))
            plt.plot(df["date"], df["revenue"] / 1e9)  # Convert to billions
            plt.title(f"Apple Inc. - {revenue_tag} Over Time")
            plt.xlabel("Date")
            plt.ylabel("Revenue (Billions USD)")
            plt.grid(True)
            plt.show()

Cross-Company Analysis with Frames
----------------------------------

Use the frames API to compare the same financial concept across multiple companies.

.. code-block:: python

    import edgar_sec as ed
    import pandas as pd
    import matplotlib.pyplot as plt

    # Initialize the API client
    edgar = ed.EdgarAPI(cache_mode=True)

    # Get assets for all reporting companies for Q4 2022
    assets_frame = edgar.get_frames(
        taxonomy="us-gaap",
        tag="Assets",
        unit="USD",
        period="CY2022Q4I"  # Calendar Year 2022, Q4, Instantaneous
    )

    print(f"Total companies reporting: {assets_frame.pts}")

    # Extract the top 10 companies by assets
    top_companies = sorted(assets_frame.frames, key=lambda x: x.val, reverse=True)[:10]

    # Convert to a DataFrame
    df = pd.DataFrame([
        {"Company": company.entity_name, "Assets (Billions)": company.val / 1e9}
        for company in top_companies
    ])

    # Create a horizontal bar chart
    plt.figure(figsize=(12, 8))
    plt.barh(df["Company"], df["Assets (Billions)"])
    plt.title("Top 10 Companies by Total Assets (Q4 2022)")
    plt.xlabel("Total Assets (Billions USD)")
    plt.grid(True, axis="x")
    plt.tight_layout()
    plt.show()
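The frames API can also be combined across periods to follow a single filer over time. The sketch below is illustrative only: it assumes each frame point exposes `entity_name` and `val` as in the example above, and it matches Apple by entity name for simplicity; matching on the CIK would be more robust if the frame data exposes it.

.. code-block:: python

    import edgar_sec as ed
    import pandas as pd

    edgar = ed.EdgarAPI(cache_mode=True)

    # Instantaneous "Assets" frames for each quarter of calendar year 2022
    periods = ["CY2022Q1I", "CY2022Q2I", "CY2022Q3I", "CY2022Q4I"]

    rows = []
    for period in periods:
        frame = edgar.get_frames(taxonomy="us-gaap", tag="Assets", unit="USD", period=period)
        for point in frame.frames:
            # Matching on the entity name is a simplification for this sketch
            if point.entity_name.lower().startswith("apple"):
                rows.append({"period": period, "assets_billions": point.val / 1e9})
                break

    # One row per quarter with Apple's reported total assets
    df = pd.DataFrame(rows)
    print(df)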
Error Handling and Validation
-----------------------------

Implement robust error handling to manage API limitations and issues.

.. code-block:: python

    import edgar_sec as ed
    import httpx
    from tenacity import retry, wait_fixed, stop_after_attempt

    # Initialize the API client
    edgar = ed.EdgarAPI(cache_mode=True)

    # Function with enhanced error handling
    @retry(wait=wait_fixed(2), stop=stop_after_attempt(3))
    def get_company_data(cik):
        try:
            # Attempt to fetch data
            return edgar.get_submissions(central_index_key=cik)
        except ValueError as e:
            # Handle API-specific errors
            if "rate limit" in str(e).lower():
                print("Rate limit exceeded, retrying in 2 seconds...")
                raise  # Let retry handle this
            else:
                print(f"API Error: {e}")
                return None
        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                print(f"Company with CIK {cik} not found")
                return None
            elif e.response.status_code == 429:
                print("Rate limit exceeded, retrying in 2 seconds...")
                raise  # Let retry handle this
            else:
                print(f"HTTP Error: {e}")
                return None
        except Exception as e:
            print(f"Unexpected error: {e}")
            return None

    # Try with valid and invalid CIKs
    companies = [
        "0000320193",  # Apple (valid)
        "0000123456",  # Invalid CIK
        "0000789019",  # Microsoft (valid)
    ]

    for cik in companies:
        company = get_company_data(cik)
        if company:
            print(f"Successfully retrieved data for {company.name}")
        else:
            print(f"Failed to retrieve data for CIK {cik}")

Advanced Caching Configuration
------------------------------

Customize the caching behavior to optimize performance for your specific use case.

.. code-block:: python

    import edgar_sec as ed
    from cachetools import TTLCache
    import time

    # Create a custom cache with a specific size and TTL
    custom_cache = TTLCache(maxsize=512, ttl=7200)  # 2 hour TTL, larger cache

    # Access the internal attributes to customize the API (advanced usage)
    edgar = ed.EdgarAPI(cache_mode=True)
    edgar.cache = custom_cache  # Replace the default cache

    # Measure the performance difference with caching
    start_time = time.time()
    apple_facts = edgar.get_company_facts("0000320193")
    first_request_time = time.time() - start_time

    start_time = time.time()
    apple_facts_cached = edgar.get_company_facts("0000320193")
    cached_request_time = time.time() - start_time

    print(f"First request time: {first_request_time:.2f} seconds")
    print(f"Cached request time: {cached_request_time:.2f} seconds")
    # Guard against a cached lookup finishing below the timer's resolution
    speedup = first_request_time / max(cached_request_time, 1e-6)
    print(f"Speed improvement: {speedup:.1f}x faster")
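Because the replacement cache is an ordinary `cachetools.TTLCache`, you can inspect it directly to see how much of it is in use. The following sketch continues from the block above and assumes the library stores one cache entry per distinct request; the exact key format is an internal detail.

.. code-block:: python

    # Continuing from the block above: pre-warm the cache for a batch of companies,
    # then check how full the custom TTLCache is.
    ciks = ["0000320193", "0000789019", "0001652044"]
    for cik in ciks:
        edgar.get_company_facts(cik)  # each distinct request should add one entry (assumption)

    print(f"Cache entries in use: {custom_cache.currsize} of {custom_cache.maxsize}")

    # Entries added here expire automatically after the 2 hour TTL configured above.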