# Source code for edgar_sec.helpers

# filepath: /src/edgar_sec/helpers.py
#
# Copyright (c) 2025 Nikhil Sunder
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
This module defines helper methods for the edgar-sec package.
"""

from typing import Optional, List, Union
from datetime import datetime
import asyncio
import re
import httpx
from edgar_sec.objects import Company
from edgar_sec.__about__ import __title__, __version__, __author__, __license__, __copyright__, __description__, __url__

class EdgarHelpers:
    """
    A class containing helper methods for the Edgar SEC module.

    All public helpers are static methods. Every synchronous helper has an
    ``*_async`` counterpart with the same arguments and return value.
    """

    # Listing of companies (CIK, ticker, title) downloaded by the lookup helpers.
    # NOTE(review): SEC's fair-access guidance asks automated clients to declare
    # a User-Agent header; none is sent here -- confirm whether one is required.
    _COMPANY_TICKERS_URL = 'https://www.sec.gov/files/company_tickers.json'

    @staticmethod
    def _require_single_query(ticker: Optional[str], search_text: Optional[str]) -> None:
        """
        Ensure exactly one of ticker / search_text carries a usable value.

        Empty strings count as "not provided", so the check agrees with the
        truthiness test that later picks the lookup mode.

        Raises:
            ValueError: If neither or both arguments are provided.
        """
        if bool(ticker) == bool(search_text):
            raise ValueError("Provide exactly one of ticker or search_text.")

    @staticmethod
    def _company_records(data) -> List[dict]:
        """
        Normalise the decoded company_tickers.json payload to a list of records.

        The SEC serves this file as a JSON object keyed by row index
        ({"0": {...}, "1": {...}}); iterating that object directly yields the
        string keys rather than the records. A plain sequence of records is
        passed through unchanged.
        """
        if isinstance(data, dict):
            return list(data.values())
        return list(data)

    @staticmethod
    def _select_ciks(records: List[dict], ticker: Optional[str], search_text: Optional[str]) -> Union[str, List[str]]:
        """
        Pick the CIK(s) matching a ticker or a company-name fragment.

        Args:
            records (List[dict]): Records with 'cik_str', 'ticker' and 'title' keys.
            ticker (str): Ticker symbol to match exactly (case-insensitive).
            search_text (str): Case-insensitive substring of the company title.

        Returns:
            str | List[str]: A single CIK, or a list of distinct CIKs when a
            name search matches more than one company.

        Raises:
            ValueError: If nothing matches.
        """
        if ticker:
            wanted = ticker.strip().upper()
            for record in records:
                if str(record['ticker']).upper() == wanted:
                    # 'cik_str' arrives as a JSON number; callers are promised a str.
                    return str(record['cik_str'])
            raise ValueError(f"Ticker '{ticker}' not found in the SEC EDGAR database.")

        needle = search_text.lower()
        # dict.fromkeys de-duplicates while keeping first-seen order: one company
        # can be listed several times (one row per ticker / share class).
        ciks = list(dict.fromkeys(
            str(record['cik_str'])
            for record in records
            if needle in record['title'].lower()
        ))
        if not ciks:
            raise ValueError(f"Search text '{search_text}' not found in the SEC EDGAR database.")
        return ciks[0] if len(ciks) == 1 else ciks

    @staticmethod
    def _calendar_quarter_label(year: int, month: int) -> str:
        """Format a year and a month (1-12) as 'CY####Q#'."""
        return f"CY{year}Q{(month - 1) // 3 + 1}"

    @staticmethod
    def get_cik(ticker: Optional[str] = None, search_text: Optional[str] = None) -> Union[str, List[str]]:
        """
        Helper method to get the CIK (Central Index Key) for a given ticker symbol
        or company name.

        Args:
            ticker (str): The ticker symbol of the company.
            search_text (str): The name of the company to search for.

        Returns:
            str | List[str]: The CIK of the company or a list of CIKs if multiple matches are found.

        Raises:
            ValueError: If neither ticker nor search_text is provided, or if both are
                provided, or if no company matches.
            httpx.HTTPStatusError: If the SEC endpoint answers with an error status.

        Example:
            >>> cik = EdgarHelpers.get_cik(ticker="AAPL")  # performs a network request
        """
        # Validate before opening a connection so bad input never costs a request.
        EdgarHelpers._require_single_query(ticker, search_text)
        with httpx.Client() as client:
            response = client.get(url=EdgarHelpers._COMPANY_TICKERS_URL)
            response.raise_for_status()
            data = response.json()
        return EdgarHelpers._select_ciks(EdgarHelpers._company_records(data), ticker, search_text)

    @staticmethod
    def get_universe() -> List["Company"]:
        """
        Helper method to get the universe of companies from the SEC EDGAR database.

        Returns:
            List[Company]: A list of Company instances representing the universe of companies.

        Raises:
            httpx.HTTPStatusError: If the SEC endpoint answers with an error status.
        """
        with httpx.Client() as client:
            response = client.get(url=EdgarHelpers._COMPANY_TICKERS_URL)
            response.raise_for_status()
            data = response.json()
        # NOTE(review): assumes Company.to_object expects one record dict
        # (cik_str / ticker / title) -- confirm against edgar_sec.objects.
        return [Company.to_object(record) for record in EdgarHelpers._company_records(data)]

    @staticmethod
    def datetime_cy_conversion(period: datetime) -> str:
        """
        Helper method to convert a reporting period in datetime format to the 'CY####Q#' format.

        Args:
            period (datetime): The reporting period as a datetime object.

        Returns:
            str: The reporting period in 'CY####Q#' format.

        Raises:
            TypeError: If period is not a datetime object.
        """
        if not isinstance(period, datetime):
            raise TypeError("period must be a datetime object.")
        return EdgarHelpers._calendar_quarter_label(period.year, period.month)

    @staticmethod
    def string_cy_conversion(period: str) -> str:
        """
        Helper method to convert a reporting period string in YYYY-MM-DD format to CY####Q# format.

        Args:
            period (str): The reporting period in 'YYYY-MM-DD' format.

        Returns:
            str: The reporting period in 'CY####Q#' format.

        Raises:
            TypeError: If period is not a string.
            ValueError: If period cannot be parsed as 'YYYY-MM-DD'.
        """
        if not isinstance(period, str):
            raise TypeError("period must be a string.")
        # Only the parse can raise ValueError, so only the parse is guarded.
        try:
            parsed = datetime.strptime(period, '%Y-%m-%d')
        except ValueError as e:
            raise ValueError("Invalid date format. Must be in 'YYYY-MM-DD' format.") from e
        return EdgarHelpers._calendar_quarter_label(parsed.year, parsed.month)

    @staticmethod
    def string_cy_validation(period: str) -> bool:
        """
        Helper method to validate if a string is in 'CY####' or 'CY####Q#' format.

        Args:
            period (str): The reporting period string to validate.

        Returns:
            bool: True if the string is in 'CY####' or 'CY####Q#' format, False otherwise.

        Raises:
            TypeError: If period is not a string.
        """
        if not isinstance(period, str):
            raise TypeError("period must be a string.")
        # [0-9] rather than \d: \d would also accept non-ASCII Unicode digits.
        return bool(re.fullmatch(r'CY[0-9]{4}(Q[1-4])?', period))

    @staticmethod
    def cik_validation(central_index_key: str) -> str:
        """
        Helper method to validate and fix the CIK (Central Index Key) format.

        A CIK of one to ten ASCII digits is left-padded with zeros to ten digits.

        Args:
            central_index_key (str): The CIK to validate.

        Returns:
            str: The validated, zero-padded 10-digit CIK.

        Raises:
            TypeError: If the CIK is not a string.
            ValueError: If the CIK is longer than 10 characters, empty, or
                contains anything other than the digits 0-9.
        """
        if not isinstance(central_index_key, str):
            raise TypeError("central_index_key must be a string.")
        if len(central_index_key) > 10:
            raise ValueError("CIK must be 10 digits or less.")
        # Without this check a value such as "abc" would be padded to
        # "0000000abc" and handed back as if it were valid.
        if not re.fullmatch(r'[0-9]+', central_index_key):
            raise ValueError("CIK must contain only digits 0-9.")
        return central_index_key.zfill(10)

    @staticmethod
    async def get_cik_async(ticker: Optional[str] = None, search_text: Optional[str] = None) -> Union[str, List[str]]:
        """
        Helper method to asynchronously get the CIK (Central Index Key) for a given
        ticker symbol or company name.

        Args:
            ticker (str): The ticker symbol of the company.
            search_text (str): The name of the company to search for.

        Returns:
            str | List[str]: The CIK of the company or a list of CIKs if multiple matches are found.

        Raises:
            ValueError: If neither ticker nor search_text is provided, or if both are
                provided, or if no company matches.
            httpx.HTTPStatusError: If the SEC endpoint answers with an error status.
        """
        # Validate before opening a connection so bad input never costs a request.
        EdgarHelpers._require_single_query(ticker, search_text)
        async with httpx.AsyncClient() as client:
            response = await client.get(url=EdgarHelpers._COMPANY_TICKERS_URL)
            response.raise_for_status()
            data = response.json()
        return EdgarHelpers._select_ciks(EdgarHelpers._company_records(data), ticker, search_text)

    @staticmethod
    async def get_universe_async() -> List["Company"]:
        """
        Helper method to asynchronously get the universe of companies from the SEC EDGAR database.

        Returns:
            List[Company]: A list of Company instances representing the universe of companies.

        Raises:
            httpx.HTTPStatusError: If the SEC endpoint answers with an error status.
        """
        async with httpx.AsyncClient() as client:
            response = await client.get(url=EdgarHelpers._COMPANY_TICKERS_URL)
            response.raise_for_status()
            data = response.json()
        # NOTE(review): assumes Company.to_object_async expects one record dict
        # (cik_str / ticker / title) -- confirm against edgar_sec.objects.
        return [await Company.to_object_async(record) for record in EdgarHelpers._company_records(data)]

    @staticmethod
    async def datetime_cy_conversion_async(period: datetime) -> str:
        """
        Helper method to asynchronously convert a reporting period in datetime format
        to the 'CY####Q#' format.

        Args:
            period (datetime): The reporting period as a datetime object.

        Returns:
            str: The reporting period in 'CY####Q#' format.

        Raises:
            TypeError: If period is not a datetime object.
        """
        return await asyncio.to_thread(EdgarHelpers.datetime_cy_conversion, period)

    @staticmethod
    async def string_cy_conversion_async(period: str) -> str:
        """
        Helper method to asynchronously convert a reporting period string in YYYY-MM-DD
        format to CY####Q# format.

        Args:
            period (str): The reporting period in 'YYYY-MM-DD' format.

        Returns:
            str: The reporting period in 'CY####Q#' format.

        Raises:
            TypeError: If period is not a string.
            ValueError: If period cannot be parsed as 'YYYY-MM-DD'.
        """
        return await asyncio.to_thread(EdgarHelpers.string_cy_conversion, period)

    @staticmethod
    async def string_cy_validation_async(period: str) -> bool:
        """
        Helper method to asynchronously validate if a string is in 'CY####' or 'CY####Q#' format.

        Args:
            period (str): The reporting period string to validate.

        Returns:
            bool: True if the string is in 'CY####' or 'CY####Q#' format, False otherwise.

        Raises:
            TypeError: If period is not a string.
        """
        return await asyncio.to_thread(EdgarHelpers.string_cy_validation, period)

    @staticmethod
    async def cik_validation_async(central_index_key: str) -> str:
        """
        Helper method to asynchronously validate and fix the CIK (Central Index Key) format.

        Args:
            central_index_key (str): The CIK to validate.

        Returns:
            str: The validated, zero-padded 10-digit CIK.

        Raises:
            TypeError: If the CIK is not a string.
            ValueError: If the CIK is longer than 10 characters, empty, or
                contains anything other than the digits 0-9.
        """
        return await asyncio.to_thread(EdgarHelpers.cik_validation, central_index_key)