From 7af02b8a8258da64f07901084dfb1d49069a8539 Mon Sep 17 00:00:00 2001 From: Stepbus Date: Mon, 2 Mar 2026 12:38:12 +0200 Subject: [PATCH 1/3] add enrichments; version increased --- outscraper/businesses.py | 44 +++++++++++++++++++++++++++++++++++++--- setup.py | 2 +- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/outscraper/businesses.py b/outscraper/businesses.py index fb58af0..81ea654 100644 --- a/outscraper/businesses.py +++ b/outscraper/businesses.py @@ -12,7 +12,9 @@ def __init__(self, client: OutscraperClient) -> None: self._client = client def search(self, *, filters: FiltersLike = None, limit: int = 10, cursor: Optional[str] = None, include_total: bool = False, - fields: Optional[list[str]] = None, query: str = '') -> BusinessSearchResult: + fields: Optional[list[str]] = None, enrichments: Optional[list[str]] = None, + contacts_per_company: Optional[int] = None, emails_per_contact: Optional[int] = None, + query: str = '') -> BusinessSearchResult: ''' Retrieve business records with optional enrichment data. @@ -30,6 +32,14 @@ def search(self, *, filters: FiltersLike = None, limit: int = 10, cursor: Option include_total (bool): Whether to include the total count of matching records in the response. This could increase response time. Default: False. fields (list[str] | None): List of fields to include in the response. If not specified, all fields will be returned. + enrichments (list[str] | None): Optional enrichments to apply. + Supported values: + - "contacts_n_leads" + - "company_insights" + contacts_per_company (int | None): Applies only when "contacts_n_leads" enrichment is enabled. If not provided, + defaults to 3. + emails_per_contact (int | None): Applies only when "contacts_n_leads" enrichment is enabled. If not provided, + defaults to 1. query (str): natural language search. Returns: @@ -41,6 +51,12 @@ def search(self, *, filters: FiltersLike = None, limit: int = 10, cursor: Option if limit < 1 or limit > 1000: raise ValueError('limit must be in range [1, 1000]') + if contacts_per_company is not None and contacts_per_company < 1: + raise ValueError('contacts_per_company must be >= 1') + + if emails_per_contact is not None and emails_per_contact < 1: + raise ValueError('emails_per_contact must be >= 1') + if filters is None: filters_payload = {} elif isinstance(filters, BusinessFilters): @@ -57,6 +73,18 @@ def search(self, *, filters: FiltersLike = None, limit: int = 10, cursor: Option if fields: payload['fields'] = list(fields) + if isinstance(enrichments, str): + enrichments = [enrichments] + enrichments_payload = list(enrichments) if enrichments else [] + if enrichments_payload: + payload['enrichments'] = enrichments_payload + + if 'contacts_n_leads' in enrichments_payload: + payload['contacts_per_company'] = contacts_per_company if contacts_per_company is not None else 3 + payload['emails_per_contact'] = emails_per_contact if emails_per_contact is not None else 1 + elif contacts_per_company is not None or emails_per_contact is not None: + raise ValueError('contacts_per_company and emails_per_contact require enrichments to include "contacts_n_leads"') + if query: payload['query'] = query @@ -74,7 +102,9 @@ def search(self, *, filters: FiltersLike = None, limit: int = 10, cursor: Option ) def iter_search(self, *, filters: FiltersLike = None, limit: int = 10, start_cursor: Optional[str] = None, - include_total: bool = False, fields: Optional[list[str]] = None) -> Iterator[dict]: + include_total: bool = False, fields: Optional[list[str]] = None, + enrichments: Optional[list[str]] = None, contacts_per_company: Optional[int] = None, + emails_per_contact: Optional[int] = None, query: str = '') -> Iterator[dict]: ''' Iterate over businesses across all pages (auto-pagination). @@ -91,6 +121,10 @@ def iter_search(self, *, filters: FiltersLike = None, limit: int = 10, start_cur include_total (bool): Passed to `search()` (if supported by API). Default: False. fields (list[str] | None): Passed to `search()`. + enrichments (list[str] | None): Passed to `search()`. + contacts_per_company (int | None): Passed to `search()`. + emails_per_contact (int | None): Passed to `search()`. + query (str): Passed to `search()`. Yields: item (dict): Each business record from all pages. @@ -105,7 +139,11 @@ def iter_search(self, *, filters: FiltersLike = None, limit: int = 10, start_cur limit=limit, cursor=cursor, include_total=include_total, - fields=fields) + fields=fields, + enrichments=enrichments, + contacts_per_company=contacts_per_company, + emails_per_contact=emails_per_contact, + query=query) for item in business_search_result.items: yield item diff --git a/setup.py b/setup.py index 5ef4c61..4d8845c 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ def readme(): setup( name='outscraper', - version='6.0.2', + version='6.0.3', description='Python bindings for the Outscraper API', long_description=readme(), classifiers = ['Programming Language :: Python', From c06bccf92aa49917606820ee2badb4417d0f4605 Mon Sep 17 00:00:00 2001 From: Stepbus Date: Wed, 4 Mar 2026 11:37:29 +0200 Subject: [PATCH 2/3] user query structure changed --- outscraper/businesses.py | 104 ++++++++++++++++++++++++++------------- 1 file changed, 71 insertions(+), 33 deletions(-) diff --git a/outscraper/businesses.py b/outscraper/businesses.py index 81ea654..c5dcc18 100644 --- a/outscraper/businesses.py +++ b/outscraper/businesses.py @@ -5,6 +5,11 @@ FiltersLike = Union[BusinessFilters, Mapping[str, Any], None] +EnrichmentsLike = Optional[Union[ + dict[str, Union[dict[str, Any], None, bool]], + list[str], + str, +]] class BusinessesAPI: @@ -12,9 +17,7 @@ def __init__(self, client: OutscraperClient) -> None: self._client = client def search(self, *, filters: FiltersLike = None, limit: int = 10, cursor: Optional[str] = None, include_total: bool = False, - fields: Optional[list[str]] = None, enrichments: Optional[list[str]] = None, - contacts_per_company: Optional[int] = None, emails_per_contact: Optional[int] = None, - query: str = '') -> BusinessSearchResult: + fields: Optional[list[str]] = None, enrichments: EnrichmentsLike = None, query: str = '') -> BusinessSearchResult: ''' Retrieve business records with optional enrichment data. @@ -32,14 +35,19 @@ def search(self, *, filters: FiltersLike = None, limit: int = 10, cursor: Option include_total (bool): Whether to include the total count of matching records in the response. This could increase response time. Default: False. fields (list[str] | None): List of fields to include in the response. If not specified, all fields will be returned. - enrichments (list[str] | None): Optional enrichments to apply. - Supported values: + enrichments (dict | list[str] | str | None): Optional enrichments to apply. + Preferred format is dict with per-enrichment params: + { + "contacts_n_leads": { + "contacts_per_company": 3, + "emails_per_contact": 1, + }, + "company_insights": {}, + } + Backward-compatible formats are also supported: + - ["contacts_n_leads", "company_insights"] - "contacts_n_leads" - - "company_insights" - contacts_per_company (int | None): Applies only when "contacts_n_leads" enrichment is enabled. If not provided, - defaults to 3. - emails_per_contact (int | None): Applies only when "contacts_n_leads" enrichment is enabled. If not provided, - defaults to 1. + In those forms, each enrichment is sent with empty params. query (str): natural language search. Returns: @@ -51,12 +59,6 @@ def search(self, *, filters: FiltersLike = None, limit: int = 10, cursor: Option if limit < 1 or limit > 1000: raise ValueError('limit must be in range [1, 1000]') - if contacts_per_company is not None and contacts_per_company < 1: - raise ValueError('contacts_per_company must be >= 1') - - if emails_per_contact is not None and emails_per_contact < 1: - raise ValueError('emails_per_contact must be >= 1') - if filters is None: filters_payload = {} elif isinstance(filters, BusinessFilters): @@ -73,17 +75,10 @@ def search(self, *, filters: FiltersLike = None, limit: int = 10, cursor: Option if fields: payload['fields'] = list(fields) - if isinstance(enrichments, str): - enrichments = [enrichments] - enrichments_payload = list(enrichments) if enrichments else [] - if enrichments_payload: - payload['enrichments'] = enrichments_payload + normalized_enrichments = self._normalize_enrichments(enrichments=enrichments) - if 'contacts_n_leads' in enrichments_payload: - payload['contacts_per_company'] = contacts_per_company if contacts_per_company is not None else 3 - payload['emails_per_contact'] = emails_per_contact if emails_per_contact is not None else 1 - elif contacts_per_company is not None or emails_per_contact is not None: - raise ValueError('contacts_per_company and emails_per_contact require enrichments to include "contacts_n_leads"') + if normalized_enrichments: + payload['enrichments'] = normalized_enrichments if query: payload['query'] = query @@ -103,8 +98,7 @@ def search(self, *, filters: FiltersLike = None, limit: int = 10, cursor: Option def iter_search(self, *, filters: FiltersLike = None, limit: int = 10, start_cursor: Optional[str] = None, include_total: bool = False, fields: Optional[list[str]] = None, - enrichments: Optional[list[str]] = None, contacts_per_company: Optional[int] = None, - emails_per_contact: Optional[int] = None, query: str = '') -> Iterator[dict]: + enrichments: EnrichmentsLike = None, query: str = '') -> Iterator[dict]: ''' Iterate over businesses across all pages (auto-pagination). @@ -121,9 +115,8 @@ def iter_search(self, *, filters: FiltersLike = None, limit: int = 10, start_cur include_total (bool): Passed to `search()` (if supported by API). Default: False. fields (list[str] | None): Passed to `search()`. - enrichments (list[str] | None): Passed to `search()`. - contacts_per_company (int | None): Passed to `search()`. - emails_per_contact (int | None): Passed to `search()`. + enrichments (dict | list[str] | str | None): Passed to `search()`. + Supports the same formats as `search()`. query (str): Passed to `search()`. Yields: @@ -141,8 +134,6 @@ def iter_search(self, *, filters: FiltersLike = None, limit: int = 10, start_cur include_total=include_total, fields=fields, enrichments=enrichments, - contacts_per_company=contacts_per_company, - emails_per_contact=emails_per_contact, query=query) for item in business_search_result.items: @@ -187,3 +178,50 @@ def get(self, business_id: str, *, fields: Optional[list[str]] = None) -> dict: raise Exception(f'Unexpected response for /businesses/{business_id}: {type(data)}') return data + + def _normalize_enrichments(self, enrichments: EnrichmentsLike = None) -> dict[str, dict[str, Any]]: + normalized_enrichments = {} + + if enrichments is None: + return normalized_enrichments + + if isinstance(enrichments, str): + if not enrichments: + raise ValueError('enrichment name must be a non-empty string') + normalized_enrichments[enrichments] = {} + + elif isinstance(enrichments, dict): + for name, params in enrichments.items(): + if not isinstance(name, str) or not name: + raise ValueError('enrichment name must be a non-empty string') + + if params is None or params is True: + params = {} + elif params is False: + raise ValueError(f'enrichment "{name}" cannot be False; omit it instead') + + if not isinstance(params, dict): + raise ValueError(f'params for enrichment "{name}" must be a dict, None or True') + + normalized_enrichments[name] = dict(params) + + elif isinstance(enrichments, list): + for name in enrichments: + if not isinstance(name, str) or not name: + raise ValueError('enrichment name must be a non-empty string') + normalized_enrichments[name] = {} + else: + raise ValueError('enrichments must be a dict, list[str], string, or None') + + contacts_n_leads = normalized_enrichments.get('contacts_n_leads', {}) + if 'contacts_per_company' in contacts_n_leads: + contacts_per_company = contacts_n_leads['contacts_per_company'] + if not isinstance(contacts_per_company, int) or contacts_per_company < 1: + raise ValueError('contacts_per_company must be an int >= 1') + + if 'emails_per_contact' in contacts_n_leads: + emails_per_contact = contacts_n_leads['emails_per_contact'] + if not isinstance(emails_per_contact, int) or emails_per_contact < 1: + raise ValueError('emails_per_contact must be an int >= 1') + + return normalized_enrichments From 9fa2a796bffd84e6a9f1fcb99c89fd860b9fe7af Mon Sep 17 00:00:00 2001 From: Stepbus Date: Wed, 4 Mar 2026 11:41:11 +0200 Subject: [PATCH 3/3] changed readme --- examples/Businesses.md | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/examples/Businesses.md b/examples/Businesses.md index 4e9fcec..f1a38d5 100644 --- a/examples/Businesses.md +++ b/examples/Businesses.md @@ -69,6 +69,32 @@ result = client.businesses.search( ] ) +# Search with enrichments (recommended dict format): +result = client.businesses.search( + filters=filters, + limit=25, + fields=['name', 'website', 'phone'], + enrichments={ + 'contacts_n_leads': { + 'contacts_per_company': 3, + 'emails_per_contact': 1, + }, + 'company_insights': {}, + }, +) + +# Search with enrichments (list format): +result = client.businesses.search( + filters=filters, + enrichments=['contacts_n_leads', 'company_insights'], +) + +# Search with enrichments (single string format): +result = client.businesses.search( + filters=filters, + enrichments='contacts_n_leads', +) + # Search with dict filters (alternative) result = client.businesses.search( filters={ @@ -87,6 +113,13 @@ json = { 'cursor': None, 'include_total': False, 'fields': ['name', 'types', 'address', 'state', 'postal_code', 'country', 'website', 'phone', 'rating', 'reviews', 'photo'], + 'enrichments': { + 'contacts_n_leads': { + 'contacts_per_company': 2, + 'emails_per_contact': 1 + }, + 'company_insights': {}, + }, 'filters': { 'country_code': 'US', 'states': [ @@ -115,7 +148,13 @@ filters = BusinessFilters(country_code='US', states=['NY'], business_statuses=[' for business in client.businesses.iter_search( filters=filters, limit=100, - fields=['name', 'phone', 'address', 'rating', 'reviews'] + fields=['name', 'phone', 'address', 'rating', 'reviews'], + enrichments={ + 'contacts_n_leads': { + 'contacts_per_company': 2, + 'emails_per_contact': 1, + } + }, ): # business is a Business dataclass instance print(business)