Sekhar Baggaraju
08/16/2024, 3:22 PMEdgar Ramírez (Arch.dev)
08/16/2024, 4:24 PM
`start_date` for the extractor?
Edgar Ramírez (Arch.dev)
08/16/2024, 6:13 PMAndy Carter
08/16/2024, 7:46 PM
`start_date` required in the tap schema.
Sekhar Baggaraju
08/20/2024, 12:33 PM
config:
  start_date: 2024-08-10T00:00:01Z
Sekhar Baggaraju
08/20/2024, 12:36 PMAndy Carter
08/20/2024, 12:39 PM
config:
  start_date: 2024-08-10
Sekhar Baggaraju
08/20/2024, 3:29 PMSekhar Baggaraju
08/20/2024, 3:39 PM
"""REST client handling, including FreshdeskStream base class."""
from __future__ import annotations
import time
from pathlib import Path
from typing import Any, Callable, Iterable, TYPE_CHECKING, Generator
import requests
from http import HTTPStatus
from urllib.parse import urlparse
from singer_sdk.authenticators import BasicAuthenticator
from singer_sdk.helpers.jsonpath import extract_jsonpath
from singer_sdk.streams import RESTStream
from singer_sdk.pagination import BasePageNumberPaginator, SinglePagePaginator
if TYPE_CHECKING:
from requests import Response
_Auth = Callable[[requests.PreparedRequest], requests.PreparedRequest]
SCHEMAS_DIR = Path(__file__).parent / Path("./schemas")
class FreshdeskStream(RESTStream):
    """Base stream for the Freshdesk v2 REST API.

    Provides the URL, authentication, header, query-parameter, record-parsing,
    retry/backoff and error-message behaviour shared by the concrete streams.
    This base class requests a single page only (see ``get_new_paginator``).
    """

    name: str
    records_jsonpath = "$.[*]"  # Or override `parse_response`.
    primary_keys = ["id"]

    # Seconds to wait before a retry when the failed response carries no usable
    # ``Retry-After`` header (missing, non-numeric, or no response at all).
    # NOTE(review): 60 is a conservative guess — tune to the account's rate limit.
    _default_retry_after_seconds = 60

    @property
    def backoff_max_tries(self) -> int:
        """Return the maximum number of attempts for a retriable request."""
        return 10

    @property
    def path(self) -> str:
        """Return the endpoint path, derived from the stream name."""
        return f"/{self.name}"

    @property
    def schema_filepath(self) -> Path | None:
        """Return the JSON schema file for this stream inside ``SCHEMAS_DIR``."""
        return SCHEMAS_DIR / f"{self.name}.json"

    @property
    def url_base(self) -> str:
        """Return the API root URL built from the configured ``domain``.

        Raises:
            KeyError: If ``domain`` is absent from the tap configuration.
        """
        domain = self.config["domain"]
        # The original literal contained chat-link markup ("<http://...|...>"),
        # which produced an unusable URL; this is the plain host and API root.
        return f"https://{domain}.freshdesk.com/api/v2"

    @property
    def authenticator(self) -> BasicAuthenticator:
        """Return a basic authenticator using ``api_key`` as the username.

        The password is an empty string; a missing ``api_key`` falls back to
        an empty username.
        """
        return BasicAuthenticator.create_for_stream(
            self,
            username=self.config.get("api_key", ""),
            password="",
        )

    @property
    def http_headers(self) -> dict:
        """Return request headers; sets ``User-Agent`` only when configured."""
        headers = {}
        if "user_agent" in self.config:
            headers["User-Agent"] = self.config.get("user_agent")
        return headers

    def get_next_page_token(
        self,
        response: requests.Response,
        previous_token: Any | None,
    ) -> Any | None:
        """Return the token identifying the next page, or ``None``.

        Args:
            response: The HTTP response of the page just fetched.
            previous_token: Token of the page just fetched (unused here).

        Returns:
            The first match of ``next_page_token_jsonpath`` in the JSON body
            when that attribute is set; otherwise the ``X-Next-Page`` response
            header, or ``None`` if it is absent.
        """
        if self.next_page_token_jsonpath:
            all_matches = extract_jsonpath(
                self.next_page_token_jsonpath, response.json()
            )
            return next(iter(all_matches), None)
        return response.headers.get("X-Next-Page", None)

    def get_url_params(
        self,
        context: dict | None,
        next_page_token: Any | None,
    ) -> dict[str, Any]:
        """Return the query parameters for a request.

        Args:
            context: Stream partition/context dictionary (unused here).
            next_page_token: Pagination token (unused here).

        Returns:
            A dict with an ``include`` entry (comma-joined) when the ``embeds``
            config maps this stream's name to a non-empty list of fields;
            otherwise an empty dict.
        """
        params: dict = {}
        embeds = self.config.get("embeds")
        if embeds:
            embed_fields = embeds.get(self.name, [])
            if embed_fields:  # i.e. 'stats,company,sla_policy'
                params["include"] = ",".join(embed_fields)
        return params

    def parse_response(self, response: requests.Response) -> Iterable[dict]:
        """Yield every record matched by ``records_jsonpath`` in the JSON body."""
        yield from extract_jsonpath(self.records_jsonpath, input=response.json())

    def get_new_paginator(self) -> SinglePagePaginator:
        """Return a paginator that performs exactly one request."""
        return SinglePagePaginator()

    def backoff_wait_generator(self) -> Generator[float, None, None]:
        """Return a wait generator whose delays come from ``_wait_for``."""
        return self.backoff_runtime(value=self._wait_for)

    @staticmethod
    def _wait_for(exception) -> int:
        """Return how many seconds to wait before retrying a failed request.

        Args:
            exception: The exception raised for the failed request; its
                ``response.headers`` is consulted when available.

        Returns:
            The integer value of the ``Retry-After`` header, or
            ``_default_retry_after_seconds`` when the exception has no
            response, the header is missing, or it is not an integer.
        """
        response = getattr(exception, "response", None)
        headers = getattr(response, "headers", None) or {}
        try:
            return int(headers["Retry-After"])
        except (KeyError, TypeError, ValueError):
            # Previously an absent or non-numeric header crashed the retry loop.
            return FreshdeskStream._default_retry_after_seconds

    def backoff_jitter(self, value: float) -> float:
        """Return ``value`` unchanged, i.e. apply no jitter to the wait time."""
        return value

    @staticmethod
    def _extract_error_details(response: requests.Response) -> list[str]:
        """Return human-readable detail strings parsed from an error body."""
        try:
            payload = response.json()
        except ValueError:  # requests' JSONDecodeError is a ValueError subclass
            return ["Unable to parse JSON error response"]

        if not isinstance(payload, dict):
            return []

        details: list[str] = []
        errors = payload.get("errors")
        if isinstance(errors, list):
            for index, error in enumerate(errors):
                if not isinstance(error, dict):
                    details.append(f"Error {index + 1}: {error}")
                    continue
                message = error.get("message", "Unknown")
                field = error.get("field", "Unknown")
                details.append(
                    f"Error {index + 1}: Message - {message}, Field - {field}"
                )
        if not details:
            # No per-field list: fall back to a top-level summary if present.
            summary = payload.get("description") or payload.get("message")
            if summary:
                details.append(f"Message - {summary}")
        return details

    # Handling error, overriding this method over RESTStream's Class
    def response_error_message(self, response: requests.Response) -> str:
        """Build the error message for a failed response.

        Args:
            response: The HTTP response to describe.

        Returns:
            A single-line message containing the status code, whether it is a
            client (4xx) or server error, the reason phrase, the request path
            and any details that could be parsed from the JSON body. A body
            that is not valid JSON is reported as a detail instead of
            replacing the whole message.
        """
        full_path = urlparse(response.url).path or self.path
        is_client_error = (
            HTTPStatus.BAD_REQUEST
            <= response.status_code
            < HTTPStatus.INTERNAL_SERVER_ERROR
        )
        error_type = "Client" if is_client_error else "Server"

        error_details: list[str] = []
        if response.status_code >= HTTPStatus.BAD_REQUEST:
            # Log instead of print(): stdout carries the tap's Singer messages.
            self.logger.error(
                "Error Response: %s %s", response.status_code, response.reason
            )
            error_details = self._extract_error_details(response)

        return (
            f"{response.status_code} {error_type} Error: "
            f"{response.reason} for path: {full_path}. "
            f"Error via function response_error_message : {'. '.join(error_details)}."
        )
class FreshdeskPaginator(BasePageNumberPaginator):
    """Page-number paginator that stops on an empty page or at a page cap."""

    # Page number at which pagination stops even if the page was non-empty.
    # NOTE(review): presumably mirrors a Freshdesk limit on how deep list
    # endpoints can be paged — confirm against the API documentation. Records
    # beyond this page are not fetched.
    max_pages = 300

    def has_more(self, response: Response) -> bool:
        """Return whether another page should be requested.

        Args:
            response: The HTTP response of the page just fetched; its JSON
                body must support ``len()``.

        Returns:
            ``True`` when the body is non-empty and the current page number is
            still below ``max_pages``.
        """
        page_is_empty = len(response.json()) == 0
        return not page_is_empty and self.current_value < self.max_pages
class PagedFreshdeskStream(FreshdeskStream):
    """Freshdesk stream fetched page by page, filtered by ``updated_since``."""

    # Number of records requested per page.
    per_page = 100

    @staticmethod
    def _format_updated_since(value: Any) -> Any:
        """Return ``value`` as an ISO-8601 UTC string (``YYYY-MM-DDTHH:MM:SSZ``).

        Non-datetime values (including ``None``) are returned unchanged.
        """
        from datetime import datetime, timezone  # local: not imported at file top

        if not isinstance(value, datetime):
            return value
        if value.tzinfo is None:
            # NOTE(review): naive timestamps are assumed to be UTC — confirm.
            value = value.replace(tzinfo=timezone.utc)
        return value.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    def get_url_params(
        self,
        context: dict | None,
        next_page_token: Any | None,
    ) -> dict[str, Any]:
        """Return the query parameters for one page request.

        Args:
            context: Stream context; treated as empty when ``None``.
            next_page_token: Page number to request, if any.

        Returns:
            The parent's parameters plus ``per_page``, ``page`` (when a token
            is given) and ``updated_since`` (unless the context already has an
            ``updated_since`` key). ``updated_since`` is the stream's starting
            timestamp formatted explicitly, so the value sent does not depend
            on how the timestamp object happens to stringify; it is ``None``
            when there is no starting timestamp.
        """
        context = context or {}
        params = super().get_url_params(context, next_page_token)
        params["per_page"] = self.per_page
        if next_page_token:
            params["page"] = next_page_token
        if "updated_since" not in context:
            params["updated_since"] = self._format_updated_since(
                self.get_starting_timestamp(context)
            )
        return params

    def get_new_paginator(self) -> BasePageNumberPaginator:
        """Return a ``FreshdeskPaginator`` starting at page 1."""
        return FreshdeskPaginator(start_value=1)
class PagedFreshdeskTicketsStream(FreshdeskStream):
    """Paged Freshdesk stream for tickets, using a smaller page size."""

    # Number of records requested per page.
    per_page = 30

    @staticmethod
    def _format_updated_since(value: Any) -> Any:
        """Return ``value`` as an ISO-8601 UTC string (``YYYY-MM-DDTHH:MM:SSZ``).

        Non-datetime values (including ``None``) are returned unchanged.
        """
        from datetime import datetime, timezone  # local: not imported at file top

        if not isinstance(value, datetime):
            return value
        if value.tzinfo is None:
            # NOTE(review): naive timestamps are assumed to be UTC — confirm.
            value = value.replace(tzinfo=timezone.utc)
        return value.astimezone(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    def get_url_params(
        self,
        context: dict | None,
        next_page_token: Any | None,
    ) -> dict[str, Any]:
        """Return the query parameters for one page request.

        Args:
            context: Stream context; treated as empty when ``None``.
            next_page_token: Page number to request, if any.

        Returns:
            The parent's parameters plus ``per_page``, ``page`` (when a token
            is given) and ``updated_since`` (unless the context already has an
            ``updated_since`` key). ``updated_since`` is the stream's starting
            timestamp formatted explicitly, so the value sent does not depend
            on how the timestamp object happens to stringify; it is ``None``
            when there is no starting timestamp.
        """
        context = context or {}
        params = super().get_url_params(context, next_page_token)
        params["per_page"] = self.per_page
        if next_page_token:
            params["page"] = next_page_token
        if "updated_since" not in context:
            params["updated_since"] = self._format_updated_since(
                self.get_starting_timestamp(context)
            )
        return params

    def get_new_paginator(self) -> BasePageNumberPaginator:
        """Return a ``FreshdeskPaginator`` starting at page 1."""
        return FreshdeskPaginator(start_value=1)
Edgar Ramírez (Arch.dev)
08/20/2024, 3:49 PM
`meltano run`, can you use the `--full-refresh` flag too?
Andy Carter
08/20/2024, 4:08 PMSekhar Baggaraju
08/21/2024, 8:18 AMAndy Carter
08/21/2024, 11:35 AM
`meltano.yml`?
@Edgar Ramírez (Arch.dev) is it possible this config value could be cached somewhere?
Sekhar Baggaraju
08/21/2024, 12:32 PMAndy Carter
08/21/2024, 1:10 PM
`TAP_FRESHDESK_START_DATE`?
Andy Carter
08/21/2024, 1:13 PM
`meltano config tap-freshdesk list`
To see what the source of the start date is?
Sekhar Baggaraju
08/21/2024, 1:28 PMSekhar Baggaraju
08/21/2024, 1:28 PMAndy Carter
08/21/2024, 1:36 PM
`2024-08-10` in meltano.yml and see if the URL changes? Or is the timestamp value cached from somewhere?
Andy Carter
08/21/2024, 1:41 PMSekhar Baggaraju
08/23/2024, 9:24 AM