Ian OLeary
04/10/2024, 2:53 PMclass JobDivaPaginator(BaseAPIPaginator):
def __init__(self, *args, **kwargs):
super().__init__(None, *args, **kwargs)
def has_more(self, response):
#check the get_next() response to make sure it's before today
return self.get_next(response) < date.today()
def get_next(self, response):
#get the parameters used for the previous request
#return ["toDate"] param from the previous request and add 1 second to determine the new ["fromDate"]
params = dict(parse_qsl(urlparse(response.request.url).query))
return datetime.strptime(params["toDate"], OUTPUT_DATE_FORMAT).date() + timedelta(seconds=1)
and my get_url_params:
def get_url_params(
self,
context: dict | None, # noqa: ARG002
next_page_token: date | None, # noqa: ANN401
) -> dict[str, Any]:
#start_value = self.config["start_date"]
start_value = self.get_starting_replication_key_value(context)
from_date = (
next_page_token
or datetime.strptime(start_value, INPUT_DATE_FORMAT).date()
)
to_date = from_date + timedelta(days=14) - timedelta(seconds=1)
return {
"fromDate": from_date.strftime(OUTPUT_DATE_FORMAT),
"toDate": to_date.strftime(OUTPUT_DATE_FORMAT),
}
Edgar Ramírez (Arch.dev)
04/10/2024, 6:04 PMIan OLeary
04/11/2024, 2:53 PMclass PageValue(t.NamedTuple):
start_date: datetime.date
page_number: int
class JobDivaPaginator(BaseAPIPaginator[PageValue]):
def __init__(self, *args, **kwargs):
super().__init__(None, *args, **kwargs)
def has_more(self, response):
#check the get_next() response to make sure it's before today
return self.get_next(response.next_start_date) < date.today()
def get_next(self, response):
#get the parameters used for the previous request
#return ["toDate"] param from the previous request and add 1 second to determine the new ["fromDate"]
params = dict(parse_qsl(urlparse(response.request.url).query))
curr_request = yield from extract_jsonpath(self.records_jsonpath, input=response.json())
if len(curr_request) < 1:
next_start_date = datetime.strptime(params["toDate"], OUTPUT_DATE_FORMAT).date() + timedelta(seconds=1)
page_number = 1
return PageValue(next_start_date, page_number)
else:
next_start_date = datetime.strptime(params["toDate"], OUTPUT_DATE_FORMAT).date()
page_number = params["pageNumber"] + 1
return PageValue(next_start_date, page_number)
def get_url_params(
self,
context: dict | None, # noqa: ARG002
next_page_token, # noqa: ANN401
) -> dict[str, Any]:
#start_value = self.config["start_date"]
start_value = self.get_starting_replication_key_value(context)
from_date = (
next_page_token.start_date
or datetime.strptime(start_value, INPUT_DATE_FORMAT).date()
)
to_date = from_date + timedelta(days=14) - timedelta(seconds=1)
page_number = next_page_token.page_number
return {
"fromDate": from_date.strftime(OUTPUT_DATE_FORMAT),
"toDate": to_date.strftime(OUTPUT_DATE_FORMAT),
"pageNumber": page_number,
"pageSize": 1000,
}
Here's what I did roughly. In the stack trace, though, I'm getting AttributeError: 'NoneType' object has no attribute 'start_date'
from the next_page_token.start_date. Does the get_next() method return the next_page_token object for the get_url_params() method in the stream class?Reuben (Matatika)
04/11/2024, 4:57 PMNone
on the first request, before any call to your paginator get_next
.Edgar Ramírez (Arch.dev)
04/11/2024, 5:33 PMnext_page_token
is None
instead of checking if next_page_token.start_date
is truthyIan OLeary
04/11/2024, 6:00 PMdef get_url_params(
self,
context: dict | None, # noqa: ARG002
next_page_token: Optional[PageValue] = None, # noqa: ANN401
) -> dict[str, Any]:
start_value = self.get_starting_replication_key_value(context)
if next_page_token is None:
next_page_token = PageValue(start_value, 1)
from_date = (
next_page_token.start_date
or datetime.strptime(start_value, INPUT_DATE_FORMAT).date()
)
print("from_date:", from_date, "type:", type(from_date))
to_date = from_date + timedelta(days=14) - timedelta(seconds=1)
page_number = next_page_token.page_number
Yeah so I did this to fix it. Now working on why from date is set to a string from the print statement. 'from_date: 2023-11-01T000000 type: <class 'str'>' from the print().Edgar Ramírez (Arch.dev)
04/11/2024, 6:03 PMstart_value = self.get_starting_replication_key_value(context)
if next_page_token is None:
next_page_token = PageValue(start_value, 1)
make it a string I think. You probably want to apply strptime(start_value)
earlier.Ian OLeary
04/11/2024, 6:17 PMIan OLeary
04/11/2024, 6:18 PMEdgar Ramírez (Arch.dev)
04/11/2024, 6:24 PM