Hi everyone I'm trying to build a custom extractor...
# getting-started
o
Hi everyone, I'm trying to build a custom extractor using the Meltano SDK, but I'm running into an error stating that 'response_code' was present in the 'comments' stream but not found in the catalog schema.
Copy code
time=2022-06-07 09:28:16 name=tap-jsonplaceholder level=WARNING message=Property 'response_code' was present in the 'comments' stream but not found in catalog schema. Ignoring.
time=2022-06-07 09:28:16 name=tap-jsonplaceholder level=WARNING message=Property 'results' was present in the 'comments' stream but not found in catalog schema. Ignoring.
{"type": "RECORD", "stream": "comments", "record": {}, "time_extracted": "2022-06-07T09:28:16.214634Z"}
time=2022-06-07 09:28:16 name=tap-jsonplaceholder level=INFO message=INFO METRIC: {'type': 'counter', 'metric': 'record_count', 'value': 1, 'tags': {'stream': 'comments'}}
{"type": "STATE", "value": {"bookmarks": {"comments": {}}}}
Here's my streams.py file
Copy code
from pathlib import Path
from typing import Any, Dict, Optional, Union, List, Iterable

from singer_sdk import typing as th  # JSON Schema typing helpers

from tap_jsonplaceholder.client import jsonplaceholderStream

class CommentsStream(jsonplaceholderStream):
    """Stream definition for the ``/comments`` path.

    Records are keyed on ``id``; every declared property is typed as a string.
    """

    name = "comments"
    path = "/comments"
    primary_keys = ["id"]

    # All five properties share the same type, so build them from the names
    # in declaration order.
    schema = th.PropertiesList(
        *(
            th.Property(field_name, th.StringType)
            for field_name in ("postId", "id", "name", "email", "body")
        )
    ).to_dict()
And here's my tap.py file
Copy code
"""jsonplaceholder tap class."""

from typing import List

from singer_sdk import Tap, Stream
from singer_sdk import typing as th  # JSON schema typing helpers
# TODO: Import your custom stream types here:
from tap_jsonplaceholder.streams import (
    jsonplaceholderStream,
    CommentsStream
)
# Stream classes instantiated by ``discover_streams()`` below; add a class
# here to have the tap expose it.
STREAM_TYPES = [CommentsStream]


class Tapjsonplaceholder(Tap):
    """jsonplaceholder tap class.

    Declares the tap's settings schema and exposes one stream instance for
    each class listed in ``STREAM_TYPES``.
    """

    name = "tap-jsonplaceholder"

    # Settings accepted by the tap. The default for ``api_url`` is a bare URL
    # with no surrounding markup characters.
    config_jsonschema = th.PropertiesList(
        th.Property(
            "api_url",
            th.StringType,
            default="https://jsonplaceholder.typicode.com",
            description="The json placeholder API",
        ),
    ).to_dict()

    def discover_streams(self) -> List[Stream]:
        """Return a list of discovered streams.

        Returns:
            One instance of every class in ``STREAM_TYPES``, each constructed
            with this tap passed as ``tap``.
        """
        return [stream_class(tap=self) for stream_class in STREAM_TYPES]
Here's the Github repo containing the tap code: https://github.com/vicradon/tap-jsonplaceholder
c
I'm getting a very different error when trying to run your tap. ```time=2022-06-07 103652 name=tap-jsonplaceholder level=INFO message=tap-jsonplaceholder v0.0.1, Meltano SDK v0.5.0) time=2022-06-07 103652 name=tap-jsonplaceholder level=INFO message=Skipping parse of env var settings... time=2022-06-07 103652 name=tap-jsonplaceholder level=INFO message=Config validation passed with 0 warnings. time=2022-06-07 103652 name=root level=INFO message=Operator '__else__=None' was not found. Unmapped streams will be included in output. time=2022-06-07 103652 name=tap-jsonplaceholder level=INFO message=Beginning full_table sync of 'comments'... time=2022-06-07 103652 name=tap-jsonplaceholder level=INFO message=Tap has custom mapper. Using 1 provided map(s). {"type": "SCHEMA", "stream": "comments", "schema": {"properties": {"postId": {"type": ["string", "null"]}, "id": {"type": ["string", "null"]}, "name": {"type": ["string", "null"]}, "email": {"type": ["string", "null"]}, "body": {"type": ["string", "null"]}}, "type": "object"}, "key_properties": ["id"]} time=2022-06-07 103652 name=tap-jsonplaceholder level=INFO message=INFO METRIC: {'type': 'timer', 'metric': 'http_request_duration', 'value': 0.056631, 'tags': {'endpoint': '/comments', 'http_status_code': 404, 'status': 'failed'}} Traceback (most recent call last): File "/home/cwegener/.cache/pypoetry/virtualenvs/tap-jsonplaceholder-7Psic1CA-py3.10/bin/tap-jsonplaceholder", line 5, in <module> Tapjsonplaceholder.cli() File "/home/cwegener/.cache/pypoetry/virtualenvs/tap-jsonplaceholder-7Psic1CA-py3.10/lib/python3.10/site-packages/click/core.py", line 1130, in call return self.main(*args, **kwargs) File "/home/cwegener/.cache/pypoetry/virtualenvs/tap-jsonplaceholder-7Psic1CA-py3.10/lib/python3.10/site-packages/click/core.py", line 1055, in main rv = self.invoke(ctx) File "/home/cwegener/.cache/pypoetry/virtualenvs/tap-jsonplaceholder-7Psic1CA-py3.10/lib/python3.10/site-packages/click/core.py", line 1404, in invoke return 
ctx.invoke(self.callback, **ctx.params) File "/home/cwegener/.cache/pypoetry/virtualenvs/tap-jsonplaceholder-7Psic1CA-py3.10/lib/python3.10/site-packages/click/core.py", line 760, in invoke return __callback(*args, **kwargs) File "/home/cwegener/.cache/pypoetry/virtualenvs/tap-jsonplaceholder-7Psic1CA-py3.10/lib/python3.10/site-packages/singer_sdk/tap_base.py", line 501, in cli tap.sync_all() File "/home/cwegener/.cache/pypoetry/virtualenvs/tap-jsonplaceholder-7Psic1CA-py3.10/lib/python3.10/site-packages/singer_sdk/tap_base.py", line 379, in sync_all stream.sync() File "/home/cwegener/.cache/pypoetry/virtualenvs/tap-jsonplaceholder-7Psic1CA-py3.10/lib/python3.10/site-packages/singer_sdk/streams/core.py", line 1020, in sync self._sync_records(context) File "/home/cwegener/.cache/pypoetry/virtualenvs/tap-jsonplaceholder-7Psic1CA-py3.10/lib/python3.10/site-packages/singer_sdk/streams/core.py", line 946, in _sync_records for record_result in self.get_records(current_context): File "/home/cwegener/.cache/pypoetry/virtualenvs/tap-jsonplaceholder-7Psic1CA-py3.10/lib/python3.10/site-packages/singer_sdk/streams/rest.py", line 424, in get_records for record in self.request_records(context): File "/home/cwegener/.cache/pypoetry/virtualenvs/tap-jsonplaceholder-7Psic1CA-py3.10/lib/python3.10/site-packages/singer_sdk/streams/rest.py", line 322, in request_records resp = decorated_request(prepared_request, context) File "/home/cwegener/.cache/pypoetry/virtualenvs/tap-jsonplaceholder-7Psic1CA-py3.10/lib/python3.10/site-packages/backoff/_sync.py", line 94, in retry ret = target(*args, **kwargs) File "/home/cwegener/.cache/pypoetry/virtualenvs/tap-jsonplaceholder-7Psic1CA-py3.10/lib/python3.10/site-packages/singer_sdk/streams/rest.py", line 235, in _request self.validate_response(response) File "/home/cwegener/.cache/pypoetry/virtualenvs/tap-jsonplaceholder-7Psic1CA-py3.10/lib/python…
o
It says path not found for comments. The root API has different extensions: /users, /todos, /comments. I wonder why I couldn't find it.
r
It's a warning to let you know that the API response contains a comment record with `response_code` and `results` properties, which you do not define in `CommentsStream.schema`. If you want to include them, add the corresponding `th.Property` instances:
Copy code
schema = th.PropertiesList(
        th.Property("postId", th.StringType),
        th.Property("id", th.StringType),
        th.Property("name", th.StringType),
        th.Property("email", th.StringType),
        th.Property("body", th.StringType),
        th.Property("response_code", <type>), // assuming `response_code` in an integer, use `th.IntegerType`
        th.Property("results", <type>),
    ).to_dict()
o
I've been able to get it to work. Thank you @Reuben (Matatika) and @christoph
I have one question, though: how do I select a particular endpoint to replicate, and how much data I want replicated? Right now it just replicates everything I set in the `STREAM_TYPES` array.
r
If you're talking about how a user of the tap can control those parameters, you'd want to configure them as tap settings in your `Tapjsonplaceholder` class and then access the config in your stream classes with `self.config`.
o
Okay, I'll try that now