Source code for ads.data_labeling.reader.jsonl_reader
#!/usr/bin/env python
# -*- coding: utf-8; -*-

# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import json
from typing import Any, Dict, Generator, Optional

import fsspec

from ads.common import auth as authutil
from ads.data_labeling.interface.reader import Reader
class JsonlReader(Reader):
    """Reader that lazily yields the parsed lines of a JSON Lines file."""

    def __init__(
        self, path: str, auth: Optional[Dict] = None, encoding: str = "utf-8"
    ) -> None:
        """Initiates a JsonlReader object.

        Parameters
        ----------
        path: str
            Object storage path or local path for a file.
        auth: (dict, optional). Defaults to None.
            The default authentication is set using `ads.set_auth` API. If you
            need to override the default, use the `ads.common.auth.api_keys` or
            `ads.common.auth.resource_principal` to create appropriate
            authentication signer and kwargs required to instantiate
            IdentityClient object. A `None` or empty value falls back to
            `ads.common.auth.default_signer()`.
        encoding: (str, optional). Defaults to 'utf-8'.
            Text encoding passed to `fsspec.open` when the file is read.

        Examples
        --------
        >>> from ads.data_labeling.reader.jsonl_reader import JsonlReader
        >>> path = "your/path/to/jsonl/file.jsonl"
        >>> from ads.common import auth as authutil
        >>> reader = JsonlReader(path=path, auth=authutil.api_keys(), encoding="utf-8")
        >>> next(reader.read())
        """
        self.path = path
        # `or` (rather than an `is None` check) means an empty dict also
        # falls back to the default signer.
        self.auth = auth or authutil.default_signer()
        self.encoding = encoding

    def read(self, skip: Optional[int] = None) -> Generator[Dict, Any, Any]:
        """Reads and yields the content of the file, one parsed line at a time.

        This is a generator function: nothing is validated or opened until
        the returned generator is first advanced.

        Parameters
        ----------
        skip: (int, optional). Defaults to None.
            The number of leading lines (records) that should be skipped.
            `None` or `0` means nothing is skipped.

        Returns
        -------
        Generator[Dict, Any, Any]
            The content of the file, each line decoded with `json.loads`.

        Raises
        ------
        ValueError
            If `skip` is not empty and not a positive integer.
        FileNotFoundError
            When file not found. The underlying error is attached as the
            cause.
        """
        if skip and (not isinstance(skip, int) or skip < 1):
            raise ValueError("The parameter `skip` must be a positive integer.")

        try:
            # The auth mapping is expanded into keyword arguments for fsspec.
            with fsspec.open(
                self.path, "r", encoding=self.encoding, **self.auth
            ) as f:
                # Lines are numbered from 1 so that `line_number <= skip`
                # drops exactly the first `skip` lines.
                for line_number, line in enumerate(f, start=1):
                    if skip and line_number <= skip:
                        continue
                    yield json.loads(line)
        except FileNotFoundError as err:
            # Re-raise with a message naming the configured path while
            # keeping the original error as the explicit cause.
            raise FileNotFoundError(f"Path ({self.path}) not found.") from err