Source code for ads.feature_engineering.feature_type.adsstring.oci_language

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import json
from typing import Dict, List

import oci.ai_language
import pandas as pd
from ads.common import auth as authutil
from ads.common import oci_client


class OCILanguage(object):  # pragma: no cover
    """Defines the OCILanguage plugin for ADSString built on top of the OCI Language Services.

    Every property sends ``self.string`` to the service as a single document
    with key ``"1"`` and returns data parsed from the first document of the
    response. ``self.string`` is not defined in this class; presumably it is
    supplied by ``ADSString`` once the plugin is registered -- verify against
    the host class.

    Each property indexes ``documents[0]`` of the response, so an empty
    ``documents`` list surfaces as an ``IndexError``.

    Example
    -------
    >>> ADSString.plugin_register(OCILanguage)
    >>> s = ADSString("This movie is awesome.")
    >>> s.absa
    >>> s.text_classification
    >>> s.ner
    >>> s.language_dominant
    """

    def __init__(self, auth=None):
        """Creates the OCI AI Language client used by every property.

        Parameters
        ----------
        auth: dict, optional
            Keyword arguments forwarded to ``oci_client.OCIClientFactory``.
            When omitted (or otherwise falsy), the result of
            ``authutil.default_signer()`` is used instead.
        """
        auth = auth or authutil.default_signer()
        self.ai_client = oci_client.OCIClientFactory(**auth).ai_language

    @property
    def ner(self) -> List[Dict]:
        """Detects named entities in the text.

        Returns
        -------
        List[Dict]
            The ``"entities"`` entry of the first document in the response.
        """
        models = oci.ai_language.models
        details = models.BatchDetectLanguageEntitiesDetails(
            documents=[models.TextDocument(key="1", text=self.string)]
        )
        response = self.ai_client.batch_detect_language_entities(details)
        # The string form of the result model is parsed as JSON (presumably
        # the SDK model's repr is a JSON dump -- confirm against the OCI SDK).
        return json.loads(str(response.data.documents[0]))["entities"]

    @property
    def language_dominant(self) -> Dict:
        """Determines the language of the text along with ISO 639-1 language code and a probability score.

        Returns
        -------
        Dict
            The whole first document of the response, parsed from its string
            form. Unlike the other properties, no single field is selected,
            so the result is one mapping rather than a list.
        """
        models = oci.ai_language.models
        details = models.BatchDetectDominantLanguageDetails(
            documents=[models.DominantLanguageDocument(key="1", text=self.string)]
        )
        response = self.ai_client.batch_detect_dominant_language(details)
        return json.loads(str(response.data.documents[0]))

    @property
    def key_phrase(self) -> List[Dict]:
        """Extracts the most relevant words from the ADSString object and assigns them a score.

        Returns
        -------
        List[Dict]
            The ``"key_phrases"`` entry of the first document in the response.
        """
        models = oci.ai_language.models
        details = models.BatchDetectLanguageKeyPhrasesDetails(
            documents=[models.TextDocument(key="1", text=self.string)]
        )
        response = self.ai_client.batch_detect_language_key_phrases(details)
        return json.loads(str(response.data.documents[0]))["key_phrases"]

    @property
    def absa(self) -> List[Dict]:
        """Runs aspect-based sentiment analysis on the text to gauge the mood or the tone of the text.

        Returns
        -------
        List[Dict]
            The ``"aspects"`` entry of the first document in the response.
        """
        models = oci.ai_language.models
        details = models.BatchDetectLanguageSentimentsDetails(
            documents=[models.TextDocument(key="1", text=self.string)]
        )
        response = self.ai_client.batch_detect_language_sentiments(details)
        return json.loads(str(response.data.documents[0]))["aspects"]

    @property
    def text_classification(self) -> List[Dict]:
        """Analyses the text and identifies categories for the content with a confidence score.

        Returns
        -------
        List[Dict]
            The ``"text_classification"`` entry of the first document in the
            response.
        """
        models = oci.ai_language.models
        details = models.BatchDetectLanguageTextClassificationDetails(
            documents=[models.TextDocument(key="1", text=self.string)]
        )
        response = self.ai_client.batch_detect_language_text_classification(details)
        return json.loads(str(response.data.documents[0]))["text_classification"]