#!/usr/bin/env python
# -*- coding: utf-8; -*-

# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import functools
import os
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Needed for annotations only; py4j stays an optional runtime dependency.
    from py4j.java_gateway import JavaGateway
class NotSupportedError(Exception):  # pragma: no cover
    """Signal that a requested feature is unavailable in this distribution."""
def experimental(cls):
    """Decorate ``cls`` so that each instantiation prints an "experimental" notice.

    Parameters
    ----------
    cls
        The class (or other callable) to wrap.

    Returns
    -------
    callable
        A function carrying the metadata of ``cls``. It forwards all
        positional and keyword arguments to ``cls``, prints the notice and
        returns whatever ``cls`` returned. Note that the result is a plain
        function, not a class.
    """

    def _build_and_notify(*args, **kwargs):
        # Instantiate first: if construction raises, no notice is printed.
        created = cls(*args, **kwargs)
        print(f"{cls.__name__} is experimental and may be removed in the future.")
        return created

    # Copy the name, docstring and other metadata of ``cls`` onto the wrapper.
    return functools.update_wrapper(_build_and_notify, cls)
class PY4JGateway:
    """Context manager around a py4j ``JavaGateway`` for the text-extraction jar.

    The Java gateway is launched when the object is constructed, with
    ``text-extraction-tools.jar`` from the directory named by the
    ``CONDA_PREFIX`` environment variable on its classpath. Entering the
    context yields the connected gateway; leaving it calls ``shutdown()`` on
    that gateway.
    """

    def __init__(self) -> None:
        """Launch the Java gateway and connect to it.

        Raises
        ------
        ModuleNotFoundError
            If ``py4j`` cannot be imported.
        NotSupportedError
            If ``CONDA_PREFIX`` is not set, or ``text-extraction-tools.jar``
            does not exist under it.
        """
        # Imported lazily so this module can be loaded without py4j installed.
        try:
            from py4j.java_gateway import (
                GatewayParameters,
                JavaGateway,
                launch_gateway,
            )
        except ModuleNotFoundError as err:
            # Chain explicitly so the original import failure stays visible.
            raise ModuleNotFoundError("py4j is not installed.") from err

        # Resolve the jar location once and reuse it for the check and launch.
        conda_prefix = os.environ.get("CONDA_PREFIX")
        jar_path = (
            None
            if conda_prefix is None
            else os.path.join(conda_prefix, "text-extraction-tools.jar")
        )
        if jar_path is None or not os.path.exists(jar_path):
            raise NotSupportedError(
                "Tika is not supported in this distribution. "
                "Use alternatives such as pdfplumber."
            )

        port = launch_gateway(java_path="/usr/bin/java", classpath=jar_path)
        self.gateway = JavaGateway(gateway_parameters=GatewayParameters(port=port))

    def __enter__(self) -> "JavaGateway":
        """Return the connected gateway for use inside the ``with`` block."""
        return self.gateway

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Shut the gateway down; exceptions from the block are not suppressed."""
        self.gateway.shutdown()