Core problem: resolving the mismatch between the plain text that LLMs consume and the structured data that KGs store.
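One common bridge is to linearize KG triples into plain text before handing them to the LLM. A minimal sketch of that idea (the triples and the triples_to_text helper below are hypothetical, for illustration only):

# Hypothetical example: flatten (subject, relation, object) triples into prose
triples = [
    ("Guardians of the Galaxy Vol. 3", "directed_by", "James Gunn"),
    ("James Gunn", "born_in", "1966"),
]

def triples_to_text(triples):
    # each triple becomes one plain-text sentence the LLM can read
    return " ".join(f"{s} {r.replace('_', ' ')} {o}." for s, r, o in triples)

context = triples_to_text(triples)
# -> "Guardians of the Galaxy Vol. 3 directed by James Gunn. James Gunn born in 1966."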
from typing import Any

import torch
from langchain.embeddings import HuggingFaceBgeEmbeddings
from llama_index import ServiceContext
from llama_index.llms import CustomLLM, CompletionResponse, CompletionResponseGen, LLMMetadata
from llama_index.llms.base import llm_completion_callback
from transformers import pipeline

context_window = 2048  # OPT models accept at most 2048 tokens of context
num_output = 256  # number of new tokens to generate per completion
model_name = "facebook/opt-iml-max-30b"
generation_pipeline = pipeline(
    "text-generation", model=model_name, device="cuda:0",
    model_kwargs={"torch_dtype": torch.bfloat16},
)
class OurLLM(CustomLLM):
    @property
    def metadata(self) -> LLMMetadata:
        """Get LLM metadata."""
        return LLMMetadata(
            context_window=context_window,
            num_output=num_output,
            model_name=model_name,
        )

    @llm_completion_callback()
    def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
        prompt_length = len(prompt)
        response = generation_pipeline(prompt, max_new_tokens=num_output)[0]["generated_text"]
        # the pipeline echoes the prompt, so return only the newly generated text
        text = response[prompt_length:]
        return CompletionResponse(text=text)

    @llm_completion_callback()
    def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
        raise NotImplementedError()

llm = OurLLM()
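A quick sanity check of the wrapper (the prompt here is arbitrary, and this assumes the 30B model actually fits on the GPU):

response = llm.complete("Paul Graham is ")
print(response.text)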
embed_model = HuggingFaceBgeEmbeddings(model_name="BAAI/bge-base-en")
# register the custom LLM and the embedding model in a single ServiceContext;
# calling from_defaults twice would silently drop the llm from the second context
service_context = ServiceContext.from_defaults(
    llm=llm, embed_model=embed_model,
    context_window=context_window, num_output=num_output,
)
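With both models registered, the service_context can be fed into a knowledge-graph index. A minimal sketch, assuming a legacy llama_index version that still ships ServiceContext and KnowledgeGraphIndex; the "./data" path and the query text are hypothetical:

from llama_index import KnowledgeGraphIndex, SimpleDirectoryReader

documents = SimpleDirectoryReader("./data").load_data()  # hypothetical data directory
index = KnowledgeGraphIndex.from_documents(
    documents,
    max_triplets_per_chunk=2,
    service_context=service_context,
)
query_engine = index.as_query_engine()
print(query_engine.query("Tell me about the entities in the graph."))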