-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathchatbot-v4.py
More file actions
89 lines (71 loc) · 2.95 KB
/
chatbot-v4.py
File metadata and controls
89 lines (71 loc) · 2.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from threading import Thread
from typing import Optional
import gradio as gr
import langchain
from langchain import LLMChain
from langchain.llms.base import LLM
from langchain_community.llms import LlamaCpp
import langchain_core
import langchain_core.prompts
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
# Alternative models previously tried (swap into MODEL_NAME to experiment):
# MaziyarPanahi/Calme-7B-Instruct-v0.9
# upstage/SOLAR-10.7B-Instruct-v1.0
# CohereForAI/c4ai-command-r-v01-4bit
# Hugging Face Hub id of the chat model served by this app.
MODEL_NAME="CohereForAI/c4ai-command-r-v01-4bit"
def initialize_model_and_tokenizer(model_name=MODEL_NAME):
    """Load the causal-LM weights and the matching tokenizer for *model_name*.

    The first call downloads from the Hugging Face Hub; later calls hit the
    local cache. GPU placement (``.cuda()``) is intentionally left disabled.

    Returns:
        tuple: ``(model, tokenizer)``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)  # .cuda() disabled
    return model, tokenizer
def init_chain(model, tokenizer):
    """Build a LangChain ``LLMChain`` around a streaming custom LLM wrapper.

    The returned ``llm`` exposes a ``streamer`` attribute that yields decoded
    text chunks as generation proceeds. ``llm_chain.run(...)`` returns
    immediately (generation happens on a background thread), so the caller is
    expected to iterate ``llm.streamer`` for incremental output.

    Args:
        model: a loaded ``AutoModelForCausalLM`` providing ``.generate``.
        tokenizer: the matching ``AutoTokenizer``.

    Returns:
        tuple: ``(llm_chain, llm)`` — the chain to invoke and the LLM whose
        ``streamer`` the caller iterates.
    """
    class CustomLLM(LLM):
        """LangChain LLM that streams tokens through a TextIteratorStreamer."""
        # Replaced on every _call; consumers iterate it for decoded chunks.
        streamer: Optional[TextIteratorStreamer] = None

        def _call(self, prompt, stop=None, run_manager=None) -> str:
            # BUG FIX: the keyword is ``timeout`` (lowercase). The original
            # ``Timeout=5`` was silently absorbed into **decode_kwargs, so a
            # stalled generation would block the streamer's consumer forever.
            self.streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, timeout=5)
            inputs = tokenizer(prompt, return_tensors="pt")
            # NOT_USE: inputs = {k: v.cuda() for k, v in inputs.items()}
            # inputs['input_ids'] = inputs['input_ids'].cuda()
            kwargs = dict(input_ids=inputs["input_ids"], streamer=self.streamer, max_new_tokens=200)
            # Generate on a background thread; tokens arrive via self.streamer.
            thread = Thread(target=model.generate, kwargs=kwargs)
            thread.start()
            # Empty string by design: real output is read from the streamer.
            return ""

        @property
        def _llm_type(self) -> str:
            return "custom"

    llm = CustomLLM()
    template = """user: {question}
Answer:"""
    prompt = langchain_core.prompts.PromptTemplate(template=template, input_variables=["question"])
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    return llm_chain, llm
# --- Module-level app wiring: download model, build UI, launch server. ---

# Download / load weights once at import time (first run pulls from the HF hub).
model, tokenizer = initialize_model_and_tokenizer()

# Gradio theme: violet/indigo/purple palette with a custom purple slider.
theme = gr.themes.Default(
    # color constructors
    primary_hue="violet",
    secondary_hue="indigo",
    neutral_hue="purple"
).set(slider_color="#800080")

with gr.Blocks(theme=theme) as demo:
    # Page header rendered as raw HTML.
    title = """<h1 align="center">KNU Test ChatBot No.4</h1>
<h3 align="center">[langchain TextIteratorStreamer] Local LLM GPU ChatBot streaming Interactive</h3>"""
    gr.HTML(title)
    chatbot = gr.Chatbot(label=MODEL_NAME.replace("/", " "))
    msg = gr.Textbox()
    clear = gr.Button("Clear")
    llm_chain, llm = init_chain(model, tokenizer)

    def user(user_message, history):
        # Append the user's turn with a placeholder bot reply, and clear
        # the textbox (first return value becomes the new textbox content).
        return "", history + [[user_message, None]]

    def bot(history):
        # Kick off generation for the latest user message; llm_chain.run
        # returns immediately, so output must be read from llm.streamer.
        print("Question: ", history[-1][0])
        llm_chain.run(question=history[-1][0])
        history[-1][1] = ""
        # Stream decoded chunks into the last chat turn as they arrive,
        # yielding after each chunk so the UI updates incrementally.
        for character in llm.streamer:
            print(character)
            history[-1][1] += character
            yield history

    # On submit: update history synchronously, then stream the bot reply.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(bot, chatbot, chatbot)
    clear.click(lambda: None, None, chatbot, queue=False)

# Queuing is required for generator (streaming) callbacks; then start the UI.
demo.queue()
demo.launch()