### Use: streamlit run <file.py> ###
import streamlit as st
from Retrieval import Retriver
from llm import llm
from chain import chain
import json
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import OllamaLLM
from langchain_ollama import ChatOllama
from langchain_core.output_parsers import StrOutputParser
from langchain_community.utilities.wolfram_alpha import WolframAlphaAPIWrapper
import pdfplumber
from langchain.schema import Document
import chromadb
import os
import requests
import time
# Wolfram|Alpha App IDs: one for full-result queries, one for the LLM endpoint.
# NOTE(review): API keys are hardcoded in source — they should be moved to
# environment variables / a secrets store before this file is shared.
wolfram_api_key_full = "RK6EY7-JXQKYY77YK"
wolfram_api_key_llm = "RK6EY7-A2JK96375T"
# WolframAlphaAPIWrapper reads the app id from this environment variable.
os.environ["WOLFRAM_ALPHA_APPID"] = wolfram_api_key_llm
wolfram = WolframAlphaAPIWrapper()
import random
# Clear Chroma's cached client so repeated `streamlit run` reloads start clean.
chromadb.api.client.SharedSystemClient.clear_system_cache()
# https://docs.streamlit.io/
# {
# Using VirtualchatbotTest memory
# (venv) python3 -m venv chatbotTest
# (venv) source chatbotTest/bin/activate
# (chatbotTest) (venv)
# }
# modelPirate = llm("ollamapirate")
# Model handles built via the project-local `llm` factory.
# Positional args appear to be (model_name, temperature, format) — TODO confirm
# against llm.py.
modelNormal = llm("llama3.1:8b")
modelSmall = llm("llama3.2:3b", 0.8)  # temperature = 0.8
modelSmall2 = llm("llama3.2:3b")  # temperature = 0
modelDeepSeekR1 = llm("deepseek-r1:8b")
modelBig = llm("llama3.3:70b")
modelBigWithTemperature = llm("llama3.3:70b", temperature=0.8)
# "json" format variants are used as routers (structured JSON output, temp 0).
modelBigRouter = llm("llama3.3:70b", 0, "json")
modelNormalRouter = llm("llama3.1:8b", 0, "json")
modelSmallRouter = llm("llama3.2:3b", 0, "json")
###
########## TEMPLATES START ########
###
# Prompt for the user-data extraction agent: turns a free-text progress summary
# into a JSON record with the fixed keys listed below. Fed to a JSON-format
# router model, so the template asks for JSON output explicitly.
userDataExtract_instructions = """
You are an expert at extracting data about the user from given summary.
You must summarise to less than 3 words each topic.
The summary is in the following context:
<context>
{question}
</context>
Return JSON with keys, that are related to the summary.
'Name': name
'Age': age
'ID': id (integer)
'Task Complete Self': number(integer)
'Task Complete AI': number(integer)
'Progress_LimitCalculation': number(integer)
'timeSpent': time(string)
"""
# Random numbers injected into the question-generation prompt so the model does
# not simply echo the retrieved example. Range 2..37 keeps squares/roots small.
rand1 = random.randint(2, 37)
rand2 = random.randint(2, 37)

# RAG prompt for generating a *new* calculus question from a retrieved example.
# NOTE: four earlier iterations of this template were assigned to the same name
# and immediately overwritten (dead code); only this final version — which
# injects {rand1} dynamically to force numeric variation — is kept.
RAG_TEMPLATE_MATH = """
You are a mathematical professor, expert at generating new and creative math questions.
You must ensure that the generated question is **not identical** to the one in the retrieved context data, written in LaTex.
You can use the following number, its power and its **square** root: {rand1}.
**Create a new question** while maintaining problem structure and similar technique for solution, but replacing original numbers with part of your new numbers.
Ask the user to provide a solution.
Here are the rules to follow:
(1) DO NOT explain the process.
(2) DO NOT ADD NOTES.
(3) YOU MUST *NOT* USE root greater that square root, for example: DO *NOT* USE CUBE ROOT.
Return the newly generated question in correct LaTeX format.
Use less than 250 words.
<context>
{context}
</context>
"""
### Works, slowly, reads all the data each time. ###
# Heavy router variant: embeds the whole vectorstore context in the prompt.
# Kept for reference; router_instructions2 is what the app actually uses.
router_instructions1 = """
You are an expert at routing a user question to a vectorstore or self answer.
The vectorstore contains documents related to math questions:
<context>
{context}
</context>
Use the vectorstore for questions on these topics. For all else, and especially for current events, generate independent answer.
Return JSON with single key, datasource, that is 'independent' or 'vectorstore' depending on the question:
{question}"""
### Also Works, given what data is stored in vector. ###
# Light router: decides 'vectorstore' (question generation) vs 'independent'
# (tutoring answer) without shipping any retrieved context.
router_instructions2 = """
You are an expert at routing a user question to a vectorstore or self answer.
The vectorstore contains documents related to math questions in calculus 1, the questions are at 3 different levels: 1,2 and 3.
*Use the vectorstore only as reference for NEW math question generation in calculus 1.
*Use the vectorstore ONLY if user ask to generate a question.
For all else, and especially for current events, and given information generate independent answer.
Return JSON with single key, datasource, that is 'independent' or 'vectorstore' depending on the question:
{question}"""
# Second routing step: maps a generation request to a (topic, level) pair that
# must match the keys of the question database (see RouterRandOriginalQuestion).
router_instructions2_step2 = """
You are an expert at routing a user request to a math topic and level.
Here are the available topics:
"Limit_Calculation"
Here are the available levels:
"Level1"
"Level2"
"Level3"
Based on users request you need to choose which level and topic are requested.
Use only the available ones.
Return JSON with 2 keys, "topic", that is the chosen topic, and "level", that is the chosen level.
Users request:
{question}"""
###
########## TEMPLATES END ##########
###
def extract_hebrew_text(pdf_path):
    """Extract text from a PDF as one ``Document`` per non-empty page.

    Pages with no extractable text (e.g. scanned images) are skipped; each
    returned Document carries a 1-based ``page`` number in its metadata.
    """
    with pdfplumber.open(pdf_path) as pdf:
        return [
            Document(page_content=page_text, metadata={"page": page_number})
            for page_number, page in enumerate(pdf.pages, start=1)
            if (page_text := page.extract_text())
        ]
def extract_hebrew_text_toString(pdf_path):
    """Extract all text from a PDF as a single concatenated string.

    FIX: ``page.extract_text()`` returns ``None`` for pages without text
    (e.g. image-only pages); the original ``text += page.extract_text()``
    raised ``TypeError`` on such pages. Empty pages now contribute nothing.
    Also uses ``str.join`` instead of repeated ``+=`` concatenation.
    """
    with pdfplumber.open(pdf_path) as pdf:
        return "".join(page.extract_text() or "" for page in pdf.pages)
def RouterDirection(question, router_chain):
    """Ask the router chain where to send *question*.

    Returns the routing decision string from the chain's JSON reply —
    either 'independent' or 'vectorstore' (the 'datasource' key).
    """
    ##### Use router_chain2 for simple check.
    ##### User router_chain1 for heavy check. - runs through files.
    print("*** Routing QUESTION ****")
    raw_reply = router_chain.chainRunner.invoke(question)
    # print(raw_reply)
    chosen_route = json.loads(raw_reply)["datasource"]
    print(f"Choosen way is: {chosen_route}")
    return chosen_route
def RouterRandOriginalQuestion(question, router_chain, database):
    """Return a random stored question matching the user's topic/level request.

    The router chain maps *question* to JSON with 'topic' and 'level' keys;
    those index into *database* (a nested dict whose leaves map the string
    keys "1".."n" to question texts), and one entry is picked at random.
    """
    ##### return random question from database, based on request
    print("*** Routing TOPIC+LEVEL ****")
    raw_reply = router_chain.chainRunner.invoke(question)
    print(raw_reply)
    parsed = json.loads(raw_reply)
    level_questions = database[parsed["topic"]][parsed["level"]]
    # Leaf keys are the strings "1".."n", so draw an index in that range.
    pick = random.randint(1, len(level_questions))
    chosen_question = level_questions[str(pick)]
    print(f"Chosen rand is {pick}")
    print(chosen_question)
    return chosen_question
def reverse_Heb(sentence):
    """Reverse the characters of every word AND the word order.

    Used to fix visually-reversed Hebrew text extracted from PDFs
    (equivalent to reversing the whole string with whitespace collapsed
    to single spaces).
    """
    return ' '.join(word[::-1] for word in reversed(sentence.split()))
# Historical data sources, kept for reference:
# path = "data_storage/calculusJsonDataShort_eng.txt"
# pathLimitCalculus = "data_storage/calculusJsonDeepSeek_eng.txt"
path_userData = "data_storage/userData/userData_eng.txt"
path_summaryForUser = "data_storage/SummaryForUser/SummaryForUser.txt"
path_newDictDatabase = "data_storage/calculusDataset.txt"
# path_newDictDatabase = "data_storage/TESTcalculusDataset.txt"
path_wolframAnswer = "data_storage/wolframAnswer.txt"
# Ensure the storage directories exist. exist_ok=True replaces the original
# os.path.exists + makedirs pair, which was race-prone between check and create.
directory_userData = os.path.dirname(path_userData)
directory_summaryForUser = os.path.dirname(path_summaryForUser)
os.makedirs(directory_userData, exist_ok=True)
os.makedirs(directory_summaryForUser, exist_ok=True)
# Seed the user-data file with a placeholder on first run. The original code
# also called open(path_userData, 'r+') when the file already existed, leaking
# the file handle without reading or writing anything — that dead call is gone.
if not os.path.exists(path_userData):
    with open(path_userData, 'w') as f:
        f.write("EMPTY")
print("--Start Data")
# data= Retriver(path)
# data= Retriver(pathLimitCalculus)
# Load the question database: nested dict of topic -> level -> {"1".."n": question}.
with open(path_newDictDatabase, "r") as f:
    newDict_database = json.load(f)
# dataUser= Retriver(path_userData) ----> Not Used
print("--End Data")
# router_chain1 = chain(router_instructions1, modelBigRouter, data)
# Two-step routing: chain2 decides vectorstore vs independent, chain2_step2
# then resolves the requested topic and level.
router_chain2 = chain(router_instructions2, modelBigRouter)
router_chain2_step2 = chain(router_instructions2_step2, modelBigRouter)
# chainRAG ---> can select which model to search with, best is modelSmall
# Timing notes from manual benchmarking of the summary path:
### Story PDF-Eng = 1:48 minutes --> llama3.1:8b
### Story Txt-Eng = 0:54 minutes --> llama3.1:8b
### Story PDF-Eng = 0:24 minutes --> llama3.2:3b
### Story Txt-Eng = 0:24 minutes --> llama3.2:3b
# Agent that extracts structured user data (JSON) from a progress summary.
userDataEXTRACTOR_agent = chain(userDataExtract_instructions, modelBigRouter)
# chainRAG = chain(RAG_TEMPLATE_MATH, modelBigWithTemperature)
# Tutor prompt for the "independent" route (answering without the vectorstore).
# NOTE: an earlier draft of this template was assigned to the same name and
# immediately overwritten (dead code); only this final step-by-step version,
# which walks the model through a limit-evaluation procedure, is kept.
independentChat = """
You are encouraging tutor in calculus 1 course, your job is to help student gain understanding of his study material.
You are not allowed to be wrong, before answering validate that your answer is indeed correct.
You will give the student a hint or two, before revealing the full solution.
You will not generate new questions at all.
*If user answers to a question incorrectly, give him a hint, do not show the full solution.
*If user answers question correctly, tell him, and move on, ask the user if he needs another help.
To Answer Follow these steps:
*Step 1, Plug the limiting value into the function, If you get a finite number, that’s the limit, If you get 0/0 or inf/inf, or another indeterminate form, proceed to further steps.
*Step 2, Try algebraic manipulation, "Factor and Cancel", "Rationalize Numerator/Denominator", "Substitution", "Absolute Value Handling".
*Step 3, Apply Special Limit Rules (If Needed), "L’Hôpital’s Rule" only if 0/0 or inf/inf, "Taylor Series Expansion".
*Step 4, Check Left/Right Limits (If Necessary), Evaluate both sides, if f(x+) not equals f(x-), the two sided limit does not exist, if (constant/0) the limit does not exist.
*Step 5, Final Answer, If simplification resolves the limit, state the result, If left/right limits differ, conclude the two-sided limit does not exist.
Answer the question below shortly, you must use less than 300 words.
Return your answer with LaTeX math expressions wrapped inside $...$ for inline and $$...$$ for display math.
Here is the conversation history: {history}
Question: {question}
Answer:
"""
# Retry prompt: used after the supervisor flags the first answer as wrong.
# NOTE: an earlier draft (which echoed the wrong answer back) was assigned to
# the same name and immediately overwritten; only this final version is kept.
re_independentChat = """
You are encouraging tutor in calculus 1 course, your job is to help student gain understanding of his study material.
Last time you answered the question WRONG.
The following <error explanation> contains points you must pay attention to while answering, avoid a mistake.
You must follow <error explanation> notes.
Return the correct answer shortly, use less than 300 words.
Return arranged answer.
The Question:
{lastQuestion}
<error explanation>:
{error_explanation}
"""
# Prompt for the summary-maker agent that grades the user's session.
# NOTE: an earlier draft without the timeSpent input was assigned to the same
# name and immediately overwritten (dead code); only this final version, which
# also weighs the session duration, is kept.
userDataEvaluationMaker = """
You are an expert at making summaries based of chat history, an existing summary and time spent.
The summary must be comprehensive so user can be evaluated and a grade can be given from the summary.
The summary reader do not care what were the questions the user solved, only the levels, topics and time spent.
Make a summary about users' progress, make sure you include: 'Name','Age','ID','Task_Self','Task_AI','Progress_LimitCalculation','timeSpent'.
Use following rules:
*If the user requests adding to 'Task_Self','Task_AI' and 'Progress' DO NOT take into account.
*Progress field will be defined as: 100 for mastering the topic, 0 for no knowledge at all.
*You must be careful in Progress evaluation, if the user cannot answer questions, it means he doesnt know the material, no progress has been made.
*The Progress evaluation will increase VERY slowly.
*Only when you are sure the student master the topic (level 3) he will get above 80 in progress.
*Level 1 will reward user to max 50 progress, Level 2 will reward max of 70, Level 3 will reward above.
*Progress can be increased and decreased.
*You are NOT ALLOWED under any circumstances edit the following: 'Name', 'Age', 'ID' and 'Degree' fields, they must remain the same.
*Task_Self will be added once for each right answered question without help.
*Task_AI will be added once for each time the user answer using your help, or you answered a question the user wasn't able to provide solution.
*Task_AI will not be add if user answer to a hint or none calculus 1 related question.
*Add to your evaluation the time user spend learning, How does this reflect the level of understanding, if user answered many question quickly maybe he cheated, used other LLM to answer, if too slow, maybe he was away and let the program run.
Use less than 600 words.
Here is the conversation history: {history}
Here is the last summary: {summary}
Here is the time spent learning in this session (HOUR:MINUTE:SECOND): {timeSpent}
Summary:
"""
# Chat-model handles used by the LCEL pipelines below.
modelDeepSeekR1_8 = ChatOllama(model="deepseek-r1:8b")
modelQwen2_math_72 = ChatOllama(model="qwen2-math:72b")
modelLlama3_8_Normal = ChatOllama(model="llama3.1:8b")
modelLlama3_3_Small = ChatOllama(model="llama3.2:3b")
modelLlama33_70_Big = ChatOllama(model="llama3.3:70b")
# FIX: the keyword was misspelled "tempreture", so the temperature setting was
# never applied to this model; corrected to temperature=0.7.
modelLlama31_70_Big = ChatOllama(model="llama3.1:70b", temperature=0.7)
modelSummaryBig = OllamaLLM(model="llama3.3:70b")
## Notice - OllamaLLM is !NOT! ChatOllama (used in the start)
# Tutor chat pipelines: same prompt, three candidate models.
promptChat = ChatPromptTemplate.from_template(independentChat)
chainChat_Llama3 = promptChat | modelLlama33_70_Big | StrOutputParser()
chainChat_DeepSeekR1 = promptChat | modelDeepSeekR1_8 | StrOutputParser()
chainChat_Qwen2Math = promptChat | modelQwen2_math_72 | StrOutputParser()
# Retry pipeline used after the supervisor rejects an answer.
prompt_re_Chat = ChatPromptTemplate.from_template(re_independentChat)
chainChat_re_Llama3 = prompt_re_Chat | modelLlama33_70_Big | StrOutputParser()
# Question-generation (RAG) pipeline.
promptRAG = ChatPromptTemplate.from_template(RAG_TEMPLATE_MATH)
chainRAG = promptRAG | modelLlama31_70_Big | StrOutputParser()
## Notice - OllamaLLM is !NOT! ChatOllama (used in the start)
# Session-evaluation summary pipeline.
promptUserDataEvaluationMaker = ChatPromptTemplate.from_template(userDataEvaluationMaker)
summaryMaker_agent = promptUserDataEvaluationMaker | modelSummaryBig | StrOutputParser()
## Notice - OllamaLLM is !NOT! ChatOllama (used in the start)
SummaryMakerForUser = """
You are a encouraging tutor, expert at making summaries and giving tips for students' use.
The summary will professional, contain information for the student how can he improve, what levels should he work on(higher or lower), does he understand the basics, does he invest enough time.
If time spend is low, recommend investing more time.
You will give the summary based on the evaluation summary, chat history and time spent.
Do not tell the student his grading, avoid mathematical expressions.
Use less than 300 words.
Here is the conversation history: {history}
Here is the evaluation summary: {summary}
Here is the time spent learning in this session (HOUR:MINUTE:SECOND): {timeSpent}
Summary:
"""
promptSummaryMakerForUser = ChatPromptTemplate.from_template(SummaryMakerForUser)
summaryMakerUser_agent = promptSummaryMakerForUser | modelSummaryBig | StrOutputParser()
# Validator prompt: checks that a generated evaluation summary contains every
# required field; replies '1' (legitimate) or '0' (missing fields) as JSON.
summaryMadeSupervisor = """
You are a summarize supervisor, the following summary was written by AI agent.
Your mission is to check the following states:
*Name:
*Age:
*ID:
*Task_Self:
*Task_AI:
*Progress_LimitCalculation:
*timeSpent:
If one of the above does not exist - return '0'. Else return '1'.
Return JSON with single key, legitimate, that is '1' or '0' as string, depending on the summary.
Here is the summary: {Summary}
Summary:
"""
# Grader prompt comparing the tutor's answer against a reference answer
# (Wolfram|Alpha or DeepSeek). Returns an error explanation, or 'None' when
# the answers agree.
# NOTE: an earlier draft of this template was assigned to the same name and
# immediately overwritten (dead code); only this final quiz-grading version is kept.
chat_supervisor = """
You are a teacher grading a quiz.
You will be given RIGHT-ANSWER and a STUDENT ANSWER.
Here is the grade criteria to follow:
(1) Ensure the STUDENT ANSWER is grounded in the RIGHT-ANSWER.
(2) Ensure the STUDENT ANSWER does not contain "hallucinated" information outside the scope of the RIGHT-ANSWER.
(3) Ensure the STUDENT ANSWER AND RIGHT-ANSWER use L'Hopital rule only if both numerator and denominator equals zero.
(4) STUDENT ANSWER *must* get the same result as RIGHT-ANSWER.
A grounded value of None means that the student's answer meets all of the criteria.
Explain your reasoning in a step-by-step manner to ensure your reasoning and conclusion are correct.
State what is the correct final answer, and what steps should be done.
Use less than 300 words.
Return JSON with single key, legitimate, that is the error explanation with steps or 'None' as string.
STUDENT ANSWER:
{result_llama}
RIGHT-ANSWER:
{result_DeepSeek}
"""
# Fallback grader used when no reference answer is available: validates the
# tutor's answer step by step on its own.
chat_supervisor_Level2 = """
You are a teacher grading a quiz.
You will be given a STUDENT ANSWER.
You MUST follow each step in the STUDENT ANSWER and validate its correctness.
Pay attention to mathematical calculation and correct use of formulas.
A grounded value of None means that the STUDENT ANSWER is valid.
If not valid, Explain your reasoning in a step-by-step manner to ensure your reasoning and conclusion are correct.
Use less than 300 words.
Return JSON with single key, legitimate, that is the error explanation with steps or 'None' as string.
STUDENT ANSWER:
{result_llama}
{result_DeepSeek}
"""
# Deterministic JSON-output model shared by all supervisor/validator agents.
modelBigRouter_Chat = ChatOllama(model="llama3.3:70b", temperature=0, format="json")
# Validates that a generated evaluation summary has all required fields.
promptSummarySupervisor = ChatPromptTemplate.from_template(summaryMadeSupervisor)
summarySupervisor_agent = promptSummarySupervisor | modelBigRouter_Chat | StrOutputParser()
# Grades a tutor answer against a reference answer.
promptChat_Supervisor = ChatPromptTemplate.from_template(chat_supervisor)
chatSupervisor_agent = promptChat_Supervisor | modelBigRouter_Chat | StrOutputParser()
# Standalone validation when no reference answer exists.
promptChat_Level2_Supervisor = ChatPromptTemplate.from_template(chat_supervisor_Level2)
chatSupervisor_Level2_agent = promptChat_Level2_Supervisor | modelBigRouter_Chat | StrOutputParser()
# Prompt that rewrites a LaTeX question into plain-text syntax that the
# Wolfram|Alpha API can parse (limited symbol set, no answering).
wolframAlpha_transformer = """
You are a transformer, you will get a text written partly in latex, your job is to transform it to readable text.
Readable text is when you can get a text and understand the equation, without using symbols.
For example;
Original: "$\\lim_[x\\rightarrow 3] \\frac[x^2 - 9][x - 3]$"
Transformation: "limit ((x^2) - 9)/(x - 3) as x approaches 3"
For ^ use (): (x^2)
For sqrt use (): (sqrt(2))
*DO NOT ANSWER THE QUESTION.
*DO NOT ADD ADDITIONAL TEXT.
*DO NOT USE SYMBOLS, except (+,-,(),^,/, *).
Just Transform the question the readable text.
Return the transformation of following question:
{question}
"""
# Pipeline: LaTeX question -> Wolfram-readable plain text.
promptWolframAlpha_transformer = ChatPromptTemplate.from_template(wolframAlpha_transformer)
wolframAlpha_transformer_agent = promptWolframAlpha_transformer | modelLlama33_70_Big | StrOutputParser()
def get_user_info():
    """Render the sign-up form, store the user's details in session state and
    persist them to the user-data file, then rerun to enter the chat.

    FIX: the file-write used adjacent f-string literals with no separators, so
    the saved file contained "Tasks_Self: 0Tasks_AI: 0" on one run-together
    line; newlines are now written between every field.
    """
    st.title("Welcome! Please enter your details to continue.")
    with st.form("user_info_form"):
        name = st.text_input("Enter your Name:")
        age = st.text_input("Enter your Age:")
        id = st.text_input("Enter your ID:")
        degree = st.text_input("Enter your Degree:")
        submitted = st.form_submit_button("Continue")
        if submitted:
            # Name must be non-empty; Age and ID must be numeric.
            if name.strip() and age.strip().isdigit() and id.strip().isdigit():
                taskCompleteSelf = 0
                progress_LimitCalculation = 0
                st.session_state.name = name
                st.session_state.age = age
                st.session_state.id = id
                st.session_state.degree = degree
                st.session_state.taskCompleteSelf = taskCompleteSelf
                # Both counters start at the same zero value.
                st.session_state.taskCompleteAI = taskCompleteSelf
                st.session_state.progress_LimitCalculation = progress_LimitCalculation
                st.session_state.info_collected = True
                with open(path_userData, "w") as file:
                    file.write(f"Name: {name}\nAge: {age}\nDegree: {degree}\nID: {id}\n"
                               f"Tasks_Self: {taskCompleteSelf}\n"
                               f"Tasks_AI: {taskCompleteSelf}\n"
                               f"Progress_LimitCalculation: {progress_LimitCalculation}")
                st.rerun()  # Refresh page to move to chat
            else:
                st.warning("Please enter a valid 'Name' and 'Age' and 'ID'.")
def handle_conv_streamlit():
    """Main Streamlit chat loop: initializes session state, replays history,
    routes each user message to question generation (vectorstore) or tutoring
    (independent), and renders the assistant reply.
    """
    lastSummary = setUserData()  # Gets summary from text, before EXTRACT_agent
    # Also, set data on sidebar
    ## --- Initialize Messages --- ##
    if "messages" not in st.session_state:
        st.session_state.messages = []
    # Replay the stored conversation so it survives Streamlit reruns.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(clean_latex(message["content"]))
    ## --- Initialize wolframAPI Last Answer --- ##
    if 'wolframAPI_ans' not in st.session_state:
        st.session_state.wolframAPI_ans = "None"
    ## --- Initialize Last Generated Question (For re_) --- ##
    if 'generatedQuestion' not in st.session_state:
        st.session_state.generatedQuestion = "None"
    ## --- Initialize Timer -- ##
    if 'start_time' not in st.session_state:
        st.session_state.start_time = time.time()
    if 'elapsed_time' not in st.session_state:
        st.session_state.elapsed_time = 0
    ## --- Start Of Conv_Handler --- ##
    if userInput := st.chat_input("What is up?"):
        st.session_state.messages.append({"role": "user", "content": userInput})
        with st.chat_message("user"):
            st.markdown(userInput)
        with st.chat_message("assistant"):
            # "/exit" and "/save" are command messages, not questions.
            if userInput.lower() == "/exit":
                st.write("**Exiting the chat. Refresh the page to start again.**")
                return
            elif userInput.lower() == "/save":
                st.markdown("**Starting to Summarize, Please wait.**")
                saveFunction(lastSummary)
                st.session_state.messages = []  # Clear Chat
                st.session_state.wolframAPI_ans = "None"  # Clear Last wolfram answer
                st.session_state.generatedQuestion = ""  # Clear Last Question
                st.rerun()  # Refresh Page
                # NOTE(review): st.rerun() raises to restart the script, so the
                # two lines below never execute — confirm intent.
                st.markdown("**Finished Summary**")
                return
            # Determine route and invoke appropriate chain
            routerWay = RouterDirection(userInput, router_chain2)
            if routerWay == "vectorstore":
                # Generate a fresh question from a randomly-picked stored one.
                rand_question = RouterRandOriginalQuestion(userInput,
                                                           router_chain2_step2,
                                                           newDict_database)
                result = chainRAG.invoke({"context": rand_question,
                                          "rand1": rand1})
                # Save Generated Question (for re_)
                st.session_state.generatedQuestion = result
                # Pre-compute the reference answer for later supervision.
                lastQuestion_ans_byWolfram = wolframAlpha_runner(result)
                # Save last wolfram question answered
                st.session_state.wolframAPI_ans = lastQuestion_ans_byWolfram
            else:  # independent
                # Just One model, no supervisor:
                # result = clean_latex(independentChat_noSupervisor(chainChat_Qwen2Math,userInput))
                # Load last wolfram question answered
                lastQuestion_ans_byWolfram = st.session_state.wolframAPI_ans
                # if (lastQuestion_ans_byWolfram != "EMPTY"):
                #     # WolframAlpha + llama + supervisor
                #     result = independentChat_withSupervisor(userInput, lastQuestion_ans_byWolfram)
                # else:
                #     # DeepSeek+llama + supervisor:
                #     result = independentChat_withSupervisor(userInput)
                # If ans_byWolf is None, DeepSeekR1 will run, else, it will use wolfram
                result = independentChat_withSupervisor(userInput, lastQuestion_ans_byWolfram)
                result = remove_thinking_deepseekR1(result)
            st.markdown(clean_latex(result))
            # print(result)
            # st.markdown(clean_latex(result))
            st.session_state.messages.append({"role": "assistant", "content": result})
def timeCapture_SaveFunction():
    """Measure the time spent in the current session, restart the session
    timer, and return the elapsed time as an "HH:MM:SS" string (also
    printed to stdout)."""
    now = time.time()
    session_duration = now - st.session_state.start_time
    # Restart the timer for the next session segment.
    st.session_state.start_time = time.time()
    st.session_state.elapsed_time = session_duration
    # Format the elapsed seconds as HH:MM:SS.
    hours, remainder = divmod(st.session_state.elapsed_time, 3600)
    minutes, seconds = divmod(remainder, 60)
    timer_text = f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d}"
    print(f"Timer: {timer_text}")
    return timer_text
def saveFunction(lastSummary):
    """Summarize the current chat session and persist it to disk.

    Produces two summaries from the chat history: a system-facing one
    (validated by a supervisor agent) written to ``path_userData`` and a
    user-facing one (unvalidated) written to ``path_summaryForUser``.
    A timestamped history copy is written next to each.

    Args:
        lastSummary: The previous summary text, fed to the agents as context.
    """
    def makeSummary(lastSummary, timeSpent, agent, path_save, max_length=300, supervisor=0):
        # Stop retrying once the per-message length budget is exhausted;
        # otherwise a persistently-rejected summary would recurse forever.
        if max_length <= 0:
            print("** Unable to create Summary: max_length exhausted **")
            return
        # Only include messages shorter than max_length to keep the agent's
        # input bounded; the limit shrinks on each retry.
        input_data = {
            "history": [
                {"role": m["role"], "content": m["content"]}
                for m in st.session_state.messages
                if len(m["content"]) < max_length
            ],
            "summary": lastSummary,
            "timeSpent": timeSpent
        }
        print("--Summary_agent START")
        newSummary = agent.invoke(input_data)
        if ((supervisor == 0) or (summarySupervisorRouter(newSummary) == '1')): # summary legit
            with open(path_save, 'w') as file:
                file.write(newSummary)
            timestamp = time.strftime("%d.%m.%y_%H:%M")
            path_save_history = f"{path_save[:-7]}{timestamp}.txt" # -7 removes "eng.txt"
            with open(path_save_history, 'w') as file:
                file.write(newSummary)
        else:
            print("** Unable to create Summary **")
            # BUGFIX: the retry previously omitted `timeSpent`, shifting every
            # positional argument (agent became timeSpent, etc.) and crashing
            # with a TypeError on the first rejected summary.
            makeSummary(lastSummary, timeSpent, agent, path_save, max_length - 50, supervisor)
        print("--Summary_agent FINISH")
    def summarySupervisorRouter(Summary):
        # Ask the supervisor agent whether the generated summary is legitimate.
        # Returns the JSON field "legitimate" (expected '1' / '0'; the caller
        # compares against the string '1').
        print("--SummarySupervisor START")
        jsonSupervisorSummary = summarySupervisor_agent.invoke({
            "Summary": Summary
        })
        directionOfSupervisor = json.loads(jsonSupervisorSummary)["legitimate"]
        # 1 == True --- 0 == False
        print(f"**ROUTER SUPERVISOR**: {directionOfSupervisor}")
        print("--SummarySupervisor FINISH")
        return directionOfSupervisor
    timeSpent = timeCapture_SaveFunction()
    makeSummary(lastSummary, timeSpent, summaryMaker_agent, path_userData, supervisor=1)
    makeSummary(lastSummary, timeSpent, summaryMakerUser_agent, path_summaryForUser, supervisor=0)
def independentChat_withSupervisor(userInput_q, wolframAns="None"):
    """Answer a free-form question with Llama, cross-checked by a second
    opinion (the stored Wolfram answer when available, DeepSeekR1 otherwise)
    through a two-level supervisor. Returns the final — possibly
    regenerated — answer.
    """
    def Supervisor_chat(supervisor_agent,result_llama, result_deepseek):
        # Ask a supervisor agent to compare the two answers; it returns
        # 'None' (no problem found) or an explanation of the error.
        print("--Chat_Supervisor START")
        json_supervisor_chat = supervisor_agent.invoke({
            "result_llama": result_llama,
            "result_DeepSeek": result_deepseek
        })
        directionOfSupervisor = json.loads(json_supervisor_chat)["legitimate"]
        print("--Chat_Supervisor FINISH")
        return directionOfSupervisor
    def regenerate(error_explanation):
        # Re-ask Llama about the last generated question, feeding it the
        # supervisor's error explanation.
        return chainChat_re_Llama3.invoke({
            "lastQuestion": st.session_state.generatedQuestion,
            "error_explanation": error_explanation
        })
    # Primary answer comes from Llama3 (other chains — Qwen2Math, raw
    # DeepSeekR1 — were tried and rejected: formatting / speed / control).
    result_llama = independentChat_noSupervisor(chainChat_Llama3, userInput_q)
    print(f"\n\n**wolfram** == \n{wolframAns}\n")
    # Pick the second opinion: DeepSeekR1 when no Wolfram answer is stored.
    if wolframAns == "None":
        second_model = "DeepSeekR1"
        result_DeepSeek = remove_thinking_deepseekR1(independentChat_noSupervisor(chainChat_DeepSeekR1, userInput_q))
    else:
        second_model = "Wolfram"
        result_DeepSeek = wolframAns
    print(f"\nLLama:\n{result_llama}\n"
          f"\n{second_model}:\n{result_DeepSeek}")
    # Level 1 supervisor: compare Llama against Wolfram/DeepSeekR1.
    supervisorDirection = Supervisor_chat(chatSupervisor_agent,result_llama, result_DeepSeek)
    print(f"\nResults Error:\n{supervisorDirection}\n")
    if supervisorDirection.lower() != 'none':
        return regenerate(supervisorDirection)
    # Level 2 supervisor: the first one may be sleepy — check Llama's
    # answer on its own.
    supervisorDirection_Level2 = Supervisor_chat(chatSupervisor_Level2_agent, result_llama, "")
    print(f"\nResults Error:\n{supervisorDirection_Level2}\n")
    if supervisorDirection_Level2.lower() != 'none':
        return regenerate(supervisorDirection_Level2)
    return result_llama
def independentChat_noSupervisor(agentChat, userInput_q):
    """Run a single chat agent on the question with the last few exchanges
    of history — no supervision or cross-checking."""
    print("--Chat START")
    payload = {
        "history": getChatHistory(numberOfInteractions=3),
        "question": userInput_q,
    }
    answer = agentChat.invoke(payload)
    print("--Chat FINISH")
    return answer
def getChatHistory(numberOfInteractions=5):
    """Return the last ``numberOfInteractions`` exchanges from the session
    message history as ``[{"role": ..., "content": ...}, ...]``.

    One interaction is one user message plus one assistant message, hence
    the trailing slice of ``2 * numberOfInteractions`` messages.

    Args:
        numberOfInteractions: Number of recent exchanges to include;
            values <= 0 yield an empty history.
    """
    # BUGFIX: with numberOfInteractions == 0 the old slice [0:] returned
    # the ENTIRE history instead of none; guard explicitly.
    if numberOfInteractions <= 0:
        return []
    # *(-2) because 1 user + 1 ai per interaction; negative to take the LAST ones.
    last_messages = st.session_state.messages[numberOfInteractions * (-2):]
    return [
        {"role": m["role"], "content": m["content"]}
        for m in last_messages
    ]
def wolframAlpha_runner(RAG_question): # Transformer + API to answer
    """Translate a RAG-generated question into a WolframAlpha query and run it.

    Args:
        RAG_question: The natural-language question produced by the RAG chain.

    Returns:
        The WolframAlpha answer string, or the sentinel string "None" when
        the API call fails or cannot answer (callers treat "None" as
        'fall back to DeepSeekR1').
    """
    wolframT = wolframAlpha_transformer_agent.invoke({
        "question": RAG_question
    })
    print(f"**WolframT: \n{wolframT}\n")
    try:
        # Try to wolf API, if can't, return None. None will get into independent
        # If it None, DeepSeekR1 will run, else, it will use wolfram
        wolframAnswer = wolfram.run(wolframT)
        if (wolframAnswer == "Wolfram Alpha wasn't able to answer it"):
            wolframAnswer = "None"
    except Exception:
        # BUGFIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; `Exception` keeps the intended best-effort
        # fallback without masking interpreter-level signals.
        wolframAnswer = "None"
    print(f"**WolframAnswer: \n{wolframAnswer}")
    return wolframAnswer
## uses extractor_aget, saves userData in JSON format, updating sidebar.
def setUserData():
    """Load the saved user summary, extract structured user data from it
    via the EXTRACTOR agent, and render that data on the Streamlit sidebar.

    Returns:
        The raw summary text read from ``path_userData`` (before extraction).
    """
    # No Data -- go to get_user_info()
    with open(path_userData, 'r') as file:
        lastSummary = file.read()
    if lastSummary == 'EMPTY': # No Data on User
        # NOTE(review): execution falls through after this call — presumably
        # get_user_info() ends the script run (e.g. via st.rerun()) so the
        # extraction below never sees the literal 'EMPTY'; confirm.
        get_user_info()
    # Must be Data, extract using EXTRACTOR_agent
    userData = userDataEXTRACTOR_agent.chainRunner.invoke(lastSummary)
    userData = json.loads(userData)
    print(userData)
    st.sidebar.title("User Data")
    # Keys shaped "Progress_<name>" render as a header + progress bar;
    # any other key renders as a metric labeled by the text before "_".
    for key, val in (userData.items()):
        key = key.split("_")
        if (key[0] == 'Progress'):
            st.sidebar.header(key[1])
            st.sidebar.progress(int(val))
        else:
            st.sidebar.metric(key[0], val)
        # st.sidebar.metric(key, val)
    return lastSummary # lastSummary is the text, before EXTRACT_agent
def basicStreamlit():
    """Minimal Streamlit demo page: echo a text prompt, show static user
    stats in the sidebar, and animate a progress bar."""
    prompt = st.text_input("Say something")
    if prompt:
        st.write(f"User prompt: {prompt}")
    user_data = {"Score": 85, "Tasks Completed": 7, "Level": 3}
    # Sidebar content
    st.sidebar.title("User Progress")
    for label in ("Score", "Tasks Completed", "Level"):
        st.sidebar.metric(label, user_data[label])
    # Progress bar simulation
    progress = st.sidebar.progress(0)
    for step in range(101): # Simulating progress
        time.sleep(0.05)
        progress.progress(step)
    # Main content
    st.title("Main Application")
    st.write("This is your main content area.")
# basicStreamlit()
import re
def clean_latex(input_string):
    """Strip ``[asy]...[/asy]`` (Asymptote) blocks and collapse doubled
    backslashes so the text renders cleanly as Markdown/LaTeX."""
    without_asy = re.sub(r'\[asy\](.*?)\[/asy\]', '', input_string, flags=re.DOTALL)
    return without_asy.replace('\\\\', '\\')
def remove_thinking_deepseekR1(text):
    """Drop DeepSeek-R1's ``<think>...</think>`` reasoning block and trim
    surrounding whitespace from the remaining answer."""
    without_thinking = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
    return without_thinking.strip()