File size: 3,459 Bytes
64f5e18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from typing import List


def format_as_translator(message: str, languages: str = 'German') -> str:
    """
    Build a few-shot translation prompt from a single user message.

    The prompt is made of fifteen fixed example turns (five each for German,
    Chinese and Arabic) followed by one final user turn that asks for
    ``message`` to be translated. Turns are delimited with Llama 3 style
    special tokens. No conversation history is included.

    :param message: The text to translate; appended verbatim to the final user turn.
    :param languages: Target language name substituted into the final
        "Translate this into <languages>: " instruction. Defaults to 'German'.
    :return: The assembled prompt. It ends with the '<|eot_id|>' that closes the
        final user turn; no reply header is appended after it.
    """
    # NOTE(review): the reply role is 'translator' and '<|begin_of_text|>' is
    # repeated for every turn. Both appear to differ from the documented
    # Llama 3 Instruct template (system/user/assistant roles, one leading
    # begin-of-text token) -- confirm this is intentional before changing it.
    user_turn_open = '<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n'
    reply_turn_open = '<|eot_id|><|start_header_id|>translator<|end_header_id|>\n\n'
    turn_close = '<|eot_id|>'

    # (request, expected translation) pairs shown to the model as worked examples.
    examples = (
        ('Translate this into German: How are you today?',
         'Wie geht es dir heute?'),
        ('Translate this into German: I love reading books.',
         'Ich liebe es, Bücher zu lesen.'),
        ('Translate this into German: Can you help me with this?',
         'Kannst du mir damit helfen?'),
        ("Translate this into German: It's a beautiful day outside.",
         'Es ist ein schöner Tag draußen.'),
        ('Translate this into German: What time is dinner?',
         'Um wie viel Uhr ist das Abendessen?'),
        ('Translate this into Chinese: What is your name?',
         '你叫什么名字? (Nǐ jiào shénme míngzì?)'),
        ('Translate this into Chinese: I am learning to speak Chinese.',
         '我在学习说中文。 (Wǒ zài xuéxí shuō zhōngwén.)'),
        ('Translate this into Chinese: Where is the nearest subway station?',
         '最近的地铁站在哪里? (Zuìjìn de dìtiě zhàn zài nǎlǐ?)'),
        ('Translate this into Chinese: I would like a cup of coffee, please.',
         '请给我来一杯咖啡。 (Qǐng gěi wǒ lái yī bēi kāfēi.)'),
        ('Translate this into Chinese: How much does this cost?',
         '这个多少钱? (Zhège duōshǎo qián?)'),
        ('Translate this into Arabic: Where can I find a good restaurant?',
         'أين يمكنني أن أجد مطعمًا جيدًا؟ (Ayna yumkinunī an ajida maṭʿaman jayyidan?)'),
        ('Translate this into Arabic: I need to book a flight.',
         'أحتاج إلى حجز رحلة طيران. (Aḥtāj ilā ḥajz riḥlat ṭayrān.)'),
        ('Translate this into Arabic: What is the weather like tomorrow?',
         'كيف سيكون الطقس غدًا؟ (Kayfa sayakūn al-ṭaqs ghadan?)'),
        ('Translate this into Arabic: I am visiting for the first time.',
         "أنا أزور للمرة الأولى. (Anā azūr lilmarrati al'ūlā.)"),
        ('Translate this into Arabic: Can you speak slower, please?',
         "هل يمكنك التحدث ببطء، من فضلك؟ (Hal yumkinuk al-taḥadduth bibuṭ', min faḍlik?)"),
    )

    fewshot_prefix = ''.join(
        user_turn_open + request + reply_turn_open + translation + turn_close
        for request, translation in examples
    )

    # Earlier chat history is deliberately left out so that a faulty model
    # reply from a previous turn cannot get mixed in with the few-shot examples.
    final_turn = user_turn_open + f'Translate this into {languages}: ' + message + turn_close

    return fewshot_prefix + final_turn