This is a sentence-transformers model finetuned from Alibaba-NLP/gte-base-en-v1.5 on the query-to-dataset-viewer-descriptions dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
Full model architecture:

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: NewModel
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
```
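You can verify these settings after loading the model. A minimal sketch, assuming the repository ID used in the usage example below:

```python
from sentence_transformers import SentenceTransformer

# Repository ID as in the usage example below; adjust if the model lives under a namespace
model = SentenceTransformer("query-to-dataset-viewer-descriptions")

print(model.max_seq_length)             # 8192
print(model[1].pooling_mode_cls_token)  # True: sentence embeddings come from the CLS token
```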
First install the Sentence Transformers library:
```bash
pip install -U sentence-transformers
```
Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer
# Download from the 🤗 Hub
model = SentenceTransformer("query-to-dataset-viewer-descriptions")
# Run inference
sentences = [
'USER_QUERY: kotlin code dataset',
'HUB_DATASET_PREVIEW: DATASET_NAME: "mvasiliniuc/iva-kotlin-codeint"\nFEATURES: {\'repo_name\': {\'dtype\': \'string\', \'_type\': \'Value\'}, \'path\': {\'dtype\': \'string\', \'_type\': \'Value\'}, \'copies\': {\'dtype\': \'string\', \'_type\': \'Value\'}, \'size\': {\'dtype\': \'string\', \'_type\': \'Value\'}, \'content\': {\'dtype\': \'string\', \'_type\': \'Value\'}, \'license\': {\'dtype\': \'string\', \'_type\': \'Value\'}}\nDATA SAMPLE:\n[\n {\n "row_idx": 0,\n "row": {\n "repo_name": "Cognifide/gradle-aem-plugin",\n "path": "src/main/kotlin/com/cognifide/gradle/aem/instance/tasks/InstanceReload.kt",\n "copies": "1",\n "size": "1052",\n "content": "package com.cognifide.gradle.aem.instance.tasks\\n\\nimport com.cognifide.gradle.aem.common.instance.action.AwaitUpAction\\nimport com.cognifide.gradle.aem.common.instance.action.ReloadAction\\nimport com.cognifide.gradle.aem.common.instance.names\\nimport com.cognifide.gradle.aem.common.tasks.Instance\\nimport org.gradle.api.tasks.TaskAction\\n\\nopen class InstanceReload : Instance() {\\n\\n private var reloadOptions: ReloadAction.() -> Unit = {}\\n\\n fun reload(options: ReloadAction.() -> Unit) {\\n this.reloadOptions = options\\n }\\n\\n private var awaitUpOptions: AwaitUpAction.() -> Unit = {}\\n\\n fun awaitUp(options: AwaitUpAction.() -> Unit) {\\n this.awaitUpOptions = options\\n }\\n\\n @TaskAction\\n fun reload() {\\n instanceManager.awaitReloaded(anyInstances, reloadOptions, awaitUpOptions)\\n common.notifier.lifecycle(\\"Instance(s) reloaded\\", \\"Which: ${anyInstances.names}\\")\\n }\\n\\n init {\\n description = \\"Reloads all AEM instance(s).\\"\\n }\\n\\n companion object {\\n const val NAME = \\"instanceReload\\"\\n }\\n}\\n",\n "license": "apache-2.0"\n },\n "truncated_cells": []\n },\n {\n "row_idx": 1,\n "row": {\n "repo_name": "80998062/Fank",\n "path": "presentation/src/main/java/com/sinyuk/fanfou/ui/status/StatusView.kt",\n "copies": "1",\n "size": "8490",\n "content": "/*\\n *\\n * * Apache License\\n * *\\n * * Copyright [2017] Sinyuk\\n * *\\n * * Licensed under the Apache License, Version 2.0 (the \\"License\\");\\n * * you may not use this file except in compliance with the License.\\n * * You may obtain a copy of the License at\\n * *\\n * * http://www.apache.org/licenses/LICENSE-2.0\\n * *\\n * * Unless required by applicable law or agreed to in writing, software\\n * * distributed under the License is distributed on an \\"AS IS\\" BASIS,\\n * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\\n * * See the License for the specific language governing permissions and\\n * * limitations under the License.\\n *\\n */\\n\\npackage com.sinyuk.fanfou.ui.status\\n\\nimport android.os.Build\\nimport android.os.Bundle\\nimport android.support.v4.app.Fragment\\nimport android.support.v4.app.FragmentPagerAdapter\\nimport android.text.Editable\\nimport android.text.TextWatcher\\nimport android.view.View\\nimport android.view.ViewTreeObserver\\nimport cn.dreamtobe.kpswitch.util.KeyboardUtil\\nimport com.linkedin.android.spyglass.suggestions.SuggestionsResult\\nimport com.linkedin.android.spyglass.suggestions.interfaces.Suggestible\\nimport com.linkedin.android.spyglass.suggestions.interfaces.SuggestionsResultListener\\nimport com.linkedin.android.spyglass.suggestions.interfaces.SuggestionsVisibilityManager\\nimport com.linkedin.android.spyglass.tokenization.QueryToken\\nimport com.linkedin.android.spyglass.tokenization.impl.WordTokenizer\\nimport 
com.linkedin.android.spyglass.tokenization.impl.WordTokenizerConfig\\nimport com.linkedin.android.spyglass.tokenization.interfaces.QueryTokenReceiver\\nimport com.sinyuk.fanfou.R\\nimport com.sinyuk.fanfou.base.AbstractActivity\\nimport com.sinyuk.fanfou.base.AbstractFragment\\nimport com.sinyuk.fanfou.di.Injectable\\nimport com.sinyuk.fanfou.domain.DO.Player\\nimport com.sinyuk.fanfou.domain.DO.Status\\nimport com.sinyuk.fanfou.domain.STATUS_LIMIT\\nimport com.sinyuk.fanfou.domain.StatusCreation\\nimport com.sinyuk.fanfou.domain.TIMELINE_CONTEXT\\nimport com.sinyuk.fanfou.ui.editor.EditorView\\nimport com.sinyuk.fanfou.ui.editor.MentionListView\\nimport com.sinyuk.fanfou.ui.timeline.TimelineView\\nimport com.sinyuk.fanfou.util.obtainViewModelFromActivity\\nimport com.sinyuk.fanfou.viewmodel.FanfouViewModelFactory\\nimport com.sinyuk.fanfou.viewmodel.PlayerViewModel\\nimport kotlinx.android.synthetic.main.status_view.*\\nimport kotlinx.android.synthetic.main.status_view_footer.*\\nimport kotlinx.android.synthetic.main.status_view_reply_actionbar.*\\nimport javax.inject.Inject\\n\\n\\n/**\\n * Created by sinyuk on 2018/1/12.\\n *\\n */\\nclass StatusView : AbstractFragment(), Injectable, QueryTokenReceiver, SuggestionsResultListener, SuggestionsVisibilityManager {\\n\\n companion object {\\n fun newInstance(status: Status, photoExtra: Bundle? = null) = StatusView().apply {\\n arguments = Bundle().apply {\\n putParcelable(\\"status\\", status)\\n putBundle(\\"photoExtra\\", photoExtra)\\n }\\n }\\n }\\n\\n override fun layoutId() = R.layout.status_view\\n\\n @Inject\\n lateinit var factory: FanfouViewModelFactory\\n\\n private val playerViewModel by lazy { obtainViewModelFromActivity(factory, PlayerViewModel::class.java) }\\n\\n override fun onEnterAnimationEnd(savedInstanceState: Bundle?) {\\n super.onEnterAnimationEnd(savedInstanceState)\\n navBack.setOnClickListener { onBackPressedSupport() }\\n setupEditor()\\n setupKeyboard()\\n onTextChanged(0)\\n setupViewPager()\\n\\n val status = arguments!!.getParcelable<Status>(\\"status\\")\\n fullscreenButton.setOnClickListener {\\n (activity as AbstractActivity).start(EditorView.newInstance(status.id,\\n replyEt.mentionsText,\\n StatusCreation.REPOST_STATUS))\\n replyEt.text = null\\n }\\n }\\n\\n private fun setupViewPager() {\\n val status = arguments!!.getParcelable<Status>(\\"status\\")\\n val bundle = arguments!!.getBundle(\\"photoExtra\\")\\n val fragments: List<Fragment> = if (findChildFragment(TimelineView::class.java) == null) {\\n val mentionView = MentionListView()\\n mentionView.onItemClickListener = onSuggestionSelectListener\\n mutableListOf(TimelineView.contextTimeline(TIMELINE_CONTEXT, status, bundle), mentionView)\\n } else {\\n mutableListOf(findChildFragment(TimelineView::class.java), MentionListView())\\n }\\n\\n viewPager.setPagingEnabled(false)\\n viewPager.offscreenPageLimit = 1\\n viewPager.adapter = object : FragmentPagerAdapter(childFragmentManager) {\\n override fun getItem(position: Int) = fragments[position]\\n\\n override fun getCount() = fragments.size\\n }\\n }\\n\\n private var keyboardListener: ViewTreeObserver.OnGlobalLayoutListener? 
= null\\n\\n private fun setupKeyboard() {\\n keyboardListener = KeyboardUtil.attach(activity, panelRoot, {\\n // TODO: how comes the Exception: panelRootContainer must not be null\\n panelRootContainer?.visibility =\\n if (it) {\\n if (replyEt.requestFocus()) replyEt.setSelection(replyEt.text.length)\\n View.VISIBLE\\n } else {\\n replyEt.clearFocus()\\n View.GONE\\n }\\n })\\n }\\n\\n private val config = WordTokenizerConfig.Builder()\\n .setExplicitChars(\\"@\\")\\n .setThreshold(3)\\n .setMaxNumKeywords(5)\\n .setWordBreakChars(\\" \\").build()\\n\\n private fun setupEditor() {\\n replyEt.tokenizer = WordTokenizer(config)\\n replyEt.setAvoidPrefixOnTap(true)\\n replyEt.setQueryTokenReceiver(this)\\n replyEt.setSuggestionsVisibilityManager(this)\\n replyEt.setAvoidPrefixOnTap(true)\\n\\n replyCommitButton.setOnClickListener { }\\n\\n if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.O)\\n textCountProgress.min = 0\\n textCountProgress.max = STATUS_LIMIT\\n replyEt.addTextChangedListener(object : TextWatcher {\\n override fun afterTextChanged(s: Editable?) {\\n onTextChanged(s?.length ?: 0)\\n }\\n\\n override fun beforeTextChanged(s: CharSequence?, start: Int, count: Int, after: Int) {\\n\\n }\\n\\n override fun onTextChanged(s: CharSequence?, start: Int, before: Int, count: Int) {\\n\\n }\\n })\\n }\\n\\n\\n /**\\n * @param count \\u5b57\\u6570\\n */\\n private fun onTextChanged(count: Int) {\\n textCountProgress.progress = count\\n replyCommitButton.isEnabled = count in 1..STATUS_LIMIT\\n }\\n\\n\\n private val onSuggestionSelectListener = object : MentionListView.OnItemClickListener {\\n override fun onItemClick(position: Int, item: Suggestible) {\\n (item as Player).let {\\n replyEt.insertMention(it)\\n displaySuggestions(false)\\n playerViewModel.updateMentionedAt(it) //\\n onTextChanged(replyEt.text.length)\\n replyEt.requestFocus()\\n replyEt.setSelection(replyEt.text.length)\\n }\\n }\\n }\\n\\n @Suppress(\\"PrivatePropertyName\\")\\n private val BUCKET = \\"player-mentioned\\"\\n\\n override fun onQueryReceived(queryToken: QueryToken): MutableList<String> {\\n val data = playerViewModel.filter(queryToken.keywords)\\n onReceiveSuggestionsResult(SuggestionsResult(queryToken, data), BUCKET)\\n return arrayOf(BUCKET).toMutableList()\\n }\\n\\n override fun onReceiveSuggestionsResult(result: SuggestionsResult, bucket: String) {\\n val data = result.suggestions\\n if (data?.isEmpty() != false) return\\n displaySuggestions(true)\\n findChildFragment(MentionListView::class.java).setData(data)\\n }\\n\\n override fun displaySuggestions(display: Boolean) {\\n viewPager.setCurrentItem(if (display) 1 else 0, true)\\n }\\n\\n override fun isDisplayingSuggestions() = viewPager.currentItem == 1\\n\\n override fun onBackPressedSupport(): Boolean {\\n when {\\n panelRootContainer.visibility == View.VISIBLE -> KeyboardUtil.hideKeyboard(panelRootContainer)\\n isDisplayingSuggestions -> displaySuggestions(false)\\n else -> pop()\\n }\\n return true\\n\\n }\\n\\n override fun onDestroy() {\\n keyboardListener?.let { KeyboardUtil.detach(activity, it) }\\n activity?.currentFocus?.let { KeyboardUtil.hideKeyboard(it) }\\n super.onDestroy()\\n }\\n\\n}",\n "license": "mit"\n },\n "truncated_cells": []\n }\n]',
'NEGATIVE: DATASET_NAME: "vikp/starcoder_cleaned"\nFEATURES: {\'code\': {\'dtype\': \'string\', \'_type\': \'Value\'}, \'repo_path\': {\'dtype\': \'string\', \'_type\': \'Value\'}}\nDATA SAMPLE:\n[\n {\n "row_idx": 0,\n "row": {\n "code": "# ---\\n# jupyter:\\n# jupytext:\\n# text_representation:\\n# extension: .py\\n# format_name: light\\n# format_version: \'1.5\'\\n# jupytext_version: 1.14.4\\n# kernelspec:\\n# display_name: Python 3\\n# language: python\\n# name: python3\\n# ---\\n\\n# # 09 Strain Gage\\n#\\n# This is one of the most commonly used sensor. It is used in many transducers. Its fundamental operating principle is fairly easy to understand and it will be the purpose of this lecture. \\n#\\n# A strain gage is essentially a thin wire that is wrapped on film of plastic. \\n# <img src=\\"img/StrainGage.png\\" width=\\"200\\">\\n# The strain gage is then mounted (glued) on the part for which the strain must be measured. \\n# <img src=\\"img/Strain_gauge_2.jpg\\" width=\\"200\\">\\n#\\n# ## Stress, Strain\\n# When a beam is under axial load, the axial stress, $\\\\sigma_a$, is defined as:\\n# \\\\begin{align*}\\n# \\\\sigma_a = \\\\frac{F}{A}\\n# \\\\end{align*}\\n# with $F$ the axial load, and $A$ the cross sectional area of the beam under axial load.\\n#\\n# <img src=\\"img/BeamUnderStrain.png\\" width=\\"200\\">\\n#\\n# Under the load, the beam of length $L$ will extend by $dL$, giving rise to the definition of strain, $\\\\epsilon_a$:\\n# \\\\begin{align*}\\n# \\\\epsilon_a = \\\\frac{dL}{L}\\n# \\\\end{align*}\\n# The beam will also contract laterally: the cross sectional area is reduced by $dA$. This results in a transverval strain $\\\\epsilon_t$. The transversal and axial strains are related by the Poisson\'s ratio:\\n# \\\\begin{align*}\\n# \\\\nu = - \\\\frac{\\\\epsilon_t }{\\\\epsilon_a}\\n# \\\\end{align*}\\n# For a metal the Poission\'s ratio is typically $\\\\nu = 0.3$, for an incompressible material, such as rubber (or water), $\\\\nu = 0.5$.\\n#\\n# Within the elastic limit, the axial stress and axial strain are related through Hooke\'s law by the Young\'s modulus, $E$:\\n# \\\\begin{align*}\\n# \\\\sigma_a = E \\\\epsilon_a\\n# \\\\end{align*}\\n#\\n# <img src=\\"img/ElasticRegime.png\\" width=\\"200\\">\\n\\n# ## Resistance of a wire\\n#\\n# The electrical resistance of a wire $R$ is related to its physical properties (the electrical resistiviy, $\\\\rho$ in $\\\\Omega$/m) and its geometry: length $L$ and cross sectional area $A$.\\n#\\n# \\\\begin{align*}\\n# R = \\\\frac{\\\\rho L}{A}\\n# \\\\end{align*}\\n#\\n# Mathematically, the change in wire dimension will result inchange in its electrical resistance. 
This can be derived from first principle:\\n# \\\\begin{align}\\n# \\\\frac{dR}{R} = \\\\frac{d\\\\rho}{\\\\rho} + \\\\frac{dL}{L} - \\\\frac{dA}{A}\\n# \\\\end{align}\\n# If the wire has a square cross section, then:\\n# \\\\begin{align*}\\n# A & = L\'^2 \\\\\\\\\\n# \\\\frac{dA}{A} & = \\\\frac{d(L\'^2)}{L\'^2} = \\\\frac{2L\'dL\'}{L\'^2} = 2 \\\\frac{dL\'}{L\'}\\n# \\\\end{align*}\\n# We have related the change in cross sectional area to the transversal strain.\\n# \\\\begin{align*}\\n# \\\\epsilon_t = \\\\frac{dL\'}{L\'}\\n# \\\\end{align*}\\n# Using the Poisson\'s ratio, we can relate then relate the change in cross-sectional area ($dA/A$) to axial strain $\\\\epsilon_a = dL/L$.\\n# \\\\begin{align*}\\n# \\\\epsilon_t &= - \\\\nu \\\\epsilon_a \\\\\\\\\\n# \\\\frac{dL\'}{L\'} &= - \\\\nu \\\\frac{dL}{L} \\\\; \\\\text{or}\\\\\\\\\\n# \\\\frac{dA}{A} & = 2\\\\frac{dL\'}{L\'} = -2 \\\\nu \\\\frac{dL}{L}\\n# \\\\end{align*}\\n# Finally we can substitute express $dA/A$ in eq. for $dR/R$ and relate change in resistance to change of wire geometry, remembering that for a metal $\\\\nu =0.3$:\\n# \\\\begin{align}\\n# \\\\frac{dR}{R} & = \\\\frac{d\\\\rho}{\\\\rho} + \\\\frac{dL}{L} - \\\\frac{dA}{A} \\\\\\\\\\n# & = \\\\frac{d\\\\rho}{\\\\rho} + \\\\frac{dL}{L} - (-2\\\\nu \\\\frac{dL}{L}) \\\\\\\\\\n# & = \\\\frac{d\\\\rho}{\\\\rho} + 1.6 \\\\frac{dL}{L} = \\\\frac{d\\\\rho}{\\\\rho} + 1.6 \\\\epsilon_a\\n# \\\\end{align}\\n# It also happens that for most metals, the resistivity increases with axial strain. In general, one can then related the change in resistance to axial strain by defining the strain gage factor:\\n# \\\\begin{align}\\n# S = 1.6 + \\\\frac{d\\\\rho}{\\\\rho}\\\\cdot \\\\frac{1}{\\\\epsilon_a}\\n# \\\\end{align}\\n# and finally, we have:\\n# \\\\begin{align*}\\n# \\\\frac{dR}{R} = S \\\\epsilon_a\\n# \\\\end{align*}\\n# $S$ is materials dependent and is typically equal to 2.0 for most commercially availabe strain gages. It is dimensionless.\\n#\\n# Strain gages are made of thin wire that is wraped in several loops, effectively increasing the length of the wire and therefore the sensitivity of the sensor.\\n#\\n# _Question:\\n#\\n# Explain why a longer wire is necessary to increase the sensitivity of the sensor_.\\n#\\n# Most commercially available strain gages have a nominal resistance (resistance under no load, $R_{ini}$) of 120 or 350 $\\\\Omega$.\\n#\\n# Within the elastic regime, strain is typically within the range $10^{-6} - 10^{-3}$, in fact strain is expressed in unit of microstrain, with a 1 microstrain = $10^{-6}$. Therefore, changes in resistances will be of the same order. If one were to measure resistances, we will need a dynamic range of 120 dB, whih is typically very expensive. Instead, one uses the Wheatstone bridge to transform the change in resistance to a voltage, which is easier to measure and does not require such a large dynamic range.\\n\\n# ## Wheatstone bridge:\\n# <img src=\\"img/WheatstoneBridge.png\\" width=\\"200\\">\\n#\\n# The output voltage is related to the difference in resistances in the bridge:\\n# \\\\begin{align*}\\n# \\\\frac{V_o}{V_s} = \\\\frac{R_1R_3-R_2R_4}{(R_1+R_4)(R_2+R_3)}\\n# \\\\end{align*}\\n#\\n# If the bridge is balanced, then $V_o = 0$, it implies: $R_1/R_2 = R_4/R_3$.\\n#\\n# In practice, finding a set of resistors that balances the bridge is challenging, and a potentiometer is used as one of the resistances to do minor adjustement to balance the bridge. 
If one did not do the adjustement (ie if we did not zero the bridge) then all the measurement will have an offset or bias that could be removed in a post-processing phase, as long as the bias stayed constant.\\n#\\n# If each resistance $R_i$ is made to vary slightly around its initial value, ie $R_i = R_{i,ini} + dR_i$. For simplicity, we will assume that the initial value of the four resistances are equal, ie $R_{1,ini} = R_{2,ini} = R_{3,ini} = R_{4,ini} = R_{ini}$. This implies that the bridge was initially balanced, then the output voltage would be:\\n#\\n# \\\\begin{align*}\\n# \\\\frac{V_o}{V_s} = \\\\frac{1}{4} \\\\left( \\\\frac{dR_1}{R_{ini}} - \\\\frac{dR_2}{R_{ini}} + \\\\frac{dR_3}{R_{ini}} - \\\\frac{dR_4}{R_{ini}} \\\\right)\\n# \\\\end{align*}\\n#\\n# Note here that the changes in $R_1$ and $R_3$ have a positive effect on $V_o$, while the changes in $R_2$ and $R_4$ have a negative effect on $V_o$. In practice, this means that is a beam is a in tension, then a strain gage mounted on the branch 1 or 3 of the Wheatstone bridge will produce a positive voltage, while a strain gage mounted on branch 2 or 4 will produce a negative voltage. One takes advantage of this to increase sensitivity to measure strain.\\n#\\n# ### Quarter bridge\\n# One uses only one quarter of the bridge, ie strain gages are only mounted on one branch of the bridge.\\n#\\n# \\\\begin{align*}\\n# \\\\frac{V_o}{V_s} = \\\\pm \\\\frac{1}{4} \\\\epsilon_a S\\n# \\\\end{align*}\\n# Sensitivity, $G$:\\n# \\\\begin{align*}\\n# G = \\\\frac{V_o}{\\\\epsilon_a} = \\\\pm \\\\frac{1}{4}S V_s\\n# \\\\end{align*}\\n#\\n#\\n# ### Half bridge\\n# One uses half of the bridge, ie strain gages are mounted on two branches of the bridge.\\n#\\n# \\\\begin{align*}\\n# \\\\frac{V_o}{V_s} = \\\\pm \\\\frac{1}{2} \\\\epsilon_a S\\n# \\\\end{align*}\\n#\\n# ### Full bridge\\n#\\n# One uses of the branches of the bridge, ie strain gages are mounted on each branch.\\n#\\n# \\\\begin{align*}\\n# \\\\frac{V_o}{V_s} = \\\\pm \\\\epsilon_a S\\n# \\\\end{align*}\\n#\\n# Therefore, as we increase the order of bridge, the sensitivity of the instrument increases. However, one should be carefull how we mount the strain gages as to not cancel out their measurement.\\n\\n# _Exercise_\\n#\\n# 1- Wheatstone bridge\\n#\\n# <img src=\\"img/WheatstoneBridge.png\\" width=\\"200\\">\\n#\\n# > How important is it to know \\\\& match the resistances of the resistors you employ to create your bridge?\\n# > How would you do that practically?\\n# > Assume $R_1=120\\\\,\\\\Omega$, $R_2=120\\\\,\\\\Omega$, $R_3=120\\\\,\\\\Omega$, $R_4=110\\\\,\\\\Omega$, $V_s=5.00\\\\,\\\\text{V}$. What is $V_\\\\circ$?\\n\\nVs = 5.00\\nVo = (120**2-120*110)/(230*240) * Vs\\nprint(\'Vo = \',Vo, \' V\')\\n\\n# typical range in strain a strain gauge can measure\\n# 1 -1000 micro-Strain\\nAxialStrain = 1000*10**(-6) # axial strain\\nStrainGageFactor = 2\\nR_ini = 120 # Ohm\\nR_1 = R_ini+R_ini*StrainGageFactor*AxialStrain\\nprint(R_1)\\nVo = (120**2-120*(R_1))/((120+R_1)*240) * Vs\\nprint(\'Vo = \', Vo, \' V\')\\n\\n# > How important is it to know \\\\& match the resistances of the resistors you employ to create your bridge?\\n# > How would you do that practically?\\n# > Assume $R_1= R_2 =R_3=120\\\\,\\\\Omega$, $R_4=120.01\\\\,\\\\Omega$, $V_s=5.00\\\\,\\\\text{V}$. What is $V_\\\\circ$?\\n\\nVs = 5.00\\nVo = (120**2-120*120.01)/(240.01*240) * Vs\\nprint(Vo)\\n\\n# 2- Strain gage 1:\\n#\\n# One measures the strain on a bridge steel beam. 
The modulus of elasticity is $E=190$ GPa. Only one strain gage is mounted on the bottom of the beam; the strain gage factor is $S=2.02$.\\n#\\n# > a) What kind of electronic circuit will you use? Draw a sketch of it.\\n#\\n# > b) Assume all your resistors including the unloaded strain gage are balanced and measure $120\\\\,\\\\Omega$, and that the strain gage is at location $R_2$. The supply voltage is $5.00\\\\,\\\\text{VDC}$. Will $V_\\\\circ$ be positive or negative when a downward load is added?\\n\\n# In practice, we cannot have all resistances = 120 $\\\\Omega$. at zero load, the bridge will be unbalanced (show $V_o \\\\neq 0$). How could we balance our bridge?\\n#\\n# Use a potentiometer to balance bridge, for the load cell, we \'\'zero\'\' the instrument.\\n#\\n# Other option to zero-out our instrument? Take data at zero-load, record the voltage, $V_{o,noload}$. Substract $V_{o,noload}$ to my data.\\n\\n# > c) For a loading in which $V_\\\\circ = -1.25\\\\,\\\\text{mV}$, calculate the strain $\\\\epsilon_a$ in units of microstrain.\\n\\n# \\\\begin{align*}\\n# \\\\frac{V_o}{V_s} & = - \\\\frac{1}{4} \\\\epsilon_a S\\\\\\\\\\n# \\\\epsilon_a & = -\\\\frac{4}{S} \\\\frac{V_o}{V_s}\\n# \\\\end{align*}\\n\\nS = 2.02\\nVo = -0.00125\\nVs = 5\\neps_a = -1*(4/S)*(Vo/Vs)\\nprint(eps_a)\\n\\n# > d) Calculate the axial stress (in MPa) in the beam under this load.\\n\\n\\n\\n# > e) You now want more sensitivity in your measurement, you install a second strain gage on to\\n\\n# p of the beam. Which resistor should you use for this second active strain gage?\\n#\\n# > f) With this new setup and the same applied load than previously, what should be the output voltage?\\n\\n# 3- Strain Gage with Long Lead Wires \\n#\\n# <img src=\\"img/StrainGageLongWires.png\\" width=\\"360\\">\\n#\\n# A quarter bridge strain gage Wheatstone bridge circuit is constructed with $120\\\\,\\\\Omega$ resistors and a $120\\\\,\\\\Omega$ strain gage. For this practical application, the strain gage is located very far away form the DAQ station and the lead wires to the strain gage are $10\\\\,\\\\text{m}$ long and the lead wire have a resistance of $0.080\\\\,\\\\Omega/\\\\text{m}$. The lead wire resistance can lead to problems since $R_{lead}$ changes with temperature.\\n#\\n# > Design a modified circuit that will cancel out the effect of the lead wires.\\n\\n# ## Homework\\n#\\n",\n "repo_path": "Lectures/09_StrainGage.ipynb"\n },\n "truncated_cells": []\n },\n {\n "row_idx": 1,\n "row": {\n "code": "# ---\\n# jupyter:\\n# jupytext:\\n# split_at_heading: true\\n# text_representation:\\n# extension: .py\\n# format_name: light\\n# format_version: \'1.5\'\\n# jupytext_version: 1.14.4\\n# kernelspec:\\n# display_name: Python 3\\n# language: python\\n# name: python3\\n# ---\\n\\n#export\\nfrom fastai.basics import *\\nfrom fastai.tabular.core import *\\nfrom fastai.tabular.model import *\\n\\nfrom fastai.tabular.data import *\\n\\n#hide\\nfrom nbdev.showdoc import *\\n\\n\\n# +\\n#default_exp tabular.learner\\n# -\\n\\n# # Tabular learner\\n#\\n# > The function to immediately get a `Learner` ready to train for tabular data\\n\\n# The main function you probably want to use in this module is `tabular_learner`. It will automatically create a `TabulaModel` suitable for your data and infer the irght loss function. 
See the [tabular tutorial](http://docs.fast.ai/tutorial.tabular) for an example of use in context.\\n\\n# ## Main functions\\n\\n#export\\n@log_args(but_as=Learner.__init__)\\nclass TabularLearner(Learner):\\n \\"`Learner` for tabular data\\"\\n def predict(self, row):\\n tst_to = self.dls.valid_ds.new(pd.DataFrame(row).T)\\n tst_to.process()\\n tst_to.conts = tst_to.conts.astype(np.float32)\\n dl = self.dls.valid.new(tst_to)\\n inp,preds,_,dec_preds = self.get_preds(dl=dl, with_input=True, with_decoded=True)\\n i = getattr(self.dls, \'n_inp\', -1)\\n b = (*tuplify(inp),*tuplify(dec_preds))\\n full_dec = self.dls.decode((*tuplify(inp),*tuplify(dec_preds)))\\n return full_dec,dec_preds[0],preds[0]\\n\\n\\nshow_doc(TabularLearner, title_level=3)\\n\\n\\n# It works exactly as a normal `Learner`, the only difference is that it implements a `predict` method specific to work on a row of data.\\n\\n#export\\n@log_args(to_return=True, but_as=Learner.__init__)\\n@delegates(Learner.__init__)\\ndef tabular_learner(dls, layers=None, emb_szs=None, config=None, n_out=None, y_range=None, **kwargs):\\n \\"Get a `Learner` using `dls`, with `metrics`, including a `TabularModel` created using the remaining params.\\"\\n if config is None: config = tabular_config()\\n if layers is None: layers = [200,100]\\n to = dls.train_ds\\n emb_szs = get_emb_sz(dls.train_ds, {} if emb_szs is None else emb_szs)\\n if n_out is None: n_out = get_c(dls)\\n assert n_out, \\"`n_out` is not defined, and could not be infered from data, set `dls.c` or pass `n_out`\\"\\n if y_range is None and \'y_range\' in config: y_range = config.pop(\'y_range\')\\n model = TabularModel(emb_szs, len(dls.cont_names), n_out, layers, y_range=y_range, **config)\\n return TabularLearner(dls, model, **kwargs)\\n\\n\\n# If your data was built with fastai, you probably won\'t need to pass anything to `emb_szs` unless you want to change the default of the library (produced by `get_emb_sz`), same for `n_out` which should be automatically inferred. `layers` will default to `[200,100]` and is passed to `TabularModel` along with the `config`.\\n#\\n# Use `tabular_config` to create a `config` and cusotmize the model used. 
There is just easy access to `y_range` because this argument is often used.\\n#\\n# All the other arguments are passed to `Learner`.\\n\\npath = untar_data(URLs.ADULT_SAMPLE)\\ndf = pd.read_csv(path/\'adult.csv\')\\ncat_names = [\'workclass\', \'education\', \'marital-status\', \'occupation\', \'relationship\', \'race\']\\ncont_names = [\'age\', \'fnlwgt\', \'education-num\']\\nprocs = [Categorify, FillMissing, Normalize]\\ndls = TabularDataLoaders.from_df(df, path, procs=procs, cat_names=cat_names, cont_names=cont_names, \\n y_names=\\"salary\\", valid_idx=list(range(800,1000)), bs=64)\\nlearn = tabular_learner(dls)\\n\\n#hide\\ntst = learn.predict(df.iloc[0])\\n\\n# +\\n#hide\\n#test y_range is passed\\nlearn = tabular_learner(dls, y_range=(0,32))\\nassert isinstance(learn.model.layers[-1], SigmoidRange)\\ntest_eq(learn.model.layers[-1].low, 0)\\ntest_eq(learn.model.layers[-1].high, 32)\\n\\nlearn = tabular_learner(dls, config = tabular_config(y_range=(0,32)))\\nassert isinstance(learn.model.layers[-1], SigmoidRange)\\ntest_eq(learn.model.layers[-1].low, 0)\\ntest_eq(learn.model.layers[-1].high, 32)\\n\\n\\n# -\\n\\n#export\\n@typedispatch\\ndef show_results(x:Tabular, y:Tabular, samples, outs, ctxs=None, max_n=10, **kwargs):\\n df = x.all_cols[:max_n]\\n for n in x.y_names: df[n+\'_pred\'] = y[n][:max_n].values\\n display_df(df)\\n\\n\\n# ## Export -\\n\\n#hide\\nfrom nbdev.export import notebook2script\\nnotebook2script()\\n\\n\\n",\n "repo_path": "nbs/43_tabular.learner.ipynb"\n },\n "truncated_cells": []\n }\n]',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]
# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```
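Because the training pairs prefix queries with USER_QUERY: and dataset previews with HUB_DATASET_PREVIEW:, a typical retrieval flow embeds one query against many candidate previews and ranks them by similarity. A minimal sketch (the preview strings are truncated placeholders):

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("query-to-dataset-viewer-descriptions")

query = "USER_QUERY: kotlin code dataset"
previews = [
    'HUB_DATASET_PREVIEW: DATASET_NAME: "mvasiliniuc/iva-kotlin-codeint" ...',
    'HUB_DATASET_PREVIEW: DATASET_NAME: "vikp/starcoder_cleaned" ...',
]

query_emb = model.encode(query)
preview_embs = model.encode(previews)

# Rank candidate previews by cosine similarity to the query
scores = model.similarity(query_emb, preview_embs)[0]
for idx in scores.argsort(descending=True).tolist():
    print(f"{scores[idx].item():.3f}  {previews[idx][:60]}")
```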
Evaluation results from the TripletEvaluator:
Metric | Value |
---|---|
cosine_accuracy | 1.0 |
dot_accuracy | 0.0 |
manhattan_accuracy | 1.0 |
euclidean_accuracy | 1.0 |
max_accuracy | 1.0 |
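The TripletEvaluator checks, for each triplet, whether the anchor query embeds closer to its positive than to its negative under each distance function. A minimal sketch of running such an evaluation yourself, with one illustrative triplet:

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import TripletEvaluator

model = SentenceTransformer("query-to-dataset-viewer-descriptions")

# Illustrative triplet; the reported numbers come from the held-out evaluation split
evaluator = TripletEvaluator(
    anchors=["USER_QUERY: kotlin code dataset"],
    positives=['HUB_DATASET_PREVIEW: DATASET_NAME: "mvasiliniuc/iva-kotlin-codeint" ...'],
    negatives=['NEGATIVE: DATASET_NAME: "vikp/starcoder_cleaned" ...'],
)
print(evaluator(model))  # accuracy under cosine, dot, Manhattan, and Euclidean distance
```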
The model was trained on the query-to-dataset-viewer-descriptions dataset. Each example has three columns: query, positive, and negative.

 | query | positive | negative |
---|---|---|---|
type | string | string | string |

Samples:
query | positive | negative |
---|---|---|
USER_QUERY: LLM paper dataset | HUB_DATASET_PREVIEW: DATASET_NAME: "MarkrAI/AutoRAG-evaluation-2024-LLM-paper-v1" | NEGATIVE: DATASET_NAME: "emozilla/dolma-v1_7-arxiv" |
USER_QUERY: code vulnerability dataset | HUB_DATASET_PREVIEW: DATASET_NAME: "benjis/bigvul" | NEGATIVE: DATASET_NAME: "sfakhoury/NL2Fix" |
USER_QUERY: english korean translation dataset | HUB_DATASET_PREVIEW: DATASET_NAME: "yoonjae22/Aihub_translate" | NEGATIVE: DATASET_NAME: "werty1248/EnKo-Translation-LongTextOnly-dedup" |
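If the dataset is published on the Hub under the same name, it can be inspected directly. A sketch assuming that repository ID:

```python
from datasets import load_dataset

# Assumed repository ID; prepend the owning namespace if needed
ds = load_dataset("query-to-dataset-viewer-descriptions", split="train")
print(ds.column_names)   # ['query', 'positive', 'negative']
print(ds[0]["query"])    # e.g. 'USER_QUERY: LLM paper dataset'
```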
Loss: CachedMultipleNegativesRankingLoss with these parameters:

```json
{
    "scale": 20.0,
    "similarity_fct": "cos_sim"
}
```
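In code, this corresponds to constructing the loss around the model being trained. A minimal sketch, assuming training starts from the base checkpoint:

```python
from sentence_transformers import SentenceTransformer, util
from sentence_transformers.losses import CachedMultipleNegativesRankingLoss

# gte-base-en-v1.5 ships custom modeling code, hence trust_remote_code=True
model = SentenceTransformer("Alibaba-NLP/gte-base-en-v1.5", trust_remote_code=True)

loss = CachedMultipleNegativesRankingLoss(
    model,
    scale=20.0,                   # temperature applied to the similarity scores
    similarity_fct=util.cos_sim,  # the "cos_sim" setting above
)
```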
The evaluation split has the same three columns: query, positive, and negative.

 | query | positive | negative |
---|---|---|---|
type | string | string | string |

Samples:
query | positive | negative |
---|---|---|
USER_QUERY: multimodal conversation dataset | HUB_DATASET_PREVIEW: DATASET_NAME: "BUAADreamer/llava-en-zh-2k" | NEGATIVE: DATASET_NAME: "passing2961/photochat_plus" |
USER_QUERY: question answering dataset for technical documentation | HUB_DATASET_PREVIEW: DATASET_NAME: "hythyt/preguntas-respuestas-RAG" | NEGATIVE: DATASET_NAME: "cmalaviya/expertqa" |
USER_QUERY: conversational ai training dataset | HUB_DATASET_PREVIEW: DATASET_NAME: "dataautogpt3/flan1m-alpaca-uncensored" | NEGATIVE: DATASET_NAME: "sgp-bench/open-instruct-gpt4o_40k" |
Loss: CachedMultipleNegativesRankingLoss with the same parameters:

```json
{
    "scale": 20.0,
    "similarity_fct": "cos_sim"
}
```
The following non-default training hyperparameters were used:

- eval_strategy: steps
- per_device_train_batch_size: 4
- per_device_eval_batch_size: 4
- num_train_epochs: 200
- warmup_ratio: 0.1
- bf16: True
- load_best_model_at_end: True
- batch_sampler: no_duplicates

All hyperparameters, defaults included:

- overwrite_output_dir: False
- do_predict: False
- eval_strategy: steps
- prediction_loss_only: True
- per_device_train_batch_size: 4
- per_device_eval_batch_size: 4
- per_gpu_train_batch_size: None
- per_gpu_eval_batch_size: None
- gradient_accumulation_steps: 1
- eval_accumulation_steps: None
- torch_empty_cache_steps: None
- learning_rate: 5e-05
- weight_decay: 0.0
- adam_beta1: 0.9
- adam_beta2: 0.999
- adam_epsilon: 1e-08
- max_grad_norm: 1.0
- num_train_epochs: 200
- max_steps: -1
- lr_scheduler_type: linear
- lr_scheduler_kwargs: {}
- warmup_ratio: 0.1
- warmup_steps: 0
- log_level: passive
- log_level_replica: warning
- log_on_each_node: True
- logging_nan_inf_filter: True
- save_safetensors: True
- save_on_each_node: False
- save_only_model: False
- restore_callback_states_from_checkpoint: False
- no_cuda: False
- use_cpu: False
- use_mps_device: False
- seed: 42
- data_seed: None
- jit_mode_eval: False
- use_ipex: False
- bf16: True
- fp16: False
- fp16_opt_level: O1
- half_precision_backend: auto
- bf16_full_eval: False
- fp16_full_eval: False
- tf32: None
- local_rank: 0
- ddp_backend: None
- tpu_num_cores: None
- tpu_metrics_debug: False
- debug: []
- dataloader_drop_last: False
- dataloader_num_workers: 0
- dataloader_prefetch_factor: None
- past_index: -1
- disable_tqdm: False
- remove_unused_columns: True
- label_names: None
- load_best_model_at_end: True
- ignore_data_skip: False
- fsdp: []
- fsdp_min_num_params: 0
- fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- fsdp_transformer_layer_cls_to_wrap: None
- accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- deepspeed: None
- label_smoothing_factor: 0.0
- optim: adamw_torch
- optim_args: None
- adafactor: False
- group_by_length: False
- length_column_name: length
- ddp_find_unused_parameters: None
- ddp_bucket_cap_mb: None
- ddp_broadcast_buffers: False
- dataloader_pin_memory: True
- dataloader_persistent_workers: False
- skip_memory_metrics: True
- use_legacy_prediction_loop: False
- push_to_hub: False
- resume_from_checkpoint: None
- hub_model_id: None
- hub_strategy: every_save
- hub_private_repo: False
- hub_always_push: False
- gradient_checkpointing: False
- gradient_checkpointing_kwargs: None
- include_inputs_for_metrics: False
- eval_do_concat_batches: True
- fp16_backend: auto
- push_to_hub_model_id: None
- push_to_hub_organization: None
- mp_parameters:
- auto_find_batch_size: False
- full_determinism: False
- torchdynamo: None
- ray_scope: last
- ddp_timeout: 1800
- torch_compile: False
- torch_compile_backend: None
- torch_compile_mode: None
- dispatch_batches: None
- split_batches: None
- include_tokens_per_second: False
- include_num_input_tokens_seen: False
- neftune_noise_alpha: None
- optim_target_modules: None
- batch_eval_metrics: False
- eval_on_start: False
- eval_use_gather_object: False
- batch_sampler: no_duplicates
- multi_dataset_batch_sampler: proportional
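Taken together, a training run with the non-default hyperparameters above might look like this sketch (repository IDs and split names are assumptions):

```python
from datasets import load_dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
)
from sentence_transformers.losses import CachedMultipleNegativesRankingLoss
from sentence_transformers.training_args import BatchSamplers

model = SentenceTransformer("Alibaba-NLP/gte-base-en-v1.5", trust_remote_code=True)
dataset = load_dataset("query-to-dataset-viewer-descriptions")  # assumed repo ID

args = SentenceTransformerTrainingArguments(
    output_dir="query-to-dataset-viewer-descriptions",
    num_train_epochs=200,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    warmup_ratio=0.1,
    bf16=True,
    eval_strategy="steps",
    load_best_model_at_end=True,
    batch_sampler=BatchSamplers.NO_DUPLICATES,  # avoid duplicate in-batch negatives
)

trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],  # assumed split name
    loss=CachedMultipleNegativesRankingLoss(model, scale=20.0),
)
trainer.train()
```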
Training logs:

Epoch | Step | Training Loss | loss | max_accuracy |
---|---|---|---|---|
0 | 0 | - | - | 0.5 |
0.3497 | 100 | 1.0509 | 0.7070 | - |
0.6993 | 200 | 0.6183 | 0.3396 | - |
1.0490 | 300 | 0.3746 | 0.2282 | - |
1.3986 | 400 | 0.2481 | 0.1616 | - |
1.7483 | 500 | 0.2198 | 0.1302 | - |
2.0979 | 600 | 0.166 | 0.1164 | - |
2.4476 | 700 | 0.1045 | 0.1174 | - |
2.7972 | 800 | 0.0797 | 0.1095 | - |
3.1469 | 900 | 0.0422 | 0.1176 | - |
3.4965 | 1000 | 0.0595 | 0.1115 | - |
3.8462 | 1100 | 0.0416 | 0.1008 | - |
4.1958 | 1200 | 0.0174 | 0.1233 | - |
4.5455 | 1300 | 0.0273 | 0.1032 | - |
4.8951 | 1400 | 0.0389 | 0.0990 | - |
5.2448 | 1500 | 0.0126 | 0.0963 | - |
5.5944 | 1600 | 0.0074 | 0.1193 | - |
5.9441 | 1700 | 0.0165 | 0.1379 | - |
6.2937 | 1800 | 0.0046 | 0.1127 | - |
6.6434 | 1900 | 0.0158 | 0.1289 | - |
6.9930 | 2000 | 0.0157 | 0.1009 | - |
7.3427 | 2100 | 0.0032 | 0.1075 | - |
7.6923 | 2200 | 0.0072 | 0.1289 | - |
8.0420 | 2300 | 0.0192 | 0.1176 | - |
8.3916 | 2400 | 0.001 | 0.1214 | - |
8.7413 | 2500 | 0.024 | 0.1320 | 1.0 |
Citation for Sentence Transformers:

```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```
Citation for CachedMultipleNegativesRankingLoss (GradCache):

```bibtex
@misc{gao2021scaling,
    title={Scaling Deep Contrastive Learning Batch Size under Memory Limited Setup},
    author={Luyu Gao and Yunyi Zhang and Jiawei Han and Jamie Callan},
    year={2021},
    eprint={2101.06983},
    archivePrefix={arXiv},
    primaryClass={cs.LG}
}
```
Base model: Alibaba-NLP/gte-base-en-v1.5