Christina Theodoris committed on
Commit
f89f796
1 Parent(s): ebe5ee8

Add example for tokenizing .loom dataset

Browse files
examples/tokenizing_scRNAseq_data.ipynb ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "a91bca46-c056-4784-8c6c-b0f5d3f33496",
6
+ "metadata": {
7
+ "tags": []
8
+ },
9
+ "source": [
10
+ "## Tokenizing .loom single cell RNA-seq data to rank value encoding .dataset format"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": null,
16
+ "id": "080fdd9c-0c48-4d5d-a254-52b6c53cdf78",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "from geneformer import TranscriptomeTokenizer"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": null,
26
+ "id": "9641b146-af2c-4688-9d8a-9c570246d116",
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "tk = TranscriptomeTokenizer({\"cell_type\": \"cell_type\", \"organ_major\": \"organ_major\"}, nproc=4) # Dictionary of custom attributes to be added to the dataset.\n",
31
+ "tk.tokenize_data(\"loom_data_directory\", \"output_directory\", \"output_prefix\")"
32
+ ]
33
+ }
34
+ ],
35
+ "metadata": {
36
+ "kernelspec": {
37
+ "display_name": "Python 3 (ipykernel)",
38
+ "language": "python",
39
+ "name": "python3"
40
+ },
41
+ "language_info": {
42
+ "codemirror_mode": {
43
+ "name": "ipython",
44
+ "version": 3
45
+ },
46
+ "file_extension": ".py",
47
+ "mimetype": "text/x-python",
48
+ "name": "python",
49
+ "nbconvert_exporter": "python",
50
+ "pygments_lexer": "ipython3",
51
+ "version": "3.10.11"
52
+ }
53
+ },
54
+ "nbformat": 4,
55
+ "nbformat_minor": 5
56
+ }