diff --git "a/app/notebooks/pdf_readers.ipynb" "b/app/notebooks/pdf_readers.ipynb" new file mode 100644--- /dev/null +++ "b/app/notebooks/pdf_readers.ipynb" @@ -0,0 +1,1549 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here, we test various PDF parsers and loading approaches." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import os\n", + "\n", + "from langchain_community.document_loaders import PyPDFLoader\n", + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "from langchain_community.vectorstores import Chroma\n", + "from langchain.chains import ConversationalRetrievalChain\n", + "from langchain_community.embeddings import HuggingFaceEmbeddings \n", + "from langchain_community.llms import HuggingFacePipeline\n", + "from langchain.chains import ConversationChain\n", + "from langchain.memory import ConversationBufferMemory\n", + "from langchain_community.llms import HuggingFaceEndpoint\n", + "from pathlib import Path\n", + "from PyPDF2 import PdfReader\n", + "from dotenv import load_dotenv, find_dotenv\n", + "\n", + "from rich import print\n", + "from rich.pretty import pprint\n", + "\n", + "from llama_parse import LlamaParse \n", + "\n", + "\n", + "load_dotenv(find_dotenv('env'))" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "pdf1 = \"data/test.pdf\" # a manually created simple pdf with a few words on 2 pages\n", + "pdf2 = \"data/AMZN_Moodys_CreditRating_2023_p1.pdf\" # first page, with the very difficult to read Exhibit 1 with a bar chart\n", + "pdf3 = \"../../assignment_data/AMZN_Moodys_CreditRating_2023.pdf\"" + ] + }, + { + "cell_type": "markdown", +
"metadata": {}, + "source": [ + "## PYPDFLOADER" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Document(page_content='���������������������������������', metadata={'source': 'data/test.pdf', 'page': 0})]\n" + ] + } + ], + "source": [ + "# Load PDF document and create doc splits\n", + "def load_doc(list_file_path, chunk_size, chunk_overlap):\n", + " # Processing for one document only\n", + " # loader = PyPDFLoader(file_path)\n", + " # pages = loader.load()\n", + " loaders = [PyPDFLoader(x) for x in list_file_path]\n", + " pages = []\n", + " for loader in loaders:\n", + " pages.extend(loader.load())\n", + " # text_splitter = RecursiveCharacterTextSplitter(chunk_size = 600, chunk_overlap = 50)\n", + " text_splitter = RecursiveCharacterTextSplitter(\n", + " chunk_size = chunk_size, \n", + " chunk_overlap = chunk_overlap)\n", + " doc_splits = text_splitter.split_documents(pages)\n", + " return doc_splits\n", + "splits = load_doc([pdf1], 600, 50)\n", + "\n", + "print(splits)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "PyPDFLoader returns only unreadable characters for pdf1, which is very bad since it's a very simple pdf with a few words and no images. It works well on pdf2, though." + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='CORPORATES\\nCREDIT OPINION\\n23 May 2023\\nUpdate\\nRATINGS\\nAmazon.com, Inc.\\nDomicile Seattle, Washington,\\nUnited States\\nLong Term Rating A1\\nType Senior Unsecured -\\nDom Curr\\nOutlook Stable\\nPlease see the ratings section at the end of this report\\nfor more information.
The ratings and outlook shown\\nreflect information as of the publication date.\\nContacts\\nChristina Boni +1.212.553.0514\\nSenior Vice President\\nchristina.boni@moodys.com\\nJack Myers +1.212.553.5116\\nAssociate Analyst\\njack.myers@moodys.com\\nMargaret Taylor +1.212.553.0424\\nAssociate Managing Director\\nmargaret.taylor@moodys.comAmazon.com, Inc.', metadata={'source': 'data/AMZN_Moodys_CreditRating_2023_p1.pdf', 'page': 0}),\n", + " Document(page_content=\"margaret.taylor@moodys.comAmazon.com, Inc.\\nUpdate to credit analysis\\nSummary\\nAmazon.com, Inc. 's (A1/Prime-1 stable) credit profile reflects its powerful global brand, which\\nis synonymous with online retail, as well as the strength and profitability of Amazon Web\\nServices (“AWS”), the market leader in the cloud computing market. The company is reliant\\non the operating income derived from AWS, as its non-AWS profitability has remained weak\\nsince the end of 2021. Although the company is making progress with improving productivity\", metadata={'source': 'data/AMZN_Moodys_CreditRating_2023_p1.pdf', 'page': 0}),\n", + " Document(page_content='and reducing costs, online operating margins remain well below historical levels. Amazon has\\ntaken actions to make its fulfillment operations more efficient as its business grows into its\\ncapacity, which doubled during the pandemic. Amazon has also built a solid ecosystem of\\nentertainment content that enhances its offering, operates a formidable third-party seller\\nbusiness and generates a solid and growing revenue stream from advertising. Nonetheless,\\nits credit metrics are currently weak for the A1 rating with RCF/Debt below 50%, as lower', metadata={'source': 'data/AMZN_Moodys_CreditRating_2023_p1.pdf', 'page': 0}),\n", + " Document(page_content=\"profitability, coupled with increased levels of investment have led to higher debt levels\\nand lower cash balances. 
Capital allocation will be critical to improving its credit profile as\\nAmazon navigates a weaker economic backdrop that could dampen demand for its products\\nand services as it pursues cost reductions and efficiencies to restore profitability at online\\nretail. The growing online presence of brick-and-mortar retailers, as well as the increasing\\ncompetition from larger, well capitalized companies in AWS' universe also presents future\\nchallenges.\\nExhibit 1\", metadata={'source': 'data/AMZN_Moodys_CreditRating_2023_p1.pdf', 'page': 0}),\n", + " Document(page_content=\"challenges.\\nExhibit 1\\nAmazon's debt has continued to rise as operating income remains below 2019\\n$0$20,000$40,000$60,000$80,000$100,000$120,000$140,000$160,000$180,000\\n$0$5,000$10,000$15,000$20,000$25,000$30,000\\n2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 Q1 -23 LTM\\nMoody's Adjusted Debt (USD Millions)Moody's Adj. Operating Income (USD Millions)Moody's adjusted operating income Moody's adjusted debt\\nDebt includes lease\\nSource: Moody’s Financial Metrics™\", metadata={'source': 'data/AMZN_Moodys_CreditRating_2023_p1.pdf', 'page': 0})]" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "load_doc([pdf2], 600, 50)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[\n",
+       "Document(\n",
+       "│   │   page_content=\"CORPORATES\\nCREDIT OPINION\\n23 May 2023\\nUpdate\\nRATINGS\\nAmazon.com, Inc.\\nDomicile Seattle, Washington,\\nUnited States\\nLong Term Rating A1\\nType Senior Unsecured -\\nDom Curr\\nOutlook Stable\\nPlease see the ratings section  at the end of this report\\nfor more information. The ratings and outlook shown\\nreflect information as of the publication date.\\nContacts\\nChristina Boni +1.212.553.0514\\nSenior Vice President\\nchristina.boni@moodys.com\\nJack Myers +1.212.553.5116\\nAssociate Analyst\\njack.myers@moodys.com\\nMargaret Taylor +1.212.553.0424\\nAssociate Managing Director\\nmargaret.taylor@moodys.comAmazon.com, Inc.\\nUpdate to credit analysis\\nSummary\\nAmazon.com, Inc. 's (A1/Prime-1 stable) credit profile reflects its powerful global brand, which\\nis synonymous with online retail, as well as the strength and profitability of Amazon Web\\nServices (“AWS”), the market leader in the cloud computing market. The company is reliant\\non the operating income derived from AWS, as its non-AWS profitability has remained weak\\nsince the end of 2021. Although the company is making progress with improving productivity\\nand reducing costs, online operating margins remain well below historical levels. Amazon has\\ntaken actions to make its fulfillment operations more efficient as its business grows into its\\ncapacity, which doubled during the pandemic. Amazon has also built a solid ecosystem of\\nentertainment content that enhances its offering, operates a formidable third-party seller\\nbusiness and generates a solid and growing revenue stream from advertising. Nonetheless,\\nits credit metrics are currently weak for the A1 rating with RCF/Debt below 50%, as lower\\nprofitability, coupled with increased levels of investment have led to higher debt levels\\nand lower cash balances. 
Capital allocation will be critical to improving its credit profile as\\nAmazon navigates a weaker economic backdrop that could dampen demand for its products\\nand services as it pursues cost reductions and efficiencies to restore profitability at online\\nretail. The growing online presence of brick-and-mortar retailers, as well as the increasing\\ncompetition from larger, well capitalized companies in AWS' universe also presents future\\nchallenges.\\nExhibit 1\\nAmazon's debt has continued to rise as operating income remains below 2019\\n$0$20,000$40,000$60,000$80,000$100,000$120,000$140,000$160,000$180,000\\n$0$5,000$10,000$15,000$20,000$25,000$30,000\\n2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 Q1 -23 LTM\\nMoody's Adjusted Debt (USD Millions)Moody's Adj. Operating Income (USD Millions)Moody's adjusted operating income Moody's adjusted debt\\nDebt includes lease\\nSource: Moody’s Financial Metrics™\\n\",\n",
+       "│   │   metadata={'source': 'data/AMZN_Moodys_CreditRating_2023_p1.pdf', 'page': 0}\n",
+       ")\n",
+       "]\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mDocument\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mpage_content\u001b[0m=\u001b[32m\"CORPORATES\u001b[0m\u001b[32m\\nCREDIT OPINION\\n23 May 2023\\nUpdate\\nRATINGS\\nAmazon.com, Inc.\\nDomicile Seattle, Washington,\\nUnited States\\nLong Term Rating A1\\nType Senior Unsecured -\\nDom Curr\\nOutlook Stable\\nPlease see the ratings section at the end of this report\\nfor more information. The ratings and outlook shown\\nreflect information as of the publication date.\\nContacts\\nChristina Boni +1.212.553.0514\\nSenior Vice President\\nchristina.boni@moodys.com\\nJack Myers +1.212.553.5116\\nAssociate Analyst\\njack.myers@moodys.com\\nMargaret Taylor +1.212.553.0424\\nAssociate Managing Director\\nmargaret.taylor@moodys.comAmazon.com, Inc.\\nUpdate to credit analysis\\nSummary\\nAmazon.com, Inc. 's \u001b[0m\u001b[32m(\u001b[0m\u001b[32mA1/Prime-1 stable\u001b[0m\u001b[32m)\u001b[0m\u001b[32m credit profile reflects its powerful global brand, which\\nis synonymous with online retail, as well as the strength and profitability of Amazon Web\\nServices \u001b[0m\u001b[32m(\u001b[0m\u001b[32m“AWS”\u001b[0m\u001b[32m)\u001b[0m\u001b[32m, the market leader in the cloud computing market. The company is reliant\\non the operating income derived from AWS, as its non-AWS profitability has remained weak\\nsince the end of 2021. Although the company is making progress with improving productivity\\nand reducing costs, online operating margins remain well below historical levels. Amazon has\\ntaken actions to make its fulfillment operations more efficient as its business grows into its\\ncapacity, which doubled during the pandemic. Amazon has also built a solid ecosystem of\\nentertainment content that enhances its offering, operates a formidable third-party seller\\nbusiness and generates a solid and growing revenue stream from advertising. 
Nonetheless,\\nits credit metrics are currently weak for the A1 rating with RCF/Debt below 50%, as lower\\nprofitability, coupled with increased levels of investment have led to higher debt levels\\nand lower cash balances. Capital allocation will be critical to improving its credit profile as\\nAmazon navigates a weaker economic backdrop that could dampen demand for its products\\nand services as it pursues cost reductions and efficiencies to restore profitability at online\\nretail. The growing online presence of brick-and-mortar retailers, as well as the increasing\\ncompetition from larger, well capitalized companies in AWS' universe also presents future\\nchallenges.\\nExhibit 1\\nAmazon's debt has continued to rise as operating income remains below 2019\\n$0$20,000$40,000$60,000$80,000$100,000$120,000$140,000$160,000$180,000\\n$0$5,000$10,000$15,000$20,000$25,000$30,000\\n2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 Q1 -23 LTM\\nMoody's Adjusted Debt \u001b[0m\u001b[32m(\u001b[0m\u001b[32mUSD Millions\u001b[0m\u001b[32m)\u001b[0m\u001b[32mMoody's Adj. 
Operating Income \u001b[0m\u001b[32m(\u001b[0m\u001b[32mUSD Millions\u001b[0m\u001b[32m)\u001b[0m\u001b[32mMoody's adjusted operating income Moody's adjusted debt\\nDebt includes lease\\nSource: Moody’s Financial Metrics™\\n\"\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'source'\u001b[0m: \u001b[32m'data/AMZN_Moodys_CreditRating_2023_p1.pdf'\u001b[0m, \u001b[32m'page'\u001b[0m: \u001b[1;36m0\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[1m]\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "loaders = [PyPDFLoader(x) for x in [pdf2]]\n", + "pages = []\n", + "for loader in loaders:\n", + " pages.extend(loader.load())\n", + "pprint(pages)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[\n",
+       "Document(\n",
+       "│   │   page_content=\"CORPORATES\\nCREDIT OPINION\\n23 May 2023\\nUpdate\\nRATINGS\\nAmazon.com, Inc.\\nDomicile Seattle, Washington,\\nUnited States\\nLong Term Rating A1\\nType Senior Unsecured -\\nDom Curr\\nOutlook Stable\\nPlease see the ratings section  at the end of this report\\nfor more information. The ratings and outlook shown\\nreflect information as of the publication date.\\nContacts\\nChristina Boni +1.212.553.0514\\nSenior Vice President\\nchristina.boni@moodys.com\\nJack Myers +1.212.553.5116\\nAssociate Analyst\\njack.myers@moodys.com\\nMargaret Taylor +1.212.553.0424\\nAssociate Managing Director\\nmargaret.taylor@moodys.comAmazon.com, Inc.\\nUpdate to credit analysis\\nSummary\\nAmazon.com, Inc. 's (A1/Prime-1 stable) credit profile reflects its powerful global brand, which\\nis synonymous with online retail, as well as the strength and profitability of Amazon Web\\nServices (“AWS”), the market leader in the cloud computing market. The company is reliant\\non the operating income derived from AWS, as its non-AWS profitability has remained weak\\nsince the end of 2021. Although the company is making progress with improving productivity\\nand reducing costs, online operating margins remain well below historical levels. Amazon has\\ntaken actions to make its fulfillment operations more efficient as its business grows into its\\ncapacity, which doubled during the pandemic. Amazon has also built a solid ecosystem of\\nentertainment content that enhances its offering, operates a formidable third-party seller\\nbusiness and generates a solid and growing revenue stream from advertising. Nonetheless,\\nits credit metrics are currently weak for the A1 rating with RCF/Debt below 50%, as lower\\nprofitability, coupled with increased levels of investment have led to higher debt levels\\nand lower cash balances. 
Capital allocation will be critical to improving its credit profile as\\nAmazon navigates a weaker economic backdrop that could dampen demand for its products\\nand services as it pursues cost reductions and efficiencies to restore profitability at online\\nretail. The growing online presence of brick-and-mortar retailers, as well as the increasing\\ncompetition from larger, well capitalized companies in AWS' universe also presents future\\nchallenges.\\nExhibit 1\\nAmazon's debt has continued to rise as operating income remains below 2019\\n$0$20,000$40,000$60,000$80,000$100,000$120,000$140,000$160,000$180,000\\n$0$5,000$10,000$15,000$20,000$25,000$30,000\\n2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 Q1 -23 LTM\\nMoody's Adjusted Debt (USD Millions)Moody's Adj. Operating Income (USD Millions)Moody's adjusted operating income Moody's adjusted debt\\nDebt includes lease\\nSource: Moody’s Financial Metrics™\\n\",\n",
+       "│   │   metadata={'source': '../../assignment_data/AMZN_Moodys_CreditRating_2023.pdf', 'page': 0}\n",
+       "),\n",
+       "Document(\n",
+       "│   │   page_content=\"MOODY'S INVESTORS SERVICE CORPORATES\\nCredit strengths\\n»Leading online retailer and cloud provider\\n»AWS provides an increasing income stream provides solid positioning in AI\\n»Prime membership base supports customer loyalty\\n»Advertising revenue remains a significant area of growth\\nCredit challenges\\n»Brick-and-mortar retailers continue to increase online retail presence\\n»Heightened cloud competition from larger, well capitalized tech companies\\n»Inefficiencies in its retail fulfillment operations weigh on profitability\\n»Reduced cash balance lowers cushion for volatile or heavy investment periods\\nRating outlook\\nThe stable outlook reflects our view that Amazon will quickly restore its credit metrics to levels reflective of its A1 rating. The outlook\\nalso assumes that Amazon will maintain excellent liquidity and consistent financial strategies.\\nFactors that could lead to an upgrade\\n»Ratings could be upgraded if Amazon's numerous investments generate commensurate levels of profitability such that RCF/debt is\\nmaintained around 65%.\\n»Additional factors that would be critical for an upgrade are the continued maintenance of very strong liquidity, a robust cash and\\ninvestments to debt position and maintenance of conservative financial strategies.\\nFactors that could lead to a downgrade\\nRatings could be downgraded if:\\n»Operating performance continues to weaken, or\\n»It becomes clear that investments are not paying off, or\\n»Financial strategy is becoming significantly more aggressive with regard to cash returned to shareholders or acquisitions\\n»If for any of the above, RCF/debt falls below 50% for an extended period\\nThis publication does not announce a credit rating action. 
For any credit ratings referenced in this publication, please see the issuer/deal page on https://ratings.moodys.com for the\\nmost updated credit rating action information and rating history.\\n2          23 May 2023 Amazon.com, Inc.: Update to credit analysis\",\n",
+       "│   │   metadata={'source': '../../assignment_data/AMZN_Moodys_CreditRating_2023.pdf', 'page': 1}\n",
+       "),\n",
+       "Document(\n",
+       "│   │   page_content=\"MOODY'S INVESTORS SERVICE CORPORATES\\nKey indicators\\nExhibit 2\\nAmazon.com, Inc.\\nUS Billions Dec-18 Dec-19 Dec-20 Dec-21 Dec-22LTM \\n(Mar-23)12-18 Month \\nForward View\\nRevenue 233 281 386 470 514 525 589\\nEBIT / Interest Expense 5.9x 6.8x 10.2x 9.6x 2.8x 2.9x 5.9x\\nRCF / Net Debt 87.7% 90.9% 81.4% 62.8% 53.9% 50.7% 90.2%\\nDebt / EBITDA 2.3x 2.2x 2.1x 2.3x 3.6x 3.6x 2.4x\\nAll figures and ratios are calculated using Moody’s estimates and standard adjustments. Moody's Forecasts (f) or Projections (proj.) are Moody's opinion and do not represent the views of\\nthe issuer. Periods are Financial Year-End unless indicated. LTM = Last Twelve Months.\\nSource: Moody’s Financial Metrics™\\nProfile\\nHeadquartered in Seattle, Washington, Amazon.com, Inc. is the world’s largest online retailer, and also a leading web services provider\\nvia AWS. Revenue was approximately $525 billion for the twelve months ended March 31, 2023.\\nExhibit 3\\nAmazon's Revenue by SegmentExhibit 4\\nAmazon's Operating Profit by Segment\\n$107,006 $135,987 $177,866 $232,887 $280,522 $386,064 $469,822 $513,983 $524,897 \\n $(50,000) $50,000 $150,000 $250,000 $350,000 $450,000 $550,000\\n2015 2016 2017 2018 2019 2020 2021 2022 Q1-23\\nLTMRevenue (USD Millions)North America International AWS\\nSource: Company SEC Filings$2,233 $4,186 $4,106 $12,420 $14,541 $22,899 $24,879 \\n$12,248 \\n$13,353 \\n $(10,000) $(7,500) $(5,000) $(2,500) $- $2,500 $5,000 $7,500 $10,000 $12,500 $15,000 $17,500 $20,000 $22,500 $25,000 $27,500 $30,000\\n2015 2016 2017 2018 2019 2020 2021 2022 Q1-23\\nLTMOperating Income (USD Millions)North America International AWS\\nSource: Company SEC Filings\\nDetailed credit considerations\\nAWS has leading market share, high profitability and solid long term prospects\\nAWS continues to generate most of Amazon's operating profit, making it possible for investment programs (delivery, content and\\ninternational) to continue. 
Despite still being relatively early in the conversion of companies transitioning to the cloud, revenue\\ngrowth and profitability of AWS continues to experience a slowing of growth in Q1 2023 as companies try to find more effective\\ncost solutions. Revenue growth in Q1 2023 has decelerated to 15.8% from 20.2% in Q4 2022 and 27.5% in Q3 2022, as operating\\nmargins decline sequentially and from last year. These trends have continued into Q2 2023 with April revenue growth rates lower than\\nQ1 2023. We expect revenue growth to improve toward the end of 2023 as optimization efforts of customers are exhausted. AWS’\\ncustomers continue to increase commitments for future capacity usage and we expect strong long-term growth for AWS benefiting\\nfrom migration of IT infrastructure investments from on-premise to the cloud. AWS is also well-positioned to capitalize on the rapidly\\ngrowing AI opportunity in the cloud with its scalable infrastructure, tools for developers to build and deploy models and services that\\ncustomers can use to leverage proprietary large language models. At the same time, AWS faces strong competitors with financial\\nresources such as Microsoft Corporation (Aaa Stable), Oracle Corporation (Baa2 Stable), Alphabet Inc. (Aa2 Stable), and IBM (A3\\nStable) that all aim to grow market share. Operating margin expansion for AWS could be challenged by increasing competition and\\nelevated investments.\\n3          23 May 2023 Amazon.com, Inc.: Update to credit analysis\",\n",
+       "│   │   metadata={'source': '../../assignment_data/AMZN_Moodys_CreditRating_2023.pdf', 'page': 2}\n",
+       "),\n",
+       "Document(\n",
+       "│   │   page_content=\"MOODY'S INVESTORS SERVICE CORPORATES\\nOnline retail presence still dominates as digital efforts of brick and mortar accelerate\\nAmazon's online retail is the clear leader relative its US competitors. Nonetheless, competition has accelerated from larger players such\\nas Walmart Inc. (Aa2 stable), Target Corporation (A2 stable) and Best Buy Co., Inc. (A3 stable), all of which have continued to enhance\\ntheir online capabilities. These larger brick-and-mortar retailers can also leverage their store networks and proprietary distribution\\ncapability to offer the consumer options for obtaining their purchases. In store pickup provides a competitive offering (within hours\\nin most cases) and a more cost effective alternative to the seller. In addition, some brick-and-mortar retailers already have vehicle\\nnetworks that are used to stock its stores which could be used for some same day delivery capability. An example is the auto parts\\nretailers, which measure delivery times in hours, not days.\\nAmazon faces an increasingly competitive environment as the pandemic accelerated investment in areas such as curbside pickup,\\ninventory visibility and better usage of stores to fulfill orders. To combat these growing competitive threats, Amazon continues to\\nsupport its Prime Free One-Day Delivery initiative, which is an effort to counter buy-online/pick-up in store. The company is also\\nmoving its fulfillment network to a regionalized model which is expected to improve delivery speed and cost. To offset rising costs, the\\ncompany increased pricing on its prime membership in the U.S. in 2022 from $119 to $139 annually. The move, which is the first price\\nincrease since 2018 and will provide to offset to these continued investments.\\nThird-party sales remains an important part of the business with growth continuing to outpace first-party. Third party comprises\\n59% of paid units relative to 55% for the same period last year. 
The company has implemented a fuel and inflation as well as a peak\\nfulfillment surcharge on fulfillment fee per unit rates for Fulfilled by Amazon sellers in the face of rising inflation and energy costs. We\\nnote that Walmart has increased its third party efforts with its Advance Auto Parts and Shopify relationships. In the Advance example,\\nWalmart is providing space in its stores, as well as the use of its distribution network and placement on the website.\\nPhysical store sales remain primarily from Whole Foods. The 2017 acquisition of Whole Foods consisting now of over 500 locations\\nprovides Amazon with a “scalable” food business, as well as pickup points for online orders across categories. The company has closed\\nsome non-core concepts including its physical bookstores, and 4-star stores.\\nAdvertising revenue continues to grow rapidly in programmatic advertising and, in our opinion, has an advantage in this area given\\nits significant e-commerce presence and data gatheri ng. Growth in advertising revenue remains robust at over 23% in Q1 2023.\\nAdvertising for LTM March 2023 was roughly $39.4 billion, and we note advertising operating margins are generally healthy and\\ntypically run in the midteens. Along with AWS, the profitability of this category provides “buffer” to support its retail operations\\nperforming well below historical operating margins.\\nCost pressures are being addressed as investment is targeted at or below 2022 levels\\nAmazon's retail operations is contending with the inefficiency posed by the more than doubling of its capacity during the pandemic,\\nAmazon continues to work to move its cost structure closer to pre-pandemic levels and has made significant head count reduction\\nwith 27,000 roles eliminated including areas such as AWS as well as Twitch, devices, advertising and human resources.\\nAmazon remains committed to providing value to customers, despite inflation while slowing remaining elevated. 
The company is\\nbenefiting from continued improvement in shipping speeds and in-stock rates have recovered. Amazon has been contending with\\nhigher costs related to system productivity and inflation since the second half of 2021.\\nDuring first quarter 2023, Amazon continued to take cost cutting measures to improve margins and make progress on reducing its\\ncash usage. These efforts include shutting down businesses such as Amazon Fabric and Amazon Care as well as closing eight Amazon\\nGo locations. The company has also increased its minimum grocery purchases from $35 to $150 for free shipping and reduced\\ncorporate head count. Nonetheless, cost structure improvements have been partially offset by increased spend in advertising and other\\ninvestments such as video content and marketing costs.\\nAmazon expects investment in 2023 to be at or below 2022 levels at it reduces its spending on fulfillment and transportation given\\nits significant increase in capacity in recent years and shifts spending to technology and related infrastructure including large language\\nmodels and generative AI. In 2022, the company spent approximately $59 billion in capital investments.\\nAmazon has continued to pursue acquisitions it has exited businesses that are not reaching long term return goals. The company\\nacquired MGM Holdings Inc. (“MGM”) for approximately $8.5 billion, including MGM’s debt, in Q1 2022 which increased its video\\n4          23 May 2023 Amazon.com, Inc.: Update to credit analysis\",\n",
+       "│   │   metadata={'source': '../../assignment_data/AMZN_Moodys_CreditRating_2023.pdf', 'page': 3}\n",
+       "),\n",
+       "Document(\n",
+       "│   │   page_content=\"MOODY'S INVESTORS SERVICE CORPORATES\\ncontent which is a key component to its Prime's value proposition and differentiates its offering beyond shipping. MGM's revenue of\\nabout $1.5 billion represents less than 1% of Amazon's revenue. The company also closed in Q1 2023 on its $4 billion purchase of\\n1Life Healthcare Inc. “One Medical,” a national primary care provider with approximately $1 billion of LTM revenue as of December\\n2022. One Medical operates a chain of primary healthcare clinics. The business is based on a membership model where the company\\ncharges a fixed monthly subscription fee and in exchange provides regular primary care services. The company focuses on both physical\\nappointments and digital offerings. The company also has announced its intention to acquire iRobot for approximately $1.9 billion. The\\npurchase is currently still being reviewed by the FTC.\\nWe estimate that Amazon's RCF/debt can return to near our target of 50% RCF/debt at the end of 2023 to the extent that Amazon\\nprioritizes aligning free cash flow generation with its investments. Our estimates assumes that free cash flow is positive and utilized to\\nreduce funded debt. Nonetheless, the economic back drop remains weak which poses a need for continued capital allocation discipline\\nto achieve this goal. 
We also recognize that cash+short term investments as a percentage of debt remains below historical levels.\\nExhibit 5\\nRetained cash flow to debt expected to recover in the next 12-18 months\\n20%25%30%35%40%45%50%55%60%65%70%\\n2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 Q1-23 LTM 12-18 Months\\nForward ViewUp Trigger Down Trigger RCF/Debt\\nCredit metrics reflect Moody's standard adjustments\\nSource: Moody’s Financial Metrics™, Moody's estimates\\nESG considerations\\nAmazon.com, Inc.'s ESG Credit Impact Score is Neutral-to-Low CIS-2\\nExhibit 6\\nESG Credit Impact Score\\nSource: Moody's Investors Service\\n(CIS-2 ) Amazon's ESG Credit Impact Score reflects our assessment that its governance practices which include maintaining high cash\\nbalances positions the company to meet its moderate exposure to environmental and social risks.\\n5          23 May 2023 Amazon.com, Inc.: Update to credit analysis\",\n",
+       "│   │   metadata={'source': '../../assignment_data/AMZN_Moodys_CreditRating_2023.pdf', 'page': 4}\n",
+       "),\n",
+       "Document(\n",
+       "│   │   page_content=\"MOODY'S INVESTORS SERVICE CORPORATES\\nExhibit 7\\nESG Issuer Profile Scores\\nSource: Moody's Investors Service\\nEnvironmental\\n(E-3) Amazon's environmental risks reflect exposure to carbon transition risk as product transport, which currently relies primarily on\\ncombustion engine vehicles, remains integral to its operations. The company continues to invest in EV and is committed to growing\\nits EV fleet. Its physical climate risk is low as its operations are well diversified within the US and internationally. Natural capital risk is\\nviewed as low given its business diversification through AWS despite its sales of food and apparel.\\nSocial\\n(S-3) Amazon's social risk reflects its exposure to human capital, customer relations and responsible production. Human capital risk\\nhigher than most retailers, as AWS requires a more highly skilled workforce. The company’s exposure to risk related to demographic\\nand societal trends is lower than the that of the overall retail and apparel industry. Amazon remains poised to benefit from the\\ncontinued shift of consumers transacting online and the robust demand for IT infrastructure and the continued adoption of cloud\\nservices. Its business diversification with AWS and its significant volume with third party sellers lowers its responsible production risk.\\nData privacy issues surrounding both its online and web services segments increases customer relations risk.\\nGovernance\\n(G-2) Amazon's governance risk reflects its overall conservative financial policies, including the maintenance of high cash balances and\\nlimited shareholder distributions to date and its moderate leverage. The company has separate chairperson and CEO roles with Jeff\\nBezos as Chairman.\\nESG Issuer Profile Scores and Credit Impact Scores for the rated entity/transaction are available on Moodys.com. 
To view the latest\\nscores, please click here to go to the landing page for the entity/transaction on MDC and view the ESG Scores section.\\nLiquidity analysis\\nAmazon maintains strong liquidity from its significant cash balances, which provides the company with increased flexibility and this\\nremains a critical credit consideration. In January 2023, Amazon issued an $8 billion 364-day term loan to partially fund the One\\nMedical acquisition which closed February 2023. The company issued $12.75 billion of debt securities in April 2022 and $8.25 billion\\nin December 2022 ranging in maturity from 2024 to 2062. The use of proceeds were for general corporate purposes. We expect\\npositive free cash flow to be applied to debt reduction supported by improved working capital and better profitability at its non-AWS\\noperations.\\nIn March 2022, the company expanded its US commercial paper program to $20 billion. The commercial paper program includes the\\noption to issue €3 billion and is backed by a $10 billion revolving credit facility expiring March 29, 2025 as well as a $10 billion 364-day\\ncredit facility which was put in place on November 18, 2022 and may be extended once. As of March 31, 2023 the company had $7.8\\nbillion of commercial paper outstanding under its programs. A significant credit consideration to its short term and long term ratings\\nis Amazon's commitment to fully cover all commercial paper balances with the availability under its committed revolvers and excess\\nsame-day available cash balances. The commercial paper program is likely to be used to bridge working capital swings and adds to its\\nformidable liquidity profile. The revolving credit facility has same day availability, no ongoing MAC clause and no financial covenants.\\n6          23 May 2023 Amazon.com, Inc.: Update to credit analysis\",\n",
+       "│   │   metadata={'source': '../../assignment_data/AMZN_Moodys_CreditRating_2023.pdf', 'page': 5}\n",
+       "),\n",
+       "Document(\n",
+       "│   │   page_content=\"MOODY'S INVESTORS SERVICE CORPORATES\\nRating methodology and scorecard factor\\n \\nThe following table shows Amazon.com, Inc.'s scorecard-indicated outcome using Retail Industry, with data as of March 31, 2023 and\\non a forward-looking basis. Applying Moody's 12-18 month forward view, the scorecard indicated outcome is A1, the same level as its\\nsenior unsecured rating.\\nExhibit 8\\nRetail Industry Scorecard [1][2]   \\nFactor 1 : Scale (10%) Measure Score Measure Score\\na) Revenue (USD Billion) $524.9 Aaa $588.6 Aaa\\nFactor 2 : Business Profile (30%)\\na) Stability of Product Aa Aa Aa Aa\\nb) Execution and Competitive Position Aa Aa Aa Aa\\nFactor 3 : Leverage and Coverage (45%)\\na) EBIT / Interest Expense 2.9x Ba 5.9x Baa\\nb) RCF / Net Debt 50.7% Aa 90.2% Aa\\nc) Debt / EBITDA 3.6x Ba 2.4x A\\nFactor 4 : Financial Policy (15%)\\na) Financial Policy A A A A\\nRating: \\na) Scorecard-Indicated Outcome A2 A1\\nb) Actual Rating Assigned A1Current \\nLTM 3/31/2023Moody's 12-18 Month Forward View\\nAs of 5/16/2023 [3]\\n[1] All ratios are based on 'Adjusted' financial data and incorporate Moody's Global Standard Adjustments for Non-Financial Corporations.\\n[2] As of 3/31/2023 (L).\\n[3] This represents Moody's forward view; not the view of the issuer; and unless noted in the text, does not incorporate significant acquisitions and divestitures.\\nSource: Moody's Financial Metrics™, Moody's estimates\\n7          23 May 2023 Amazon.com, Inc.: Update to credit analysis\",\n",
+       "│   │   metadata={'source': '../../assignment_data/AMZN_Moodys_CreditRating_2023.pdf', 'page': 6}\n",
+       "),\n",
+       "Document(\n",
+       "│   │   page_content=\"MOODY'S INVESTORS SERVICE CORPORATES\\n \\nRatings\\nExhibit 9\\nCategory Moody's Rating\\nAMAZON.COM, INC.\\nOutlook Stable\\nSenior Unsecured A1\\nCommercial Paper P-1\\nWHOLE FOODS MARKET, INC.\\nOutlook Stable\\nSenior Unsecured A1\\nSource: Moody's Investors Service\\n8          23 May 2023 Amazon.com, Inc.: Update to credit analysis\",\n",
+       "│   │   metadata={'source': '../../assignment_data/AMZN_Moodys_CreditRating_2023.pdf', 'page': 7}\n",
+       "),\n",
+       "Document(\n",
+       "│   │   page_content=\"MOODY'S INVESTORS SERVICE CORPORATES\\nAppendix\\nExhibit 10\\n(in US Millions)FYE\\nDec-18FYE\\nDec-19FYE\\nDec-20FYE\\nDec-21FYE\\nDec-22LTM Ending\\nMar-23\\nAs Reported Debt49,289 24,719 32,971 50,235 70,149 69,084\\nOperating Leases 21,442 52,814 67,533 82,083 84,823 85,695\\nNon-Standard Adjustments 101 101 725 725 6,800 15,800\\nMoody's-Adjusted Debt 70,832 77,634 101,229 133,043 161,772 170,579Moody's-Adjusted Debt Breakdown\\nAmazon.com, Inc.\\nAll figures are calculated using Moody’s estimates and standard adjustments.\\nSource: Moody’s Financial Metrics™\\nExhibit 11\\n(in US Millions)FYE\\nDec-18FYE\\nDec-19FYE\\nDec-20FYE\\nDec-21FYE\\nDec-22LTM Ending\\nMar-23\\nAs Reported EBITDA 28,028 31,277 42,589 63,385 21,956 32,278\\nOperating Leases 3,400 3,669 5,019 7,199 8,847 9,256\\nUnusual 0 0 0 -11,526 13,870 5,625\\nMoody's-Adjusted EBITDA 31,428 34,946 47,608 59,058 44,673 47,159Moody's-Adjusted EBITDA Breakdown\\nAmazon.com, Inc.\\nAll figures are calculated using Moody’s estimates and standard adjustments.\\nSource: Moody’s Financial Metrics™\\nExhibit 12\\nPeer snapshot\\n(in US millions)FYE\\nDec-21FYE\\nDec-22LTM\\nMar-23FYE\\nJan-21FYE\\nJan-22FYE\\nJan-23FYE\\nDec-21FYE\\nDec-22LTM\\nMar-23FYE\\nAug-21FYE\\nAug-22LTM\\nFeb-23FYE\\nMay-21FYE\\nMay-22LTM\\nFeb-23\\nRevenue $469,822 $513,983 $524,897 $559,151 $572,754 $611,289 $257,637 $282,836 $284,612 $195,929 $226,954 $234,390 $40,479 $42,440 $47,957\\nEBITDA $59,058 $44,673 $47,159 $36,657 $39,032 $37,233 $91,935 $94,469 $93,141 $8,928 $10,195 $10,493 $19,363 $19,268 $19,314\\nTotal Debt $133,043 $161,772 $170,579 $71,299 $57,323 $60,496 $34,992 $35,777 $36,292 $11,407 $10,906 $10,931 $93,460 $85,145 $101,097\\nCash & Cash Equiv. $36,220 $53,888 $49,343 $17,741 $14,760 $8,625 $20,945 $21,879 $25,924 $11,258 $10,203 $12,970 $30,098 $21,383 $8,219\\nEBITDA Margin 12.6% 8.7% 9.0% 6.6% 6.8% 6.1% 35.7% 33.4% 32.7% 4.6% 4.5% 4.5% 47.8% 45.4% 40.3%\\nEBIT / Int. Exp. 
9.6x 2.8x 2.9x 7.4x 9.5x 8.3x 93.9x 84.1x 82.4x 29.7x 36.7x 39.1x 6.1x 5.4x 3.9x\\nDebt / EBITDA 2.3x 3.6x 3.6x 1.9x 1.5x 1.6x 0.4x 0.4x 0.4x 1.3x 1.1x 1.0x 4.8x 4.4x 5.2x\\nRCF / Net Debt 62.8% 53.9% 50.7% 43.5% 60.2% 40.8% 679.1% 691.3% 906.6% 1446.4% 1011.0% -364.5% 21.4% 13.7% 14.5%\\nFCF / Debt -19.5% -15.4% -8.9% 26.2% 6.9% 8.0% 194.0% 167.7% 170.6% -3.9% 14.8% 32.4% 11.4% 1.8% 3.8%A1 Stable Aa2 Stable Aa2 Stable Aa3 Stable Baa2 StableAmazon.com, Inc. Walmart Inc. Alphabet Inc. Costco Wholesale Corporation Oracle Corporation\\nAll figures & ratios calculated using Moody’s estimates & standard adjustments. FYE = Financial Year-End. LTM = Last Twelve Months. RUR* = Ratings under Review, where UPG = for\\nupgrade and DNG = for downgrade.\\nSource: Moody’s Financial Metrics™\\n9          23 May 2023 Amazon.com, Inc.: Update to credit analysis\",\n",
+       "│   │   metadata={'source': '../../assignment_data/AMZN_Moodys_CreditRating_2023.pdf', 'page': 8}\n",
+       "),\n",
+       "Document(\n",
+       "│   │   page_content=\"MOODY'S INVESTORS SERVICE CORPORATES\\n© 2023 Moody’s Corporation, Moody’s Investors Service, Inc., Moody’s Analytics, Inc. and/or their licensors and affiliates (collectively, “MOODY’S”). All rights reserved.\\nCREDIT RATINGS ISSUED BY MOODY'S CREDIT RATINGS AFFILIATES ARE THEIR CURRENT OPINIONS OF THE RELATIVE FUTURE CREDIT RISK OF ENTITIES, CREDIT\\nCOMMITMENTS, OR DEBT OR DEBT-LIKE SECURITIES, AND MATERIALS, PRODUCTS, SERVICES AND INFORMATION PUBLISHED BY MOODY’S (COLLECTIVELY,\\n“PUBLICATIONS”) MAY INCLUDE SUCH CURRENT OPINIONS. MOODY’S DEFINES CREDIT RISK AS THE RISK THAT AN ENTITY MAY NOT MEET ITS CONTRACTUAL\\nFINANCIAL OBLIGATIONS AS THEY COME DUE AND ANY ESTIMATED FINANCIAL LOSS IN THE EVENT OF DEFAULT OR IMPAIRMENT. SEE APPLICABLE MOODY’S\\nRATING SYMBOLS AND DEFINITIONS PUBLICATION FOR INFORMATION ON THE TYPES OF CONTRACTUAL FINANCIAL OBLIGATIONS ADDRESSED BY MOODY’S\\nCREDIT RATINGS. CREDIT RATINGS DO NOT ADDRESS ANY OTHER RISK, INCLUDING BUT NOT LIMITED TO: LIQUIDITY RISK, MARKET VALUE RISK, OR PRICE\\nVOLATILITY. CREDIT RATINGS, NON-CREDIT ASSESSMENTS (“ASSESSMENTS”), AND OTHER OPINIONS INCLUDED IN MOODY’S PUBLICATIONS ARE NOT\\nSTATEMENTS OF CURRENT OR HISTORICAL FACT. MOODY’S PUBLICATIONS MAY ALSO INCLUDE QUANTITATIVE MODEL-BASED ESTIMATES OF CREDIT RISK AND\\nRELATED OPINIONS OR COMMENTARY PUBLISHED BY MOODY’S ANALYTICS, INC. AND/OR ITS AFFILIATES. MOODY’S CREDIT RATINGS, ASSESSMENTS, OTHER\\nOPINIONS AND PUBLICATIONS DO NOT CONSTITUTE OR PROVIDE INVESTMENT OR FINANCIAL ADVICE, AND MOODY’S CREDIT RATINGS, ASSESSMENTS, OTHER\\nOPINIONS AND PUBLICATIONS ARE NOT AND DO NOT PROVIDE RECOMMENDATIONS TO PURCHASE, SELL, OR HOLD PARTICULAR SECURITIES. 
MOODY’S CREDIT\\nRATINGS, ASSESSMENTS, OTHER OPINIONS AND PUBLICATIONS DO NOT COMMENT ON THE SUITABILITY OF AN INVESTMENT FOR ANY PARTICULAR INVESTOR.\\nMOODY’S ISSUES ITS CREDIT RATINGS, ASSESSMENTS AND OTHER OPINIONS AND PUBLISHES ITS PUBLICATIONS WITH THE EXPECTATION AND UNDERSTANDING\\nTHAT EACH INVESTOR WILL, WITH DUE CARE, MAKE ITS OWN STUDY AND EVALUATION OF EACH SECURITY THAT IS UNDER CONSIDERATION FOR PURCHASE,\\nHOLDING, OR SALE.\\nMOODY’S CREDIT RATINGS, ASSESSMENTS, OTHER OPINIONS, AND PUBLICATIONS ARE NOT INTENDED FOR USE BY RETAIL INVESTORS AND IT WOULD BE\\nRECKLESS AND INAPPROPRIATE FOR RETAIL INVESTORS TO USE MOODY’S CREDIT RATINGS, ASSESSMENTS, OTHER OPINIONS OR PUBLICATIONS WHEN MAKING\\nAN INVESTMENT DECISION. IF IN DOUBT YOU SHOULD CONTACT YOUR FINANCIAL OR OTHER PROFESSIONAL ADVISER.\\nALL INFORMATION CONTAINED HEREIN IS PROTECTED BY LAW, INCLUDING BUT NOT LIMITED TO, COPYRIGHT LAW, AND NONE OF SUCH INFORMATION MAY BE COPIED\\nOR OTHERWISE REPRODUCED, REPACKAGED, FURTHER TRANSMITTED, TRANSFERRED, DISSEMINATED, REDISTRIBUTED OR RESOLD, OR STORED FOR SUBSEQUENT USE\\nFOR ANY SUCH PURPOSE, IN WHOLE OR IN PART, IN ANY FORM OR MANNER OR BY ANY MEANS WHATSOEVER, BY ANY PERSON WITHOUT MOODY’S PRIOR WRITTEN\\nCONSENT.\\nMOODY’S CREDIT RATINGS, ASSESSMENTS, OTHER OPINIONS AND PUBLICATIONS ARE NOT INTENDED FOR USE BY ANY PERSON AS A BENCHMARK AS THAT TERM IS\\nDEFINED FOR REGULATORY PURPOSES AND MUST NOT BE USED IN ANY WAY THAT COULD RESULT IN THEM BEING CONSIDERED A BENCHMARK.\\nAll information contained herein is obtained by MOODY’S from sources believed by it to be accurate and reliable. Because of the possibility of human or mechanical error as well\\nas other factors, however, all information contained herein is provided “AS IS” without warranty of any kind. 
MOODY'S adopts all necessary measures so that the information it\\nuses in assigning a credit rating is of sufficient quality and from sources MOODY'S considers to be reliable including, when appropriate, independent third-party sources. However,\\nMOODY’S is not an auditor and cannot in every instance independently verify or validate information received in the credit rating process or in preparing its Publications.\\nTo the extent permitted by law, MOODY’S and its directors, officers, employees, agents, representatives, licensors and suppliers disclaim liability to any person or entity for any\\nindirect, special, consequential, or incidental losses or damages whatsoever arising from or in connection with the information contained herein or the use of or inability to use any\\nsuch information, even if MOODY’S or any of its directors, officers, employees, agents, representatives, licensors or suppliers is advised in advance of the possibility of such losses or\\ndamages, including but not limited to: (a) any loss of present or prospective profits or (b) any loss or damage arising where the relevant financial instrument is not the subject of a\\nparticular credit rating assigned by MOODY’S.\\nTo the extent permitted by law, MOODY’S and its directors, officers, employees, agents, representatives, licensors and suppliers disclaim liability for any direct or compensatory\\nlosses or damages caused to any person or entity, including but not limited to by any negligence (but excluding fraud, willful misconduct or any other type of liability that, for the\\navoidance of doubt, by law cannot be excluded) on the part of, or any contingency within or beyond the control of, MOODY’S or any of its directors, officers, employees, agents,\\nrepresentatives, licensors or suppliers, arising from or in connection with the information contained herein or the use of or inability to use any such information.\\nNO WARRANTY, EXPRESS OR IMPLIED, AS TO THE ACCURACY, TIMELINESS, 
COMPLETENESS, MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OF ANY CREDIT\\nRATING, ASSESSMENT, OTHER OPINION OR INFORMATION IS GIVEN OR MADE BY MOODY’S IN ANY FORM OR MANNER WHATSOEVER.\\nMoody’s Investors Service, Inc., a wholly-owned credit rating agency subsidiary of Moody’s Corporation (“MCO”), hereby discloses that most issuers of debt securities (including\\ncorporate and municipal bonds, debentures, notes and commercial paper) and preferred stock rated by Moody’s Investors Service, Inc. have, prior to assignment of any credit rating,\\nagreed to pay to Moody’s Investors Service, Inc. for credit ratings opinions and services rendered by it fees ranging from $1,000 to approximately $5,000,000. MCO and Moody’s\\nInvestors Service also maintain policies and procedures to address the independence of Moody’s Investors Service credit ratings and credit rating processes. Information regarding\\ncertain affiliations that may exist between directors of MCO and rated entities, and between entities who hold credit ratings from Moody’s Investors Service, Inc. and have also\\npublicly reported to the SEC an ownership interest in MCO of more than 5%, is posted annually at www.moodys.com  under the heading “Investor Relations — Corporate Governance\\n— Charter Documents - Director and Shareholder Affiliation Policy.”\\nAdditional terms for Australia only: Any publication into Australia of this document is pursuant to the Australian Financial Services License of MOODY’S affiliate, Moody’s Investors\\nService Pty Limited ABN 61 003 399 657AFSL 336969 and/or Moody’s Analytics Australia Pty Ltd ABN 94 105 136 972 AFSL 383569 (as applicable). This document is intended\\nto be provided only to “wholesale clients” within the meaning of section 761G of the Corporations Act 2001. 
By continuing to access this document from within Australia, you\\nrepresent to MOODY’S that you are, or are accessing the document as a representative of, a “wholesale client” and that neither you nor the entity you represent will directly or\\nindirectly disseminate this document or its contents to “retail clients” within the meaning of section 761G of the Corporations Act 2001. MOODY’S credit rating is an opinion as to\\nthe creditworthiness of a debt obligation of the issuer, not on the equity securities of the issuer or any form of security that is available to retail investors.\\nAdditional terms for Japan only: Moody's Japan K.K. (“MJKK”) is a wholly-owned credit rating agency subsidiary of Moody's Group Japan G.K., which is wholly-owned by Moody’s\\nOverseas Holdings Inc., a wholly-owned subsidiary of MCO. Moody’s SF Japan K.K. (“MSFJ”) is a wholly-owned credit rating agency subsidiary of MJKK. MSFJ is not a Nationally\\nRecognized Statistical Rating Organization (“NRSRO”). Therefore, credit ratings assigned by MSFJ are Non-NRSRO Credit Ratings. Non-NRSRO Credit Ratings are assigned by an\\nentity that is not a NRSRO and, consequently, the rated obligation will not qualify for certain types of treatment under U.S. laws. MJKK and MSFJ are credit rating agencies registered\\nwith the Japan Financial Services Agency and their registration numbers are FSA Commissioner (Ratings) No. 
2 and 3 respectively.\\nMJKK or MSFJ (as applicable) hereby disclose that most issuers of debt securities (including corporate and municipal bonds, debentures, notes and commercial paper) and preferred\\nstock rated by MJKK or MSFJ (as applicable) have, prior to assignment of any credit rating, agreed to pay to MJKK or MSFJ (as applicable) for credit ratings opinions and services\\nrendered by it fees ranging from JPY100,000 to approximately JPY550,000,000.\\nMJKK and MSFJ also maintain policies and procedures to address Japanese regulatory requirements.\\nREPORT NUMBER 1366931\\n10          23 May 2023 Amazon.com, Inc.: Update to credit analysis\",\n",
+       "│   │   metadata={'source': '../../assignment_data/AMZN_Moodys_CreditRating_2023.pdf', 'page': 9}\n",
+       ")\n",
+       "]\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mDocument\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mpage_content\u001b[0m=\u001b[32m\"CORPORATES\u001b[0m\u001b[32m\\nCREDIT OPINION\\n23 May 2023\\nUpdate\\nRATINGS\\nAmazon.com, Inc.\\nDomicile Seattle, Washington,\\nUnited States\\nLong Term Rating A1\\nType Senior Unsecured -\\nDom Curr\\nOutlook Stable\\nPlease see the ratings section at the end of this report\\nfor more information. The ratings and outlook shown\\nreflect information as of the publication date.\\nContacts\\nChristina Boni +1.212.553.0514\\nSenior Vice President\\nchristina.boni@moodys.com\\nJack Myers +1.212.553.5116\\nAssociate Analyst\\njack.myers@moodys.com\\nMargaret Taylor +1.212.553.0424\\nAssociate Managing Director\\nmargaret.taylor@moodys.comAmazon.com, Inc.\\nUpdate to credit analysis\\nSummary\\nAmazon.com, Inc. 's \u001b[0m\u001b[32m(\u001b[0m\u001b[32mA1/Prime-1 stable\u001b[0m\u001b[32m)\u001b[0m\u001b[32m credit profile reflects its powerful global brand, which\\nis synonymous with online retail, as well as the strength and profitability of Amazon Web\\nServices \u001b[0m\u001b[32m(\u001b[0m\u001b[32m“AWS”\u001b[0m\u001b[32m)\u001b[0m\u001b[32m, the market leader in the cloud computing market. The company is reliant\\non the operating income derived from AWS, as its non-AWS profitability has remained weak\\nsince the end of 2021. Although the company is making progress with improving productivity\\nand reducing costs, online operating margins remain well below historical levels. Amazon has\\ntaken actions to make its fulfillment operations more efficient as its business grows into its\\ncapacity, which doubled during the pandemic. Amazon has also built a solid ecosystem of\\nentertainment content that enhances its offering, operates a formidable third-party seller\\nbusiness and generates a solid and growing revenue stream from advertising. 
Nonetheless,\\nits credit metrics are currently weak for the A1 rating with RCF/Debt below 50%, as lower\\nprofitability, coupled with increased levels of investment have led to higher debt levels\\nand lower cash balances. Capital allocation will be critical to improving its credit profile as\\nAmazon navigates a weaker economic backdrop that could dampen demand for its products\\nand services as it pursues cost reductions and efficiencies to restore profitability at online\\nretail. The growing online presence of brick-and-mortar retailers, as well as the increasing\\ncompetition from larger, well capitalized companies in AWS' universe also presents future\\nchallenges.\\nExhibit 1\\nAmazon's debt has continued to rise as operating income remains below 2019\\n$0$20,000$40,000$60,000$80,000$100,000$120,000$140,000$160,000$180,000\\n$0$5,000$10,000$15,000$20,000$25,000$30,000\\n2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 Q1 -23 LTM\\nMoody's Adjusted Debt \u001b[0m\u001b[32m(\u001b[0m\u001b[32mUSD Millions\u001b[0m\u001b[32m)\u001b[0m\u001b[32mMoody's Adj. 
Operating Income \u001b[0m\u001b[32m(\u001b[0m\u001b[32mUSD Millions\u001b[0m\u001b[32m)\u001b[0m\u001b[32mMoody's adjusted operating income Moody's adjusted debt\\nDebt includes lease\\nSource: Moody’s Financial Metrics™\\n\"\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'source'\u001b[0m: \u001b[32m'../../assignment_data/AMZN_Moodys_CreditRating_2023.pdf'\u001b[0m, \u001b[32m'page'\u001b[0m: \u001b[1;36m0\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mDocument\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mpage_content\u001b[0m=\u001b[32m\"MOODY\u001b[0m\u001b[32m'S INVESTORS SERVICE CORPORATES\\nCredit strengths\\n»Leading online retailer and cloud provider\\n»AWS provides an increasing income stream provides solid positioning in AI\\n»Prime membership base supports customer loyalty\\n»Advertising revenue remains a significant area of growth\\nCredit challenges\\n»Brick-and-mortar retailers continue to increase online retail presence\\n»Heightened cloud competition from larger, well capitalized tech companies\\n»Inefficiencies in its retail fulfillment operations weigh on profitability\\n»Reduced cash balance lowers cushion for volatile or heavy investment periods\\nRating outlook\\nThe stable outlook reflects our view that Amazon will quickly restore its credit metrics to levels reflective of its A1 rating. 
The outlook\\nalso assumes that Amazon will maintain excellent liquidity and consistent financial strategies.\\nFactors that could lead to an upgrade\\n»Ratings could be upgraded if Amazon's numerous investments generate commensurate levels of profitability such that RCF/debt is\\nmaintained around 65%.\\n»Additional factors that would be critical for an upgrade are the continued maintenance of very strong liquidity, a robust cash and\\ninvestments to debt position and maintenance of conservative financial strategies.\\nFactors that could lead to a downgrade\\nRatings could be downgraded if:\\n»Operating performance continues to weaken, or\\n»It becomes clear that investments are not paying off, or\\n»Financial strategy is becoming significantly more aggressive with regard to cash returned to shareholders or acquisitions\\n»If for any of the above, RCF/debt falls below 50% for an extended period\\nThis publication does not announce a credit rating action. For any credit ratings referenced in this publication, please see the issuer/deal page on https://ratings.moodys.com for the\\nmost updated credit rating action information and rating history.\\n2 23 May 2023 Amazon.com, Inc.: Update to credit analysis\"\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'source'\u001b[0m: \u001b[32m'../../assignment_data/AMZN_Moodys_CreditRating_2023.pdf'\u001b[0m, \u001b[32m'page'\u001b[0m: \u001b[1;36m1\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mDocument\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mpage_content\u001b[0m=\u001b[32m\"MOODY\u001b[0m\u001b[32m'S INVESTORS SERVICE CORPORATES\\nKey indicators\\nExhibit 2\\nAmazon.com, Inc.\\nUS Billions Dec-18 Dec-19 Dec-20 Dec-21 Dec-22LTM \\n\u001b[0m\u001b[32m(\u001b[0m\u001b[32mMar-23\u001b[0m\u001b[32m)\u001b[0m\u001b[32m12-18 Month \\nForward View\\nRevenue 233 281 386 470 514 
525 589\\nEBIT / Interest Expense 5.9x 6.8x 10.2x 9.6x 2.8x 2.9x 5.9x\\nRCF / Net Debt 87.7% 90.9% 81.4% 62.8% 53.9% 50.7% 90.2%\\nDebt / EBITDA 2.3x 2.2x 2.1x 2.3x 3.6x 3.6x 2.4x\\nAll figures and ratios are calculated using Moody’s estimates and standard adjustments. Moody's Forecasts \u001b[0m\u001b[32m(\u001b[0m\u001b[32mf\u001b[0m\u001b[32m)\u001b[0m\u001b[32m or Projections \u001b[0m\u001b[32m(\u001b[0m\u001b[32mproj.\u001b[0m\u001b[32m)\u001b[0m\u001b[32m are Moody's opinion and do not represent the views of\\nthe issuer. Periods are Financial Year-End unless indicated. LTM = Last Twelve Months.\\nSource: Moody’s Financial Metrics™\\nProfile\\nHeadquartered in Seattle, Washington, Amazon.com, Inc. is the world’s largest online retailer, and also a leading web services provider\\nvia AWS. Revenue was approximately $525 billion for the twelve months ended March 31, 2023.\\nExhibit 3\\nAmazon's Revenue by SegmentExhibit 4\\nAmazon's Operating Profit by Segment\\n$107,006 $135,987 $177,866 $232,887 $280,522 $386,064 $469,822 $513,983 $524,897 \\n $\u001b[0m\u001b[32m(\u001b[0m\u001b[32m50,000\u001b[0m\u001b[32m)\u001b[0m\u001b[32m $50,000 $150,000 $250,000 $350,000 $450,000 $550,000\\n2015 2016 2017 2018 2019 2020 2021 2022 Q1-23\\nLTMRevenue \u001b[0m\u001b[32m(\u001b[0m\u001b[32mUSD Millions\u001b[0m\u001b[32m)\u001b[0m\u001b[32mNorth America International AWS\\nSource: Company SEC Filings$2,233 $4,186 $4,106 $12,420 $14,541 $22,899 $24,879 \\n$12,248 \\n$13,353 \\n $\u001b[0m\u001b[32m(\u001b[0m\u001b[32m10,000\u001b[0m\u001b[32m)\u001b[0m\u001b[32m $\u001b[0m\u001b[32m(\u001b[0m\u001b[32m7,500\u001b[0m\u001b[32m)\u001b[0m\u001b[32m $\u001b[0m\u001b[32m(\u001b[0m\u001b[32m5,000\u001b[0m\u001b[32m)\u001b[0m\u001b[32m $\u001b[0m\u001b[32m(\u001b[0m\u001b[32m2,500\u001b[0m\u001b[32m)\u001b[0m\u001b[32m $- $2,500 $5,000 $7,500 $10,000 $12,500 $15,000 $17,500 $20,000 $22,500 $25,000 $27,500 $30,000\\n2015 2016 2017 2018 2019 2020 2021 2022 Q1-23\\nLTMOperating 
Income \u001b[0m\u001b[32m(\u001b[0m\u001b[32mUSD Millions\u001b[0m\u001b[32m)\u001b[0m\u001b[32mNorth America International AWS\\nSource: Company SEC Filings\\nDetailed credit considerations\\nAWS has leading market share, high profitability and solid long term prospects\\nAWS continues to generate most of Amazon's operating profit, making it possible for investment programs \u001b[0m\u001b[32m(\u001b[0m\u001b[32mdelivery, content and\\ninternational\u001b[0m\u001b[32m)\u001b[0m\u001b[32m to continue. Despite still being relatively early in the conversion of companies transitioning to the cloud, revenue\\ngrowth and profitability of AWS continues to experience a slowing of growth in Q1 2023 as companies try to find more effective\\ncost solutions. Revenue growth in Q1 2023 has decelerated to 15.8% from 20.2% in Q4 2022 and 27.5% in Q3 2022, as operating\\nmargins decline sequentially and from last year. These trends have continued into Q2 2023 with April revenue growth rates lower than\\nQ1 2023. We expect revenue growth to improve toward the end of 2023 as optimization efforts of customers are exhausted. AWS’\\ncustomers continue to increase commitments for future capacity usage and we expect strong long-term growth for AWS benefiting\\nfrom migration of IT infrastructure investments from on-premise to the cloud. AWS is also well-positioned to capitalize on the rapidly\\ngrowing AI opportunity in the cloud with its scalable infrastructure, tools for developers to build and deploy models and services that\\ncustomers can use to leverage proprietary large language models. At the same time, AWS faces strong competitors with financial\\nresources such as Microsoft Corporation \u001b[0m\u001b[32m(\u001b[0m\u001b[32mAaa Stable\u001b[0m\u001b[32m)\u001b[0m\u001b[32m, Oracle Corporation \u001b[0m\u001b[32m(\u001b[0m\u001b[32mBaa2 Stable\u001b[0m\u001b[32m)\u001b[0m\u001b[32m, Alphabet Inc. 
\u001b[0m\u001b[32m(\u001b[0m\u001b[32mAa2 Stable\u001b[0m\u001b[32m)\u001b[0m\u001b[32m, and IBM \u001b[0m\u001b[32m(\u001b[0m\u001b[32mA3\\nStable\u001b[0m\u001b[32m)\u001b[0m\u001b[32m that all aim to grow market share. Operating margin expansion for AWS could be challenged by increasing competition and\\nelevated investments.\\n3 23 May 2023 Amazon.com, Inc.: Update to credit analysis\"\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'source'\u001b[0m: \u001b[32m'../../assignment_data/AMZN_Moodys_CreditRating_2023.pdf'\u001b[0m, \u001b[32m'page'\u001b[0m: \u001b[1;36m2\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mDocument\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mpage_content\u001b[0m=\u001b[32m\"MOODY\u001b[0m\u001b[32m'S INVESTORS SERVICE CORPORATES\\nOnline retail presence still dominates as digital efforts of brick and mortar accelerate\\nAmazon's online retail is the clear leader relative its US competitors. Nonetheless, competition has accelerated from larger players such\\nas Walmart Inc. \u001b[0m\u001b[32m(\u001b[0m\u001b[32mAa2 stable\u001b[0m\u001b[32m)\u001b[0m\u001b[32m, Target Corporation \u001b[0m\u001b[32m(\u001b[0m\u001b[32mA2 stable\u001b[0m\u001b[32m)\u001b[0m\u001b[32m and Best Buy Co., Inc. \u001b[0m\u001b[32m(\u001b[0m\u001b[32mA3 stable\u001b[0m\u001b[32m)\u001b[0m\u001b[32m, all of which have continued to enhance\\ntheir online capabilities. These larger brick-and-mortar retailers can also leverage their store networks and proprietary distribution\\ncapability to offer the consumer options for obtaining their purchases. In store pickup provides a competitive offering \u001b[0m\u001b[32m(\u001b[0m\u001b[32mwithin hours\\nin most cases\u001b[0m\u001b[32m)\u001b[0m\u001b[32m and a more cost effective alternative to the seller. 
In addition, some brick-and-mortar retailers already have vehicle\\nnetworks that are used to stock its stores which could be used for some same day delivery capability. An example is the auto parts\\nretailers, which measure delivery times in hours, not days.\\nAmazon faces an increasingly competitive environment as the pandemic accelerated investment in areas such as curbside pickup,\\ninventory visibility and better usage of stores to fulfill orders. To combat these growing competitive threats, Amazon continues to\\nsupport its Prime Free One-Day Delivery initiative, which is an effort to counter buy-online/pick-up in store. The company is also\\nmoving its fulfillment network to a regionalized model which is expected to improve delivery speed and cost. To offset rising costs, the\\ncompany increased pricing on its prime membership in the U.S. in 2022 from $119 to $139 annually. The move, which is the first price\\nincrease since 2018 and will provide to offset to these continued investments.\\nThird-party sales remains an important part of the business with growth continuing to outpace first-party. Third party comprises\\n59% of paid units relative to 55% for the same period last year. The company has implemented a fuel and inflation as well as a peak\\nfulfillment surcharge on fulfillment fee per unit rates for Fulfilled by Amazon sellers in the face of rising inflation and energy costs. We\\nnote that Walmart has increased its third party efforts with its Advance Auto Parts and Shopify relationships. In the Advance example,\\nWalmart is providing space in its stores, as well as the use of its distribution network and placement on the website.\\nPhysical store sales remain primarily from Whole Foods. The 2017 acquisition of Whole Foods consisting now of over 500 locations\\nprovides Amazon with a “scalable” food business, as well as pickup points for online orders across categories. 
The company has closed\\nsome non-core concepts including its physical bookstores, and 4-star stores.\\nAdvertising revenue continues to grow rapidly in programmatic advertising and, in our opinion, has an advantage in this area given\\nits significant e-commerce presence and data gatheri ng. Growth in advertising revenue remains robust at over 23% in Q1 2023.\\nAdvertising for LTM March 2023 was roughly $39.4 billion, and we note advertising operating margins are generally healthy and\\ntypically run in the midteens. Along with AWS, the profitability of this category provides “buffer” to support its retail operations\\nperforming well below historical operating margins.\\nCost pressures are being addressed as investment is targeted at or below 2022 levels\\nAmazon's retail operations is contending with the inefficiency posed by the more than doubling of its capacity during the pandemic,\\nAmazon continues to work to move its cost structure closer to pre-pandemic levels and has made significant head count reduction\\nwith 27,000 roles eliminated including areas such as AWS as well as Twitch, devices, advertising and human resources.\\nAmazon remains committed to providing value to customers, despite inflation while slowing remaining elevated. The company is\\nbenefiting from continued improvement in shipping speeds and in-stock rates have recovered. Amazon has been contending with\\nhigher costs related to system productivity and inflation since the second half of 2021.\\nDuring first quarter 2023, Amazon continued to take cost cutting measures to improve margins and make progress on reducing its\\ncash usage. These efforts include shutting down businesses such as Amazon Fabric and Amazon Care as well as closing eight Amazon\\nGo locations. The company has also increased its minimum grocery purchases from $35 to $150 for free shipping and reduced\\ncorporate head count. 
Nonetheless, cost structure improvements have been partially offset by increased spend in advertising and other\\ninvestments such as video content and marketing costs.\\nAmazon expects investment in 2023 to be at or below 2022 levels at it reduces its spending on fulfillment and transportation given\\nits significant increase in capacity in recent years and shifts spending to technology and related infrastructure including large language\\nmodels and generative AI. In 2022, the company spent approximately $59 billion in capital investments.\\nAmazon has continued to pursue acquisitions it has exited businesses that are not reaching long term return goals. The company\\nacquired MGM Holdings Inc. \u001b[0m\u001b[32m(\u001b[0m\u001b[32m“MGM”\u001b[0m\u001b[32m)\u001b[0m\u001b[32m for approximately $8.5 billion, including MGM’s debt, in Q1 2022 which increased its video\\n4 23 May 2023 Amazon.com, Inc.: Update to credit analysis\"\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'source'\u001b[0m: \u001b[32m'../../assignment_data/AMZN_Moodys_CreditRating_2023.pdf'\u001b[0m, \u001b[32m'page'\u001b[0m: \u001b[1;36m3\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mDocument\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mpage_content\u001b[0m=\u001b[32m\"MOODY\u001b[0m\u001b[32m'S INVESTORS SERVICE CORPORATES\\ncontent which is a key component to its Prime's value proposition and differentiates its offering beyond shipping. MGM's revenue of\\nabout $1.5 billion represents less than 1% of Amazon's revenue. The company also closed in Q1 2023 on its $4 billion purchase of\\n1Life Healthcare Inc. “One Medical,” a national primary care provider with approximately $1 billion of LTM revenue as of December\\n2022. One Medical operates a chain of primary healthcare clinics. 
The business is based on a membership model where the company\\ncharges a fixed monthly subscription fee and in exchange provides regular primary care services. The company focuses on both physical\\nappointments and digital offerings. The company also has announced its intention to acquire iRobot for approximately $1.9 billion. The\\npurchase is currently still being reviewed by the FTC.\\nWe estimate that Amazon's RCF/debt can return to near our target of 50% RCF/debt at the end of 2023 to the extent that Amazon\\nprioritizes aligning free cash flow generation with its investments. Our estimates assumes that free cash flow is positive and utilized to\\nreduce funded debt. Nonetheless, the economic back drop remains weak which poses a need for continued capital allocation discipline\\nto achieve this goal. We also recognize that cash+short term investments as a percentage of debt remains below historical levels.\\nExhibit 5\\nRetained cash flow to debt expected to recover in the next 12-18 months\\n20%25%30%35%40%45%50%55%60%65%70%\\n2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 Q1-23 LTM 12-18 Months\\nForward ViewUp Trigger Down Trigger RCF/Debt\\nCredit metrics reflect Moody's standard adjustments\\nSource: Moody’s Financial Metrics™, Moody's estimates\\nESG considerations\\nAmazon.com, Inc.'s ESG Credit Impact Score is Neutral-to-Low CIS-2\\nExhibit 6\\nESG Credit Impact Score\\nSource: Moody's Investors Service\\n\u001b[0m\u001b[32m(\u001b[0m\u001b[32mCIS-2 \u001b[0m\u001b[32m)\u001b[0m\u001b[32m Amazon's ESG Credit Impact Score reflects our assessment that its governance practices which include maintaining high cash\\nbalances positions the company to meet its moderate exposure to environmental and social risks.\\n5 23 May 2023 Amazon.com, Inc.: Update to credit analysis\"\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'source'\u001b[0m: 
\u001b[32m'../../assignment_data/AMZN_Moodys_CreditRating_2023.pdf'\u001b[0m, \u001b[32m'page'\u001b[0m: \u001b[1;36m4\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mDocument\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mpage_content\u001b[0m=\u001b[32m\"MOODY\u001b[0m\u001b[32m'S INVESTORS SERVICE CORPORATES\\nExhibit 7\\nESG Issuer Profile Scores\\nSource: Moody's Investors Service\\nEnvironmental\\n\u001b[0m\u001b[32m(\u001b[0m\u001b[32mE-3\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Amazon's environmental risks reflect exposure to carbon transition risk as product transport, which currently relies primarily on\\ncombustion engine vehicles, remains integral to its operations. The company continues to invest in EV and is committed to growing\\nits EV fleet. Its physical climate risk is low as its operations are well diversified within the US and internationally. Natural capital risk is\\nviewed as low given its business diversification through AWS despite its sales of food and apparel.\\nSocial\\n\u001b[0m\u001b[32m(\u001b[0m\u001b[32mS-3\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Amazon's social risk reflects its exposure to human capital, customer relations and responsible production. Human capital risk\\nhigher than most retailers, as AWS requires a more highly skilled workforce. The company’s exposure to risk related to demographic\\nand societal trends is lower than the that of the overall retail and apparel industry. Amazon remains poised to benefit from the\\ncontinued shift of consumers transacting online and the robust demand for IT infrastructure and the continued adoption of cloud\\nservices. 
Its business diversification with AWS and its significant volume with third party sellers lowers its responsible production risk.\\nData privacy issues surrounding both its online and web services segments increases customer relations risk.\\nGovernance\\n\u001b[0m\u001b[32m(\u001b[0m\u001b[32mG-2\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Amazon's governance risk reflects its overall conservative financial policies, including the maintenance of high cash balances and\\nlimited shareholder distributions to date and its moderate leverage. The company has separate chairperson and CEO roles with Jeff\\nBezos as Chairman.\\nESG Issuer Profile Scores and Credit Impact Scores for the rated entity/transaction are available on Moodys.com. To view the latest\\nscores, please click here to go to the landing page for the entity/transaction on MDC and view the ESG Scores section.\\nLiquidity analysis\\nAmazon maintains strong liquidity from its significant cash balances, which provides the company with increased flexibility and this\\nremains a critical credit consideration. In January 2023, Amazon issued an $8 billion 364-day term loan to partially fund the One\\nMedical acquisition which closed February 2023. The company issued $12.75 billion of debt securities in April 2022 and $8.25 billion\\nin December 2022 ranging in maturity from 2024 to 2062. The use of proceeds were for general corporate purposes. We expect\\npositive free cash flow to be applied to debt reduction supported by improved working capital and better profitability at its non-AWS\\noperations.\\nIn March 2022, the company expanded its US commercial paper program to $20 billion. The commercial paper program includes the\\noption to issue €3 billion and is backed by a $10 billion revolving credit facility expiring March 29, 2025 as well as a $10 billion 364-day\\ncredit facility which was put in place on November 18, 2022 and may be extended once. 
As of March 31, 2023 the company had $7.8\\nbillion of commercial paper outstanding under its programs. A significant credit consideration to its short term and long term ratings\\nis Amazon's commitment to fully cover all commercial paper balances with the availability under its committed revolvers and excess\\nsame-day available cash balances. The commercial paper program is likely to be used to bridge working capital swings and adds to its\\nformidable liquidity profile. The revolving credit facility has same day availability, no ongoing MAC clause and no financial covenants.\\n6 23 May 2023 Amazon.com, Inc.: Update to credit analysis\"\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'source'\u001b[0m: \u001b[32m'../../assignment_data/AMZN_Moodys_CreditRating_2023.pdf'\u001b[0m, \u001b[32m'page'\u001b[0m: \u001b[1;36m5\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mDocument\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mpage_content\u001b[0m=\u001b[32m\"MOODY\u001b[0m\u001b[32m'S INVESTORS SERVICE CORPORATES\\nRating methodology and scorecard factor\\n \\nThe following table shows Amazon.com, Inc.'s scorecard-indicated outcome using Retail Industry, with data as of March 31, 2023 and\\non a forward-looking basis. 
Applying Moody's 12-18 month forward view, the scorecard indicated outcome is A1, the same level as its\\nsenior unsecured rating.\\nExhibit 8\\nRetail Industry Scorecard \u001b[0m\u001b[32m[\u001b[0m\u001b[32m1\u001b[0m\u001b[32m]\u001b[0m\u001b[32m[\u001b[0m\u001b[32m2\u001b[0m\u001b[32m]\u001b[0m\u001b[32m \\nFactor 1 : Scale \u001b[0m\u001b[32m(\u001b[0m\u001b[32m10%\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Measure Score Measure Score\\na\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Revenue \u001b[0m\u001b[32m(\u001b[0m\u001b[32mUSD Billion\u001b[0m\u001b[32m)\u001b[0m\u001b[32m $524.9 Aaa $588.6 Aaa\\nFactor 2 : Business Profile \u001b[0m\u001b[32m(\u001b[0m\u001b[32m30%\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\na\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Stability of Product Aa Aa Aa Aa\\nb\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Execution and Competitive Position Aa Aa Aa Aa\\nFactor 3 : Leverage and Coverage \u001b[0m\u001b[32m(\u001b[0m\u001b[32m45%\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\na\u001b[0m\u001b[32m)\u001b[0m\u001b[32m EBIT / Interest Expense 2.9x Ba 5.9x Baa\\nb\u001b[0m\u001b[32m)\u001b[0m\u001b[32m RCF / Net Debt 50.7% Aa 90.2% Aa\\nc\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Debt / EBITDA 3.6x Ba 2.4x A\\nFactor 4 : Financial Policy \u001b[0m\u001b[32m(\u001b[0m\u001b[32m15%\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\na\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Financial Policy A A A A\\nRating: \\na\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Scorecard-Indicated Outcome A2 A1\\nb\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Actual Rating Assigned A1Current \\nLTM 3/31/2023Moody's 12-18 Month Forward View\\nAs of 5/16/2023 \u001b[0m\u001b[32m[\u001b[0m\u001b[32m3\u001b[0m\u001b[32m]\u001b[0m\u001b[32m\\n\u001b[0m\u001b[32m[\u001b[0m\u001b[32m1\u001b[0m\u001b[32m]\u001b[0m\u001b[32m All ratios are based on 'Adjusted' financial data and incorporate Moody's Global Standard Adjustments for Non-Financial 
Corporations.\\n\u001b[0m\u001b[32m[\u001b[0m\u001b[32m2\u001b[0m\u001b[32m]\u001b[0m\u001b[32m As of 3/31/2023 \u001b[0m\u001b[32m(\u001b[0m\u001b[32mL\u001b[0m\u001b[32m)\u001b[0m\u001b[32m.\\n\u001b[0m\u001b[32m[\u001b[0m\u001b[32m3\u001b[0m\u001b[32m]\u001b[0m\u001b[32m This represents Moody's forward view; not the view of the issuer; and unless noted in the text, does not incorporate significant acquisitions and divestitures.\\nSource: Moody's Financial Metrics™, Moody's estimates\\n7 23 May 2023 Amazon.com, Inc.: Update to credit analysis\"\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'source'\u001b[0m: \u001b[32m'../../assignment_data/AMZN_Moodys_CreditRating_2023.pdf'\u001b[0m, \u001b[32m'page'\u001b[0m: \u001b[1;36m6\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mDocument\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mpage_content\u001b[0m=\u001b[32m\"MOODY\u001b[0m\u001b[32m'S INVESTORS SERVICE CORPORATES\\n \\nRatings\\nExhibit 9\\nCategory Moody's Rating\\nAMAZON.COM, INC.\\nOutlook Stable\\nSenior Unsecured A1\\nCommercial Paper P-1\\nWHOLE FOODS MARKET, INC.\\nOutlook Stable\\nSenior Unsecured A1\\nSource: Moody's Investors Service\\n8 23 May 2023 Amazon.com, Inc.: Update to credit analysis\"\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'source'\u001b[0m: \u001b[32m'../../assignment_data/AMZN_Moodys_CreditRating_2023.pdf'\u001b[0m, \u001b[32m'page'\u001b[0m: \u001b[1;36m7\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mDocument\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mpage_content\u001b[0m=\u001b[32m\"MOODY\u001b[0m\u001b[32m'S INVESTORS SERVICE CORPORATES\\nAppendix\\nExhibit 10\\n\u001b[0m\u001b[32m(\u001b[0m\u001b[32min US 
Millions\u001b[0m\u001b[32m)\u001b[0m\u001b[32mFYE\\nDec-18FYE\\nDec-19FYE\\nDec-20FYE\\nDec-21FYE\\nDec-22LTM Ending\\nMar-23\\nAs Reported Debt49,289 24,719 32,971 50,235 70,149 69,084\\nOperating Leases 21,442 52,814 67,533 82,083 84,823 85,695\\nNon-Standard Adjustments 101 101 725 725 6,800 15,800\\nMoody's-Adjusted Debt 70,832 77,634 101,229 133,043 161,772 170,579Moody's-Adjusted Debt Breakdown\\nAmazon.com, Inc.\\nAll figures are calculated using Moody’s estimates and standard adjustments.\\nSource: Moody’s Financial Metrics™\\nExhibit 11\\n\u001b[0m\u001b[32m(\u001b[0m\u001b[32min US Millions\u001b[0m\u001b[32m)\u001b[0m\u001b[32mFYE\\nDec-18FYE\\nDec-19FYE\\nDec-20FYE\\nDec-21FYE\\nDec-22LTM Ending\\nMar-23\\nAs Reported EBITDA 28,028 31,277 42,589 63,385 21,956 32,278\\nOperating Leases 3,400 3,669 5,019 7,199 8,847 9,256\\nUnusual 0 0 0 -11,526 13,870 5,625\\nMoody's-Adjusted EBITDA 31,428 34,946 47,608 59,058 44,673 47,159Moody's-Adjusted EBITDA Breakdown\\nAmazon.com, Inc.\\nAll figures are calculated using Moody’s estimates and standard adjustments.\\nSource: Moody’s Financial Metrics™\\nExhibit 12\\nPeer snapshot\\n\u001b[0m\u001b[32m(\u001b[0m\u001b[32min US millions\u001b[0m\u001b[32m)\u001b[0m\u001b[32mFYE\\nDec-21FYE\\nDec-22LTM\\nMar-23FYE\\nJan-21FYE\\nJan-22FYE\\nJan-23FYE\\nDec-21FYE\\nDec-22LTM\\nMar-23FYE\\nAug-21FYE\\nAug-22LTM\\nFeb-23FYE\\nMay-21FYE\\nMay-22LTM\\nFeb-23\\nRevenue $469,822 $513,983 $524,897 $559,151 $572,754 $611,289 $257,637 $282,836 $284,612 $195,929 $226,954 $234,390 $40,479 $42,440 $47,957\\nEBITDA $59,058 $44,673 $47,159 $36,657 $39,032 $37,233 $91,935 $94,469 $93,141 $8,928 $10,195 $10,493 $19,363 $19,268 $19,314\\nTotal Debt $133,043 $161,772 $170,579 $71,299 $57,323 $60,496 $34,992 $35,777 $36,292 $11,407 $10,906 $10,931 $93,460 $85,145 $101,097\\nCash & Cash Equiv. 
$36,220 $53,888 $49,343 $17,741 $14,760 $8,625 $20,945 $21,879 $25,924 $11,258 $10,203 $12,970 $30,098 $21,383 $8,219\\nEBITDA Margin 12.6% 8.7% 9.0% 6.6% 6.8% 6.1% 35.7% 33.4% 32.7% 4.6% 4.5% 4.5% 47.8% 45.4% 40.3%\\nEBIT / Int. Exp. 9.6x 2.8x 2.9x 7.4x 9.5x 8.3x 93.9x 84.1x 82.4x 29.7x 36.7x 39.1x 6.1x 5.4x 3.9x\\nDebt / EBITDA 2.3x 3.6x 3.6x 1.9x 1.5x 1.6x 0.4x 0.4x 0.4x 1.3x 1.1x 1.0x 4.8x 4.4x 5.2x\\nRCF / Net Debt 62.8% 53.9% 50.7% 43.5% 60.2% 40.8% 679.1% 691.3% 906.6% 1446.4% 1011.0% -364.5% 21.4% 13.7% 14.5%\\nFCF / Debt -19.5% -15.4% -8.9% 26.2% 6.9% 8.0% 194.0% 167.7% 170.6% -3.9% 14.8% 32.4% 11.4% 1.8% 3.8%A1 Stable Aa2 Stable Aa2 Stable Aa3 Stable Baa2 StableAmazon.com, Inc. Walmart Inc. Alphabet Inc. Costco Wholesale Corporation Oracle Corporation\\nAll figures & ratios calculated using Moody’s estimates & standard adjustments. FYE = Financial Year-End. LTM = Last Twelve Months. RUR* = Ratings under Review, where UPG = for\\nupgrade and DNG = for downgrade.\\nSource: Moody’s Financial Metrics™\\n9 23 May 2023 Amazon.com, Inc.: Update to credit analysis\"\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'source'\u001b[0m: \u001b[32m'../../assignment_data/AMZN_Moodys_CreditRating_2023.pdf'\u001b[0m, \u001b[32m'page'\u001b[0m: \u001b[1;36m8\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mDocument\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mpage_content\u001b[0m=\u001b[32m\"MOODY\u001b[0m\u001b[32m'S INVESTORS SERVICE CORPORATES\\n© 2023 Moody’s Corporation, Moody’s Investors Service, Inc., Moody’s Analytics, Inc. and/or their licensors and affiliates \u001b[0m\u001b[32m(\u001b[0m\u001b[32mcollectively, “MOODY’S”\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. 
All rights reserved.\\nCREDIT RATINGS ISSUED BY MOODY'S CREDIT RATINGS AFFILIATES ARE THEIR CURRENT OPINIONS OF THE RELATIVE FUTURE CREDIT RISK OF ENTITIES, CREDIT\\nCOMMITMENTS, OR DEBT OR DEBT-LIKE SECURITIES, AND MATERIALS, PRODUCTS, SERVICES AND INFORMATION PUBLISHED BY MOODY’S \u001b[0m\u001b[32m(\u001b[0m\u001b[32mCOLLECTIVELY,\\n“PUBLICATIONS”\u001b[0m\u001b[32m)\u001b[0m\u001b[32m MAY INCLUDE SUCH CURRENT OPINIONS. MOODY’S DEFINES CREDIT RISK AS THE RISK THAT AN ENTITY MAY NOT MEET ITS CONTRACTUAL\\nFINANCIAL OBLIGATIONS AS THEY COME DUE AND ANY ESTIMATED FINANCIAL LOSS IN THE EVENT OF DEFAULT OR IMPAIRMENT. SEE APPLICABLE MOODY’S\\nRATING SYMBOLS AND DEFINITIONS PUBLICATION FOR INFORMATION ON THE TYPES OF CONTRACTUAL FINANCIAL OBLIGATIONS ADDRESSED BY MOODY’S\\nCREDIT RATINGS. CREDIT RATINGS DO NOT ADDRESS ANY OTHER RISK, INCLUDING BUT NOT LIMITED TO: LIQUIDITY RISK, MARKET VALUE RISK, OR PRICE\\nVOLATILITY. CREDIT RATINGS, NON-CREDIT ASSESSMENTS \u001b[0m\u001b[32m(\u001b[0m\u001b[32m“ASSESSMENTS”\u001b[0m\u001b[32m)\u001b[0m\u001b[32m, AND OTHER OPINIONS INCLUDED IN MOODY’S PUBLICATIONS ARE NOT\\nSTATEMENTS OF CURRENT OR HISTORICAL FACT. MOODY’S PUBLICATIONS MAY ALSO INCLUDE QUANTITATIVE MODEL-BASED ESTIMATES OF CREDIT RISK AND\\nRELATED OPINIONS OR COMMENTARY PUBLISHED BY MOODY’S ANALYTICS, INC. AND/OR ITS AFFILIATES. MOODY’S CREDIT RATINGS, ASSESSMENTS, OTHER\\nOPINIONS AND PUBLICATIONS DO NOT CONSTITUTE OR PROVIDE INVESTMENT OR FINANCIAL ADVICE, AND MOODY’S CREDIT RATINGS, ASSESSMENTS, OTHER\\nOPINIONS AND PUBLICATIONS ARE NOT AND DO NOT PROVIDE RECOMMENDATIONS TO PURCHASE, SELL, OR HOLD PARTICULAR SECURITIES. 
MOODY’S CREDIT\\nRATINGS, ASSESSMENTS, OTHER OPINIONS AND PUBLICATIONS DO NOT COMMENT ON THE SUITABILITY OF AN INVESTMENT FOR ANY PARTICULAR INVESTOR.\\nMOODY’S ISSUES ITS CREDIT RATINGS, ASSESSMENTS AND OTHER OPINIONS AND PUBLISHES ITS PUBLICATIONS WITH THE EXPECTATION AND UNDERSTANDING\\nTHAT EACH INVESTOR WILL, WITH DUE CARE, MAKE ITS OWN STUDY AND EVALUATION OF EACH SECURITY THAT IS UNDER CONSIDERATION FOR PURCHASE,\\nHOLDING, OR SALE.\\nMOODY’S CREDIT RATINGS, ASSESSMENTS, OTHER OPINIONS, AND PUBLICATIONS ARE NOT INTENDED FOR USE BY RETAIL INVESTORS AND IT WOULD BE\\nRECKLESS AND INAPPROPRIATE FOR RETAIL INVESTORS TO USE MOODY’S CREDIT RATINGS, ASSESSMENTS, OTHER OPINIONS OR PUBLICATIONS WHEN MAKING\\nAN INVESTMENT DECISION. IF IN DOUBT YOU SHOULD CONTACT YOUR FINANCIAL OR OTHER PROFESSIONAL ADVISER.\\nALL INFORMATION CONTAINED HEREIN IS PROTECTED BY LAW, INCLUDING BUT NOT LIMITED TO, COPYRIGHT LAW, AND NONE OF SUCH INFORMATION MAY BE COPIED\\nOR OTHERWISE REPRODUCED, REPACKAGED, FURTHER TRANSMITTED, TRANSFERRED, DISSEMINATED, REDISTRIBUTED OR RESOLD, OR STORED FOR SUBSEQUENT USE\\nFOR ANY SUCH PURPOSE, IN WHOLE OR IN PART, IN ANY FORM OR MANNER OR BY ANY MEANS WHATSOEVER, BY ANY PERSON WITHOUT MOODY’S PRIOR WRITTEN\\nCONSENT.\\nMOODY’S CREDIT RATINGS, ASSESSMENTS, OTHER OPINIONS AND PUBLICATIONS ARE NOT INTENDED FOR USE BY ANY PERSON AS A BENCHMARK AS THAT TERM IS\\nDEFINED FOR REGULATORY PURPOSES AND MUST NOT BE USED IN ANY WAY THAT COULD RESULT IN THEM BEING CONSIDERED A BENCHMARK.\\nAll information contained herein is obtained by MOODY’S from sources believed by it to be accurate and reliable. Because of the possibility of human or mechanical error as well\\nas other factors, however, all information contained herein is provided “AS IS” without warranty of any kind. 
MOODY'S adopts all necessary measures so that the information it\\nuses in assigning a credit rating is of sufficient quality and from sources MOODY'S considers to be reliable including, when appropriate, independent third-party sources. However,\\nMOODY’S is not an auditor and cannot in every instance independently verify or validate information received in the credit rating process or in preparing its Publications.\\nTo the extent permitted by law, MOODY’S and its directors, officers, employees, agents, representatives, licensors and suppliers disclaim liability to any person or entity for any\\nindirect, special, consequential, or incidental losses or damages whatsoever arising from or in connection with the information contained herein or the use of or inability to use any\\nsuch information, even if MOODY’S or any of its directors, officers, employees, agents, representatives, licensors or suppliers is advised in advance of the possibility of such losses or\\ndamages, including but not limited to: \u001b[0m\u001b[32m(\u001b[0m\u001b[32ma\u001b[0m\u001b[32m)\u001b[0m\u001b[32m any loss of present or prospective profits or \u001b[0m\u001b[32m(\u001b[0m\u001b[32mb\u001b[0m\u001b[32m)\u001b[0m\u001b[32m any loss or damage arising where the relevant financial instrument is not the subject of a\\nparticular credit rating assigned by MOODY’S.\\nTo the extent permitted by law, MOODY’S and its directors, officers, employees, agents, representatives, licensors and suppliers disclaim liability for any direct or compensatory\\nlosses or damages caused to any person or entity, including but not limited to by any negligence \u001b[0m\u001b[32m(\u001b[0m\u001b[32mbut excluding fraud, willful misconduct or any other type of liability that, for the\\navoidance of doubt, by law cannot be excluded\u001b[0m\u001b[32m)\u001b[0m\u001b[32m on the part of, or any contingency within or beyond the control of, MOODY’S or any of its directors, officers, employees, 
agents,\\nrepresentatives, licensors or suppliers, arising from or in connection with the information contained herein or the use of or inability to use any such information.\\nNO WARRANTY, EXPRESS OR IMPLIED, AS TO THE ACCURACY, TIMELINESS, COMPLETENESS, MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OF ANY CREDIT\\nRATING, ASSESSMENT, OTHER OPINION OR INFORMATION IS GIVEN OR MADE BY MOODY’S IN ANY FORM OR MANNER WHATSOEVER.\\nMoody’s Investors Service, Inc., a wholly-owned credit rating agency subsidiary of Moody’s Corporation \u001b[0m\u001b[32m(\u001b[0m\u001b[32m“MCO”\u001b[0m\u001b[32m)\u001b[0m\u001b[32m, hereby discloses that most issuers of debt securities \u001b[0m\u001b[32m(\u001b[0m\u001b[32mincluding\\ncorporate and municipal bonds, debentures, notes and commercial paper\u001b[0m\u001b[32m)\u001b[0m\u001b[32m and preferred stock rated by Moody’s Investors Service, Inc. have, prior to assignment of any credit rating,\\nagreed to pay to Moody’s Investors Service, Inc. for credit ratings opinions and services rendered by it fees ranging from $1,000 to approximately $5,000,000. MCO and Moody’s\\nInvestors Service also maintain policies and procedures to address the independence of Moody’s Investors Service credit ratings and credit rating processes. Information regarding\\ncertain affiliations that may exist between directors of MCO and rated entities, and between entities who hold credit ratings from Moody’s Investors Service, Inc. 
and have also\\npublicly reported to the SEC an ownership interest in MCO of more than 5%, is posted annually at www.moodys.com under the heading “Investor Relations — Corporate Governance\\n— Charter Documents - Director and Shareholder Affiliation Policy.”\\nAdditional terms for Australia only: Any publication into Australia of this document is pursuant to the Australian Financial Services License of MOODY’S affiliate, Moody’s Investors\\nService Pty Limited ABN 61 003 399 657AFSL 336969 and/or Moody’s Analytics Australia Pty Ltd ABN 94 105 136 972 AFSL 383569 \u001b[0m\u001b[32m(\u001b[0m\u001b[32mas applicable\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. This document is intended\\nto be provided only to “wholesale clients” within the meaning of section 761G of the Corporations Act 2001. By continuing to access this document from within Australia, you\\nrepresent to MOODY’S that you are, or are accessing the document as a representative of, a “wholesale client” and that neither you nor the entity you represent will directly or\\nindirectly disseminate this document or its contents to “retail clients” within the meaning of section 761G of the Corporations Act 2001. MOODY’S credit rating is an opinion as to\\nthe creditworthiness of a debt obligation of the issuer, not on the equity securities of the issuer or any form of security that is available to retail investors.\\nAdditional terms for Japan only: Moody's Japan K.K. \u001b[0m\u001b[32m(\u001b[0m\u001b[32m“MJKK”\u001b[0m\u001b[32m)\u001b[0m\u001b[32m is a wholly-owned credit rating agency subsidiary of Moody's Group Japan G.K., which is wholly-owned by Moody’s\\nOverseas Holdings Inc., a wholly-owned subsidiary of MCO. Moody’s SF Japan K.K. \u001b[0m\u001b[32m(\u001b[0m\u001b[32m“MSFJ”\u001b[0m\u001b[32m)\u001b[0m\u001b[32m is a wholly-owned credit rating agency subsidiary of MJKK. 
MSFJ is not a Nationally\\nRecognized Statistical Rating Organization \u001b[0m\u001b[32m(\u001b[0m\u001b[32m“NRSRO”\u001b[0m\u001b[32m)\u001b[0m\u001b[32m. Therefore, credit ratings assigned by MSFJ are Non-NRSRO Credit Ratings. Non-NRSRO Credit Ratings are assigned by an\\nentity that is not a NRSRO and, consequently, the rated obligation will not qualify for certain types of treatment under U.S. laws. MJKK and MSFJ are credit rating agencies registered\\nwith the Japan Financial Services Agency and their registration numbers are FSA Commissioner \u001b[0m\u001b[32m(\u001b[0m\u001b[32mRatings\u001b[0m\u001b[32m)\u001b[0m\u001b[32m No. 2 and 3 respectively.\\nMJKK or MSFJ \u001b[0m\u001b[32m(\u001b[0m\u001b[32mas applicable\u001b[0m\u001b[32m)\u001b[0m\u001b[32m hereby disclose that most issuers of debt securities \u001b[0m\u001b[32m(\u001b[0m\u001b[32mincluding corporate and municipal bonds, debentures, notes and commercial paper\u001b[0m\u001b[32m)\u001b[0m\u001b[32m and preferred\\nstock rated by MJKK or MSFJ \u001b[0m\u001b[32m(\u001b[0m\u001b[32mas applicable\u001b[0m\u001b[32m)\u001b[0m\u001b[32m have, prior to assignment of any credit rating, agreed to pay to MJKK or MSFJ \u001b[0m\u001b[32m(\u001b[0m\u001b[32mas applicable\u001b[0m\u001b[32m)\u001b[0m\u001b[32m for credit ratings opinions and services\\nrendered by it fees ranging from JPY100,000 to approximately JPY550,000,000.\\nMJKK and MSFJ also maintain policies and procedures to address Japanese regulatory requirements.\\nREPORT NUMBER 1366931\\n10 23 May 2023 Amazon.com, Inc.: Update to credit analysis\"\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'source'\u001b[0m: \u001b[32m'../../assignment_data/AMZN_Moodys_CreditRating_2023.pdf'\u001b[0m, \u001b[32m'page'\u001b[0m: \u001b[1;36m9\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[1m]\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": 
"display_data" + } + ], + "source": [ + "loaders = [PyPDFLoader(x) for x in [pdf3]]\n", + "pages = []\n", + "for loader in loaders:\n", + " pages.extend(loader.load())\n", + "pprint(pages)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"CORPORATES\\nCREDIT OPINION\\n23 May 2023\\nUpdate\\nRATINGS\\nAmazon.com, Inc.\\nDomicile Seattle, Washington,\\nUnited States\\nLong Term Rating A1\\nType Senior Unsecured -\\nDom Curr\\nOutlook Stable\\nPlease see the ratings section at the end of this report\\nfor more information. The ratings and outlook shown\\nreflect information as of the publication date.\\nContacts\\nChristina Boni +1.212.553.0514\\nSenior Vice President\\nchristina.boni@moodys.com\\nJack Myers +1.212.553.5116\\nAssociate Analyst\\njack.myers@moodys.com\\nMargaret Taylor +1.212.553.0424\\nAssociate Managing Director\\nmargaret.taylor@moodys.comAmazon.com, Inc.\\nUpdate to credit analysis\\nSummary\\nAmazon.com, Inc. 's (A1/Prime-1 stable) credit profile reflects its powerful global brand, which\\nis synonymous with online retail, as well as the strength and profitability of Amazon Web\\nServices (“AWS”), the market leader in the cloud computing market. The company is reliant\\non the operating income derived from AWS, as its non-AWS profitability has remained weak\\nsince the end of 2021. Although the company is making progress with improving productivity\\nand reducing costs, online operating margins remain well below historical levels. Amazon has\\ntaken actions to make its fulfillment operations more efficient as its business grows into its\\ncapacity, which doubled during the pandemic. Amazon has also built a solid ecosystem of\\nentertainment content that enhances its offering, operates a formidable third-party seller\\nbusiness and generates a solid and growing revenue stream from advertising. 
Nonetheless,\\nits credit metrics are currently weak for the A1 rating with RCF/Debt below 50%, as lower\\nprofitability, coupled with increased levels of investment have led to higher debt levels\\nand lower cash balances. Capital allocation will be critical to improving its credit profile as\\nAmazon navigates a weaker economic backdrop that could dampen demand for its products\\nand services as it pursues cost reductions and efficiencies to restore profitability at online\\nretail. The growing online presence of brick-and-mortar retailers, as well as the increasing\\ncompetition from larger, well capitalized companies in AWS' universe also presents future\\nchallenges.\\nExhibit 1\\nAmazon's debt has continued to rise as operating income remains below 2019\\n$0$20,000$40,000$60,000$80,000$100,000$120,000$140,000$160,000$180,000\\n$0$5,000$10,000$15,000$20,000$25,000$30,000\\n2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 Q1 -23 LTM\\nMoody's Adjusted Debt (USD Millions)Moody's Adj. Operating Income (USD Millions)Moody's adjusted operating income Moody's adjusted debt\\nDebt includes lease\\nSource: Moody’s Financial Metrics™\\n\"" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pages[0].page_content" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## PyPDF2" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
���������������������������������\n",
+       "
\n" + ], + "text/plain": [ + "���������������������������������\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "reader = PdfReader(pdf1)\n", + "page = reader.pages[0]\n", + "print(page.extract_text())" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Number of pages: 2\n",
+       "
\n" + ], + "text/plain": [ + "Number of pages: \u001b[1;36m2\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
���������������������������������\n",
+       "
\n" + ], + "text/plain": [ + "���������������������������������\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# https://pypdf2.readthedocs.io/en/3.x/user/extract-text.html\n", + "reader = PdfReader(pdf1)\n", + "number_of_pages = len(reader.pages)\n", + "print(f\"Number of pages: {number_of_pages}\")\n", + "page = reader.pages[0]\n", + "text = page.extract_text()\n", + "print(text)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "ename": "OSError", + "evalue": "cannot write mode PA as PNG", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/Volumes/DATA/Dropbox/IMAC_BACKUP/WORK/PROJECTS/LIQUIDITY/venv/lib/python3.10/site-packages/PIL/PngImagePlugin.py:1289\u001b[0m, in \u001b[0;36m_save\u001b[0;34m(im, fp, filename, chunk, save_all)\u001b[0m\n\u001b[1;32m 1288\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1289\u001b[0m rawmode, mode \u001b[38;5;241m=\u001b[39m \u001b[43m_OUTMODES\u001b[49m\u001b[43m[\u001b[49m\u001b[43mmode\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 1290\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "\u001b[0;31mKeyError\u001b[0m: 'PA'", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[17], line 6\u001b[0m\n\u001b[1;32m 3\u001b[0m page \u001b[38;5;241m=\u001b[39m reader\u001b[38;5;241m.\u001b[39mpages[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m 4\u001b[0m count \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m----> 6\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m image_file_object \u001b[38;5;129;01min\u001b[39;00m 
\u001b[43mpage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mimages\u001b[49m:\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(\u001b[38;5;28mstr\u001b[39m(count) \u001b[38;5;241m+\u001b[39m image_file_object\u001b[38;5;241m.\u001b[39mname, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwb\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m fp:\n\u001b[1;32m 8\u001b[0m fp\u001b[38;5;241m.\u001b[39mwrite(image_file_object\u001b[38;5;241m.\u001b[39mdata)\n", + "File \u001b[0;32m/Volumes/DATA/Dropbox/IMAC_BACKUP/WORK/PROJECTS/LIQUIDITY/venv/lib/python3.10/site-packages/PyPDF2/_page.py:481\u001b[0m, in \u001b[0;36mPageObject.images\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 479\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m obj \u001b[38;5;129;01min\u001b[39;00m x_object:\n\u001b[1;32m 480\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m x_object[obj][IA\u001b[38;5;241m.\u001b[39mSUBTYPE] \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/Image\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 481\u001b[0m extension, byte_stream \u001b[38;5;241m=\u001b[39m \u001b[43m_xobj_to_image\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx_object\u001b[49m\u001b[43m[\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 482\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m extension \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 483\u001b[0m filename \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mobj[\u001b[38;5;241m1\u001b[39m:]\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mextension\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n", + "File \u001b[0;32m/Volumes/DATA/Dropbox/IMAC_BACKUP/WORK/PROJECTS/LIQUIDITY/venv/lib/python3.10/site-packages/PyPDF2/filters.py:617\u001b[0m, in 
\u001b[0;36m_xobj_to_image\u001b[0;34m(x_object_obj)\u001b[0m\n\u001b[1;32m 615\u001b[0m img\u001b[38;5;241m.\u001b[39mputalpha(alpha)\n\u001b[1;32m 616\u001b[0m img_byte_arr \u001b[38;5;241m=\u001b[39m BytesIO()\n\u001b[0;32m--> 617\u001b[0m \u001b[43mimg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimg_byte_arr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mPNG\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 618\u001b[0m data \u001b[38;5;241m=\u001b[39m img_byte_arr\u001b[38;5;241m.\u001b[39mgetvalue()\n\u001b[1;32m 619\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m x_object_obj[SA\u001b[38;5;241m.\u001b[39mFILTER] \u001b[38;5;129;01min\u001b[39;00m (\n\u001b[1;32m 620\u001b[0m [FT\u001b[38;5;241m.\u001b[39mLZW_DECODE],\n\u001b[1;32m 621\u001b[0m [FT\u001b[38;5;241m.\u001b[39mASCII_85_DECODE],\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 625\u001b[0m \u001b[38;5;66;03m# There might not be any relationship between the filters and the\u001b[39;00m\n\u001b[1;32m 626\u001b[0m \u001b[38;5;66;03m# extension\u001b[39;00m\n", + "File \u001b[0;32m/Volumes/DATA/Dropbox/IMAC_BACKUP/WORK/PROJECTS/LIQUIDITY/venv/lib/python3.10/site-packages/PIL/Image.py:2459\u001b[0m, in \u001b[0;36mImage.save\u001b[0;34m(self, fp, format, **params)\u001b[0m\n\u001b[1;32m 2456\u001b[0m fp \u001b[38;5;241m=\u001b[39m builtins\u001b[38;5;241m.\u001b[39mopen(filename, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mw+b\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 2458\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 2459\u001b[0m \u001b[43msave_handler\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2460\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n\u001b[1;32m 2461\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m open_fp:\n", + "File \u001b[0;32m/Volumes/DATA/Dropbox/IMAC_BACKUP/WORK/PROJECTS/LIQUIDITY/venv/lib/python3.10/site-packages/PIL/PngImagePlugin.py:1292\u001b[0m, in \u001b[0;36m_save\u001b[0;34m(im, fp, filename, chunk, save_all)\u001b[0m\n\u001b[1;32m 1290\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 1291\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcannot write mode \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmode\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m as PNG\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 1292\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m(msg) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m 1294\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[1;32m 1295\u001b[0m \u001b[38;5;66;03m# write minimal PNG file\u001b[39;00m\n\u001b[1;32m 1297\u001b[0m fp\u001b[38;5;241m.\u001b[39mwrite(_MAGIC)\n", + "\u001b[0;31mOSError\u001b[0m: cannot write mode PA as PNG" + ] + } + ], + "source": [ + "# extract the images\n", + "reader = PdfReader(pdf2)\n", + "page = reader.pages[0]\n", + "count = 0\n", + "\n", + "for image_file_object in page.images:\n", + " with open(str(count) + image_file_object.name, \"wb\") as fp:\n", + " fp.write(image_file_object.data)\n", + " count += 1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Total disaster with the library's own code. PyPDF2 is not just a parser, it can merge etc so maybe we need something more specialized at parsing." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## LLAMA_PARSE" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Started parsing the file under job_id a3a95967-3301-4140-9c89-e1f5d4f0677d\n", + "[Document(id_='3f10f65a-c636-4df9-83b1-1ffb0878bb70', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='A very simple pdf file\\n\\n\\nsecond line\\n---\\n\\n', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\\n\\n{content}', metadata_template='{key}: {value}', metadata_seperator='\\n')]\n" + ] + } + ], + "source": [ + "# llama-parse is async-first, running the async code in a notebook requires the use of nest_asyncio\n", + "import nest_asyncio\n", + "nest_asyncio.apply()\n", + "#%%\n", + "# https://github.com/run-llama/llama_parse\n", + "parser = LlamaParse(\n", + " api_key = os.getenv(\"LLAMA_PARSE_API_KEY\"),\n", + " num_workers=4,\n", + " verbose=True,\n", + " language=\"en\",\n", + " result_type=\"text\" # \"markdown\" and \"text\" are available\n", + ")\n", + "#%%\n", + "documents = parser.load_data(pdf1)\n", + "# documents = parser.load_data([pdf1, pdf2]) # sync batch\n", + "\n", + "# documents = await parser.aload_data(pdf1) # async\n", + "# documents = await parser.aload_data([pdf1, pdf2]) # async batch\n", + "print(documents)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Started parsing the file under job_id f34a3133-b632-4ddd-8035-eacbe2678796\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'pages': [{'page': 1,\n", + " 'text': 'A very simple pdf file\\n\\n\\nsecond line',\n", + " 'md': 'A very simple pdf file\\n\\nsecond line',\n", + " 'images': [],\n", + " 'items': [{'type': 'text',\n", + " 'value': 'A very simple pdf 
file\\n\\nsecond line',\n", + " 'md': 'A very simple pdf file\\n\\nsecond line'}]},\n", + " {'page': 2,\n", + " 'text': 'text on 2nd page\\n\\n\\nurl',\n", + " 'md': 'text on 2nd page\\n\\n\\nurl',\n", + " 'images': [],\n", + " 'items': [{'type': 'text',\n", + " 'value': 'text on 2nd page\\n\\n\\nurl',\n", + " 'md': 'text on 2nd page\\n\\n\\nurl'}]}],\n", + " 'job_id': 'f34a3133-b632-4ddd-8035-eacbe2678796',\n", + " 'file_path': 'data/test.pdf'}]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# https://github.com/run-llama/llama_parse/blob/main/examples/demo_json.ipynb\n", + "docs = parser.get_json_result(pdf1)\n", + "docs" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'A very simple pdf file\\n\\n\\nsecond line'" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "docs[0]['pages'][0]['text']" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'text on 2nd page\\n\\n\\nurl'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "docs[0]['pages'][1]['text']" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Started parsing the file under job_id 4e1922f5-566b-4bf7-8684-da7f4a977e37\n" + ] + }, + { + "data": { + "text/plain": [ + "\" CORPORATES\\n\\n\\n CREDIT OPINION Amazon.com, Inc.\\n 23 May 2023\\nMoody's Adj. 
Operating Income (USD Millions) Update to credit analysis\\n Update Summary\\n Amazon.com, Inc.'s (A1/Prime-1 stable) credit profile reflects its powerful global brand, which\\n Send Your Feedback is synonymous with online retail, as well as the strength and profitability of Amazon Web\\n Services (“AWS”), the market leader in the cloud computing market. The company is reliant\\n on the operating income derived from AWS, as its non-AWS profitability has remained weak\\n since the end of 2021. Although the company is making progress with improving productivity\\n RATINGS and reducing costs, online operating margins remain well below historical levels. Amazon has\\n Amazon.com, Inc. taken actions to make its fulfillment operations more efficient as its business grows into its\\n Domicile Seattle, Washington,\\n United States capacity, which doubled during the pandemic. Amazon has also built a solid ecosystem of\\n Long Term Rating A1 entertainment content that enhances its offering, operates a formidable third-party seller\\n Type Senior Unsecured - business and generates a solid and growing revenue stream from advertising. Nonetheless,\\n Dom Curr\\n Outlook Stable its credit metrics are currently weak for the A1 rating with RCF/Debt below 50%, as lower\\n profitability, coupled with increased levels of investment have led to higher debt levels\\n Please see the ratings section at the end of this report and lower cash balances. Capital allocation will be critical to improving its credit profile as\\n for more information. The ratings and outlook shown Amazon navigates a weaker economic backdrop that could dampen demand for its products\\n reflect information as of the publication date. and services as it pursues cost reductions and efficiencies to restore profitability at online\\n retail. 
The growing online presence of brick-and-mortar retailers, as well as the increasing\\n Contacts competition from larger, well capitalized companies in AWS' universe also presents future\\n Christina Boni +1.212.553.0514 challenges.\\n Senior Vice President Exhibit 1\\n christina.boni@moodys.com Amazon's debt has continued to rise as operating income remains below 2019\\n\\n\\n Jack Myers +1.212.553.5116 $30,000 Moody's adjusted operating incomeMoody's adjusted debt $180,000\\n Associate Analyst $160,000\\n jack.myers@moodys.com $25,000 $140,000\\n\\n\\n Margaret Taylor +1.212.553.0424 $20,000 $120,000\\nMoody's Adjusted Debt (USD Millions) $100,000\\n Associate Managing Director $15,000 $80,000\\n margaret.taylor@moodys.com $10,000 $60,000\\n\\n\\n $5,000 $40,000\\n $20,000\\n\\n\\n $0 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 Q1 -23 LTM$0\\n Debt includes lease\\n Source: Moody’s Financial Metrics™\"" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "docs2 = parser.get_json_result(pdf2)\n", + "docs2[0]['pages'][0]['text']" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
{\n",
+       "'pages': [\n",
+       "│   │   {\n",
+       "│   │   │   'page': 1,\n",
+       "│   │   │   'text': \"                                                                                                                                                                                          CORPORATES\\n\\n\\n                CREDIT OPINION                                            Amazon.com, Inc.\\n                23 May 2023\\nMoody's Adj. Operating Income (USD Millions)                              Update to credit analysis\\n                  Update                                                  Summary\\n                                                                          Amazon.com, Inc.'s (A1/Prime-1 stable) credit profile reflects its powerful global brand, which\\n                           Send Your Feedback                             is synonymous with online retail, as well as the strength and profitability of Amazon Web\\n                                                                          Services (“AWS”), the market leader in the cloud computing market. The company is reliant\\n                                                                          on the operating income derived from AWS, as its non-AWS profitability has remained weak\\n                                                                          since the end of 2021. Although the company is making progress with improving productivity\\n                RATINGS                                                   and reducing costs, online operating margins remain well below historical levels. Amazon has\\n                 Amazon.com, Inc.                                         taken actions to make its fulfillment operations more efficient as its business grows into its\\n                 Domicile                  Seattle, Washington,\\n                                           United States                  capacity, which doubled during the pandemic. 
Amazon has also built a solid ecosystem of\\n                 Long Term Rating          A1                             entertainment content that enhances its offering, operates a formidable third-party seller\\n                 Type                      Senior Unsecured -             business and generates a solid and growing revenue stream from advertising. Nonetheless,\\n                                           Dom Curr\\n                 Outlook                   Stable                         its credit metrics are currently weak for the A1 rating with RCF/Debt below 50%, as lower\\n                                                                          profitability, coupled with increased levels of investment have led to higher debt levels\\n                Please see the ratings section at the end of this report  and lower cash balances. Capital allocation will be critical to improving its credit profile as\\n                for more information. The ratings and outlook shown       Amazon navigates a weaker economic backdrop that could dampen demand for its products\\n                reflect information as of the publication date.           and services as it pursues cost reductions and efficiencies to restore profitability at online\\n                                                                          retail. 
The growing online presence of brick-and-mortar retailers, as well as the increasing\\n                Contacts                                                  competition from larger, well capitalized companies in AWS' universe also presents future\\n                Christina Boni                   +1.212.553.0514          challenges.\\n                Senior Vice President                                     Exhibit 1\\n                christina.boni@moodys.com                                 Amazon's debt has continued to rise as operating income remains below 2019\\n\\n\\n                Jack Myers                        +1.212.553.5116               $30,000       Moody's adjusted operating incomeMoody's adjusted debt                                         $180,000\\n                Associate Analyst                                                                                                                                                            $160,000\\n                jack.myers@moodys.com                                           $25,000                                                                                                      $140,000\\n\\n\\n                Margaret Taylor                  +1.212.553.0424                $20,000                                                                                                      $120,000\\nMoody's Adjusted Debt (USD Millions)                                                                                                                                                         $100,000\\n                Associate Managing Director                                     $15,000                                                                                                      $80,000\\n                margaret.taylor@moodys.com                                      $10,000                                                                                                      $60,000\\n\\n\\n      
                                                                           $5,000                                                                                                      $40,000\\n                                                                                                                                                                                             $20,000\\n\\n\\n                                                                                   $0    2013      2014      2015     2016      2017      2018     2019      2020      2021  2022  Q1 -23 LTM$0\\n                                                                          Debt includes lease\\n                                                                          Source: Moody’s Financial Metrics™\",\n",
+       "│   │   │   'md': \"# CREDIT OPINION\\n\\nAmazon.com, Inc.\\n\\n23 May 2023\\n\\nMoody's Adj. Operating Income (USD Millions)\\n\\nUpdate to credit analysis\\n\\nSummary\\n\\nAmazon.com, Inc.'s (A1/Prime-1 stable) credit profile reflects its powerful global brand, which is synonymous with online retail, as well as the strength and profitability of Amazon Web Services (“AWS”), the market leader in the cloud computing market. The company is reliant on the operating income derived from AWS, as its non-AWS profitability has remained weak since the end of 2021. Although the company is making progress with improving productivity and reducing costs, online operating margins remain well below historical levels. Amazon has taken actions to make its fulfillment operations more efficient as its business grows into its capacity, which doubled during the pandemic. Amazon has also built a solid ecosystem of entertainment content that enhances its offering, operates a formidable third-party seller business and generates a solid and growing revenue stream from advertising. Nonetheless, its credit metrics are currently weak for the A1 rating with RCF/Debt below 50%, as lower profitability, coupled with increased levels of investment have led to higher debt levels and lower cash balances. Capital allocation will be critical to improving its credit profile as Amazon navigates a weaker economic backdrop that could dampen demand for its products and services as it pursues cost reductions and efficiencies to restore profitability at online retail. 
The growing online presence of brick-and-mortar retailers, as well as the increasing competition from larger, well capitalized companies in AWS' universe also presents future challenges.\\n\\n# RATINGS\\n\\n|Amazon.com, Inc.| |\\n|---|---|\\n|Domicile|Seattle, Washington, United States|\\n|Long Term Rating|A1|\\n|Type|Senior Unsecured - Dom Curr|\\n|Outlook|Stable|\\n\\nPlease see the ratings section at the end of this report for more information. The ratings and outlook shown reflect information as of the publication date.\\n\\n# Contacts\\n\\nChristina Boni +1.212.553.0514\\n\\nSenior Vice President\\n\\nchristina.boni@moodys.com\\n\\nJack Myers +1.212.553.5116\\n\\nAssociate Analyst\\n\\njack.myers@moodys.com\\n\\nMargaret Taylor +1.212.553.0424\\n\\nAssociate Managing Director\\n\\nmargaret.taylor@moodys.com\\n\\n# Exhibit 1\\n\\n| |Moody's adjusted operating income|Moody's adjusted debt|\\n|---|---|---|\\n|$30,000| |$180,000|\\n|$25,000| |$160,000|\\n|$20,000| |$140,000|\\n|$15,000| |$120,000|\\n|$10,000| |$100,000|\\n|$5,000| |$80,000|\\n|$0|2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 Q1 -23 LTM|$0|\\n\\nDebt includes lease\\n\\nSource: Moody’s Financial Metrics™\",\n",
+       "│   │   │   'images': [\n",
+       "│   │   │   │   {'name': 'img_p0_1.png', 'height': 193, 'width': 1117},\n",
+       "│   │   │   │   {'name': 'img_p0_2.png', 'height': 57, 'width': 281},\n",
+       "│   │   │   │   {'name': 'img_p0_3.png', 'height': 28, 'width': 1116}\n",
+       "│   │   │   ],\n",
+       "│   │   │   'items': [\n",
+       "│   │   │   │   {'type': 'heading', 'lvl': 1, 'value': 'CREDIT OPINION', 'md': '# CREDIT OPINION'},\n",
+       "│   │   │   │   {\n",
+       "│   │   │   │   │   'type': 'text',\n",
+       "│   │   │   │   │   'value': \"Amazon.com, Inc.\\n\\n23 May 2023\\n\\nMoody's Adj. Operating Income (USD Millions)\\n\\nUpdate to credit analysis\\n\\nSummary\\n\\nAmazon.com, Inc.'s (A1/Prime-1 stable) credit profile reflects its powerful global brand, which is synonymous with online retail, as well as the strength and profitability of Amazon Web Services (“AWS”), the market leader in the cloud computing market. The company is reliant on the operating income derived from AWS, as its non-AWS profitability has remained weak since the end of 2021. Although the company is making progress with improving productivity and reducing costs, online operating margins remain well below historical levels. Amazon has taken actions to make its fulfillment operations more efficient as its business grows into its capacity, which doubled during the pandemic. Amazon has also built a solid ecosystem of entertainment content that enhances its offering, operates a formidable third-party seller business and generates a solid and growing revenue stream from advertising. Nonetheless, its credit metrics are currently weak for the A1 rating with RCF/Debt below 50%, as lower profitability, coupled with increased levels of investment have led to higher debt levels and lower cash balances. Capital allocation will be critical to improving its credit profile as Amazon navigates a weaker economic backdrop that could dampen demand for its products and services as it pursues cost reductions and efficiencies to restore profitability at online retail. The growing online presence of brick-and-mortar retailers, as well as the increasing competition from larger, well capitalized companies in AWS' universe also presents future challenges.\",\n",
+       "│   │   │   │   │   'md': \"Amazon.com, Inc.\\n\\n23 May 2023\\n\\nMoody's Adj. Operating Income (USD Millions)\\n\\nUpdate to credit analysis\\n\\nSummary\\n\\nAmazon.com, Inc.'s (A1/Prime-1 stable) credit profile reflects its powerful global brand, which is synonymous with online retail, as well as the strength and profitability of Amazon Web Services (“AWS”), the market leader in the cloud computing market. The company is reliant on the operating income derived from AWS, as its non-AWS profitability has remained weak since the end of 2021. Although the company is making progress with improving productivity and reducing costs, online operating margins remain well below historical levels. Amazon has taken actions to make its fulfillment operations more efficient as its business grows into its capacity, which doubled during the pandemic. Amazon has also built a solid ecosystem of entertainment content that enhances its offering, operates a formidable third-party seller business and generates a solid and growing revenue stream from advertising. Nonetheless, its credit metrics are currently weak for the A1 rating with RCF/Debt below 50%, as lower profitability, coupled with increased levels of investment have led to higher debt levels and lower cash balances. Capital allocation will be critical to improving its credit profile as Amazon navigates a weaker economic backdrop that could dampen demand for its products and services as it pursues cost reductions and efficiencies to restore profitability at online retail. The growing online presence of brick-and-mortar retailers, as well as the increasing competition from larger, well capitalized companies in AWS' universe also presents future challenges.\"\n",
+       "│   │   │   │   },\n",
+       "│   │   │   │   {'type': 'heading', 'lvl': 1, 'value': 'RATINGS', 'md': '# RATINGS'},\n",
+       "│   │   │   │   {\n",
+       "│   │   │   │   │   'type': 'table',\n",
+       "│   │   │   │   │   'rows': [\n",
+       "│   │   │   │   │   │   ['Amazon.com, Inc.', ''],\n",
+       "│   │   │   │   │   │   ['Domicile', 'Seattle, Washington, United States'],\n",
+       "│   │   │   │   │   │   ['Long Term Rating', 'A1'],\n",
+       "│   │   │   │   │   │   ['Type', 'Senior Unsecured - Dom Curr'],\n",
+       "│   │   │   │   │   │   ['Outlook', 'Stable']\n",
+       "│   │   │   │   │   ],\n",
+       "│   │   │   │   │   'md': '|Amazon.com, Inc.| |\\n|---|---|\\n|Domicile|Seattle, Washington, United States|\\n|Long Term Rating|A1|\\n|Type|Senior Unsecured - Dom Curr|\\n|Outlook|Stable|',\n",
+       "│   │   │   │   │   'isPerfectTable': True,\n",
+       "│   │   │   │   │   'csv': '\"Amazon.com, Inc.\",\"\"\\n\"Domicile\",\"Seattle, Washington, United States\"\\n\"Long Term Rating\",\"A1\"\\n\"Type\",\"Senior Unsecured - Dom Curr\"\\n\"Outlook\",\"Stable\"'\n",
+       "│   │   │   │   },\n",
+       "│   │   │   │   {\n",
+       "│   │   │   │   │   'type': 'text',\n",
+       "│   │   │   │   │   'value': 'Please see the ratings section at the end of this report for more information. The ratings and outlook shown reflect information as of the publication date.',\n",
+       "│   │   │   │   │   'md': 'Please see the ratings section at the end of this report for more information. The ratings and outlook shown reflect information as of the publication date.'\n",
+       "│   │   │   │   },\n",
+       "│   │   │   │   {'type': 'heading', 'lvl': 1, 'value': 'Contacts', 'md': '# Contacts'},\n",
+       "│   │   │   │   {\n",
+       "│   │   │   │   │   'type': 'text',\n",
+       "│   │   │   │   │   'value': 'Christina Boni +1.212.553.0514\\n\\nSenior Vice President\\n\\nchristina.boni@moodys.com\\n\\nJack Myers +1.212.553.5116\\n\\nAssociate Analyst\\n\\njack.myers@moodys.com\\n\\nMargaret Taylor +1.212.553.0424\\n\\nAssociate Managing Director\\n\\nmargaret.taylor@moodys.com',\n",
+       "│   │   │   │   │   'md': 'Christina Boni +1.212.553.0514\\n\\nSenior Vice President\\n\\nchristina.boni@moodys.com\\n\\nJack Myers +1.212.553.5116\\n\\nAssociate Analyst\\n\\njack.myers@moodys.com\\n\\nMargaret Taylor +1.212.553.0424\\n\\nAssociate Managing Director\\n\\nmargaret.taylor@moodys.com'\n",
+       "│   │   │   │   },\n",
+       "│   │   │   │   {'type': 'heading', 'lvl': 1, 'value': 'Exhibit 1', 'md': '# Exhibit 1'},\n",
+       "│   │   │   │   {\n",
+       "│   │   │   │   │   'type': 'table',\n",
+       "│   │   │   │   │   'rows': [\n",
+       "│   │   │   │   │   │   ['', \"Moody's adjusted operating income\", \"Moody's adjusted debt\"],\n",
+       "│   │   │   │   │   │   ['$30,000', '', '$180,000'],\n",
+       "│   │   │   │   │   │   ['$25,000', '', '$160,000'],\n",
+       "│   │   │   │   │   │   ['$20,000', '', '$140,000'],\n",
+       "│   │   │   │   │   │   ['$15,000', '', '$120,000'],\n",
+       "│   │   │   │   │   │   ['$10,000', '', '$100,000'],\n",
+       "│   │   │   │   │   │   ['$5,000', '', '$80,000'],\n",
+       "│   │   │   │   │   │   ['$0', '2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 Q1 -23 LTM', '$0']\n",
+       "│   │   │   │   │   ],\n",
+       "│   │   │   │   │   'md': \"| |Moody's adjusted operating income|Moody's adjusted debt|\\n|---|---|---|\\n|$30,000| |$180,000|\\n|$25,000| |$160,000|\\n|$20,000| |$140,000|\\n|$15,000| |$120,000|\\n|$10,000| |$100,000|\\n|$5,000| |$80,000|\\n|$0|2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 Q1 -23 LTM|$0|\",\n",
+       "│   │   │   │   │   'isPerfectTable': True,\n",
+       "│   │   │   │   │   'csv': '\"\",\"Moody\\'s adjusted operating income\",\"Moody\\'s adjusted debt\"\\n\"$30,000\",\"\",\"$180,000\"\\n\"$25,000\",\"\",\"$160,000\"\\n\"$20,000\",\"\",\"$140,000\"\\n\"$15,000\",\"\",\"$120,000\"\\n\"$10,000\",\"\",\"$100,000\"\\n\"$5,000\",\"\",\"$80,000\"\\n\"$0\",\"2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 Q1 -23 LTM\",\"$0\"'\n",
+       "│   │   │   │   },\n",
+       "│   │   │   │   {\n",
+       "│   │   │   │   │   'type': 'text',\n",
+       "│   │   │   │   │   'value': 'Debt includes lease\\n\\nSource: Moody’s Financial Metrics™',\n",
+       "│   │   │   │   │   'md': 'Debt includes lease\\n\\nSource: Moody’s Financial Metrics™'\n",
+       "│   │   │   │   }\n",
+       "│   │   │   ]\n",
+       "│   │   }\n",
+       "],\n",
+       "'job_id': '4e1922f5-566b-4bf7-8684-da7f4a977e37',\n",
+       "'file_path': 'data/AMZN_Moodys_CreditRating_2023_p1.pdf'\n",
+       "}\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[32m'pages'\u001b[0m: \u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'page'\u001b[0m: \u001b[1;36m1\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'text'\u001b[0m: \u001b[32m\" CORPORATES\\n\\n\\n CREDIT OPINION Amazon.com, Inc.\\n 23 May 2023\\nMoody's Adj. Operating Income \u001b[0m\u001b[32m(\u001b[0m\u001b[32mUSD Millions\u001b[0m\u001b[32m)\u001b[0m\u001b[32m Update to credit analysis\\n Update Summary\\n Amazon.com, Inc.'s \u001b[0m\u001b[32m(\u001b[0m\u001b[32mA1/Prime-1 stable\u001b[0m\u001b[32m)\u001b[0m\u001b[32m credit profile reflects its powerful global brand, which\\n Send Your Feedback is synonymous with online retail, as well as the strength and profitability of Amazon Web\\n Services \u001b[0m\u001b[32m(\u001b[0m\u001b[32m“AWS”\u001b[0m\u001b[32m)\u001b[0m\u001b[32m, the market leader in the cloud computing market. The company is reliant\\n on the operating income derived from AWS, as its non-AWS profitability has remained weak\\n since the end of 2021. Although the company is making progress with improving productivity\\n RATINGS and reducing costs, online operating margins remain well below historical levels. Amazon has\\n Amazon.com, Inc. taken actions to make its fulfillment operations more efficient as its business grows into its\\n Domicile Seattle, Washington,\\n United States capacity, which doubled during the pandemic. Amazon has also built a solid ecosystem of\\n Long Term Rating A1 entertainment content that enhances its offering, operates a formidable third-party seller\\n Type Senior Unsecured - business and generates a solid and growing revenue stream from advertising. 
Nonetheless,\\n Dom Curr\\n Outlook Stable its credit metrics are currently weak for the A1 rating with RCF/Debt below 50%, as lower\\n profitability, coupled with increased levels of investment have led to higher debt levels\\n Please see the ratings section at the end of this report and lower cash balances. Capital allocation will be critical to improving its credit profile as\\n for more information. The ratings and outlook shown Amazon navigates a weaker economic backdrop that could dampen demand for its products\\n reflect information as of the publication date. and services as it pursues cost reductions and efficiencies to restore profitability at online\\n retail. The growing online presence of brick-and-mortar retailers, as well as the increasing\\n Contacts competition from larger, well capitalized companies in AWS' universe also presents future\\n Christina Boni +1.212.553.0514 challenges.\\n Senior Vice President Exhibit 1\\n christina.boni@moodys.com Amazon's debt has continued to rise as operating income remains below 2019\\n\\n\\n Jack Myers +1.212.553.5116 $30,000 Moody's adjusted operating incomeMoody's adjusted debt $180,000\\n Associate Analyst $160,000\\n jack.myers@moodys.com $25,000 $140,000\\n\\n\\n Margaret Taylor +1.212.553.0424 $20,000 $120,000\\nMoody's Adjusted Debt \u001b[0m\u001b[32m(\u001b[0m\u001b[32mUSD Millions\u001b[0m\u001b[32m)\u001b[0m\u001b[32m $100,000\\n Associate Managing Director $15,000 $80,000\\n margaret.taylor@moodys.com $10,000 $60,000\\n\\n\\n $5,000 $40,000\\n $20,000\\n\\n\\n $0 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 Q1 -23 LTM$0\\n Debt includes lease\\n Source: Moody’s Financial Metrics™\"\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'md'\u001b[0m: \u001b[32m\"# CREDIT OPINION\\n\\nAmazon.com, Inc.\\n\\n23 May 2023\\n\\nMoody's Adj. 
Operating Income \u001b[0m\u001b[32m(\u001b[0m\u001b[32mUSD Millions\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\n\\nUpdate to credit analysis\\n\\nSummary\\n\\nAmazon.com, Inc.'s \u001b[0m\u001b[32m(\u001b[0m\u001b[32mA1/Prime-1 stable\u001b[0m\u001b[32m)\u001b[0m\u001b[32m credit profile reflects its powerful global brand, which is synonymous with online retail, as well as the strength and profitability of Amazon Web Services \u001b[0m\u001b[32m(\u001b[0m\u001b[32m“AWS”\u001b[0m\u001b[32m)\u001b[0m\u001b[32m, the market leader in the cloud computing market. The company is reliant on the operating income derived from AWS, as its non-AWS profitability has remained weak since the end of 2021. Although the company is making progress with improving productivity and reducing costs, online operating margins remain well below historical levels. Amazon has taken actions to make its fulfillment operations more efficient as its business grows into its capacity, which doubled during the pandemic. Amazon has also built a solid ecosystem of entertainment content that enhances its offering, operates a formidable third-party seller business and generates a solid and growing revenue stream from advertising. Nonetheless, its credit metrics are currently weak for the A1 rating with RCF/Debt below 50%, as lower profitability, coupled with increased levels of investment have led to higher debt levels and lower cash balances. Capital allocation will be critical to improving its credit profile as Amazon navigates a weaker economic backdrop that could dampen demand for its products and services as it pursues cost reductions and efficiencies to restore profitability at online retail. 
The growing online presence of brick-and-mortar retailers, as well as the increasing competition from larger, well capitalized companies in AWS' universe also presents future challenges.\\n\\n# RATINGS\\n\\n|Amazon.com, Inc.| |\\n|---|---|\\n|Domicile|Seattle, Washington, United States|\\n|Long Term Rating|A1|\\n|Type|Senior Unsecured - Dom Curr|\\n|Outlook|Stable|\\n\\nPlease see the ratings section at the end of this report for more information. The ratings and outlook shown reflect information as of the publication date.\\n\\n# Contacts\\n\\nChristina Boni +1.212.553.0514\\n\\nSenior Vice President\\n\\nchristina.boni@moodys.com\\n\\nJack Myers +1.212.553.5116\\n\\nAssociate Analyst\\n\\njack.myers@moodys.com\\n\\nMargaret Taylor +1.212.553.0424\\n\\nAssociate Managing Director\\n\\nmargaret.taylor@moodys.com\\n\\n# Exhibit 1\\n\\n| |Moody's adjusted operating income|Moody's adjusted debt|\\n|---|---|---|\\n|$30,000| |$180,000|\\n|$25,000| |$160,000|\\n|$20,000| |$140,000|\\n|$15,000| |$120,000|\\n|$10,000| |$100,000|\\n|$5,000| |$80,000|\\n|$0|2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 Q1 -23 LTM|$0|\\n\\nDebt includes lease\\n\\nSource: Moody’s Financial Metrics™\"\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'images'\u001b[0m: \u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'img_p0_1.png'\u001b[0m, \u001b[32m'height'\u001b[0m: \u001b[1;36m193\u001b[0m, \u001b[32m'width'\u001b[0m: \u001b[1;36m1117\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'img_p0_2.png'\u001b[0m, \u001b[32m'height'\u001b[0m: \u001b[1;36m57\u001b[0m, \u001b[32m'width'\u001b[0m: \u001b[1;36m281\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'name'\u001b[0m: \u001b[32m'img_p0_3.png'\u001b[0m, \u001b[32m'height'\u001b[0m: \u001b[1;36m28\u001b[0m, \u001b[32m'width'\u001b[0m: 
\u001b[1;36m1116\u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'items'\u001b[0m: \u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'heading'\u001b[0m, \u001b[32m'lvl'\u001b[0m: \u001b[1;36m1\u001b[0m, \u001b[32m'value'\u001b[0m: \u001b[32m'CREDIT OPINION'\u001b[0m, \u001b[32m'md'\u001b[0m: \u001b[32m'# CREDIT OPINION'\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'text'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'value'\u001b[0m: \u001b[32m\"Amazon.com, Inc.\\n\\n23 May 2023\\n\\nMoody's Adj. Operating Income \u001b[0m\u001b[32m(\u001b[0m\u001b[32mUSD Millions\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\n\\nUpdate to credit analysis\\n\\nSummary\\n\\nAmazon.com, Inc.'s \u001b[0m\u001b[32m(\u001b[0m\u001b[32mA1/Prime-1 stable\u001b[0m\u001b[32m)\u001b[0m\u001b[32m credit profile reflects its powerful global brand, which is synonymous with online retail, as well as the strength and profitability of Amazon Web Services \u001b[0m\u001b[32m(\u001b[0m\u001b[32m“AWS”\u001b[0m\u001b[32m)\u001b[0m\u001b[32m, the market leader in the cloud computing market. The company is reliant on the operating income derived from AWS, as its non-AWS profitability has remained weak since the end of 2021. Although the company is making progress with improving productivity and reducing costs, online operating margins remain well below historical levels. Amazon has taken actions to make its fulfillment operations more efficient as its business grows into its capacity, which doubled during the pandemic. Amazon has also built a solid ecosystem of entertainment content that enhances its offering, operates a formidable third-party seller business and generates a solid and growing revenue stream from advertising. 
Nonetheless, its credit metrics are currently weak for the A1 rating with RCF/Debt below 50%, as lower profitability, coupled with increased levels of investment have led to higher debt levels and lower cash balances. Capital allocation will be critical to improving its credit profile as Amazon navigates a weaker economic backdrop that could dampen demand for its products and services as it pursues cost reductions and efficiencies to restore profitability at online retail. The growing online presence of brick-and-mortar retailers, as well as the increasing competition from larger, well capitalized companies in AWS' universe also presents future challenges.\"\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'md'\u001b[0m: \u001b[32m\"Amazon.com, Inc.\\n\\n23 May 2023\\n\\nMoody's Adj. Operating Income \u001b[0m\u001b[32m(\u001b[0m\u001b[32mUSD Millions\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\n\\nUpdate to credit analysis\\n\\nSummary\\n\\nAmazon.com, Inc.'s \u001b[0m\u001b[32m(\u001b[0m\u001b[32mA1/Prime-1 stable\u001b[0m\u001b[32m)\u001b[0m\u001b[32m credit profile reflects its powerful global brand, which is synonymous with online retail, as well as the strength and profitability of Amazon Web Services \u001b[0m\u001b[32m(\u001b[0m\u001b[32m“AWS”\u001b[0m\u001b[32m)\u001b[0m\u001b[32m, the market leader in the cloud computing market. The company is reliant on the operating income derived from AWS, as its non-AWS profitability has remained weak since the end of 2021. Although the company is making progress with improving productivity and reducing costs, online operating margins remain well below historical levels. Amazon has taken actions to make its fulfillment operations more efficient as its business grows into its capacity, which doubled during the pandemic. 
Amazon has also built a solid ecosystem of entertainment content that enhances its offering, operates a formidable third-party seller business and generates a solid and growing revenue stream from advertising. Nonetheless, its credit metrics are currently weak for the A1 rating with RCF/Debt below 50%, as lower profitability, coupled with increased levels of investment have led to higher debt levels and lower cash balances. Capital allocation will be critical to improving its credit profile as Amazon navigates a weaker economic backdrop that could dampen demand for its products and services as it pursues cost reductions and efficiencies to restore profitability at online retail. The growing online presence of brick-and-mortar retailers, as well as the increasing competition from larger, well capitalized companies in AWS' universe also presents future challenges.\"\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'heading'\u001b[0m, \u001b[32m'lvl'\u001b[0m: \u001b[1;36m1\u001b[0m, \u001b[32m'value'\u001b[0m: \u001b[32m'RATINGS'\u001b[0m, \u001b[32m'md'\u001b[0m: \u001b[32m'# RATINGS'\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'table'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'rows'\u001b[0m: \u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ │ │ │ │ \u001b[0m\u001b[1m[\u001b[0m\u001b[32m'Amazon.com, Inc.'\u001b[0m, \u001b[32m''\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ │ \u001b[0m\u001b[1m[\u001b[0m\u001b[32m'Domicile'\u001b[0m, \u001b[32m'Seattle, Washington, United States'\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ │ \u001b[0m\u001b[1m[\u001b[0m\u001b[32m'Long Term Rating'\u001b[0m, \u001b[32m'A1'\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ │ 
\u001b[0m\u001b[1m[\u001b[0m\u001b[32m'Type'\u001b[0m, \u001b[32m'Senior Unsecured - Dom Curr'\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ │ \u001b[0m\u001b[1m[\u001b[0m\u001b[32m'Outlook'\u001b[0m, \u001b[32m'Stable'\u001b[0m\u001b[1m]\u001b[0m\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'md'\u001b[0m: \u001b[32m'|Amazon.com, Inc.| |\\n|---|---|\\n|Domicile|Seattle, Washington, United States|\\n|Long Term Rating|A1|\\n|Type|Senior Unsecured - Dom Curr|\\n|Outlook|Stable|'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'isPerfectTable'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'csv'\u001b[0m: \u001b[32m'\"Amazon.com, Inc.\",\"\"\\n\"Domicile\",\"Seattle, Washington, United States\"\\n\"Long Term Rating\",\"A1\"\\n\"Type\",\"Senior Unsecured - Dom Curr\"\\n\"Outlook\",\"Stable\"'\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'text'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'value'\u001b[0m: \u001b[32m'Please see the ratings section at the end of this report for more information. The ratings and outlook shown reflect information as of the publication date.'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'md'\u001b[0m: \u001b[32m'Please see the ratings section at the end of this report for more information. 
The ratings and outlook shown reflect information as of the publication date.'\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'heading'\u001b[0m, \u001b[32m'lvl'\u001b[0m: \u001b[1;36m1\u001b[0m, \u001b[32m'value'\u001b[0m: \u001b[32m'Contacts'\u001b[0m, \u001b[32m'md'\u001b[0m: \u001b[32m'# Contacts'\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'text'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'value'\u001b[0m: \u001b[32m'Christina Boni +1.212.553.0514\\n\\nSenior Vice President\\n\\nchristina.boni@moodys.com\\n\\nJack Myers +1.212.553.5116\\n\\nAssociate Analyst\\n\\njack.myers@moodys.com\\n\\nMargaret Taylor +1.212.553.0424\\n\\nAssociate Managing Director\\n\\nmargaret.taylor@moodys.com'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'md'\u001b[0m: \u001b[32m'Christina Boni +1.212.553.0514\\n\\nSenior Vice President\\n\\nchristina.boni@moodys.com\\n\\nJack Myers +1.212.553.5116\\n\\nAssociate Analyst\\n\\njack.myers@moodys.com\\n\\nMargaret Taylor +1.212.553.0424\\n\\nAssociate Managing Director\\n\\nmargaret.taylor@moodys.com'\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'heading'\u001b[0m, \u001b[32m'lvl'\u001b[0m: \u001b[1;36m1\u001b[0m, \u001b[32m'value'\u001b[0m: \u001b[32m'Exhibit 1'\u001b[0m, \u001b[32m'md'\u001b[0m: \u001b[32m'# Exhibit 1'\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'table'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'rows'\u001b[0m: \u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ │ │ │ │ │ \u001b[0m\u001b[1m[\u001b[0m\u001b[32m''\u001b[0m, 
\u001b[32m\"Moody's adjusted operating income\"\u001b[0m, \u001b[32m\"Moody's adjusted debt\"\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ │ \u001b[0m\u001b[1m[\u001b[0m\u001b[32m'$30,000'\u001b[0m, \u001b[32m''\u001b[0m, \u001b[32m'$180,000'\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ │ \u001b[0m\u001b[1m[\u001b[0m\u001b[32m'$25,000'\u001b[0m, \u001b[32m''\u001b[0m, \u001b[32m'$160,000'\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ │ \u001b[0m\u001b[1m[\u001b[0m\u001b[32m'$20,000'\u001b[0m, \u001b[32m''\u001b[0m, \u001b[32m'$140,000'\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ │ \u001b[0m\u001b[1m[\u001b[0m\u001b[32m'$15,000'\u001b[0m, \u001b[32m''\u001b[0m, \u001b[32m'$120,000'\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ │ \u001b[0m\u001b[1m[\u001b[0m\u001b[32m'$10,000'\u001b[0m, \u001b[32m''\u001b[0m, \u001b[32m'$100,000'\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ │ \u001b[0m\u001b[1m[\u001b[0m\u001b[32m'$5,000'\u001b[0m, \u001b[32m''\u001b[0m, \u001b[32m'$80,000'\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ │ \u001b[0m\u001b[1m[\u001b[0m\u001b[32m'$0'\u001b[0m, \u001b[32m'2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 Q1 -23 LTM'\u001b[0m, \u001b[32m'$0'\u001b[0m\u001b[1m]\u001b[0m\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'md'\u001b[0m: \u001b[32m\"| |Moody's adjusted operating income|Moody's adjusted debt|\\n|---|---|---|\\n|$30,000| |$180,000|\\n|$25,000| |$160,000|\\n|$20,000| |$140,000|\\n|$15,000| |$120,000|\\n|$10,000| |$100,000|\\n|$5,000| |$80,000|\\n|$0|2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 Q1 -23 LTM|$0|\"\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'isPerfectTable'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'csv'\u001b[0m: \u001b[32m'\"\",\"Moody\\'s adjusted operating income\",\"Moody\\'s adjusted 
debt\"\\n\"$30,000\",\"\",\"$180,000\"\\n\"$25,000\",\"\",\"$160,000\"\\n\"$20,000\",\"\",\"$140,000\"\\n\"$15,000\",\"\",\"$120,000\"\\n\"$10,000\",\"\",\"$100,000\"\\n\"$5,000\",\"\",\"$80,000\"\\n\"$0\",\"2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 Q1 -23 LTM\",\"$0\"'\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'type'\u001b[0m: \u001b[32m'text'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'value'\u001b[0m: \u001b[32m'Debt includes lease\\n\\nSource: Moody’s Financial Metrics™'\u001b[0m,\n", + "\u001b[2;32m│ │ │ │ │ \u001b[0m\u001b[32m'md'\u001b[0m: \u001b[32m'Debt includes lease\\n\\nSource: Moody’s Financial Metrics™'\u001b[0m\n", + "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m]\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[32m'job_id'\u001b[0m: \u001b[32m'4e1922f5-566b-4bf7-8684-da7f4a977e37'\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[32m'file_path'\u001b[0m: \u001b[32m'data/AMZN_Moodys_CreditRating_2023_p1.pdf'\u001b[0m\n", + "\u001b[1m}\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "pprint(docs2[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Quite good but it can't read the bar graph in Exhibit1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# one can use SimpleDirectoryReader to load files from a directory\n", + "from llama_index.core import SimpleDirectoryReader # pip install llama-index\n", + "\n", + "file_extractor = {\".pdf\": parser}\n", + "reader = SimpleDirectoryReader(\"./data\", file_extractor=file_extractor)\n", + "documents = reader.load_data()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, 
+ "source": [ + "## PYMUPDF" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[\n",
+       "Document(\n",
+       "│   │   id_='a66aa726-5a25-4a0b-90f6-74b4f3af1599',\n",
+       "│   │   embedding=None,\n",
+       "│   │   metadata={'total_pages': 2, 'file_path': 'data/test.pdf', 'source': '1'},\n",
+       "│   │   excluded_embed_metadata_keys=[],\n",
+       "│   │   excluded_llm_metadata_keys=[],\n",
+       "│   │   relationships={},\n",
+       "│   │   text='A very simple pdf file\\nsecond line\\n',\n",
+       "│   │   start_char_idx=None,\n",
+       "│   │   end_char_idx=None,\n",
+       "│   │   text_template='{metadata_str}\\n\\n{content}',\n",
+       "│   │   metadata_template='{key}: {value}',\n",
+       "│   │   metadata_seperator='\\n'\n",
+       "),\n",
+       "Document(\n",
+       "│   │   id_='44d08ca7-e6f2-4f9d-828e-4dfc4c5d1281',\n",
+       "│   │   embedding=None,\n",
+       "│   │   metadata={'total_pages': 2, 'file_path': 'data/test.pdf', 'source': '2'},\n",
+       "│   │   excluded_embed_metadata_keys=[],\n",
+       "│   │   excluded_llm_metadata_keys=[],\n",
+       "│   │   relationships={},\n",
+       "│   │   text='text on 2nd page\\nurl\\nText in annotation box\\n',\n",
+       "│   │   start_char_idx=None,\n",
+       "│   │   end_char_idx=None,\n",
+       "│   │   text_template='{metadata_str}\\n\\n{content}',\n",
+       "│   │   metadata_template='{key}: {value}',\n",
+       "│   │   metadata_seperator='\\n'\n",
+       ")\n",
+       "]\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mDocument\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mid_\u001b[0m=\u001b[32m'a66aa726-5a25-4a0b-90f6-74b4f3af1599'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33membedding\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'total_pages'\u001b[0m: \u001b[1;36m2\u001b[0m, \u001b[32m'file_path'\u001b[0m: \u001b[32m'data/test.pdf'\u001b[0m, \u001b[32m'source'\u001b[0m: \u001b[32m'1'\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mexcluded_embed_metadata_keys\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mexcluded_llm_metadata_keys\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mrelationships\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mtext\u001b[0m=\u001b[32m'A very simple pdf file\\nsecond line\\n'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mstart_char_idx\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mend_char_idx\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mtext_template\u001b[0m=\u001b[32m'\u001b[0m\u001b[32m{\u001b[0m\u001b[32mmetadata_str\u001b[0m\u001b[32m}\u001b[0m\u001b[32m\\n\\n\u001b[0m\u001b[32m{\u001b[0m\u001b[32mcontent\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata_template\u001b[0m=\u001b[32m'\u001b[0m\u001b[32m{\u001b[0m\u001b[32mkey\u001b[0m\u001b[32m}\u001b[0m\u001b[32m: \u001b[0m\u001b[32m{\u001b[0m\u001b[32mvalue\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata_seperator\u001b[0m=\u001b[32m'\\n'\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ 
\u001b[0m\u001b[1;35mDocument\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mid_\u001b[0m=\u001b[32m'44d08ca7-e6f2-4f9d-828e-4dfc4c5d1281'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33membedding\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'total_pages'\u001b[0m: \u001b[1;36m2\u001b[0m, \u001b[32m'file_path'\u001b[0m: \u001b[32m'data/test.pdf'\u001b[0m, \u001b[32m'source'\u001b[0m: \u001b[32m'2'\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mexcluded_embed_metadata_keys\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mexcluded_llm_metadata_keys\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mrelationships\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mtext\u001b[0m=\u001b[32m'text on 2nd page\\nurl\\nText in annotation box\\n'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mstart_char_idx\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mend_char_idx\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mtext_template\u001b[0m=\u001b[32m'\u001b[0m\u001b[32m{\u001b[0m\u001b[32mmetadata_str\u001b[0m\u001b[32m}\u001b[0m\u001b[32m\\n\\n\u001b[0m\u001b[32m{\u001b[0m\u001b[32mcontent\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata_template\u001b[0m=\u001b[32m'\u001b[0m\u001b[32m{\u001b[0m\u001b[32mkey\u001b[0m\u001b[32m}\u001b[0m\u001b[32m: \u001b[0m\u001b[32m{\u001b[0m\u001b[32mvalue\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata_seperator\u001b[0m=\u001b[32m'\\n'\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[1m]\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# 
https://pymupdf.readthedocs.io/en/latest/rag.html\n", + "from llama_index.readers.file import PyMuPDFReader\n", + "loader = PyMuPDFReader()\n", + "documents = loader.load(file_path=pdf1)\n", + "pprint(documents)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[\n",
+       "Document(\n",
+       "│   │   id_='fe0315af-5cec-4c78-a0b7-479c5a44de7f',\n",
+       "│   │   embedding=None,\n",
+       "│   │   metadata={'total_pages': 1, 'file_path': 'data/AMZN_Moodys_CreditRating_2023_p1.pdf', 'source': '1'},\n",
+       "│   │   excluded_embed_metadata_keys=[],\n",
+       "│   │   excluded_llm_metadata_keys=[],\n",
+       "│   │   relationships={},\n",
+       "│   │   text=\"CORPORATES\\nCREDIT OPINION\\n23 May 2023\\nUpdate\\nRATINGS\\nAmazon.com, Inc.\\nDomicile\\nSeattle, Washington,\\nUnited States\\nLong Term Rating\\nA1\\nType\\nSenior Unsecured -\\nDom Curr\\nOutlook\\nStable\\nPlease see the ratings section at the end of this report\\nfor more information. The ratings and outlook shown\\nreflect information as of the publication date.\\nContacts\\nChristina Boni\\n+1.212.553.0514\\nSenior Vice President\\nchristina.boni@moodys.com\\nJack Myers\\n+1.212.553.5116\\nAssociate Analyst\\njack.myers@moodys.com\\nMargaret Taylor\\n+1.212.553.0424\\nAssociate Managing Director\\nmargaret.taylor@moodys.com\\nAmazon.com, Inc.\\nUpdate to credit analysis\\nSummary\\nAmazon.com, Inc.'s (A1/Prime-1 stable) credit profile reflects its powerful global brand, which\\nis synonymous with online retail, as well as the strength and profitability of Amazon Web\\nServices (“AWS”), the market leader in the cloud computing market. The company is reliant\\non the operating income derived from AWS, as its non-AWS profitability has remained weak\\nsince the end of 2021. Although the company is making progress with improving productivity\\nand reducing costs, online operating margins remain well below historical levels. Amazon has\\ntaken actions to make its fulfillment operations more efficient as its business grows into its\\ncapacity, which doubled during the pandemic. Amazon has also built a solid ecosystem of\\nentertainment content that enhances its offering, operates a formidable third-party seller\\nbusiness and generates a solid and growing revenue stream from advertising. Nonetheless,\\nits credit metrics are currently weak for the A1 rating with RCF/Debt below 50%, as lower\\nprofitability, coupled with increased levels of investment have led to higher debt levels\\nand lower cash balances. 
Capital allocation will be critical to improving its credit profile as\\nAmazon navigates a weaker economic backdrop that could dampen demand for its products\\nand services as it pursues cost reductions and efficiencies to restore profitability at online\\nretail. The growing online presence of brick-and-mortar retailers, as well as the increasing\\ncompetition from larger, well capitalized companies in AWS' universe also presents future\\nchallenges.\\nExhibit 1\\nAmazon's debt has continued to rise as operating income remains below 2019\\n$0\\n$20,000\\n$40,000\\n$60,000\\n$80,000\\n$100,000\\n$120,000\\n$140,000\\n$160,000\\n$180,000\\n$0\\n$5,000\\n$10,000\\n$15,000\\n$20,000\\n$25,000\\n$30,000\\n2013\\n2014\\n2015\\n2016\\n2017\\n2018\\n2019\\n2020\\n2021\\n2022\\nQ1 -23 LTM\\nMoody's Adjusted Debt (USD Millions)\\nMoody's Adj. Operating Income (USD Millions)\\nMoody's adjusted operating income\\nMoody's adjusted debt\\nDebt includes lease\\nSource: Moody’s Financial Metrics™\\n\",\n",
+       "│   │   start_char_idx=None,\n",
+       "│   │   end_char_idx=None,\n",
+       "│   │   text_template='{metadata_str}\\n\\n{content}',\n",
+       "│   │   metadata_template='{key}: {value}',\n",
+       "│   │   metadata_seperator='\\n'\n",
+       ")\n",
+       "]\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1;35mDocument\u001b[0m\u001b[1m(\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mid_\u001b[0m=\u001b[32m'fe0315af-5cec-4c78-a0b7-479c5a44de7f'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33membedding\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'total_pages'\u001b[0m: \u001b[1;36m1\u001b[0m, \u001b[32m'file_path'\u001b[0m: \u001b[32m'data/AMZN_Moodys_CreditRating_2023_p1.pdf'\u001b[0m, \u001b[32m'source'\u001b[0m: \u001b[32m'1'\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mexcluded_embed_metadata_keys\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mexcluded_llm_metadata_keys\u001b[0m=\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mrelationships\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mtext\u001b[0m=\u001b[32m\"CORPORATES\u001b[0m\u001b[32m\\nCREDIT OPINION\\n23 May 2023\\nUpdate\\nRATINGS\\nAmazon.com, Inc.\\nDomicile\\nSeattle, Washington,\\nUnited States\\nLong Term Rating\\nA1\\nType\\nSenior Unsecured -\\nDom Curr\\nOutlook\\nStable\\nPlease see the ratings section at the end of this report\\nfor more information. 
The ratings and outlook shown\\nreflect information as of the publication date.\\nContacts\\nChristina Boni\\n+1.212.553.0514\\nSenior Vice President\\nchristina.boni@moodys.com\\nJack Myers\\n+1.212.553.5116\\nAssociate Analyst\\njack.myers@moodys.com\\nMargaret Taylor\\n+1.212.553.0424\\nAssociate Managing Director\\nmargaret.taylor@moodys.com\\nAmazon.com, Inc.\\nUpdate to credit analysis\\nSummary\\nAmazon.com, Inc.'s \u001b[0m\u001b[32m(\u001b[0m\u001b[32mA1/Prime-1 stable\u001b[0m\u001b[32m)\u001b[0m\u001b[32m credit profile reflects its powerful global brand, which\\nis synonymous with online retail, as well as the strength and profitability of Amazon Web\\nServices \u001b[0m\u001b[32m(\u001b[0m\u001b[32m“AWS”\u001b[0m\u001b[32m)\u001b[0m\u001b[32m, the market leader in the cloud computing market. The company is reliant\\non the operating income derived from AWS, as its non-AWS profitability has remained weak\\nsince the end of 2021. Although the company is making progress with improving productivity\\nand reducing costs, online operating margins remain well below historical levels. Amazon has\\ntaken actions to make its fulfillment operations more efficient as its business grows into its\\ncapacity, which doubled during the pandemic. Amazon has also built a solid ecosystem of\\nentertainment content that enhances its offering, operates a formidable third-party seller\\nbusiness and generates a solid and growing revenue stream from advertising. Nonetheless,\\nits credit metrics are currently weak for the A1 rating with RCF/Debt below 50%, as lower\\nprofitability, coupled with increased levels of investment have led to higher debt levels\\nand lower cash balances. Capital allocation will be critical to improving its credit profile as\\nAmazon navigates a weaker economic backdrop that could dampen demand for its products\\nand services as it pursues cost reductions and efficiencies to restore profitability at online\\nretail. 
The growing online presence of brick-and-mortar retailers, as well as the increasing\\ncompetition from larger, well capitalized companies in AWS' universe also presents future\\nchallenges.\\nExhibit 1\\nAmazon's debt has continued to rise as operating income remains below 2019\\n$0\\n$20,000\\n$40,000\\n$60,000\\n$80,000\\n$100,000\\n$120,000\\n$140,000\\n$160,000\\n$180,000\\n$0\\n$5,000\\n$10,000\\n$15,000\\n$20,000\\n$25,000\\n$30,000\\n2013\\n2014\\n2015\\n2016\\n2017\\n2018\\n2019\\n2020\\n2021\\n2022\\nQ1 -23 LTM\\nMoody's Adjusted Debt \u001b[0m\u001b[32m(\u001b[0m\u001b[32mUSD Millions\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\nMoody's Adj. Operating Income \u001b[0m\u001b[32m(\u001b[0m\u001b[32mUSD Millions\u001b[0m\u001b[32m)\u001b[0m\u001b[32m\\nMoody's adjusted operating income\\nMoody's adjusted debt\\nDebt includes lease\\nSource: Moody’s Financial Metrics™\\n\"\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mstart_char_idx\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mend_char_idx\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mtext_template\u001b[0m=\u001b[32m'\u001b[0m\u001b[32m{\u001b[0m\u001b[32mmetadata_str\u001b[0m\u001b[32m}\u001b[0m\u001b[32m\\n\\n\u001b[0m\u001b[32m{\u001b[0m\u001b[32mcontent\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata_template\u001b[0m=\u001b[32m'\u001b[0m\u001b[32m{\u001b[0m\u001b[32mkey\u001b[0m\u001b[32m}\u001b[0m\u001b[32m: \u001b[0m\u001b[32m{\u001b[0m\u001b[32mvalue\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata_seperator\u001b[0m=\u001b[32m'\\n'\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m\n", + "\u001b[1m]\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "documents = loader.load(file_path=pdf2)\n", + "pprint(documents)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "Not bad but it reads the axis values of Exhibit1, and not the bar graph itself" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## PDFPLUMBER" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.document_loaders.blob_loaders import Blob\n", + "from langchain_community.document_loaders.parsers.pdf import PDFPlumberParser\n", + "\n", + "\n", + "# look into the library, plenty of options and other libraries\n", + "\n", + "# PDFPlumberParser is a blob parser: it has no load()/process() methods\n", + "# (calling them raised AttributeError). It takes a Blob and returns the\n", + "# parsed Documents from parse().\n", + "parser = PDFPlumberParser()\n", + "\n", + "# Wrap the PDF path in a Blob, the input type the parser expects\n", + "data = Blob.from_path(pdf1)\n", + "\n", + "# Parse the blob into a list of Documents\n", + "processed_data = parser.parse(data)\n", + "pprint(processed_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "import pdfplumber" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
[\n",
+       "{\n",
+       "│   │   'x0': 27.0,\n",
+       "│   │   'y0': 645.856,\n",
+       "│   │   'x1': 585.0,\n",
+       "│   │   'y1': 645.856,\n",
+       "│   │   'width': 558.0,\n",
+       "│   │   'height': 0.0,\n",
+       "│   │   'pts': [(27.0, 146.144), (585.0, 146.144)],\n",
+       "│   │   'linewidth': 4.002,\n",
+       "│   │   'stroke': True,\n",
+       "│   │   'fill': False,\n",
+       "│   │   'evenodd': False,\n",
+       "│   │   'stroking_color': (1, 0.6, 0, 0),\n",
+       "│   │   'non_stroking_color': None,\n",
+       "│   │   'mcid': None,\n",
+       "│   │   'tag': None,\n",
+       "│   │   'object_type': 'line',\n",
+       "│   │   'page_number': 1,\n",
+       "│   │   'stroking_pattern': None,\n",
+       "│   │   'non_stroking_pattern': None,\n",
+       "│   │   'path': [('m', (27.0, 146.144)), ('l', (585.0, 146.144))],\n",
+       "│   │   'dash': ([], 0),\n",
+       "│   │   'top': 146.144,\n",
+       "│   │   'bottom': 146.144,\n",
+       "│   │   'doctop': 146.144\n",
+       "},\n",
+       "{\n",
+       "│   │   'x0': 27.0,\n",
+       "│   │   'y0': 645.856,\n",
+       "│   │   'x1': 585.0,\n",
+       "│   │   'y1': 645.856,\n",
+       "│   │   'width': 558.0,\n",
+       "│   │   'height': 0.0,\n",
+       "│   │   'pts': [(27.0, 146.144), (585.0, 146.144)],\n",
+       "│   │   'linewidth': 4.002,\n",
+       "│   │   'stroke': True,\n",
+       "│   │   'fill': False,\n",
+       "│   │   'evenodd': False,\n",
+       "│   │   'stroking_color': (1, 0.6, 0, 0),\n",
+       "│   │   'non_stroking_color': None,\n",
+       "│   │   'mcid': None,\n",
+       "│   │   'tag': None,\n",
+       "│   │   'object_type': 'line',\n",
+       "│   │   'page_number': 1,\n",
+       "│   │   'stroking_pattern': None,\n",
+       "│   │   'non_stroking_pattern': None,\n",
+       "│   │   'path': [('m', (27.0, 146.144)), ('l', (585.0, 146.144))],\n",
+       "│   │   'dash': ([], 0),\n",
+       "│   │   'top': 146.144,\n",
+       "│   │   'bottom': 146.144,\n",
+       "│   │   'doctop': 146.144\n",
+       "},\n",
+       "{\n",
+       "│   │   'x0': 45.0,\n",
+       "│   │   'y0': 343.268025,\n",
+       "│   │   'x1': 189.0,\n",
+       "│   │   'y1': 343.268025,\n",
+       "│   │   'width': 144.0,\n",
+       "│   │   'height': 0.0,\n",
+       "│   │   'pts': [(45.0, 448.731975), (189.0, 448.731975)],\n",
+       "│   │   'linewidth': 0.149994,\n",
+       "│   │   'stroke': True,\n",
+       "│   │   'fill': False,\n",
+       "│   │   'evenodd': False,\n",
+       "│   │   'stroking_color': (0, 0, 0, 0.62),\n",
+       "│   │   'non_stroking_color': (0.6, 0.4, 0.4, 1),\n",
+       "│   │   'mcid': None,\n",
+       "│   │   'tag': None,\n",
+       "│   │   'object_type': 'line',\n",
+       "│   │   'page_number': 1,\n",
+       "│   │   'stroking_pattern': None,\n",
+       "│   │   'non_stroking_pattern': None,\n",
+       "│   │   'path': [('m', (45.0, 448.731975)), ('l', (189.0, 448.731975))],\n",
+       "│   │   'dash': ([], 0),\n",
+       "│   │   'top': 448.731975,\n",
+       "│   │   'bottom': 448.731975,\n",
+       "│   │   'doctop': 448.731975\n",
+       "},\n",
+       "{\n",
+       "│   │   'x0': 526.4859179252,\n",
+       "│   │   'y0': 188.08087635180004,\n",
+       "│   │   'x1': 526.4859179252,\n",
+       "│   │   'y1': 270.55985592300004,\n",
+       "│   │   'width': 0.0,\n",
+       "│   │   'height': 82.4789795712,\n",
+       "│   │   'pts': [(526.4859179252, 603.9191236482), (526.4859179252, 521.440144077)],\n",
+       "│   │   'linewidth': 0.2279,\n",
+       "│   │   'stroke': True,\n",
+       "│   │   'fill': False,\n",
+       "│   │   'evenodd': False,\n",
+       "│   │   'stroking_color': (0.843, 0.855, 0.839),\n",
+       "│   │   'non_stroking_color': (0, 0.592, 0.459),\n",
+       "│   │   'mcid': None,\n",
+       "│   │   'tag': 'Artifact',\n",
+       "│   │   'object_type': 'line',\n",
+       "│   │   'page_number': 1,\n",
+       "│   │   'stroking_pattern': None,\n",
+       "│   │   'non_stroking_pattern': None,\n",
+       "│   │   'path': [('m', (526.4859179252, 603.9191236482)), ('l', (526.4859179252, 521.440144077))],\n",
+       "│   │   'dash': None,\n",
+       "│   │   'top': 521.440144077,\n",
+       "│   │   'bottom': 603.9191236482,\n",
+       "│   │   'doctop': 521.440144077\n",
+       "},\n",
+       "{\n",
+       "│   │   'x0': 241.102111912,\n",
+       "│   │   'y0': 188.08087635180004,\n",
+       "│   │   'x1': 241.102111912,\n",
+       "│   │   'y1': 270.55985592300004,\n",
+       "│   │   'width': 0.0,\n",
+       "│   │   'height': 82.4789795712,\n",
+       "│   │   'pts': [(241.102111912, 603.9191236482), (241.102111912, 521.440144077)],\n",
+       "│   │   'linewidth': 0.2279,\n",
+       "│   │   'stroke': True,\n",
+       "│   │   'fill': False,\n",
+       "│   │   'evenodd': False,\n",
+       "│   │   'stroking_color': (0.843, 0.855, 0.839),\n",
+       "│   │   'non_stroking_color': (0, 0.592, 0.459),\n",
+       "│   │   'mcid': None,\n",
+       "│   │   'tag': 'Artifact',\n",
+       "│   │   'object_type': 'line',\n",
+       "│   │   'page_number': 1,\n",
+       "│   │   'stroking_pattern': None,\n",
+       "│   │   'non_stroking_pattern': None,\n",
+       "│   │   'path': [('m', (241.102111912, 603.9191236482)), ('l', (241.102111912, 521.440144077))],\n",
+       "│   │   'dash': None,\n",
+       "│   │   'top': 521.440144077,\n",
+       "│   │   'bottom': 603.9191236482,\n",
+       "│   │   'doctop': 521.440144077\n",
+       "},\n",
+       "{\n",
+       "│   │   'x0': 241.102111912,\n",
+       "│   │   'y0': 188.08087635180004,\n",
+       "│   │   'x1': 526.4859179252,\n",
+       "│   │   'y1': 188.08087635180004,\n",
+       "│   │   'width': 285.38380601320006,\n",
+       "│   │   'height': 0.0,\n",
+       "│   │   'pts': [(241.102111912, 603.9191236482), (526.4859179252, 603.9191236482)],\n",
+       "│   │   'linewidth': 0.2279,\n",
+       "│   │   'stroke': True,\n",
+       "│   │   'fill': False,\n",
+       "│   │   'evenodd': False,\n",
+       "│   │   'stroking_color': (0.271, 0.282, 0.294),\n",
+       "│   │   'non_stroking_color': (0, 0.592, 0.459),\n",
+       "│   │   'mcid': None,\n",
+       "│   │   'tag': 'Artifact',\n",
+       "│   │   'object_type': 'line',\n",
+       "│   │   'page_number': 1,\n",
+       "│   │   'stroking_pattern': None,\n",
+       "│   │   'non_stroking_pattern': None,\n",
+       "│   │   'path': [('m', (241.102111912, 603.9191236482)), ('l', (526.4859179252, 603.9191236482))],\n",
+       "│   │   'dash': None,\n",
+       "│   │   'top': 603.9191236482,\n",
+       "│   │   'bottom': 603.9191236482,\n",
+       "│   │   'doctop': 603.9191236482\n",
+       "},\n",
+       "{\n",
+       "│   │   'x0': 335.94366227,\n",
+       "│   │   'y0': 275.4331016422001,\n",
+       "│   │   'x1': 348.2605638164,\n",
+       "│   │   'y1': 275.4331016422001,\n",
+       "│   │   'width': 12.316901546400004,\n",
+       "│   │   'height': 0.0,\n",
+       "│   │   'pts': [(335.94366227, 516.5668983577999), (348.2605638164, 516.5668983577999)],\n",
+       "│   │   'linewidth': 1.8228,\n",
+       "│   │   'stroke': True,\n",
+       "│   │   'fill': False,\n",
+       "│   │   'evenodd': False,\n",
+       "│   │   'stroking_color': (0, 0.157, 0.627),\n",
+       "│   │   'non_stroking_color': (0, 0.592, 0.459),\n",
+       "│   │   'mcid': None,\n",
+       "│   │   'tag': 'Artifact',\n",
+       "│   │   'object_type': 'line',\n",
+       "│   │   'page_number': 1,\n",
+       "│   │   'stroking_pattern': None,\n",
+       "│   │   'non_stroking_pattern': None,\n",
+       "│   │   'path': [('m', (335.94366227, 516.5668983577999)), ('l', (348.2605638164, 516.5668983577999))],\n",
+       "│   │   'dash': None,\n",
+       "│   │   'top': 516.5668983577999,\n",
+       "│   │   'bottom': 516.5668983577999,\n",
+       "│   │   'doctop': 516.5668983577999\n",
+       "},\n",
+       "{\n",
+       "│   │   'x0': 27.0,\n",
+       "│   │   'y0': 29.016999999999957,\n",
+       "│   │   'x1': 585.0,\n",
+       "│   │   'y1': 29.016999999999957,\n",
+       "│   │   'width': 558.0,\n",
+       "│   │   'height': 0.0,\n",
+       "│   │   'pts': [(27.0, 762.9830000000001), (585.0, 762.9830000000001)],\n",
+       "│   │   'linewidth': 4.002,\n",
+       "│   │   'stroke': True,\n",
+       "│   │   'fill': False,\n",
+       "│   │   'evenodd': False,\n",
+       "│   │   'stroking_color': (1, 0.6, 0, 0),\n",
+       "│   │   'non_stroking_color': None,\n",
+       "│   │   'mcid': None,\n",
+       "│   │   'tag': None,\n",
+       "│   │   'object_type': 'line',\n",
+       "│   │   'page_number': 1,\n",
+       "│   │   'stroking_pattern': None,\n",
+       "│   │   'non_stroking_pattern': None,\n",
+       "│   │   'path': [('m', (27.0, 762.9830000000001)), ('l', (585.0, 762.9830000000001))],\n",
+       "│   │   'dash': ([], 0),\n",
+       "│   │   'top': 762.9830000000001,\n",
+       "│   │   'bottom': 762.9830000000001,\n",
+       "│   │   'doctop': 762.9830000000001\n",
+       "}\n",
+       "]\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m[\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'x0'\u001b[0m: \u001b[1;36m27.0\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'y0'\u001b[0m: \u001b[1;36m645.856\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'x1'\u001b[0m: \u001b[1;36m585.0\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'y1'\u001b[0m: \u001b[1;36m645.856\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'width'\u001b[0m: \u001b[1;36m558.0\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'height'\u001b[0m: \u001b[1;36m0.0\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'pts'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m27.0\u001b[0m, \u001b[1;36m146.144\u001b[0m\u001b[1m)\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m585.0\u001b[0m, \u001b[1;36m146.144\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'linewidth'\u001b[0m: \u001b[1;36m4.002\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'stroke'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'fill'\u001b[0m: \u001b[3;91mFalse\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'evenodd'\u001b[0m: \u001b[3;91mFalse\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'stroking_color'\u001b[0m: \u001b[1m(\u001b[0m\u001b[1;36m1\u001b[0m, \u001b[1;36m0.6\u001b[0m, \u001b[1;36m0\u001b[0m, \u001b[1;36m0\u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'non_stroking_color'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'mcid'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'tag'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'object_type'\u001b[0m: \u001b[32m'line'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'page_number'\u001b[0m: \u001b[1;36m1\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'stroking_pattern'\u001b[0m: 
\u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'non_stroking_pattern'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'path'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'm'\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m27.0\u001b[0m, \u001b[1;36m146.144\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m, \u001b[1m(\u001b[0m\u001b[32m'l'\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m585.0\u001b[0m, \u001b[1;36m146.144\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'dash'\u001b[0m: \u001b[1m(\u001b[0m\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[1;36m0\u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'top'\u001b[0m: \u001b[1;36m146.144\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'bottom'\u001b[0m: \u001b[1;36m146.144\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'doctop'\u001b[0m: \u001b[1;36m146.144\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'x0'\u001b[0m: \u001b[1;36m27.0\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'y0'\u001b[0m: \u001b[1;36m645.856\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'x1'\u001b[0m: \u001b[1;36m585.0\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'y1'\u001b[0m: \u001b[1;36m645.856\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'width'\u001b[0m: \u001b[1;36m558.0\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'height'\u001b[0m: \u001b[1;36m0.0\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'pts'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m27.0\u001b[0m, \u001b[1;36m146.144\u001b[0m\u001b[1m)\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m585.0\u001b[0m, \u001b[1;36m146.144\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'linewidth'\u001b[0m: \u001b[1;36m4.002\u001b[0m,\n", + "\u001b[2;32m│ │ 
\u001b[0m\u001b[32m'stroke'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'fill'\u001b[0m: \u001b[3;91mFalse\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'evenodd'\u001b[0m: \u001b[3;91mFalse\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'stroking_color'\u001b[0m: \u001b[1m(\u001b[0m\u001b[1;36m1\u001b[0m, \u001b[1;36m0.6\u001b[0m, \u001b[1;36m0\u001b[0m, \u001b[1;36m0\u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'non_stroking_color'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'mcid'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'tag'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'object_type'\u001b[0m: \u001b[32m'line'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'page_number'\u001b[0m: \u001b[1;36m1\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'stroking_pattern'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'non_stroking_pattern'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'path'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'm'\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m27.0\u001b[0m, \u001b[1;36m146.144\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m, \u001b[1m(\u001b[0m\u001b[32m'l'\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m585.0\u001b[0m, \u001b[1;36m146.144\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'dash'\u001b[0m: \u001b[1m(\u001b[0m\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[1;36m0\u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'top'\u001b[0m: \u001b[1;36m146.144\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'bottom'\u001b[0m: \u001b[1;36m146.144\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'doctop'\u001b[0m: \u001b[1;36m146.144\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", + 
"\u001b[2;32m│ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'x0'\u001b[0m: \u001b[1;36m45.0\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'y0'\u001b[0m: \u001b[1;36m343.268025\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'x1'\u001b[0m: \u001b[1;36m189.0\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'y1'\u001b[0m: \u001b[1;36m343.268025\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'width'\u001b[0m: \u001b[1;36m144.0\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'height'\u001b[0m: \u001b[1;36m0.0\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'pts'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m45.0\u001b[0m, \u001b[1;36m448.731975\u001b[0m\u001b[1m)\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m189.0\u001b[0m, \u001b[1;36m448.731975\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'linewidth'\u001b[0m: \u001b[1;36m0.149994\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'stroke'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'fill'\u001b[0m: \u001b[3;91mFalse\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'evenodd'\u001b[0m: \u001b[3;91mFalse\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'stroking_color'\u001b[0m: \u001b[1m(\u001b[0m\u001b[1;36m0\u001b[0m, \u001b[1;36m0\u001b[0m, \u001b[1;36m0\u001b[0m, \u001b[1;36m0.62\u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'non_stroking_color'\u001b[0m: \u001b[1m(\u001b[0m\u001b[1;36m0.6\u001b[0m, \u001b[1;36m0.4\u001b[0m, \u001b[1;36m0.4\u001b[0m, \u001b[1;36m1\u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'mcid'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'tag'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'object_type'\u001b[0m: \u001b[32m'line'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'page_number'\u001b[0m: \u001b[1;36m1\u001b[0m,\n", + 
"\u001b[2;32m│ │ \u001b[0m\u001b[32m'stroking_pattern'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'non_stroking_pattern'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'path'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'm'\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m45.0\u001b[0m, \u001b[1;36m448.731975\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m, \u001b[1m(\u001b[0m\u001b[32m'l'\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m189.0\u001b[0m, \u001b[1;36m448.731975\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'dash'\u001b[0m: \u001b[1m(\u001b[0m\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[1;36m0\u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'top'\u001b[0m: \u001b[1;36m448.731975\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'bottom'\u001b[0m: \u001b[1;36m448.731975\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'doctop'\u001b[0m: \u001b[1;36m448.731975\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'x0'\u001b[0m: \u001b[1;36m526.4859179252\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'y0'\u001b[0m: \u001b[1;36m188.08087635180004\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'x1'\u001b[0m: \u001b[1;36m526.4859179252\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'y1'\u001b[0m: \u001b[1;36m270.55985592300004\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'width'\u001b[0m: \u001b[1;36m0.0\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'height'\u001b[0m: \u001b[1;36m82.4789795712\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'pts'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m526.4859179252\u001b[0m, \u001b[1;36m603.9191236482\u001b[0m\u001b[1m)\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m526.4859179252\u001b[0m, 
\u001b[1;36m521.440144077\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'linewidth'\u001b[0m: \u001b[1;36m0.2279\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'stroke'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'fill'\u001b[0m: \u001b[3;91mFalse\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'evenodd'\u001b[0m: \u001b[3;91mFalse\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'stroking_color'\u001b[0m: \u001b[1m(\u001b[0m\u001b[1;36m0.843\u001b[0m, \u001b[1;36m0.855\u001b[0m, \u001b[1;36m0.839\u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'non_stroking_color'\u001b[0m: \u001b[1m(\u001b[0m\u001b[1;36m0\u001b[0m, \u001b[1;36m0.592\u001b[0m, \u001b[1;36m0.459\u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'mcid'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'tag'\u001b[0m: \u001b[32m'Artifact'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'object_type'\u001b[0m: \u001b[32m'line'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'page_number'\u001b[0m: \u001b[1;36m1\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'stroking_pattern'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'non_stroking_pattern'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'path'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'm'\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m526.4859179252\u001b[0m, \u001b[1;36m603.9191236482\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m, \u001b[1m(\u001b[0m\u001b[32m'l'\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m526.4859179252\u001b[0m, \u001b[1;36m521.440144077\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'dash'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'top'\u001b[0m: \u001b[1;36m521.440144077\u001b[0m,\n", 
+ "\u001b[2;32m│ │ \u001b[0m\u001b[32m'bottom'\u001b[0m: \u001b[1;36m603.9191236482\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'doctop'\u001b[0m: \u001b[1;36m521.440144077\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'x0'\u001b[0m: \u001b[1;36m241.102111912\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'y0'\u001b[0m: \u001b[1;36m188.08087635180004\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'x1'\u001b[0m: \u001b[1;36m241.102111912\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'y1'\u001b[0m: \u001b[1;36m270.55985592300004\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'width'\u001b[0m: \u001b[1;36m0.0\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'height'\u001b[0m: \u001b[1;36m82.4789795712\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'pts'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m241.102111912\u001b[0m, \u001b[1;36m603.9191236482\u001b[0m\u001b[1m)\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m241.102111912\u001b[0m, \u001b[1;36m521.440144077\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'linewidth'\u001b[0m: \u001b[1;36m0.2279\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'stroke'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'fill'\u001b[0m: \u001b[3;91mFalse\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'evenodd'\u001b[0m: \u001b[3;91mFalse\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'stroking_color'\u001b[0m: \u001b[1m(\u001b[0m\u001b[1;36m0.843\u001b[0m, \u001b[1;36m0.855\u001b[0m, \u001b[1;36m0.839\u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'non_stroking_color'\u001b[0m: \u001b[1m(\u001b[0m\u001b[1;36m0\u001b[0m, \u001b[1;36m0.592\u001b[0m, \u001b[1;36m0.459\u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'mcid'\u001b[0m: 
\u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'tag'\u001b[0m: \u001b[32m'Artifact'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'object_type'\u001b[0m: \u001b[32m'line'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'page_number'\u001b[0m: \u001b[1;36m1\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'stroking_pattern'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'non_stroking_pattern'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'path'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'm'\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m241.102111912\u001b[0m, \u001b[1;36m603.9191236482\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m, \u001b[1m(\u001b[0m\u001b[32m'l'\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m241.102111912\u001b[0m, \u001b[1;36m521.440144077\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'dash'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'top'\u001b[0m: \u001b[1;36m521.440144077\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'bottom'\u001b[0m: \u001b[1;36m603.9191236482\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'doctop'\u001b[0m: \u001b[1;36m521.440144077\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'x0'\u001b[0m: \u001b[1;36m241.102111912\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'y0'\u001b[0m: \u001b[1;36m188.08087635180004\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'x1'\u001b[0m: \u001b[1;36m526.4859179252\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'y1'\u001b[0m: \u001b[1;36m188.08087635180004\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'width'\u001b[0m: \u001b[1;36m285.38380601320006\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'height'\u001b[0m: \u001b[1;36m0.0\u001b[0m,\n", + 
"\u001b[2;32m│ │ \u001b[0m\u001b[32m'pts'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m241.102111912\u001b[0m, \u001b[1;36m603.9191236482\u001b[0m\u001b[1m)\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m526.4859179252\u001b[0m, \u001b[1;36m603.9191236482\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'linewidth'\u001b[0m: \u001b[1;36m0.2279\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'stroke'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'fill'\u001b[0m: \u001b[3;91mFalse\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'evenodd'\u001b[0m: \u001b[3;91mFalse\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'stroking_color'\u001b[0m: \u001b[1m(\u001b[0m\u001b[1;36m0.271\u001b[0m, \u001b[1;36m0.282\u001b[0m, \u001b[1;36m0.294\u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'non_stroking_color'\u001b[0m: \u001b[1m(\u001b[0m\u001b[1;36m0\u001b[0m, \u001b[1;36m0.592\u001b[0m, \u001b[1;36m0.459\u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'mcid'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'tag'\u001b[0m: \u001b[32m'Artifact'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'object_type'\u001b[0m: \u001b[32m'line'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'page_number'\u001b[0m: \u001b[1;36m1\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'stroking_pattern'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'non_stroking_pattern'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'path'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'm'\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m241.102111912\u001b[0m, \u001b[1;36m603.9191236482\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m, \u001b[1m(\u001b[0m\u001b[32m'l'\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m526.4859179252\u001b[0m, 
\u001b[1;36m603.9191236482\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'dash'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'top'\u001b[0m: \u001b[1;36m603.9191236482\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'bottom'\u001b[0m: \u001b[1;36m603.9191236482\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'doctop'\u001b[0m: \u001b[1;36m603.9191236482\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'x0'\u001b[0m: \u001b[1;36m335.94366227\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'y0'\u001b[0m: \u001b[1;36m275.4331016422001\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'x1'\u001b[0m: \u001b[1;36m348.2605638164\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'y1'\u001b[0m: \u001b[1;36m275.4331016422001\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'width'\u001b[0m: \u001b[1;36m12.316901546400004\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'height'\u001b[0m: \u001b[1;36m0.0\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'pts'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m335.94366227\u001b[0m, \u001b[1;36m516.5668983577999\u001b[0m\u001b[1m)\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m348.2605638164\u001b[0m, \u001b[1;36m516.5668983577999\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'linewidth'\u001b[0m: \u001b[1;36m1.8228\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'stroke'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'fill'\u001b[0m: \u001b[3;91mFalse\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'evenodd'\u001b[0m: \u001b[3;91mFalse\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'stroking_color'\u001b[0m: \u001b[1m(\u001b[0m\u001b[1;36m0\u001b[0m, \u001b[1;36m0.157\u001b[0m, 
\u001b[1;36m0.627\u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'non_stroking_color'\u001b[0m: \u001b[1m(\u001b[0m\u001b[1;36m0\u001b[0m, \u001b[1;36m0.592\u001b[0m, \u001b[1;36m0.459\u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'mcid'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'tag'\u001b[0m: \u001b[32m'Artifact'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'object_type'\u001b[0m: \u001b[32m'line'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'page_number'\u001b[0m: \u001b[1;36m1\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'stroking_pattern'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'non_stroking_pattern'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'path'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'm'\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m335.94366227\u001b[0m, \u001b[1;36m516.5668983577999\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m, \u001b[1m(\u001b[0m\u001b[32m'l'\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m348.2605638164\u001b[0m, \u001b[1;36m516.5668983577999\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'dash'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'top'\u001b[0m: \u001b[1;36m516.5668983577999\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'bottom'\u001b[0m: \u001b[1;36m516.5668983577999\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'doctop'\u001b[0m: \u001b[1;36m516.5668983577999\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m,\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m{\u001b[0m\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'x0'\u001b[0m: \u001b[1;36m27.0\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'y0'\u001b[0m: \u001b[1;36m29.016999999999957\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'x1'\u001b[0m: 
\u001b[1;36m585.0\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'y1'\u001b[0m: \u001b[1;36m29.016999999999957\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'width'\u001b[0m: \u001b[1;36m558.0\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'height'\u001b[0m: \u001b[1;36m0.0\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'pts'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m(\u001b[0m\u001b[1;36m27.0\u001b[0m, \u001b[1;36m762.9830000000001\u001b[0m\u001b[1m)\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m585.0\u001b[0m, \u001b[1;36m762.9830000000001\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'linewidth'\u001b[0m: \u001b[1;36m4.002\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'stroke'\u001b[0m: \u001b[3;92mTrue\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'fill'\u001b[0m: \u001b[3;91mFalse\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'evenodd'\u001b[0m: \u001b[3;91mFalse\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'stroking_color'\u001b[0m: \u001b[1m(\u001b[0m\u001b[1;36m1\u001b[0m, \u001b[1;36m0.6\u001b[0m, \u001b[1;36m0\u001b[0m, \u001b[1;36m0\u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'non_stroking_color'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'mcid'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'tag'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'object_type'\u001b[0m: \u001b[32m'line'\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'page_number'\u001b[0m: \u001b[1;36m1\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'stroking_pattern'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'non_stroking_pattern'\u001b[0m: \u001b[3;35mNone\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'path'\u001b[0m: \u001b[1m[\u001b[0m\u001b[1m(\u001b[0m\u001b[32m'm'\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m27.0\u001b[0m, 
\u001b[1;36m762.9830000000001\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m, \u001b[1m(\u001b[0m\u001b[32m'l'\u001b[0m, \u001b[1m(\u001b[0m\u001b[1;36m585.0\u001b[0m, \u001b[1;36m762.9830000000001\u001b[0m\u001b[1m)\u001b[0m\u001b[1m)\u001b[0m\u001b[1m]\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'dash'\u001b[0m: \u001b[1m(\u001b[0m\u001b[1m[\u001b[0m\u001b[1m]\u001b[0m, \u001b[1;36m0\u001b[0m\u001b[1m)\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'top'\u001b[0m: \u001b[1;36m762.9830000000001\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'bottom'\u001b[0m: \u001b[1;36m762.9830000000001\u001b[0m,\n", + "\u001b[2;32m│ │ \u001b[0m\u001b[32m'doctop'\u001b[0m: \u001b[1;36m762.9830000000001\u001b[0m\n", + "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", + "\u001b[1m]\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with pdfplumber.open(pdf2) as pdf:\n", + " first_page = pdf.pages[0]\n", + " pprint(first_page.lines)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## LLAVA\n", + "Partition PDF tables, text, and images\n", + "LLaVA Paper: https://arxiv.org/pdf/2304.08485.pdf" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Some weights of the model checkpoint at microsoft/table-transformer-structure-recognition were not used when initializing TableTransformerForObjectDetection: ['model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']\n", + "- This IS expected if you are initializing TableTransformerForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing TableTransformerForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n" + ] + }, + { + "ename": "NameError", + "evalue": "name 'sort_page_elements' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[35], line 8\u001b[0m\n\u001b[1;32m 5\u001b[0m path \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdata\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Get elements\u001b[39;00m\n\u001b[0;32m----> 8\u001b[0m raw_pdf_elements \u001b[38;5;241m=\u001b[39m \u001b[43mpartition_pdf\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpdf1\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Using pdf format to find embedded image blocks\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[43mextract_images_in_pdf\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 12\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Use layout model (YOLOX) to get bounding boxes (for tables) and find titles\u001b[39;49;00m\n\u001b[1;32m 13\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Titles are any sub-section of the document\u001b[39;49;00m\n\u001b[1;32m 14\u001b[0m \u001b[43m 
\u001b[49m\u001b[43minfer_table_structure\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 15\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Post processing to aggregate text once we have the title\u001b[39;49;00m\n\u001b[1;32m 16\u001b[0m \u001b[43m \u001b[49m\u001b[43mchunking_strategy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mby_title\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 17\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Chunking params to aggregate text blocks\u001b[39;49;00m\n\u001b[1;32m 18\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Attempt to create a new chunk 3800 chars\u001b[39;49;00m\n\u001b[1;32m 19\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Attempt to keep chunks > 2000 chars\u001b[39;49;00m\n\u001b[1;32m 20\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Hard max on chunks\u001b[39;49;00m\n\u001b[1;32m 21\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_characters\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m4000\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 22\u001b[0m \u001b[43m \u001b[49m\u001b[43mnew_after_n_chars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m3800\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 23\u001b[0m \u001b[43m \u001b[49m\u001b[43mcombine_text_under_n_chars\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m2000\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 24\u001b[0m \u001b[43m \u001b[49m\u001b[43mimage_output_dir_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 25\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 26\u001b[0m pprint(raw_pdf_elements)\n", + "File \u001b[0;32m/Volumes/DATA/Dropbox/IMAC_BACKUP/WORK/PROJECTS/LIQUIDITY/venv/lib/python3.10/site-packages/unstructured/documents/elements.py:570\u001b[0m, in 
\u001b[0;36mprocess_metadata..decorator..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 568\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 569\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrapper\u001b[39m(\u001b[38;5;241m*\u001b[39margs: _P\u001b[38;5;241m.\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: _P\u001b[38;5;241m.\u001b[39mkwargs) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mlist\u001b[39m[Element]:\n\u001b[0;32m--> 570\u001b[0m elements \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 571\u001b[0m sig \u001b[38;5;241m=\u001b[39m inspect\u001b[38;5;241m.\u001b[39msignature(func)\n\u001b[1;32m 572\u001b[0m params: \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, Any] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mdict\u001b[39m(\u001b[38;5;28mzip\u001b[39m(sig\u001b[38;5;241m.\u001b[39mparameters, args)), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", + "File \u001b[0;32m/Volumes/DATA/Dropbox/IMAC_BACKUP/WORK/PROJECTS/LIQUIDITY/venv/lib/python3.10/site-packages/unstructured/file_utils/filetype.py:622\u001b[0m, in \u001b[0;36madd_filetype..decorator..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 620\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 621\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrapper\u001b[39m(\u001b[38;5;241m*\u001b[39margs: _P\u001b[38;5;241m.\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: _P\u001b[38;5;241m.\u001b[39mkwargs) 
\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m List[Element]:\n\u001b[0;32m--> 622\u001b[0m elements \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 623\u001b[0m sig \u001b[38;5;241m=\u001b[39m inspect\u001b[38;5;241m.\u001b[39msignature(func)\n\u001b[1;32m 624\u001b[0m params: Dict[\u001b[38;5;28mstr\u001b[39m, Any] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mdict\u001b[39m(\u001b[38;5;28mzip\u001b[39m(sig\u001b[38;5;241m.\u001b[39mparameters, args)), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", + "File \u001b[0;32m/Volumes/DATA/Dropbox/IMAC_BACKUP/WORK/PROJECTS/LIQUIDITY/venv/lib/python3.10/site-packages/unstructured/file_utils/filetype.py:582\u001b[0m, in \u001b[0;36madd_metadata..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 580\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 581\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrapper\u001b[39m(\u001b[38;5;241m*\u001b[39margs: _P\u001b[38;5;241m.\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: _P\u001b[38;5;241m.\u001b[39mkwargs) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m List[Element]:\n\u001b[0;32m--> 582\u001b[0m elements \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 583\u001b[0m sig \u001b[38;5;241m=\u001b[39m inspect\u001b[38;5;241m.\u001b[39msignature(func)\n\u001b[1;32m 584\u001b[0m 
params: Dict[\u001b[38;5;28mstr\u001b[39m, Any] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mdict\u001b[39m(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mdict\u001b[39m(\u001b[38;5;28mzip\u001b[39m(sig\u001b[38;5;241m.\u001b[39mparameters, args)), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n", + "File \u001b[0;32m/Volumes/DATA/Dropbox/IMAC_BACKUP/WORK/PROJECTS/LIQUIDITY/venv/lib/python3.10/site-packages/unstructured/chunking/dispatch.py:83\u001b[0m, in \u001b[0;36madd_chunking_strategy..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m call_args\n\u001b[1;32m 82\u001b[0m \u001b[38;5;66;03m# -- call the partitioning function to get the elements --\u001b[39;00m\n\u001b[0;32m---> 83\u001b[0m elements \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 85\u001b[0m \u001b[38;5;66;03m# -- look for a chunking-strategy argument --\u001b[39;00m\n\u001b[1;32m 86\u001b[0m call_args \u001b[38;5;241m=\u001b[39m get_call_args_applying_defaults()\n", + "File \u001b[0;32m/Volumes/DATA/Dropbox/IMAC_BACKUP/WORK/PROJECTS/LIQUIDITY/venv/lib/python3.10/site-packages/unstructured/partition/pdf.py:201\u001b[0m, in \u001b[0;36mpartition_pdf\u001b[0;34m(filename, file, include_page_breaks, strategy, infer_table_structure, ocr_languages, languages, include_metadata, metadata_filename, metadata_last_modified, chunking_strategy, links, hi_res_model_name, extract_images_in_pdf, extract_image_block_types, extract_image_block_output_dir, extract_image_block_to_payload, date_from_file_object, starting_page_number, **kwargs)\u001b[0m\n\u001b[1;32m 197\u001b[0m exactly_one(filename\u001b[38;5;241m=\u001b[39mfilename, 
file\u001b[38;5;241m=\u001b[39mfile)\n\u001b[1;32m 199\u001b[0m languages \u001b[38;5;241m=\u001b[39m check_language_args(languages \u001b[38;5;129;01mor\u001b[39;00m [], ocr_languages) \u001b[38;5;129;01mor\u001b[39;00m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124meng\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m--> 201\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mpartition_pdf_or_image\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 202\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 203\u001b[0m \u001b[43m \u001b[49m\u001b[43mfile\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfile\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 204\u001b[0m \u001b[43m \u001b[49m\u001b[43minclude_page_breaks\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minclude_page_breaks\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 205\u001b[0m \u001b[43m \u001b[49m\u001b[43mstrategy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstrategy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 206\u001b[0m \u001b[43m \u001b[49m\u001b[43minfer_table_structure\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minfer_table_structure\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 207\u001b[0m \u001b[43m \u001b[49m\u001b[43mlanguages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlanguages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 208\u001b[0m \u001b[43m \u001b[49m\u001b[43mmetadata_last_modified\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetadata_last_modified\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 209\u001b[0m \u001b[43m \u001b[49m\u001b[43mhi_res_model_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhi_res_model_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 210\u001b[0m \u001b[43m \u001b[49m\u001b[43mextract_images_in_pdf\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextract_images_in_pdf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 
211\u001b[0m \u001b[43m \u001b[49m\u001b[43mextract_image_block_types\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextract_image_block_types\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 212\u001b[0m \u001b[43m \u001b[49m\u001b[43mextract_image_block_output_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextract_image_block_output_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 213\u001b[0m \u001b[43m \u001b[49m\u001b[43mextract_image_block_to_payload\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextract_image_block_to_payload\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 214\u001b[0m \u001b[43m \u001b[49m\u001b[43mdate_from_file_object\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdate_from_file_object\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 215\u001b[0m \u001b[43m \u001b[49m\u001b[43mstarting_page_number\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstarting_page_number\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 216\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 217\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/Volumes/DATA/Dropbox/IMAC_BACKUP/WORK/PROJECTS/LIQUIDITY/venv/lib/python3.10/site-packages/unstructured/partition/pdf.py:292\u001b[0m, in \u001b[0;36mpartition_pdf_or_image\u001b[0;34m(filename, file, is_image, include_page_breaks, strategy, infer_table_structure, ocr_languages, languages, metadata_last_modified, hi_res_model_name, extract_images_in_pdf, extract_image_block_types, extract_image_block_output_dir, extract_image_block_to_payload, date_from_file_object, starting_page_number, **kwargs)\u001b[0m\n\u001b[1;32m 290\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m warnings\u001b[38;5;241m.\u001b[39mcatch_warnings():\n\u001b[1;32m 291\u001b[0m 
warnings\u001b[38;5;241m.\u001b[39msimplefilter(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 292\u001b[0m elements \u001b[38;5;241m=\u001b[39m \u001b[43m_partition_pdf_or_image_local\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 293\u001b[0m \u001b[43m \u001b[49m\u001b[43mfilename\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 294\u001b[0m \u001b[43m \u001b[49m\u001b[43mfile\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mspooled_to_bytes_io_if_needed\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 295\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_image\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_image\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 296\u001b[0m \u001b[43m \u001b[49m\u001b[43minfer_table_structure\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minfer_table_structure\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 297\u001b[0m \u001b[43m \u001b[49m\u001b[43minclude_page_breaks\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minclude_page_breaks\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 298\u001b[0m \u001b[43m \u001b[49m\u001b[43mlanguages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlanguages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 299\u001b[0m \u001b[43m \u001b[49m\u001b[43mmetadata_last_modified\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetadata_last_modified\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mlast_modification_date\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 300\u001b[0m \u001b[43m \u001b[49m\u001b[43mhi_res_model_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhi_res_model_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 301\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mpdf_text_extractable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpdf_text_extractable\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 302\u001b[0m \u001b[43m \u001b[49m\u001b[43mextract_images_in_pdf\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextract_images_in_pdf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 303\u001b[0m \u001b[43m \u001b[49m\u001b[43mextract_image_block_types\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextract_image_block_types\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 304\u001b[0m \u001b[43m \u001b[49m\u001b[43mextract_image_block_output_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextract_image_block_output_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 305\u001b[0m \u001b[43m \u001b[49m\u001b[43mextract_image_block_to_payload\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextract_image_block_to_payload\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 306\u001b[0m \u001b[43m \u001b[49m\u001b[43mstarting_page_number\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstarting_page_number\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 307\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 308\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 309\u001b[0m out_elements \u001b[38;5;241m=\u001b[39m _process_uncategorized_text_elements(elements)\n\u001b[1;32m 311\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m strategy \u001b[38;5;241m==\u001b[39m PartitionStrategy\u001b[38;5;241m.\u001b[39mFAST:\n", + "File \u001b[0;32m/Volumes/DATA/Dropbox/IMAC_BACKUP/WORK/PROJECTS/LIQUIDITY/venv/lib/python3.10/site-packages/unstructured/utils.py:230\u001b[0m, in \u001b[0;36mrequires_dependencies..decorator..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 221\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(missing_deps) \u001b[38;5;241m>\u001b[39m 
\u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 222\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[1;32m 223\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFollowing dependencies are missing: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(missing_deps)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 224\u001b[0m \u001b[38;5;241m+\u001b[39m (\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 228\u001b[0m ),\n\u001b[1;32m 229\u001b[0m )\n\u001b[0;32m--> 230\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/Volumes/DATA/Dropbox/IMAC_BACKUP/WORK/PROJECTS/LIQUIDITY/venv/lib/python3.10/site-packages/unstructured/partition/pdf.py:518\u001b[0m, in \u001b[0;36m_partition_pdf_or_image_local\u001b[0;34m(filename, file, is_image, infer_table_structure, include_page_breaks, languages, ocr_mode, model_name, hi_res_model_name, pdf_image_dpi, metadata_last_modified, pdf_text_extractable, extract_images_in_pdf, extract_image_block_types, extract_image_block_output_dir, extract_image_block_to_payload, analysis, analyzed_image_output_dir_path, starting_page_number, **kwargs)\u001b[0m\n\u001b[1;32m 515\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m el \u001b[38;5;129;01min\u001b[39;00m page\u001b[38;5;241m.\u001b[39melements:\n\u001b[1;32m 516\u001b[0m el\u001b[38;5;241m.\u001b[39mtext \u001b[38;5;241m=\u001b[39m el\u001b[38;5;241m.\u001b[39mtext \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m--> 518\u001b[0m elements 
\u001b[38;5;241m=\u001b[39m \u001b[43mdocument_to_element_list\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 519\u001b[0m \u001b[43m \u001b[49m\u001b[43mfinal_document_layout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 520\u001b[0m \u001b[43m \u001b[49m\u001b[43msortable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 521\u001b[0m \u001b[43m \u001b[49m\u001b[43minclude_page_breaks\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minclude_page_breaks\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 522\u001b[0m \u001b[43m \u001b[49m\u001b[43mlast_modification_date\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetadata_last_modified\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 523\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# NOTE(crag): do not attempt to derive ListItem's from a layout-recognized \"list\"\u001b[39;49;00m\n\u001b[1;32m 524\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# block with NLP rules. 
Otherwise, the assumptions in\u001b[39;49;00m\n\u001b[1;32m 525\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# unstructured.partition.common::layout_list_to_list_items often result in weird chunking.\u001b[39;49;00m\n\u001b[1;32m 526\u001b[0m \u001b[43m \u001b[49m\u001b[43minfer_list_items\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 527\u001b[0m \u001b[43m \u001b[49m\u001b[43mlanguages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlanguages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 528\u001b[0m \u001b[43m \u001b[49m\u001b[43mstarting_page_number\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstarting_page_number\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 529\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 530\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 532\u001b[0m extract_image_block_types \u001b[38;5;241m=\u001b[39m check_element_types_to_extract(extract_image_block_types)\n\u001b[1;32m 533\u001b[0m \u001b[38;5;66;03m# NOTE(christine): `extract_images_in_pdf` would deprecate\u001b[39;00m\n\u001b[1;32m 534\u001b[0m \u001b[38;5;66;03m# (but continue to support for a while)\u001b[39;00m\n", + "File \u001b[0;32m/Volumes/DATA/Dropbox/IMAC_BACKUP/WORK/PROJECTS/LIQUIDITY/venv/lib/python3.10/site-packages/unstructured/partition/common.py:630\u001b[0m, in \u001b[0;36mdocument_to_element_list\u001b[0;34m(document, sortable, include_page_breaks, last_modification_date, infer_list_items, source_format, detection_origin, sort_mode, languages, starting_page_number, **kwargs)\u001b[0m\n\u001b[1;32m 628\u001b[0m sorted_page_elements \u001b[38;5;241m=\u001b[39m page_elements\n\u001b[1;32m 629\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m sortable \u001b[38;5;129;01mand\u001b[39;00m sort_mode \u001b[38;5;241m!=\u001b[39m SORT_MODE_DONT:\n\u001b[0;32m--> 
630\u001b[0m sorted_page_elements \u001b[38;5;241m=\u001b[39m \u001b[43msort_page_elements\u001b[49m(page_elements, sort_mode)\n\u001b[1;32m 632\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m include_page_breaks \u001b[38;5;129;01mand\u001b[39;00m page_number \u001b[38;5;241m<\u001b[39m num_pages \u001b[38;5;241m+\u001b[39m starting_page_number:\n\u001b[1;32m 633\u001b[0m sorted_page_elements\u001b[38;5;241m.\u001b[39mappend(PageBreak(text\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n", + "\u001b[0;31mNameError\u001b[0m: name 'sort_page_elements' is not defined" + ] + } + ], + "source": [ + "# pip install \"unstructured[all-docs]\"\n", + "from unstructured.partition.pdf import partition_pdf\n", + "\n", + "# Path to save images\n", + "path = \"data\"\n", + "\n", + "# Get elements\n", + "raw_pdf_elements = partition_pdf(\n", + " filename=pdf1,\n", + " # Using pdf format to find embedded image blocks\n", + " extract_images_in_pdf=True,\n", + " # Use layout model (YOLOX) to get bounding boxes (for tables) and find titles\n", + " # Titles are any sub-section of the document\n", + " infer_table_structure=True,\n", + " # Post processing to aggregate text once we have the title\n", + " chunking_strategy=\"by_title\",\n", + " # Chunking params to aggregate text blocks\n", + " # Attempt to create a new chunk 3800 chars\n", + " # Attempt to keep chunks > 2000 chars\n", + " # Hard max on chunks\n", + " max_characters=4000,\n", + " new_after_n_chars=3800,\n", + " combine_text_under_n_chars=2000,\n", + " image_output_dir_path=path,\n", + ")\n", + "pprint(raw_pdf_elements)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "I could not get this to work: each run fails with a 'something is missing' error; I brew/pip install the missing piece, and then a new error appears, ten times over. I pip installed unstructured[all-docs] as requested in the article, and yet something is still missing (the latest failure is the NameError on `sort_page_elements` shown above). I give up on this approach."
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## TABULA" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For tables... https://github.com/tabulapdf/tabula" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## CHARTS\n", + "\n", + "https://www.researchgate.net/publication/372616217_Automatic_Chart_Understanding_a_Review\n", + "\n", + "https://ieeexplore.ieee.org/document/9599112" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}