1:HL["/_next/static/media/6905431624c34d00-s.p.woff2","font",{"crossOrigin":"","type":"font/woff2"}] 2:HL["/_next/static/css/9e925a33b1acdac1.css","style",{"crossOrigin":""}] 0:["rmcKjFZ3e9kKdH1iJwCIQ",[[["",{"children":["blog",{"children":[["slug","2023/06/01/real-time-mastodon-usage-with-apache-kafka-apache-pinot-and-streamlit","c"],{"children":["__PAGE__?{\"slug\":[\"2023\",\"06\",\"01\",\"real-time-mastodon-usage-with-apache-kafka-apache-pinot-and-streamlit\"]}",{}]}]}]},"$undefined","$undefined",true],"$L3",[[["$","link","0",{"rel":"stylesheet","href":"/_next/static/css/9e925a33b1acdac1.css","precedence":"next","crossOrigin":""}]],"$L4"]]]] 5:HL["/_next/static/css/c130d1629644f070.css","style",{"crossOrigin":""}] 6:I[7821,["326","static/chunks/326-3a90a6443b9c824c.js","980","static/chunks/980-6e243f9cd384c7d2.js","702","static/chunks/702-a2bf9fe707814b79.js","185","static/chunks/app/layout-776a485845c720ef.js"],"ThemeProviders"] 7:I[3994,["326","static/chunks/326-3a90a6443b9c824c.js","980","static/chunks/980-6e243f9cd384c7d2.js","702","static/chunks/702-a2bf9fe707814b79.js","185","static/chunks/app/layout-776a485845c720ef.js"],""] 8:I[9640,["326","static/chunks/326-3a90a6443b9c824c.js","980","static/chunks/980-6e243f9cd384c7d2.js","702","static/chunks/702-a2bf9fe707814b79.js","185","static/chunks/app/layout-776a485845c720ef.js"],"AlgoliaSearchProvider"] 9:I[7975,["326","static/chunks/326-3a90a6443b9c824c.js","980","static/chunks/980-6e243f9cd384c7d2.js","702","static/chunks/702-a2bf9fe707814b79.js","185","static/chunks/app/layout-776a485845c720ef.js"],""] a:I[6954,[],""] b:I[7264,[],""] c:I[8326,["326","static/chunks/326-3a90a6443b9c824c.js","413","static/chunks/413-f9f40b83f7bb3f22.js","980","static/chunks/980-6e243f9cd384c7d2.js","797","static/chunks/app/blog/%5B...slug%5D/page-502e08b6677b55da.js"],""] f:T9fe,M42.99 18.448c1.032-.553 2.21-.831 3.535-.831 1.542 0 2.938.38 4.187 1.14 1.248.76 2.236 1.841 2.965 3.241.728 1.402 1.091 3.025 1.091 4.872s-.363 
3.482-1.091 4.903c-.729 1.424-1.717 2.525-2.965 3.307-1.25.782-2.645 1.173-4.187 1.173-1.325 0-2.493-.271-3.503-.815-1.01-.543-1.83-1.226-2.46-2.053v14.612H36V17.912h4.562v2.606c.586-.825 1.395-1.515 2.426-2.068l.002-.002m6.452 5.605c-.445-.793-1.032-1.395-1.76-1.808a4.72 4.72 0 0 0-2.362-.618c-.847 0-1.602.211-2.33.635-.728.423-1.315 1.038-1.76 1.841-.445.804-.668 1.749-.668 2.835 0 1.087.221 2.032.668 2.835.445.804 1.032 1.417 1.76 1.842a4.557 4.557 0 0 0 2.33.635 4.57 4.57 0 0 0 2.362-.652c.728-.435 1.313-1.053 1.76-1.856.445-.804.668-1.76.668-2.867s-.223-2.025-.668-2.818v-.004M62.947 17.912v18.051h-4.562V17.912h4.562m.551-6.079a2.833 2.833 0 1 1-5.666 0 2.833 2.833 0 0 1 5.666 0M82.954 19.687c1.325 1.358 1.988 3.253 1.988 5.685v10.59H80.38v-9.97c0-1.434-.358-2.537-1.075-3.307-.717-.772-1.695-1.157-2.933-1.157-1.239 0-2.254.387-2.982 1.157-.728.772-1.091 1.873-1.091 3.307v9.97h-4.562V17.91h4.562v2.248a6.322 6.322 0 0 1 2.33-1.841c.944-.445 1.981-.669 3.111-.669 2.15 0 3.889.68 5.214 2.037v.002M92.892 35.098c-1.39-.77-2.482-1.861-3.275-3.275-.794-1.411-1.19-3.041-1.19-4.888s.406-3.475 1.221-4.888a8.502 8.502 0 0 1 3.34-3.275c1.412-.772 2.987-1.157 4.725-1.157 1.739 0 3.312.387 4.725 1.157a8.5 8.5 0 0 1 3.34 3.275c.815 1.411 1.222 3.041 1.222 4.888s-.418 3.475-1.255 4.888a8.708 8.708 0 0 1-3.388 3.275c-1.424.772-3.014 1.157-4.774 1.157-1.76 0-3.301-.385-4.691-1.157m7.021-3.421c.729-.402 1.309-1.005 1.744-1.809.435-.803.651-1.781.651-2.933 0-1.715-.451-3.035-1.351-3.958-.902-.924-2.004-1.385-3.307-1.385s-2.395.461-3.275 1.385c-.88.923-1.32 2.243-1.32 3.958 0 1.715.428 3.035 1.287 3.958.858.924 1.938 1.385 3.241 1.385.825 0 1.602-.2 2.33-.603v.002M115.96 21.658v8.734c0 .608.147 1.048.44 1.32.293.271.787.406 1.482.406H120v3.845h-2.867c-3.845 0-5.766-1.868-5.766-5.605v-8.7h-2.15v-3.746h2.15V13l4.595-1v5.912h4.04v3.746h-4.042M20.03 46.757l-5.538-1.385A1.97 1.97 0 0 1 13 43.46v-5.462c0-.841.349-1.601.907-2.146a12.212 12.212 0 0 0 6.975-3.644c2.602-2.731 3.627-6.578 
2.882-10.251L21 9h-4V4a1 1 0 0 0-2 0v7a1 1 0 0 1-2 0v-1a1 1 0 0 0-2 0v6.758a4.489 4.489 0 0 1 2.694-.755c2.278.095 4.156 1.934 4.297 4.21a4.501 4.501 0 0 1-6.992 4.029V29a1 1 0 0 1-2 0V7a1 1 0 0 0-2 0v2h-4L.237 21.957c-.745 3.675.279 7.52 2.882 10.251a12.202 12.202 0 0 0 6.975 3.644c.558.545.907 1.305.907 2.146V43.4c0 .938-.639 1.757-1.55 1.985l-5.48 1.37c-.57.143-.97.655-.97 1.243h18c0-.588-.4-1.1-.97-1.243v.0023:[null,["$","html",null,{"lang":"en-us","className":"__variable_1fc36d scroll-smooth","suppressHydrationWarning":true,"children":[["$","head",null,{"children":[["$","meta",null,{"httpEquiv":"Content-Security-Policy","content":"default-src 'self';script-src 'self' 'unsafe-eval' 'unsafe-inline' giscus.app analytics.umami.is www.youtube.com www.googletagmanager.com www.google-analytics.com;style-src 'self' 'unsafe-inline';img-src * blob: data:;media-src *.s3.amazonaws.com;connect-src *;font-src 'self';frame-src www.youtube.com youtube.com giscus.app youtu.be https://www.youtube.com https://youtube.com;"}],["$","link",null,{"rel":"apple-touch-icon","sizes":"76x76","href":"/static/favicons/apple-touch-icon.png"}],["$","link",null,{"rel":"icon","type":"image/png","sizes":"32x32","href":"/static/favicons/favicon-32x32.png"}],["$","link",null,{"rel":"icon","type":"image/png","sizes":"16x16","href":"/static/favicons/favicon-16x16.png"}],["$","link",null,{"rel":"manifest","href":"/static/favicons/site.webmanifest"}],["$","link",null,{"rel":"mask-icon","href":"/static/favicons/safari-pinned-tab.svg","color":"#5bbad5"}],["$","meta",null,{"name":"msapplication-TileColor","content":"#000000"}],["$","meta",null,{"name":"theme-color","media":"(prefers-color-scheme: light)","content":"#fff"}],["$","meta",null,{"name":"theme-color","media":"(prefers-color-scheme: dark)","content":"#000"}],["$","link",null,{"rel":"alternate","type":"application/rss+xml","href":"/feed.xml"}]]}],["$","body",null,{"className":"bg-white text-black antialiased dark:bg-gray-950 
dark:text-white","children":["$","$L6",null,{"children":[["$undefined","$undefined","$undefined","$undefined",[["$","$L7",null,{"strategy":"afterInteractive","src":"https://www.googletagmanager.com/gtag/js?id=G-ZXG79NJEBY"}],["$","$L7",null,{"strategy":"afterInteractive","id":"ga-script","children":"\n window.dataLayer = window.dataLayer || [];\n function gtag(){dataLayer.push(arguments);}\n gtag('js', new Date());\n gtag('config', 'G-ZXG79NJEBY');\n "}]]],["$","div",null,{"className":"mx-auto flex max-w-screen-customDesktop flex-col justify-between font-sans","children":["$","$L8",null,{"algoliaConfig":{"appId":"CKRA00L2X9","apiKey":"6531f8f7783a88d76629190843f1801e","indexName":"prod_apache_pinot_docs"},"children":[["$","$L9",null,{}],["$","main",null,{"children":["$","$La",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$Lb",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":["$","div",null,{"className":"flex flex-col items-start justify-start md:mt-24 md:flex-row md:items-center md:justify-center md:space-x-6","children":[["$","div",null,{"className":"space-x-2 pb-8 pt-6 md:space-y-5","children":["$","h1",null,{"className":"text-6xl font-extrabold leading-9 tracking-tight text-gray-900 dark:text-gray-100 md:border-r-2 md:px-6 md:text-8xl md:leading-14","children":"404"}]}],["$","div",null,{"className":"max-w-md","children":[["$","p",null,{"className":"mb-4 text-xl font-bold leading-normal md:text-2xl","children":"Sorry we couldn't find this page."}],["$","p",null,{"className":"mb-8","children":"But dont worry, you can find plenty of other things on our homepage."}],["$","$Lc",null,{"href":"/","className":"focus:shadow-outline-blue inline rounded-lg border border-transparent bg-blue-600 px-4 py-2 text-sm font-medium leading-5 
text-white shadow transition-colors duration-150 hover:bg-blue-700 focus:outline-none dark:hover:bg-blue-500","children":"Back to homepage"}]]}]]}],"notFoundStyles":[],"initialChildNode":["$","$La",null,{"parallelRouterKey":"children","segmentPath":["children","blog","children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$Lb",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","initialChildNode":["$","$La",null,{"parallelRouterKey":"children","segmentPath":["children","blog","children",["slug","2023/06/01/real-time-mastodon-usage-with-apache-kafka-apache-pinot-and-streamlit","c"],"children"],"loading":"$undefined","loadingStyles":"$undefined","loadingScripts":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","errorScripts":"$undefined","template":["$","$Lb",null,{}],"templateStyles":"$undefined","templateScripts":"$undefined","notFound":"$undefined","notFoundStyles":"$undefined","initialChildNode":["$Ld","$Le",null],"childPropSegment":"__PAGE__?{\"slug\":[\"2023\",\"06\",\"01\",\"real-time-mastodon-usage-with-apache-kafka-apache-pinot-and-streamlit\"]}","styles":[["$","link","0",{"rel":"stylesheet","href":"/_next/static/css/c130d1629644f070.css","precedence":"next","crossOrigin":""}]]}],"childPropSegment":["slug","2023/06/01/real-time-mastodon-usage-with-apache-kafka-apache-pinot-and-streamlit","c"],"styles":null}],"childPropSegment":"blog","styles":null}]}],["$","footer",null,{"className":"border-t bg-sky-100 px-5 py-10 md:px-[6.75rem] md:pb-10 md:pt-16","children":[["$","div",null,{"className":"mx-auto flex max-w-7xl flex-wrap 
justify-between","children":[["$","div",null,{"className":"flex-shrink-0","children":["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","width":120,"height":48,"fill":"none","children":[["$","g",null,{"fill":"#C7154A","clipPath":"url(#logo_svg__a)","children":[["$","path",null,{"d":"$f"}],["$","path",null,{"d":"M13.5 23a2.5 2.5 0 1 0 0-5 2.5 2.5 0 0 0 0 5M8 5a1 1 0 1 0 0-2 1 1 0 0 0 0 2M12 8a1 1 0 1 0 0-2 1 1 0 0 0 0 2M16 2a1 1 0 1 0 0-2 1 1 0 0 0 0 2"}]]}],["$","defs",null,{"children":["$","clipPath",null,{"id":"logo_svg__a","children":["$","path",null,{"fill":"#fff","d":"M0 0h120v48H0z"}]}]}]]}]}],["$","div",null,{"className":"flex flex-wrap gap-x-16 gap-y-5 py-8 md:pl-24 md:pr-[21.625rem]","children":[" ",[["$","div","Resources",{"children":[["$","h5",null,{"className":"mb-4 text-lg font-semibold","children":"Resources"}],["$","div",null,{"className":"flex justify-between gap-x-10","children":[["$","div",null,{"className":"flex flex-col","children":[["$","a",null,{"target":"_blank","rel":"noopener noreferrer","href":"https://docs.pinot.apache.org/","className":"block py-1 text-gray-600 hover:text-gray-900","children":"Docs"}],["$","a",null,{"target":"_blank","rel":"noopener noreferrer","href":"https://docs.pinot.apache.org/getting-started","className":"block py-1 text-gray-600 hover:text-gray-900","children":"Getting Started"}],["$","a",null,{"target":"_blank","rel":"noopener noreferrer","href":"https://docs.pinot.apache.org/integrations/thirdeye","className":"block py-1 text-gray-600 hover:text-gray-900","children":"ThirdEye"}]]}],["$","div",null,{"className":"flex flex-col","children":[["$","$Lc",null,{"href":"/powered-by","className":"block py-1 text-gray-600 hover:text-gray-900","children":"Company Stories"}],["$","$Lc",null,{"href":"/download","className":"block py-1 text-gray-600 hover:text-gray-900","children":"Download"}],["$","$Lc",null,{"href":"/blog","className":"block py-1 text-gray-600 
hover:text-gray-900","children":"Blog"}]]}]]}]]}],["$","div","Apache",{"children":[["$","h5",null,{"className":"mb-4 text-lg font-semibold","children":"Apache"}],["$","div",null,{"className":"flex justify-between gap-x-10","children":[["$","div",null,{"className":"flex flex-col","children":[["$","a",null,{"target":"_blank","rel":"noopener noreferrer","href":"https://www.apache.org","className":"block py-1 text-gray-600 hover:text-gray-900","children":"Foundation"}],["$","a",null,{"target":"_blank","rel":"noopener noreferrer","href":"https://www.apache.org/licenses","className":"block py-1 text-gray-600 hover:text-gray-900","children":"License"}],["$","a",null,{"target":"_blank","rel":"noopener noreferrer","href":"https://www.apache.org/security","className":"block py-1 text-gray-600 hover:text-gray-900","children":"Security"}]]}],["$","div",null,{"className":"flex flex-col","children":[["$","a",null,{"target":"_blank","rel":"noopener noreferrer","href":"https://www.apache.org/foundation/sponsorship.html","className":"block py-1 text-gray-600 hover:text-gray-900","children":"Sponsorship"}],["$","a",null,{"target":"_blank","rel":"noopener noreferrer","href":"https://www.apache.org/events/current-event","className":"block py-1 text-gray-600 hover:text-gray-900","children":"Events"}],["$","a",null,{"target":"_blank","rel":"noopener noreferrer","href":"https://www.apache.org/foundation/thanks.html","className":"block py-1 text-gray-600 hover:text-gray-900","children":"Thanks"}]]}]]}]]}]]]}],["$","div",null,{"className":"mt-4 flex justify-center md:mt-0","children":[["$","a",null,{"target":"_blank","rel":"noopener noreferrer","href":"https://join.slack.com/t/apache-pinot/shared_invite/zt-5z7pav2f-yYtjZdVA~EDmrGkho87Vzw","className":"mr-4","children":["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","width":24,"height":24,"viewBox":"0 0 24 24","fill":"none","stroke":"currentColor","strokeWidth":2,"strokeLinecap":"round","strokeLinejoin":"round","className":"lucide 
lucide-slack fill-gray-900","children":[["$","rect","diqz80",{"width":"3","height":"8","x":"13","y":"2","rx":"1.5"}],["$","path","183iwg",{"d":"M19 8.5V10h1.5A1.5 1.5 0 1 0 19 8.5"}],["$","rect","hqg7r1",{"width":"3","height":"8","x":"8","y":"14","rx":"1.5"}],["$","path","76g71w",{"d":"M5 15.5V14H3.5A1.5 1.5 0 1 0 5 15.5"}],["$","rect","1kmz0a",{"width":"8","height":"3","x":"14","y":"13","rx":"1.5"}],["$","path","jc4sz0",{"d":"M15.5 19H14v1.5a1.5 1.5 0 1 0 1.5-1.5"}],["$","rect","1omvl4",{"width":"8","height":"3","x":"2","y":"8","rx":"1.5"}],["$","path","16f3cl",{"d":"M8.5 5H10V3.5A1.5 1.5 0 1 0 8.5 5"}],"$undefined"]}]}],["$","a",null,{"target":"_blank","rel":"noopener noreferrer","href":"https://github.com/apache/pinot","children":["$","svg",null,{"xmlns":"http://www.w3.org/2000/svg","width":24,"height":24,"fill":"currentColor","size":24,"children":[["$","g",null,{"clipPath":"url(#github_svg__a)","children":["$","path",null,{"fillRule":"evenodd","d":"M12.01 0C5.369 0 0 5.5 0 12.304c0 5.44 3.44 10.043 8.212 11.673.597.122.815-.265.815-.59 0-.286-.02-1.264-.02-2.283-3.34.734-4.036-1.466-4.036-1.466-.537-1.426-1.332-1.793-1.332-1.793-1.094-.754.08-.754.08-.754 1.212.082 1.849 1.263 1.849 1.263 1.073 1.874 2.803 1.345 3.5 1.019.098-.795.417-1.345.755-1.65-2.665-.285-5.468-1.345-5.468-6.07 0-1.345.477-2.445 1.232-3.3-.119-.306-.537-1.57.12-3.26 0 0 1.014-.326 3.3 1.263.98-.27 1.989-.407 3.003-.408 1.014 0 2.048.143 3.002.408 2.287-1.59 3.301-1.263 3.301-1.263.657 1.69.239 2.954.12 3.26.775.855 1.232 1.955 1.232 3.3 0 4.725-2.803 5.764-5.488 6.07.438.387.815 1.12.815 2.281 0 1.65-.02 2.975-.02 3.382 0 .326.22.713.816.59C20.56 22.347 24 17.744 24 12.305 24.02 5.5 18.63 0 12.01 0","clipRule":"evenodd"}]}],["$","defs",null,{"children":["$","clipPath",null,{"id":"github_svg__a","children":["$","path",null,{"fill":"#fff","d":"M0 0h24v24H0z"}]}]}]]}]}]]}]]}],["$","div",null,{"className":"mt-8 border-t border-neutral-300 pt-4 text-left text-sm 
text-gray-600","children":["Copyright © ",2024," The Apache Software Foundation. Apache Pinot, Pinot, Apache, the Apache feather logo, and the Apache Pinot project logo are registered trademarks of The Apache Software Foundation. This page has references to third party software - Presto, PrestoDB, ThirdEye, Trino, TrinoDB, that are not part of the Apache Software Foundation and are not covered under the Apache License."]}]]}]]}]}]]}]}]]}],null] 4:[["$","meta","0",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","1",{"charSet":"utf-8"}],["$","title","2",{"children":"Real-Time Mastodon Usage with Apache Kafka, Apache Pinot, and Streamlit | Apache Pinot™"}],["$","meta","3",{"name":"description","content":"The blog post discusses analyzing user activity and server popularity on Mastodon using Kafka Connect, Parquet, Seaborn, and DuckDB. It explores the potential of using Apache Pinot for real-time data streaming and creating a dashboard. The post provides instructions on ingesting Apache Avro messages into Pinot, creating a Pinot table, and querying the data."}],["$","meta","4",{"name":"robots","content":"index, follow"}],["$","meta","5",{"name":"googlebot","content":"index, follow, max-video-preview:-1, max-image-preview:large, max-snippet:-1"}],["$","link","6",{"rel":"canonical","href":"https://pinot.apache.org/blog/2023/06/01/real-time-mastodon-usage-with-apache-kafka-apache-pinot-and-streamlit"}],["$","link","7",{"rel":"alternate","type":"application/rss+xml","href":"https://pinot.apache.org/feed.xml"}],["$","meta","8",{"property":"og:title","content":"Real-Time Mastodon Usage with Apache Kafka, Apache Pinot, and Streamlit"}],["$","meta","9",{"property":"og:description","content":"The blog post discusses analyzing user activity and server popularity on Mastodon using Kafka Connect, Parquet, Seaborn, and DuckDB. It explores the potential of using Apache Pinot for real-time data streaming and creating a dashboard. 
The post provides instructions on ingesting Apache Avro messages into Pinot, creating a Pinot table, and querying the data."}],["$","meta","10",{"property":"og:url","content":"https://pinot.apache.org/blog/2023/06/01/real-time-mastodon-usage-with-apache-kafka-apache-pinot-and-streamlit"}],["$","meta","11",{"property":"og:site_name","content":"Apache Pinot™"}],["$","meta","12",{"property":"og:locale","content":"en_US"}],["$","meta","13",{"property":"og:image","content":"https://pinot.apache.org/static/images/twitter-card.png"}],["$","meta","14",{"property":"og:type","content":"article"}],["$","meta","15",{"property":"article:published_time","content":"2023-06-01T00:00:00.000Z"}],["$","meta","16",{"property":"article:modified_time","content":"2023-06-01T00:00:00.000Z"}],["$","meta","17",{"property":"article:author","content":"Mark Needham"}],["$","meta","18",{"name":"twitter:card","content":"summary_large_image"}],["$","meta","19",{"name":"twitter:title","content":"Real-Time Mastodon Usage with Apache Kafka, Apache Pinot, and Streamlit"}],["$","meta","20",{"name":"twitter:description","content":"The blog post discusses analyzing user activity and server popularity on Mastodon using Kafka Connect, Parquet, Seaborn, and DuckDB. It explores the potential of using Apache Pinot for real-time data streaming and creating a dashboard. 
The post provides instructions on ingesting Apache Avro messages into Pinot, creating a Pinot table, and querying the data."}],["$","meta","21",{"name":"twitter:image","content":"https://pinot.apache.org/static/images/twitter-card.png"}],["$","meta","22",{"name":"next-size-adjust"}]] 10:I[1514,["326","static/chunks/326-3a90a6443b9c824c.js","413","static/chunks/413-f9f40b83f7bb3f22.js","980","static/chunks/980-6e243f9cd384c7d2.js","797","static/chunks/app/blog/%5B...slug%5D/page-502e08b6677b55da.js"],""] 11:I[2529,["326","static/chunks/326-3a90a6443b9c824c.js","413","static/chunks/413-f9f40b83f7bb3f22.js","980","static/chunks/980-6e243f9cd384c7d2.js","797","static/chunks/app/blog/%5B...slug%5D/page-502e08b6677b55da.js"],""] 12:I[5185,["326","static/chunks/326-3a90a6443b9c824c.js","413","static/chunks/413-f9f40b83f7bb3f22.js","980","static/chunks/980-6e243f9cd384c7d2.js","797","static/chunks/app/blog/%5B...slug%5D/page-502e08b6677b55da.js"],""] e:[["$","script",null,{"type":"application/ld+json","dangerouslySetInnerHTML":{"__html":"{\"@context\":\"https://schema.org\",\"@type\":\"BlogPosting\",\"headline\":\"Real-Time Mastodon Usage with Apache Kafka, Apache Pinot, and Streamlit\",\"datePublished\":\"2023-06-01T00:00:00.000Z\",\"dateModified\":\"2023-06-01T00:00:00.000Z\",\"description\":\"The blog post discusses analyzing user activity and server popularity on Mastodon using Kafka Connect, Parquet, Seaborn, and DuckDB. It explores the potential of using Apache Pinot for real-time data streaming and creating a dashboard. 
The post provides instructions on ingesting Apache Avro messages into Pinot, creating a Pinot table, and querying the data.\",\"image\":\"/static/images/twitter-card.png\",\"url\":\"https://pinot.apache.org/blog/2023-06-01-real-time-mastodon-usage-with-apache-kafka-apache-pinot-and-streamlit\",\"author\":[{\"@type\":\"Person\",\"name\":\"Mark Needham\"}]}"}}],["$","section",null,{"className":" px-5 pt-10 md:px-[13.313rem] md:py-16","children":[["$","$L10",null,{}],["$","article",null,{"className":"","children":["$","div",null,{"className":"mx-auto lg:flex","children":[["$","div",null,{"className":"lg:pr-12","children":[["$","header",null,{"className":"pt-6 md:pr-10","children":[["$","h1",null,{"className":"text-4xl font-semibold","children":"Real-Time Mastodon Usage with Apache Kafka, Apache Pinot, and Streamlit"}],["$","p",null,{"className":"pt-2 text-lg","children":["By: ","Mark Needham"]}],["$","p",null,{"className":"py-2 text-sm","children":["June 1st, 2023"," • ","7 min read"]}]]}],["$","div",null,{"className":"flex flex-col lg:flex-row","children":["$","main",null,{"className":"","children":["$","div",null,{"className":"prose max-w-[45rem] pb-8 pt-10 dark:prose-invert","children":[["$","p",null,{"children":["I recently came across a fascinating blog post written by Simon Aubury that shows ",["$","a",null,{"target":"_blank","rel":"noopener noreferrer","href":"https://simonaubury.com/posts/202302_mastodon_duckdb/","children":"how to analyze user activity, server popularity, and language usage on Mastodon"}],", a decentralized social networking platform that has become quite popular in the last six months."]}],["$","h2",null,{"id":"the-existing-solution-kafka-connect-parquet-seaborn-and-duckdb","children":[["$","a",null,{"href":"#the-existing-solution-kafka-connect-parquet-seaborn-and-duckdb","aria-hidden":"true","tabIndex":"-1","children":["$","span",null,{"className":"icon icon-link"}]}],"The Existing Solution: Kafka Connect, Parquet, Seaborn and DuckDB 
"]}],["$","p",null,{"children":"To start, Simon wrote a listener to collect the messages, which he then published into Apache Kafka®. He then wrote a Kafka Connect configuration that consumes messages from Kafka and flushes them after every 1,000 messages into Apache Parquet files stored in an Amazon S3 bucket."}],["$","p",null,{"children":"Finally, he queried those Parquet files using DuckDB and created some charts using the Seaborn library, as reflected in the architecture diagram below:"}],["$","p",null,{"children":["$","img",null,{"alt":"Flowchart of data collection to data processing","src":"https://www.datocms-assets.com/75153/1685637607-image1.png","title":"Flowchart of data collection to data processing"}]}],["$","p",null,{"children":["Fig: ",["$","a",null,{"target":"_blank","rel":"noopener noreferrer","href":"https://simonaubury.com/posts/202302_mastodon_duckdb/","children":"Data Collection Architecture"}]]}],["$","p",null,{"children":"The awesome visualizations that Simon created make me wonder whether we can change what happens downstream of Kafka to make our queries even more real-time. Let’s find out!"}],["$","h2",null,{"id":"going-real-time-with-apache-pinot","children":[["$","a",null,{"href":"#going-real-time-with-apache-pinot","aria-hidden":"true","tabIndex":"-1","children":["$","span",null,{"className":"icon icon-link"}]}],"Going Real-Time with Apache Pinot™"]}],["$","p",null,{"children":["Now ",["$","a",null,{"target":"_blank","rel":"noopener noreferrer","href":"https://startree.ai/resources/what-is-apache-pinot","children":"Apache Pinot"}]," comes into the picture. 
Instead of using Kafka Connect to batch Mastodon toots into groups of 1,000 messages to generate Parquet files, we can stream the data immediately and directly, toot-by-toot into Pinot and then build a real-time dashboard using Streamlit:"]}],["$","p",null,{"children":["$","img",null,{"alt":"Data collection in Mastodon, followed by processing in Apache Kafka, Apache Pinot, and Streamlit","src":"https://www.datocms-assets.com/75153/1685637507-image4.png","title":"Data collection in Mastodon, followed by processing in Apache Kafka, Apache Pinot, and Streamlit"}]}],["$","h2",null,{"id":"setup","children":[["$","a",null,{"href":"#setup","aria-hidden":"true","tabIndex":"-1","children":["$","span",null,{"className":"icon icon-link"}]}],"Setup"]}],["$","p",null,{"children":"To follow along, first clone my fork of Simon’s GitHub repository:"}],["$","$L11",null,{"className":"language-bash","children":["$","code",null,{"className":"code-highlight language-bash","children":[["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token function","children":"git"}]," clone git@github.com:mneedham/mastodon-stream.git\n"]}],["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token builtin class-name","children":"cd"}]," mastodon-stream\n"]}]]}]}],["$","p",null,{"children":"Then launch all of the components using Docker Compose:"}],["$","$L11",null,{"className":"language-bash","children":["$","code",null,{"className":"code-highlight language-bash","children":["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token function","children":"docker-compose"}]," up\n"]}]}]}],["$","h2",null,{"id":"pinot-schema-and-table","children":[["$","a",null,{"href":"#pinot-schema-and-table","aria-hidden":"true","tabIndex":"-1","children":["$","span",null,{"className":"icon icon-link"}]}],"Pinot Schema and Table"]}],["$","p",null,{"children":"Similar to what Simon did with DuckDB, we’ll ingest the Mastodon 
events into a table. Pinot tables have a schema that’s defined in a schema file."}],["$","p",null,{"children":"To come up with a schema file, we need to know the structure of the ingested events. For example:"}],["$","$L11",null,{"className":"language-json","children":["$","code",null,{"className":"code-highlight language-json","children":[["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token punctuation","children":"{"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"m_id\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token number","children":"110146691030544274"}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"created_at\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token number","children":"1680705124"}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"created_at_str\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"2023 04 05 15:32:04\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"app\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token 
property","children":"\"url\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"https://mastodon.social/@Xingcat/110146690810165414\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"base_url\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"https://techhub.social\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"language\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"en\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"favourites\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token number","children":"0"}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"username\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"Xingcat\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"bot\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token boolean","children":"false"}],["$","span",null,{"className":"token 
punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"tags\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token number","children":"0"}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"characters\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token number","children":"196"}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"words\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token number","children":"36"}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"mastodon_text\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"Another, “I don’t know what this is yet,” paintings. Many, many layers that look like distressed metal or some sort of rock crosscut. 
Liking it so far, need to figure out what it’ll wind up being.\""}],"\n"]}],["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token punctuation","children":"}"}],"\n"]}]]}]}],["$","p",null,{"children":"Mapping these fields directly to columns is easiest and will result in a schema file that looks like this:"}],["$","$L11",null,{"className":"language-json","children":["$","code",null,{"className":"code-highlight language-json","children":[["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token punctuation","children":"{"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"schemaName\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"mastodon\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"dimensionFieldSpecs\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token punctuation","children":"["}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token punctuation","children":"{"}]," ",["$","span",null,{"className":"token property","children":"\"name\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"m_id\""}],["$","span",null,{"className":"token punctuation","children":","}]," ",["$","span",null,{"className":"token property","children":"\"dataType\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"LONG\""}]," ",["$","span",null,{"className":"token punctuation","children":"}"}],["$","span",null,{"className":"token 
punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token punctuation","children":"{"}]," ",["$","span",null,{"className":"token property","children":"\"name\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"created_at_str\""}],["$","span",null,{"className":"token punctuation","children":","}]," ",["$","span",null,{"className":"token property","children":"\"dataType\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"STRING\""}]," ",["$","span",null,{"className":"token punctuation","children":"}"}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token punctuation","children":"{"}]," ",["$","span",null,{"className":"token property","children":"\"name\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"app\""}],["$","span",null,{"className":"token punctuation","children":","}]," ",["$","span",null,{"className":"token property","children":"\"dataType\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"STRING\""}]," ",["$","span",null,{"className":"token punctuation","children":"}"}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token punctuation","children":"{"}]," ",["$","span",null,{"className":"token property","children":"\"name\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"url\""}],["$","span",null,{"className":"token punctuation","children":","}]," 
",["$","span",null,{"className":"token property","children":"\"dataType\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"STRING\""}]," ",["$","span",null,{"className":"token punctuation","children":"}"}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token punctuation","children":"{"}]," ",["$","span",null,{"className":"token property","children":"\"name\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"base_url\""}],["$","span",null,{"className":"token punctuation","children":","}]," ",["$","span",null,{"className":"token property","children":"\"dataType\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"STRING\""}]," ",["$","span",null,{"className":"token punctuation","children":"}"}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token punctuation","children":"{"}]," ",["$","span",null,{"className":"token property","children":"\"name\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"language\""}],["$","span",null,{"className":"token punctuation","children":","}]," ",["$","span",null,{"className":"token property","children":"\"dataType\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"STRING\""}]," ",["$","span",null,{"className":"token punctuation","children":"}"}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" 
",["$","span",null,{"className":"token punctuation","children":"{"}]," ",["$","span",null,{"className":"token property","children":"\"name\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"username\""}],["$","span",null,{"className":"token punctuation","children":","}]," ",["$","span",null,{"className":"token property","children":"\"dataType\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"STRING\""}]," ",["$","span",null,{"className":"token punctuation","children":"}"}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token punctuation","children":"{"}]," ",["$","span",null,{"className":"token property","children":"\"name\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"bot\""}],["$","span",null,{"className":"token punctuation","children":","}]," ",["$","span",null,{"className":"token property","children":"\"dataType\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"BOOLEAN\""}]," ",["$","span",null,{"className":"token punctuation","children":"}"}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token punctuation","children":"{"}]," ",["$","span",null,{"className":"token property","children":"\"name\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"mastodon_text\""}],["$","span",null,{"className":"token punctuation","children":","}]," ",["$","span",null,{"className":"token 
property","children":"\"dataType\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"STRING\""}]," ",["$","span",null,{"className":"token punctuation","children":"}"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token punctuation","children":"]"}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"metricFieldSpecs\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token punctuation","children":"["}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token punctuation","children":"{"}]," ",["$","span",null,{"className":"token property","children":"\"name\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"favourites\""}],["$","span",null,{"className":"token punctuation","children":","}]," ",["$","span",null,{"className":"token property","children":"\"dataType\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"INT\""}]," ",["$","span",null,{"className":"token punctuation","children":"}"}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token punctuation","children":"{"}]," ",["$","span",null,{"className":"token property","children":"\"name\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"words\""}],["$","span",null,{"className":"token punctuation","children":","}]," ",["$","span",null,{"className":"token 
property","children":"\"dataType\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"INT\""}]," ",["$","span",null,{"className":"token punctuation","children":"}"}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token punctuation","children":"{"}]," ",["$","span",null,{"className":"token property","children":"\"name\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"characters\""}],["$","span",null,{"className":"token punctuation","children":","}]," ",["$","span",null,{"className":"token property","children":"\"dataType\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"INT\""}]," ",["$","span",null,{"className":"token punctuation","children":"}"}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token punctuation","children":"{"}]," ",["$","span",null,{"className":"token property","children":"\"name\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"tags\""}],["$","span",null,{"className":"token punctuation","children":","}]," ",["$","span",null,{"className":"token property","children":"\"dataType\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"INT\""}]," ",["$","span",null,{"className":"token punctuation","children":"}"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token punctuation","children":"]"}],["$","span",null,{"className":"token 
punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"dateTimeFieldSpecs\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token punctuation","children":"["}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token punctuation","children":"{"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"name\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"created_at\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"dataType\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"LONG\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"format\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"1:MILLISECONDS:EPOCH\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"granularity\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"1:MILLISECONDS\""}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token punctuation","children":"}"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" 
",["$","span",null,{"className":"token punctuation","children":"]"}],"\n"]}],["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token punctuation","children":"}"}],"\n"]}]]}]}],["$","p",null,{"children":"Next up: our table config, shown below:"}],["$","$L11",null,{"className":"language-json","children":["$","code",null,{"className":"code-highlight language-json","children":[["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token punctuation","children":"{"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"tableName\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"mastodon\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"tableType\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"REALTIME\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"segmentsConfig\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token punctuation","children":"{"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"timeColumnName\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"created_at\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token 
property","children":"\"timeType\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"MILLISECONDS\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"schemaName\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"mastodon\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"replicasPerPartition\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"1\""}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token punctuation","children":"}"}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"tenants\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token punctuation","children":"{"}],["$","span",null,{"className":"token punctuation","children":"}"}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"tableIndexConfig\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token punctuation","children":"{"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"loadMode\""}],["$","span",null,{"className":"token operator","children":":"}]," 
",["$","span",null,{"className":"token string","children":"\"MMAP\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"streamConfigs\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token punctuation","children":"{"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"streamType\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"kafka\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"stream.kafka.consumer.type\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"lowLevel\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"stream.kafka.topic.name\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"mastodon-topic\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"stream.kafka.decoder.class.name\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"org.apache.pinot.plugin.inputformat.avro.confluent.KafkaConfluentSchemaRegistryAvroMessageDecoder\""}],["$","span",null,{"className":"token 
punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"stream.kafka.consumer.factory.class.name\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"org.apache.pinot.plugin.stream.kafka20.KafkaConsumerFactory\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"stream.kafka.decoder.prop.format\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"AVRO\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"stream.kafka.decoder.prop.schema.registry.rest.url\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"http://schema-registry:8081\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"stream.kafka.decoder.prop.schema.registry.schema.name\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"mastodon-topic-value\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"stream.kafka.broker.list\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"broker:9093\""}],["$","span",null,{"className":"token 
punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"stream.kafka.consumer.prop.auto.offset.reset\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"smallest\""}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token punctuation","children":"}"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token punctuation","children":"}"}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"metadata\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token punctuation","children":"{"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"customConfigs\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token punctuation","children":"{"}],["$","span",null,{"className":"token punctuation","children":"}"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token punctuation","children":"}"}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"routing\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token punctuation","children":"{"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token property","children":"\"instanceSelectorType\""}],["$","span",null,{"className":"token operator","children":":"}]," 
",["$","span",null,{"className":"token string","children":"\"strictReplicaGroup\""}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token punctuation","children":"}"}],"\n"]}],["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token punctuation","children":"}"}],"\n"]}]]}]}],["$","p",null,{"children":"The following configs represent the most important ones for ingesting Apache Avro™ messages into Pinot:"}],["$","$L11",null,{"className":"language-json","children":["$","code",null,{"className":"code-highlight language-json","children":[["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token property","children":"\"stream.kafka.decoder.class.name\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"org.apache.pinot.plugin.inputformat.avro.confluent.KafkaConfluentSchemaRegistryAvroMessageDecoder\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token property","children":"\"stream.kafka.decoder.prop.format\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"AVRO\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token property","children":"\"stream.kafka.decoder.prop.schema.registry.rest.url\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"http://schema-registry:8081\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}],["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token 
property","children":"\"stream.kafka.decoder.prop.schema.registry.schema.name\""}],["$","span",null,{"className":"token operator","children":":"}]," ",["$","span",null,{"className":"token string","children":"\"mastodon-topic-value\""}],["$","span",null,{"className":"token punctuation","children":","}],"\n"]}]]}]}],["$","p",null,{"children":"The KafkaConfluentSchemaRegistryAvroMessageDecoder decoder calls the Schema Registry with the schema name to get back the schema that it will use to decode messages."}],["$","p",null,{"children":"We can create the Pinot table by running the following command:"}],["$","$L11",null,{"className":"language-bash","children":["$","code",null,{"className":"code-highlight language-bash","children":[["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token function","children":"docker"}]," run ",["$","span",null,{"className":"token punctuation","children":"\\"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token variable parameter","children":"--network"}]," mastodon ",["$","span",null,{"className":"token punctuation","children":"\\"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token variable parameter","children":"-v"}]," ",["$","span",null,{"className":"token constant environment","children":"$$PWD"}],"/pinot:/config ",["$","span",null,{"className":"token punctuation","children":"\\"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" apachepinot/pinot:0.12.0-arm64 AddTable ",["$","span",null,{"className":"token punctuation","children":"\\"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token variable parameter","children":"-schemaFile"}]," /config/schema.json ",["$","span",null,{"className":"token punctuation","children":"\\"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token 
variable parameter","children":"-tableConfigFile"}]," /config/table.json ",["$","span",null,{"className":"token punctuation","children":"\\"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token variable parameter","children":"-controllerHost"}]," ",["$","span",null,{"className":"token string","children":"\"pinot-controller\""}]," ",["$","span",null,{"className":"token punctuation","children":"\\"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token variable parameter","children":"-exec"}],"\n"]}]]}]}],["$","p",null,{"children":"We can then navigate to the table page of the Pinot UI:"}],["$","p",null,{"children":["$","a",null,{"target":"_blank","rel":"noopener noreferrer","href":"http://localhost:9000/#/tenants/table/mastodon_REALTIME","children":"http://localhost:9000/#/tenants/table/mastodon_REALTIME"}]}],["$","p",null,{"children":"Here, we’ll see the following:"}],["$","p",null,{"children":["$","img",null,{"alt":"Apache Pinot table config and schema","src":"https://www.datocms-assets.com/75153/1685637837-image6.png","title":"Apache Pinot table config and schema"}]}],["$","h2",null,{"id":"ingest-data-into-kafka","children":[["$","a",null,{"href":"#ingest-data-into-kafka","aria-hidden":"true","tabIndex":"-1","children":["$","span",null,{"className":"icon icon-link"}]}],"Ingest Data into Kafka"]}],["$","p",null,{"children":"Now, we need to start ingesting data into Kafka. 
Simon created a script that accomplishes this for us, so we just need to indicate which Mastodon servers to query."}],["$","$L11",null,{"className":"language-bash","children":["$","code",null,{"className":"code-highlight language-bash","children":[["$","span",null,{"className":"code-line","children":["python mastodonlisten.py ",["$","span",null,{"className":"token variable parameter","children":"--baseURL"}]," https://data-folks.masto.host ",["$","span",null,{"className":"token punctuation","children":"\\"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token variable parameter","children":"--public"}]," ",["$","span",null,{"className":"token variable parameter","children":"--enableKafka"}]," ",["$","span",null,{"className":"token variable parameter","children":"--quiet"}],"\n"]}],["$","span",null,{"className":"code-line","children":["python mastodonlisten.py ",["$","span",null,{"className":"token variable parameter","children":"--baseURL"}]," https://fosstodon.org/ ",["$","span",null,{"className":"token punctuation","children":"\\"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token variable parameter","children":"--public"}]," ",["$","span",null,{"className":"token variable parameter","children":"--enableKafka"}]," ",["$","span",null,{"className":"token variable parameter","children":"--quiet"}],"\n"]}],["$","span",null,{"className":"code-line","children":["python mastodonlisten.py ",["$","span",null,{"className":"token variable parameter","children":"--baseURL"}]," https://mstdn.social/ ",["$","span",null,{"className":"token punctuation","children":"\\"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token variable parameter","children":"--public"}]," ",["$","span",null,{"className":"token variable parameter","children":"--enableKafka"}]," ",["$","span",null,{"className":"token variable 
parameter","children":"--quiet"}],"\n"]}]]}]}],["$","p",null,{"children":["We can then check the ingestion of messages with the ",["$","a",null,{"target":"_blank","rel":"noopener noreferrer","href":"https://docs.confluent.io/platform/current/clients/kafkacat-usage.html","children":"kcat"}]," command line tool:"]}],["$","$L11",null,{"className":"language-bash","children":["$","code",null,{"className":"code-highlight language-bash","children":[["$","span",null,{"className":"code-line","children":["kcat ",["$","span",null,{"className":"token variable parameter","children":"-C"}]," ",["$","span",null,{"className":"token variable parameter","children":"-b"}]," localhost:9092 ",["$","span",null,{"className":"token variable parameter","children":"-t"}]," mastodon-topic ",["$","span",null,{"className":"token punctuation","children":"\\"}],"\n"]}],["$","span",null,{"className":"code-line","children":[" ",["$","span",null,{"className":"token variable parameter","children":"-s"}]," ",["$","span",null,{"className":"token variable assign-left","children":"value"}],["$","span",null,{"className":"token operator","children":"="}],"avro ",["$","span",null,{"className":"token variable parameter","children":"-r"}]," http://localhost:8081 ",["$","span",null,{"className":"token variable parameter","children":"-e"}],"\n"]}]]}]}],["$","h2",null,{"id":"query-pinot","children":[["$","a",null,{"href":"#query-pinot","aria-hidden":"true","tabIndex":"-1","children":["$","span",null,{"className":"icon icon-link"}]}],"Query Pinot"]}],["$","p",null,{"children":"Now, let’s go to the Pinot UI to see what data we’ve got to play with:"}],["$","p",null,{"children":["$","a",null,{"target":"_blank","rel":"noopener noreferrer","href":"http://localhost:9000/","children":"http://localhost:9000"}]}],["$","p",null,{"children":"We’ll see the following preview of the data in the mastodon table:"}],["$","p",null,{"children":["$","img",null,{"alt":"SQL Editor, query response stats, and query result in Apache 
Pinot","src":"https://www.datocms-assets.com/75153/1685637772-image5.png","title":"SQL Editor, query response stats, and query result in Apache Pinot"}]}],["$","p",null,{"children":"We can then write a query to find the number of messages posted in the last minute:"}],["$","$L11",null,{"className":"language-sql","children":["$","code",null,{"className":"code-highlight language-sql","children":[["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token keyword","children":"select"}]," ",["$","span",null,{"className":"token function","children":"count"}],["$","span",null,{"className":"token punctuation","children":"("}],["$","span",null,{"className":"token operator","children":"*"}],["$","span",null,{"className":"token punctuation","children":")"}]," ",["$","span",null,{"className":"token keyword","children":"as"}]," ",["$","span",null,{"className":"token string","children":"\"Num toots\""}],"\n"]}],["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token punctuation","children":","}]," ",["$","span",null,{"className":"token function","children":"count"}],["$","span",null,{"className":"token punctuation","children":"("}],["$","span",null,{"className":"token keyword","children":"distinct"}],["$","span",null,{"className":"token punctuation","children":"("}],"username",["$","span",null,{"className":"token punctuation","children":")"}],["$","span",null,{"className":"token punctuation","children":")"}]," ",["$","span",null,{"className":"token keyword","children":"as"}]," ",["$","span",null,{"className":"token string","children":"\"Num users\""}],"\n"]}],["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token punctuation","children":","}]," ",["$","span",null,{"className":"token function","children":"count"}],["$","span",null,{"className":"token punctuation","children":"("}],["$","span",null,{"className":"token 
keyword","children":"distinct"}],["$","span",null,{"className":"token punctuation","children":"("}],"url",["$","span",null,{"className":"token punctuation","children":")"}],["$","span",null,{"className":"token punctuation","children":")"}]," ",["$","span",null,{"className":"token keyword","children":"as"}]," ",["$","span",null,{"className":"token string","children":"\"Num urls\""}],"\n"]}],["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token keyword","children":"from"}]," mastodon\n"]}],["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token keyword","children":"where"}]," created_at",["$","span",null,{"className":"token operator","children":"*"}],["$","span",null,{"className":"token number","children":"1000"}]," ",["$","span",null,{"className":"token operator","children":">"}]," ago",["$","span",null,{"className":"token punctuation","children":"("}],["$","span",null,{"className":"token string","children":"'PT1M'"}],["$","span",null,{"className":"token punctuation","children":")"}],"\n"]}],["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token keyword","children":"order"}]," ",["$","span",null,{"className":"token keyword","children":"by"}]," ",["$","span",null,{"className":"token number","children":"1"}]," ",["$","span",null,{"className":"token keyword","children":"DESC"}],["$","span",null,{"className":"token punctuation","children":";"}],"\n"]}]]}]}],["$","p",null,{"children":["$","img",null,{"alt":"Query results for toots, users, and urls","src":"https://www.datocms-assets.com/75153/1685637909-image8.png","title":"Query results for toots, users, and urls"}]}],["$","p",null,{"children":"We can also query Pinot via the Python client, which we can install by running the following:"}],["$","$L11",null,{"className":"language-bash","children":["$","code",null,{"className":"code-highlight 
language-bash","children":["$","span",null,{"className":"code-line","children":["pip ",["$","span",null,{"className":"token function","children":"install"}]," pinotdb\n"]}]}]}],["$","p",null,{"children":"Once we’ve done that, let’s open the Python REPL and run the following code:"}],["$","$L11",null,{"className":"language-python","children":["$","code",null,{"className":"code-highlight language-python","children":[["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token keyword","children":"from"}]," pinotdb ",["$","span",null,{"className":"token keyword","children":"import"}]," connect\n"]}],["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token keyword","children":"import"}]," pandas ",["$","span",null,{"className":"token keyword","children":"as"}]," pd\n"]}],["$","span",null,{"className":"code-line","children":"\n"}],["$","span",null,{"className":"code-line","children":["conn ",["$","span",null,{"className":"token operator","children":"="}]," connect",["$","span",null,{"className":"token punctuation","children":"("}],"host",["$","span",null,{"className":"token operator","children":"="}],["$","span",null,{"className":"token string","children":"'localhost'"}],["$","span",null,{"className":"token punctuation","children":","}]," port",["$","span",null,{"className":"token operator","children":"="}],["$","span",null,{"className":"token number","children":"8099"}],["$","span",null,{"className":"token punctuation","children":","}]," path",["$","span",null,{"className":"token operator","children":"="}],["$","span",null,{"className":"token string","children":"'/query/sql'"}],["$","span",null,{"className":"token punctuation","children":","}]," scheme",["$","span",null,{"className":"token operator","children":"="}],["$","span",null,{"className":"token string","children":"'http'"}],["$","span",null,{"className":"token 
punctuation","children":")"}],"\n"]}],["$","span",null,{"className":"code-line","children":"\n"}],["$","span",null,{"className":"code-line","children":["curs ",["$","span",null,{"className":"token operator","children":"="}]," conn",["$","span",null,{"className":"token punctuation","children":"."}],"cursor",["$","span",null,{"className":"token punctuation","children":"("}],["$","span",null,{"className":"token punctuation","children":")"}],"\n"]}],["$","span",null,{"className":"code-line","children":"\n"}],["$","span",null,{"className":"code-line","children":["st",["$","span",null,{"className":"token punctuation","children":"."}],"header",["$","span",null,{"className":"token punctuation","children":"("}],["$","span",null,{"className":"token string","children":"\"Daily Mastodon Usage\""}],["$","span",null,{"className":"token punctuation","children":")"}],"\n"]}],["$","span",null,{"className":"code-line","children":["query ",["$","span",null,{"className":"token operator","children":"="}]," ",["$","span",null,{"className":"token string triple-quoted-string","children":"\"\"\"\n"}]]}],["$","span",null,{"className":"code-line","children":["$","span",null,{"className":"token string triple-quoted-string","children":"select count(*) as \"Num toots\"\n"}]}],["$","span",null,{"className":"code-line","children":["$","span",null,{"className":"token string triple-quoted-string","children":", count(distinct(username)) as \"Num users\"\n"}]}],["$","span",null,{"className":"code-line","children":["$","span",null,{"className":"token string triple-quoted-string","children":", count(distinct(url)) as \"Num urls\"\n"}]}],["$","span",null,{"className":"code-line","children":["$","span",null,{"className":"token string triple-quoted-string","children":"from mastodon\n"}]}],["$","span",null,{"className":"code-line","children":["$","span",null,{"className":"token string triple-quoted-string","children":"where created_at*1000 > 
ago('PT1M')\n"}]}],["$","span",null,{"className":"code-line","children":["$","span",null,{"className":"token string triple-quoted-string","children":"order by 1 DESC;\n"}]}],["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token string triple-quoted-string","children":"\"\"\""}],"\n"]}],["$","span",null,{"className":"code-line","children":["curs",["$","span",null,{"className":"token punctuation","children":"."}],"execute",["$","span",null,{"className":"token punctuation","children":"("}],"query",["$","span",null,{"className":"token punctuation","children":")"}],"\n"]}],["$","span",null,{"className":"code-line","children":"\n"}],["$","span",null,{"className":"code-line","children":["df ",["$","span",null,{"className":"token operator","children":"="}]," pd",["$","span",null,{"className":"token punctuation","children":"."}],"DataFrame",["$","span",null,{"className":"token punctuation","children":"("}],"curs",["$","span",null,{"className":"token punctuation","children":","}]," columns",["$","span",null,{"className":"token operator","children":"="}],["$","span",null,{"className":"token punctuation","children":"["}],"item",["$","span",null,{"className":"token punctuation","children":"["}],["$","span",null,{"className":"token number","children":"0"}],["$","span",null,{"className":"token punctuation","children":"]"}]," ",["$","span",null,{"className":"token keyword","children":"for"}]," item ",["$","span",null,{"className":"token keyword","children":"in"}]," curs",["$","span",null,{"className":"token punctuation","children":"."}],"description",["$","span",null,{"className":"token punctuation","children":"]"}],["$","span",null,{"className":"token punctuation","children":")"}],"\n"]}]]}]}],["$","p",null,{"children":"This produces the resulting DataFrame:"}],["$","$L11",null,{"className":"language-js","children":["$","code",null,{"className":"code-highlight language-js","children":[["$","span",null,{"className":"code-line","children":[" 
",["$","span",null,{"className":"token maybe-class-name","children":"Num"}]," toots ",["$","span",null,{"className":"token maybe-class-name","children":"Num"}]," users ",["$","span",null,{"className":"token maybe-class-name","children":"Num"}]," urls\n"]}],["$","span",null,{"className":"code-line","children":[["$","span",null,{"className":"token number","children":"0"}]," ",["$","span",null,{"className":"token number","children":"552"}]," ",["$","span",null,{"className":"token number","children":"173"}]," ",["$","span",null,{"className":"token number","children":"192"}],"\n"]}]]}]}],["$","h2",null,{"id":"streamlit","children":[["$","a",null,{"href":"#streamlit","aria-hidden":"true","tabIndex":"-1","children":["$","span",null,{"className":"icon icon-link"}]}],"Streamlit"]}],["$","p",null,{"children":"Next, we’ll create a Streamlit dashboard to package up these queries. We’ll visualize the results using Plotly, which you can install using:"}],["$","p",null,{"children":"pip install streamlit plotly"}],["$","p",null,{"children":["I’ve created a Streamlit app in the file ",["$","a",null,{"target":"_blank","rel":"noopener noreferrer","href":"https://github.com/mneedham/mastodon-stream/blob/main/app.py","children":"app.py"}],", which you can find in the GitHub repository. Let’s have a look at the kinds of visualizations that we can generate."]}],["$","p",null,{"children":["First, we’ll create metrics to show the number of toots, users, and URLs in the last ",["$","em",null,{"children":"n"}]," minutes. 
",["$","em",null,{"children":"n"}]," will be configurable from the app as shown in the screenshot below:"]}],["$","p",null,{"children":["$","img",null,{"alt":"Chart of real-time Mastodon usage","src":"https://www.datocms-assets.com/75153/1685637876-image7.png","title":"Chart of real-time Mastodon usage"}]}],["$","p",null,{"children":"From the screenshot, we can identify mastodon.cloud as the most active server, though it produces only 1,800 messages in 10 minutes or three messages per second. The values in green indicate the change in values compared to the previous 10 minutes."}],["$","p",null,{"children":"We can also create a chart showing the number of messages per minute for the last 10 minutes:"}],["$","p",null,{"children":["$","img",null,{"alt":"Time of day Mastodon usage","src":"https://www.datocms-assets.com/75153/1685637945-image9.png","title":"Time of day Mastodon usage"}]}],["$","p",null,{"children":"Based on this chart, we can see that we’re creating anywhere from 200–900 messages per minute. Part of the reason lies in the fact that the Mastodon servers sometimes disconnect our listener, and at the moment, I have to manually reconnect."}],["$","p",null,{"children":"Finally, we can look at the toot length by language:"}],["$","p",null,{"children":["$","img",null,{"alt":"Chart of toot length by language usage","src":"https://www.datocms-assets.com/75153/1685637644-image2.png","title":"Chart of toot length by language usage"}]}],["$","p",null,{"children":"We see much bigger ranges here than Simon saw in his analysis. 
He saw a maximum length of 200 characters, whereas we see some messages of up to 4,200 characters."}],["$","h2",null,{"id":"summary","children":[["$","a",null,{"href":"#summary","aria-hidden":"true","tabIndex":"-1","children":["$","span",null,{"className":"icon icon-link"}]}],"Summary"]}],["$","p",null,{"children":["We hope you enjoyed following along as we explored this fun use case for ",["$","a",null,{"target":"_blank","rel":"noopener noreferrer","href":"https://startree.ai/resources/what-is-real-time-analytics","children":"real-time analytics"}],". As you can see, even though we’re pulling the data from many of the popular Mastodon servers, it’s still not all that much data!"]}],["$","p",null,{"children":["Give the code a try and let us know how it goes. If you have any questions, feel free to ",["$","a",null,{"target":"_blank","rel":"noopener noreferrer","href":"https://stree.ai/slack","children":"join us on Slack"}],", where we’ll gladly do our best to help you out."]}]]}]}]}]]}],["$","aside",null,{"className":"mt-10 hidden border-l-2 pl-5 lg:sticky lg:top-1 lg:block lg:h-full","children":["$","section",null,{"className":"sticky top-0 mb-4 w-[15.375rem]","children":[["$","div",null,{"className":"flex flex-col space-y-1.5 pb-3","children":["$","h3",null,{"className":"text-sm font-semibold leading-snug text-neutral-500 dark:text-neutral-100","children":"Table of Contents"}]}],["$","$L12",null,{"chapters":[{"value":"The Existing Solution: Kafka Connect, Parquet, Seaborn and DuckDB ","url":"#the-existing-solution-kafka-connect-parquet-seaborn-and-duckdb","depth":2},{"value":"Going Real-Time with Apache Pinot™","url":"#going-real-time-with-apache-pinot","depth":2},{"value":"Setup","url":"#setup","depth":2},{"value":"Pinot Schema and Table","url":"#pinot-schema-and-table","depth":2},{"value":"Ingest Data into Kafka","url":"#ingest-data-into-kafka","depth":2},{"value":"Query 
Pinot","url":"#query-pinot","depth":2},{"value":"Streamlit","url":"#streamlit","depth":2},{"value":"Summary","url":"#summary","depth":2}]}]]}]}]]}]}]]}]] d:null