fp/apps/twscrape/examples/parallel_search_with_limit.py
CJ_Clippy de5a4c11b2 git subrepo clone https://github.com/vladkens/twscrape ./apps/twscrape
subrepo:
  subdir:   "apps/twscrape"
  merged:   "e4dce5d3"
upstream:
  origin:   "https://github.com/vladkens/twscrape"
  branch:   "main"
  commit:   "e4dce5d3"
git-subrepo:
  version:  "0.4.9"
  origin:   "???"
  commit:   "???"
2025-03-11 08:51:36 -08:00

47 lines
1.2 KiB
Python

"""
This example shows how to run twscrape searches in parallel with a concurrency limit.
"""
import asyncio
import time
import twscrape
async def worker(queue: asyncio.Queue, api: twscrape.API):
    """Process search queries taken from *queue*, one at a time, forever.

    For every query the results of ``api.search(query)`` are collected with
    ``twscrape.gather`` and a summary line (query, number of results, current
    Unix time in whole seconds) is printed. If anything in that step raises an
    ``Exception``, the query and the exception type are printed instead and
    the worker moves on to the next query.

    The loop has no exit condition of its own; the task running this
    coroutine has to be cancelled from the outside.
    """
    while True:
        # Suspends here until a query is available.
        query = await queue.get()
        try:
            search_results = api.search(query)
            tweets = await twscrape.gather(search_results)
            print(f"{query} - {len(tweets)} - {int(time.time())}")
            # do something with tweets here, e.g. save to file, etc
        except Exception as e:
            # Report and keep going: one failed query must not stop the worker.
            print(f"Error on {query} - {type(e)}")
        finally:
            # Always balance the get() above, success or failure, so that
            # queue.join() in the caller can complete.
            queue.task_done()
async def main():
    """Run a fixed list of search queries through a bounded pool of workers.

    Creates the API object, registers and logs in an account, fills a queue
    with the queries and starts ``workers_count`` ``worker`` tasks that drain
    it. Since each worker handles one query at a time, ``workers_count`` is
    the maximum number of searches running concurrently.

    Once every queued query has been marked done, the worker tasks (which loop
    forever on their own) are cancelled and awaited so that they have fully
    finished before this coroutine returns.
    """
    api = twscrape.API()
    # add accounts here or before from cli (see README.md for examples)
    await api.pool.add_account("u1", "p1", "eu1", "ep1")
    await api.pool.login_all()

    queries = ["elon musk", "tesla", "spacex", "neuralink", "boring company"]
    queue = asyncio.Queue()

    # Concurrency limit: at most this many searches run at the same time.
    workers_count = 2
    workers = [asyncio.create_task(worker(queue, api)) for _ in range(workers_count)]

    for q in queries:
        queue.put_nowait(q)

    # Blocks until task_done() has been called once for every queued query.
    await queue.join()

    for worker_task in workers:
        worker_task.cancel()
    # cancel() only *requests* cancellation; wait until the tasks have actually
    # finished. return_exceptions=True makes gather() hand back each task's
    # CancelledError as a result instead of raising it here.
    await asyncio.gather(*workers, return_exceptions=True)
if __name__ == "__main__":
    # Executed only when the file is run as a script (not when imported):
    # run main() to completion on a new event loop.
    asyncio.run(main())