diff --git a/code/step_1_data_collection_Luis.py b/code/step_1_data_collection_Luis.py
index a18473b..5945465 100644
--- a/code/step_1_data_collection_Luis.py
+++ b/code/step_1_data_collection_Luis.py
@@ -3,6 +3,8 @@
 from io import StringIO
 from Bio import Medline
 from io import BytesIO
+import time
+import os
 
 class PubMedDownloader:
     def __init__(self, api_key, email):
@@ -11,36 +13,47 @@ def __init__(self, api_key, email):
         self.email = email
         Entrez.email = email  # Setting email for Biopython Entrez
 
-    def fetch_pubmed_data(self, query, batch_size=10000):
-        search_url = f"{self.base_url}esearch.fcgi?db=pubmed&term={query}&retmax=1&api_key={self.api_key}&usehistory=y"
-        search_response = requests.get(search_url)
-        if search_response.status_code == 200:
-            try:
-                # Use BytesIO for binary data
-                search_results = Entrez.read(BytesIO(search_response.content))
-                webenv = search_results['WebEnv']
-                query_key = search_results['QueryKey']
-                count = int(search_results['Count'])
-                print(f"Total records found: {count}")
-            except Exception as e:
-                print("Error reading search results:", e)
-                return []
-        else:
-            print("Failed to retrieve search results")
-            return []
-
+    def fetch_pubmed_data(self, query, year, max_records_per_query=9999):
         records = []
-        for start in range(0, count, batch_size):
-            fetch_url = f"{self.base_url}efetch.fcgi?db=pubmed&rettype=abstract&retmode=text&retstart={start}&retmax={batch_size}&webenv={webenv}&query_key={query_key}&api_key={self.api_key}"
-            fetch_response = requests.get(fetch_url)
-            if fetch_response.status_code == 200:
-                records.extend(fetch_response.content.decode('utf-8').split('\n\n'))  # Each record separated by two newlines
-                print(f"Fetched {start + batch_size} of {count} records")
-            else:
-                print(f"Failed to fetch data for batch starting at {start}")
+        attempt = 0
+        max_attempts = 5
+        while attempt < max_attempts:
+            try:
+                search_url = f"{self.base_url}esearch.fcgi?db=pubmed&term={query}&retmax=1&api_key={self.api_key}&usehistory=y"
+                search_response = requests.get(search_url, timeout=10)
+                if search_response.status_code == 200:
+                    search_results = Entrez.read(BytesIO(search_response.content))
+                    webenv = search_results['WebEnv']
+                    query_key = search_results['QueryKey']
+                    count = int(search_results['Count'])
+                    print(f"Total records found for the query '{query}': {count}")
+
+                    for start in range(0, min(count, max_records_per_query), max_records_per_query):
+                        fetch_url = f"{self.base_url}efetch.fcgi?db=pubmed&rettype=medline&retmode=text&retstart={start}&retmax={max_records_per_query}&webenv={webenv}&query_key={query_key}&api_key={self.api_key}"
+                        fetch_response = requests.get(fetch_url, timeout=10)
+                        records.append(fetch_response.text)
+                        print(f"Fetched records starting from {start}")
+                    break
+                else:
+                    print(f"Failed to initiate search with status {search_response.status_code}")
+            except requests.exceptions.RequestException as e:
+                attempt += 1
+                print(f"Attempt {attempt}: An error occurred: {e}")
+                time.sleep(2 ** attempt)  # Exponential backoff
+        # Save records to a file
+        self.save_records_to_file(query, year, records)
         return records
 
+    def save_records_to_file(self, query, year, records):
+        directory = f"./results/baseline_doc"
+        os.makedirs(directory, exist_ok=True)  # Create directory if it doesn't exist
+        filename = f"{query}.{year}.txt"
+        file_path = os.path.join(directory, filename)
+        with open(file_path, 'w', encoding='utf-8') as file:
+            file.write("\n".join(records))  # Each record is separated by a newline
+        print(f"Saved records to {file_path}")
+
 class ids_pubmed():
     def __init__(self):
         self.snp_ids = []
@@ -104,4 +117,7 @@ def search_ids(self, search_email):
 downloader = PubMedDownloader(api_key, email)
 topic = "zinc"  # Define the topic of interest
-pubmed_records = downloader.fetch_pubmed_data(topic, 10000)  # Adjust batch size as needed
\ No newline at end of file
+# Fetch and save records by year
+for year in range(1990, 2023):  # Example range of years
+    year_query = f"{topic} AND {year}[Date]"
+    downloader.fetch_pubmed_data(year_query, year)
\ No newline at end of file