diff --git a/code/lib/Loading_PudMed.py b/code/lib/Loading_PudMed.py index 43916a5..31104b5 100644 --- a/code/lib/Loading_PudMed.py +++ b/code/lib/Loading_PudMed.py @@ -13,7 +13,7 @@ import time sys.path.append('lib') -import lib.Medline +from Bio import Medline import os date = startTime = datetime.now().strftime("%Y_%m_%d_%H_%M_%S") diff --git a/code/lib/__pycache__/Loading_PudMed.cpython-311.pyc b/code/lib/__pycache__/Loading_PudMed.cpython-311.pyc index 4786d65..8f0ff86 100644 Binary files a/code/lib/__pycache__/Loading_PudMed.cpython-311.pyc and b/code/lib/__pycache__/Loading_PudMed.cpython-311.pyc differ diff --git a/code/step1_data_collection.py b/code/step1_data_collection.py index 35538d6..e01710c 100644 --- a/code/step1_data_collection.py +++ b/code/step1_data_collection.py @@ -33,16 +33,16 @@ ld = literature_data_collection(email, output_dir, document_output_dir, api_key=api_key) ########### word query based literature data collection ################# -gap=9000 -batch = 400 -w2d_starting_point = 2 +# gap=9000 +# batch = 400 +# w2d_starting_point = 2 -search_results, _word_end_point = ld.word_based_query_fit(year = years, user_term=word_query) -print('The number of avaliable abstracts :', _word_end_point, 'for ', word_query) +# search_results, _word_end_point = ld.word_based_query_fit(year = years, user_term=word_query) +# print('The number of avaliable abstracts :', _word_end_point, 'for ', word_query) -if int(sys.argv[2])==0: - word_end_point = _word_end_point -ld.collecting_doc_using_word_based_query(year = years, user_term=word_query, gap = gap, starting = gap*w2d_starting_point, ixs = w2d_starting_point, test_end_point=word_end_point) +# if int(sys.argv[2])==0: +# word_end_point = _word_end_point +# ld.collecting_doc_using_word_based_query(year = years, user_term=word_query, gap = gap, starting = gap*w2d_starting_point, ixs = w2d_starting_point, test_end_point=word_end_point) ########### gene name-query based literature data collection ################# query_full=ld.text_open('./data/gene_name_info/query_full_name.txt') diff --git a/esearch.dtd b/esearch.dtd new file mode 100644 index 0000000..bd11e35 --- /dev/null +++ b/esearch.dtd @@ -0,0 +1,103 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +