Skip to content

Updated #3

Merged
merged 1 commit into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion code/lib/Loading_PudMed.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import time
sys.path.append('lib')

import lib.Medline
from Bio import Medline
import os

date = startTime = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
Expand Down
Binary file modified code/lib/__pycache__/Loading_PudMed.cpython-311.pyc
Binary file not shown.
16 changes: 8 additions & 8 deletions code/step1_data_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,16 @@
ld = literature_data_collection(email, output_dir, document_output_dir, api_key=api_key)

########### word query based literature data collection #################
gap=9000
batch = 400
w2d_starting_point = 2
# gap=9000
# batch = 400
# w2d_starting_point = 2

search_results, _word_end_point = ld.word_based_query_fit(year = years, user_term=word_query)
print('The number of avaliable abstracts :', _word_end_point, 'for ', word_query)
# search_results, _word_end_point = ld.word_based_query_fit(year = years, user_term=word_query)
# print('The number of avaliable abstracts :', _word_end_point, 'for ', word_query)

if int(sys.argv[2])==0:
word_end_point = _word_end_point
ld.collecting_doc_using_word_based_query(year = years, user_term=word_query, gap = gap, starting = gap*w2d_starting_point, ixs = w2d_starting_point, test_end_point=word_end_point)
# if int(sys.argv[2])==0:
# word_end_point = _word_end_point
# ld.collecting_doc_using_word_based_query(year = years, user_term=word_query, gap = gap, starting = gap*w2d_starting_point, ixs = w2d_starting_point, test_end_point=word_end_point)

########### gene name-query based literature data collection #################
query_full=ld.text_open('./data/gene_name_info/query_full_name.txt')
Expand Down
103 changes: 103 additions & 0 deletions esearch.dtd
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
<!--
This is the Current DTD for Entrez eSearch
$Id: eSearch_020511.dtd 85163 2006-06-28 17:35:21Z olegh $
-->
<!-- ================================================================= -->

<!--~~ !dtd
~~json
<json type='esearch' version='0.3'>
<config lcnames='true'/>
</json>
~~-->

<!--
Root element of an eSearch response.
Content model, as declared below:
  * either Count, optionally followed by the full result group
    (RetMax, RetStart, optional QueryKey, optional WebEnv, IdList,
    TranslationSet, optional TranslationStack, QueryTranslation),
    or a single ERROR element in place of all of the above;
  * then an optional ErrorList;
  * then an optional WarningList.
-->
<!ELEMENT eSearchResult (
(
(
Count,
( RetMax,
RetStart,
QueryKey?,
WebEnv?,
IdList,
TranslationSet,
TranslationStack?,
QueryTranslation
)?
) | ERROR
),
ErrorList?,
WarningList?
)>


<!ELEMENT Count (#PCDATA)> <!-- \d+ -->
<!ELEMENT RetMax (#PCDATA)> <!-- \d+ -->
<!ELEMENT RetStart (#PCDATA)> <!-- \d+ -->
<!ELEMENT Id (#PCDATA)> <!-- \d+ -->

<!ELEMENT From (#PCDATA)> <!-- .+ -->
<!ELEMENT To (#PCDATA)> <!-- .+ -->
<!ELEMENT Term (#PCDATA)> <!-- .+ -->

<!ELEMENT Field (#PCDATA)> <!-- .+ -->

<!ELEMENT QueryKey (#PCDATA)> <!-- \d+ -->
<!ELEMENT WebEnv (#PCDATA)> <!-- \S+ -->

<!ELEMENT Explode (#PCDATA)> <!-- (Y|N) -->
<!ELEMENT OP (#PCDATA)> <!-- (AND|OR|NOT|RANGE|GROUP) -->
<!ELEMENT IdList (Id*)>

<!ELEMENT Translation (From, To)>
<!ELEMENT TranslationSet (Translation*)>

<!ELEMENT TermSet (Term, Field, Count, Explode)>

<!--~~ <TranslationStack>
~~ json <array/>
~~-->
<!ELEMENT TranslationStack ((TermSet|OP)*)>

<!-- Error message tags -->
<!--~~ <ERROR>
~~ json <json key="ERROR"/>
~~-->
<!ELEMENT ERROR (#PCDATA)> <!-- .+ -->

<!ELEMENT OutputMessage (#PCDATA)> <!-- .+ -->

<!ELEMENT QuotedPhraseNotFound (#PCDATA)> <!-- .+ -->

<!ELEMENT PhraseIgnored (#PCDATA)> <!-- .+ -->

<!ELEMENT FieldNotFound (#PCDATA)> <!-- .+ -->

<!ELEMENT PhraseNotFound (#PCDATA)> <!-- .+ -->


<!ELEMENT QueryTranslation (#PCDATA)> <!-- .+ -->

<!--~~ <ErrorList>
~~ json
<object>
<array key="phrasesnotfound" select='PhraseNotFound'/>
<array key="fieldsnotfound" select='FieldNotFound'/>
</object>
~~-->
<!ELEMENT ErrorList (PhraseNotFound*,FieldNotFound*)>

<!--~~ <WarningList>
~~ json
<object>
<array key="phrasesignored" select='PhraseIgnored'/>
<array key="quotedphrasesnotfound" select='QuotedPhraseNotFound'/>
<array key="outputmessages" select='OutputMessage'/>
</object>
~~-->
<!ELEMENT WarningList ( PhraseIgnored*,
QuotedPhraseNotFound*,
OutputMessage* )>