Skip to content

Commit

Permalink
Updated
Browse files Browse the repository at this point in the history
Support in the files and documents.
  • Loading branch information
lrm22005 committed May 6, 2024
1 parent fe91786 commit 0e4993d
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 9 deletions.
2 changes: 1 addition & 1 deletion code/lib/Loading_PudMed.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import time
sys.path.append('lib')

import lib.Medline
from Bio import Medline
import os

date = startTime = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
Expand Down
Binary file modified code/lib/__pycache__/Loading_PudMed.cpython-311.pyc
Binary file not shown.
16 changes: 8 additions & 8 deletions code/step1_data_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,16 @@
ld = literature_data_collection(email, output_dir, document_output_dir, api_key=api_key)

########### word query based literature data collection #################
gap=9000
batch = 400
w2d_starting_point = 2
# gap=9000
# batch = 400
# w2d_starting_point = 2

search_results, _word_end_point = ld.word_based_query_fit(year = years, user_term=word_query)
print('The number of avaliable abstracts :', _word_end_point, 'for ', word_query)
# search_results, _word_end_point = ld.word_based_query_fit(year = years, user_term=word_query)
# print('The number of avaliable abstracts :', _word_end_point, 'for ', word_query)

if int(sys.argv[2])==0:
word_end_point = _word_end_point
ld.collecting_doc_using_word_based_query(year = years, user_term=word_query, gap = gap, starting = gap*w2d_starting_point, ixs = w2d_starting_point, test_end_point=word_end_point)
# if int(sys.argv[2])==0:
# word_end_point = _word_end_point
# ld.collecting_doc_using_word_based_query(year = years, user_term=word_query, gap = gap, starting = gap*w2d_starting_point, ixs = w2d_starting_point, test_end_point=word_end_point)

########### gene name-query based literature data collection #################
query_full=ld.text_open('./data/gene_name_info/query_full_name.txt')
Expand Down
103 changes: 103 additions & 0 deletions esearch.dtd
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
<!--
This is the Current DTD for Entrez eSearch
$Id: eSearch_020511.dtd 85163 2006-06-28 17:35:21Z olegh $
-->
<!-- ================================================================= -->

<!--~~ !dtd
~~json
<json type='esearch' version='0.3'>
<config lcnames='true'/>
</json>
~~-->

<!ELEMENT eSearchResult (
(
(
Count,
( RetMax,
RetStart,
QueryKey?,
WebEnv?,
IdList,
TranslationSet,
TranslationStack?,
QueryTranslation
)?
) | ERROR
),
ErrorList?,
WarningList?
)>


<!ELEMENT Count (#PCDATA)> <!-- \d+ -->
<!ELEMENT RetMax (#PCDATA)> <!-- \d+ -->
<!ELEMENT RetStart (#PCDATA)> <!-- \d+ -->
<!ELEMENT Id (#PCDATA)> <!-- \d+ -->

<!ELEMENT From (#PCDATA)> <!-- .+ -->
<!ELEMENT To (#PCDATA)> <!-- .+ -->
<!ELEMENT Term (#PCDATA)> <!-- .+ -->

<!ELEMENT Field (#PCDATA)> <!-- .+ -->

<!ELEMENT QueryKey (#PCDATA)> <!-- \d+ -->
<!ELEMENT WebEnv (#PCDATA)> <!-- \S+ -->

<!ELEMENT Explode (#PCDATA)> <!-- (Y|N) -->
<!ELEMENT OP (#PCDATA)> <!-- (AND|OR|NOT|RANGE|GROUP) -->
<!ELEMENT IdList (Id*)>

<!ELEMENT Translation (From, To)>
<!ELEMENT TranslationSet (Translation*)>

<!ELEMENT TermSet (Term, Field, Count, Explode)>

<!--~~ <TranslationStack>
~~ json <array/>
~~-->
<!ELEMENT TranslationStack ((TermSet|OP)*)>

<!-- Error message tags -->
<!--~~ <ERROR>
~~ json <json key="ERROR"/>
~~-->
<!ELEMENT ERROR (#PCDATA)> <!-- .+ -->

<!ELEMENT OutputMessage (#PCDATA)> <!-- .+ -->

<!ELEMENT QuotedPhraseNotFound (#PCDATA)> <!-- .+ -->

<!ELEMENT PhraseIgnored (#PCDATA)> <!-- .+ -->

<!ELEMENT FieldNotFound (#PCDATA)> <!-- .+ -->

<!ELEMENT PhraseNotFound (#PCDATA)> <!-- .+ -->


<!ELEMENT QueryTranslation (#PCDATA)> <!-- .+ -->

<!--~~ <ErrorList>
~~ json
<object>
<array key="phrasesnotfound" select='PhraseNotFound'/>
<array key="fieldsnotfound" select='FieldNotFound'/>
</object>
~~-->
<!ELEMENT ErrorList (PhraseNotFound*,FieldNotFound*)>

<!--~~ <WarningList>
~~ json
<object>
<array key="phrasesignored" select='PhraseIgnored'/>
<array key="quotedphrasesnotfound" select='QuotedPhraseNotFound'/>
<array key="outputmessages" select='OutputMessage'/>
</object>
~~-->
<!ELEMENT WarningList ( PhraseIgnored*,
QuotedPhraseNotFound*,
OutputMessage* )>



0 comments on commit 0e4993d

Please sign in to comment.