It's a little Python script that will extract all the questions.
Copy the following code, paste it into Notepad, and save it as extract.py. Then run it with Python.
Python must be installed before running this script.
# -*- coding: utf-8 -*-
import os
import re
import codecs
from bs4 import BeautifulSoup
import mechanize
def pre_database():
    r"""Fetch the quiz page repeatedly and store each fetch as plain text.

    For every ``i`` in ``range(0, 800)`` the page at
    http://www.gk.nepalwebtech.com/ is opened, every ``<div class="block">``
    element is collected, the markup is stripped, and the remaining
    non-blank lines are appended to
    ``c:\test\database\pre_data\quest_data_<i>.txt``.

    Each iteration passes its data through four scratch files in
    ``c:\test\database\tmp`` (data.txt -> done.txt -> mad.txt -> frd.txt)
    and deletes them again before the next fetch.

    Written for Python 2: it relies on ``mechanize`` and writes encoded
    byte strings to files opened in text mode.

    Raises whatever ``mechanize``/``open``/``os.remove`` raise on network
    or filesystem failure; nothing is caught here.
    """
    tmp_dir = 'c:\\test\\database\\tmp'
    pre_dir = 'c:\\test\\database\\pre_data'
    data_path = tmp_dir + '\\data.txt'
    done_path = tmp_dir + '\\done.txt'
    mad_path = tmp_dir + '\\mad.txt'
    frd_path = tmp_dir + '\\frd.txt'

    # Create the working folders once, and only when they are missing.
    # Calling os.makedirs/os.mkdir unguarded on every pass raises OSError
    # as soon as the folders exist (second iteration, or any rerun).
    for folder in (tmp_dir, pre_dir):
        if not os.path.isdir(folder):
            os.makedirs(folder)

    # Increase the range to collect more fetches, e.g. change 800 to 900.
    for i in range(0, 800):
        # A fresh Browser is created for every fetch, as in the original.
        br = mechanize.Browser()
        # This script only works for this site, but can be modified accordingly.
        link = br.open("http://www.gk.nepalwebtech.com/")
        bt = BeautifulSoup(link)
        fnd = bt.find_all("div", {"class": "block"})

        # data.txt: the matched blocks, still containing their HTML
        # (including any input form markup).
        with open(data_path, 'w') as raw_out:
            raw_out.write(str(fnd))

        # done.txt: the same content with all tags removed, UTF-8 encoded.
        with open(data_path) as raw_in:
            soup = BeautifulSoup(raw_in)
        all_txt = ''.join(soup.findAll(text=True))
        with open(done_path, 'w') as text_out:
            text_out.write(all_txt.encode("utf-8"))

        # mad.txt: done.txt without whitespace-only lines.
        with open(done_path, 'r') as src, open(mad_path, 'w') as dst:
            for line in src:
                if re.search(r'\S', line):
                    dst.write(line)

        # frd.txt: mad.txt without its first and last line.
        # NOTE(review): presumably these are the '[' and ']' produced by
        # str() of the result list -- confirm against real output.
        with open(mad_path) as src:
            lines = src.readlines()
        with open(frd_path, 'w') as dst:
            dst.writelines(lines[1:-1])

        # Final pass: append everything to this fetch's output file except
        # lines that are exactly ', \n' (presumably the separators between
        # list items in the str() output -- confirm).
        with open(frd_path) as src:
            lines = src.readlines()
        with open(pre_dir + '\\quest_data_' + str(i) + '.txt', 'a') as fin:
            for line in lines:
                if line == ', \n':
                    print('processing ... ' + line)
                else:
                    fin.write(line)

        # Delete the scratch files so the next iteration starts clean.
        for path in (data_path, done_path, frd_path, mad_path):
            os.remove(path)
# Creating database
def post_database():
    r"""Convert ``quest_data_0.txt`` into one line of question records.

    Reads ``c:\test\database\pre_data\quest_data_0.txt``, drops blank
    lines, and treats the remaining lines as consecutive groups of five:
    a question followed by four answers. Each group is formatted as a
    ``{"groupDesc":"","ques":...,"A":...,"B":...,"C":...,"D":...,
    "corr":"A","exp":""},`` record; the correct answer is always written
    as ``A``.

    The user is prompted for a file name. Records are appended to
    ``C:\test\database\tmp\<name>.txt`` and that file is then copied, with
    every newline removed, to ``C:\test\database\done_<name>.txt``.

    Written for Python 2 (``raw_input``).
    """
    with open("c:\\test\\database\\pre_data\\quest_data_0.txt", 'r') as fin:
        lines = fin.readlines()
    num_lines = [ln for ln in lines if ln != '\n']

    filename = raw_input("enter filename : ")
    print(len(num_lines))
    # Pause so the line count can be inspected; the typed value is ignored.
    raw_input('wait')

    tmp_path = "C:\\test\\database\\tmp\\" + filename + ".txt"
    done_path = "C:\\test\\database\\done_" + filename + ".txt"

    # Append mode is kept from the original: an existing tmp file with the
    # same name is extended, not replaced.
    with open(tmp_path, 'a') as target:
        # Step through the blank-free list in groups of five. Indexing the
        # same list the bound was computed from keeps groups aligned even
        # if the input contains blank lines.
        # NOTE(review): the bound len(num_lines) - 5 is kept from the
        # original; it skips the final group when the line count is an
        # exact multiple of 5 -- confirm whether that is intended.
        for i in range(0, len(num_lines) - 5, 5):
            quest, ans1, ans2, ans3, ans4 = num_lines[i:i + 5]
            for part in (quest, ans1, ans2, ans3, ans4):
                print(part)
            cor = 'A'
            # The "ques" field previously had a stray doubled quote
            # ('"ques":""%s"'), unlike every other field in the record.
            data = '{"groupDesc":"","ques":"%s","A":"%s","B":"%s","C":"%s","D":"%s","corr":"%s","exp":""},' % (quest, ans1, ans2, ans3, ans4, cor)
            print(' Writing to file..........' + data)
            target.write(data)

    # The source lines still carry their trailing newlines, so the records
    # in the tmp file span several lines; strip every newline to produce
    # the final single-line file (overwritten on each run).
    with open(tmp_path, 'r') as src, open(done_path, 'w') as dst:
        for line in src:
            dst.write(line.replace('\n', ''))
# Ask which stage to run. raw_input() returns a string, so the answer is
# compared with '1'; comparing with the integer 1 never matches and would
# make post_database() unreachable. Any other answer runs pre_database().
operation = raw_input('Enter 1 to create Post-Database \n Enter 2 to create pre-Database \n Note: Post Database is a final porcedure = ')
if operation.strip() == '1':
    post_database()
else:
    pre_database()
Copy the following code, paste it into Notepad, and save it as extract.py. Then run it with Python.
Python must be installed before running this script.
# -*- coding: utf-8 -*-
import os
import re
import codecs
from bs4 import BeautifulSoup
import mechanize
def pre_database():
    r"""Fetch the quiz page repeatedly and store each fetch as plain text.

    For every ``i`` in ``range(0, 800)`` the page at
    http://www.gk.nepalwebtech.com/ is opened, every ``<div class="block">``
    element is collected, the markup is stripped, and the remaining
    non-blank lines are appended to
    ``c:\test\database\pre_data\quest_data_<i>.txt``.

    Each iteration passes its data through four scratch files in
    ``c:\test\database\tmp`` (data.txt -> done.txt -> mad.txt -> frd.txt)
    and deletes them again before the next fetch.

    Written for Python 2: it relies on ``mechanize`` and writes encoded
    byte strings to files opened in text mode.

    Raises whatever ``mechanize``/``open``/``os.remove`` raise on network
    or filesystem failure; nothing is caught here.
    """
    tmp_dir = 'c:\\test\\database\\tmp'
    pre_dir = 'c:\\test\\database\\pre_data'
    data_path = tmp_dir + '\\data.txt'
    done_path = tmp_dir + '\\done.txt'
    mad_path = tmp_dir + '\\mad.txt'
    frd_path = tmp_dir + '\\frd.txt'

    # Create the working folders once, and only when they are missing.
    # Calling os.makedirs/os.mkdir unguarded on every pass raises OSError
    # as soon as the folders exist (second iteration, or any rerun).
    for folder in (tmp_dir, pre_dir):
        if not os.path.isdir(folder):
            os.makedirs(folder)

    # Increase the range to collect more fetches, e.g. change 800 to 900.
    for i in range(0, 800):
        # A fresh Browser is created for every fetch, as in the original.
        br = mechanize.Browser()
        # This script only works for this site, but can be modified accordingly.
        link = br.open("http://www.gk.nepalwebtech.com/")
        bt = BeautifulSoup(link)
        fnd = bt.find_all("div", {"class": "block"})

        # data.txt: the matched blocks, still containing their HTML
        # (including any input form markup).
        with open(data_path, 'w') as raw_out:
            raw_out.write(str(fnd))

        # done.txt: the same content with all tags removed, UTF-8 encoded.
        with open(data_path) as raw_in:
            soup = BeautifulSoup(raw_in)
        all_txt = ''.join(soup.findAll(text=True))
        with open(done_path, 'w') as text_out:
            text_out.write(all_txt.encode("utf-8"))

        # mad.txt: done.txt without whitespace-only lines.
        with open(done_path, 'r') as src, open(mad_path, 'w') as dst:
            for line in src:
                if re.search(r'\S', line):
                    dst.write(line)

        # frd.txt: mad.txt without its first and last line.
        # NOTE(review): presumably these are the '[' and ']' produced by
        # str() of the result list -- confirm against real output.
        with open(mad_path) as src:
            lines = src.readlines()
        with open(frd_path, 'w') as dst:
            dst.writelines(lines[1:-1])

        # Final pass: append everything to this fetch's output file except
        # lines that are exactly ', \n' (presumably the separators between
        # list items in the str() output -- confirm).
        with open(frd_path) as src:
            lines = src.readlines()
        with open(pre_dir + '\\quest_data_' + str(i) + '.txt', 'a') as fin:
            for line in lines:
                if line == ', \n':
                    print('processing ... ' + line)
                else:
                    fin.write(line)

        # Delete the scratch files so the next iteration starts clean.
        for path in (data_path, done_path, frd_path, mad_path):
            os.remove(path)
# Creating database
def post_database():
    r"""Convert ``quest_data_0.txt`` into one line of question records.

    Reads ``c:\test\database\pre_data\quest_data_0.txt``, drops blank
    lines, and treats the remaining lines as consecutive groups of five:
    a question followed by four answers. Each group is formatted as a
    ``{"groupDesc":"","ques":...,"A":...,"B":...,"C":...,"D":...,
    "corr":"A","exp":""},`` record; the correct answer is always written
    as ``A``.

    The user is prompted for a file name. Records are appended to
    ``C:\test\database\tmp\<name>.txt`` and that file is then copied, with
    every newline removed, to ``C:\test\database\done_<name>.txt``.

    Written for Python 2 (``raw_input``).
    """
    with open("c:\\test\\database\\pre_data\\quest_data_0.txt", 'r') as fin:
        lines = fin.readlines()
    num_lines = [ln for ln in lines if ln != '\n']

    filename = raw_input("enter filename : ")
    print(len(num_lines))
    # Pause so the line count can be inspected; the typed value is ignored.
    raw_input('wait')

    tmp_path = "C:\\test\\database\\tmp\\" + filename + ".txt"
    done_path = "C:\\test\\database\\done_" + filename + ".txt"

    # Append mode is kept from the original: an existing tmp file with the
    # same name is extended, not replaced.
    with open(tmp_path, 'a') as target:
        # Step through the blank-free list in groups of five. Indexing the
        # same list the bound was computed from keeps groups aligned even
        # if the input contains blank lines.
        # NOTE(review): the bound len(num_lines) - 5 is kept from the
        # original; it skips the final group when the line count is an
        # exact multiple of 5 -- confirm whether that is intended.
        for i in range(0, len(num_lines) - 5, 5):
            quest, ans1, ans2, ans3, ans4 = num_lines[i:i + 5]
            for part in (quest, ans1, ans2, ans3, ans4):
                print(part)
            cor = 'A'
            # The "ques" field previously had a stray doubled quote
            # ('"ques":""%s"'), unlike every other field in the record.
            data = '{"groupDesc":"","ques":"%s","A":"%s","B":"%s","C":"%s","D":"%s","corr":"%s","exp":""},' % (quest, ans1, ans2, ans3, ans4, cor)
            print(' Writing to file..........' + data)
            target.write(data)

    # The source lines still carry their trailing newlines, so the records
    # in the tmp file span several lines; strip every newline to produce
    # the final single-line file (overwritten on each run).
    with open(tmp_path, 'r') as src, open(done_path, 'w') as dst:
        for line in src:
            dst.write(line.replace('\n', ''))
# Ask which stage to run. raw_input() returns a string, so the answer is
# compared with '1'; comparing with the integer 1 never matches and would
# make post_database() unreachable. Any other answer runs pre_database().
operation = raw_input('Enter 1 to create Post-Database \n Enter 2 to create pre-Database \n Note: Post Database is a final porcedure = ')
if operation.strip() == '1':
    post_database()
else:
    pre_database()
No comments:
Post a Comment