tony8310的个人博客分享 http://blog.sciencenet.cn/u/tony8310

博文

用R获取中国天气网实时天气信息

已有 1619 次阅读 2022-3-17 13:42 |系统分类:科研笔记

用R获取中国天气网实时天气信息。代码比较粗糙,可能有错误。

pcks<-c("httr", "rlist", "jsonlite","dplyr","magrittr","xml2","stringr","rio",
        "readr","gsubfn","rrapply","rvest")
lapply(pcks, require, character.only = TRUE)


WD <- getwd() #获取工作目录
if (!is.null(WD))
  setwd(WD)
date <-Sys.Date() # 获取系统日期
now <- Sys.time() # 获取系统时间
Jan1 <- strptime("1970-01-01 00:00:00", "%Y-%m-%d %H:%M:%S")
timestamp <- as.numeric(difftime(now, Jan1, unit="secs"))#  时间戳 unix timestamps
timestamps <- round(timestamp*1000) #取整,13位时间戳,四舍六入五成双
ts <- as.character(round(timestamp))

###------下载/导入站点ID-------
myheaders1 <- c('Host'='j.i8tq.com',
               'User-Agent'='Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0',
               'Accept'='*/*',
                'Accept-Language'= 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
               'Accept-Encoding'= 'gzip, deflate, br',
               'Connection'= 'keep-alive',
               'Referer'= 'http://www.weather.com.cn/',
               'Sec-Fetch-Dest'= 'script',
               'Sec-Fetch-Mode'= 'no-cors',
               'Sec-Fetch-Site'= 'cross-site')
url <- c('https://j.i8tq.com/weather2020/search/city.js')
download_html(url, paste('temp.txt'))# response is a js html file, UTF-8 object
library("tcltk")
button <- tkmessageBox(title='Message',
                       message='Delete the FIRST line of temp.txt
                       删除 temp.txt的第一行',
                       type='ok')
button <- tclvalue(button)
if(button == 'ok'){
  #do something
}
dat <- read_json('temp.txt')
city <- rrapply(dat, how = "melt") %>%
  filter(L4 == "AREAID") %>%
  select(Sheng = L2, Shi = L3,  AreadID = value)
city
export(city,paste('City_ID',".csv",sep=""))
file.remove('temp.txt')
###-----下载天气实时数据--------
tq_ID <-import("City_ID.csv", header = T) # read the stations
tq_ID <- tq_ID[,3] #只取第3列,编号
tq_ID <- data.frame(matrix(unlist(tq_ID), nrow=1, byrow=T))
tq_ID <- as.character(tq_ID)

mycookie <- paste('f_city=%E5%A4%A9%E6%B4%A5%7C101030100%7C')

myheaders <- c('Accept' ='*/*',
               'Accept-Encoding'='gzip, deflate',
               'Accept-Language'='zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
               'Connection'='keep-alive',
               'Host' ='d1.weather.com.cn',
               'Referer'='http://www.weather.com.cn/',
               'cookie' = mycookie)
i=1
n <- length(tq_ID)
tqs_inf <- data.frame()
for (i in 1:n){
  url <- str_c('http://d1.weather.com.cn/sk_2d/',tq_ID[[i]],'.html?','_=', paste(round(timestamps)))
  resp<- GET(url = url, add_headers(.headers = myheaders),encode="raw" )  
  HTML <- read_html(x= resp)
  s <- html_text2(HTML)
  s1 <- str_sub(s,12,-1) #sub string in {}
  tq_inf <- fromJSON(s1)
  tq_inf <- data.frame(matrix(lapply(tq_inf,as.character),nrow=1)) #将list转换为dataframe
  if (is.null (tq_inf$X23)) {
    next  } # 如果站点没有数值,进入下一个循环
  tqs_inf <- bind_rows(tqs_inf,tq_inf) #变量的列数不等,智能合并
  }
file_name <- paste("tq_real",date,".txt")
export(tqs_inf, file=file_name)





https://wap.sciencenet.cn/blog-331295-1329835.html

上一篇:windows R 句法分析器里不能有多字节字符 bat 不能运行
下一篇:windows 定时任务 OnTime HOU 任务计划
收藏 IP: 202.113.99.*| 热度|

0

该博文允许注册用户评论 请点击登录 评论 (0 个评论)

数据加载中...

Archiver|手机版|科学网 ( 京ICP备07017567号-12 )

GMT+8, 2024-5-3 00:05

Powered by ScienceNet.cn

Copyright © 2007- 中国科学报社

返回顶部