Python 根据请求返回 422 错误 POST
Python is returning 422 error on request POST
我正在尝试部署网络抓取工具 - python Selenium 在 docker 容器中到数字海洋,我可以从不同的网站 request.POST url 到数字海洋。使用数字海洋上的控制台,我可以 运行 代码并且它工作正常。所以我认为问题在于我如何接收或发布 url 到网络抓取工具。目前正在 returning <Response [422]>
这是代码,我以字符串的形式将 extract_text_via_scraper_service 函数传递给 url,例如“https://google.com”,docker 应用程序应 return 字典形式的标题:
SCRAPER_API_TOKEN_HEADER=os.environ.get("SCRAPER_API_TOKEN_HEADER")
SCRAPER_API_ENDPOINT=os.environ.get("SCRAPER_API_ENDPOINT")
def extract_text_via_scraper_service(website): # website = url
answer = {}
if SCRAPER_API_ENDPOINT is None:
return answer
if SCRAPER_API_TOKEN_HEADER is None:
return answer
if website is None:
return answer
# send url through HTTP POST
# return dict {}
headers={
"Authorization": f"Bearer {SCRAPER_API_TOKEN_HEADER}"
}
r = requests.post(SCRAPER_API_ENDPOINT, data=website, headers=headers)
print(r)
if r.status_code in range(200, 299):
if r.headers.get("content-type") == 'application/json':
answer = r.json()
return answer
docker 文件:
import pathlib
import os
import io
from functools import lru_cache
from fastapi import (
FastAPI,
Header,
HTTPException,
Depends,
Request,
)
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import ElementNotInteractableException, NoSuchElementException, StaleElementReferenceException, TimeoutException, ElementClickInterceptedException
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from pydantic import BaseSettings
class Settings(BaseSettings):
app_auth_token: str
debug: bool = False
echo_active: bool = False
app_auth_token_prod: str = None
skip_auth: bool = False
class Config:
env_file = ".env"
@lru_cache
def get_settings():
return Settings()
settings = get_settings()
DEBUG=settings.debug
BASE_DIR = pathlib.Path(__file__).parent
UPLOAD_DIR = BASE_DIR / "uploads"
app = FastAPI()
templates = Jinja2Templates(directory=str(BASE_DIR / "templates"))
# REST API
@app.get("/", response_class=HTMLResponse) # http GET -> JSON
def home_view(request: Request, settings:Settings = Depends(get_settings)):
return templates.TemplateResponse("home.html", {"request": request, "abc": 123})
def verify_auth(authorization = Header(None), settings:Settings = Depends(get_settings)):
if settings.debug and settings.skip_auth:
return
if authorization is None:
raise HTTPException(detail="Invalid endpoint", status_code=401)
label, token = authorization.split()
if token != settings.app_auth_token:
raise HTTPException(detail="Invalid endpoint", status_code=401)
@app.post("/") # http POST
async def prediction_view(website, authorization = Header(None), settings:Settings = Depends(get_settings)):
verify_auth(authorization, settings)
options = webdriver.ChromeOptions()
options.headless = True
options.add_argument("--headless")
options.add_argument('--no-sandbox')
options.add_argument('--disable-gpu')
driver = webdriver.Chrome("/usr/local/bin/chromedriver", options=options)
wait = WebDriverWait(driver, 10)
driver.get(website)
title = "Sorry, we failed to get the correct name"
#title
try:
title = driver.find_element(By.XPATH, "//title")
title = title.get_attribute("innerText")
except:
pass
print(title)
return{"results": title, "original": title}
感谢任何帮助。
问题出在这里
旧:
@app.post("/") # http POST
async def prediction_view(website, authorization = Header(None), settings:Settings = Depends(get_settings)):
verify_auth(authorization, settings)
新:
class Item(BaseModel):
website: str
@app.post("/") # http POST
async def prediction_view(requested_url: Item, authorization = Header(None), settings:Settings = Depends(get_settings)):
确保您正在传递作为 JSON 发布的数据。可以找到相关的FastApi文档here,注意以下几点:
函数参数将被识别为:
如果在路径中也声明了参数,则作为
路径参数。
如果参数是单数类型(如int, float, str,
bool 等)它将被解释为查询参数。
如果参数声明为 Pydantic 模型的类型,
它将被解释为请求正文。
我正在尝试部署网络抓取工具 - python Selenium 在 docker 容器中到数字海洋,我可以从不同的网站 request.POST url 到数字海洋。使用数字海洋上的控制台,我可以 运行 代码并且它工作正常。所以我认为问题在于我如何接收或发布 url 到网络抓取工具。目前正在 returning <Response [422]>
这是代码,我以字符串的形式将 extract_text_via_scraper_service 函数传递给 url,例如“https://google.com”,docker 应用程序应 return 字典形式的标题:
SCRAPER_API_TOKEN_HEADER=os.environ.get("SCRAPER_API_TOKEN_HEADER")
SCRAPER_API_ENDPOINT=os.environ.get("SCRAPER_API_ENDPOINT")
def extract_text_via_scraper_service(website): # website = url
answer = {}
if SCRAPER_API_ENDPOINT is None:
return answer
if SCRAPER_API_TOKEN_HEADER is None:
return answer
if website is None:
return answer
# send url through HTTP POST
# return dict {}
headers={
"Authorization": f"Bearer {SCRAPER_API_TOKEN_HEADER}"
}
r = requests.post(SCRAPER_API_ENDPOINT, data=website, headers=headers)
print(r)
if r.status_code in range(200, 299):
if r.headers.get("content-type") == 'application/json':
answer = r.json()
return answer
docker 文件:
import pathlib
import os
import io
from functools import lru_cache
from fastapi import (
FastAPI,
Header,
HTTPException,
Depends,
Request,
)
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import ElementNotInteractableException, NoSuchElementException, StaleElementReferenceException, TimeoutException, ElementClickInterceptedException
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from pydantic import BaseSettings
class Settings(BaseSettings):
app_auth_token: str
debug: bool = False
echo_active: bool = False
app_auth_token_prod: str = None
skip_auth: bool = False
class Config:
env_file = ".env"
@lru_cache
def get_settings():
return Settings()
settings = get_settings()
DEBUG=settings.debug
BASE_DIR = pathlib.Path(__file__).parent
UPLOAD_DIR = BASE_DIR / "uploads"
app = FastAPI()
templates = Jinja2Templates(directory=str(BASE_DIR / "templates"))
# REST API
@app.get("/", response_class=HTMLResponse) # http GET -> JSON
def home_view(request: Request, settings:Settings = Depends(get_settings)):
return templates.TemplateResponse("home.html", {"request": request, "abc": 123})
def verify_auth(authorization = Header(None), settings:Settings = Depends(get_settings)):
if settings.debug and settings.skip_auth:
return
if authorization is None:
raise HTTPException(detail="Invalid endpoint", status_code=401)
label, token = authorization.split()
if token != settings.app_auth_token:
raise HTTPException(detail="Invalid endpoint", status_code=401)
@app.post("/") # http POST
async def prediction_view(website, authorization = Header(None), settings:Settings = Depends(get_settings)):
verify_auth(authorization, settings)
options = webdriver.ChromeOptions()
options.headless = True
options.add_argument("--headless")
options.add_argument('--no-sandbox')
options.add_argument('--disable-gpu')
driver = webdriver.Chrome("/usr/local/bin/chromedriver", options=options)
wait = WebDriverWait(driver, 10)
driver.get(website)
title = "Sorry, we failed to get the correct name"
#title
try:
title = driver.find_element(By.XPATH, "//title")
title = title.get_attribute("innerText")
except:
pass
print(title)
return{"results": title, "original": title}
感谢任何帮助。
问题出在这里
旧:
@app.post("/") # http POST
async def prediction_view(website, authorization = Header(None), settings:Settings = Depends(get_settings)):
verify_auth(authorization, settings)
新:
class Item(BaseModel):
website: str
@app.post("/") # http POST
async def prediction_view(requested_url: Item, authorization = Header(None), settings:Settings = Depends(get_settings)):
确保您正在传递作为 JSON 发布的数据。可以找到相关的FastApi文档here,注意以下几点: 函数参数将被识别为:
如果在路径中也声明了参数,则作为 路径参数。
如果参数是单数类型(如int, float, str, bool 等)它将被解释为查询参数。
如果参数声明为 Pydantic 模型的类型,
它将被解释为请求正文。